Code · src/agents/baseline/resolution/llm.ts

src/agents/baseline/resolution/llm.ts 5,248 bytes · typescript
/**
 * Resolution — LLM judgment calls (Std 3 + Std 5).
 *
 * Self-contained Anthropic SDK wrapper. Duplicated across agents per
 * the refactor spec — no shared tool layer.
 *
 * Std 12: returns `needs-api-key` failure when ANTHROPIC_API_KEY is
 * not configured; deterministic remediation paths still execute.
 */

import Anthropic from '@anthropic-ai/sdk';
import type { TextBlock } from '@anthropic-ai/sdk/resources/messages.js';
import { z } from 'zod';

import { recordUsage } from '../../../observability/usage.js';
import type { PromptScope } from '../../../standards.js';
import { buildSystemPrompt } from './prompt.js';

/** Model identifier sent with every request issued from this module. */
const MODEL = 'claude-haiku-4-5';

// The key is read once at module load. An unset or empty value leaves
// `client` as null, which the request helper reports as a structured
// 'needs-api-key' failure instead of attempting a call.
const apiKey = process.env.ANTHROPIC_API_KEY;
const client = apiKey ? new Anthropic({ apiKey }) : null;

if (client === null) {
  // Emitted once, at import time, so a missing key is visible in the logs.
  // eslint-disable-next-line no-console
  console.log(
    `[baseline.resolution] ANTHROPIC_API_KEY not set — LLM-judgment remediation steps will return a structured 'needs-api-key' failure; deterministic remediation still runs.`,
  );
}

/**
 * Structured description of why an LLM step produced no usable value.
 */
export interface LlmFailure {
  /** Machine-readable failure class. */
  readonly category:
    | 'needs-api-key'
    | 'invalid-response'
    | 'sdk-error'
    | 'empty-response';
  /** Human-readable explanation of what went wrong. */
  readonly reason: string;
  /** Optional suggestion for how to proceed. */
  readonly hint?: string;
}

/**
 * Outcome of an LLM step, discriminated on `ok`: either the produced
 * value or an {@link LlmFailure} describing why there is none.
 */
export type LlmResult<T> =
  | { ok: true; value: T }
  | { ok: false; failure: LlmFailure };

/**
 * Best-effort extraction of a JSON value from raw LLM text.
 *
 * Surrounding whitespace and an enclosing Markdown code fence (with or
 * without a `json` tag) are stripped, then the remainder is parsed as
 * JSON. If that fails, the span from the first `{` to the last `}` is
 * tried as a fallback so an object embedded in prose is still recovered.
 *
 * @param text Raw model output.
 * @returns The parsed value, or `null` when nothing parseable is found.
 *   A literal JSON `null` also yields `null`, so callers cannot tell the
 *   two apart and should validate the result.
 */
function parseJsonResponse(text: string): unknown {
  // Trim BEFORE stripping fences: the opening-fence pattern is anchored at
  // `^`, so a reply that starts with a newline or spaces would otherwise
  // keep its fence and fail the direct parse.
  const cleaned = text
    .trim()
    .replace(/^```(?:json)?\s*/i, '')
    .replace(/```\s*$/i, '')
    .trim();
  try {
    return JSON.parse(cleaned);
  } catch {
    /* not pure JSON — fall through to object extraction */
  }
  // Greedy on purpose: first `{` through last `}` covers nested objects.
  const m = cleaned.match(/\{[\s\S]*\}/);
  if (!m) return null;
  try {
    return JSON.parse(m[0]);
  } catch {
    return null;
  }
}

/**
 * Send one user message to the model — a single turn, with no tools —
 * and hand back the text of its reply.
 *
 * Why no tools: Resolution triages and remediates issues that
 * Normalization raised on records already in flight (Std 1 — single
 * clear responsibility). It deliberately does not fetch fresh data;
 * doing so would let a remediation silently alter provenance, and the
 * lineage chain (Std 4, Std 11) would stop reflecting what
 * Source/Extraction actually retrieved. Issues that cannot be resolved
 * this way escalate to HITL.
 *
 * @param userMessage Content of the single user turn.
 * @param maxTokens Output-token cap passed to the API (default 400).
 * @param scope Optional prompt scope forwarded to `buildSystemPrompt`.
 * @returns The reply text on success; otherwise a failure categorised as
 *   `needs-api-key` (no client configured), `sdk-error` (the request
 *   threw) or `empty-response` (no non-blank text in the reply).
 */
async function call(userMessage: string, maxTokens = 400, scope?: PromptScope): Promise<LlmResult<string>> {
  // No configured client: report it as data rather than throwing.
  if (client === null) {
    return {
      ok: false,
      failure: {
        category: 'needs-api-key',
        reason: 'ANTHROPIC_API_KEY is not configured.',
        hint: 'Set ANTHROPIC_API_KEY or escalate this issue to HITL.',
      },
    };
  }

  let response;
  try {
    // The system prompt is built inside the try so that a failure there is
    // reported the same way as a failed request.
    response = await client.messages.create({
      model: MODEL,
      max_tokens: maxTokens,
      system: buildSystemPrompt(scope),
      messages: [{ role: 'user', content: userMessage }],
    });
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return { ok: false, failure: { category: 'sdk-error', reason } };
  }

  // Usage is recorded for every completed request, including ones whose
  // reply turns out to be empty.
  const { input_tokens: inputTokens, output_tokens: outputTokens } = response.usage;
  recordUsage('baseline.resolution', MODEL, inputTokens, outputTokens);

  // Only the first text block of the reply is used.
  const firstTextBlock = response.content.find((block): block is TextBlock => block.type === 'text');
  let replyText = '';
  if (firstTextBlock) {
    replyText = firstTextBlock.text;
  }

  if (replyText.trim()) {
    return { ok: true, value: replyText };
  }
  return { ok: false, failure: { category: 'empty-response', reason: 'LLM returned no text.' } };
}

/* ------------------------------------------------------------------ *
 * Step 2/3: triage + remediate. The LLM reviews an unresolved issue,
 * decides severity + remediation path + whether HITL is required.
 * ------------------------------------------------------------------ */

/** Severity levels a triage response may report. */
const TRIAGE_SEVERITIES = ['low', 'medium', 'high', 'critical'] as const;

/** Remediation routes a triage response may choose between. */
const TRIAGE_REMEDIATION_PATHS = ['automation', 'hitl'] as const;

/**
 * Shape the model's triage reply must satisfy before it is accepted.
 * `confidence` is constrained to the closed interval [0, 1].
 */
const triageSchema = z.object({
  severity: z.enum(TRIAGE_SEVERITIES),
  remediationPath: z.enum(TRIAGE_REMEDIATION_PATHS),
  recommendedAction: z.string(),
  rationale: z.string(),
  confidence: z.number().min(0).max(1),
});

/** Validated triage verdict, derived from the schema above. */
export type TriageResult = z.infer<typeof triageSchema>;

/** Input describing the issue to be triaged. */
export interface TriageRequest {
  /** Issue category, inserted verbatim into the prompt. */
  readonly category: string;
  /** Issue detail, inserted verbatim into the prompt. */
  readonly detail: string;
  /** Optional extra data; serialised with `JSON.stringify` when present. */
  readonly context?: Record<string, unknown>;
}

/**
 * Ask the model to triage one unresolved issue.
 *
 * Builds the "review-issue" prompt from the request, sends it as a single
 * turn, and validates the reply against `triageSchema`.
 *
 * @param req Issue category, detail and optional context to triage.
 * @returns The validated triage verdict on success. Failures from the
 *   underlying request are passed through unchanged; a reply that is not
 *   JSON or does not match the schema yields an `invalid-response`
 *   failure carrying the validation message.
 */
export async function triageIssue(req: TriageRequest): Promise<LlmResult<TriageResult>> {
  const user = [
    `## Runbook step 2 — review-issue`,
    ``,
    `ISSUE CATEGORY: ${req.category}`,
    `ISSUE DETAIL: ${req.detail}`,
    // Only the CONTEXT line is optional. It is spliced in conditionally
    // rather than removed with `.filter(Boolean)`, because that filter also
    // strips the intentional blank separator lines (empty strings are falsy).
    ...(req.context ? [`CONTEXT: ${JSON.stringify(req.context)}`] : []),
    ``,
    `Determine: severity (low/medium/high/critical), remediation path (automation/hitl), one concrete recommended action, and your confidence in the assessment.`,
    `Return STRICT JSON only:`,
    `{ "severity": "low"|"medium"|"high"|"critical", "remediationPath": "automation"|"hitl", "recommendedAction": string, "rationale": string, "confidence": number }`,
  ].join('\n');
  /* Std 5 — cost-appropriate prompt: lightweight single-turn triage
   * classification. Engages Std 3 (explicit judgment with citation),
   * Std 7 (confidence), Std 8 (trigger categorisation), Std 9 (HITL
   * routing), Std 12 (fail safely). */
  const r = await call(user, 400, {
    engagedStandards: [3, 7, 8, 9, 12],
    stepLabel: 'review-issue',
  });
  // Request-level failures (missing key, SDK error, empty reply) pass through as-is.
  if (!r.ok) return r;
  // An unparseable reply becomes `null` here and is rejected by the schema.
  const parsed = triageSchema.safeParse(parseJsonResponse(r.value));
  if (!parsed.success) {
    return { ok: false, failure: { category: 'invalid-response', reason: `Triage LLM response did not match schema: ${parsed.error.message}` } };
  }
  return { ok: true, value: parsed.data };
}