Code · src/agents/baseline/normalization/llm.ts

src/agents/baseline/normalization/llm.ts 7,941 bytes · typescript
/**
 * Normalization — LLM judgment calls (Std 3 + Std 5).
 *
 * Self-contained Anthropic SDK wrapper. Code duplicated from sibling
 * agents intentionally — no shared tool layer.
 *
 * Std 12: returns a structured `needs-api-key` failure if the LLM is
 * not configured. Deterministic steps continue.
 */

import Anthropic from '@anthropic-ai/sdk';
import type { TextBlock } from '@anthropic-ai/sdk/resources/messages.js';
import { z } from 'zod';

import { recordUsage } from '../../../observability/usage.js';
import type { PromptScope } from '../../../standards.js';
import { buildSystemPrompt } from './prompt.js';

// The key is read once at module load. An unset or empty value leaves
// `client` as null, which `call` reports as a `needs-api-key` failure.
const apiKey = process.env['ANTHROPIC_API_KEY'];
const client: Anthropic | null = apiKey ? new Anthropic({ apiKey }) : null;

if (client === null) {
  // eslint-disable-next-line no-console
  console.log(`[baseline.normalization] ANTHROPIC_API_KEY not set — novel-label mapping will return a 'needs-api-key' failure; learned-rule lookups still work.`);
}

// Model id used for every request and passed to usage recording.
const MODEL = 'claude-haiku-4-5';

/** Structured description of why an LLM-backed step produced no value. */
export interface LlmFailure {
  /** Machine-readable failure class. */
  readonly category:
    | 'needs-api-key'
    | 'invalid-response'
    | 'sdk-error'
    | 'empty-response'
    | 'no-match';
  /** Human-readable explanation of the failure. */
  readonly reason: string;
  /** Optional suggestion for how to resolve the failure. */
  readonly hint?: string;
}

/** Outcome of an LLM-backed step; `ok` discriminates success from failure. */
export type LlmResult<T> =
  | { ok: true; value: T }
  | { ok: false; failure: LlmFailure };

/**
 * Best-effort extraction of a JSON value from model output.
 *
 * A leading ``` / ```json fence and a trailing ``` fence are stripped
 * and the remainder is parsed as-is. If that fails, the span from the
 * first `{` to the last `}` is parsed instead.
 *
 * @param text - Raw model output.
 * @returns The parsed value, or `null` when neither attempt yields valid JSON.
 */
function parseJsonResponse(text: string): unknown {
  const attempt = (candidate: string): { parsed: boolean; value: unknown } => {
    try {
      return { parsed: true, value: JSON.parse(candidate) };
    } catch {
      return { parsed: false, value: null };
    }
  };

  const body = text.replace(/^```(?:json)?\s*/i, '').replace(/```\s*$/i, '').trim();
  const whole = attempt(body);
  if (whole.parsed) return whole.value;

  // Greedy on purpose: spans from the first `{` to the last `}` in the text.
  const embedded = /\{[\s\S]*\}/.exec(body);
  return embedded === null ? null : attempt(embedded[0]).value;
}

/**
 * Sends one user message to the model and returns the raw text reply.
 * No tools are offered and a single request is made per invocation.
 *
 * Design note: Normalization works only with what Source/Extraction
 * handed it (Std 1 — single clear responsibility, explicit boundaries).
 * It does NOT fetch fresh data, because doing so would let the agent
 * silently disagree with upstream provenance and break the lineage
 * chain (Std 4, Std 11). A label that cannot be mapped from the
 * supplied vocabulary is escalated to HITL through the existing
 * `no-match` failure path.
 *
 * @param userMessage - Content of the single user turn.
 * @param maxTokens - Output token cap for the request (defaults to 400).
 * @param scope - Optional scope forwarded to `buildSystemPrompt`.
 * @returns The text of the first text block on success. Failures are
 *   `needs-api-key` (no client configured), `sdk-error` (the request
 *   threw) or `empty-response` (no non-whitespace text came back).
 */
async function call(userMessage: string, maxTokens = 400, scope?: PromptScope): Promise<LlmResult<string>> {
  const fail = (failure: LlmFailure): LlmResult<string> => ({ ok: false, failure });

  if (client === null) {
    return fail({
      category: 'needs-api-key',
      reason: 'ANTHROPIC_API_KEY is not configured.',
      hint: 'Set ANTHROPIC_API_KEY or escalate this novel label to HITL.',
    });
  }

  let response;
  try {
    response = await client.messages.create({
      model: MODEL,
      max_tokens: maxTokens,
      system: buildSystemPrompt(scope),
      messages: [{ role: 'user', content: userMessage }],
    });
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    return fail({ category: 'sdk-error', reason });
  }

  // Usage is recorded for every completed request, including replies
  // that turn out to carry no text.
  const { input_tokens: inputTokens, output_tokens: outputTokens } = response.usage;
  recordUsage('baseline.normalization', MODEL, inputTokens, outputTokens);

  // Only the first text block is considered.
  const firstText = response.content.find((block): block is TextBlock => block.type === 'text');
  const text = firstText === undefined ? '' : firstText.text;
  if (text.trim().length === 0) {
    return fail({ category: 'empty-response', reason: 'LLM returned no text.' });
  }
  return { ok: true, value: text };
}

/* ------------------------------------------------------------------ *
 * Step 3 (normalize-terminology): map a novel raw label to a canonical
 * metric key from the supplied vocabulary, with citation.
 * ------------------------------------------------------------------ */

// Fields the model must return for a terminology mapping.
const metricMappingFields = {
  // Chosen key, or null when the model declines to map the label.
  canonical: z.string().nullable(),
  // Bounded to the closed interval [0, 1].
  confidence: z.number().min(0).max(1),
  rationale: z.string(),
};

/** Validates the parsed JSON reply of the terminology-mapping step. */
const metricMappingSchema = z.object(metricMappingFields);

/** Shape of a reply that passed `metricMappingSchema`. */
export type MetricMappingResult = z.infer<typeof metricMappingSchema>;

/** Input for `mapTerminology`. */
export interface MetricMappingRequest {
  /** Label to map; it is embedded in the prompt as a JSON string. */
  readonly rawLabel: string;
  /** Entity name, interpolated into the prompt as-is. */
  readonly entity: string;
  /** Candidate canonical keys with their definitions. */
  readonly vocabulary: ReadonlyArray<{
    readonly key: string;
    readonly definition: string;
  }>;
}

/**
 * Asks the model to map a novel raw label onto one canonical key from
 * the supplied vocabulary.
 *
 * @param req - Raw label, entity and candidate vocabulary.
 * @returns The validated mapping when the chosen key is present in
 *   `req.vocabulary`. Otherwise a failure: whatever `call` reported,
 *   `invalid-response` when the reply does not satisfy the schema, or
 *   `no-match` when the model returns no key or a key outside the
 *   vocabulary.
 */
export async function mapTerminology(req: MetricMappingRequest): Promise<LlmResult<MetricMappingResult>> {
  const { rawLabel, entity, vocabulary } = req;
  const noMatch = (reason: string): LlmResult<MetricMappingResult> => ({
    ok: false,
    failure: { category: 'no-match', reason },
  });

  const vocabularyLines = vocabulary.map(({ key, definition }) => `  - ${key}: ${definition}`);
  const prompt = [
    `## Runbook step 3 — normalize-terminology (novel label)`,
    ``,
    `RAW LABEL: ${JSON.stringify(rawLabel)}`,
    `ENTITY: ${entity}`,
    `VOCABULARY (canonical metric keys with definitions):`,
  ]
    .concat(vocabularyLines, [
      ``,
      `Choose the single best canonical key for this raw label, or null if no key is a defensible match.`,
      `You are not a domain expert; rely strictly on the supplied definitions (Std 4 — avoid forced mappings).`,
      `Return STRICT JSON only:`,
      `{ "canonical": string | null, "confidence": number, "rationale": string }`,
    ])
    .join('\n');

  /* Std 5 — cost-appropriate prompt: a single-turn JSON classification
   * over a small vocabulary. The scope engages Std 3
   * (deterministic-where-possible judgment), Std 4 (no forced mapping),
   * Std 5 (capability bounds), Std 7 (confidence) and Std 12 (fail
   * safely). */
  const reply = await call(prompt, 400, {
    engagedStandards: [3, 4, 5, 7, 12],
    stepLabel: 'normalize-terminology',
  });
  if (!reply.ok) return reply;

  const validation = metricMappingSchema.safeParse(parseJsonResponse(reply.value));
  if (!validation.success) {
    return {
      ok: false,
      failure: {
        category: 'invalid-response',
        reason: `LLM response did not match schema: ${validation.error.message}`,
      },
    };
  }

  const { canonical, rationale } = validation.data;
  // A null (or empty) key means the model declined to map the label.
  if (!canonical) {
    return noMatch(`LLM declined to map "${rawLabel}": ${rationale}`);
  }
  // Reject keys the model made up: the answer must come from the vocabulary.
  if (vocabulary.every(entry => entry.key !== canonical)) {
    return noMatch(`LLM chose "${canonical}" which is not in the supplied vocabulary`);
  }
  return { ok: true, value: validation.data };
}

/* ------------------------------------------------------------------ *
 * Step 4 (normalize-units-and-periods): convert a raw value+unit to
 * the target unit declared in the JobRequest. The LLM produces a
 * conversion factor with citation (Std 4 — no hardcoded FX in the
 * framework; rate comes with a rationale the audit can review).
 * ------------------------------------------------------------------ */

// Fields the model must return for a unit conversion.
const unitConversionFields = {
  // Null when the model reports it cannot defensibly convert.
  convertedValue: z.number().nullable(),
  factor: z.number().nullable(),
  rationale: z.string(),
  // Bounded to the closed interval [0, 1].
  confidence: z.number().min(0).max(1),
};

/** Validates the parsed JSON reply of the unit-conversion step. */
const unitConversionSchema = z.object(unitConversionFields);

/** Shape of a reply that passed `unitConversionSchema`. */
export type UnitConversionResult = z.infer<typeof unitConversionSchema>;

/** Input for `convertUnit`: a value, its current unit, and the unit to convert into. */
export interface UnitConversionRequest {
  /** Value to convert; sent to the model as RAW VALUE. */
  readonly rawValue: number;
  /** Unit of `rawValue`; interpolated into the prompt as-is. */
  readonly rawUnit: string;
  /** Unit to convert into; interpolated into the prompt as-is. */
  readonly targetUnit: string;
}

/**
 * Asks the model to convert a raw value from its unit into the target unit.
 *
 * @param req - Value, source unit and target unit.
 * @returns The schema-validated reply on success; `convertedValue` and
 *   `factor` may be null in a successful result. Otherwise a failure:
 *   whatever `call` reported, or `invalid-response` when the reply does
 *   not satisfy the schema.
 */
export async function convertUnit(req: UnitConversionRequest): Promise<LlmResult<UnitConversionResult>> {
  const inputs = [
    `RAW VALUE: ${req.rawValue}`,
    `RAW UNIT: ${req.rawUnit}`,
    `TARGET UNIT: ${req.targetUnit}`,
  ];
  const guidance = [
    `If the conversion is unambiguous (e.g. scale change like MM↔BN), apply it.`,
    `If the conversion requires a snapshot rate (e.g. FX), return your best-effort factor and a citation in the rationale.`,
    `If you cannot defensibly convert, return convertedValue=null and explain why (Std 4 — no fabrication).`,
    `Return STRICT JSON only:`,
    `{ "convertedValue": number | null, "factor": number | null, "rationale": string, "confidence": number }`,
  ];
  const prompt = [`## Runbook step 4 — normalize-units`, ``, ...inputs, ``, ...guidance].join('\n');

  /* Std 5 — cost-appropriate prompt: a lightweight single-turn unit
   * conversion with a citation. The scope engages Std 4 (no fabrication
   * of rates), Std 5 (capability bounds), Std 7 (confidence) and
   * Std 12. */
  const reply = await call(prompt, 300, {
    engagedStandards: [4, 5, 7, 12],
    stepLabel: 'normalize-units',
  });
  if (!reply.ok) return reply;

  const validation = unitConversionSchema.safeParse(parseJsonResponse(reply.value));
  return validation.success
    ? { ok: true, value: validation.data }
    : {
        ok: false,
        failure: {
          category: 'invalid-response',
          reason: `LLM response did not match schema: ${validation.error.message}`,
        },
      };
}