Code · src/agents/baseline/source-extraction/llm.ts

src/agents/baseline/source-extraction/llm.ts 17,890 bytes · typescript
/**
 * Source/Extraction — LLM judgment + tool-use loop (Std 3 + Std 5).
 *
 * The agent gives Anthropic the SEC EDGAR tool surface declared in
 * SEC_TOOL_DESCRIPTORS (core, filings, XBRL and insider connectors)
 * and asks the model to walk the runbook itself: resolve CIKs, fetch
 * facts, extract the requested metric values, and return a JSON
 * object holding an array of extracted hits plus comparability notes.
 *
 * The agent's index.ts handles deterministic boundary work — input
 * validation, provenance stamping, validation/confidence, handoff
 * envelope. This file is the single LLM seam.
 *
 * Self-contained Anthropic SDK wrapper. Per the refactor spec, code
 * duplicated across agents intentionally — each agent owns its own
 * LLM layer; no shared "ai-fallback" module.
 *
 * Std 12: if ANTHROPIC_API_KEY is missing, returns a structured
 * `needs-api-key` failure so the agent fails cleanly — never fakes a
 * run.
 */

import Anthropic from '@anthropic-ai/sdk';
import type { Tool, ToolUseBlock, MessageParam, ContentBlock, TextBlock } from '@anthropic-ai/sdk/resources/messages.js';
import { z } from 'zod';

import { recordUsage } from '../../../observability/usage.js';
import { buildSystemPrompt } from './prompt.js';
import {
  SEC_TOOLS,
  executeSecTool,
} from '../../../tools/retrieval/connectors/sec-edgar.js';
import {
  SEC_FILING_TOOLS,
  executeSecFilingTool,
} from '../../../tools/retrieval/connectors/sec-edgar-filings.js';
import {
  SEC_XBRL_TOOLS,
  executeSecXbrlTool,
} from '../../../tools/retrieval/connectors/sec-edgar-xbrl.js';
import {
  SEC_INSIDER_TOOLS,
  executeSecInsiderTool,
} from '../../../tools/retrieval/connectors/sec-edgar-insider.js';

// Read the key once at module load. An unset or empty value is falsy,
// so no SDK client is constructed in either case.
const apiKey = process.env.ANTHROPIC_API_KEY;
// `null` means "no key": extractWithSecTools checks this and returns a
// structured `needs-api-key` failure instead of calling the API.
const client = apiKey ? new Anthropic({ apiKey }) : null;

// One-time notice at import so a missing key is visible in the logs
// before the first extraction is attempted.
if (!client) {
  // eslint-disable-next-line no-console
  console.log(`[baseline.source-extraction] ANTHROPIC_API_KEY not set — LLM judgment steps will return a structured 'needs-api-key' failure.`);
}

// Model id sent on every `messages.create` call and to `recordUsage`
// (re-exported as MODEL_NAME).
const MODEL = 'claude-haiku-4-5';
// Upper bound on model round-trips in the tool-use loop; also quoted in
// the `tool-loop-overrun` failure reason.
const MAX_TOOL_ITERATIONS = 40;
// `max_tokens` passed on each individual request (per turn, not per job).
const MAX_TOKENS_PER_TURN = 8000;

/**
 * The complete SEC tool surface declared on every Anthropic request.
 *
 * Hoisted to module scope so:
 *   - the count and names are visible to callers (the agent's
 *     `index.ts` reads this for the audit trail — no hardcoded
 *     numbers that drift when tools are added or removed);
 *   - the array is built once per process instead of once per call.
 *
 * Std 5: every entry here must fit the agent's matrix-row
 * capabilities (retrieval, API, web, parser, OCR, repository).
 */
export const SEC_TOOL_DESCRIPTORS = [
  ...SEC_TOOLS, // from connectors/sec-edgar.js
  ...SEC_FILING_TOOLS, // from connectors/sec-edgar-filings.js
  ...SEC_XBRL_TOOLS, // from connectors/sec-edgar-xbrl.js
  ...SEC_INSIDER_TOOLS, // from connectors/sec-edgar-insider.js
] as const;

/** Tool names in declaration order, derived from SEC_TOOL_DESCRIPTORS. */
export const SEC_TOOL_NAMES: readonly string[] = SEC_TOOL_DESCRIPTORS.map(t => t.name);

/**
 * The descriptors projected down to the three fields passed as `tools`
 * on each request (name, description, input_schema). The `as Tool[]`
 * assertion bridges the connector descriptor type to the SDK's type.
 */
const ANTHROPIC_TOOLS: Tool[] = SEC_TOOL_DESCRIPTORS.map(t => ({
  name: t.name,
  description: t.description,
  input_schema: t.input_schema,
})) as Tool[];

/** Number of declared tools, derived from the array rather than hardcoded. */
export const SEC_TOOL_COUNT = SEC_TOOL_DESCRIPTORS.length;
/** Public alias of the model id used for every request in this module. */
export const MODEL_NAME = MODEL;

/**
 * Structured description of why an LLM step could not produce a value.
 * Returned inside `LlmResult` rather than thrown.
 */
export interface LlmFailure {
  /** Machine-readable failure class callers can branch on. */
  readonly category: 'needs-api-key' | 'invalid-response' | 'sdk-error' | 'empty-response' | 'tool-loop-overrun';
  /** Human-readable detail (e.g. the SDK error message or the schema error). */
  readonly reason: string;
  /** Optional remediation advice (the `needs-api-key` failure sets one). */
  readonly hint?: string;
}
/** Result union: either the value, or a typed failure — discriminated by `ok`. */
export type LlmResult<T> = { ok: true; value: T } | { ok: false; failure: LlmFailure };

/**
 * A single extraction the LLM produced. Field names match the JSON
 * shape spelled out in the user prompt. `sourceUrl` is the only field
 * the model may omit; it then defaults to the empty string.
 */
const extractedHitSchema = z.object({
  entity: z.string(), // prompt asks the model to echo the requested entity id
  metricKey: z.string(), // prompt asks for one of the requested target-metric keys
  period: z.string(), // prompt asks the model to echo the requested period
  rawLabel: z.string(), // concept name the value was read from
  value: z.number().nullable(), // null when the metric could not be found
  rawUnit: z.string().nullable(), // null when the unit is unknown
  snippet: z.string(), // short evidence string
  sourceUrl: z.string().optional().default(''), // '' when the model omits it
  confidence: z.number().min(0).max(1), // self-assessed; values outside [0, 1] fail validation
});
/** Parsed (post-default) shape of one hit. */
export type ExtractedHit = z.infer<typeof extractedHitSchema>;

/** A free-text caveat about comparing values across the listed entities. */
const comparabilityNoteSchema = z.object({
  entities: z.array(z.string()),
  detail: z.string(),
});
/** Parsed shape of one comparability note. */
export type ExtractedNote = z.infer<typeof comparabilityNoteSchema>;

/**
 * Top-level shape the model's final answer must satisfy: a `hits`
 * array (required) and `comparabilityNotes` (defaults to [] when
 * omitted).
 */
const extractResponseSchema = z.object({
  hits: z.array(extractedHitSchema),
  comparabilityNotes: z.array(comparabilityNoteSchema).optional().default([]),
});

/** Audit record of one tool invocation made on the model's behalf. */
export interface ToolCallTrace {
  /** Name of the tool the model asked for. */
  readonly toolName: string;
  /** Input object the model supplied ({} when none was given). */
  readonly input: Record<string, unknown>;
  /** Whether the executor reported success. */
  readonly ok: boolean;
  /** One-line digest of the result, as produced by summarizeToolResult. */
  readonly resultSummary: string;
  /** Executor error message, when one was reported. */
  readonly errorMessage?: string;
  /** ISO-8601 timestamp taken when the trace was recorded. */
  readonly at: string;
}

/**
 * Everything the extraction loop needs to describe one job to the
 * model. Every field is rendered into the opening user message.
 */
export interface ExtractRequest {
  readonly analysisId: string;
  readonly question: string;
  /** Entities to cover; `aliases` are listed next to the id in the prompt. */
  readonly entities: readonly { readonly id: string; readonly aliases: readonly string[] }[];
  /** Metrics to extract; `unit`, when set, is shown as the target unit. */
  readonly targetMetrics: readonly {
    readonly key: string;
    readonly definition: string;
    readonly unit?: string;
  }[];
  /** Requested reporting period, interpolated verbatim into the prompt. */
  readonly period: string;
  /** Source identifiers, shown comma-separated in the prompt. */
  readonly sources: readonly string[];
}

/** Successful outcome of extractWithSecTools. */
export interface ExtractWithToolsOutput {
  /** Schema-validated hits from the model's final answer. */
  readonly hits: readonly ExtractedHit[];
  /** Schema-validated comparability notes ([] when the model gave none). */
  readonly comparabilityNotes: readonly ExtractedNote[];
  /** Every tool invocation made during the loop, in call order. */
  readonly toolCalls: readonly ToolCallTrace[];
}

/**
 * Render the opening user message for one extraction job.
 *
 * The message is assembled from six sections — title, job request,
 * tool catalogue, recommended runbook, output contract, standards
 * reminders — separated by a single blank line. Only the job-request
 * section and one runbook step depend on `req`; everything else is
 * fixed prompt text.
 *
 * @param req The job to describe to the model.
 * @returns The complete prompt as one newline-joined string.
 */
function buildUserMessage(req: ExtractRequest): string {
  // One bullet per entity: its id followed by every alias.
  const entityBullets: string[] = [];
  for (const entity of req.entities) {
    entityBullets.push(`  - id="${entity.id}"  aliases=[${entity.aliases.join(', ')}]`);
  }

  // One bullet per metric; the unit note only appears when a unit was given.
  const metricBullets: string[] = [];
  for (const metric of req.targetMetrics) {
    const unitNote = metric.unit ? ` (target unit ${metric.unit})` : '';
    metricBullets.push(`  - key="${metric.key}"${unitNote} — ${metric.definition}`);
  }

  const title = [`## Runbook — Source/Extraction agent (Baseline pillar)`];

  const jobRequest = [
    `## Job request`,
    `analysisId: ${req.analysisId}`,
    `question:   ${req.question}`,
    `period:     ${req.period}`,
    `sources:    ${req.sources.join(', ')}`,
    `entities:`,
    ...entityBullets,
    `target metrics:`,
    ...metricBullets,
  ];

  const toolCatalogue = [
    `## Available tools (Std 5 — approved tools only)`,
    `You have ten SEC retrieval tools. Pick the smallest, cheapest tool that answers the question.`,
    ``,
    `Identity & structured XBRL (prefer these for numeric metrics):`,
    `  - sec_edgar_companies(searchTerms)         — ticker / name → CIK lookup`,
    `  - sec_company_concept(cik, taxonomy, tag, unit?) — one company × one concept time series (lightest)`,
    `  - sec_financials(cik, concepts?)           — one company × all concepts (heavier; use a concepts filter)`,
    `  - sec_xbrl_frames(taxonomy, tag, unit, period) — every company that reported a concept at one period`,
    `                                                   (peer benchmarking primitive — one call covers all entities)`,
    ``,
    `Filings & unstructured text (fall back here when XBRL doesn't carry the answer):`,
    `  - sec_submissions(cik, filingTypes?)       — filing history (returns primaryDocumentUrl per filing)`,
    `  - sec_filing_index(cik, accessionNumber)   — all files inside a filing (exhibits, XBRL zip, etc.)`,
    `  - sec_filing_sections(documentUrl, items?) — split a 10-K/10-Q into Items (Item 1A, 7, 7A, 8, …)`,
    `  - sec_filing_document(documentUrl, maxChars?) — fetch a single filing doc, HTML-stripped, truncated`,
    `  - sec_full_text_search(query, opts?)       — EDGAR full-text search across all filings`,
    ``,
    `Insider activity:`,
    `  - sec_insider_form4(cik, opts?)            — Form 4 list + optional XML parse of transactions`,
  ];

  const runbook = [
    `## Recommended runbook for this job`,
    `1. Call **sec_edgar_companies** once with all entity names/aliases (comma-separated) to resolve every CIK.`,
    `2. For numeric metrics in the requested period (${req.period}), prefer **sec_company_concept** per (entity, tag)`,
    `   or **sec_xbrl_frames** if the same concept across all entities answers the question in one call.`,
    `3. If a bank reports a metric under a non-obvious tag, call **sec_financials(cik)** with no concepts arg once`,
    `   to inspect availableConcepts, then re-call with a targeted filter.`,
    `4. For narrative metrics (risk factors, MD&A, ESG language) call **sec_submissions** → **sec_filing_sections**`,
    `   on the latest 10-K/10-Q primaryDocumentUrl with a tight items filter (e.g. "7,1A").`,
    `5. Match the period: for FY-YYYY prefer fp="FY" + fy=YYYY + form="10-K"; for quarters use fp="Q1/Q2/Q3"`,
    `   + form="10-Q".`,
  ];

  const outputContract = [
    `## Output (Std 11 — handoff)`,
    `When you are done with all entities + metrics, return ONLY a JSON object — no prose, no markdown fence —`,
    `in exactly this shape:`,
    `{`,
    `  "hits": [`,
    `    {`,
    `      "entity":       string,     // echo the JobRequest entity.id (e.g. "JPMorgan Chase")`,
    `      "metricKey":    string,     // one of the requested target-metric keys`,
    `      "period":       string,     // echo the requested period`,
    `      "rawLabel":     string,     // the XBRL concept name you read the value from`,
    `      "value":        number|null,// the numeric value in its native unit; null if unfound`,
    `      "rawUnit":      string|null,// e.g. "USD", "USD/shares"; null if unknown`,
    `      "snippet":      string,     // short evidence string from the fact entry (e.g. "fy=2024 fp=FY end=2024-12-31 val=158104000000")`,
    `      "sourceUrl":    string,     // companyfacts URL or filing primaryDocumentUrl`,
    `      "confidence":   number      // 0-1 self-assessed confidence`,
    `    }`,
    `  ],`,
    `  "comparabilityNotes": [`,
    `    { "entities": string[], "detail": string }    // e.g. mixed reporting bases, different fiscal year-ends`,
    `  ]`,
    `}`,
  ];

  const standards = [
    `## Standards reminders`,
    `- Std 4: never fabricate a value. If you cannot locate a metric for an entity, emit a hit with value=null`,
    `  and a clear rawLabel/snippet explaining what you searched.`,
    `- Std 4: every hit MUST carry a sourceUrl from a tool response — no values without provenance.`,
    `- Std 7: assign honest confidence; lower it when you had to guess between concepts or when the period`,
    `  matched a non-annual filing.`,
    `- Std 12: if a tool errors, do not retry blindly — log it in comparabilityNotes and move on.`,
  ];

  // Lines within a section are newline-joined; sections are separated by one blank line.
  return [title, jobRequest, toolCatalogue, runbook, outputContract, standards]
    .map(section => section.join('\n'))
    .join('\n\n');
}

/**
 * Forward a tool call requested by the model to the connector that
 * implements it. Routing is by exact tool name:
 *
 *   - sec_filing_index, sec_filing_sections, sec_full_text_search → filings executor
 *   - sec_company_concept, sec_xbrl_frames                        → XBRL executor
 *   - sec_insider_form4                                           → insider executor
 *   - every other name (including unrecognised ones)              → core executor
 *
 * NOTE(review): the core executor is expected to answer names it does
 * not know with a structured `unknown-tool` error — confirm against the
 * connector.
 *
 * @param name  Tool name taken from the model's tool_use block.
 * @param input Raw, unvalidated tool input; passed through untouched.
 * @returns Whatever the selected executor returns.
 */
async function dispatchSecTool(name: string, input: unknown) {
  switch (name) {
    case 'sec_filing_index':
    case 'sec_filing_sections':
    case 'sec_full_text_search':
      return executeSecFilingTool(name, input);

    case 'sec_company_concept':
    case 'sec_xbrl_frames':
      return executeSecXbrlTool(name, input);

    case 'sec_insider_form4':
      return executeSecInsiderTool(name, input);

    default:
      return executeSecTool(name, input);
  }
}

/**
 * Produce a one-line, human-readable digest of a tool result for the
 * audit trace.
 *
 * The digest is best-effort: a failed call is always `'error'`, and a
 * successful call whose payload is from an unrecognised tool — or whose
 * payload is shaped so unexpectedly that reading it throws — is simply
 * `'ok'`.
 *
 * @param name   Tool that produced the result.
 * @param ok     Whether the executor reported success.
 * @param result Raw payload returned by the executor.
 * @returns The summary line.
 */
function summarizeToolResult(name: string, ok: boolean, result: unknown): string {
  if (!ok) return 'error';

  try {
    switch (name) {
      case 'sec_edgar_companies': {
        // A non-array payload falls through to the generic 'ok'.
        if (!Array.isArray(result)) break;
        // Show at most six matches, preferring ticker over CIK.
        const labels = result.slice(0, 6).map(row => {
          const match = row as { ticker?: string; cik?: string };
          return match.ticker ?? match.cik ?? '?';
        });
        return `${result.length} match(es): ${labels.join(', ')}`;
      }

      case 'sec_financials': {
        const fin = result as { entityName?: string; facts?: Record<string, unknown>; conceptsFilter?: string[] };
        const taxonomyNames = fin.facts ? Object.keys(fin.facts) : [];
        // Total concepts = sum of the key counts of every taxonomy bucket.
        const conceptTotal = fin.facts
          ? Object.values(fin.facts).reduce<number>(
              (sum, bucket) => sum + Object.keys((bucket as Record<string, unknown>) ?? {}).length,
              0,
            )
          : 0;
        const filterSuffix = fin.conceptsFilter ? ` filter=[${fin.conceptsFilter.join(',')}]` : '';
        return `${fin.entityName ?? '?'} — ${conceptTotal} concept(s) across [${taxonomyNames.join(', ')}]${filterSuffix}`;
      }

      case 'sec_submissions': {
        const subs = result as { entityName?: string; filings?: unknown[] };
        const filingCount = subs.filings?.length ?? 0;
        return `${subs.entityName ?? '?'} — ${filingCount} filing(s)`;
      }

      case 'sec_filing_document': {
        const doc = result as { url?: string; text?: string; truncated?: boolean };
        const charCount = (doc.text ?? '').length;
        const truncatedNote = doc.truncated ? ' (truncated)' : '';
        return `${doc.url ?? '?'} — ${charCount} chars${truncatedNote}`;
      }

      case 'sec_filing_index': {
        const index = result as { directoryName?: string; files?: unknown[] };
        const fileCount = index.files?.length ?? 0;
        return `${index.directoryName ?? '?'} — ${fileCount} file(s)`;
      }

      case 'sec_filing_sections': {
        const split = result as { url?: string; sections?: { item?: string; charCount?: number }[]; fullCharCount?: number };
        const itemList = (split.sections ?? []).map(section => section.item ?? '?').join(',');
        return `${split.url ?? '?'} — items=[${itemList}] (full ${split.fullCharCount ?? 0} chars)`;
      }

      case 'sec_full_text_search': {
        const search = result as { totalHits?: number; hits?: unknown[]; query?: string };
        const returned = search.hits?.length ?? 0;
        return `q="${search.query ?? ''}" — ${search.totalHits ?? 0} total, ${returned} returned`;
      }

      case 'sec_company_concept': {
        const concept = result as { tag?: string; label?: string; units?: Record<string, unknown[]> };
        const unitNames = concept.units ? Object.keys(concept.units) : [];
        // Count rows across every unit; non-array entries contribute nothing.
        let rowTotal = 0;
        if (concept.units) {
          for (const rows of Object.values(concept.units)) {
            if (Array.isArray(rows)) rowTotal += rows.length;
          }
        }
        return `${concept.tag ?? '?'} (${concept.label ?? '?'}) — ${rowTotal} row(s) across units [${unitNames.join(',')}]`;
      }

      case 'sec_xbrl_frames': {
        const frame = result as { tag?: string; unit?: string; period?: string; pts?: number; rows?: unknown[] };
        const rowCount = frame.rows?.length ?? 0;
        return `${frame.tag ?? '?'}/${frame.unit ?? '?'}/${frame.period ?? '?'} — ${frame.pts ?? 0} reporters, ${rowCount} rows`;
      }

      case 'sec_insider_form4': {
        const insider = result as { cik?: string; detail?: boolean; filings?: unknown[] };
        const parsedNote = insider.detail ? ' (parsed)' : '';
        return `CIK ${insider.cik ?? '?'} — ${insider.filings?.length ?? 0} Form 4 filing(s)${parsedNote}`;
      }

      default:
        break;
    }
    return 'ok';
  } catch {
    // Summaries are cosmetic: a malformed payload must never fail the call.
    return 'ok';
  }
}

/**
 * Best-effort extraction of a JSON value from the model's final text.
 *
 * Strategy:
 *   1. Strip one leading and one trailing markdown code fence, trim,
 *      and try to parse the whole remainder.
 *   2. Otherwise walk every `{` in order and try to parse the balanced
 *      `{…}` span that starts there, returning the first one that is
 *      valid JSON. Unlike a greedy "first `{` to last `}`" match, this
 *      still succeeds when surrounding prose contains braces of its own.
 *
 * @param text Raw text returned by the model.
 * @returns The parsed value, or `null` when no parseable JSON is found.
 *          The result is unvalidated — callers must check it against a schema.
 */
function jsonResponseFromText(text: string): unknown {
  const cleaned = text.replace(/^```(?:json)?\s*/i, '').replace(/```\s*$/i, '').trim();
  try { return JSON.parse(cleaned); } catch { /* fall through to embedded-object scan */ }

  for (let start = cleaned.indexOf('{'); start !== -1; start = cleaned.indexOf('{', start + 1)) {
    const end = findBalancedObjectEnd(cleaned, start);
    if (end === -1) continue;
    try { return JSON.parse(cleaned.slice(start, end + 1)); } catch { /* not JSON — try the next `{` */ }
  }
  return null;
}

/**
 * Find the index of the `}` that closes the `{` at `start`, ignoring
 * braces that appear inside double-quoted strings (with backslash
 * escapes honoured). Returns -1 when the braces never balance.
 */
function findBalancedObjectEnd(text: string, start: number): number {
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === '\\') escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) return i;
    }
  }
  return -1;
}

/**
 * Drive the source-extraction tool-use loop end-to-end.
 *
 * Each iteration sends the running conversation to the model. While the
 * model stops with `tool_use`, every requested tool is executed, traced
 * and fed back as a `tool_result`; any other stop reason ends the loop
 * and the first text block of that reply is parsed as the final answer.
 *
 * @param req        Job description rendered into the opening user message.
 * @param onToolCall Optional observer, invoked synchronously with each
 *                   trace right after it is recorded. Exceptions it
 *                   throws are NOT intercepted.
 * @returns `ok: true` with the validated hits, comparability notes and
 *          the per-tool trace the agent can fold into the audit trail
 *          (Std 6 + Std 10), or `ok: false` with a typed failure:
 *          - `needs-api-key`     — no client was configured;
 *          - `sdk-error`         — `messages.create` threw;
 *          - `tool-loop-overrun` — the model was still requesting tools
 *                                  after MAX_TOOL_ITERATIONS turns;
 *          - `empty-response`    — the model stopped but returned no text;
 *          - `invalid-response`  — the text was not JSON matching the schema.
 *
 * Std 12: SDK failures and tool-executor exceptions never escape this
 * function; each becomes a structured failure or an error `tool_result`
 * the model can react to.
 */
export async function extractWithSecTools(
  req: ExtractRequest,
  onToolCall?: (t: ToolCallTrace) => void,
): Promise<LlmResult<ExtractWithToolsOutput>> {
  if (!client) {
    return {
      ok: false,
      failure: {
        category: 'needs-api-key',
        reason: 'Source/Extraction requires the LLM but ANTHROPIC_API_KEY is not configured.',
        hint: 'Set ANTHROPIC_API_KEY and rerun; this agent does not fake runs.',
      },
    };
  }

  const tools = ANTHROPIC_TOOLS;
  const system = buildSystemPrompt();
  const messages: MessageParam[] = [{ role: 'user', content: buildUserMessage(req) }];
  const toolCalls: ToolCallTrace[] = [];

  let finalText = '';
  // Distinguishes "model ended its turn" from "iteration budget ran out".
  let modelFinished = false;
  let stopReason: string | null = null;

  for (let iter = 0; iter < MAX_TOOL_ITERATIONS; iter++) {
    let resp;
    try {
      resp = await client.messages.create({
        model: MODEL,
        max_tokens: MAX_TOKENS_PER_TURN,
        system,
        tools,
        messages,
      });
    } catch (err) {
      return {
        ok: false,
        failure: { category: 'sdk-error', reason: err instanceof Error ? err.message : String(err) },
      };
    }
    recordUsage('baseline.source-extraction', MODEL, resp.usage.input_tokens, resp.usage.output_tokens);

    // The assistant turn must be echoed back verbatim so the following
    // tool_result blocks can reference its tool_use ids.
    messages.push({ role: 'assistant', content: resp.content as ContentBlock[] });

    if (resp.stop_reason !== 'tool_use') {
      const textBlock = resp.content.find((b): b is TextBlock => b.type === 'text');
      finalText = textBlock ? textBlock.text : '';
      stopReason = resp.stop_reason;
      modelFinished = true;
      break;
    }

    const toolUses = resp.content.filter((b): b is ToolUseBlock => b.type === 'tool_use');
    const toolResults: { type: 'tool_result'; tool_use_id: string; content: string; is_error?: boolean }[] = [];
    for (const tu of toolUses) {
      let ok = false;
      let summary = 'error';
      let errorMessage: string | undefined;
      let payload: string;
      try {
        const result = await dispatchSecTool(tu.name, tu.input);
        ok = result.ok;
        summary = summarizeToolResult(tu.name, result.ok, result.result);
        errorMessage = result.error?.message;
        payload = result.ok
          ? JSON.stringify(result.result)
          : JSON.stringify({ error: result.error });
      } catch (err) {
        // An executor that throws (or a result that cannot be serialised)
        // is reported to the model as a failed tool call instead of
        // aborting the whole extraction.
        ok = false;
        summary = 'error';
        errorMessage = err instanceof Error ? err.message : String(err);
        payload = JSON.stringify({ error: { message: errorMessage } });
      }

      const trace: ToolCallTrace = {
        toolName: tu.name,
        input: (tu.input ?? {}) as Record<string, unknown>,
        ok,
        resultSummary: summary,
        errorMessage,
        at: new Date().toISOString(),
      };
      toolCalls.push(trace);
      onToolCall?.(trace);

      toolResults.push({
        type: 'tool_result',
        tool_use_id: tu.id,
        content: payload,
        is_error: !ok,
      });
    }
    messages.push({ role: 'user', content: toolResults });
  }

  if (!modelFinished) {
    return {
      ok: false,
      failure: {
        category: 'tool-loop-overrun',
        reason: `Tool-use loop exceeded ${MAX_TOOL_ITERATIONS} iterations without an end_turn.`,
      },
    };
  }

  if (!finalText) {
    return {
      ok: false,
      failure: {
        category: 'empty-response',
        reason: `Model stopped (stop_reason=${stopReason ?? 'unknown'}) without returning any text.`,
      },
    };
  }

  const json = jsonResponseFromText(finalText);
  const parsed = extractResponseSchema.safeParse(json);
  if (!parsed.success) {
    return {
      ok: false,
      failure: {
        category: 'invalid-response',
        reason: `final response did not match schema: ${parsed.error.message}`,
        // A reply cut off by the token limit is the likeliest cause of unparseable JSON.
        ...(stopReason === 'max_tokens'
          ? { hint: `The reply hit the ${MAX_TOKENS_PER_TURN}-token limit, so the JSON answer was probably truncated.` }
          : {}),
      },
    };
  }
  return {
    ok: true,
    value: {
      hits: parsed.data.hits,
      comparabilityNotes: parsed.data.comparabilityNotes,
      toolCalls,
    },
  };
}