/**
* Source/Extraction — LLM judgment + tool-use loop (Std 3 + Std 5).
*
* The agent gives Anthropic the SEC EDGAR tool surface declared in
* SEC_TOOL_DESCRIPTORS (sec_edgar_companies, sec_financials,
* sec_submissions, sec_filing_document, plus the filings, XBRL and
* insider connector tools) and asks the model to walk the runbook
* itself: resolve CIKs, fetch facts, extract the requested metric
* values, and return a JSON object of the form
* { hits, comparabilityNotes } whose hits mirror ExtractedValue records.
*
* The agent's index.ts handles deterministic boundary work — input
* validation, provenance stamping, validation/confidence, handoff
* envelope. This file is the single LLM seam.
*
* Self-contained Anthropic SDK wrapper. Per the refactor spec, this code
* is duplicated across agents intentionally — each agent owns its own
* LLM layer; there is no shared "ai-fallback" module.
*
* Std 12: if ANTHROPIC_API_KEY is missing, returns a structured
* `needs-api-key` failure so the agent fails cleanly — never fakes a
* run.
*/
import Anthropic from '@anthropic-ai/sdk';
import type { Tool, ToolUseBlock, MessageParam, ContentBlock, TextBlock } from '@anthropic-ai/sdk/resources/messages.js';
import { z } from 'zod';
import { recordUsage } from '../../../observability/usage.js';
import { buildSystemPrompt } from './prompt.js';
import {
SEC_TOOLS,
executeSecTool,
} from '../../../tools/retrieval/connectors/sec-edgar.js';
import {
SEC_FILING_TOOLS,
executeSecFilingTool,
} from '../../../tools/retrieval/connectors/sec-edgar-filings.js';
import {
SEC_XBRL_TOOLS,
executeSecXbrlTool,
} from '../../../tools/retrieval/connectors/sec-edgar-xbrl.js';
import {
SEC_INSIDER_TOOLS,
executeSecInsiderTool,
} from '../../../tools/retrieval/connectors/sec-edgar-insider.js';
// The SDK client is built once at module load. A missing or empty key leaves
// `client` as null, and a one-line notice is printed so the operator knows
// why LLM steps will report a structured failure.
const apiKey = process.env.ANTHROPIC_API_KEY;
const client = !apiKey ? null : new Anthropic({ apiKey });
if (client === null) {
  const notice = `[baseline.source-extraction] ANTHROPIC_API_KEY not set — LLM judgment steps will return a structured 'needs-api-key' failure.`;
  // eslint-disable-next-line no-console
  console.log(notice);
}
/** Model id sent as `model` on every Anthropic request made by this file. */
const MODEL = 'claude-haiku-4-5';
/** Upper bound on request/response rounds in the tool-use loop. */
const MAX_TOOL_ITERATIONS = 40;
/** `max_tokens` value passed on each individual request. */
const MAX_TOKENS_PER_TURN = 8000;
/**
 * Every SEC tool descriptor declared on each Anthropic request, in
 * connector order: core, filings, XBRL, insider.
 *
 * Kept at module scope for two reasons:
 *  - callers (the agent's `index.ts` audit trail) can read the real
 *    count and names instead of hardcoding numbers that drift when a
 *    tool is added or removed;
 *  - the list is assembled once per process, not once per call.
 *
 * Std 5: every entry here must fit the agent's matrix-row
 * capabilities (retrieval, API, web, parser, OCR, repository).
 */
export const SEC_TOOL_DESCRIPTORS = [
  ...SEC_TOOLS,
  ...SEC_FILING_TOOLS,
  ...SEC_XBRL_TOOLS,
  ...SEC_INSIDER_TOOLS,
] as const;

/** Tool names, in the same order as SEC_TOOL_DESCRIPTORS. */
export const SEC_TOOL_NAMES: readonly string[] = SEC_TOOL_DESCRIPTORS.map(({ name }) => name);

/** Number of tools declared to the model. */
export const SEC_TOOL_COUNT = SEC_TOOL_DESCRIPTORS.length;

/** Descriptors reduced to the three fields sent as the request's `tools`. */
const ANTHROPIC_TOOLS: Tool[] = SEC_TOOL_DESCRIPTORS.map(
  ({ name, description, input_schema }) => ({ name, description, input_schema }),
) as Tool[];

/** Public alias of the model id used by this file. */
export const MODEL_NAME = MODEL;
/**
* Structured description of why the LLM seam could not produce a result.
*/
export interface LlmFailure {
/**
* Machine-readable failure class:
* - 'needs-api-key' — ANTHROPIC_API_KEY is not configured;
* - 'invalid-response' — the final reply did not match the expected schema;
* - 'sdk-error' — the Anthropic SDK call threw;
* - 'empty-response' — NOTE(review): presumably a reply with no usable text; confirm against callers;
* - 'tool-loop-overrun' — the tool-use loop hit its iteration cap.
*/
readonly category: 'needs-api-key' | 'invalid-response' | 'sdk-error' | 'empty-response' | 'tool-loop-overrun';
/** Human-readable explanation of the failure. */
readonly reason: string;
/** Optional remediation advice. */
readonly hint?: string;
}
/** Result envelope: `ok: true` carries the value, `ok: false` carries an LlmFailure. */
export type LlmResult<T> = { ok: true; value: T } | { ok: false; failure: LlmFailure };
/** A single extraction the LLM produced. Mirrors ExtractedValue but
* with optional provenance fields the agent fills in if missing.
*
* Field meanings follow the output shape spelled out in the user prompt. */
const extractedHitSchema = z.object({
// Entity the value belongs to; the prompt asks the model to echo the job's entity id.
entity: z.string(),
// One of the requested target-metric keys.
metricKey: z.string(),
// The requested period, echoed back by the model.
period: z.string(),
// Label the value was read from (the prompt asks for the XBRL concept name).
rawLabel: z.string(),
// Numeric value in its native unit; null when the model could not find one.
value: z.number().nullable(),
// Unit string such as "USD"; null when unknown.
rawUnit: z.string().nullable(),
// Short evidence string supporting the value.
snippet: z.string(),
// Provenance URL; becomes '' when the model omits it.
sourceUrl: z.string().optional().default(''),
// Self-assessed confidence; values outside [0, 1] fail validation.
confidence: z.number().min(0).max(1),
});
/** Validated shape of one hit, inferred from extractedHitSchema. */
export type ExtractedHit = z.infer<typeof extractedHitSchema>;
/** A free-text caveat attached to a set of entities. */
const comparabilityNoteSchema = z.object({
// Entities the note applies to.
entities: z.array(z.string()),
// The caveat itself.
detail: z.string(),
});
/** Validated shape of one comparability note. */
export type ExtractedNote = z.infer<typeof comparabilityNoteSchema>;
/** Top-level shape the model's final JSON reply is validated against. */
const extractResponseSchema = z.object({
// Required list of extracted values (may be empty).
hits: z.array(extractedHitSchema),
// Optional; becomes [] when the model leaves it out.
comparabilityNotes: z.array(comparabilityNoteSchema).optional().default([]),
});
/**
* Audit-trail record of one tool invocation made during the tool-use loop.
*/
export interface ToolCallTrace {
/** Name of the tool the model asked for. */
readonly toolName: string;
/** Arguments the model supplied (an empty object when none were given). */
readonly input: Record<string, unknown>;
/** Whether the tool call succeeded. */
readonly ok: boolean;
/** Short one-line digest of the result; 'error' when the call failed. */
readonly resultSummary: string;
/** Error message, when one was reported. */
readonly errorMessage?: string;
/** ISO-8601 timestamp taken when the trace was recorded. */
readonly at: string;
}
/**
* Everything the extraction prompt is rendered from. Each field is written
* into the user message sent to the model.
*/
export interface ExtractRequest {
/** Identifier of the analysis this job belongs to. */
readonly analysisId: string;
/** The question being answered. */
readonly question: string;
/** Entities to extract for: an id plus the aliases listed next to it in the prompt. */
readonly entities: readonly { readonly id: string; readonly aliases: readonly string[] }[];
/** Metrics to extract: key, definition and an optional target unit. */
readonly targetMetrics: readonly {
readonly key: string;
readonly definition: string;
readonly unit?: string;
}[];
/** Requested reporting period (the prompt's examples use forms like FY-YYYY — TODO confirm the accepted formats). */
readonly period: string;
/** Source labels, rendered as a comma-separated list. */
readonly sources: readonly string[];
}
/**
* Successful outcome of a tool-use extraction run.
*/
export interface ExtractWithToolsOutput {
/** Schema-validated hits from the model's final reply. */
readonly hits: readonly ExtractedHit[];
/** Schema-validated notes; empty when the model supplied none. */
readonly comparabilityNotes: readonly ExtractedNote[];
/** One trace per tool invocation, in execution order. */
readonly toolCalls: readonly ToolCallTrace[];
}
/**
 * Render the user-turn prompt for one extraction job.
 *
 * The message is a newline-joined sequence of five sections: the job
 * request (built from `req`), the tool catalogue, the recommended runbook
 * (which repeats the requested period), the required JSON output shape,
 * and the standards reminders.
 *
 * @param req The job to describe to the model.
 * @returns The complete prompt text.
 */
function buildUserMessage(req: ExtractRequest): string {
  const entityLines = req.entities.map(e => ` - id="${e.id}" aliases=[${e.aliases.join(', ')}]`);
  const metricLines = req.targetMetrics.map(m => {
    // The unit note is only rendered when a target unit was requested.
    const unitNote = m.unit ? ` (target unit ${m.unit})` : '';
    return ` - key="${m.key}"${unitNote} — ${m.definition}`;
  });

  const jobSection: string[] = [
    `## Runbook — Source/Extraction agent (Baseline pillar)`,
    ``,
    `## Job request`,
    `analysisId: ${req.analysisId}`,
    `question: ${req.question}`,
    `period: ${req.period}`,
    `sources: ${req.sources.join(', ')}`,
    `entities:`,
    ...entityLines,
    `target metrics:`,
    ...metricLines,
    ``,
  ];

  const toolSection: string[] = [
    `## Available tools (Std 5 — approved tools only)`,
    `You have ten SEC retrieval tools. Pick the smallest, cheapest tool that answers the question.`,
    ``,
    `Identity & structured XBRL (prefer these for numeric metrics):`,
    ` - sec_edgar_companies(searchTerms) — ticker / name → CIK lookup`,
    ` - sec_company_concept(cik, taxonomy, tag, unit?) — one company × one concept time series (lightest)`,
    ` - sec_financials(cik, concepts?) — one company × all concepts (heavier; use a concepts filter)`,
    ` - sec_xbrl_frames(taxonomy, tag, unit, period) — every company that reported a concept at one period`,
    ` (peer benchmarking primitive — one call covers all entities)`,
    ``,
    `Filings & unstructured text (fall back here when XBRL doesn't carry the answer):`,
    ` - sec_submissions(cik, filingTypes?) — filing history (returns primaryDocumentUrl per filing)`,
    ` - sec_filing_index(cik, accessionNumber) — all files inside a filing (exhibits, XBRL zip, etc.)`,
    ` - sec_filing_sections(documentUrl, items?) — split a 10-K/10-Q into Items (Item 1A, 7, 7A, 8, …)`,
    ` - sec_filing_document(documentUrl, maxChars?) — fetch a single filing doc, HTML-stripped, truncated`,
    ` - sec_full_text_search(query, opts?) — EDGAR full-text search across all filings`,
    ``,
    `Insider activity:`,
    ` - sec_insider_form4(cik, opts?) — Form 4 list + optional XML parse of transactions`,
    ``,
  ];

  const runbookSection: string[] = [
    `## Recommended runbook for this job`,
    `1. Call **sec_edgar_companies** once with all entity names/aliases (comma-separated) to resolve every CIK.`,
    `2. For numeric metrics in the requested period (${req.period}), prefer **sec_company_concept** per (entity, tag)`,
    ` or **sec_xbrl_frames** if the same concept across all entities answers the question in one call.`,
    `3. If a bank reports a metric under a non-obvious tag, call **sec_financials(cik)** with no concepts arg once`,
    ` to inspect availableConcepts, then re-call with a targeted filter.`,
    `4. For narrative metrics (risk factors, MD&A, ESG language) call **sec_submissions** → **sec_filing_sections**`,
    ` on the latest 10-K/10-Q primaryDocumentUrl with a tight items filter (e.g. "7,1A").`,
    `5. Match the period: for FY-YYYY prefer fp="FY" + fy=YYYY + form="10-K"; for quarters use fp="Q1/Q2/Q3"`,
    ` + form="10-Q".`,
    ``,
  ];

  const outputSection: string[] = [
    `## Output (Std 11 — handoff)`,
    `When you are done with all entities + metrics, return ONLY a JSON object — no prose, no markdown fence —`,
    `in exactly this shape:`,
    `{`,
    ` "hits": [`,
    ` {`,
    ` "entity": string, // echo the JobRequest entity.id (e.g. "JPMorgan Chase")`,
    ` "metricKey": string, // one of the requested target-metric keys`,
    ` "period": string, // echo the requested period`,
    ` "rawLabel": string, // the XBRL concept name you read the value from`,
    ` "value": number|null,// the numeric value in its native unit; null if unfound`,
    ` "rawUnit": string|null,// e.g. "USD", "USD/shares"; null if unknown`,
    ` "snippet": string, // short evidence string from the fact entry (e.g. "fy=2024 fp=FY end=2024-12-31 val=158104000000")`,
    ` "sourceUrl": string, // companyfacts URL or filing primaryDocumentUrl`,
    ` "confidence": number // 0-1 self-assessed confidence`,
    ` }`,
    ` ],`,
    ` "comparabilityNotes": [`,
    ` { "entities": string[], "detail": string } // e.g. mixed reporting bases, different fiscal year-ends`,
    ` ]`,
    `}`,
    ``,
  ];

  const standardsSection: string[] = [
    `## Standards reminders`,
    `- Std 4: never fabricate a value. If you cannot locate a metric for an entity, emit a hit with value=null`,
    ` and a clear rawLabel/snippet explaining what you searched.`,
    `- Std 4: every hit MUST carry a sourceUrl from a tool response — no values without provenance.`,
    `- Std 7: assign honest confidence; lower it when you had to guess between concepts or when the period`,
    ` matched a non-annual filing.`,
    `- Std 12: if a tool errors, do not retry blindly — log it in comparabilityNotes and move on.`,
  ];

  return [
    ...jobSection,
    ...toolSection,
    ...runbookSection,
    ...outputSection,
    ...standardsSection,
  ].join('\n');
}
/**
 * Route a tool call to the connector that owns the tool, matching on the
 * exact tool name:
 *  - sec_filing_index, sec_filing_sections, sec_full_text_search → filings executor
 *  - sec_company_concept, sec_xbrl_frames → XBRL executor
 *  - sec_insider_form4 → insider executor
 *  - every other name, including unrecognised ones → core executor
 *    (which is expected to report unknown names itself — not verified here).
 *
 * @param name  Tool name taken from the model's tool_use block.
 * @param input Raw tool arguments, passed through untouched.
 * @returns Whatever the selected executor returns.
 */
async function dispatchSecTool(name: string, input: unknown) {
  switch (name) {
    case 'sec_filing_index':
    case 'sec_filing_sections':
    case 'sec_full_text_search':
      return executeSecFilingTool(name, input);

    case 'sec_company_concept':
    case 'sec_xbrl_frames':
      return executeSecXbrlTool(name, input);

    case 'sec_insider_form4':
      return executeSecInsiderTool(name, input);

    default:
      return executeSecTool(name, input);
  }
}
/**
 * Produce a short, human-readable digest of a tool result for the audit
 * trail.
 *
 * Failed calls always summarise as 'error'. Successful calls get a
 * tool-specific one-liner; tools without a dedicated format, and any
 * result whose shape makes formatting throw, summarise as 'ok'.
 *
 * @param name   Tool name that produced the result.
 * @param ok     Whether the tool call succeeded.
 * @param result Raw result payload (shape depends on the tool).
 * @returns The digest string.
 */
function summarizeToolResult(name: string, ok: boolean, result: unknown): string {
  if (!ok) {
    return 'error';
  }
  try {
    switch (name) {
      case 'sec_edgar_companies': {
        if (!Array.isArray(result)) {
          return 'ok';
        }
        // Show at most six identifiers, preferring ticker over CIK.
        const labels = result.slice(0, 6).map(row => {
          const company = row as { ticker?: string; cik?: string };
          return company.ticker ?? company.cik ?? '?';
        });
        return `${result.length} match(es): ${labels.join(', ')}`;
      }
      case 'sec_financials': {
        const fin = result as { entityName?: string; facts?: Record<string, unknown>; conceptsFilter?: string[] };
        const taxonomyNames = fin.facts ? Object.keys(fin.facts) : [];
        let conceptTotal = 0;
        for (const taxonomy of Object.values(fin.facts ?? {})) {
          conceptTotal += Object.keys((taxonomy as Record<string, unknown>) ?? {}).length;
        }
        const filterNote = fin.conceptsFilter ? ` filter=[${fin.conceptsFilter.join(',')}]` : '';
        return `${fin.entityName ?? '?'} — ${conceptTotal} concept(s) across [${taxonomyNames.join(', ')}]${filterNote}`;
      }
      case 'sec_submissions': {
        const subs = result as { entityName?: string; filings?: unknown[] };
        const filingCount = subs.filings?.length ?? 0;
        return `${subs.entityName ?? '?'} — ${filingCount} filing(s)`;
      }
      case 'sec_filing_document': {
        const doc = result as { url?: string; text?: string; truncated?: boolean };
        const charCount = (doc.text ?? '').length;
        const truncatedNote = doc.truncated ? ' (truncated)' : '';
        return `${doc.url ?? '?'} — ${charCount} chars${truncatedNote}`;
      }
      case 'sec_filing_index': {
        const index = result as { directoryName?: string; files?: unknown[] };
        const fileCount = index.files?.length ?? 0;
        return `${index.directoryName ?? '?'} — ${fileCount} file(s)`;
      }
      case 'sec_filing_sections': {
        const split = result as { url?: string; sections?: { item?: string; charCount?: number }[]; fullCharCount?: number };
        const itemList = (split.sections ?? []).map(section => section.item ?? '?').join(',');
        return `${split.url ?? '?'} — items=[${itemList}] (full ${split.fullCharCount ?? 0} chars)`;
      }
      case 'sec_full_text_search': {
        const search = result as { totalHits?: number; hits?: unknown[]; query?: string };
        const returned = search.hits?.length ?? 0;
        return `q="${search.query ?? ''}" — ${search.totalHits ?? 0} total, ${returned} returned`;
      }
      case 'sec_company_concept': {
        const concept = result as { tag?: string; label?: string; units?: Record<string, unknown[]> };
        const unitNames = concept.units ? Object.keys(concept.units) : [];
        let rowTotal = 0;
        for (const rows of Object.values(concept.units ?? {})) {
          if (Array.isArray(rows)) {
            rowTotal += rows.length;
          }
        }
        return `${concept.tag ?? '?'} (${concept.label ?? '?'}) — ${rowTotal} row(s) across units [${unitNames.join(',')}]`;
      }
      case 'sec_xbrl_frames': {
        const frame = result as { tag?: string; unit?: string; period?: string; pts?: number; rows?: unknown[] };
        const rowCount = frame.rows?.length ?? 0;
        return `${frame.tag ?? '?'}/${frame.unit ?? '?'}/${frame.period ?? '?'} — ${frame.pts ?? 0} reporters, ${rowCount} rows`;
      }
      case 'sec_insider_form4': {
        const insider = result as { cik?: string; detail?: boolean; filings?: unknown[] };
        const parsedNote = insider.detail ? ' (parsed)' : '';
        return `CIK ${insider.cik ?? '?'} — ${insider.filings?.length ?? 0} Form 4 filing(s)${parsedNote}`;
      }
      default:
        return 'ok';
    }
  } catch {
    // The summary is best-effort: an unexpected result shape must not fail the run.
    return 'ok';
  }
}
/**
 * Best-effort extraction of a JSON value from the model's final text.
 *
 * Strategy:
 *  1. strip a leading/trailing markdown code fence and try to parse the
 *     whole text;
 *  2. otherwise scan for embedded `{ … }` spans with balanced braces
 *     (ignoring braces inside JSON strings) and return the first span that
 *     parses.
 *
 * The balanced scan is used instead of a greedy first-`{`-to-last-`}` match
 * so that stray braces in surrounding prose (for example a trailing
 * "(see {notes})") do not make an otherwise valid reply unparseable.
 *
 * @param text Raw text returned by the model.
 * @returns The parsed value, or null when no parseable JSON was found.
 */
function jsonResponseFromText(text: string): unknown {
  const cleaned = text
    .trim()
    .replace(/^```(?:json)?\s*/i, '')
    .replace(/```\s*$/i, '')
    .trim();
  try {
    return JSON.parse(cleaned);
  } catch {
    // Not pure JSON — look for an object embedded in surrounding text.
  }

  let start = cleaned.indexOf('{');
  while (start !== -1) {
    const end = findBalancedObjectEnd(cleaned, start);
    if (end === -1) {
      // This brace never closes; a later one still might.
      start = cleaned.indexOf('{', start + 1);
      continue;
    }
    try {
      return JSON.parse(cleaned.slice(start, end + 1));
    } catch {
      // Skip the whole failed span so fragments nested inside it are not
      // mistaken for the reply.
      start = cleaned.indexOf('{', end + 1);
    }
  }
  return null;
}

/**
 * Find the index of the `}` that closes the `{` at `start`, treating
 * double-quoted strings (with backslash escapes) as opaque.
 *
 * @returns The closing index, or -1 when the braces never balance.
 */
function findBalancedObjectEnd(source: string, start: number): number {
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < source.length; i++) {
    const ch = source[i];
    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) {
        return i;
      }
    }
  }
  return -1;
}
/**
 * Drive the source-extraction tool-use loop end-to-end.
 *
 * Sends the job prompt plus the SEC tool declarations to the model,
 * executes every tool the model requests (one at a time, in the order
 * requested), feeds the results back, and repeats until the model stops
 * asking for tools or MAX_TOOL_ITERATIONS rounds have been used. The text
 * of the final reply (all text blocks, joined) is then parsed as JSON and
 * validated against extractResponseSchema.
 *
 * Returns a structured ExtractWithToolsOutput on success, with a per-tool
 * trace the agent can fold into the audit trail (Std 6 + Std 10), or a
 * typed LlmFailure:
 *  - 'needs-api-key'     — no SDK client was configured;
 *  - 'sdk-error'         — the Anthropic request threw;
 *  - 'tool-loop-overrun' — the model was still requesting tools when the
 *                          iteration cap was reached;
 *  - 'empty-response'    — the model finished but returned no text;
 *  - 'invalid-response'  — the final text was not JSON of the expected shape.
 *
 * Std 12: SDK errors, tool-executor exceptions, unserialisable tool
 * results and exceptions thrown by `onToolCall` are all converted into
 * structured outcomes rather than propagated.
 *
 * @param req        The extraction job to run.
 * @param onToolCall Optional observer invoked once per executed tool call.
 * @returns The extraction output or a typed failure.
 */
export async function extractWithSecTools(
  req: ExtractRequest,
  onToolCall?: (t: ToolCallTrace) => void,
): Promise<LlmResult<ExtractWithToolsOutput>> {
  if (!client) {
    return {
      ok: false,
      failure: {
        category: 'needs-api-key',
        reason: 'Source/Extraction requires the LLM but ANTHROPIC_API_KEY is not configured.',
        hint: 'Set ANTHROPIC_API_KEY and rerun; this agent does not fake runs.',
      },
    };
  }

  const tools = ANTHROPIC_TOOLS;
  const system = buildSystemPrompt();
  const messages: MessageParam[] = [{ role: 'user', content: buildUserMessage(req) }];
  const toolCalls: ToolCallTrace[] = [];
  let finalText = '';
  let finalStopReason: string | null = null;
  // Distinguishes "model ended its turn" from "iteration cap reached".
  let modelFinished = false;

  for (let iter = 0; iter < MAX_TOOL_ITERATIONS; iter++) {
    let resp;
    try {
      resp = await client.messages.create({
        model: MODEL,
        max_tokens: MAX_TOKENS_PER_TURN,
        system,
        tools,
        messages,
      });
    } catch (err) {
      return {
        ok: false,
        failure: { category: 'sdk-error', reason: err instanceof Error ? err.message : String(err) },
      };
    }
    recordUsage('baseline.source-extraction', MODEL, resp.usage.input_tokens, resp.usage.output_tokens);
    messages.push({ role: 'assistant', content: resp.content as ContentBlock[] });

    if (resp.stop_reason !== 'tool_use') {
      // Join every text block: the JSON payload is not guaranteed to be in
      // the first one.
      const textParts: string[] = [];
      for (const block of resp.content.filter((b): b is TextBlock => b.type === 'text')) {
        textParts.push(block.text);
      }
      finalText = textParts.join('\n').trim();
      finalStopReason = resp.stop_reason;
      modelFinished = true;
      break;
    }

    const toolUses = resp.content.filter((b): b is ToolUseBlock => b.type === 'tool_use');
    const toolResults: { type: 'tool_result'; tool_use_id: string; content: string; is_error?: boolean }[] = [];
    for (const tu of toolUses) {
      let ok = false;
      let value: unknown = undefined;
      let error: unknown = undefined;
      let errorMessage: string | undefined;
      try {
        const outcome = await dispatchSecTool(tu.name, tu.input);
        ok = outcome.ok;
        value = outcome.result;
        error = outcome.error;
        errorMessage = outcome.error?.message;
      } catch (err) {
        // An executor that throws is reported to the model as a failed tool
        // call instead of aborting the whole run.
        errorMessage = err instanceof Error ? err.message : String(err);
        error = { message: errorMessage };
      }

      let payload: string;
      try {
        // JSON.stringify yields undefined for an undefined result, but the
        // API requires string content.
        const serialized: string | undefined = JSON.stringify(ok ? value : { error });
        payload = serialized ?? 'null';
      } catch (err) {
        // Circular structures / BigInt cannot be serialised.
        ok = false;
        errorMessage = `tool result could not be serialized: ${err instanceof Error ? err.message : String(err)}`;
        payload = JSON.stringify({ error: { message: errorMessage } });
      }

      const trace: ToolCallTrace = {
        toolName: tu.name,
        input: (tu.input ?? {}) as Record<string, unknown>,
        ok,
        resultSummary: summarizeToolResult(tu.name, ok, value),
        errorMessage,
        at: new Date().toISOString(),
      };
      toolCalls.push(trace);
      try {
        onToolCall?.(trace);
      } catch {
        // Observer failures must not abort the extraction; the trace is
        // still returned in `toolCalls`.
      }

      toolResults.push({
        type: 'tool_result',
        tool_use_id: tu.id,
        content: payload,
        is_error: !ok,
      });
    }
    messages.push({ role: 'user', content: toolResults });
  }

  if (!modelFinished) {
    return {
      ok: false,
      failure: {
        category: 'tool-loop-overrun',
        reason: `Tool-use loop exceeded ${MAX_TOOL_ITERATIONS} iterations without an end_turn.`,
      },
    };
  }

  if (!finalText) {
    return {
      ok: false,
      failure: {
        category: 'empty-response',
        reason: `Model stopped (stop_reason=${finalStopReason ?? 'unknown'}) without returning any text.`,
      },
    };
  }

  const parsed = extractResponseSchema.safeParse(jsonResponseFromText(finalText));
  if (!parsed.success) {
    const truncated = finalStopReason === 'max_tokens';
    return {
      ok: false,
      failure: {
        category: 'invalid-response',
        reason: `final response did not match schema: ${parsed.error.message}`,
        ...(truncated
          ? { hint: `The reply was cut off at max_tokens (${MAX_TOKENS_PER_TURN}); the JSON is probably truncated.` }
          : {}),
      },
    };
  }

  return {
    ok: true,
    value: {
      hits: parsed.data.hits,
      comparabilityNotes: parsed.data.comparabilityNotes,
      toolCalls,
    },
  };
}