/**
* Source/Extraction agent — matrix row.
*
* The 12 per-agent fill-ins for the universal standards in
* src/standards.ts. The exported `sourceExtractionMatrix` carries both
* the human-readable matrix text (used by the prompt builder, Std 3 +
* Std 6) and the typed structural views the orchestrator consumes via
* `sourceExtractionContract` (Std 1, 4, 5, 6, 8, 10).
*
* No domain knowledge anywhere — the agent is universal. Domain-
* specific extraction targets arrive at runtime via the JobRequest.
*/
import {
type AgentRules,
type AgentStandardsContract,
BASELINE_FORBIDDEN,
type Capability,
type RunbookStep,
type TriggerCategory,
type WriteBackDeclaration,
} from '../../../standards.js';
/** Name of this agent; surfaced as `agentName` on `sourceExtractionContract`. */
export const AGENT_NAME = 'baseline.source-extraction';
/** Version string of this agent; surfaced as `agentVersion` on `sourceExtractionContract`. */
export const AGENT_VERSION = '1.0.0';
/**
 * The twelve matrix rows for this agent, kept verbatim from the spec.
 *
 * Each key is `<row number>_<rowName>` and each value is the human-readable
 * text for that row. The `as const` assertion keeps every value as a readonly
 * string-literal type, so consumers can index rows without widening.
 */
export const sourceExtractionMatrix = {
  '1_objective':
    'Acquire and structure source data for downstream analysis from approved sources.',
  '2_inputs':
    'Business objective, entities, time period, source scope, required data.',
  '3_decisionLogic':
    'Determine source category, retrieval method, parsing approach.',
  '4_rulesConstraints':
    'Approved sources only, preserve lineage, no fabrication.',
  '5_methodsTools':
    'Retrieval, API, web, parser, OCR, repository tools.',
  '6_processing':
    'Retrieve, parse, extract, structure.',
  '7_validation':
    'Completeness, lineage, confidence, validation status.',
  '8_conditionalTriggers':
    'Missing data, ambiguity, duplicate, low confidence.',
  '9_hitlEscalation':
    'Unreachable source, unresolved ambiguity or critical validation failure.',
  '10_repositoryWriteBack':
    'Raw payload, outputs, metadata, lineage, validation, confidence.',
  '11_handoff':
    'Payload + lineage + validation → Structured Payload for next agent.',
  '12_failureHandling':
    'Stop on unrecoverable extraction, no infinite retry, preserve failure context.',
} as const;
/* ------------------------------------------------------------------ *
* Typed structural views derived from the matrix. The orchestrator
* consumes `sourceExtractionContract`; the agent's `execute` walks the
* `runbook`.
* ------------------------------------------------------------------ */
/**
 * Tool capabilities declared by this agent, in the same order as matrix
 * row 5 ("Retrieval, API, web, parser, OCR, repository tools").
 */
const capabilities: readonly Capability[] = [
  'retrieval',
  'api',
  'web',
  'parser',
  'ocr',
  'repository',
];
/**
 * Ordered steps of the agent's run, numbered from 1.
 *
 * Each description names the standard(s) the step relates to.
 */
const runbook: readonly RunbookStep[] = [
  {
    n: 1,
    name: 'validate-input',
    description: 'Validate the JobRequest input against the schema (Std 2).',
  },
  {
    n: 2,
    name: 'retrieve',
    description:
      'Retrieve raw payloads from approved connectors for each (entity, source) pair (Std 4, Std 5).',
  },
  {
    n: 3,
    name: 'parse-and-extract',
    description:
      'Parse each raw payload and extract target metric values (LLM judgment per Std 3 + Std 5).',
  },
  {
    n: 4,
    name: 'structure',
    description: 'Stamp provenance and assemble structured records (Std 4).',
  },
  {
    n: 5,
    name: 'validate-output',
    description: 'Validate completeness, lineage, confidence, and validation status (Std 7).',
  },
  {
    n: 6,
    name: 'handoff',
    description: 'Package the Structured Payload for the next agent (Std 11).',
  },
];
/**
 * Conditional trigger categories declared by this agent.
 *
 * The first four correspond to matrix row 8; the remaining three are
 * declared here in addition to that row's text.
 */
const triggers: readonly TriggerCategory[] = [
  // Named in matrix row 8.
  'missing-data',
  'ambiguity',
  'duplicate-records',
  'low-confidence',
  // Not named in matrix row 8.
  'failed-retrieval',
  'parsing-failure',
  'source-mismatch',
];
/**
 * Repository write-back declaration for this agent (matrix row 10).
 *
 * NOTE(review): matrix row 10 also lists "Raw payload", which has no flag in
 * this declaration — confirm whether that is covered elsewhere (for example
 * by `rules.preserveRawSource`).
 */
const writeBack: WriteBackDeclaration = {
  // Artifacts this agent declares it writes back.
  structuredOutputs: true,
  metadata: true,
  lineage: true,
  validation: true,
  confidence: true,
  exceptionLogs: true,
  // Artifacts this agent declares it does not write back.
  learnedRules: false,
  humanOverrides: false,
};
/**
 * Rule flags for this agent (matrix row 4: approved sources only, preserve
 * lineage, no fabrication). Every boolean rule is switched on.
 */
const rules: AgentRules = {
  // Preservation guarantees.
  preserveRawSource: true,
  preserveLineage: true,
  preserveAuditability: true,
  // Prohibitions.
  forbidFabrication: true,
  forbidDestructiveOverwrite: true,
  approvedToolsOnly: true,
  // Uses the shared forbidden list imported from the standards module as-is.
  pillarSpecificForbidden: BASELINE_FORBIDDEN,
};
/**
 * Objective section of the contract: what the agent does, what it produces,
 * what it explicitly does not do, and what the output is for downstream.
 */
const sourceExtractionObjective: AgentStandardsContract['objective'] = {
  // Reuse matrix row 1 so the contract and the matrix text cannot drift apart.
  does: sourceExtractionMatrix['1_objective'],
  produces:
    'A provenance-stamped Structured Payload of extracted values for the Normalization agent.',
  doesNot: [
    'normalize values to a canonical taxonomy',
    'resolve duplicates or contradictions',
    'fabricate values not present in the source',
  ],
  downstreamPurpose: 'Feed the Normalization agent with raw, lineage-stamped inputs.',
};

/**
 * Typed standards contract for the Source/Extraction agent, assembled from
 * the module-level identity constants and the structural views declared in
 * this file (rules, capabilities, runbook, triggers, write-back).
 */
export const sourceExtractionContract: AgentStandardsContract = {
  agentName: AGENT_NAME,
  agentVersion: AGENT_VERSION,
  pillar: 'baseline',
  objective: sourceExtractionObjective,
  rules,
  capabilities,
  runbook,
  triggers,
  writeBack,
};