// BID · Console
// Baseline · Intelligence · Decision
// src/agents/baseline/source-extraction/matrix.ts 4,499 bytes · typescript
/**
 * Source/Extraction agent — matrix row.
 *
 * The 12 per-agent fill-ins for the universal standards in
 * src/standards.ts. This module exports both the human-readable matrix
 * text (`sourceExtractionMatrix`, used by the prompt builder, Std 3 +
 * Std 6) and the typed structural views the orchestrator consumes via
 * `sourceExtractionContract` (Std 1, 4, 5, 6, 8, 10).
 *
 * No domain knowledge anywhere — the agent is universal. Domain-
 * specific extraction targets arrive at runtime via the JobRequest.
 */

import {
  type AgentRules,
  type AgentStandardsContract,
  BASELINE_FORBIDDEN,
  type Capability,
  type RunbookStep,
  type TriggerCategory,
  type WriteBackDeclaration,
} from '../../../standards.js';

/** Identifier for this agent; reported as `agentName` in `sourceExtractionContract`. */
export const AGENT_NAME = 'baseline.source-extraction';

/** Version string for this agent; reported as `agentVersion` in `sourceExtractionContract`. */
export const AGENT_VERSION = '1.0.0';

/**
 * Human-readable matrix for the Source/Extraction agent: twelve numbered
 * rows whose text is reproduced verbatim from the spec. Each key carries its
 * row number as a prefix and the keys are declared in row order; `as const`
 * keeps every value as a readonly string-literal type.
 */
export const sourceExtractionMatrix = {
  '1_objective': 'Acquire and structure source data for downstream analysis from approved sources.',
  '2_inputs': 'Business objective, entities, time period, source scope, required data.',
  '3_decisionLogic': 'Determine source category, retrieval method, parsing approach.',
  '4_rulesConstraints': 'Approved sources only, preserve lineage, no fabrication.',
  '5_methodsTools': 'Retrieval, API, web, parser, OCR, repository tools.',
  '6_processing': 'Retrieve, parse, extract, structure.',
  '7_validation': 'Completeness, lineage, confidence, validation status.',
  '8_conditionalTriggers': 'Missing data, ambiguity, duplicate, low confidence.',
  '9_hitlEscalation': 'Unreachable source, unresolved ambiguity or critical validation failure.',
  '10_repositoryWriteBack': 'Raw payload, outputs, metadata, lineage, validation, confidence.',
  '11_handoff': 'Payload + lineage + validation → Structured Payload for next agent.',
  '12_failureHandling': 'Stop on unrecoverable extraction, no infinite retry, preserve failure context.',
} as const;

/* ------------------------------------------------------------------ *
 * Typed structural views derived from the matrix. The orchestrator
 * consumes `sourceExtractionContract`; the agent's `execute` walks the
 * `runbook`.
 * ------------------------------------------------------------------ */

/**
 * Capabilities declared for this agent, in the same order as the tools named
 * in matrix row 5 (`5_methodsTools`).
 */
const capabilities: readonly Capability[] = [
  'retrieval',
  'api',
  'web',
  'parser',
  'ocr',
  'repository',
];

/**
 * Ordered runbook for this agent. `n` is the 1-based position of the step;
 * each description cites the standard(s) the step relates to.
 */
const runbook: readonly RunbookStep[] = [
  {
    n: 1,
    name: 'validate-input',
    description: 'Validate the JobRequest input against the schema (Std 2).',
  },
  {
    n: 2,
    name: 'retrieve',
    description:
      'Retrieve raw payloads from approved connectors for each (entity, source) pair (Std 4, Std 5).',
  },
  {
    n: 3,
    name: 'parse-and-extract',
    description:
      'Parse each raw payload and extract target metric values (LLM judgment per Std 3 + Std 5).',
  },
  {
    n: 4,
    name: 'structure',
    description: 'Stamp provenance and assemble structured records (Std 4).',
  },
  {
    n: 5,
    name: 'validate-output',
    description: 'Validate completeness, lineage, confidence, and validation status (Std 7).',
  },
  {
    n: 6,
    name: 'handoff',
    description: 'Package the Structured Payload for the next agent (Std 11).',
  },
];

/**
 * Trigger categories declared for this agent. The first four correspond to
 * the conditions listed in matrix row 8 (`8_conditionalTriggers`); the last
 * three are additional categories declared here.
 */
const triggers: readonly TriggerCategory[] = [
  'missing-data',
  'ambiguity',
  'duplicate-records',
  'low-confidence',
  'failed-retrieval',
  'parsing-failure',
  'source-mismatch',
];

/**
 * Write-back declaration for this agent (compare matrix row 10,
 * `10_repositoryWriteBack`). Six categories are switched on; learned rules
 * and human overrides are switched off.
 */
const writeBack: WriteBackDeclaration = {
  // Enabled categories.
  structuredOutputs: true,
  metadata: true,
  lineage: true,
  validation: true,
  confidence: true,
  exceptionLogs: true,

  // Disabled categories.
  learnedRules: false,
  humanOverrides: false,
};

/**
 * Rule flags for this agent (compare matrix row 4, `4_rulesConstraints`).
 * Every boolean flag is enabled; the pillar-specific forbidden list is the
 * shared `BASELINE_FORBIDDEN` value imported from the standards module.
 */
const rules: AgentRules = {
  // Preservation guarantees.
  preserveRawSource: true,
  preserveLineage: true,
  preserveAuditability: true,

  // Prohibitions.
  forbidFabrication: true,
  forbidDestructiveOverwrite: true,

  // Tooling restrictions.
  approvedToolsOnly: true,
  pillarSpecificForbidden: BASELINE_FORBIDDEN,
};

/**
 * Standards contract for the Source/Extraction agent. Bundles the agent's
 * identity, its objective statement, and the module-level structural views
 * (`rules`, `capabilities`, `runbook`, `triggers`, `writeBack`) into a single
 * `AgentStandardsContract` value.
 */
export const sourceExtractionContract: AgentStandardsContract = {
  // Identity.
  agentName: AGENT_NAME,
  agentVersion: AGENT_VERSION,
  pillar: 'baseline',

  // Objective: `does` reuses the matrix row 1 text so the two cannot drift.
  objective: {
    does: sourceExtractionMatrix['1_objective'],
    produces: 'A provenance-stamped Structured Payload of extracted values for the Normalization agent.',
    doesNot: [
      'normalize values to a canonical taxonomy',
      'resolve duplicates or contradictions',
      'fabricate values not present in the source',
    ],
    downstreamPurpose: 'Feed the Normalization agent with raw, lineage-stamped inputs.',
  },

  // Structural views declared earlier in this module.
  rules: rules,
  capabilities: capabilities,
  runbook: runbook,
  triggers: triggers,
  writeBack: writeBack,
};