BID · Console
Baseline · Intelligence · Decision
src/agents/baseline/source-extraction/schema.ts 2,393 bytes · typescript
/**
 * Source/Extraction — input and output schemas (Std 2 + Std 11).
 *
 * Input: the JobRequest (Std 2 — first agent of the pipeline).
 * Output: a Structured Payload of extracted values + comparability notes.
 */

import { z } from 'zod';
import { jobRequestSchema } from '../../../types.js';

/**
 * Input schema for the Source/Extraction agent.
 *
 * This is the JobRequest schema itself, re-exported under an agent-specific
 * name; no fields are added, removed, or refined here.
 */
export const sourceExtractionInputSchema = jobRequestSchema;
/** Static type inferred from {@link sourceExtractionInputSchema}. */
export type SourceExtractionInput = z.infer<typeof sourceExtractionInputSchema>;

/**
 * One extracted value together with the provenance needed to audit it.
 *
 * All fields are required; only `value` and `rawUnit` may be `null`.
 */
export const extractedValueSchema = z.object({
  // ── Identity: carried over from the JobRequest ──────────────────────────
  /** Entity id, exactly as given in the JobRequest. */
  entity: z.string(),
  /** Key of the target metric named in the JobRequest. */
  metricKey: z.string(),
  /** Period covered by this value (echoed back from the JobRequest). */
  period: z.string(),

  // ── What was observed in the source ─────────────────────────────────────
  /** The label verbatim from the source — raw, not normalized. */
  rawLabel: z.string(),
  /** Best-effort number, expressed in the unit given by `rawUnit`; nullable. */
  value: z.nullable(z.number()),
  /** Unit as it was observed (e.g. "USD_MM", "EUR_MM"), or null when unknown. */
  rawUnit: z.nullable(z.string()),
  /** The piece of source text the value was read from (audit, Std 4). */
  snippet: z.string(),

  // ── Provenance of the source payload ────────────────────────────────────
  /** Canonical URL of the upstream document. */
  sourceUrl: z.string(),
  /** Name of the connector that produced the payload. */
  sourceConnector: z.string(),
  /** Content type of the source (e.g. "text/html"). */
  contentType: z.string(),
  /** ISO timestamp at which the source payload was captured (Std 4 — provenance). */
  capturedAt: z.string(),

  // ── Extractor assessment ────────────────────────────────────────────────
  /** Extractor-assigned confidence, bounded to the closed interval [0, 1]. */
  confidence: z.number().gte(0).lte(1),
  /** How the value was located: 'llm' (judgment) or 'fail' (no extractor). */
  origin: z.enum(['llm', 'fail']),
  /** Free-form flags raised by the extractor (e.g. "restatement-mentioned"). */
  flags: z.string().array(),
});
/** Static type inferred from {@link extractedValueSchema}. */
export type ExtractedValue = z.infer<typeof extractedValueSchema>;

/**
 * A comparability note, flagged for the Normalization agent.
 */
export const comparabilityNoteSchema = z.object({
  /** Entities the note refers to (presumably JobRequest entity ids — confirm). */
  entities: z.string().array(),
  /** Free-text description of the comparability concern. */
  detail: z.string(),
});
/** Static type inferred from {@link comparabilityNoteSchema}. */
export type ComparabilityNote = z.infer<typeof comparabilityNoteSchema>;

/**
 * Output schema for the Source/Extraction agent: the extracted values plus
 * any comparability notes. Both lists are required.
 */
export const sourceExtractionOutputSchema = z.object({
  /** Every extracted value, each carrying its own provenance. */
  values: extractedValueSchema.array(),
  /** Comparability notes accompanying the extracted values. */
  comparabilityNotes: comparabilityNoteSchema.array(),
});
/** Static type inferred from {@link sourceExtractionOutputSchema}. */
export type SourceExtractionOutput = z.infer<typeof sourceExtractionOutputSchema>;