Code · src/agents/baseline/normalization/schema.ts

src/agents/baseline/normalization/schema.ts 1,965 bytes · typescript
/**
 * Normalization — input and output schemas (Std 2 + Std 11).
 *
 * Input: the Source/Extraction Structured Payload.
 * Output: analytics-ready normalized records, with raw values preserved
 * alongside the canonical ones (Std 4).
 */

import { z } from 'zod';
import { extractedValueSchema, comparabilityNoteSchema } from '../source-extraction/schema.js';

// Array schemas are named separately so the object shape below reads as a
// plain field list.
const extractedValueListSchema = z.array(extractedValueSchema);
const comparabilityNoteListSchema = z.array(comparabilityNoteSchema);

/**
 * Schema for the object handed to normalization.
 *
 * It has two array fields: `values`, whose elements are checked by
 * `extractedValueSchema`, and `comparabilityNotes`, whose elements are
 * checked by `comparabilityNoteSchema`.
 */
export const normalizationInputSchema = z.object({
  values: extractedValueListSchema,
  comparabilityNotes: comparabilityNoteListSchema,
});

/** Static type inferred from `normalizationInputSchema`. */
export type NormalizationInput = z.infer<typeof normalizationInputSchema>;

/** Canonical side of a record: entity, metric, period, value and unit. */
const canonicalRecordFields = {
  canonicalEntity: z.string(),
  canonicalMetric: z.string(),
  period: z.string(),
  /** Value expressed in `canonicalUnit`, or null if conversion failed. */
  value: z.number().nullable(),
  canonicalUnit: z.string(),
};

/** Raw values preserved alongside the canonical ones (Std 4). */
const rawRecordFields = {
  rawEntity: z.string(),
  rawLabel: z.string(),
  rawValue: z.number().nullable(),
  rawUnit: z.string().nullable(),
};

/** Provenance (Std 4). */
const provenanceRecordFields = {
  sourceUrl: z.string(),
  capturedAt: z.string(),
};

/** Explainability, confidence and flag fields attached to each record. */
const traceRecordFields = {
  /** Std 3: every applied rule recorded for explainability. */
  appliedRules: z.array(z.string()),
  /** Per-record confidence (Std 7); must lie between 0 and 1 inclusive. */
  confidence: z.number().min(0).max(1),
  /** Free-form flags (Std 8 trigger surface). */
  flags: z.array(z.string()),
};

/**
 * Schema for one normalized record.
 *
 * The groups are spread in a fixed order (canonical, raw, provenance,
 * trace) so the resulting shape lists its keys in the same sequence as a
 * single flat object literal would.
 */
export const normalizedRecordSchema = z.object({
  ...canonicalRecordFields,
  ...rawRecordFields,
  ...provenanceRecordFields,
  ...traceRecordFields,
});

/** Static type inferred from `normalizedRecordSchema`. */
export type NormalizedRecord = z.infer<typeof normalizedRecordSchema>;

const learnedRuleShape = {
  /** Raw label (normalized to a lookup key). */
  key: z.string(),
  /** Canonical metric or canonical entity. */
  value: z.string(),
};

/**
 * Std 10 — surface for the orchestrator's write-back.
 *
 * A learned rule is a pair of strings: a lookup `key` and the canonical
 * `value` it maps to.
 */
export const learnedRuleSchema = z.object(learnedRuleShape);

/** Static type inferred from `learnedRuleSchema`. */
export type LearnedRule = z.infer<typeof learnedRuleSchema>;

// Array schemas are named separately so the object shape below reads as a
// plain field list.
const normalizedRecordListSchema = z.array(normalizedRecordSchema);
const learnedRuleListSchema = z.array(learnedRuleSchema);

/**
 * Schema for the object normalization produces.
 *
 * It has two array fields: `records`, whose elements are checked by
 * `normalizedRecordSchema`, and `learnedRules`, whose elements are checked
 * by `learnedRuleSchema`.
 */
export const normalizationOutputSchema = z.object({
  records: normalizedRecordListSchema,
  learnedRules: learnedRuleListSchema,
});

/** Static type inferred from `normalizationOutputSchema`. */
export type NormalizationOutput = z.infer<typeof normalizationOutputSchema>;