/**
* Normalization — input and output schemas (Std 2 + Std 11).
*
* Input: the Source/Extraction Structured Payload.
* Output: analytics-ready normalized records, with raw values preserved
* alongside the canonical ones (Std 4).
*/
import { z } from 'zod';
import { extractedValueSchema, comparabilityNoteSchema } from '../source-extraction/schema.js';
/**
 * Field validators for the normalization input: the extracted values and the
 * comparability notes, each checked by the schema imported from
 * `../source-extraction/schema.js`.
 */
const normalizationInputShape = {
  values: z.array(extractedValueSchema),
  comparabilityNotes: z.array(comparabilityNoteSchema),
};

/** Runtime validator for the normalization input payload. */
export const normalizationInputSchema = z.object(normalizationInputShape);

/** Static type inferred from `normalizationInputSchema`. */
export type NormalizationInput = z.infer<typeof normalizationInputSchema>;
/** Canonical side of a record: entity, metric, period, value and unit. */
const normalizedRecordCanonicalFields = {
  canonicalEntity: z.string(),
  canonicalMetric: z.string(),
  period: z.string(),
  /** Value expressed in `canonicalUnit`, or null if conversion failed. */
  value: z.number().nullable(),
  canonicalUnit: z.string(),
};

/** Raw values preserved alongside the canonical ones (Std 4). */
const normalizedRecordRawFields = {
  rawEntity: z.string(),
  rawLabel: z.string(),
  rawValue: z.number().nullable(),
  rawUnit: z.string().nullable(),
};

/** Provenance (Std 4). */
const normalizedRecordProvenanceFields = {
  sourceUrl: z.string(),
  capturedAt: z.string(),
};

/** Explainability, confidence and flag fields. */
const normalizedRecordAuditFields = {
  /** Std 3: every applied rule recorded for explainability. */
  appliedRules: z.array(z.string()),
  /** Per-record confidence (Std 7), constrained to the inclusive range 0..1. */
  confidence: z.number().gte(0).lte(1),
  /** Free-form flags (Std 8 trigger surface). */
  flags: z.array(z.string()),
};

/**
 * Runtime validator for a single normalized record.
 *
 * The fragments are spread in declaration order, so the resulting shape lists
 * its keys as: canonical fields, raw fields, provenance, then audit fields.
 */
export const normalizedRecordSchema = z.object({
  ...normalizedRecordCanonicalFields,
  ...normalizedRecordRawFields,
  ...normalizedRecordProvenanceFields,
  ...normalizedRecordAuditFields,
});

/** Static type inferred from `normalizedRecordSchema`. */
export type NormalizedRecord = z.infer<typeof normalizedRecordSchema>;
/** Field validators for one learned rule (a key/value string pair). */
const learnedRuleShape = {
  /** Raw label (normalized to a lookup key). */
  key: z.string(),
  /** Canonical metric or canonical entity. */
  value: z.string(),
};

/** Std 10 — surface for the orchestrator's write-back. */
export const learnedRuleSchema = z.object(learnedRuleShape);

/** Static type inferred from `learnedRuleSchema`. */
export type LearnedRule = z.infer<typeof learnedRuleSchema>;
/** Validator for the list of normalized records in the output. */
const normalizedRecordListSchema = z.array(normalizedRecordSchema);

/** Validator for the list of learned rules in the output. */
const learnedRuleListSchema = z.array(learnedRuleSchema);

/**
 * Runtime validator for the normalization output: the normalized records
 * together with the learned rules.
 */
export const normalizationOutputSchema = z.object({
  records: normalizedRecordListSchema,
  learnedRules: learnedRuleListSchema,
});

/** Static type inferred from `normalizationOutputSchema`. */
export type NormalizationOutput = z.infer<typeof normalizationOutputSchema>;