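/**
 * Normalization agent — runtime entry point.
 *
 * Walks the matrix-defined runbook (Std 6). Steps 1, 2, 5, 6 and 7 are
 * deterministic. Step 3 is deterministic when a learned rule or a confident
 * upstream mapping applies and falls back to the LLM for novel labels;
 * step 4 is deterministic when units already match and falls back to the
 * LLM for conversions (Std 3 — judgment is explicit).
 */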
import {
type AgentResult,
type HITLEscalation,
LOW_CONFIDENCE_THRESHOLD,
makeConfidence,
} from '../../../standards.js';
import type {
ExecutionContext,
FailureObject,
Handoff,
JobRequest,
Lineage,
UnresolvedIssue,
} from '../../../types.js';
import { nowIso } from '../../../types.js';
import {
AGENT_NAME,
AGENT_VERSION,
normalizationContract,
} from './matrix.js';
import {
type NormalizationInput,
type NormalizationOutput,
type NormalizedRecord,
type LearnedRule,
normalizationInputSchema,
} from './schema.js';
import { LearnedMappingStore, normalizeKey } from './storage.js';
import { convertUnit, mapTerminology } from './llm.js';
export { normalizationContract } from './matrix.js';
export type { NormalizationOutput } from './schema.js';
/**
 * Out-of-band context handed to the normalization agent alongside its raw input.
 */
export interface NormalizationSideContext {
  /**
   * Lineage of the upstream step. It is attached to failures and escalations
   * and extended with per-record source URLs in the outgoing handoff.
   */
  readonly upstreamLineage: Lineage;
  /**
   * Originating job request. Its entities, target metrics and optional seed
   * mappings seed the lookup store and define the target vocabulary.
   */
  readonly jobRequest: JobRequest;
}
/**
 * Appends one trace entry for this agent to the execution context and echoes
 * the same information to the console.
 *
 * @param ctx - Execution context whose `trace` list receives the entry.
 * @param standard - Number of the standard the step is reported under.
 * @param step - Name of the runbook step being reported.
 * @param detail - Free-text description of what happened.
 */
function trace(ctx: ExecutionContext, standard: number, step: string, detail: string): void {
  ctx.trace.push({
    agent: AGENT_NAME,
    standard,
    step,
    detail,
    at: nowIso(),
  });
  const line = ` [${AGENT_NAME}][Std ${standard}] ${step} — ${detail}`;
  // eslint-disable-next-line no-console
  console.log(line);
}
/**
 * Builds the failure object reported by this agent.
 *
 * @param ctx - Execution context; supplies the retry count and recursion depth.
 * @param category - Failure category, constrained to `FailureObject['category']`.
 * @param reason - Human-readable explanation of the failure.
 * @param context - Arbitrary structured details attached to the failure.
 * @param lineage - Lineage to associate with the failure.
 * @returns A failure object stamped with this agent's name, version and the current time.
 */
function failure(
  ctx: ExecutionContext,
  category: FailureObject['category'],
  reason: string,
  context: Record<string, unknown>,
  lineage: Lineage,
): FailureObject {
  const { retries: attempts, recursionDepth } = ctx;
  const report: FailureObject = {
    agent: AGENT_NAME,
    agentVersion: AGENT_VERSION,
    category,
    reason,
    context,
    lineage,
    attempts,
    recursionDepth,
    occurredAt: nowIso(),
  };
  return report;
}
/**
 * Seeds the mapping store from the job request: every entity id and alias
 * maps to the entity id, every target-metric key maps to itself, and each
 * optional seed mapping maps its source label to its target key. All entries
 * are registered as 'seed' with confidence 1.
 */
function seedStoreFromJob(store: LearnedMappingStore, jobRequest: JobRequest): void {
  for (const entity of jobRequest.entities) {
    store.learnEntity(entity.id, entity.id, 'seed', 1);
    for (const alias of entity.aliases) store.learnEntity(alias, entity.id, 'seed', 1);
  }
  for (const metric of jobRequest.targetMetrics) {
    store.learnMetric(metric.key, metric.key, 'seed', 1);
  }
  for (const mapping of jobRequest.seedMappings ?? []) {
    store.learnMetric(mapping.sourceLabel, mapping.targetKey, 'seed', 1);
  }
}

/**
 * Groups records by entity/metric/period and, where a group's non-null values
 * disagree, flags every record of the group and appends one non-blocking
 * 'contradictory-mapping' issue to `unresolved`.
 */
function flagContradictions(records: readonly NormalizedRecord[], unresolved: UnresolvedIssue[]): void {
  const groups = new Map<string, NormalizedRecord[]>();
  for (const record of records) {
    const key = `${record.canonicalEntity}::${record.canonicalMetric}::${record.period}`;
    const group = groups.get(key);
    if (group) group.push(record);
    else groups.set(key, [record]);
  }
  for (const [key, group] of groups) {
    // Only records that actually carry a value can contradict each other.
    const observed = group.flatMap(r => (r.value === null ? [] : [{ value: r.value, source: r.sourceUrl }]));
    const baseline = observed[0];
    if (baseline === undefined || observed.length <= 1) continue;
    // A value disagrees when it is more than 2% away from the first non-null
    // value, relative to max(1, |first|).
    // NOTE(review): the comparison is against the first value only, so the
    // outcome can depend on record order — confirm this is intended.
    const scale = Math.max(1, Math.abs(baseline.value));
    const contradiction = observed.some(o => Math.abs(o.value - baseline.value) / scale > 0.02);
    if (!contradiction) continue;
    for (const r of group) r.flags.push('contradictory-mapping');
    unresolved.push({
      category: 'contradictory-mapping',
      detail: `multiple values for ${key}: ${observed.map(o => o.value).join(', ')}`,
      blocking: false,
      context: { key, values: observed },
    });
  }
}

/**
 * Runs the normalization runbook over one batch of extracted values.
 *
 * The input is validated against `normalizationInputSchema`; each value is
 * then resolved to a canonical entity, canonical metric and canonical unit,
 * duplicates are checked for contradictions, newly learned mappings are
 * drained for write-back, and the result is packaged as a handoff.
 *
 * @param rawInput - Untrusted input; must satisfy `normalizationInputSchema`.
 * @param side - Job request and upstream lineage accompanying the input.
 * @param ctx - Execution context used for tracing and failure metadata.
 * @returns `ok: false` with an 'invalid-input' failure when validation fails,
 *   `ok: false` with an 'unresolved-normalization' failure when no record
 *   could be produced, otherwise `ok: true` with the handoff. Escalations
 *   raised along the way are returned in both of the latter cases.
 *
 * Rejections from `store.load()`, `store.save()` or the LLM helpers are not
 * caught here and propagate to the caller.
 */
export async function runNormalization(
  rawInput: unknown,
  side: NormalizationSideContext,
  ctx: ExecutionContext,
): Promise<AgentResult<NormalizationOutput>> {
  /* Step 1 (Std 2): validate-input. */
  const parsed = normalizationInputSchema.safeParse(rawInput);
  if (!parsed.success) {
    return {
      ok: false,
      escalations: [],
      failure: failure(ctx, 'invalid-input', 'Normalization input failed schema validation.', { issues: parsed.error.issues }, side.upstreamLineage),
    };
  }
  const input: NormalizationInput = parsed.data;
  trace(ctx, 2, normalizationContract.runbook[0]!.name,
    `received ${input.values.length} value(s); ${input.comparabilityNotes.length} comparability note(s)`);

  /* Load learned mappings, then seed from the JobRequest (Std 10). */
  const store = new LearnedMappingStore();
  await store.load();
  seedStoreFromJob(store, side.jobRequest);

  const { targetMetrics } = side.jobRequest;
  /* Job-level default unit: the first target metric's unit ('' = no target unit). */
  const defaultTargetUnit = targetMetrics[0]?.unit ?? '';
  /* Each record is converted to the unit of the metric it was mapped to; the
   * job-level default applies only when that metric declares no unit or is
   * not one of the job's target metrics. */
  const targetUnitFor = (metricKey: string): string =>
    targetMetrics.find(m => m.key === metricKey)?.unit ?? defaultTargetUnit;

  const records: NormalizedRecord[] = [];
  const unresolved: UnresolvedIssue[] = [];
  const escalations: HITLEscalation[] = [];
  /* Becomes false if any valued record ends up in a unit other than its target unit. */
  let unitsCanonical = true;

  for (const v of input.values) {
    const appliedRules: string[] = [];
    const flags = [...v.flags];

    /* Step 2 (Std 5): normalize-entities — store lookup only; an entity with
     * no hit passes through unchanged (no LLM call is made in this step). */
    let canonicalEntity = v.entity;
    const eHit = store.lookupEntity(v.entity);
    if (eHit) {
      canonicalEntity = eHit.canonical;
      appliedRules.push(`entity-resolver:${eHit.by}->${eHit.canonical}`);
    }

    /* Step 3 (Std 3 + Std 5): normalize-terminology — learned lookup first; LLM for novel. */
    let canonicalMetric: string | null = null;
    let metricConfidence = 0.9;
    const mHit = store.lookupMetric(v.rawLabel);
    if (mHit) {
      canonicalMetric = mHit.canonical;
      /* NOTE(review): non-seed hits are floored at 0.8, so a rule learned at a
       * lower confidence is reported at 0.8 when reused — confirm intended. */
      metricConfidence = mHit.by === 'seed' ? 0.95 : Math.max(0.8, mHit.confidence);
      appliedRules.push(`taxonomy:${mHit.by}->${mHit.canonical}`);
    } else if (
      /* Std 6 — cost-appropriate execution: when Source/Extraction has
       * already confidently tied this rawLabel to a target metric
       * (the upstream selected rawLabel specifically to satisfy that
       * metricKey), the mapping is established. Record the inherited
       * determination and proceed — no LLM call. Std 7 — confidence
       * inherits because this agent introduces no new uncertainty. */
      v.metricKey
      && v.confidence >= LOW_CONFIDENCE_THRESHOLD
      && targetMetrics.some(m => m.key === v.metricKey)
    ) {
      canonicalMetric = v.metricKey;
      metricConfidence = v.confidence;
      appliedRules.push(`taxonomy:inherited-from-source-extraction->${v.metricKey}`);
      store.learnMetric(
        v.rawLabel,
        v.metricKey,
        'lookup',
        v.confidence,
        `Inherited from Source/Extraction (Std 6/7): rawLabel "${v.rawLabel}" was chosen specifically to satisfy target metric "${v.metricKey}" at upstream confidence ${v.confidence.toFixed(2)}.`,
      );
    } else {
      /* Std 3: judgment step — defer to LLM with citation. Reserved
       * for genuinely novel or ambiguous labels (no learned rule and
       * no confident upstream determination). */
      const r = await mapTerminology({
        rawLabel: v.rawLabel,
        entity: v.entity,
        vocabulary: targetMetrics.map(m => ({ key: m.key, definition: m.definition })),
      });
      if (r.ok) {
        canonicalMetric = r.value.canonical;
        metricConfidence = r.value.confidence;
        appliedRules.push(`ai-with-citation:${r.value.rationale}`);
        if (canonicalMetric) {
          store.learnMetric(v.rawLabel, canonicalMetric, 'ai', metricConfidence, r.value.rationale);
        }
      } else {
        /* Every mapping failure is recorded as 'unmapped-term'; only a
         * missing API key additionally raises a human escalation. */
        unresolved.push({
          category: 'unmapped-term',
          detail: `could not map "${v.rawLabel}" for ${v.entity}: ${r.failure.reason}`,
          blocking: false,
          context: { failure: r.failure },
        });
        if (r.failure.category === 'needs-api-key') {
          escalations.push({
            agent: AGENT_NAME,
            reason: 'unresolved-mapping',
            failureContext: `no learned rule for "${v.rawLabel}" and LLM unavailable`,
            lineage: side.upstreamLineage,
            validation: {
              status: 'review',
              confidence: makeConfidence(0, 'LLM unavailable'),
              checks: [{ name: 'mapping-available', passed: false }],
            },
            recommendedReviewer: 'domain-expert',
            raisedAt: nowIso(),
          });
        }
        continue;
      }
    }
    if (!canonicalMetric) {
      /* The LLM answered but returned no canonical metric. */
      unresolved.push({
        category: 'unmapped-term',
        detail: `LLM declined to map "${v.rawLabel}"`,
        blocking: false,
      });
      continue;
    }

    /* Step 4 (Std 5): normalize-units. Deterministic identity-pass when units match;
     * defer to LLM with citation only when they differ. */
    const targetUnit = targetUnitFor(canonicalMetric);
    let canonicalValue: number | null = v.value;
    /* A missing or empty raw unit is treated as "already in the target unit",
     * matching the truthiness test that guards the conversion branch below. */
    let canonicalUnit: string = v.rawUnit ? v.rawUnit : targetUnit;
    if (v.value !== null && v.rawUnit) {
      if (v.rawUnit === targetUnit || targetUnit === '') {
        canonicalUnit = v.rawUnit;
        appliedRules.push(`unit:identity:${v.rawUnit}`);
      } else {
        const conv = await convertUnit({ rawValue: v.value, rawUnit: v.rawUnit, targetUnit });
        if (conv.ok && conv.value.convertedValue !== null) {
          canonicalValue = conv.value.convertedValue;
          canonicalUnit = targetUnit;
          appliedRules.push(`unit:llm:${conv.value.rationale}`);
        } else {
          /* Keep the record but drop the value: it cannot be expressed in the target unit. */
          flags.push('unit-conversion-failed');
          canonicalValue = null;
          canonicalUnit = v.rawUnit;
          unresolved.push({
            category: 'ontology-conflict',
            detail: `cannot convert ${v.rawUnit} to ${targetUnit}: ${conv.ok ? conv.value.rationale : conv.failure.reason}`,
            blocking: false,
          });
        }
      }
    } else if (v.value === null) {
      flags.push('value-missing');
    }
    if (targetUnit !== '' && canonicalValue !== null && canonicalUnit !== targetUnit) {
      unitsCanonical = false;
    }

    /* Std 8 triggers: per-record flags. */
    if (metricConfidence < LOW_CONFIDENCE_THRESHOLD) flags.push('low-confidence-mapping');
    records.push({
      canonicalEntity,
      canonicalMetric,
      period: v.period,
      value: canonicalValue,
      canonicalUnit,
      rawEntity: v.entity,
      rawLabel: v.rawLabel,
      rawValue: v.value,
      rawUnit: v.rawUnit,
      sourceUrl: v.sourceUrl,
      capturedAt: v.capturedAt,
      appliedRules,
      confidence: metricConfidence,
      flags,
    });
  }

  /* Step 5 (Std 8): resolve-duplicates — detect contradictions. */
  flagContradictions(records, unresolved);
  trace(ctx, 8, normalizationContract.runbook[4]!.name,
    `dedup complete; ${unresolved.filter(u => u.category === 'contradictory-mapping').length} contradiction(s)`);

  /* Std 10: drain learned rules for write-back. Seed entries are excluded
   * from the reported rules, but any drained entry triggers a save. */
  const learnedDrain = store.drainLearned();
  const learnedRules: LearnedRule[] = learnedDrain
    .filter(e => e.by !== 'seed')
    .map(e => ({ key: e.key, value: e.canonical }));
  if (learnedDrain.length > 0) await store.save();
  trace(ctx, 10, normalizationContract.runbook[5]!.name,
    `${learnedRules.length} new rule(s) drained for write-back`);

  /* Nothing survived normalization: fail, but still surface the escalations. */
  if (records.length === 0) {
    return {
      ok: false,
      escalations,
      failure: failure(
        ctx,
        'unresolved-normalization',
        'No values could be normalized.',
        { input, unresolved },
        side.upstreamLineage,
      ),
    };
  }

  /* Step 6 (Std 7): validate-output. Mean per-record confidence, reduced by
   * 0.1 per blocking issue and clamped at 0. */
  const avgConfidence = records.reduce((sum, r) => sum + r.confidence, 0) / records.length;
  const blocking = unresolved.filter(u => u.blocking).length;
  const confidence = makeConfidence(
    Math.max(0, avgConfidence - 0.1 * blocking),
    `avg per-record mapping confidence ${avgConfidence.toFixed(2)} with ${blocking} blocking issue(s)`,
  );

  /* Step 7 (Std 11): handoff. */
  const lineage: Lineage = {
    sourceUrl: side.upstreamLineage.sourceUrl,
    capturedAt: nowIso(),
    effectiveAs: side.upstreamLineage.effectiveAs,
    agentVersion: AGENT_VERSION,
    upstream: Array.from(new Set([...side.upstreamLineage.upstream, ...records.map(r => r.sourceUrl)])),
  };
  const validationStatus =
    blocking > 0 ? 'review' : confidence.value < LOW_CONFIDENCE_THRESHOLD ? 'flagged' : 'passed';
  const handoff: Handoff<NormalizationOutput> = {
    fromAgent: AGENT_NAME,
    fromAgentVersion: AGENT_VERSION,
    toAgent: 'baseline.resolution',
    payload: { records, learnedRules },
    metadata: {
      analysisId: ctx.analysisId,
      capabilities: normalizationContract.capabilities,
      /* Job-level default unit; individual records carry their own canonicalUnit. */
      targetUnit: defaultTargetUnit,
    },
    confidence,
    validation: {
      status: validationStatus,
      checks: [
        { name: 'records-produced', passed: records.length > 0, detail: `${records.length}` },
        { name: 'no-contradictions', passed: records.every(r => !r.flags.includes('contradictory-mapping')) },
        { name: 'units-canonical', passed: unitsCanonical },
      ],
    },
    unresolvedIssues: unresolved,
    lineage,
    timestamp: nowIso(),
  };
  trace(ctx, 11, normalizationContract.runbook[6]!.name,
    `handoff → ${handoff.toAgent} (validation=${validationStatus} confidence=${confidence.tier})`);
  return { ok: true, handoff, escalations };
}