// Code · src/agents/decision/output-ingestion/index.ts

// src/agents/decision/output-ingestion/index.ts 16,276 bytes · typescript
/**
 * Output Ingestion & Interpretation — runtime entry point.
 *
 * Two-phase execution per spec §Std 6 cost-appropriate execution:
 *
 *   Phase 1 (deterministic) — Rule discovery
 *     For each insight, query the Rule Library (`find_rules`) by
 *     triggers derived from the insight's framework + claim. Resolve
 *     multi-match precedence here, not in the LLM. If zero rules
 *     match for an insight, emit a structured rule-gap escalation
 *     immediately — no LLM round-trip needed to decide "no rule".
 *
 *   Phase 2 (LLM, only when needed) — Rule application
 *     Once per run, send all insights with their pre-resolved
 *     candidate rules to the LLM in a single batch. The LLM applies
 *     each rule's natural-language conditions to the insight and
 *     produces structured recommendations.
 *
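 * Post-LLM cross-checks (Std 4 + Std 7): every recommendation must
 * reference a real rule_id and a real upstream insightId; a
 * recommendation whose own confidence is below LOW_CONFIDENCE_THRESHOLD
 * is dropped and escalated, and an aggregate run confidence below that
 * threshold downgrades validation to 'flagged'.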
 */

import {
  type AgentResult,
  type HITLEscalation,
  LOW_CONFIDENCE_THRESHOLD,
  makeConfidence,
} from '../../../standards.js';
import type {
  ExecutionContext,
  FailureObject,
  Handoff,
  JobRequest,
  Lineage,
  UnresolvedIssue,
} from '../../../types.js';
import { nowIso } from '../../../types.js';
import {
  findRules,
  resolvePrecedence,
  type Rule,
} from '../../../decision/library.js';
import {
  AGENT_NAME,
  AGENT_VERSION,
  outputIngestionContract,
} from './matrix.js';
import {
  type OutputIngestionInput,
  type OutputIngestionOutput,
  outputIngestionInputSchema,
} from './schema.js';
import {
  interpretFindings,
  MODEL_NAME,
  TOOL_COUNT,
  TOOL_NAMES,
  type InterpretationCandidate,
  type ToolCallTrace,
} from './llm.js';

export { outputIngestionContract } from './matrix.js';
export type { OutputIngestionOutput } from './schema.js';

/**
 * Side-channel context handed to `runOutputIngestion` alongside the raw
 * payload. Both members are read-only.
 */
export interface OutputIngestionSideContext {
  /** Lineage of the upstream output; attached to failures and
   *  escalations and carried forward into the handoff lineage. */
  readonly upstreamLineage: Lineage;
  /** Originating job request; its optional `audience.tier` selects the
   *  audience tier and the request is forwarded to the LLM step. */
  readonly jobRequest: JobRequest;
}

/**
 * Records one step of this agent against a numbered standard: appends
 * an entry to the execution context's trace and echoes the same
 * information to the console.
 *
 * @param ctx      Execution context whose `trace` array receives the entry.
 * @param standard Number of the standard the step is attributed to.
 * @param step     Step name.
 * @param detail   Free-text description of what happened.
 */
function trace(ctx: ExecutionContext, standard: number, step: string, detail: string): void {
  const at = nowIso();
  ctx.trace.push({ agent: AGENT_NAME, standard, step, detail, at });
  const line = `  [${AGENT_NAME}][Std ${standard}] ${step} — ${detail}`;
  // eslint-disable-next-line no-console
  console.log(line);
}

/**
 * Builds the `FailureObject` for this agent.
 *
 * Agent name and version come from the module constants, the attempt
 * count and recursion depth are read from the execution context, and
 * the occurrence time is stamped at call time.
 *
 * @param ctx      Execution context supplying `retries` and `recursionDepth`.
 * @param category Failure category.
 * @param reason   Human-readable explanation of the failure.
 * @param context  Arbitrary structured data describing the failure.
 * @param lineage  Lineage to attach to the failure.
 * @returns The populated failure object.
 */
function failure(
  ctx: ExecutionContext,
  category: FailureObject['category'],
  reason: string,
  context: Record<string, unknown>,
  lineage: Lineage,
): FailureObject {
  const { retries: attempts, recursionDepth } = ctx;
  const result: FailureObject = {
    agent: AGENT_NAME,
    agentVersion: AGENT_VERSION,
    category,
    reason,
    context,
    lineage,
    attempts,
    recursionDepth,
    occurredAt: nowIso(),
  };
  return result;
}

/** Framework-name fragments and the extra trigger(s) each one
 *  contributes when it occurs in the normalised framework name. */
const FRAMEWORK_TRIGGER_HINTS: ReadonlyArray<readonly [string, readonly string[]]> = [
  ['peer', ['peer', 'peer-positioning']],
  ['trend', ['trend']],
  ['outlier', ['outlier']],
  ['comparison', ['comparison']],
];

/** Claim phrases (any one suffices) and the structural trigger they
 *  contribute when found in the lower-cased claim. */
const CLAIM_TRIGGER_HINTS: ReadonlyArray<readonly [readonly string[], string]> = [
  [['outperform', 'above'], 'outperformer'],
  [['underperform', 'below'], 'underperformer'],
  [['in line', 'parity', 'equivalent', 'comparable'], 'in-line'],
];

/** Heuristic trigger generation from an insight. Triggers are
 *  case-insensitive substring matches in the rule library, so coarse
 *  keywords are fine — narrowness comes from the rule's own trigger
 *  declarations.
 *
 *  The framework name is trimmed and lower-cased before use, so a
 *  blank or whitespace-only framework contributes no trigger (a blank
 *  trigger would be meaningless under substring matching) and padded
 *  names do not produce padded triggers.
 *
 *  @param insight Framework name and claim text of one insight.
 *  @returns De-duplicated triggers in insertion order: the normalised
 *           framework name first, then framework-derived keywords,
 *           then claim-derived keywords. Empty when nothing applies. */
function triggersFromInsight(insight: {
  frameworkUsed: string;
  claim: string;
}): string[] {
  const triggers = new Set<string>();

  const framework = insight.frameworkUsed.trim().toLowerCase();
  if (framework) triggers.add(framework);
  for (const [fragment, derived] of FRAMEWORK_TRIGGER_HINTS) {
    if (!framework.includes(fragment)) continue;
    for (const trigger of derived) triggers.add(trigger);
  }

  /* Light scan of the claim for additional structural hints. */
  const claim = insight.claim.toLowerCase();
  for (const [phrases, trigger] of CLAIM_TRIGGER_HINTS) {
    if (phrases.some(phrase => claim.includes(phrase))) triggers.add(trigger);
  }

  return [...triggers];
}

/**
 * Runs the Output Ingestion & Interpretation agent for one analysis.
 *
 * Flow:
 *   1. Validate `rawInput` against `outputIngestionInputSchema`.
 *   2. Deterministically look up interpretation rules per insight;
 *      insights with no matching rule are escalated as rule-gaps
 *      without involving the LLM.
 *   3. Send the remaining insights with their candidate rules to the
 *      LLM in one call.
 *   4. Cross-check the returned recommendations: drop those citing an
 *      unknown insight or rule, drop and escalate those below
 *      LOW_CONFIDENCE_THRESHOLD, and escalate `material` /
 *      `high_impact` ones for authorization.
 *   5. Aggregate confidence and build the handoff to
 *      `decision.visualization`.
 *
 * The LLM's output object is mutated in place and becomes the handoff
 * payload.
 *
 * @param rawInput Unvalidated payload; must satisfy the input schema.
 * @param side     Job request and upstream lineage for this run.
 * @param ctx      Execution context; trace entries are appended to `ctx.trace`.
 * @returns `ok: false` with a failure when the input is invalid, no
 *          insights were supplied, no rule matched any insight, or the
 *          LLM step failed; otherwise `ok: true` with the handoff. In
 *          both cases the escalations raised so far are returned.
 */
export async function runOutputIngestion(
  rawInput: unknown,
  side: OutputIngestionSideContext,
  ctx: ExecutionContext,
): Promise<AgentResult<OutputIngestionOutput>> {
  /* Step 1 (Std 2): receive-pillar2-output. */
  const parsed = outputIngestionInputSchema.safeParse(rawInput);
  if (!parsed.success) {
    return {
      ok: false,
      escalations: [],
      failure: failure(
        ctx,
        'invalid-input',
        'Output Ingestion input failed schema validation.',
        { issues: parsed.error.issues },
        side.upstreamLineage,
      ),
    };
  }
  const insightsPayload: OutputIngestionInput = parsed.data;
  trace(ctx, 2, outputIngestionContract.runbook[0]!.name,
    `received ${insightsPayload.insights.length} insight(s); ${insightsPayload.unsupportedClaimsRemoved.length} upstream removal(s); ${insightsPayload.appliedFrameworks.length} framework(s) used`);

  const unresolved: UnresolvedIssue[] = [];
  const escalations: HITLEscalation[] = [];
  /* Fall back to 'decision_maker' when the job request names no audience. */
  const audienceTier = side.jobRequest.audience?.tier ?? 'decision_maker';
  trace(ctx, 2, outputIngestionContract.runbook[0]!.name,
    `audience tier: ${audienceTier}${side.jobRequest.audience ? '' : ' (default — no JobRequest.audience supplied)'}`);

  if (insightsPayload.insights.length === 0) {
    return {
      ok: false,
      escalations,
      failure: failure(
        ctx,
        'interpretation-blocked',
        'No insights supplied — nothing to interpret.',
        { input: insightsPayload },
        side.upstreamLineage,
      ),
    };
  }

  /* Step 2 (Std 5/6 — deterministic): find-rules per insight. */
  const candidates: InterpretationCandidate[] = [];
  const earlyRuleGaps: { sourceInsightId: string; reason: 'no-rule-matched'; detail: string; triedTriggers: string[] }[] = [];
  for (const ins of insightsPayload.insights) {
    const triggers = triggersFromInsight({ frameworkUsed: ins.frameworkUsed, claim: ins.claim });
    let matched: readonly Rule[] = [];
    if (triggers.length > 0) {
      /* Query the library once per trigger and union the hits by
       * rule_id (first-seen order); `resolvePrecedence` then settles
       * multi-match precedence over the combined set. */
      const unique = new Map<string, Rule>();
      for (const t of triggers) {
        const m = await findRules({ type: 'interpretation', agent: 'output_ingestion', triggers: [t] });
        for (const r of m) unique.set(r.rule_id, r);
      }
      matched = [...unique.values()];
    } else {
      /* No trigger could be derived — query by type and agent only. */
      matched = await findRules({ type: 'interpretation', agent: 'output_ingestion' });
    }
    matched = resolvePrecedence(matched);
    if (matched.length === 0) {
      /* Rule-gap: record it in all three channels (payload entry,
       * non-blocking unresolved issue, HITL escalation) and skip the LLM. */
      earlyRuleGaps.push({
        sourceInsightId: ins.insightId,
        reason: 'no-rule-matched',
        detail: `no interpretation rule matched triggers [${triggers.join(', ')}] for insight "${ins.insightId}"`,
        triedTriggers: triggers,
      });
      unresolved.push({
        category: 'rule-gap',
        detail: `no interpretation rule for insight "${ins.insightId}" (framework=${ins.frameworkUsed})`,
        blocking: false,
      });
      escalations.push({
        agent: AGENT_NAME,
        reason: 'rule-gap',
        failureContext: `no interpretation rule matched for insight "${ins.insightId}"; SME guidance or new library entry needed`,
        lineage: side.upstreamLineage,
        validation: {
          status: 'review',
          confidence: makeConfidence(0, 'no rule available'),
          checks: [{ name: 'rule-available', passed: false }],
        },
        recommendedReviewer: 'domain-expert',
        raisedAt: nowIso(),
      });
      continue;
    }
    candidates.push({
      insightId: ins.insightId,
      claim: ins.claim,
      frameworkUsed: ins.frameworkUsed,
      isInference: ins.isInference,
      confidence: ins.confidence,
      candidateRules: matched,
    });
  }
  trace(ctx, 5, outputIngestionContract.runbook[1]!.name,
    `find-rules: ${candidates.length} insight(s) with matched rules; ${earlyRuleGaps.length} rule-gap(s) for escalation`);

  /* If every single insight is a rule-gap, fail loudly — there is
   * nothing for the LLM to do and the pipeline cannot continue. */
  if (candidates.length === 0) {
    return {
      ok: false,
      escalations,
      failure: failure(
        ctx,
        'rule-unavailable',
        `No interpretation rule matched any of the ${insightsPayload.insights.length} insight(s). All findings escalated as rule-gaps.`,
        { ruleGaps: earlyRuleGaps },
        side.upstreamLineage,
      ),
    };
  }

  trace(ctx, 5, outputIngestionContract.runbook[3]!.name,
    `delegating rule application to LLM (${MODEL_NAME}) with ${TOOL_COUNT} rule tool(s) available: [${TOOL_NAMES.join(', ')}]`);

  /* Mirrors `trace`, but keeps the tool call's own timestamp. The
   * detail string is built once so the trace record and the console
   * line can never disagree (a missing error message reads 'unknown'
   * in both). */
  const onToolCall = (t: ToolCallTrace): void => {
    const args = Object.entries(t.input).map(([k, v]) => `${k}=${JSON.stringify(v)}`).join(', ');
    const detail = `${t.toolName}(${args}) → ${t.ok ? t.resultSummary : `ERROR: ${t.errorMessage ?? 'unknown'}`}`;
    ctx.trace.push({
      agent: AGENT_NAME,
      standard: 5,
      step: 'tool-call',
      detail,
      at: t.at,
    });
    // eslint-disable-next-line no-console
    console.log(`  [${AGENT_NAME}][Std 5] tool-call — ${detail}`);
  };

  /* Step 4 (Std 3): apply-rule via LLM. */
  const llm = await interpretFindings(candidates, audienceTier, side.jobRequest, onToolCall);
  if (!llm.ok) {
    /* A missing API key is an engineering problem, not a domain one:
     * it changes both the reviewer and the failure category. */
    const isKey = llm.failure.category === 'needs-api-key';
    escalations.push({
      agent: AGENT_NAME,
      reason: 'critical-validation-failure',
      failureContext: llm.failure.reason,
      lineage: side.upstreamLineage,
      validation: {
        status: 'review',
        confidence: makeConfidence(0, 'LLM unavailable or invalid response'),
        checks: [{ name: 'llm-available', passed: false, detail: llm.failure.hint }],
      },
      recommendedReviewer: isKey ? 'engineer' : 'domain-expert',
      raisedAt: nowIso(),
    });
    return {
      ok: false,
      escalations,
      failure: failure(
        ctx,
        isKey ? 'tool-unavailable' : 'interpretation-blocked',
        llm.failure.reason,
        { llmFailure: llm.failure },
        side.upstreamLineage,
      ),
    };
  }

  const out = llm.value.output;

  /* Merge any rule-gaps the LLM separately surfaced with the
   * ones we already short-circuited in phase 1. */
  out.ruleGapsEscalated = [...earlyRuleGaps, ...out.ruleGapsEscalated];

  /* Step 6 (Std 4 + Std 7): cross-check. Each recommendation must
   * cite a real rule_id and a real upstream insightId, and meet the
   * action threshold. */
  const validInsightIds = new Set(insightsPayload.insights.map(i => i.insightId));
  const validRuleIds = new Set<string>();
  for (const c of candidates) for (const r of c.candidateRules) validRuleIds.add(r.rule_id);
  let droppedForBadRef = 0;
  let droppedForLowConfidence = 0;
  /* The filter callback also raises escalations as a side effect, in
   * recommendation order. */
  out.recommendations = out.recommendations.filter(r => {
    if (!validInsightIds.has(r.sourceInsightId)) { droppedForBadRef++; return false; }
    if (!validRuleIds.has(r.ruleApplied)) { droppedForBadRef++; return false; }
    if (r.confidence < LOW_CONFIDENCE_THRESHOLD) {
      droppedForLowConfidence++;
      escalations.push({
        agent: AGENT_NAME,
        reason: 'low-confidence',
        failureContext: `recommendation "${r.recommendationId}" (rule=${r.ruleApplied}) below action threshold at ${r.confidence.toFixed(2)}`,
        lineage: side.upstreamLineage,
        validation: {
          status: 'review',
          confidence: makeConfidence(r.confidence, 'below action threshold'),
          checks: [{ name: 'meets-action-threshold', passed: false }],
        },
        recommendedReviewer: 'authorizing-decision-maker',
        raisedAt: nowIso(),
      });
      out.ruleGapsEscalated.push({
        sourceInsightId: r.sourceInsightId,
        reason: 'sub-threshold-confidence',
        detail: `recommendation "${r.recommendationId}" confidence ${r.confidence.toFixed(2)} < ${LOW_CONFIDENCE_THRESHOLD}`,
        triedTriggers: [],
      });
      return false;
    }
    /* Material-impact triggers (Std 8 + Pillar 3's stricter escalation). */
    if (r.severity === 'material' || r.severity === 'high_impact') {
      unresolved.push({
        category: 'material-impact-finding',
        detail: `${r.severity} recommendation "${r.recommendationId}" requires authorizing-decision-maker sign-off (Std 9, Pillar 3 stricter regime)`,
        blocking: false,
      });
      escalations.push({
        agent: AGENT_NAME,
        reason: 'material-impact-finding',
        failureContext: `${r.severity} severity recommendation "${r.recommendationId}" — authorize before downstream delivery`,
        lineage: side.upstreamLineage,
        validation: {
          status: 'review',
          confidence: makeConfidence(r.confidence, `${r.severity} severity`),
          checks: [{ name: 'requires-authorization', passed: false, detail: `severity=${r.severity}` }],
        },
        recommendedReviewer: 'authorizing-decision-maker',
        raisedAt: nowIso(),
      });
    }
    return true;
  });

  /* Recompute appliedRules from the surviving recommendations. */
  out.appliedRules = Array.from(new Set(out.recommendations.map(r => r.ruleApplied))).sort();

  trace(ctx, 4, outputIngestionContract.runbook[5]!.name,
    `cross-check: dropped ${droppedForBadRef} (bad ref) + ${droppedForLowConfidence} (sub-threshold) → ${out.recommendations.length} surviving recommendation(s)`);
  trace(ctx, 9, outputIngestionContract.runbook[6]!.name,
    `escalations raised: ${escalations.length}; ruleGapsEscalated entries: ${out.ruleGapsEscalated.length}`);

  /* Step 5 (Std 7): score-confidence. Aggregate per-run validation.
   * The run score is the mean recommendation confidence minus 0.05
   * per ruleGapsEscalated entry (at most 5 entries counted), floored
   * at 0. */
  const avgConf = out.recommendations.length === 0
    ? 0
    : out.recommendations.reduce((s, r) => s + r.confidence, 0) / out.recommendations.length;
  const blocking = unresolved.filter(u => u.blocking).length;
  const confidence = makeConfidence(
    Math.max(0, avgConf - 0.05 * Math.min(out.ruleGapsEscalated.length, 5)),
    `avg per-recommendation confidence ${avgConf.toFixed(2)} with ${out.ruleGapsEscalated.length} rule-gap escalation(s)`,
  );
  trace(ctx, 7, outputIngestionContract.runbook[4]!.name,
    `validation: ${out.recommendations.length} recommendation(s) avgConf=${avgConf.toFixed(2)}`);

  /* Step 8 (Std 11): handoff. */
  const lineage: Lineage = {
    sourceUrl: side.upstreamLineage.sourceUrl,
    capturedAt: nowIso(),
    effectiveAs: side.upstreamLineage.effectiveAs,
    agentVersion: AGENT_VERSION,
    /* Upstream lineage plus every surviving recommendation's
     * reasoning lineage, de-duplicated. */
    upstream: Array.from(new Set([
      ...side.upstreamLineage.upstream,
      ...out.recommendations.flatMap(r => r.reasoningLineage),
    ])),
  };
  /* 'review' when nothing survived or something is blocking;
   * 'flagged' when the aggregate confidence is below threshold. */
  const validationStatus =
    out.recommendations.length === 0 ? 'review'
      : blocking > 0 ? 'review'
      : confidence.value < LOW_CONFIDENCE_THRESHOLD ? 'flagged'
      : 'passed';

  const handoff: Handoff<OutputIngestionOutput> = {
    fromAgent: AGENT_NAME,
    fromAgentVersion: AGENT_VERSION,
    toAgent: 'decision.visualization',
    payload: out,
    metadata: {
      analysisId: ctx.analysisId,
      capabilities: outputIngestionContract.capabilities,
      audienceTier,
      candidatesConsidered: candidates.length,
      droppedForBadRef,
      droppedForLowConfidence,
      toolCallCount: llm.value.toolCalls.length,
      toolCalls: llm.value.toolCalls.map(t => ({
        toolName: t.toolName,
        ok: t.ok,
        input: t.input,
        resultSummary: t.resultSummary,
        errorMessage: t.errorMessage,
        at: t.at,
      })),
    },
    confidence,
    validation: {
      status: validationStatus,
      checks: [
        { name: 'at-least-one-recommendation', passed: out.recommendations.length > 0, detail: `${out.recommendations.length}` },
        { name: 'every-recommendation-cites-rule', passed: out.recommendations.every(r => !!r.ruleApplied) },
        { name: 'no-blocking-issues', passed: blocking === 0 },
      ],
    },
    unresolvedIssues: unresolved,
    lineage,
    timestamp: nowIso(),
  };
  trace(ctx, 11, outputIngestionContract.runbook[7]!.name,
    `handoff → ${handoff.toAgent} (validation=${validationStatus} confidence=${confidence.tier})`);

  return { ok: true, handoff, escalations };
}