BID · Console
Baseline · Intelligence · Decision
src/agents/intelligence/insight-synthesis/index.ts 9,763 bytes · typescript
/**
 * Insight Synthesis agent — runtime entry point.
 *
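 * After the LLM produces insights, this file re-validates Std 4 by
 * checking that every supportingEvidence.ref of kind `comparison` or
 * `metric` points at a real comparisonId or metricKey from the upstream
 * payload (other evidence kinds are not cross-checked here). Refs that
 * do not resolve are stripped and the insight's confidence is reduced;
 * an insight left with no valid support is marked as inference.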
 */

import {
  type AgentResult,
  type HITLEscalation,
  LOW_CONFIDENCE_THRESHOLD,
  makeConfidence,
} from '../../../standards.js';
import type {
  ExecutionContext,
  FailureObject,
  Handoff,
  JobRequest,
  Lineage,
  UnresolvedIssue,
} from '../../../types.js';
import { nowIso } from '../../../types.js';
import {
  AGENT_NAME,
  AGENT_VERSION,
  insightSynthesisContract,
} from './matrix.js';
import {
  type InsightSynthesisInput,
  type InsightSynthesisOutput,
  insightSynthesisInputSchema,
} from './schema.js';
import {
  synthesizeInsights,
  MODEL_NAME,
  TOOL_COUNT,
  TOOL_NAMES,
  type ToolCallTrace,
} from './llm.js';

export { insightSynthesisContract } from './matrix.js';
export type { InsightSynthesisOutput } from './schema.js';

/**
 * Side-channel context handed to `runInsightSynthesis` alongside the raw
 * comparison payload.
 */
export interface InsightSynthesisSideContext {
  /** Forwarded unchanged to `synthesizeInsights` together with the
   *  parsed comparisons. */
  readonly jobRequest: JobRequest;
  /** Lineage of the upstream payload. Attached to every failure and
   *  escalation raised by this agent, and used as the base
   *  (`sourceUrl`, `effectiveAs`, `upstream`) of the handoff lineage. */
  readonly upstreamLineage: Lineage;
  /** The upstream computed metrics, supplied so the cross-check on
   *  Std 4 (every claim cites something real) can verify metric
   *  references too — comparisons are already in `rawInput`. */
  readonly upstreamMetricKeys: readonly string[];
}

/**
 * Records one trace entry for this agent on `ctx.trace` and mirrors it
 * to the console.
 *
 * @param ctx      Execution context whose `trace` array receives the entry.
 * @param standard Number of the standard the step relates to.
 * @param step     Short step label.
 * @param detail   Free-text description of what happened.
 */
function trace(ctx: ExecutionContext, standard: number, step: string, detail: string): void {
  // Timestamp is taken once, before the entry is appended.
  const at = nowIso();
  ctx.trace.push({ agent: AGENT_NAME, standard, step, detail, at });

  const line = `  [${AGENT_NAME}][Std ${standard}] ${step} — ${detail}`;
  // eslint-disable-next-line no-console
  console.log(line);
}

/**
 * Builds the `FailureObject` this agent returns when it cannot produce
 * a handoff.
 *
 * @param ctx      Execution context; supplies `retries` (reported as
 *                 `attempts`) and `recursionDepth`.
 * @param category Failure category.
 * @param reason   Human-readable reason.
 * @param context  Arbitrary diagnostic payload stored on the failure.
 * @param lineage  Lineage to attach to the failure.
 * @returns A failure stamped with this agent's name, version and the
 *          current time.
 */
function failure(
  ctx: ExecutionContext,
  category: FailureObject['category'],
  reason: string,
  context: Record<string, unknown>,
  lineage: Lineage,
): FailureObject {
  const { retries: attempts, recursionDepth } = ctx;
  const result: FailureObject = {
    agent: AGENT_NAME,
    agentVersion: AGENT_VERSION,
    category,
    reason,
    context,
    lineage,
    attempts,
    recursionDepth,
    occurredAt: nowIso(),
  };
  return result;
}

/** Name of the runbook step at `index`, with a positional fallback so a
 *  missing runbook entry can never make a trace call throw. */
function runbookStepName(index: number): string {
  return insightSynthesisContract.runbook[index]?.name ?? `runbook-step-${index}`;
}

/** Renders one tool call as `name(arg=value, …) → result | ERROR: message`. */
function describeToolCall(t: ToolCallTrace): string {
  const args = Object.entries(t.input)
    .map(([key, value]) => `${key}=${JSON.stringify(value)}`)
    .join(', ');
  const outcome = t.ok ? t.resultSummary : `ERROR: ${t.errorMessage ?? 'unknown'}`;
  return `${t.toolName}(${args}) → ${outcome}`;
}

/**
 * Runs the Insight Synthesis agent: validates the incoming comparisons,
 * delegates synthesis to the LLM, cross-checks the cited evidence and
 * packages the result as a handoff.
 *
 * Side effects: appends entries to `ctx.trace`, logs to the console, and
 * mutates the insights returned by the LLM in place (flags, confidence,
 * `isInference`, `supportingEvidence`).
 *
 * @param rawInput Unvalidated upstream payload; parsed with
 *                 `insightSynthesisInputSchema`.
 * @param side     Job request, upstream lineage and upstream metric keys.
 * @param ctx      Execution context (trace sink, retry/recursion counters,
 *                 analysis id).
 * @returns `ok: false` with a failure when the input fails schema
 *          validation (`invalid-input`), when there are no comparisons
 *          (`unsupported-claim`), or when the LLM call fails
 *          (`tool-unavailable` for a missing API key, otherwise
 *          `unsupported-claim`; this case also carries an escalation).
 *          Otherwise `ok: true` with the handoff.
 */
export async function runInsightSynthesis(
  rawInput: unknown,
  side: InsightSynthesisSideContext,
  ctx: ExecutionContext,
): Promise<AgentResult<InsightSynthesisOutput>> {
  /* Step 1 (Std 2): receive-comparisons. */
  const parsed = insightSynthesisInputSchema.safeParse(rawInput);
  if (!parsed.success) {
    return {
      ok: false,
      escalations: [],
      failure: failure(
        ctx,
        'invalid-input',
        'Insight Synthesis input failed schema validation.',
        { issues: parsed.error.issues },
        side.upstreamLineage,
      ),
    };
  }
  const comparisons: InsightSynthesisInput = parsed.data;
  trace(ctx, 2, runbookStepName(0),
    `received ${comparisons.comparisons.length} comparison(s); ${comparisons.comparabilityFailures.length} upstream comparability failure(s)`);

  const unresolved: UnresolvedIssue[] = [];
  const escalations: HITLEscalation[] = [];

  if (comparisons.comparisons.length === 0) {
    return {
      ok: false,
      escalations,
      failure: failure(
        ctx,
        'unsupported-claim',
        'No comparisons were supplied — cannot synthesize insights.',
        { input: comparisons },
        side.upstreamLineage,
      ),
    };
  }

  trace(ctx, 5, runbookStepName(1),
    `delegating synthesis to LLM (${MODEL_NAME}) with ${TOOL_COUNT} methodology tool(s) available: [${TOOL_NAMES.join(', ')}]`);

  // Not routed through trace(): the entry must carry the tool call's own
  // timestamp (`t.at`) rather than the time it was recorded.
  const onToolCall = (t: ToolCallTrace): void => {
    // Built once so the trace entry and the console line cannot drift apart.
    const detail = describeToolCall(t);
    ctx.trace.push({
      agent: AGENT_NAME,
      standard: 5,
      step: 'tool-call',
      detail,
      at: t.at,
    });
    // eslint-disable-next-line no-console
    console.log(`  [${AGENT_NAME}][Std 5] tool-call — ${detail}`);
  };

  const llm = await synthesizeInsights(comparisons, side.jobRequest, onToolCall);
  if (!llm.ok) {
    // A missing API key is an engineering problem; anything else goes to
    // a domain expert.
    const isKey = llm.failure.category === 'needs-api-key';
    escalations.push({
      agent: AGENT_NAME,
      reason: 'critical-validation-failure',
      failureContext: llm.failure.reason,
      lineage: side.upstreamLineage,
      validation: {
        status: 'review',
        confidence: makeConfidence(0, 'LLM unavailable or invalid response'),
        checks: [{ name: 'llm-available', passed: false, detail: llm.failure.hint }],
      },
      recommendedReviewer: isKey ? 'engineer' : 'domain-expert',
      raisedAt: nowIso(),
    });
    return {
      ok: false,
      escalations,
      failure: failure(
        ctx,
        isKey ? 'tool-unavailable' : 'unsupported-claim',
        llm.failure.reason,
        { llmFailure: llm.failure },
        side.upstreamLineage,
      ),
    };
  }

  const out = llm.value.insights;

  /* Std 4 + Std 7: cross-check every supportingEvidence.ref against
   * what's actually present upstream. Claims whose references don't
   * resolve get a flag and a confidence haircut. */
  const validComparisonIds = new Set(comparisons.comparisons.map(c => c.comparisonId));
  const validMetricKeys = new Set(side.upstreamMetricKeys);
  let strippedSupportCount = 0;
  for (const ins of out.insights) {
    const goodEvidence = ins.supportingEvidence.filter(e => {
      if (e.kind === 'comparison') return validComparisonIds.has(e.ref);
      if (e.kind === 'metric') return validMetricKeys.has(e.ref);
      return true; // methodology refs are not cross-checked here
    });
    const stripped = ins.supportingEvidence.length - goodEvidence.length;
    if (stripped > 0) {
      strippedSupportCount += stripped;
      ins.flags.push(`stripped-${stripped}-bad-ref(s)`);
      // Replace the contents in place so the same array instance (already
      // part of the payload) ends up holding only the valid refs.
      const evidence = ins.supportingEvidence as unknown as typeof goodEvidence;
      evidence.splice(0, evidence.length, ...goodEvidence);
      ins.confidence = Math.max(0, ins.confidence - 0.2);
    }
    if (ins.supportingEvidence.length === 0 && !ins.isInference) {
      // A claim with nothing valid to cite is downgraded to inference and
      // takes a further confidence reduction.
      ins.flags.push('no-valid-support → marked-inference');
      ins.isInference = true;
      ins.confidence = Math.max(0, ins.confidence - 0.1);
    }
  }

  // `isInference` is final from here on, so the inference subset is
  // computed once and reused by the traces and the Std 8 loop below.
  const inferenceInsights = out.insights.filter(i => i.isInference);
  const inferenceCount = inferenceInsights.length;
  trace(ctx, 4, runbookStepName(3),
    `Std-4 cross-check: ${strippedSupportCount} bad supporting-evidence ref(s) stripped; ${inferenceCount} insight(s) marked inference`);

  /* Std 8: inference flags + unsupported-claim removals → triggers. */
  for (const i of inferenceInsights) {
    unresolved.push({
      category: 'unsupported-claim',
      detail: `insight "${i.insightId}" is inference — verify with SME before partner delivery`,
      blocking: false,
      context: { insightId: i.insightId },
    });
  }
  for (const r of out.unsupportedClaimsRemoved) {
    unresolved.push({
      category: 'unsupported-claim',
      detail: `removed: "${r.claim}" — ${r.reason}`,
      blocking: false,
    });
  }

  const avgConf = out.insights.length === 0
    ? 0
    : out.insights.reduce((s, i) => s + i.confidence, 0) / out.insights.length;
  // Every issue raised above is non-blocking; the count is still derived
  // from the list rather than hard-coded.
  const blocking = unresolved.filter(u => u.blocking).length;
  // Overall confidence: mean per-insight confidence minus 0.05 per removed
  // unsupported claim, with the penalty capped at five removals.
  const confidence = makeConfidence(
    Math.max(0, avgConf - 0.05 * Math.min(out.unsupportedClaimsRemoved.length, 5)),
    `avg per-insight confidence ${avgConf.toFixed(2)} with ${out.unsupportedClaimsRemoved.length} unsupported-claim removal(s)`,
  );
  trace(ctx, 7, runbookStepName(6),
    `validation: ${out.insights.length} insight(s) — ${out.insights.length - inferenceCount} data-supported, ${inferenceCount} inference avgConf=${avgConf.toFixed(2)}`);

  /* Step 8 (Std 11): handoff. */
  const lineage: Lineage = {
    sourceUrl: side.upstreamLineage.sourceUrl,
    capturedAt: nowIso(),
    effectiveAs: side.upstreamLineage.effectiveAs,
    agentVersion: AGENT_VERSION,
    // Upstream lineage plus every insight's reasoning lineage, de-duplicated.
    upstream: Array.from(new Set([
      ...side.upstreamLineage.upstream,
      ...out.insights.flatMap(i => i.reasoningLineage),
    ])),
  };
  const validationStatus =
    blocking > 0 ? 'review' : confidence.value < LOW_CONFIDENCE_THRESHOLD ? 'flagged' : 'passed';

  const handoff: Handoff<InsightSynthesisOutput> = {
    fromAgent: AGENT_NAME,
    fromAgentVersion: AGENT_VERSION,
    toAgent: null,
    payload: out,
    metadata: {
      analysisId: ctx.analysisId,
      capabilities: insightSynthesisContract.capabilities,
      appliedFrameworks: out.appliedFrameworks,
      strippedSupportCount,
      toolCallCount: llm.value.toolCalls.length,
      toolCalls: llm.value.toolCalls.map(t => ({
        toolName: t.toolName,
        ok: t.ok,
        input: t.input,
        resultSummary: t.resultSummary,
        errorMessage: t.errorMessage,
        at: t.at,
      })),
    },
    confidence,
    validation: {
      status: validationStatus,
      checks: [
        { name: 'at-least-one-insight', passed: out.insights.length > 0, detail: `${out.insights.length}` },
        { name: 'all-claims-cited-or-flagged', passed: out.insights.every(i => i.supportingEvidence.length > 0 || i.isInference) },
        { name: 'no-blocking-issues', passed: blocking === 0 },
      ],
    },
    unresolvedIssues: unresolved,
    lineage,
    timestamp: nowIso(),
  };
  trace(ctx, 11, runbookStepName(7),
    `handoff → ${handoff.toAgent ?? '(end of pipeline)'} (validation=${validationStatus} confidence=${confidence.tier})`);

  return { ok: true, handoff, escalations };
}