BID · Console
Baseline · Intelligence · Decision
src/agents/intelligence/performance-metrics/index.ts 8,072 bytes · typescript
/**
 * Performance Metrics agent — runtime entry point.
 *
 * Walks the 7-step runbook (Std 6). Step 1 (validate input) and steps
 * 5-7 (validate output / confidence / handoff) are deterministic;
 * steps 2-4 (lookup → validate inputs → compute) are delegated to the
 * LLM with the methodology-library tool surface.
 */

import {
  type AgentResult,
  type HITLEscalation,
  LOW_CONFIDENCE_THRESHOLD,
  makeConfidence,
} from '../../../standards.js';
import type {
  ExecutionContext,
  FailureObject,
  Handoff,
  JobRequest,
  Lineage,
  UnresolvedIssue,
} from '../../../types.js';
import { nowIso } from '../../../types.js';
import {
  AGENT_NAME,
  AGENT_VERSION,
  performanceMetricsContract,
} from './matrix.js';
import {
  type PerformanceMetricsInput,
  type PerformanceMetricsOutput,
  performanceMetricsInputSchema,
} from './schema.js';
import {
  computeMetrics,
  MODEL_NAME,
  TOOL_COUNT,
  TOOL_NAMES,
  type ToolCallTrace,
} from './llm.js';

// Re-exported so consumers can pull the agent contract and the output type
// from this entry point instead of reaching into ./matrix.js and ./schema.js.
export { performanceMetricsContract } from './matrix.js';
export type { PerformanceMetricsOutput } from './schema.js';

/**
 * Context passed to `runPerformanceMetrics` alongside the raw input table.
 */
export interface PerformanceMetricsSideContext {
  /** Job request forwarded to the LLM computation step together with the validated table. */
  readonly jobRequest: JobRequest;
  /**
   * Lineage of the upstream data. Attached as-is to failures and escalations,
   * and used as the base of the lineage written into the handoff.
   */
  readonly upstreamLineage: Lineage;
}

/**
 * Records one runbook step for this agent.
 *
 * Appends an entry (agent, standard, step, detail, timestamp) to
 * `ctx.trace` and echoes the same information to the console.
 *
 * @param ctx      Execution context whose `trace` array receives the entry.
 * @param standard Number of the standard the step belongs to.
 * @param step     Name of the step being recorded.
 * @param detail   Human-readable description of what happened.
 */
function trace(ctx: ExecutionContext, standard: number, step: string, detail: string): void {
  const at = nowIso();
  ctx.trace.push({ agent: AGENT_NAME, standard, step, detail, at });

  const prefix = `[${AGENT_NAME}][Std ${standard}]`;
  // eslint-disable-next-line no-console
  console.log(`  ${prefix} ${step} — ${detail}`);
}

/**
 * Builds the `FailureObject` reported by this agent.
 *
 * Agent name and version come from the module constants; the attempt count
 * and recursion depth are read from the execution context; the timestamp is
 * taken at the moment of the call.
 *
 * @param ctx      Execution context supplying `retries` and `recursionDepth`.
 * @param category Failure category to report.
 * @param reason   Human-readable explanation of the failure.
 * @param context  Arbitrary structured details attached to the failure.
 * @param lineage  Lineage to associate with the failure.
 * @returns The assembled failure object.
 */
function failure(
  ctx: ExecutionContext,
  category: FailureObject['category'],
  reason: string,
  context: Record<string, unknown>,
  lineage: Lineage,
): FailureObject {
  const { retries: attempts, recursionDepth } = ctx;
  const report: FailureObject = {
    agent: AGENT_NAME,
    agentVersion: AGENT_VERSION,
    category,
    reason,
    context,
    lineage,
    attempts,
    recursionDepth,
    occurredAt: nowIso(),
  };
  return report;
}

/**
 * Runs the Performance Metrics agent over one input table.
 *
 * Flow, as implemented below:
 *  1. Validate `rawInput` against `performanceMetricsInputSchema`.
 *  2. Delegate the computation to `computeMetrics`, tracing every tool call.
 *  3. Turn each reported methodology gap into a non-blocking unresolved issue.
 *  4. Score confidence: the mean confidence of metrics with a non-null value,
 *     minus 0.05 per methodology gap (at most 5 gaps counted), floored at 0.
 *  5. Build the handoff for `intelligence.comparisons-synthesis`.
 *
 * Failures detected here are reported through the returned result
 * (`ok: false` with a `FailureObject`) rather than thrown:
 *  - schema validation failure → `invalid-input`;
 *  - `computeMetrics` returning a failure → `tool-unavailable` when the
 *    failure category is `needs-api-key`, otherwise `methodology-unavailable`,
 *    together with a HITL escalation;
 *  - no metrics at all but at least one methodology gap →
 *    `methodology-unavailable`.
 *
 * @param rawInput Unvalidated input; parsed with the input schema.
 * @param side     Job request and upstream lineage accompanying the input.
 * @param ctx      Execution context; its `trace` array is appended to.
 * @returns The handoff on success, or a failure object, plus any escalations.
 */
export async function runPerformanceMetrics(
  rawInput: unknown,
  side: PerformanceMetricsSideContext,
  ctx: ExecutionContext,
): Promise<AgentResult<PerformanceMetricsOutput>> {
  /* Step 1 (Std 2): receive-table — validate. */
  const parsed = performanceMetricsInputSchema.safeParse(rawInput);
  if (!parsed.success) {
    return {
      ok: false,
      escalations: [],
      failure: failure(
        ctx,
        'invalid-input',
        'Performance Metrics input failed schema validation.',
        { issues: parsed.error.issues },
        side.upstreamLineage,
      ),
    };
  }
  const table: PerformanceMetricsInput = parsed.data;
  trace(ctx, 2, performanceMetricsContract.runbook[0]!.name,
    `received table with ${table.cells.length} cell(s); ${table.entities.length}×${table.metrics.length}×${table.periods.length}`);

  const unresolved: UnresolvedIssue[] = [];
  const escalations: HITLEscalation[] = [];

  /* Steps 2-4 (Std 3 + 5): delegate to LLM with methodology tools. */
  trace(ctx, 5, performanceMetricsContract.runbook[1]!.name,
    `delegating computation to LLM (${MODEL_NAME}) with ${TOOL_COUNT} methodology tool(s) available: [${TOOL_NAMES.join(', ')}]`);

  // Mirrors each tool call into the trace and the console. The entry keeps the
  // tool call's own timestamp (`t.at`), which is why this does not go through
  // `trace()` (that helper stamps the entry with the current time).
  const onToolCall = (t: ToolCallTrace): void => {
    const args = Object.entries(t.input).map(([k, v]) => `${k}=${JSON.stringify(v)}`).join(', ');
    // Build the detail once so the persisted trace and the console line cannot
    // drift apart; a failed call without a message is reported as "unknown" in
    // both places instead of printing "undefined" on the console.
    const outcome = t.ok ? t.resultSummary : `ERROR: ${t.errorMessage ?? 'unknown'}`;
    const detail = `${t.toolName}(${args}) → ${outcome}`;
    ctx.trace.push({
      agent: AGENT_NAME,
      standard: 5,
      step: 'tool-call',
      detail,
      at: t.at,
    });
    // eslint-disable-next-line no-console
    console.log(`  [${AGENT_NAME}][Std 5] tool-call — ${detail}`);
  };

  const llm = await computeMetrics(table, side.jobRequest, onToolCall);
  if (!llm.ok) {
    // A missing API key is routed to an engineer and reported as a tool
    // problem; every other LLM failure goes to a domain expert.
    const isKey = llm.failure.category === 'needs-api-key';
    escalations.push({
      agent: AGENT_NAME,
      reason: 'critical-validation-failure',
      failureContext: llm.failure.reason,
      lineage: side.upstreamLineage,
      validation: {
        status: 'review',
        confidence: makeConfidence(0, 'LLM unavailable or invalid response'),
        checks: [{ name: 'llm-available', passed: false, detail: llm.failure.hint }],
      },
      recommendedReviewer: isKey ? 'engineer' : 'domain-expert',
      raisedAt: nowIso(),
    });
    return {
      ok: false,
      escalations,
      failure: failure(
        ctx,
        isKey ? 'tool-unavailable' : 'methodology-unavailable',
        llm.failure.reason,
        { llmFailure: llm.failure, table },
        side.upstreamLineage,
      ),
    };
  }

  const out = llm.value.metrics;
  trace(ctx, 7, performanceMetricsContract.runbook[4]!.name,
    `computed ${out.metrics.length} metric value(s); ${out.methodologyGaps.length} methodology gap(s)`);

  /* Std 8: methodology gaps → triggers. */
  for (const g of out.methodologyGaps) {
    unresolved.push({
      category: 'methodology-gap',
      detail: `no methodology for "${g.metricKey}" on ${g.entity}/${g.period}: ${g.reason}`,
      blocking: false,
      context: { gap: g },
    });
  }

  /* Step 6 (Std 7): score confidence. */
  // Only metrics that actually produced a value contribute to the average;
  // with none, the average is defined as 0 to avoid dividing by zero.
  const computedNonNull = out.metrics.filter(m => m.value !== null);
  const avgConf = computedNonNull.length === 0
    ? 0
    : computedNonNull.reduce((s, m) => s + m.confidence, 0) / computedNonNull.length;
  const blocking = unresolved.filter(u => u.blocking).length;
  // Each methodology gap costs 0.05, capped at 5 gaps (0.25 total), floored at 0.
  const confidence = makeConfidence(
    Math.max(0, avgConf - 0.05 * Math.min(out.methodologyGaps.length, 5)),
    `avg per-metric confidence ${avgConf.toFixed(2)} with ${out.methodologyGaps.length} methodology gap(s)`,
  );
  trace(ctx, 7, performanceMetricsContract.runbook[5]!.name,
    `validation: ${computedNonNull.length}/${out.metrics.length} non-null metric(s) avgConf=${avgConf.toFixed(2)} confidence=${confidence.tier}`);

  // Nothing computed and at least one gap reported: there is no payload worth
  // handing off, so fail instead.
  if (out.metrics.length === 0 && out.methodologyGaps.length > 0) {
    return {
      ok: false,
      escalations,
      failure: failure(
        ctx,
        'methodology-unavailable',
        'No metric could be computed — every requested metric returned a methodology gap.',
        { gaps: out.methodologyGaps },
        side.upstreamLineage,
      ),
    };
  }

  /* Step 7 (Std 11): handoff. */
  const lineage: Lineage = {
    sourceUrl: side.upstreamLineage.sourceUrl,
    capturedAt: nowIso(),
    effectiveAs: side.upstreamLineage.effectiveAs,
    agentVersion: AGENT_VERSION,
    // Upstream lineage plus every metric's input lineage, de-duplicated.
    upstream: Array.from(new Set([
      ...side.upstreamLineage.upstream,
      ...out.metrics.flatMap(m => m.inputLineage),
    ])),
  };
  // Blocking issues take precedence over low confidence.
  const validationStatus =
    blocking > 0 ? 'review' : confidence.value < LOW_CONFIDENCE_THRESHOLD ? 'flagged' : 'passed';

  const handoff: Handoff<PerformanceMetricsOutput> = {
    fromAgent: AGENT_NAME,
    fromAgentVersion: AGENT_VERSION,
    toAgent: 'intelligence.comparisons-synthesis',
    payload: out,
    metadata: {
      analysisId: ctx.analysisId,
      capabilities: performanceMetricsContract.capabilities,
      appliedMethodologies: out.appliedMethodologies,
      methodologyGapCount: out.methodologyGaps.length,
      toolCallCount: llm.value.toolCalls.length,
      toolCalls: llm.value.toolCalls.map(t => ({
        toolName: t.toolName,
        ok: t.ok,
        input: t.input,
        resultSummary: t.resultSummary,
        errorMessage: t.errorMessage,
        at: t.at,
      })),
    },
    confidence,
    validation: {
      status: validationStatus,
      checks: [
        { name: 'at-least-one-metric', passed: out.metrics.length > 0, detail: `${out.metrics.length}` },
        { name: 'methodology-references', passed: out.metrics.every(m => m.methodologyId !== null || m.methodologyRationale.length > 0) },
        { name: 'no-blocking-issues', passed: blocking === 0 },
      ],
    },
    unresolvedIssues: unresolved,
    lineage,
    timestamp: nowIso(),
  };
  trace(ctx, 11, performanceMetricsContract.runbook[6]!.name,
    `handoff → ${handoff.toAgent} (validation=${validationStatus} confidence=${confidence.tier})`);

  return { ok: true, handoff, escalations };
}