// src/standards.ts
/**
 * The 12 universal operational standards every BID agent must satisfy,
 * encoded as typed contracts. Per-pillar standards layer on top.
 *
 * Every agent declares an AgentStandardsContract; the orchestrator
 * checks the declared shape (e.g. write-back surface) before executing
 * the agent's runbook.
 */

import type {
  ConfidenceScore,
  ConfidenceTier,
  FailureObject,
  Handoff,
  Lineage,
  ValidationStatus,
} from './types.js';

/* Standard 12: hard guardrails on retries / recursion. */
/** Retry ceiling (Standard 12). Only declared here; NOTE(review): enforcement presumably lives in the orchestrator — confirm. */
export const MAX_RETRIES = 3;
/** Recursion-depth ceiling (Standard 12). Only declared here; NOTE(review): enforcement presumably lives in the orchestrator — confirm. */
export const MAX_RECURSION_DEPTH = 5;

/* Standard 7 / 9: confidence thresholds. */
/** Lower bound of the 'medium' tier: confidenceTier returns 'low' for any score that is not >= this value. */
export const LOW_CONFIDENCE_THRESHOLD = 0.6;
/** NOTE(review): presumably the confidence below which a HITL escalation is raised (Standard 9); not referenced in the visible source — confirm against the orchestrator. */
export const HITL_CONFIDENCE_THRESHOLD = 0.4;

/**
 * Standard 1: Objective — explicit boundaries + downstream purpose.
 *
 * All fields are free-form text; nothing in the visible source parses them.
 */
export interface AgentObjective {
  /** What the agent is responsible for. */
  readonly does: string;
  /** What the agent outputs. */
  readonly produces: string;
  /** Explicit out-of-scope items — the "boundaries" half of the standard. */
  readonly doesNot: readonly string[];
  /** Why downstream consumers need the output. */
  readonly downstreamPurpose: string;
}

/**
 * Standard 4: Rules & constraints.
 *
 * The six universal rules are typed as the literal `true` rather than
 * `boolean`, so a contract that tries to set any of them to `false` fails
 * to type-check. Only `pillarSpecificForbidden` varies between agents.
 */
export interface AgentRules {
  readonly preserveRawSource: true;
  readonly preserveLineage: true;
  readonly preserveAuditability: true;
  readonly forbidFabrication: true;
  readonly forbidDestructiveOverwrite: true;
  readonly approvedToolsOnly: true;
  /**
   * Additional forbidden work categories for the agent's pillar.
   * NOTE(review): presumably populated from BASELINE_FORBIDDEN /
   * INTELLIGENCE_FORBIDDEN / DECISION_FORBIDDEN — confirm at call sites.
   */
  readonly pillarSpecificForbidden: readonly string[];
}

/**
 * Standard 5: Methods & tools — capability-based, least-privilege.
 *
 * Closed set of capability identifiers an agent may list in
 * AgentStandardsContract.capabilities. Members are grouped by pillar via
 * the inline comments below.
 */
export type Capability =
  /* Baseline-pillar capabilities (grouping presumed from the labelled
   * sections below — confirm). */
  | 'retrieval'
  | 'api'
  | 'web'
  | 'parser'
  | 'ocr'
  | 'repository'
  | 'taxonomy-mapping'
  | 'ontology-rules'
  | 'entity-resolution'
  | 'semantic-mapping'
  | 'duplicate-resolution'
  | 'exact-lookup'
  | 'rule-lookup'
  | 'fuzzy-match'
  | 'ai-with-citation'
  /* Pillar 2 — Intelligence capabilities (each one references the
   * methodology library where applicable; analytical work only). */
  | 'methodology-library'
  | 'table-construction'
  | 'unit-normalization'
  | 'period-alignment'
  | 'metric-computation'
  | 'comparison-analysis'
  | 'statistical-context'
  | 'insight-synthesis'
  | 'narrative-fidelity'
  /* Pillar 3 — Decision capabilities (each one references the
   * Rule Library where applicable; interpretive, visual, or
   * delivery work — no analytical re-derivation). */
  | 'rule-library'
  | 'interpretation'
  | 'visualization-rendering'
  | 'disclosure-policy-check'
  | 'audience-tier-routing'
  | 'channel-dispatch'
  | 'cadence-control'
  | 'acknowledgment-tracking';

/**
 * Standard 6: Processing — modular, numbered, replayable.
 *
 * One entry of an agent's runbook (AgentStandardsContract.runbook).
 */
export interface RunbookStep {
  /** Step number within the runbook. NOTE(review): presumably 1-based and unique per runbook — confirm. */
  readonly n: number;
  /** Short identifier for the step. */
  readonly name: string;
  /** Human-readable explanation of what the step does. */
  readonly description: string;
}

/**
 * Standard 7: Validation outcome attached to every handoff.
 *
 * Also embedded in HITLEscalation.validation so a reviewer sees the same
 * checks the agent ran.
 */
export interface ValidationOutcome {
  /** Overall validation verdict (type declared in ./types.js). */
  readonly status: ValidationStatus;
  /** Confidence attached to the validated output; makeConfidence builds one. */
  readonly confidence: ConfidenceScore;
  /** Individual checks that were run; `detail` is optional free text per check. */
  readonly checks: readonly { name: string; passed: boolean; detail?: string }[];
}

/**
 * Standard 8: Conditional trigger categories.
 *
 * Closed set of exception categories an agent may list in
 * AgentStandardsContract.triggers.
 *
 * NOTE(review): several members look overlapping ('recursion' vs
 * 'workflow-recursion', 'unresolved-mappings' vs 'unmapped-term',
 * 'missing-data' vs 'insufficient-data'). They are kept as-is because
 * removing a member would break existing contracts — confirm whether
 * each pair is intentionally distinct.
 */
export type TriggerCategory =
  /* Baseline-pillar trigger categories (grouping presumed from the
   * labelled sections below — confirm). */
  | 'failed-retrieval'
  | 'missing-metadata'
  | 'missing-data'
  | 'duplicate-records'
  | 'duplicate-entity'
  | 'duplicate-conflict'
  | 'unresolved-mappings'
  | 'unmapped-term'
  | 'low-confidence'
  | 'contradictory-outputs'
  | 'contradictory-mapping'
  | 'parsing-failure'
  | 'source-mismatch'
  | 'workflow-recursion'
  | 'ontology-conflict'
  | 'unresolved-conflict'
  | 'ambiguity'
  | 'failed-remediation'
  | 'recursion'
  /* Pillar 2 — Intelligence trigger categories (per spec §Std 8). */
  | 'methodology-gap'
  | 'comparability-failure'
  | 'insufficient-data'
  | 'statistical-anomaly'
  | 'narrative-ambiguity'
  | 'unsupported-claim'
  | 'peer-set-failure'
  /* Pillar 3 — Decision trigger categories (per spec §Std 8). */
  | 'rule-gap'
  | 'rule-conflict'
  | 'audience-policy-violation'
  | 'disclosure-policy-concern'
  | 'material-impact-finding'
  | 'threshold-breach'
  | 'cadence-violation'
  | 'recipient-verification-failed'
  | 'delivery-failure'
  | 'acknowledgment-failure'
  | 'fidelity-violation';

/**
 * Standard 9: reviewer roles a HITL escalation can recommend
 * (see HITLEscalation.recommendedReviewer).
 */
export type ReviewerRole =
  | 'analyst'
  | 'domain-expert'
  | 'data-steward'
  | 'engineer'
  /* Pillar 3 — Decision-layer reviewers (per spec §Std 9). */
  | 'compliance-reviewer'
  | 'operations-reviewer'
  | 'authorizing-decision-maker';

/**
 * Standard 9: HITL escalation envelope.
 *
 * Carries the context a human reviewer needs; every AgentResult (success
 * or failure) includes a list of these.
 */
export interface HITLEscalation {
  /** Name of the agent raising the escalation. */
  readonly agent: string;
  /** Why the escalation was raised. */
  readonly reason: string;
  /** Free-text description of the failure circumstances. */
  readonly failureContext: string;
  /** Lineage of the data involved (type declared in ./types.js). */
  readonly lineage: Lineage;
  /** Validation result at the time of escalation. */
  readonly validation: ValidationOutcome;
  /** Role the agent suggests should review this escalation. */
  readonly recommendedReviewer: ReviewerRole;
  /** When the escalation was raised. NOTE(review): presumably an ISO-8601 timestamp string — confirm. */
  readonly raisedAt: string;
}

/**
 * Standard 10: Repository write-back declaration (orchestrator persists).
 *
 * One boolean per artifact category. The agent only declares here; per
 * the standard, persistence itself is the orchestrator's job.
 * NOTE(review): `true` presumably means "this agent produces this
 * artifact for write-back" — confirm against the orchestrator's check.
 */
export interface WriteBackDeclaration {
  readonly structuredOutputs: boolean;
  readonly metadata: boolean;
  readonly lineage: boolean;
  readonly validation: boolean;
  readonly confidence: boolean;
  readonly exceptionLogs: boolean;
  readonly learnedRules: boolean;
  readonly humanOverrides: boolean;
}

/**
 * The 12-standard contract every agent exposes statically.
 *
 * Only the standards that can be declared up front appear as fields; the
 * standard number each field encodes is noted inline.
 */
export interface AgentStandardsContract {
  readonly agentName: string;
  readonly agentVersion: string;
  /** Which pillar the agent belongs to. */
  readonly pillar: 'baseline' | 'intelligence' | 'decision';
  /** Standard 1. */
  readonly objective: AgentObjective;
  /** Standard 4. */
  readonly rules: AgentRules;
  /** Standard 5. */
  readonly capabilities: readonly Capability[];
  /** Standard 6. */
  readonly runbook: readonly RunbookStep[];
  /** Standard 8. */
  readonly triggers: readonly TriggerCategory[];
  /** Standard 10. */
  readonly writeBack: WriteBackDeclaration;
}

/**
 * Standard 11/12: Agents always return one of these. Never raw exceptions.
 *
 * Discriminated on `ok`: the success branch carries a Handoff<T>, the
 * failure branch a FailureObject. Both branches carry `escalations`, so
 * HITL escalations can accompany a successful result as well as a failed one.
 */
export type AgentResult<T> =
  | { ok: true; handoff: Handoff<T>; escalations: readonly HITLEscalation[] }
  | { ok: false; failure: FailureObject; escalations: readonly HITLEscalation[] };

/* Per-pillar standards layered on top of the 12 universals. */

/**
 * Baseline pillar — forbidden work categories.
 * NOTE(review): presumably the value baseline agents put in
 * AgentRules.pillarSpecificForbidden — confirm at call sites.
 */
export const BASELINE_FORBIDDEN: readonly string[] = [
  'strategic-insights',
  'benchmarking',
  'maturity-scoring',
  'recommendations',
];

/**
 * Pillar 2 (Intelligence) — agents reason analytically but do not
 * retrieve fresh data, do not produce partner-facing recommendations,
 * and do not invent values to fill gaps (per spec §Architectural
 * principles + §Std 4).
 *
 * NOTE(review): presumably the value intelligence agents put in
 * AgentRules.pillarSpecificForbidden — confirm at call sites.
 */
export const INTELLIGENCE_FORBIDDEN: readonly string[] = [
  'fresh-data-retrieval',
  'value-fabrication',
  'unsupported-claims',
  'partner-recommendations',
];

/**
 * Pillar 3 (Decision) — agents interpret, visualize, and deliver but
 * do not re-derive analytical findings (Pillar 2 output is truth),
 * do not invent rules outside the Rule Library, do not misrepresent
 * data visually, do not deliver outside approved channels, and do
 * not bypass disclosure policies (per spec §Architectural
 * principles + §Std 4 + §Std 5).
 *
 * NOTE(review): presumably the value decision agents put in
 * AgentRules.pillarSpecificForbidden — confirm at call sites.
 */
export const DECISION_FORBIDDEN: readonly string[] = [
  're-analyze-findings',
  'improvised-interpretation',
  'unsupported-recommendation',
  'misleading-visualization',
  'undeclared-channel',
  'disclosure-policy-bypass',
  'silent-delivery-failure',
];

/**
 * Standard 7: maps a numeric confidence to its tier.
 *
 * - `score >= 0.8` gives 'high'
 * - `score >= LOW_CONFIDENCE_THRESHOLD` gives 'medium'
 * - anything else gives 'low' (this includes NaN, for which both
 *   comparisons are false)
 *
 * @param score - Confidence value; not clamped or validated here.
 * @returns The tier the score falls into.
 */
export function confidenceTier(score: number): ConfidenceTier {
  const tier: ConfidenceTier =
    score >= 0.8 ? 'high' : score >= LOW_CONFIDENCE_THRESHOLD ? 'medium' : 'low';
  return tier;
}

/**
 * Standard 7: builds a fully-formed ConfidenceScore from a raw number.
 *
 * The value is clamped into [0, 1] and the tier is derived from the
 * clamped value via confidenceTier, so `value` and `tier` always agree.
 *
 * @param value - Raw confidence. Out-of-range inputs (including
 *   ±Infinity) are clamped. NaN is treated as 0: Math.min / Math.max
 *   return NaN when given NaN, so without the explicit check the score
 *   would carry a NaN value, which JSON.stringify turns into `null`.
 * @param rationale - Free-text justification, stored unchanged.
 * @returns A ConfidenceScore whose `value` is a number in [0, 1].
 */
export function makeConfidence(value: number, rationale: string): ConfidenceScore {
  // NaN survives Math.max/Math.min, so map it to the lowest score explicitly.
  const clamped = Number.isNaN(value) ? 0 : Math.max(0, Math.min(1, value));
  return { value: clamped, tier: confidenceTier(clamped), rationale };
}

/**
 * One-line digest of each of the 12 universal standards, ordered by
 * standard number. Surfaced in the audit JSON so a trace explains itself
 * without external documentation.
 */
export const STANDARDS_SUMMARY: readonly { n: number; name: string; gist: string }[] = [
  {
    n: 1,
    name: 'Objective',
    gist: 'Single clear responsibility with explicit boundaries.',
  },
  {
    n: 2,
    name: 'Inputs',
    gist: 'Structured, machine-readable; lineage + confidence persist.',
  },
  {
    n: 3,
    name: 'Decision logic',
    gist: 'Explicit, deterministic where possible, every decision recorded.',
  },
  {
    n: 4,
    name: 'Rules & constraints',
    gist: 'Preserve raw, lineage, audit; no fabrication; approved tools only.',
  },
  {
    n: 5,
    name: 'Methods & tools',
    gist: "Capability-based; approved + connected; lineage-preserving. The system prompt for each LLM invocation carries the discipline the current operation actually engages — the 12 standards remain the framework's invariants and are always honored by agent behavior, but the full standards recital is reserved for steps that engage them.",
  },
  {
    n: 6,
    name: 'Processing',
    gist: "Modular, repeatable, replayable; numbered runbook. Each runbook step is executed at the appropriate cost level — when a step's input has been deterministically established by upstream agents, schema, or unambiguous data, the agent records the inherited determination and proceeds without LLM call. LLM reasoning is reserved for genuine interpretation, judgment, or resolution of ambiguity.",
  },
  {
    n: 7,
    name: 'Validation & confidence',
    gist: "Every output carries validation status + confidence tier. Confidence can be inherited from upstream when the upstream agent's confidence is high and the current agent's processing introduces no new uncertainty. Re-validation via LLM is reserved for cases where the agent introduces uncertainty or where the runbook explicitly requires independent verification.",
  },
  {
    n: 8,
    name: 'Conditional triggers',
    gist: 'Explicit exception categories; traceable, context-preserving.',
  },
  {
    n: 9,
    name: 'HITL escalation',
    gist: 'Defined thresholds; escalations carry full context.',
  },
  {
    n: 10,
    name: 'Repository write-back',
    gist: 'Agents declare; orchestrator persists.',
  },
  {
    n: 11,
    name: 'Handoff',
    gist: 'Standardized envelope; downstream never reconstructs context.',
  },
  {
    n: 12,
    name: 'Failure handling',
    gist: 'Fail safely; bounded retries + recursion; structured FailureObject.',
  },
];

/**
 * Std 5 / Std 6 — prompt scoping primitive.
 *
 * The 12 universal standards govern every agent's behavior; the
 * prompt overhead per LLM call is right-sized to which standards the
 * specific step actually engages. Lightweight calls (deterministic
 * checks, single-tool calls with clear inputs, structured
 * pass-throughs) pass a scope listing just those standard numbers and
 * receive a trimmed prompt; interpretive calls (multi-turn tool use,
 * narrative generation) omit the scope and receive the full prompt.
 *
 * This is an implementation of Std 5 + Std 6, not a relaxation of
 * either — the runtime behaviour is still bound by all 12 standards.
 *
 * This interface only describes the options; the defaults documented on
 * each field are applied by whatever builds the prompt, which is not
 * part of the visible source.
 */
export interface PromptScope {
  /** Numeric ids (1–12) of the universal standards the current LLM
   *  call directly engages. Omit / leave empty for the full prompt.
   *  NOTE(review): the omitMatrix / omitRunbook defaults key off this
   *  field being "set" — confirm whether an empty array counts as set. */
  readonly engagedStandards?: readonly number[];
  /** Skip the matrix-row dump (verbose). Default true when
   *  engagedStandards is set, false otherwise. */
  readonly omitMatrix?: boolean;
  /** Skip the runbook dump. Default true when engagedStandards is
   *  set, false otherwise. */
  readonly omitRunbook?: boolean;
  /** Short label for the step, included in the trimmed prompt header
   *  so the model knows what it is doing (e.g. "normalize-units"). */
  readonly stepLabel?: string;
}

/**
 * Renders the subset of STANDARDS_SUMMARY selected by `engaged` as
 * prompt-ready text, one standard per line in the form
 * `<n>. <name> — <gist>`.
 *
 * Lines follow STANDARDS_SUMMARY order, not the order of `engaged`.
 * Duplicate ids produce a single line and ids with no matching standard
 * are ignored.
 *
 * @param engaged - Standard numbers to include.
 * @returns The matching lines joined by newlines; an empty string when
 *   nothing matches.
 */
export function renderEngagedStandards(engaged: readonly number[]): string {
  const wanted = new Set<number>(engaged);
  const lines: string[] = [];
  for (const { n, name, gist } of STANDARDS_SUMMARY) {
    if (wanted.has(n)) {
      lines.push(`${n}. ${name} — ${gist}`);
    }
  }
  return lines.join('\n');
}

/**
 * Type-only re-export of FailureObject (declared in ./types.js) so agent
 * code only needs to import standards.ts. Being `export type`, it is
 * erased at compile time and adds no runtime import.
 */
export type { FailureObject };