BID · Console
Baseline · Intelligence · Decision
src/tools/retrieval/dispatcher.ts 5,008 bytes · typescript
/**
 * Retrieval dispatcher.
 *
 * - Owns the connector registry (one entry per unique `source` name).
 * - Enforces declared rate-limit envelopes via per-connector RateLimiter.
 * - Converts thrown RetrievalError + unknown exceptions into a
 *   structured DispatchResult so callers (and the legacy `fetchRaw`
 *   adapter) never receive raw exceptions (Std 12).
 * - Refuses unregistered sources with an explicit, actionable message
 *   that points to the connectors/ directory (Std 5).
 *
 * The legacy `fetchRaw(req)` adapter at the bottom preserves the
 * call-shape the existing Source/Extraction agent uses, so no agent
 * code has to change. Future agents should prefer `dispatchFetch`.
 */

import {
  RetrievalError,
  type FetchParams,
  type RawPayload,
  type RetrievalConnector,
  type RetrievalErrorCategory,
} from './interface.js';
import { MockConnector } from './mock-connector.js';
import { RateLimiter } from './rate-limiter.js';

/** Registry value: a connector paired with the rate limiter built from its declared `rateLimit`. */
interface RegistryEntry {
  /** The connector stored under its own `name` by `registerConnector`. */
  connector: RetrievalConnector;
  /** Limiter that `dispatchFetch` acquires before each `connector.fetch` call. */
  limiter: RateLimiter;
}

/** Module-level connector registry, keyed by `connector.name` (the `source` passed to `dispatchFetch`). */
const REGISTRY = new Map<string, RegistryEntry>();

/**
 * Stores `connector` in the registry under `connector.name`, together with a
 * new RateLimiter constructed from `connector.rateLimit`.
 *
 * Registering another connector with an already-used name replaces the
 * previous entry, including its limiter.
 *
 * @param connector - Connector to make available to `dispatchFetch`.
 */
export function registerConnector(connector: RetrievalConnector): void {
  const key = connector.name;
  const entry: RegistryEntry = {
    connector,
    limiter: new RateLimiter(connector.rateLimit),
  };
  REGISTRY.set(key, entry);
}

/**
 * Looks up the connector registered under `name`.
 *
 * @param name - Source name the connector was registered with.
 * @returns The connector, or `undefined` when nothing is registered under `name`.
 */
export function getConnector(name: string): RetrievalConnector | undefined {
  const entry = REGISTRY.get(name);
  if (entry == null) {
    return undefined;
  }
  return entry.connector;
}

/**
 * Lists the names of all currently registered connectors.
 *
 * @returns A new array of registry keys, in the registry's iteration order.
 */
export function listConnectors(): string[] {
  return Array.from(REGISTRY.keys());
}

/**
 * Removes every entry from the connector registry.
 *
 * This also drops the MockConnector that this module registers at load time;
 * callers that still need it must register it again.
 */
export function clearConnectors(): void {
  REGISTRY.clear();
}

/* The mock connector is registered eagerly, as a side effect of loading this
 * module, so demos continue to run without any extra wiring. Domain
 * connectors are NOT registered here — they live in `connectors/` and a
 * caller registers them explicitly via registerConnector(). */
registerConnector(new MockConnector());

/**
 * Outcome of `dispatchFetch`, discriminated on `ok`.
 *
 * - `ok: true` carries the connector's payload and, in `via`, the name of the
 *   connector that produced it.
 * - `ok: false` carries an error category, a reason string, and optional
 *   context describing the failure.
 */
export type DispatchResult =
  | { ok: true; payload: RawPayload; via: string }
  | { ok: false; category: RetrievalErrorCategory; reason: string; context?: Record<string, unknown> };

/**
 * Runs a fetch through the connector registered for `source`.
 *
 * Failures are reported in the returned DispatchResult rather than thrown:
 * - no connector registered for `source` → `connector-not-registered`, with
 *   the list of known sources in both the reason and the context;
 * - connector's `isAvailable()` resolves falsy → `unavailable`;
 * - a RetrievalError thrown along the way → its own category, message and context;
 * - anything else thrown → `internal`, with the error message (or the
 *   stringified value when it is not an Error).
 *
 * The connector's rate limiter is acquired after the availability check and
 * before the fetch itself.
 *
 * @param source - Name the connector was registered under.
 * @param params - Fetch parameters forwarded unchanged to the connector.
 * @returns A structured success or failure result.
 */
export async function dispatchFetch(source: string, params: FetchParams): Promise<DispatchResult> {
  const registered = REGISTRY.get(source);

  if (!registered) {
    const knownSources = [...REGISTRY.keys()];
    const joined = knownSources.join(', ');
    // An empty join (no sources at all) is rendered as a placeholder.
    const knownLabel = joined === '' ? '<none>' : joined;
    const reason = [
      `source "${source}" has no connector registered;`,
      `add one to src/tools/retrieval/connectors/ and register it via registerConnector().`,
      `Known sources: ${knownLabel}.`,
    ].join(' ');
    return {
      ok: false,
      category: 'connector-not-registered',
      reason,
      context: { source, known: knownSources },
    };
  }

  const { connector, limiter } = registered;

  try {
    const available = await connector.isAvailable();
    if (!available) {
      return {
        ok: false,
        category: 'unavailable',
        reason: `connector "${connector.name}" reports it is not available`,
      };
    }

    // Wait for the rate limiter only once the connector is known to be usable.
    await limiter.acquire();
    const payload = await connector.fetch(params);
    return { ok: true, payload, via: connector.name };
  } catch (failure) {
    if (!(failure instanceof RetrievalError)) {
      // Unknown failure: surface whatever message is available.
      const reason = failure instanceof Error ? failure.message : String(failure);
      return { ok: false, category: 'internal', reason };
    }
    return {
      ok: false,
      category: failure.category,
      reason: failure.message,
      context: failure.context,
    };
  }
}

/* -------------------------------------------------------------------------
 * Legacy adapter — preserves the shape the existing source-extraction
 * agent already uses (`fetchRaw({entityId, entityAliases, period, source})`).
 * Translates the new RawPayload (`contentType`, `rawContent`, `sourceUrl`)
 * into the older `shape`, `body`, `url` triple the agent reads.
 * ------------------------------------------------------------------------- */

/** Request shape accepted by the legacy `fetchRaw` adapter. */
export interface LegacyRetrievalRequest {
  /** Forwarded to the connector as `entity.id`. */
  readonly entityId: string;
  /** Forwarded to the connector as `entity.aliases`. */
  readonly entityAliases: readonly string[];
  /** Forwarded to the connector unchanged as `period`. */
  readonly period: string;
  /** Name of the registered connector to dispatch to. */
  readonly source: string;
}

/** Coarse payload format tag used by the legacy payload; derived from a content type by `contentTypeToShape`. */
export type LegacyShape = 'html' | 'json' | 'xbrl' | 'text';

/** Legacy payload shape produced by `fetchRaw` from a connector's RawPayload. */
export interface LegacyRawPayload {
  /** Copied from the RawPayload's `source`. */
  readonly source: string;
  /** The RawPayload's `sourceUrl`, or an empty string when that is null/undefined. */
  readonly url: string;
  /** Copied from the RawPayload's `capturedAt`. */
  readonly capturedAt: string;
  /** Format tag derived from the RawPayload's `contentType`. */
  readonly shape: LegacyShape;
  /** Copied from the RawPayload's `rawContent`. */
  readonly body: string;
}

/**
 * Result of the legacy `fetchRaw` adapter, discriminated on `ok`.
 *
 * On failure `payload` is `null` and `reason` carries the dispatcher's
 * failure reason; the error category and context are not exposed here.
 */
export type LegacyRetrievalResult =
  | { ok: true; payload: LegacyRawPayload }
  | { ok: false; payload: null; reason: string };

/**
 * Maps a content-type string onto the legacy shape tag.
 *
 * Matching is a case-insensitive substring test, tried in a fixed order:
 * `html` first, then `json`, then `xbrl` or `xml` (both map to `'xbrl'`).
 * Anything that matches none of these is `'text'`.
 *
 * @param ct - Content type as reported by the connector.
 * @returns The legacy shape tag for `ct`.
 */
function contentTypeToShape(ct: string): LegacyShape {
  const normalized = ct.toLowerCase();
  // Order matters: earlier rows win when several markers are present.
  const rules: ReadonlyArray<readonly [LegacyShape, readonly string[]]> = [
    ['html', ['html']],
    ['json', ['json']],
    ['xbrl', ['xbrl', 'xml']],
  ];
  for (const [shape, markers] of rules) {
    if (markers.some((marker) => normalized.includes(marker))) {
      return shape;
    }
  }
  return 'text';
}

/**
 * Legacy entry point: dispatches `req` through `dispatchFetch` and reshapes
 * the outcome into the older result format.
 *
 * On success the RawPayload is translated field by field (`sourceUrl` → `url`,
 * defaulting to an empty string; `contentType` → `shape`; `rawContent` →
 * `body`). On failure only the dispatcher's `reason` is carried over.
 *
 * @param req - Legacy request naming the source, entity and period.
 * @returns A legacy success result with the translated payload, or a failure
 *   result with a `null` payload and the reason.
 */
export async function fetchRaw(req: LegacyRetrievalRequest): Promise<LegacyRetrievalResult> {
  const { source, entityId, entityAliases, period } = req;
  const outcome = await dispatchFetch(source, {
    entity: { id: entityId, aliases: entityAliases },
    period,
  });

  if (outcome.ok) {
    const { payload: raw } = outcome;
    const legacy: LegacyRawPayload = {
      source: raw.source,
      url: raw.sourceUrl ?? '',
      capturedAt: raw.capturedAt,
      shape: contentTypeToShape(raw.contentType),
      body: raw.rawContent,
    };
    return { ok: true, payload: legacy };
  }

  return { ok: false, payload: null, reason: outcome.reason };
}