/**
* Retrieval dispatcher.
*
* - Owns the connector registry (one entry per unique `source` name).
* - Enforces declared rate-limit envelopes via per-connector RateLimiter.
* - Converts thrown RetrievalError + unknown exceptions into a
* structured DispatchResult so callers (and the legacy `fetchRaw`
* adapter) never receive raw exceptions (Std 12).
* - Refuses unregistered sources with an explicit, actionable message
* that points to the connectors/ directory (Std 5).
*
* The legacy `fetchRaw(req)` adapter at the bottom preserves the
* call-shape the existing Source/Extraction agent uses, so no agent
* code has to change. Future agents should prefer `dispatchFetch`.
*/
import {
RetrievalError,
type FetchParams,
type RawPayload,
type RetrievalConnector,
type RetrievalErrorCategory,
} from './interface.js';
import { MockConnector } from './mock-connector.js';
import { RateLimiter } from './rate-limiter.js';
/**
 * One registry slot: a connector paired with the rate limiter that was
 * built from that connector's `rateLimit` at registration time.
 */
interface RegistryEntry {
// Connector that `dispatchFetch` checks for availability and fetches from.
connector: RetrievalConnector;
// Limiter that `dispatchFetch` awaits (`acquire()`) before each fetch.
limiter: RateLimiter;
}
/**
 * Module-level connector registry, keyed by `connector.name`. Because it is
 * a Map, registering a second connector under an existing name replaces the
 * earlier entry (including its limiter).
 */
const REGISTRY = new Map<string, RegistryEntry>();
/**
 * Adds `connector` to the registry under `connector.name`, together with a
 * fresh `RateLimiter` built from `connector.rateLimit`.
 *
 * An existing entry with the same name is replaced, limiter included.
 *
 * @param connector - The connector to make available to `dispatchFetch`.
 */
export function registerConnector(connector: RetrievalConnector): void {
  const key = connector.name;
  const limiter = new RateLimiter(connector.rateLimit);
  REGISTRY.set(key, { connector, limiter });
}
/**
 * Looks up a registered connector by its source name.
 *
 * @param name - The `name` the connector was registered under.
 * @returns The connector, or `undefined` when nothing is registered for `name`.
 */
export function getConnector(name: string): RetrievalConnector | undefined {
  const entry = REGISTRY.get(name);
  if (entry == null) {
    return undefined;
  }
  return entry.connector;
}
/**
 * Lists the names of all registered connectors, in registration order.
 *
 * @returns A new array; mutating it does not affect the registry.
 */
export function listConnectors(): string[] {
  return Array.from(REGISTRY.keys());
}
/**
 * Empties the connector registry.
 *
 * Every entry is removed, so `dispatchFetch` will report
 * `connector-not-registered` for all sources until connectors are
 * registered again.
 */
export function clearConnectors(): void {
  REGISTRY.clear();
}
/* The mock connector is registered eagerly, as a side effect of loading
 * this module, so demos continue to run without any extra wiring. Domain
 * connectors are NOT registered here — they live in `connectors/` and a
 * caller registers them explicitly via `registerConnector()`.
 * Note: `clearConnectors()` empties the whole registry, this entry included. */
registerConnector(new MockConnector());
/**
 * Structured outcome of `dispatchFetch`, discriminated on `ok`.
 *
 * - `ok: true`  — `payload` is the value the connector's `fetch` resolved
 *   with, and `via` is the `name` of the connector that produced it.
 * - `ok: false` — `category` classifies the failure and `reason` is a
 *   human-readable message. `context` is optional: `dispatchFetch` sets it
 *   for unregistered sources (`source`, `known`) and copies it from a
 *   thrown `RetrievalError`.
 */
export type DispatchResult =
| { ok: true; payload: RawPayload; via: string }
| { ok: false; category: RetrievalErrorCategory; reason: string; context?: Record<string, unknown> };
/**
 * Renders a thrown value that is not a `RetrievalError` as a non-empty,
 * human-readable reason string.
 *
 * Plain `String(err)` is not used for objects: it yields `[object Object]`
 * for ordinary objects and throws for null-prototype objects, which would
 * let a raw exception escape the dispatcher.
 */
function describeThrown(err: unknown): string {
  if (err instanceof Error) {
    // An Error built without a message would otherwise give an empty reason.
    return err.message || err.name || 'unknown error';
  }
  if (typeof err === 'object' && err !== null) {
    try {
      const json = JSON.stringify(err);
      // `{}` carries no information; fall back to the tag form below.
      if (typeof json === 'string' && json !== '{}') return json;
    } catch {
      // Circular structures and BigInt fields make JSON.stringify throw.
    }
    // Unlike String(err), this works for null-prototype objects.
    return Object.prototype.toString.call(err);
  }
  // Primitives (string, number, symbol, undefined, null, ...) stringify safely.
  return String(err) || 'unknown error';
}

/**
 * Fetches a payload from the connector registered under `source`.
 *
 * Steps, in order: look up the connector, ask it whether it is available,
 * wait for its rate limiter, then call its `fetch`. Every failure on that
 * path is converted into an `ok: false` result instead of being thrown:
 *
 * - no connector registered -> `connector-not-registered` (with the list of
 *   known sources in both `reason` and `context`);
 * - `isAvailable()` resolves falsy -> `unavailable`;
 * - a thrown `RetrievalError` -> its own `category`, `message`, `context`;
 * - anything else thrown -> `internal`, with a readable description of the
 *   thrown value.
 *
 * @param source - Name the connector was registered under.
 * @param params - Passed through unchanged to the connector's `fetch`.
 * @returns A `DispatchResult`; on success `via` is the connector's name.
 */
export async function dispatchFetch(source: string, params: FetchParams): Promise<DispatchResult> {
  const entry = REGISTRY.get(source);
  if (!entry) {
    const known = [...REGISTRY.keys()];
    return {
      ok: false,
      category: 'connector-not-registered',
      reason:
        `source "${source}" has no connector registered; ` +
        `add one to src/tools/retrieval/connectors/ and register it via registerConnector(). ` +
        `Known sources: ${known.join(', ') || '<none>'}.`,
      context: { source, known },
    };
  }

  const { connector, limiter } = entry;
  try {
    // The availability probe sits inside the try so a throwing probe is
    // reported as a structured failure too.
    if (!(await connector.isAvailable())) {
      return {
        ok: false,
        category: 'unavailable',
        reason: `connector "${connector.name}" reports it is not available`,
      };
    }
    await limiter.acquire();
    const payload = await connector.fetch(params);
    return { ok: true, payload, via: connector.name };
  } catch (err) {
    if (err instanceof RetrievalError) {
      return { ok: false, category: err.category, reason: err.message, context: err.context };
    }
    return {
      ok: false,
      category: 'internal',
      reason: describeThrown(err),
    };
  }
}
/* -------------------------------------------------------------------------
* Legacy adapter — preserves the shape the existing source-extraction
* agent already uses (`fetchRaw({entityId, entityAliases, period, source})`).
* Translates the new RawPayload (`contentType`, `rawContent`, `sourceUrl`)
* into the older `shape`, `body`, `url` triple the agent reads.
* ------------------------------------------------------------------------- */
/**
 * Request shape accepted by the legacy `fetchRaw` adapter.
 */
export interface LegacyRetrievalRequest {
// Forwarded to the connector as `entity.id`.
readonly entityId: string;
// Forwarded to the connector as `entity.aliases`.
readonly entityAliases: readonly string[];
// Forwarded to the connector unchanged as `period`.
readonly period: string;
// Name of the registered connector to dispatch to.
readonly source: string;
}
/**
 * Payload shape tags used by the legacy payload format. `contentTypeToShape`
 * derives one from a content-type string, falling back to `'text'`.
 */
export type LegacyShape = 'html' | 'json' | 'xbrl' | 'text';
/**
 * Legacy payload shape returned by `fetchRaw` on success; each field is
 * mapped from the connector's `RawPayload`.
 */
export interface LegacyRawPayload {
// Copied from the payload's `source`.
readonly source: string;
// The payload's `sourceUrl`, or '' when that is null/undefined.
readonly url: string;
// Copied from the payload's `capturedAt`.
readonly capturedAt: string;
// Derived from the payload's `contentType` via `contentTypeToShape`.
readonly shape: LegacyShape;
// Copied from the payload's `rawContent`.
readonly body: string;
}
/**
 * Result of the legacy `fetchRaw` adapter, discriminated on `ok`.
 *
 * On failure `payload` is explicitly `null` and only `reason` is carried
 * over; the dispatcher's `category` and `context` are not exposed here.
 */
export type LegacyRetrievalResult =
| { ok: true; payload: LegacyRawPayload }
| { ok: false; payload: null; reason: string };
/**
 * Maps a content-type string onto the legacy shape tag.
 *
 * Matching is a case-insensitive substring test, tried in this order:
 * `html`, `json`, then `xbrl`/`xml` (both give `'xbrl'`). The first hit
 * wins, so e.g. `application/xhtml+xml` is `'html'`. Anything else is
 * `'text'`.
 *
 * @param ct - Content type, e.g. `text/html; charset=utf-8`.
 * @returns The matching `LegacyShape`.
 */
function contentTypeToShape(ct: string): LegacyShape {
  const normalized = ct.toLowerCase();
  // Ordered (needle, shape) pairs; earlier rows take precedence.
  const rules: ReadonlyArray<readonly [string, LegacyShape]> = [
    ['html', 'html'],
    ['json', 'json'],
    ['xbrl', 'xbrl'],
    ['xml', 'xbrl'],
  ];
  for (const [needle, shape] of rules) {
    if (normalized.includes(needle)) {
      return shape;
    }
  }
  return 'text';
}
/**
 * Legacy entry point: runs `dispatchFetch` for `req.source` and reshapes
 * the outcome into the older `{ source, url, capturedAt, shape, body }` form.
 *
 * Never throws for dispatch failures; they come back as
 * `{ ok: false, payload: null, reason }`.
 *
 * @param req - Legacy request; `entityId`/`entityAliases` become
 *   `entity.id`/`entity.aliases`, `period` is forwarded unchanged.
 * @returns The legacy payload on success, otherwise the failure reason.
 */
export async function fetchRaw(req: LegacyRetrievalRequest): Promise<LegacyRetrievalResult> {
  const { source, entityId, entityAliases, period } = req;
  const outcome = await dispatchFetch(source, {
    entity: { id: entityId, aliases: entityAliases },
    period,
  });

  if (outcome.ok) {
    const raw = outcome.payload;
    return {
      ok: true,
      payload: {
        source: raw.source,
        // The legacy agent expects a string, so a missing URL becomes ''.
        url: raw.sourceUrl ?? '',
        capturedAt: raw.capturedAt,
        shape: contentTypeToShape(raw.contentType),
        body: raw.rawContent,
      },
    };
  }

  return { ok: false, payload: null, reason: outcome.reason };
}