/**
* SEC EDGAR retrieval connector.
*
* The connector ships in two shapes:
*
* 1. Four standalone functions —
* secEdgarCompanies(searchTerms)
* secFinancials(cik, concepts?)
* secSubmissions(cik, filingTypes?)
* secFilingDocument(documentUrl, maxChars?)
* These are the surfaces agents call (typically as Anthropic
* tools via SEC_TOOLS / executeSecTool below). They return
* structured JSON, not raw bytes, because their job is to make
* EDGAR usable for an LLM — not to dump untouched HTML.
*
* 2. SecEdgarConnector — a RetrievalConnector that registers the
* `sec-edgar` source name with the dispatcher so JobRequest
* `sources: ["sec-edgar"]` is a known source. Its `fetch()` is a
* thin wrapper around secFinancials, kept for legacy
* compatibility with the original dispatchFetch path.
*
* All outbound requests share one rate limiter (SEC's published public
* limit is 10 req/s) and the User-Agent
* `MR mitchell.roy@sia-partners.com`. Every failure is translated to
* RetrievalError so the dispatcher hands callers a structured
* DispatchResult (Std 12).
*/
import {
RetrievalError,
type FetchParams,
type RawPayload,
type RetrievalConnector,
} from '../interface.js';
import { httpGet, HttpError } from '../http-client.js';
import { RateLimiter } from '../rate-limiter.js';
/**
* Shared User-Agent + rate limiter, exported so sibling SEC connector
* modules (e.g. sec-edgar-filings.ts) can reuse them instead of
* standing up parallel limiters that would silently double the
* outbound rate against SEC.
*/
export const SEC_USER_AGENT = 'MR mitchell.roy@sia-partners.com';
/** Request headers used for the JSON endpoints (ticker index, companyfacts, submissions). */
export const SEC_HEADERS_JSON = { 'User-Agent': SEC_USER_AGENT, Accept: 'application/json' } as const;
/** Same User-Agent with a wildcard Accept, for responses that are not JSON. */
export const SEC_HEADERS_ANY = { 'User-Agent': SEC_USER_AGENT, Accept: '*/*' } as const;
/** Ticker / CIK / title index downloaded by loadTickerIndex. */
const TICKERS_URL = 'https://www.sec.gov/files/company_tickers.json';
/** Base URL for XBRL company facts; `/CIK<10-digit CIK>.json` is appended per request. */
const COMPANY_FACTS_BASE = 'https://data.sec.gov/api/xbrl/companyfacts';
/** Base URL for filing history; `/CIK<10-digit CIK>.json` is appended per request. */
const SUBMISSIONS_BASE = 'https://data.sec.gov/submissions';
// Response-size caps handed to httpGet as `maxBodyBytes`, one per endpoint
// (64 MiB, 8 MiB, 8 MiB and 32 MiB respectively).
const COMPANY_FACTS_MAX_BYTES = 64 * 1024 * 1024;
const SUBMISSIONS_MAX_BYTES = 8 * 1024 * 1024;
const TICKERS_MAX_BYTES = 8 * 1024 * 1024;
const FILING_DOC_MAX_BYTES = 32 * 1024 * 1024;
/** Character budget clampChars falls back to when the caller's maxChars is not usable. */
const DEFAULT_FILING_TRUNCATE_CHARS = 50_000;
/** Single outbound limiter for every SEC request: 10 requests/second with a burst of 10. */
export const secLimiter = new RateLimiter({ requestsPerSecond: 10, burstSize: 10 });
// Short local aliases used by the functions in this module.
const limiter = secLimiter;
const SEC_HEADERS = SEC_HEADERS_JSON;
/* ------------------------------------------------------------------ *
* Tool 1 — secEdgarCompanies(searchTerms)
* Resolves a free-form search string (one or more tickers / names,
* comma-separated) to {cik, ticker, name} records. Backed by
* company_tickers.json, cached for the process lifetime.
* ------------------------------------------------------------------ */
/** One company returned by secEdgarCompanies. */
export interface SecCompanyMatch {
/** CIK zero-padded to 10 digits (the form parseCik produces). */
readonly cik: string;
/** Upper-cased ticker; empty string when a numeric CIK term is not present in the ticker index. */
readonly ticker: string;
/** Company title from company_tickers.json; empty string when a numeric CIK term is not in the index. */
readonly name: string;
}
/** Internal row of the in-memory ticker index built by loadTickerIndex. */
interface TickerEntry {
/** CIK zero-padded to 10 digits. */
readonly cik: string;
/** Ticker, trimmed and upper-cased. */
readonly ticker: string;
/** Upper-cased copy of `name`, precomputed for case-insensitive name matching. */
readonly nameUpper: string;
/** Company title, trimmed, in its original casing. */
readonly name: string;
}
/** Parsed rows of company_tickers.json; set once the first load succeeds. */
let tickerIndex: readonly TickerEntry[] | null = null;
/** Load shared by concurrent callers; reset to null whenever a load fails so the next call retries. */
let tickerIndexPromise: Promise<readonly TickerEntry[]> | null = null;

/**
 * Downloads company_tickers.json and converts it into TickerEntry rows.
 * Touches no cache state; loadTickerIndex owns caching and retry.
 *
 * @throws RetrievalError when the request fails (translated via
 *   translateHttpError), the body is not valid JSON ('internal'), or no row
 *   yields both a CIK and a ticker ('no-content').
 */
async function fetchTickerIndex(): Promise<readonly TickerEntry[]> {
  await limiter.acquire();
  let res;
  try {
    res = await httpGet(TICKERS_URL, { headers: SEC_HEADERS, maxBodyBytes: TICKERS_MAX_BYTES });
  } catch (err) {
    throw translateHttpError(err, TICKERS_URL);
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(res.body);
  } catch (err) {
    throw new RetrievalError(
      'internal',
      `SEC company_tickers.json invalid JSON: ${err instanceof Error ? err.message : String(err)}`,
      { url: TICKERS_URL },
    );
  }
  const out: TickerEntry[] = [];
  if (parsed && typeof parsed === 'object') {
    for (const raw of Object.values(parsed as Record<string, unknown>)) {
      if (!raw || typeof raw !== 'object') continue;
      const e = raw as { cik_str?: unknown; ticker?: unknown; title?: unknown };
      // cik_str may be a number or a string; normalise to text before padding.
      const cikRaw =
        typeof e.cik_str === 'number'
          ? String(e.cik_str)
          : typeof e.cik_str === 'string'
            ? e.cik_str
            : '';
      const cik = parseCik(cikRaw);
      const ticker = typeof e.ticker === 'string' ? e.ticker.trim().toUpperCase() : '';
      const name = typeof e.title === 'string' ? e.title.trim() : '';
      // Rows lacking a usable CIK or ticker cannot be resolved; skip them.
      if (!cik || !ticker) continue;
      out.push({ cik, ticker, nameUpper: name.toUpperCase(), name });
    }
  }
  if (out.length === 0) {
    throw new RetrievalError('no-content', 'company_tickers.json contained no resolvable records.', {
      url: TICKERS_URL,
    });
  }
  return out;
}

/**
 * Returns the ticker index, downloading it at most once per process.
 *
 * Concurrent callers share one in-flight load. Any failure, including a
 * rejection from the rate limiter, clears the shared promise so a later call
 * retries instead of replaying a cached rejection.
 *
 * @returns The cached or freshly loaded index rows.
 * @throws Whatever fetchTickerIndex throws; the error is re-thrown unchanged.
 */
async function loadTickerIndex(): Promise<readonly TickerEntry[]> {
  if (tickerIndex) return tickerIndex;
  if (tickerIndexPromise) return tickerIndexPromise;
  const pending = (async () => {
    try {
      const entries = await fetchTickerIndex();
      tickerIndex = entries;
      return entries;
    } catch (err) {
      // Runs after `tickerIndexPromise = pending` below, because the await
      // above always yields at least once before this handler can fire.
      tickerIndexPromise = null;
      throw err;
    }
  })();
  tickerIndexPromise = pending;
  return pending;
}
/**
 * Resolves a free-form search string to company records.
 *
 * The input is split on commas, semicolons and newlines. Each term is tried,
 * in order, as:
 *   1. a numeric CIK (optionally prefixed with "CIK") — echoed back with empty
 *      ticker/name when the CIK is not in the ticker index;
 *   2. an exact, case-insensitive ticker or company-name match;
 *   3. a case-insensitive substring match on the company name.
 * A term that resolves exactly never falls through to substring matching,
 * even when its company was already returned for an earlier term.
 *
 * @param searchTerms One or more tickers, names or CIKs.
 * @returns Matches in discovery order, de-duplicated by CIK.
 * @throws RetrievalError 'invalid-request' when no usable term is supplied;
 *   any error raised while loading the ticker index propagates.
 */
export async function secEdgarCompanies(searchTerms: string): Promise<SecCompanyMatch[]> {
  if (typeof searchTerms !== 'string' || !searchTerms.trim()) {
    throw new RetrievalError('invalid-request', 'secEdgarCompanies: searchTerms must be a non-empty string.');
  }
  const terms = searchTerms
    .split(/[,;\n]/)
    .map(t => t.trim())
    .filter(t => t.length > 0);
  if (terms.length === 0) {
    throw new RetrievalError('invalid-request', 'secEdgarCompanies: no usable search terms after splitting.');
  }
  const index = await loadTickerIndex();
  const seenCiks = new Set<string>();
  const matches: SecCompanyMatch[] = [];
  /** Appends a match unless that CIK has already been returned. */
  const addMatch = (cik: string, ticker: string, name: string): void => {
    if (seenCiks.has(cik)) return;
    seenCiks.add(cik);
    matches.push({ cik, ticker, name });
  };
  // Overall cap on the result size: once reached, substring scanning stops.
  const maxMatches = terms.length * 8;
  for (const term of terms) {
    const direct = parseCik(term);
    if (direct) {
      const hit = index.find(r => r.cik === direct);
      addMatch(direct, hit ? hit.ticker : '', hit ? hit.name : '');
      continue;
    }
    const norm = term.toUpperCase();
    const exact = index.find(r => r.ticker === norm || r.nameUpper === norm);
    if (exact) {
      // Resolved exactly: record it (no-op for a repeated term) and do NOT
      // fall back to substring matching, which would add unrelated companies.
      addMatch(exact.cik, exact.ticker, exact.name);
      continue;
    }
    // No exact hit exists, so no ticker can equal `norm`; only names are left.
    for (const r of index) {
      if (!r.nameUpper.includes(norm) || seenCiks.has(r.cik)) continue;
      addMatch(r.cik, r.ticker, r.name);
      if (matches.length >= maxMatches) break;
    }
  }
  return matches;
}
/* ------------------------------------------------------------------ *
* Tool 2 — secFinancials(cik, concepts?)
*
* Returns the company-facts JSON for a CIK. Connector design
* principle (Std 5 — narrow-first): a tool that can return a large
* response body MUST default to a summary describing the available
* data elements; full data is returned only when the caller names
* specific elements. Here that means:
* - secFinancials(cik) → summary: { entity, availableConcepts, conceptCountsByTaxonomy, summary: true } — facts omitted.
* - secFinancials(cik, "Revenues,Assets") → targeted: full filtered facts time series for the named concepts.
* This keeps the LLM from accidentally pulling 900+ XBRL concepts
* into context when it only needs two.
* ------------------------------------------------------------------ */
/** Result of secFinancials: either a summary (concept menu only) or the facts for the named concepts. */
export interface SecCompanyFacts {
/** CIK zero-padded to 10 digits. */
readonly cik: string;
/** `entityName` from the companyfacts response; empty string when it is absent or not a string. */
readonly entityName: string;
/** Omitted in summary mode (no `concepts` argument). Populated only when
* the caller names specific concepts; keyed by taxonomy, then by concept name. */
readonly facts?: Record<string, Record<string, unknown>>;
/** True when this is a summary response (no `concepts` was specified).
* Use the listed availableConcepts to drive a targeted re-call. */
readonly summary?: boolean;
/** Sorted, de-duplicated concept names across all taxonomies; set in both modes. */
readonly availableConcepts?: readonly string[];
/** Per-taxonomy count of available concepts, e.g. {"us-gaap": 870, "dei": 35}. */
readonly conceptCountsByTaxonomy?: Readonly<Record<string, number>>;
/** The trimmed concept names the caller asked for; set only in targeted mode. */
readonly conceptsFilter?: readonly string[];
/** `url` reported by the HTTP response for the companyfacts request. */
readonly sourceUrl: string;
/** ISO-8601 timestamp taken when the result object was built. */
readonly capturedAt: string;
}
/**
 * Fetches XBRL company facts for a CIK.
 *
 * Summary mode (no `concepts`, or only blank names): returns the entity name,
 * the sorted list of available concept names and per-taxonomy counts; the
 * facts body is omitted. Targeted mode: returns the full entries for the
 * named concepts only, matched case-insensitively in every taxonomy.
 *
 * @param cik Numeric CIK in any form parseCik accepts.
 * @param concepts Optional comma-separated concept names.
 * @returns Summary or targeted company facts.
 * @throws RetrievalError 'invalid-request' for a bad CIK, a translated HTTP
 *   error when the request fails, or 'internal' when the body is not valid
 *   JSON or is not a JSON object.
 */
export async function secFinancials(cik: string, concepts?: string): Promise<SecCompanyFacts> {
  const padded = parseCik(cik);
  if (!padded) {
    throw new RetrievalError('invalid-request', `secFinancials: not a valid CIK: "${cik}"`);
  }
  const url = `${COMPANY_FACTS_BASE}/CIK${padded}.json`;
  await limiter.acquire();
  let res;
  try {
    res = await httpGet(url, { headers: SEC_HEADERS, maxBodyBytes: COMPANY_FACTS_MAX_BYTES });
  } catch (err) {
    throw translateHttpError(err, url);
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(res.body);
  } catch (err) {
    throw new RetrievalError(
      'internal',
      `companyfacts JSON parse failed: ${err instanceof Error ? err.message : String(err)}`,
      { url },
    );
  }
  // Valid JSON can still be `null`, a scalar or an array. Reject those here
  // so property reads below cannot throw a raw TypeError.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new RetrievalError('internal', 'companyfacts JSON was not an object.', { url });
  }
  const doc = parsed as { entityName?: unknown; facts?: unknown };
  const entityName = typeof doc.entityName === 'string' ? doc.entityName : '';
  const allFacts =
    doc.facts && typeof doc.facts === 'object'
      ? (doc.facts as Record<string, Record<string, unknown>>)
      : {};
  const conceptList = (concepts ?? '')
    .split(',')
    .map(s => s.trim())
    .filter(s => s.length > 0);
  const availableConcepts = collectConceptNames(allFacts);
  const conceptCountsByTaxonomy: Record<string, number> = {};
  for (const [taxonomy, conceptMap] of Object.entries(allFacts)) {
    if (conceptMap && typeof conceptMap === 'object') {
      conceptCountsByTaxonomy[taxonomy] = Object.keys(conceptMap).length;
    }
  }
  /* Summary mode — narrow-first connector design (Std 5). The full
   * facts body is *not* returned; instead the caller gets the menu
   * of available concepts and must re-call with explicit names. */
  if (conceptList.length === 0) {
    return {
      cik: padded,
      entityName,
      summary: true,
      availableConcepts,
      conceptCountsByTaxonomy,
      sourceUrl: res.url,
      capturedAt: new Date().toISOString(),
    };
  }
  /* Targeted mode — the caller named specific concepts; return the
   * full filtered time series for each one. */
  const filtered: Record<string, Record<string, unknown>> = {};
  const wanted = new Set(conceptList.map(c => c.toLowerCase()));
  for (const [taxonomy, conceptMap] of Object.entries(allFacts)) {
    if (!conceptMap || typeof conceptMap !== 'object') continue;
    const subset: Record<string, unknown> = {};
    for (const [name, val] of Object.entries(conceptMap)) {
      if (wanted.has(name.toLowerCase())) subset[name] = val;
    }
    // Taxonomies with no requested concept are left out entirely.
    if (Object.keys(subset).length > 0) filtered[taxonomy] = subset;
  }
  return {
    cik: padded,
    entityName,
    facts: filtered,
    availableConcepts,
    conceptCountsByTaxonomy,
    conceptsFilter: conceptList,
    sourceUrl: res.url,
    capturedAt: new Date().toISOString(),
  };
}
/**
 * Lists every concept name found under any taxonomy.
 *
 * @param allFacts Facts keyed by taxonomy, then by concept name.
 * @returns Unique concept names in default `Array.prototype.sort` order.
 *   Taxonomy values that are falsy or not objects are skipped.
 */
function collectConceptNames(allFacts: Record<string, Record<string, unknown>>): string[] {
  const names = Object.values(allFacts)
    .filter(conceptMap => Boolean(conceptMap) && typeof conceptMap === 'object')
    .flatMap(conceptMap => Object.keys(conceptMap));
  const unique = Array.from(new Set(names));
  unique.sort();
  return unique;
}
/* ------------------------------------------------------------------ *
* Tool 3 — secSubmissions(cik, filingTypes?)
* Returns the company's filing history (recent filings list),
* optionally narrowed to specific filing types (e.g. "10-K,10-Q").
* ------------------------------------------------------------------ */
/** One row of a company's recent-filings list, as assembled by secSubmissions. */
export interface SecFilingHit {
/** Accession number as it appears in the submissions response (dashes kept). */
readonly accessionNumber: string;
/** `filingDate` value for this row, passed through as a string. */
readonly filingDate: string;
/** `reportDate` value for this row, passed through as a string. */
readonly reportDate: string;
/** Form type of the filing (e.g. "10-K"). */
readonly form: string;
/** File name of the primary document; may be an empty string. */
readonly primaryDocument: string;
/** Archive URL built from the CIK (leading zeros dropped), the dash-less accession
* number and `primaryDocument`; empty string when `primaryDocument` is empty. */
readonly primaryDocumentUrl: string;
}
/** Result of secSubmissions: company header fields plus the (optionally filtered) recent filings. */
export interface SecSubmissions {
/** CIK zero-padded to 10 digits. */
readonly cik: string;
/** `name` from the submissions response; empty string when absent or not a string. */
readonly entityName: string;
/** String entries of the response's `tickers` array; non-string entries are dropped. */
readonly tickers: readonly string[];
/** `sic` from the response; empty string when absent or not a string. */
readonly sic: string;
/** `sicDescription` from the response; empty string when absent or not a string. */
readonly sicDescription: string;
/** Recent filings, narrowed to `filingTypesFilter` when one was supplied. */
readonly filings: readonly SecFilingHit[];
/** Upper-cased form types that were requested; undefined when no filter was given. */
readonly filingTypesFilter?: readonly string[];
/** `url` reported by the HTTP response for the submissions request. */
readonly sourceUrl: string;
/** ISO-8601 timestamp taken when the result object was built. */
readonly capturedAt: string;
}
/**
 * Fetches a company's recent filing history.
 *
 * The response stores `filings.recent` as parallel arrays (one per field).
 * These are zipped into SecFilingHit rows, using the shortest array as the
 * row count, then optionally narrowed to the requested form types.
 *
 * @param cik Numeric CIK in any form parseCik accepts.
 * @param filingTypes Optional comma-separated form types (e.g. "10-K,10-Q"),
 *   matched case-insensitively.
 * @returns Company header fields plus the filtered filings.
 * @throws RetrievalError 'invalid-request' for a bad CIK, a translated HTTP
 *   error when the request fails, or 'internal' when the body is not valid
 *   JSON or is not a JSON object.
 */
export async function secSubmissions(cik: string, filingTypes?: string): Promise<SecSubmissions> {
  const padded = parseCik(cik);
  if (!padded) {
    throw new RetrievalError('invalid-request', `secSubmissions: not a valid CIK: "${cik}"`);
  }
  const url = `${SUBMISSIONS_BASE}/CIK${padded}.json`;
  await limiter.acquire();
  let res;
  try {
    res = await httpGet(url, { headers: SEC_HEADERS, maxBodyBytes: SUBMISSIONS_MAX_BYTES });
  } catch (err) {
    throw translateHttpError(err, url);
  }
  let parsedJson: unknown;
  try {
    parsedJson = JSON.parse(res.body);
  } catch (err) {
    throw new RetrievalError(
      'internal',
      `submissions JSON parse failed: ${err instanceof Error ? err.message : String(err)}`,
      { url },
    );
  }
  // Valid JSON can still be `null`, a scalar or an array. Reject those here
  // so property reads below cannot throw a raw TypeError.
  if (parsedJson === null || typeof parsedJson !== 'object' || Array.isArray(parsedJson)) {
    throw new RetrievalError('internal', 'submissions JSON was not an object.', { url });
  }
  const parsed = parsedJson as Record<string, unknown>;
  const entityName = typeof parsed.name === 'string' ? parsed.name : '';
  const tickersRaw = Array.isArray(parsed.tickers) ? parsed.tickers : [];
  const tickers = tickersRaw.filter((t): t is string => typeof t === 'string');
  const sic = typeof parsed.sic === 'string' ? parsed.sic : '';
  const sicDescription = typeof parsed.sicDescription === 'string' ? parsed.sicDescription : '';
  const filingsNode = parsed.filings;
  const recent =
    filingsNode !== null && typeof filingsNode === 'object'
      ? (filingsNode as Record<string, unknown>).recent
      : undefined;
  const filings: SecFilingHit[] = [];
  if (recent !== null && typeof recent === 'object') {
    const columns = recent as Record<string, unknown>;
    const accs = pickStrings(columns.accessionNumber);
    const dates = pickStrings(columns.filingDate);
    const reports = pickStrings(columns.reportDate);
    const forms = pickStrings(columns.form);
    const docs = pickStrings(columns.primaryDocument);
    const n = Math.min(accs.length, dates.length, forms.length, docs.length, reports.length);
    // Archive paths use the CIK without its zero padding.
    const cikPathSegment = parseInt(padded, 10);
    for (let i = 0; i < n; i++) {
      const accession = accs[i]!;
      const accNoDash = accession.replace(/-/g, '');
      const primary = docs[i]!;
      const primaryUrl = primary
        ? `https://www.sec.gov/Archives/edgar/data/${cikPathSegment}/${accNoDash}/${primary}`
        : '';
      filings.push({
        accessionNumber: accession,
        filingDate: dates[i]!,
        reportDate: reports[i]!,
        form: forms[i]!,
        primaryDocument: primary,
        primaryDocumentUrl: primaryUrl,
      });
    }
  }
  const formList = (filingTypes ?? '')
    .split(',')
    .map(s => s.trim().toUpperCase())
    .filter(s => s.length > 0);
  const wantedForms = new Set(formList);
  const filtered =
    formList.length === 0 ? filings : filings.filter(f => wantedForms.has(f.form.toUpperCase()));
  return {
    cik: padded,
    entityName,
    tickers,
    sic,
    sicDescription,
    filings: filtered,
    filingTypesFilter: formList.length > 0 ? formList : undefined,
    sourceUrl: res.url,
    capturedAt: new Date().toISOString(),
  };
}
/**
 * Coerces an unknown value into an array of strings.
 *
 * @param v Expected to be an array; anything else yields an empty array.
 * @returns One string per element: strings pass through, null/undefined
 *   become '', and every other value goes through String().
 */
function pickStrings(v: unknown): readonly string[] {
  const asText = (item: unknown): string => {
    if (typeof item === 'string') return item;
    return item === null || item === undefined ? '' : String(item);
  };
  return Array.isArray(v) ? v.map(item => asText(item)) : [];
}
/* ------------------------------------------------------------------ *
* Tool 4 — secFilingDocument(documentUrl, maxChars?)
* Downloads a filing document, strips HTML tags + entities, and
* truncates the resulting text at maxChars. The URL's host must be
* sec.gov or one of its subdomains (e.g. www.sec.gov, data.sec.gov).
* ------------------------------------------------------------------ */
/** Result of secFilingDocument: the plain-text rendering of one downloaded filing document. */
export interface SecFilingDocument {
/** `url` reported by the HTTP response. */
readonly url: string;
/** `contentType` reported by the HTTP response. */
readonly contentType: string;
/** Body after stripHtml, cut to the effective character limit. */
readonly text: string;
/** Length of the response body before stripping.
* NOTE(review): secFilingDocument sets this from `res.body.length`, which for a
* string body is a count of UTF-16 code units rather than bytes — confirm. */
readonly originalBytes: number;
/** True when the stripped text was longer than the limit and was cut. */
readonly truncated: boolean;
/** ISO-8601 timestamp taken when the result object was built. */
readonly capturedAt: string;
}
/**
 * Downloads one filing document and returns it as truncated plain text.
 *
 * The URL is untrusted (it normally comes from an LLM tool call), so it is
 * validated before any request is made: it must parse, use http or https,
 * and its host must be sec.gov or a subdomain of it.
 *
 * @param documentUrl Absolute URL of the document.
 * @param maxChars Optional cap on returned characters; see clampChars for
 *   the default and the upper bound.
 * @returns The stripped text plus response metadata.
 * @throws RetrievalError 'invalid-request' when the URL is missing,
 *   unparsable, not http(s) or not an SEC host; a translated HTTP error when
 *   the download fails.
 */
export async function secFilingDocument(documentUrl: string, maxChars?: number): Promise<SecFilingDocument> {
  if (typeof documentUrl !== 'string' || !documentUrl) {
    throw new RetrievalError('invalid-request', 'secFilingDocument: documentUrl is required.');
  }
  let parsedUrl: URL;
  try {
    parsedUrl = new URL(documentUrl);
  } catch {
    throw new RetrievalError('invalid-request', `secFilingDocument: invalid URL "${documentUrl}".`);
  }
  // A host check alone would still admit ftp://sec.gov/... or file://sec.gov/...
  if (parsedUrl.protocol !== 'https:' && parsedUrl.protocol !== 'http:') {
    throw new RetrievalError(
      'invalid-request',
      `secFilingDocument: refusing URL scheme "${parsedUrl.protocol}". Only http(s) URLs are allowed.`,
    );
  }
  if (!/(^|\.)sec\.gov$/i.test(parsedUrl.hostname)) {
    throw new RetrievalError(
      'invalid-request',
      `secFilingDocument: refusing non-SEC host "${parsedUrl.hostname}". Only sec.gov hosts are allowed.`,
    );
  }
  const limit = clampChars(maxChars);
  await limiter.acquire();
  let res;
  try {
    res = await httpGet(documentUrl, {
      headers: SEC_HEADERS_ANY,
      maxBodyBytes: FILING_DOC_MAX_BYTES,
    });
  } catch (err) {
    throw translateHttpError(err, documentUrl);
  }
  // NOTE(review): this is the length of the decoded body string, which can
  // differ from the byte count for non-ASCII documents — confirm whether the
  // HTTP client exposes the raw byte length.
  const originalBytes = res.body.length;
  const stripped = stripHtml(res.body);
  const truncated = stripped.length > limit;
  const text = truncated ? stripped.slice(0, limit) : stripped;
  return {
    url: res.url,
    contentType: res.contentType,
    text,
    originalBytes,
    truncated,
    capturedAt: new Date().toISOString(),
  };
}
/**
 * Normalises a caller-supplied character budget.
 *
 * @param n Requested maximum number of characters.
 * @returns DEFAULT_FILING_TRUNCATE_CHARS when `n` is missing, not a finite
 *   number, or floors to zero or below; otherwise `n` floored to an integer
 *   and capped at 400,000.
 */
function clampChars(n?: number): number {
  const MAX_FILING_CHARS = 400_000;
  if (typeof n !== 'number' || !Number.isFinite(n)) return DEFAULT_FILING_TRUNCATE_CHARS;
  // Floor BEFORE the positivity check: a fraction such as 0.5 would otherwise
  // pass `> 0` and then floor to a zero-character budget.
  const whole = Math.floor(n);
  if (whole <= 0) return DEFAULT_FILING_TRUNCATE_CHARS;
  return Math.min(whole, MAX_FILING_CHARS);
}
/** Named character references that are decoded; keys are lower-case names without the ampersand and semicolon. */
const HTML_NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['nbsp', ' '],
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
]);

/**
 * Decodes the inside of one character reference (the text between the
 * ampersand and the semicolon).
 *
 * Numeric references ("#160", "#x2019") become the matching character;
 * no-break space (160) is folded to a plain space. Unknown names and invalid
 * code points become a single space.
 */
function decodeHtmlEntity(reference: string): string {
  if (!reference.startsWith('#')) {
    return HTML_NAMED_ENTITIES.get(reference.toLowerCase()) ?? ' ';
  }
  const isHex = reference.charAt(1).toLowerCase() === 'x';
  const codePoint = parseInt(reference.slice(isHex ? 2 : 1), isHex ? 16 : 10);
  const isScalarValue =
    Number.isInteger(codePoint) &&
    codePoint > 0 &&
    codePoint <= 0x10ffff &&
    !(codePoint >= 0xd800 && codePoint <= 0xdfff);
  if (!isScalarValue || codePoint === 0xa0) return ' ';
  return String.fromCodePoint(codePoint);
}

/**
 * Reduces an HTML document to plain text.
 *
 * Steps, in order: drop script and style elements and comments; replace every
 * remaining tag with a space; decode character references; collapse runs of
 * horizontal whitespace to one space; trim whitespace around line breaks;
 * trim the ends.
 *
 * References are decoded in a single pass and only after tags are removed,
 * so escaped markup stays as literal text and a doubly escaped reference is
 * unescaped exactly once.
 *
 * @param input Raw HTML (or already-plain text).
 * @returns The plain-text rendering.
 */
export function stripHtml(input: string): string {
  return input
    .replace(/<script[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[\s\S]*?<\/style>/gi, ' ')
    .replace(/<!--[\s\S]*?-->/g, ' ')
    .replace(/<[^>]+>/g, ' ')
    .replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);/gi, (_match: string, reference: string) =>
      decodeHtmlEntity(reference),
    )
    .replace(/[ \t\r\f\v]+/g, ' ')
    .replace(/\s*\n\s*/g, '\n')
    .trim();
}
/* ------------------------------------------------------------------ *
* Anthropic tool descriptors. Pulled in by agents that want to expose
* SEC retrieval as LLM tools. Names use snake_case per Anthropic
* convention; the executor maps them back to the camelCase functions
* above.
* ------------------------------------------------------------------ */
/** Shape of one LLM tool definition exported in SEC_TOOLS. */
export interface SecToolDescriptor {
/** Tool name in snake_case; executeSecTool dispatches on this value. */
readonly name: string;
/** Natural-language description of what the tool does and when to call it. */
readonly description: string;
/** JSON-Schema-style description of the tool's input object. */
readonly input_schema: {
readonly type: 'object';
/** Input fields, each with a type name and a description. */
readonly properties: Record<string, { type: string; description: string }>;
/** Names of the properties the caller must supply. */
readonly required: readonly string[];
};
}
/**
* Tool descriptors for the four SEC functions. Each `name` here has a matching
* `case` in executeSecTool, and each property name matches the input key that
* executeSecTool reads for that tool.
*/
export const SEC_TOOLS: readonly SecToolDescriptor[] = [
// Maps to secEdgarCompanies(searchTerms).
{
name: 'sec_edgar_companies',
description:
'Resolve company tickers or names to SEC CIK numbers. Accepts a comma-separated search string ' +
'(e.g. "JPM, Bank of America, GS"). Returns an array of {cik, ticker, name} matches. ' +
'Use this FIRST to obtain the CIK before calling sec_financials or sec_submissions.',
input_schema: {
type: 'object',
properties: {
searchTerms: {
type: 'string',
description: 'One or more tickers / company names / CIKs, comma- or newline-separated.',
},
},
required: ['searchTerms'],
},
},
// Maps to secFinancials(cik, concepts?).
{
name: 'sec_financials',
description:
'Fetch XBRL company facts from SEC EDGAR for a CIK. Two modes:\n' +
' - Summary mode (no `concepts` arg): returns ONLY the entity name and the list of available ' +
'concept names plus per-taxonomy counts; the facts body is omitted. Use this to discover which ' +
'concepts a company reports, then re-call in targeted mode.\n' +
' - Targeted mode (with `concepts`): returns the full time series (values, periods, units, ' +
'filing accessions) for the named concepts only.\n' +
'When the JobRequest already names the concepts you need (e.g. via targetMetrics or a ' +
'methodology\'s required input fields), call targeted mode directly. Do not call summary mode ' +
'first if you already know the concept names.',
input_schema: {
type: 'object',
properties: {
cik: {
type: 'string',
description: '10-digit padded CIK (or any numeric form; will be padded). Required.',
},
concepts: {
type: 'string',
description:
'Comma-separated XBRL concept names to fetch (e.g. "Revenues,OperatingExpenses"). ' +
'Omit to receive only the summary (entity + available concept names).',
},
},
required: ['cik'],
},
},
// Maps to secSubmissions(cik, filingTypes?).
{
name: 'sec_submissions',
description:
'Fetch a company\'s filing history (recent filings) for a CIK from SEC EDGAR. ' +
'Optionally filter by a comma-separated list of filing types (e.g. "10-K,10-Q,8-K"). ' +
'Returns {accessionNumber, filingDate, reportDate, form, primaryDocument, primaryDocumentUrl} per filing.',
input_schema: {
type: 'object',
properties: {
cik: { type: 'string', description: '10-digit padded CIK. Required.' },
filingTypes: {
type: 'string',
description: 'Optional comma-separated filing types to filter (e.g. "10-K,10-Q").',
},
},
required: ['cik'],
},
},
// Maps to secFilingDocument(documentUrl, maxChars?).
{
name: 'sec_filing_document',
description:
'Download a SEC filing document (10-K, 10-Q, 8-K HTML body) by its primaryDocumentUrl. ' +
'Strips HTML to plain text and truncates at maxChars (default 50,000). ' +
'Only sec.gov URLs are allowed.',
input_schema: {
type: 'object',
properties: {
documentUrl: { type: 'string', description: 'A primaryDocumentUrl from sec_submissions.' },
maxChars: {
type: 'number',
description: 'Maximum characters of stripped text to return (default 50,000, max 400,000).',
},
},
required: ['documentUrl'],
},
},
];
/** Envelope returned by executeSecTool; exactly one of `result` / `error` is set by that function. */
export interface SecToolResult {
/** True when the tool ran and `result` holds its return value. */
readonly ok: boolean;
/** The tool function's return value; present only when `ok` is true. */
readonly result?: unknown;
/** Failure details when `ok` is false: a RetrievalError category, 'unknown-tool', or 'internal'. */
readonly error?: { readonly category: string; readonly message: string };
}
/**
 * Runs one SEC tool by its snake_case tool name and reports the outcome as
 * a plain envelope instead of throwing.
 *
 * @param name Tool name as listed in SEC_TOOLS.
 * @param rawInput Tool input; anything that is not an object is treated as
 *   an empty input. Fields of the wrong type are treated as absent.
 * @returns `{ ok: true, result }` on success. On failure `{ ok: false, error }`,
 *   where the category is the RetrievalError's category, 'unknown-tool' for
 *   an unrecognised name, or 'internal' for any other thrown value.
 */
export async function executeSecTool(name: string, rawInput: unknown): Promise<SecToolResult> {
  const args: Record<string, unknown> =
    rawInput && typeof rawInput === 'object' ? (rawInput as Record<string, unknown>) : {};
  /** Reads a string field, or undefined when it is missing or not a string. */
  const optionalString = (key: string): string | undefined => {
    const value = args[key];
    return typeof value === 'string' ? value : undefined;
  };
  /** Reads a string field, or '' so the callee's own validation rejects it. */
  const requiredString = (key: string): string => optionalString(key) ?? '';

  try {
    let result: unknown;
    if (name === 'sec_edgar_companies') {
      result = await secEdgarCompanies(requiredString('searchTerms'));
    } else if (name === 'sec_financials') {
      result = await secFinancials(requiredString('cik'), optionalString('concepts'));
    } else if (name === 'sec_submissions') {
      result = await secSubmissions(requiredString('cik'), optionalString('filingTypes'));
    } else if (name === 'sec_filing_document') {
      const requestedChars = args.maxChars;
      result = await secFilingDocument(
        requiredString('documentUrl'),
        typeof requestedChars === 'number' ? requestedChars : undefined,
      );
    } else {
      return { ok: false, error: { category: 'unknown-tool', message: `unknown SEC tool "${name}"` } };
    }
    return { ok: true, result };
  } catch (err) {
    const category = err instanceof RetrievalError ? err.category : 'internal';
    const message = err instanceof Error ? err.message : String(err);
    return { ok: false, error: { category, message } };
  }
}
/* ------------------------------------------------------------------ *
* Legacy RetrievalConnector wrapper — registers `sec-edgar` with the
* dispatcher so JobRequest.sources can include it. fetch() resolves a
* CIK from the entity and then calls secFinancials(cik) with no
* concepts filter, so the payload is the summary-mode response (entity
* name, available concept names, per-taxonomy counts), not the full
* facts body. Modern agents prefer the four-function surface above.
* ------------------------------------------------------------------ */
/**
 * Dispatcher-facing connector registered under the source name `sec-edgar`.
 */
export class SecEdgarConnector implements RetrievalConnector {
  readonly name = 'sec-edgar';
  readonly authRequired = false;
  readonly rateLimit = { requestsPerSecond: 10, burstSize: 10 };

  /** Always reports the source as available; no probe request is made. */
  async isAvailable(): Promise<boolean> {
    return true;
  }

  /**
   * Resolves the entity to a CIK and returns secFinancials(cik) as a raw
   * JSON payload.
   *
   * The entity id and aliases are first tried as numeric CIKs. If none
   * parses, they are joined into one search string for secEdgarCompanies and
   * the first match is used.
   *
   * @throws RetrievalError 'no-content' when no CIK can be resolved; errors
   *   from secEdgarCompanies and secFinancials propagate unchanged.
   */
  async fetch(params: FetchParams): Promise<RawPayload> {
    const { entity } = params;
    const candidates = [entity.id, ...entity.aliases];
    const numericCiks = candidates.flatMap(candidate => {
      const parsed = parseCik(candidate);
      return parsed ? [parsed] : [];
    });
    let cik: string | undefined = numericCiks[0];

    if (!cik) {
      const searchTerms = candidates.filter(c => c && c.trim().length > 0).join(', ');
      const [firstMatch] = await secEdgarCompanies(searchTerms);
      cik = firstMatch?.cik;
    }

    if (!cik) {
      const aliasList = entity.aliases.join(', ') || '<none>';
      throw new RetrievalError(
        'no-content',
        `unable to resolve CIK for entity "${entity.id}" (aliases: ${aliasList})`,
        { entity: entity.id, aliases: entity.aliases },
      );
    }

    const facts = await secFinancials(cik);
    const metadata = {
      cik,
      entity: entity.id,
      period: params.period,
      endpoint: 'companyfacts',
    };
    return {
      source: this.name,
      sourceUrl: facts.sourceUrl,
      capturedAt: facts.capturedAt,
      contentType: 'application/json',
      rawContent: JSON.stringify(facts),
      metadata,
    };
  }
}
/* ------------------------------------------------------------------ *
* Helpers
* ------------------------------------------------------------------ */
/**
 * Normalises a CIK to its 10-digit, zero-padded form.
 *
 * Accepts 1–10 ASCII digits, optionally preceded by "CIK" (any case) and
 * surrounded by whitespace.
 *
 * @param value Candidate CIK text.
 * @returns The padded CIK, or null when `value` is not a string or is not a
 *   valid CIK.
 */
export function parseCik(value: string): string | null {
  // Callers hand over loosely validated data; a non-string must not throw.
  if (typeof value !== 'string') return null;
  // Trim first so a padded "  CIK123" still has its prefix at position 0,
  // then trim again for the "CIK 123" spelling.
  const digits = value.trim().replace(/^CIK/i, '').trim();
  if (!/^\d{1,10}$/.test(digits)) return null;
  return digits.padStart(10, '0');
}
/**
 * Converts any error thrown by the HTTP layer into a RetrievalError.
 *
 * HttpError categories map as follows:
 *   - timeout, network, aborted → 'unavailable'
 *   - body-too-large → 'internal'
 *   - status → 404 'no-content'; 401/403 'auth-failed'; 429 'rate-limited';
 *     400 'invalid-request'; every other status 'internal'
 * Anything else (an HttpError with another category, or a non-HttpError
 * value) becomes 'internal', carrying the error's message or its string form.
 *
 * @param err The caught value.
 * @param url Request URL, attached to the RetrievalError details.
 * @returns A RetrievalError; this function never throws one itself.
 */
export function translateHttpError(err: unknown, url: string): RetrievalError {
  if (err instanceof HttpError) {
    const { category, message } = err;
    if (category === 'timeout' || category === 'network' || category === 'aborted') {
      return new RetrievalError('unavailable', message, { url });
    }
    if (category === 'body-too-large') {
      return new RetrievalError('internal', message, { url });
    }
    if (category === 'status') {
      switch (err.status) {
        case 404:
          return new RetrievalError('no-content', message, { url });
        case 401:
        case 403:
          return new RetrievalError('auth-failed', message, { url });
        case 429:
          return new RetrievalError('rate-limited', message, { url });
        case 400:
          return new RetrievalError('invalid-request', message, { url });
        default:
          return new RetrievalError('internal', message, { url });
      }
    }
  }
  const fallbackMessage = err instanceof Error ? err.message : String(err);
  return new RetrievalError('internal', fallbackMessage, { url });
}