BID · Console
Baseline · Intelligence · Decision
src/tools/retrieval/connectors/sec-edgar-xbrl.ts 15,711 bytes · typescript
/**
 * SEC EDGAR XBRL extensions.
 *
 * Two pure-retrieval functions for working with SEC's XBRL APIs at a
 * finer granularity than secFinancials (which returns the entire
 * company-facts blob):
 *
 *   1. secCompanyConcept(cik, taxonomy, tag, unit?, period?)
 *        One company, one concept — returns the full time series for
 *        a single XBRL tag. Useful when the agent already knows the
 *        concept it wants (e.g. "Revenues") and only needs that, not
 *        all 300+ concepts the company reports.
 *
 *   2. secXbrlFrames(taxonomy, tag, unit, period)
 *        Cross-company snapshot at a point in time — every company
 *        that reported the concept for the supplied period frame
 *        (e.g. "CY2024Q4I", "CY2024"). The peer-benchmarking
 *        primitive: no LLM needed to pull "every bank's revenue for
 *        FY-2024 in one call".
 *
 * Both share the User-Agent and rate limiter exported by sec-edgar.ts
 * so there is no risk of doubling the outbound rate against SEC.
 * Every failure is translated to RetrievalError (Std 12).
 */

import { RetrievalError } from '../interface.js';
import { httpGet } from '../http-client.js';
import {
  SEC_HEADERS_JSON,
  secLimiter,
  parseCik,
  translateHttpError,
} from './sec-edgar.js';

// Endpoint roots for the two XBRL APIs this module talks to.
const COMPANY_CONCEPT_BASE = 'https://data.sec.gov/api/xbrl/companyconcept';
const FRAMES_BASE = 'https://data.sec.gov/api/xbrl/frames';

// Response-size caps handed to httpGet as `maxBodyBytes`.
const BYTES_PER_MIB = 1024 * 1024;
const COMPANY_CONCEPT_MAX_BYTES = 16 * BYTES_PER_MIB;
const FRAMES_MAX_BYTES = 64 * BYTES_PER_MIB;

/* ------------------------------------------------------------------ *
 * Tool 8 — secCompanyConcept(cik, taxonomy, tag, unit?, period?)
 *
 * URL: data.sec.gov/api/xbrl/companyconcept/CIK{padded}/{taxonomy}/{tag}.json
 * Returns one concept's time series for one company. Lighter than
 * secFinancials when you already know which tag you want.
 *
 * Scope parameters (connector design principle — narrow-first):
 *   - `unit`: narrow to one unit bucket ("USD", "shares", ...).
 *   - `period`: narrow to rows matching a JobRequest-style period
 *       ("FY-2024", "Q3-2024", "latest-annual", "latest-quarter").
 *       Filtering happens here so the agent never has to wade through
 *       every reported period to find the one it asked for.
 * Both are optional; omit either for the full unfiltered slice on
 * that axis (discovery use).
 * ------------------------------------------------------------------ */

/**
 * One data point of a concept's time series, taken from a `units`
 * bucket of the companyconcept response.
 *
 * Only `val` is checked at runtime (see `isConceptRow`); every other
 * field is passed through exactly as the payload supplied it, so the
 * optional types below are declared but not enforced.
 */
export interface SecConceptRow {
  /** Reported value — the one field a row must have to be accepted. */
  readonly val: number;
  /** NOTE(review): presumably the period start date for duration facts — confirm against SEC docs. */
  readonly start?: string;
  /** NOTE(review): presumably the period end / instant date — confirm against SEC docs. */
  readonly end?: string;
  /** Fiscal year; compared against the year of "FY-2024" / "Q3-2024" style period filters. */
  readonly fy?: number;
  /** Fiscal period code; the period filter compares it with "FY" and "Q1".."Q4". */
  readonly fp?: string;
  /** NOTE(review): presumably the filing form type (e.g. 10-K) — confirm. */
  readonly form?: string;
  /** NOTE(review): presumably the filing date — confirm. */
  readonly filed?: string;
  /** NOTE(review): presumably the filing accession number — confirm. */
  readonly accn?: string;
  /** NOTE(review): presumably the SEC frame id the row belongs to (e.g. "CY2024Q4I") — confirm. */
  readonly frame?: string;
}

/**
 * Result of `secCompanyConcept`: one concept's rows for one company,
 * grouped by unit, plus the request parameters and provenance.
 */
export interface SecCompanyConcept {
  /** CIK in the normalized form returned by `parseCik`. */
  readonly cik: string;
  /** Taxonomy exactly as requested. */
  readonly taxonomy: string;
  /** Concept tag exactly as requested. */
  readonly tag: string;
  /** `label` from the payload, or '' when it is missing or not a string. */
  readonly label: string;
  /** `description` from the payload, or '' when it is missing or not a string. */
  readonly description: string;
  /** Rows keyed by the payload's unit names, after the unit / period narrowing. */
  readonly units: Readonly<Record<string, readonly SecConceptRow[]>>;
  /** The `unit` argument the caller passed, if any. */
  readonly unitFilter?: string;
  /** The `period` argument the caller passed, if any. */
  readonly periodFilter?: string;
  /** URL reported by the HTTP response (`res.url`). */
  readonly sourceUrl: string;
  /** ISO-8601 timestamp taken when the result object was built. */
  readonly capturedAt: string;
}

/**
 * Fetch the time series of a single XBRL concept for one company.
 *
 * @param cik - Company CIK; normalized through `parseCik`.
 * @param taxonomy - Taxonomy path segment (e.g. "us-gaap").
 * @param tag - Concept tag path segment (e.g. "Revenues").
 * @param unit - Optional unit bucket to keep, compared case-insensitively.
 *   Omitted or empty keeps every bucket.
 * @param period - Optional period narrow applied to each bucket via
 *   `applyPeriodFilter`. Omitted or empty keeps every row.
 * @returns The concept's label/description and its rows grouped by unit.
 * @throws RetrievalError `invalid-request` when the CIK, taxonomy or tag
 *   fails validation; `internal` when the body is not valid JSON or is
 *   not a JSON object. HTTP failures are rethrown as whatever
 *   `translateHttpError` maps them to.
 */
export async function secCompanyConcept(
  cik: string,
  taxonomy: string,
  tag: string,
  unit?: string,
  period?: string,
): Promise<SecCompanyConcept> {
  const padded = parseCik(cik);
  if (!padded) {
    throw new RetrievalError('invalid-request', `secCompanyConcept: not a valid CIK: "${cik}"`);
  }
  // taxonomy and tag are interpolated into the URL path, so both must
  // be plain identifier-like segments.
  if (!isSafePathSegment(taxonomy)) {
    throw new RetrievalError('invalid-request', `secCompanyConcept: invalid taxonomy "${taxonomy}".`);
  }
  if (!isSafePathSegment(tag)) {
    throw new RetrievalError('invalid-request', `secCompanyConcept: invalid concept tag "${tag}".`);
  }

  const url = `${COMPANY_CONCEPT_BASE}/CIK${padded}/${taxonomy}/${tag}.json`;
  await secLimiter.acquire();
  let res;
  try {
    res = await httpGet(url, { headers: SEC_HEADERS_JSON, maxBodyBytes: COMPANY_CONCEPT_MAX_BYTES });
  } catch (err) {
    throw translateHttpError(err, url);
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(res.body);
  } catch (err) {
    throw new RetrievalError(
      'internal',
      `companyconcept JSON parse failed: ${err instanceof Error ? err.message : String(err)}`,
      { url },
    );
  }
  // A body such as `null` or `[]` parses fine but is not a usable
  // payload. Reading properties off `null` would throw a raw TypeError,
  // so reject it here as a RetrievalError instead.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new RetrievalError('internal', 'companyconcept response is not a JSON object', { url });
  }
  const payload = parsed as { label?: unknown; description?: unknown; units?: unknown };

  const label = typeof payload.label === 'string' ? payload.label : '';
  const description = typeof payload.description === 'string' ? payload.description : '';
  const rawUnits: Record<string, unknown> =
    payload.units !== null && typeof payload.units === 'object' && !Array.isArray(payload.units)
      ? (payload.units as Record<string, unknown>)
      : {};

  // Upper-case the requested unit once rather than on every bucket.
  const wantedUnit = unit ? unit.toUpperCase() : undefined;
  const units: Record<string, readonly SecConceptRow[]> = {};
  for (const [bucket, value] of Object.entries(rawUnits)) {
    if (wantedUnit !== undefined && bucket.toUpperCase() !== wantedUnit) continue;
    if (!Array.isArray(value)) continue;
    const rows = value.filter((r): r is SecConceptRow => isConceptRow(r));
    units[bucket] = period ? applyPeriodFilter(rows, period) : rows;
  }

  return {
    cik: padded,
    taxonomy,
    tag,
    label,
    description,
    units,
    unitFilter: unit,
    periodFilter: period,
    sourceUrl: res.url,
    capturedAt: new Date().toISOString(),
  };
}

/**
 * Narrow concept rows to those matching a JobRequest-style period,
 * using each row's `fy` / `fp` fields.
 *
 * Recognized forms (case-insensitive, surrounding whitespace ignored):
 *   - "FY-2024" / "FY2024"   rows with fp "FY" and fy 2024
 *   - "Q3-2024" / "Q32024"   rows with fp "Q3" and fy 2024
 *   - "latest-annual"        fp "FY" rows of the highest fy present
 *   - "latest-quarter"       highest quarter within the highest fy that
 *                            has any Q1..Q4 row
 * Any other string leaves the rows unfiltered, so the caller still sees
 * the full series and can decide for itself.
 *
 * NOTE(review): matching is purely on `fy` / `fp`. If SEC tags
 * prior-period comparatives with the filing's own fy/fp, several rows
 * can match one period — confirm whether callers need a further narrow
 * on `end` or `frame`.
 *
 * @param rows - Rows of one unit bucket; never mutated.
 * @param period - Period string as described above.
 * @returns A new array holding the matching rows, in input order.
 */
function applyPeriodFilter(rows: readonly SecConceptRow[], period: string): SecConceptRow[] {
  // Without the trim, stray whitespace makes a valid period look
  // "unknown" and silently turns the filter off.
  const wanted = period.trim();

  const annual = /^FY-?(\d{4})$/i.exec(wanted);
  if (annual) {
    const wantFy = Number(annual[1]);
    return rows.filter(r => r.fy === wantFy && r.fp === 'FY');
  }

  const quarterly = /^Q([1-4])-?(\d{4})$/i.exec(wanted);
  if (quarterly) {
    const wantFp = `Q${quarterly[1]}`;
    const wantFy = Number(quarterly[2]);
    return rows.filter(r => r.fy === wantFy && r.fp === wantFp);
  }

  // Highest fiscal year in a subset; -Infinity for an empty subset, which
  // no row can equal, so the follow-up filters then return [].
  const newestFiscalYear = (subset: readonly SecConceptRow[]): number => {
    let newest = Number.NEGATIVE_INFINITY;
    for (const r of subset) {
      if (r.fy !== undefined && r.fy > newest) newest = r.fy;
    }
    return newest;
  };

  const keyword = wanted.toLowerCase();
  if (keyword === 'latest-annual') {
    const annuals = rows.filter(r => r.fp === 'FY' && typeof r.fy === 'number');
    const newestFy = newestFiscalYear(annuals);
    return annuals.filter(r => r.fy === newestFy);
  }
  if (keyword === 'latest-quarter') {
    const quarterCode = /^Q[1-4]$/;
    const quarters = rows.filter(r => quarterCode.test(r.fp ?? '') && typeof r.fy === 'number');
    const newestFy = newestFiscalYear(quarters);
    const inNewestFy = quarters.filter(r => r.fy === newestFy);
    const quarterOf = (r: SecConceptRow): number => parseInt((r.fp ?? 'Q0').slice(1), 10);
    const newestQuarter = inNewestFy.reduce((best, r) => Math.max(best, quarterOf(r)), 0);
    return inNewestFy.filter(r => quarterOf(r) === newestQuarter);
  }

  // Unknown period: hand back a copy of everything.
  return [...rows];
}

/* ------------------------------------------------------------------ *
 * Tool 9 — secXbrlFrames(taxonomy, tag, unit, period)
 *
 * URL: data.sec.gov/api/xbrl/frames/{taxonomy}/{tag}/{unit}/{period}.json
 *
 * Period encoding (SEC convention):
 *   - "CY2024"       calendar year duration (income statement items)
 *   - "CY2024Q1"     calendar Q1 duration
 *   - "CY2024Q1I"    instantaneous Q1-end (balance sheet items)
 *   - "CY2024Q4I"    instantaneous year-end (balance sheet items)
 *
 * Returns one row per company that reported the concept in that
 * period — the peer-benchmarking primitive.
 * ------------------------------------------------------------------ */

/**
 * One company's entry in a frames response, as normalized by
 * `secXbrlFrames`. Entries lacking a numeric `val`, a numeric `cik` or
 * a non-empty `end` are dropped before they reach this shape.
 */
export interface SecFrameRow {
  /** `accn` from the payload, or '' when absent. NOTE(review): presumably the accession number — confirm. */
  readonly accn: string;
  /** Numeric CIK as delivered by the payload (not padded). */
  readonly cik: number;
  /** `entityName` from the payload, or '' when absent. */
  readonly entityName: string;
  /** `loc` from the payload, or '' when absent. NOTE(review): presumably a location code — confirm. */
  readonly loc: string;
  /** `start` from the payload when it is a string; otherwise undefined. */
  readonly start?: string;
  /** `end` from the payload; required and non-empty. */
  readonly end: string;
  /** Reported value; required. */
  readonly val: number;
}

/**
 * Result of `secXbrlFrames`: every accepted row for one concept, unit
 * and period frame, plus the request parameters and provenance.
 */
export interface SecXbrlFrames {
  /** Taxonomy exactly as requested. */
  readonly taxonomy: string;
  /** Concept tag exactly as requested. */
  readonly tag: string;
  /** Unit exactly as requested. */
  readonly unit: string;
  /** Period frame exactly as requested (e.g. "CY2024Q4I"). */
  readonly period: string;
  /** `label` from the payload, or '' when it is missing or not a string. */
  readonly label: string;
  /** `description` from the payload, or '' when it is missing or not a string. */
  readonly description: string;
  /**
   * `pts` from the payload, or 0 when it is missing or not a number.
   * NOTE(review): presumably SEC's own count of data points; it is not
   * recomputed from `rows`, so the two may differ.
   */
  readonly pts: number;
  /** Rows that passed validation, in payload order. */
  readonly rows: readonly SecFrameRow[];
  /** URL reported by the HTTP response (`res.url`). */
  readonly sourceUrl: string;
  /** ISO-8601 timestamp taken when the result object was built. */
  readonly capturedAt: string;
}

/**
 * Fetch the cross-company snapshot of one XBRL concept for one period
 * frame.
 *
 * @param taxonomy - Taxonomy path segment (e.g. "us-gaap").
 * @param tag - Concept tag path segment (e.g. "Revenues").
 * @param unit - Unit path segment (e.g. "USD").
 * @param period - SEC frame id: "CY2024" (annual duration), "CY2024Q1"
 *   (quarter duration) or "CY2024Q1I" (quarter-end instant).
 * @returns The frame's label/description/pts and one row per accepted
 *   payload entry.
 * @throws RetrievalError `invalid-request` when any argument fails
 *   validation; `internal` when the body is not valid JSON or is not a
 *   JSON object. HTTP failures are rethrown as whatever
 *   `translateHttpError` maps them to.
 */
export async function secXbrlFrames(
  taxonomy: string,
  tag: string,
  unit: string,
  period: string,
): Promise<SecXbrlFrames> {
  // All four arguments become URL path segments, so validate each one
  // before building the URL.
  if (!isSafePathSegment(taxonomy)) {
    throw new RetrievalError('invalid-request', `secXbrlFrames: invalid taxonomy "${taxonomy}".`);
  }
  if (!isSafePathSegment(tag)) {
    throw new RetrievalError('invalid-request', `secXbrlFrames: invalid concept tag "${tag}".`);
  }
  if (!isSafePathSegment(unit)) {
    throw new RetrievalError('invalid-request', `secXbrlFrames: invalid unit "${unit}".`);
  }
  // SEC's frame grammar is CY####, CY####Q# or CY####Q#I. The
  // instantaneous "I" suffix exists only on a quarter, so "CY2024I" is
  // rejected here rather than spending a rate-limited request on it.
  if (!/^CY\d{4}(Q[1-4]I?)?$/.test(period)) {
    throw new RetrievalError(
      'invalid-request',
      `secXbrlFrames: period must look like "CY2024", "CY2024Q1", or "CY2024Q4I" — got "${period}".`,
    );
  }

  const url = `${FRAMES_BASE}/${taxonomy}/${tag}/${unit}/${period}.json`;
  await secLimiter.acquire();
  let res;
  try {
    res = await httpGet(url, { headers: SEC_HEADERS_JSON, maxBodyBytes: FRAMES_MAX_BYTES });
  } catch (err) {
    throw translateHttpError(err, url);
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(res.body);
  } catch (err) {
    throw new RetrievalError(
      'internal',
      `frames JSON parse failed: ${err instanceof Error ? err.message : String(err)}`,
      { url },
    );
  }
  // A body such as `null` or `[]` parses fine but is not a usable
  // payload. Reading properties off `null` would throw a raw TypeError,
  // so reject it here as a RetrievalError instead.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new RetrievalError('internal', 'frames response is not a JSON object', { url });
  }
  const payload = parsed as { label?: unknown; description?: unknown; pts?: unknown; data?: unknown };

  const label = typeof payload.label === 'string' ? payload.label : '';
  const description = typeof payload.description === 'string' ? payload.description : '';
  const pts = typeof payload.pts === 'number' ? payload.pts : 0;
  const rawData: unknown[] = Array.isArray(payload.data) ? payload.data : [];

  const rows: SecFrameRow[] = [];
  for (const raw of rawData) {
    if (!raw || typeof raw !== 'object') continue;
    const entry = raw as Record<string, unknown>;
    const { val, cik, end } = entry;
    // val, cik and end are mandatory; an entry missing any of them is skipped.
    if (typeof val !== 'number' || typeof cik !== 'number') continue;
    if (typeof end !== 'string' || end === '') continue;
    rows.push({
      accn: typeof entry.accn === 'string' ? entry.accn : '',
      cik,
      entityName: typeof entry.entityName === 'string' ? entry.entityName : '',
      loc: typeof entry.loc === 'string' ? entry.loc : '',
      start: typeof entry.start === 'string' ? entry.start : undefined,
      end,
      val,
    });
  }

  return {
    taxonomy,
    tag,
    unit,
    period,
    label,
    description,
    pts,
    rows,
    sourceUrl: res.url,
    capturedAt: new Date().toISOString(),
  };
}

/* ------------------------------------------------------------------ *
 * Anthropic tool descriptors.
 * ------------------------------------------------------------------ */

/**
 * Shape of one tool descriptor in `SEC_XBRL_TOOLS`: a tool name, a
 * free-text description, and a flat object schema for its arguments.
 */
export interface SecXbrlToolDescriptor {
  /** Tool name; the value `executeSecXbrlTool` dispatches on. */
  readonly name: string;
  /** Free-text explanation of what the tool does and when to use it. */
  readonly description: string;
  /** Argument schema: always an object with described properties. */
  readonly input_schema: {
    readonly type: 'object';
    /** Argument name mapped to its type name and description. */
    readonly properties: Record<string, { type: string; description: string }>;
    /** Names of the arguments that must be supplied. */
    readonly required: readonly string[];
  };
}

/**
 * Descriptors for the two tools this module exposes. The `name` values
 * are the ones `executeSecXbrlTool` switches on, and the property names
 * are the keys it reads from the tool input.
 */
export const SEC_XBRL_TOOLS: readonly SecXbrlToolDescriptor[] = [
  // Single-company, single-concept time series (secCompanyConcept).
  {
    name: 'sec_company_concept',
    description:
      'Fetch one XBRL concept\'s time series for one company. Lighter than sec_financials when you ' +
      'already know the tag you need (e.g. "Revenues", "Assets", "NetIncomeLoss"). Returns rows ' +
      '{val, start, end, fy, fp, form, accn} per unit (USD, shares, USD/shares, ...).\n' +
      'When the JobRequest names a target period, pass it via the `period` argument so the tool ' +
      'filters the time series to just the matching rows (narrow-first connector design). Skipping ' +
      '`period` returns the full series — only do that for genuine discovery.',
    input_schema: {
      type: 'object',
      properties: {
        cik: { type: 'string', description: 'CIK (any numeric form; will be padded).' },
        taxonomy: { type: 'string', description: 'Taxonomy, typically "us-gaap" (also "dei", "ifrs-full", "srt").' },
        tag: { type: 'string', description: 'XBRL concept tag, e.g. "Revenues".' },
        unit: { type: 'string', description: 'Optional unit narrow (e.g. "USD"). Omit for all unit buckets.' },
        period: {
          type: 'string',
          description:
            'Optional period narrow in JobRequest form: "FY-2024" (annual), "Q3-2024" (specific quarter), ' +
            '"latest-annual" (most recent FY row), "latest-quarter" (most recent quarter row). Omit to ' +
            'return the full time series. Unknown values pass through unfiltered.',
        },
      },
      // unit and period are optional narrows, so they are not listed here.
      required: ['cik', 'taxonomy', 'tag'],
    },
  },
  // Cross-company snapshot for one period frame (secXbrlFrames).
  {
    name: 'sec_xbrl_frames',
    description:
      'Cross-company snapshot for one XBRL concept at one period frame. Returns every company that ' +
      'reported the concept for the period. Use period encoding "CY2024" (annual duration), "CY2024Q1" ' +
      '(quarter duration), or "CY2024Q4I" (year-end instantaneous balance). Output rows ' +
      '{cik, entityName, val, end, accn} — the peer-benchmarking primitive.',
    input_schema: {
      type: 'object',
      properties: {
        taxonomy: { type: 'string', description: 'Taxonomy, typically "us-gaap".' },
        tag: { type: 'string', description: 'XBRL concept tag, e.g. "Revenues".' },
        unit: { type: 'string', description: 'Unit (e.g. "USD").' },
        period: {
          type: 'string',
          description: 'Period frame, e.g. "CY2024", "CY2024Q1", "CY2024Q4I" (balance-sheet instantaneous).',
        },
      },
      required: ['taxonomy', 'tag', 'unit', 'period'],
    },
  },
];

/**
 * Envelope returned by `executeSecXbrlTool`. On success `ok` is true
 * and `result` carries the connector's return value; on failure `ok`
 * is false and `error` carries a category and a message.
 */
export interface SecXbrlToolResult {
  /** True when the tool ran to completion. */
  readonly ok: boolean;
  /** The connector result; set only when `ok` is true. */
  readonly result?: unknown;
  /**
   * Failure details; set only when `ok` is false. `category` is the
   * RetrievalError category, 'unknown-tool', or 'internal'.
   */
  readonly error?: { readonly category: string; readonly message: string };
}

/**
 * Run one of the SEC XBRL tools by name and wrap the outcome in a
 * result envelope instead of throwing.
 *
 * @param name - Tool name: 'sec_company_concept' or 'sec_xbrl_frames'.
 * @param rawInput - Tool arguments. Anything that is not an object is
 *   treated as an empty argument set; required arguments that are not
 *   strings become '' and optional ones become undefined.
 * @returns `{ ok: true, result }` on success; otherwise `{ ok: false,
 *   error }` with the RetrievalError category, 'unknown-tool', or
 *   'internal'.
 */
export async function executeSecXbrlTool(name: string, rawInput: unknown): Promise<SecXbrlToolResult> {
  const args: Record<string, unknown> =
    typeof rawInput === 'object' && rawInput !== null ? (rawInput as Record<string, unknown>) : {};

  // Argument readers: optional arguments fall back to undefined,
  // required ones to the empty string (which the connectors reject).
  const optionalArg = (key: string): string | undefined => {
    const value = args[key];
    return typeof value === 'string' ? value : undefined;
  };
  const requiredArg = (key: string): string => optionalArg(key) ?? '';

  try {
    if (name === 'sec_company_concept') {
      const concept = await secCompanyConcept(
        requiredArg('cik'),
        requiredArg('taxonomy'),
        requiredArg('tag'),
        optionalArg('unit'),
        optionalArg('period'),
      );
      return { ok: true, result: concept };
    }
    if (name === 'sec_xbrl_frames') {
      const frames = await secXbrlFrames(
        requiredArg('taxonomy'),
        requiredArg('tag'),
        requiredArg('unit'),
        requiredArg('period'),
      );
      return { ok: true, result: frames };
    }
    return { ok: false, error: { category: 'unknown-tool', message: `unknown SEC XBRL tool "${name}"` } };
  } catch (err) {
    // Connector failures keep their own category; anything else is 'internal'.
    if (err instanceof RetrievalError) {
      return { ok: false, error: { category: err.category, message: err.message } };
    }
    const message = err instanceof Error ? err.message : String(err);
    return { ok: false, error: { category: 'internal', message } };
  }
}

/* ------------------------------------------------------------------ *
 * Helpers
 * ------------------------------------------------------------------ */

/**
 * True when `s` can be dropped into a URL path as a single segment:
 * it starts with an ASCII letter and continues with ASCII letters,
 * digits, '.', '_' or '-' only. Non-string values are rejected.
 */
function isSafePathSegment(s: string): boolean {
  if (typeof s !== 'string') {
    return false;
  }
  const safeSegment = /^[A-Za-z][A-Za-z0-9._\-]*$/;
  return safeSegment.test(s);
}

/**
 * Type guard for payload rows: accepts any non-null object whose `val`
 * is a number. No other field is inspected.
 */
function isConceptRow(r: unknown): r is SecConceptRow {
  if (typeof r !== 'object' || r === null) {
    return false;
  }
  const candidate = r as { val?: unknown };
  return typeof candidate.val === 'number';
}