BID · Console
Baseline · Intelligence · Decision
src/tools/retrieval/connectors/sec-edgar-insider.ts 17,719 bytes · typescript
/**
 * SEC EDGAR insider / ownership forms.
 *
 * One pure-retrieval function — `secInsiderForm4` — for pulling Form 4
 * filings (insider trading reports). Two modes:
 *
 *   - List mode (default): returns the list of recent Form 4 filings
 *     for a CIK (one HTTP call, fast, cheap). Useful when the agent
 *     only needs accession numbers + filed dates.
 *
 *   - Detail mode (`detail: true`): for each filing in the window,
 *     fetches the underlying Form 4 XML body and parses out the
 *     non-derivative and derivative transactions (date, code, shares,
 *     price, A/D, ownership). Deterministic regex parser on SEC's
 *     fixed XML schema — no LLM in the loop.
 *
 * Costs: list mode = 1 HTTP call. Detail mode = 1 (submissions) +
 * 2 × N (filing index + XML for each of N filings, capped via
 * `maxFilings`, default 10). Stays under SEC's 10 req/s via the shared
 * `secLimiter` exported by sec-edgar.ts.
 *
 * Form 4 XML structure (relevant excerpt):
 *   <ownershipDocument>
 *     <issuer><issuerCik/><issuerName/><issuerTradingSymbol/></issuer>
 *     <reportingOwner>
 *       <reportingOwnerId><rptOwnerCik/><rptOwnerName/></reportingOwnerId>
 *       <reportingOwnerRelationship><isDirector/><isOfficer/><officerTitle/></reportingOwnerRelationship>
 *     </reportingOwner>
 *     <nonDerivativeTable>
 *       <nonDerivativeTransaction>
 *         <securityTitle><value/></securityTitle>
 *         <transactionDate><value/></transactionDate>
 *         <transactionCoding><transactionCode/></transactionCoding>
 *         <transactionAmounts>
 *           <transactionShares><value/></transactionShares>
 *           <transactionPricePerShare><value/></transactionPricePerShare>
 *           <transactionAcquiredDisposedCode><value/></transactionAcquiredDisposedCode>
 *         </transactionAmounts>
 *         <postTransactionAmounts><sharesOwnedFollowingTransaction><value/></sharesOwnedFollowingTransaction></postTransactionAmounts>
 *         <ownershipNature><directOrIndirectOwnership><value/></directOrIndirectOwnership></ownershipNature>
 *       </nonDerivativeTransaction>
 *     </nonDerivativeTable>
 *     <derivativeTable> ... </derivativeTable>
 *   </ownershipDocument>
 */

import { RetrievalError } from '../interface.js';
import { httpGet } from '../http-client.js';
import {
  SEC_HEADERS_JSON,
  SEC_HEADERS_ANY,
  secLimiter,
  parseCik,
  translateHttpError,
} from './sec-edgar.js';
import { secSubmissions } from './sec-edgar.js';
import { secFilingIndex, type SecFilingFile } from './sec-edgar-filings.js';

/** Byte limit (4 MiB) passed as `maxBodyBytes` when downloading one Form 4 XML body. */
const FORM4_XML_MAX_BYTES = 4 * 1024 * 1024;

/** Number of filings surfaced when the caller does not pass `maxFilings`. */
const DEFAULT_MAX_FILINGS = 10;
/** Upper limit that a caller-supplied `maxFilings` is clamped to. */
const HARD_MAX_FILINGS = 100;

/* ------------------------------------------------------------------ *
 * Tool 10 — secInsiderForm4(cik, opts?)
 * ------------------------------------------------------------------ */

/**
 * One transaction row parsed out of a Form 4 XML body.
 *
 * String fields fall back to '' and numeric fields to null when the
 * corresponding element is missing or cannot be parsed.
 */
export interface SecForm4Transaction {
  /** Which element the row came from: `<nonDerivativeTransaction>` or `<derivativeTransaction>`. */
  readonly tableKind: 'non-derivative' | 'derivative';
  /** `<securityTitle>` value. */
  readonly securityTitle: string;
  /** `<transactionDate>` value, passed through as the text found in the XML. */
  readonly transactionDate: string;
  /** Text content of `<transactionCode>`. */
  readonly transactionCode: string;
  /** Derivative rows only; set to true when `<equitySwapInvolved>` is truthy, otherwise left undefined. */
  readonly equitySwapInvolved?: boolean;
  /** `<transactionShares>` as a number. */
  readonly shares: number | null;
  /** `<transactionPricePerShare>` as a number. */
  readonly pricePerShare: number | null;
  /** `<transactionAcquiredDisposedCode>`; any value other than 'A' or 'D' is normalized to ''. */
  readonly acquiredOrDisposed: 'A' | 'D' | '';
  /** `<sharesOwnedFollowingTransaction>` as a number. */
  readonly sharesOwnedFollowing: number | null;
  /** `<directOrIndirectOwnership>`; any value other than 'D' or 'I' is normalized to ''. */
  readonly ownershipNature: 'D' | 'I' | '';
  /** `<natureOfOwnership>` text when present and non-empty. */
  readonly natureOfIndirectOwnership?: string;
  /** Derivative rows only: `<conversionOrExercisePrice>` as a number. */
  readonly conversionOrExercisePrice?: number | null;
  /** Derivative rows only: `<exerciseDate>` value when non-empty. */
  readonly exerciseDate?: string;
  /** Derivative rows only: `<expirationDate>` value when non-empty. */
  readonly expirationDate?: string;
}

/**
 * One Form 4 filing. The first six fields are copied from the submissions
 * entry; the remaining optional fields are filled in detail mode only.
 */
export interface SecForm4Filing {
  readonly accessionNumber: string;
  readonly filingDate: string;
  readonly reportDate: string;
  readonly form: string;
  readonly primaryDocument: string;
  readonly primaryDocumentUrl: string;
  /* Populated only when `detail: true`. */
  /** URL of the XML file that was downloaded and parsed for this filing. */
  readonly xmlUrl?: string;
  /** Contents of the `<issuer>` element; undefined when the element is missing or empty. */
  readonly issuer?: { readonly cik: string; readonly name: string; readonly ticker: string };
  /** Contents of the first `<reportingOwner>` element; undefined when the element is missing or empty. */
  readonly reportingOwner?: {
    readonly cik: string;
    readonly name: string;
    readonly isDirector: boolean;
    readonly isOfficer: boolean;
    readonly officerTitle: string;
    readonly isTenPercentOwner: boolean;
    readonly isOther: boolean;
  };
  /** Parsed rows: non-derivative transactions first, then derivative transactions. */
  readonly transactions?: readonly SecForm4Transaction[];
  /**
   * Set instead of the parsed fields when detail retrieval failed for this
   * filing: either no XML file was found in the filing index, or fetching /
   * parsing threw (the error message is stored here).
   */
  readonly parseError?: string;
}

/** Return value of `secInsiderForm4`. */
export interface SecInsiderForm4Result {
  /** The CIK as normalized by `parseCik`. */
  readonly cik: string;
  /** true when the filings were fetched in detail mode. */
  readonly detail: boolean;
  /** Matching filings after form, date, and `maxFilings` filtering. */
  readonly filings: readonly SecForm4Filing[];
  /** Echo of the lower date bound that was applied; undefined when none (or an empty string) was given. */
  readonly dateFrom?: string;
  /** Echo of the upper date bound that was applied; undefined when none (or an empty string) was given. */
  readonly dateTo?: string;
  /** ISO-8601 timestamp (`Date#toISOString`) taken when the result object was built. */
  readonly capturedAt: string;
}

/** Optional settings for `secInsiderForm4`; every field may be omitted. */
export interface SecInsiderForm4Options {
  /** Default false — list mode only (filings, no transactions). */
  readonly detail?: boolean;
  /** Inclusive lower bound on filing date (YYYY-MM-DD). Compared as a plain string against `filingDate`. */
  readonly dateFrom?: string;
  /** Inclusive upper bound on filing date (YYYY-MM-DD). Compared as a plain string against `filingDate`. */
  readonly dateTo?: string;
  /** How many filings to surface (and to parse when detail=true). Default 10, hard cap 100. */
  readonly maxFilings?: number;
}

/**
 * Retrieve Form 4 filings for one CIK, optionally with parsed transactions.
 *
 * Flow: normalize the CIK, load the submissions list, keep entries whose
 * `form` is exactly '4', apply the inclusive `dateFrom` / `dateTo` bounds
 * (plain string comparison on `filingDate`), then keep the first
 * `maxFilings` entries in the order `secSubmissions` returned them.
 *
 * In detail mode each kept filing is processed sequentially: its filing
 * index is fetched, an XML file is picked, downloaded and parsed. A failure
 * on one filing is recorded in that filing's `parseError` and does not
 * abort the remaining filings.
 *
 * @param cik  Issuer CIK in any form `parseCik` accepts.
 * @param opts Optional settings. `maxFilings` is clamped to
 *             [1, HARD_MAX_FILINGS]; a NaN value falls back to
 *             DEFAULT_MAX_FILINGS.
 * @returns The filings, an echo of the applied date bounds, and a capture timestamp.
 * @throws RetrievalError with category `invalid-request` when `parseCik`
 *         rejects the CIK. Errors raised by `secSubmissions` are not caught
 *         here and propagate to the caller.
 */
export async function secInsiderForm4(
  cik: string,
  opts: SecInsiderForm4Options = {},
): Promise<SecInsiderForm4Result> {
  const padded = parseCik(cik);
  if (!padded) {
    throw new RetrievalError('invalid-request', `secInsiderForm4: not a valid CIK: "${cik}"`);
  }
  /* NaN survives Math.min / Math.max, and slice(0, NaN) yields an empty
   * array — which would silently report "no filings". Treat NaN as "not
   * specified" instead. */
  const requested = opts.maxFilings ?? DEFAULT_MAX_FILINGS;
  const cap = Number.isNaN(requested)
    ? DEFAULT_MAX_FILINGS
    : Math.max(1, Math.min(HARD_MAX_FILINGS, requested));
  const dateFrom = opts.dateFrom ?? '';
  const dateTo = opts.dateTo ?? '';

  const subs = await secSubmissions(padded, '4');
  let filings = subs.filings.filter(f => f.form === '4');
  if (dateFrom) filings = filings.filter(f => f.filingDate >= dateFrom);
  if (dateTo) filings = filings.filter(f => f.filingDate <= dateTo);
  filings = filings.slice(0, cap);

  /* Metadata shared by list mode and detail mode. */
  const toBase = (f: (typeof filings)[number]): SecForm4Filing => ({
    accessionNumber: f.accessionNumber,
    filingDate: f.filingDate,
    reportDate: f.reportDate,
    form: f.form,
    primaryDocument: f.primaryDocument,
    primaryDocumentUrl: f.primaryDocumentUrl,
  });

  if (!opts.detail) {
    return {
      cik: padded,
      detail: false,
      filings: filings.map(f => toBase(f)),
      dateFrom: dateFrom || undefined,
      dateTo: dateTo || undefined,
      capturedAt: new Date().toISOString(),
    };
  }

  /* Detail mode: for each filing, locate the XML and parse it. */
  const detailed: SecForm4Filing[] = [];
  for (const f of filings) {
    const base = toBase(f);
    try {
      const idx = await secFilingIndex(padded, f.accessionNumber);
      const xmlFile = pickForm4Xml(idx.files);
      if (!xmlFile) {
        detailed.push({ ...base, parseError: 'no XML body file found in filing index' });
        continue;
      }
      /* Take a rate-limiter slot before the direct XML download. */
      await secLimiter.acquire();
      let res;
      try {
        res = await httpGet(xmlFile.url, { headers: SEC_HEADERS_ANY, maxBodyBytes: FORM4_XML_MAX_BYTES });
      } catch (err) {
        throw translateHttpError(err, xmlFile.url);
      }
      const parsed = parseForm4Xml(res.body);
      detailed.push({
        ...base,
        xmlUrl: xmlFile.url,
        issuer: parsed.issuer,
        reportingOwner: parsed.reportingOwner,
        transactions: parsed.transactions,
      });
    } catch (err) {
      /* Best effort: keep the filing's metadata and record why detail failed. */
      detailed.push({
        ...base,
        parseError: err instanceof Error ? err.message : String(err),
      });
    }
  }

  return {
    cik: padded,
    detail: true,
    filings: detailed,
    dateFrom: dateFrom || undefined,
    dateTo: dateTo || undefined,
    capturedAt: new Date().toISOString(),
  };
}

/* ------------------------------------------------------------------ *
 * Form 4 XML parser — deterministic, no XML library.
 *
 * Form 4 XML is a tightly-scoped schema with a fixed element set. We
 * pick out the elements we need with tag-specific regexes; a full XML
 * parser is overkill for a schema this small, and skipping it avoids an
 * npm dependency. The parser is forgiving — missing fields become empty
 * strings / nulls rather than throwing.
 * ------------------------------------------------------------------ */

/** Result of `parseForm4Xml`: the pieces of one Form 4 XML document. */
interface ParsedForm4 {
  /** Parsed `<issuer>` element, or undefined when it is missing or empty. */
  readonly issuer?: SecForm4Filing['issuer'];
  /** Parsed first `<reportingOwner>` element, or undefined when it is missing or empty. */
  readonly reportingOwner?: SecForm4Filing['reportingOwner'];
  /** Non-derivative rows followed by derivative rows. */
  readonly transactions: readonly SecForm4Transaction[];
}

/**
 * Parse a Form 4 XML document into issuer, reporting owner and transactions.
 *
 * Rows from `<nonDerivativeTransaction>` elements come first, followed by
 * rows from `<derivativeTransaction>` elements, each group in document order.
 */
function parseForm4Xml(xml: string): ParsedForm4 {
  const isRow = (row: SecForm4Transaction | null): row is SecForm4Transaction => Boolean(row);

  const nonDerivativeRows = Array.from(
    iterateBlocks(xml, 'nonDerivativeTransaction'),
    body => parseNonDerivativeTransaction(body),
  );
  const derivativeRows = Array.from(
    iterateBlocks(xml, 'derivativeTransaction'),
    body => parseDerivativeTransaction(body),
  );

  return {
    issuer: parseIssuer(xml),
    reportingOwner: parseReportingOwner(xml),
    transactions: [...nonDerivativeRows, ...derivativeRows].filter(isRow),
  };
}

/**
 * Read the `<issuer>` element: CIK, name and trading symbol.
 * Returns undefined when the element is missing or has no content;
 * individual missing children become ''.
 */
function parseIssuer(xml: string): SecForm4Filing['issuer'] {
  const issuerXml = firstBlock(xml, 'issuer');
  if (!issuerXml) {
    return undefined;
  }
  const child = (tag: string): string => textOf(issuerXml, tag) ?? '';
  const cik = child('issuerCik');
  const name = child('issuerName');
  const ticker = child('issuerTradingSymbol');
  return { cik, name, ticker };
}

/**
 * Read the first `<reportingOwner>` element: identity plus relationship flags.
 *
 * Only the first `<reportingOwner>` in the document is examined. Returns
 * undefined when it is missing or has no content. Missing text children
 * become '' and missing flag children become false.
 */
function parseReportingOwner(xml: string): SecForm4Filing['reportingOwner'] {
  const ownerXml = firstBlock(xml, 'reportingOwner');
  if (!ownerXml) {
    return undefined;
  }
  const identityXml = firstBlock(ownerXml, 'reportingOwnerId') ?? '';
  const relationshipXml = firstBlock(ownerXml, 'reportingOwnerRelationship') ?? '';

  const idText = (tag: string): string => textOf(identityXml, tag) ?? '';
  const flag = (tag: string): boolean => boolText(textOf(relationshipXml, tag));

  const cik = idText('rptOwnerCik');
  const name = idText('rptOwnerName');
  const isDirector = flag('isDirector');
  const isOfficer = flag('isOfficer');
  const officerTitle = textOf(relationshipXml, 'officerTitle') ?? '';
  const isTenPercentOwner = flag('isTenPercentOwner');
  const isOther = flag('isOther');

  return { cik, name, isDirector, isOfficer, officerTitle, isTenPercentOwner, isOther };
}

/**
 * Turn the inner XML of one `<nonDerivativeTransaction>` into a transaction row.
 *
 * Missing text fields become '', missing or non-numeric amounts become null,
 * and the A/D and D/I codes are normalized to '' when they hold anything else.
 * This implementation always returns a row (never null).
 */
function parseNonDerivativeTransaction(block: string): SecForm4Transaction | null {
  const wrapped = (tag: string): string | null => valueOf(block, tag);

  const adRaw = wrapped('transactionAcquiredDisposedCode') ?? '';
  let acquiredOrDisposed: SecForm4Transaction['acquiredOrDisposed'] = '';
  if (adRaw === 'A' || adRaw === 'D') {
    acquiredOrDisposed = adRaw;
  }

  const ownershipRaw = wrapped('directOrIndirectOwnership') ?? '';
  let ownershipNature: SecForm4Transaction['ownershipNature'] = '';
  if (ownershipRaw === 'D' || ownershipRaw === 'I') {
    ownershipNature = ownershipRaw;
  }

  return {
    tableKind: 'non-derivative',
    securityTitle: wrapped('securityTitle') ?? '',
    transactionDate: wrapped('transactionDate') ?? '',
    transactionCode: textOf(block, 'transactionCode') ?? '',
    shares: parseNum(wrapped('transactionShares')),
    pricePerShare: parseNum(wrapped('transactionPricePerShare')),
    acquiredOrDisposed,
    sharesOwnedFollowing: parseNum(wrapped('sharesOwnedFollowingTransaction')),
    ownershipNature,
    natureOfIndirectOwnership: wrapped('natureOfOwnership') || undefined,
  };
}

/**
 * Turn the inner XML of one `<derivativeTransaction>` into a transaction row.
 *
 * Reads the same fields as the non-derivative parser — including
 * `<natureOfOwnership>`, so indirect-ownership explanations are not lost on
 * derivative rows — plus the derivative-only fields: equity-swap flag,
 * conversion/exercise price, exercise date and expiration date.
 *
 * Missing text fields become '' (or undefined for the optional ones),
 * missing or non-numeric amounts become null, and the A/D and D/I codes are
 * normalized to '' when they hold anything else. This implementation always
 * returns a row (never null).
 */
function parseDerivativeTransaction(block: string): SecForm4Transaction | null {
  const securityTitle = valueOf(block, 'securityTitle') ?? '';
  const transactionDate = valueOf(block, 'transactionDate') ?? '';
  const transactionCode = textOf(block, 'transactionCode') ?? '';
  const equitySwap = boolText(textOf(block, 'equitySwapInvolved'));
  const sharesStr = valueOf(block, 'transactionShares');
  const priceStr = valueOf(block, 'transactionPricePerShare');
  const adCode = valueOf(block, 'transactionAcquiredDisposedCode') ?? '';
  const postShares = valueOf(block, 'sharesOwnedFollowingTransaction');
  const ownership = valueOf(block, 'directOrIndirectOwnership') ?? '';
  /* Sibling of <directOrIndirectOwnership> inside <ownershipNature>. */
  const indirectNature = valueOf(block, 'natureOfOwnership');
  const conv = valueOf(block, 'conversionOrExercisePrice');
  const exDate = valueOf(block, 'exerciseDate');
  const expDate = valueOf(block, 'expirationDate');
  return {
    tableKind: 'derivative',
    securityTitle,
    transactionDate,
    transactionCode,
    /* Only surfaced when true, so absent and false look the same to consumers. */
    equitySwapInvolved: equitySwap || undefined,
    shares: parseNum(sharesStr),
    pricePerShare: parseNum(priceStr),
    acquiredOrDisposed: (adCode === 'A' || adCode === 'D') ? adCode : '',
    sharesOwnedFollowing: parseNum(postShares),
    ownershipNature: (ownership === 'D' || ownership === 'I') ? ownership : '',
    natureOfIndirectOwnership: indirectNature || undefined,
    conversionOrExercisePrice: parseNum(conv),
    exerciseDate: exDate || undefined,
    expirationDate: expDate || undefined,
  };
}

/**
 * Choose which file from a filing index to treat as the Form 4 XML body.
 *
 * The first file whose name matches "form4….xml" (case-insensitive) wins.
 * Failing that, the first ".xml" file whose name does not contain "exhibit"
 * is used. Returns null when neither exists.
 */
function pickForm4Xml(files: readonly SecFilingFile[]): SecFilingFile | null {
  let firstPlainXml: SecFilingFile | null = null;
  for (const candidate of files) {
    if (/form4.*\.xml$/i.test(candidate.name)) {
      return candidate;
    }
    const isNonExhibitXml = /\.xml$/i.test(candidate.name) && !/exhibit/i.test(candidate.name);
    if (firstPlainXml === null && isNonExhibitXml) {
      firstPlainXml = candidate;
    }
  }
  return firstPlainXml;
}

/* ----- tiny XML helpers (regex-based, no library) ----- */

/** Escape regex metacharacters so a tag name can be embedded in a RegExp source. */
function escapeTag(tag: string): string {
  return tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Build the element matcher shared by `firstBlock` and `iterateBlocks`.
 *
 * The first alternative matches an empty-element tag (`<tag/>`) and has no
 * capture group; the second matches `<tag ...>inner</tag>` and captures the
 * inner XML in group 1. Trying the empty-element form first keeps `<tag/>`
 * from being mistaken for an opening tag and paired with an unrelated
 * `</tag>` later in the document.
 */
function blockPattern(tag: string, flags: string): RegExp {
  const t = escapeTag(tag);
  return new RegExp(`<${t}\\b[^>]*/>|<${t}\\b[^>]*>([\\s\\S]*?)</${t}>`, flags);
}

/** The five entities predefined by XML. */
const XML_PREDEFINED_ENTITIES: Readonly<Record<string, string>> = {
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
};

/**
 * Decode the predefined XML entities and numeric character references in a
 * single pass (so `&amp;lt;` becomes `&lt;`, not `<`). Anything that is not
 * a recognized or valid reference is left untouched.
 */
function decodeXmlEntities(text: string): string {
  return text.replace(/&(#x[0-9a-fA-F]+|#[0-9]+|amp|lt|gt|quot|apos);/g, (whole: string, ref: string) => {
    if (ref.startsWith('#x')) {
      const code = parseInt(ref.slice(2), 16);
      return code <= 0x10ffff ? String.fromCodePoint(code) : whole;
    }
    if (ref.startsWith('#')) {
      const code = parseInt(ref.slice(1), 10);
      return code <= 0x10ffff ? String.fromCodePoint(code) : whole;
    }
    return XML_PREDEFINED_ENTITIES[ref] ?? whole;
  });
}

/** Strip any markup from a fragment, decode entities, and trim whitespace. */
function plainText(fragment: string): string {
  /* Tags are removed before decoding so a decoded "<" is never mistaken for markup. */
  return decodeXmlEntities(fragment.replace(/<[^>]+>/g, '')).trim();
}

/**
 * Inner XML of the first `<tag>` element (case-insensitive), '' for an
 * empty-element tag, or null when the element does not occur.
 */
function firstBlock(xml: string, tag: string): string | null {
  const m = blockPattern(tag, 'i').exec(xml);
  return m ? (m[1] ?? '') : null;
}

/** Yield the inner XML of every `<tag>` element in document order ('' for `<tag/>`). */
function* iterateBlocks(xml: string, tag: string): Generator<string> {
  const re = blockPattern(tag, 'gi');
  let m: RegExpExecArray | null;
  while ((m = re.exec(xml)) !== null) yield m[1] ?? '';
}

/** Get the text content of an element, stripping any inner tags and decoding entities. */
function textOf(xml: string, tag: string): string | null {
  const blk = firstBlock(xml, tag);
  if (blk === null) return null;
  return plainText(blk);
}

/**
 * SEC's <foo><value>X</value></foo> shape: pull just the <value>.
 * Falls back to the element's whole text when it has no <value> child;
 * returns null when the element itself is absent.
 */
function valueOf(xml: string, tag: string): string | null {
  const blk = firstBlock(xml, tag);
  if (blk === null) return null;
  return plainText(firstBlock(blk, 'value') ?? blk);
}

/**
 * Interpret an XML flag's text as a boolean.
 * '1', 'true' and 'yes' (any case, surrounding whitespace ignored) are true;
 * everything else, including null / undefined / '', is false.
 */
function boolText(s: string | null | undefined): boolean {
  if (!s) {
    return false;
  }
  switch (s.trim().toLowerCase()) {
    case '1':
    case 'true':
    case 'yes':
      return true;
    default:
      return false;
  }
}

/**
 * Convert numeric text to a number, ignoring thousands separators (commas)
 * and surrounding whitespace. Returns null for null / undefined, blank
 * text, and anything `Number()` cannot turn into a finite value.
 */
function parseNum(s: string | null | undefined): number | null {
  if (s == null) {
    return null;
  }
  const withoutCommas = s.split(',').join('').trim();
  if (withoutCommas.length === 0) {
    return null;
  }
  const parsed = Number(withoutCommas);
  if (!Number.isFinite(parsed)) {
    return null;
  }
  return parsed;
}

/* ------------------------------------------------------------------ *
 * Anthropic tool descriptor.
 * ------------------------------------------------------------------ */

/** Shape of one tool descriptor entry in `SEC_INSIDER_TOOLS`. */
export interface SecInsiderToolDescriptor {
  /** Tool name; `executeSecInsiderTool` dispatches on this value. */
  readonly name: string;
  /** Free-text description of what the tool does and how to call it. */
  readonly description: string;
  /** JSON-Schema-style description of the tool's input object. */
  readonly input_schema: {
    readonly type: 'object';
    /** Per-property type name and description, keyed by property name. */
    readonly properties: Record<string, { type: string; description: string }>;
    /** Names of the properties that must be supplied. */
    readonly required: readonly string[];
  };
}

/**
 * Tool descriptors exported by this module. Currently a single entry,
 * `sec_insider_form4`, whose properties mirror the arguments that
 * `executeSecInsiderTool` reads from its input and forwards to
 * `secInsiderForm4`. Only `cik` is required.
 */
export const SEC_INSIDER_TOOLS: readonly SecInsiderToolDescriptor[] = [
  {
    name: 'sec_insider_form4',
    description:
      "Fetch Form 4 (insider trading) filings for a CIK. Default 'list mode' returns just the filing " +
      'metadata (accession, filed date, primary doc URL). Pass `detail: true` to also fetch each filing\'s ' +
      'XML body and parse it into structured transactions (transactionDate, code, shares, pricePerShare, ' +
      'acquiredOrDisposed, ownershipNature). Optionally narrow by filing date range; capped by ' +
      "`maxFilings` (default 10, hard cap 100).",
    input_schema: {
      type: 'object',
      properties: {
        cik: { type: 'string', description: 'Issuer CIK (any numeric form; will be padded).' },
        detail: { type: 'boolean', description: 'true to parse XML bodies; default false (list only).' },
        dateFrom: { type: 'string', description: 'Optional inclusive lower bound on filing date (YYYY-MM-DD).' },
        dateTo: { type: 'string', description: 'Optional inclusive upper bound on filing date (YYYY-MM-DD).' },
        maxFilings: { type: 'number', description: 'How many filings to surface; default 10, hard cap 100.' },
      },
      required: ['cik'],
    },
  },
];

/** Envelope returned by `executeSecInsiderTool`. */
export interface SecInsiderToolResult {
  /** true when the tool ran and `result` is set; false when `error` is set. */
  readonly ok: boolean;
  /** The tool's return value on success. */
  readonly result?: unknown;
  /**
   * Failure details. `category` is the `RetrievalError` category when one was
   * thrown, `'unknown-tool'` for an unrecognized tool name, or `'internal'`
   * for any other thrown value.
   */
  readonly error?: { readonly category: string; readonly message: string };
}

/**
 * Dispatch a tool call by name and wrap the outcome in a result envelope.
 *
 * Input fields of the wrong type are ignored (treated as not supplied)
 * rather than rejected. The CIK is accepted as a string or as a
 * non-negative safe integer; any other value becomes '' and is rejected by
 * `secInsiderForm4` as an invalid CIK.
 *
 * Failures are reported in the envelope instead of being thrown: a
 * `RetrievalError` keeps its category, an unrecognized tool name yields
 * `'unknown-tool'`, and any other thrown value yields `'internal'`.
 *
 * @param name     Tool name, e.g. `'sec_insider_form4'`.
 * @param rawInput Untrusted tool input; non-object values are treated as `{}`.
 * @returns `{ ok: true, result }` on success, `{ ok: false, error }` otherwise.
 */
export async function executeSecInsiderTool(
  name: string,
  rawInput: unknown,
): Promise<SecInsiderToolResult> {
  const input = (rawInput && typeof rawInput === 'object') ? (rawInput as Record<string, unknown>) : {};
  try {
    switch (name) {
      case 'sec_insider_form4': {
        /* A CIK is numeric, so callers may send it as a JSON number.
         * Stringify it instead of collapsing it to '' — which would surface
         * as a confusing `not a valid CIK: ""` error. */
        const rawCik = input.cik;
        let cik = '';
        if (typeof rawCik === 'string') {
          cik = rawCik;
        } else if (typeof rawCik === 'number' && Number.isSafeInteger(rawCik) && rawCik >= 0) {
          cik = String(rawCik);
        }
        const detail = typeof input.detail === 'boolean' ? input.detail : undefined;
        const dateFrom = typeof input.dateFrom === 'string' ? input.dateFrom : undefined;
        const dateTo = typeof input.dateTo === 'string' ? input.dateTo : undefined;
        const maxFilings = typeof input.maxFilings === 'number' ? input.maxFilings : undefined;
        return {
          ok: true,
          result: await secInsiderForm4(cik, { detail, dateFrom, dateTo, maxFilings }),
        };
      }
      default:
        return { ok: false, error: { category: 'unknown-tool', message: `unknown SEC insider tool "${name}"` } };
    }
  } catch (err) {
    if (err instanceof RetrievalError) {
      return { ok: false, error: { category: err.category, message: err.message } };
    }
    return {
      ok: false,
      error: { category: 'internal', message: err instanceof Error ? err.message : String(err) },
    };
  }
}