BID · Console
Baseline · Intelligence · Decision
src/tools/retrieval/connectors/finra-brokercheck.ts 6,710 bytes · typescript
/**
 * FINRA BrokerCheck connector.
 *
 * Returns broker-dealer firm and individual broker registration records
 * + disciplinary history from FINRA's public BrokerCheck search.
 * Backed by https://api.brokercheck.finra.org/search/{individual|firm}.
 *
 * No authentication required. The endpoint is public (used by the
 * brokercheck.finra.org web UI) but a descriptive User-Agent is sent
 * as a courtesy.
 *
 * Search modes (selected via `params.query.type`):
 *   - "firm"        — firm / broker-dealer registration lookup
 *   - "individual"  — individual broker / registered rep lookup (default)
 *   - "iapd-firm"   — IAPD-side firm lookup via the same gateway
 *   - "iapd-individual" — IAPD-side individual lookup
 *
 * The query string is taken from params.entity.id (firm name, CRD,
 * SEC#, or individual name). The upstream response is Solr-style JSON
 * with `hits.total` and `hits.hits[]._source` entries; it is normalized
 * into a `FinraBrokerCheckResult` before being serialized as the payload.
 */

import {
  RetrievalError,
  type FetchParams,
  type RawPayload,
  type RetrievalConnector,
} from '../interface.js';
import { httpGet, HttpError } from '../http-client.js';
import { RateLimiter } from '../rate-limiter.js';

/** Value of the `User-Agent` header sent with the BrokerCheck request. */
const USER_AGENT = 'MR mitchell.roy@sia-partners.com';
/** Root of the search API; the per-mode path segment is appended to it. */
const SEARCH_BASE = 'https://api.brokercheck.finra.org/search';
/** Response body cap (16 MiB) handed to `httpGet` as `maxBodyBytes`. */
const RESPONSE_MAX_BYTES = 16 * 1024 * 1024;

/** FINRA's published guidance is "be reasonable"; 5 req/s is well
 *  under what the BrokerCheck UI itself fires.
 *  Declared at module level, so every search issued through this
 *  module acquires from the same limiter instance. */
const limiter = new RateLimiter({ requestsPerSecond: 5, burstSize: 5 });

/** Search endpoints this connector can target. */
type SearchMode = 'firm' | 'individual' | 'iapd-firm' | 'iapd-individual';

/** Maps each search mode to the URL path segment appended to `SEARCH_BASE`. */
const MODE_PATHS: Record<SearchMode, string> = {
  firm: 'firm',
  individual: 'individual',
  'iapd-firm': 'iapd/firm',
  'iapd-individual': 'iapd/individual',
};

/** One normalized search hit, built from an entry of the upstream `hits.hits[]` array. */
export interface FinraBrokerCheckHit {
  /** Upstream `_id`; empty string when it is missing or not a string. */
  readonly id: string;
  /** Upstream `_type`; empty string when it is missing or not a string. */
  readonly type: string;
  /** Upstream `_score`; `0` when it is missing or not a number. */
  readonly score: number;
  /** Upstream `_source` object, passed through as-is; empty object when missing or not an object. */
  readonly source: Readonly<Record<string, unknown>>;
}

/** Normalized outcome of a single BrokerCheck search request. */
export interface FinraBrokerCheckResult {
  /** The search string that was sent upstream. */
  readonly query: string;
  /** The search mode that selected the endpoint path. */
  readonly mode: SearchMode;
  /**
   * Upstream `hits.total` (either a plain number or an object carrying a
   * numeric `value`); falls back to the number of entries in the upstream
   * hits array. Derived independently of `hits`, so the two may differ.
   */
  readonly totalHits: number;
  /** Hits returned in this response, in upstream order. */
  readonly hits: readonly FinraBrokerCheckHit[];
  /** Full request URL, including the encoded query string. */
  readonly sourceUrl: string;
  /** ISO-8601 timestamp taken when the result object was assembled. */
  readonly capturedAt: string;
}

/**
 * Retrieval connector that exposes BrokerCheck search through the
 * generic `RetrievalConnector` contract.
 *
 * The search text comes from `params.entity.id`; the optional
 * `params.query.type` and `params.query.maxHits` select the search mode
 * and the requested row count.
 */
export class FinraBrokerCheckConnector implements RetrievalConnector {
  readonly name = 'finra-brokercheck';
  readonly authRequired = false;
  readonly rateLimit = { requestsPerSecond: 5, burstSize: 5 };

  /** Always reports the connector as available; no probe request is made. */
  async isAvailable(): Promise<boolean> {
    return true;
  }

  /**
   * Runs one search and wraps the normalized result as a JSON payload.
   *
   * @throws RetrievalError `invalid-request` when `entity.id` is absent or
   *   blank after trimming; otherwise whatever `fetchBrokerCheck` throws.
   */
  async fetch(params: FetchParams): Promise<RawPayload> {
    const searchText = params.entity?.id?.trim();
    if (!searchText) {
      throw new RetrievalError(
        'invalid-request',
        'finra-brokercheck: entity.id (search query — firm name, individual name, CRD) is required.',
      );
    }

    const options = params.query;
    const mode = parseMode(options?.type);
    const rowLimit = clampHits(options?.maxHits);
    const outcome = await fetchBrokerCheck(searchText, mode, rowLimit);

    const payload: RawPayload = {
      source: this.name,
      sourceUrl: outcome.sourceUrl,
      capturedAt: outcome.capturedAt,
      contentType: 'application/json',
      // The payload body is the normalized result, not the raw upstream text.
      rawContent: JSON.stringify(outcome),
      metadata: {
        mode,
        totalHits: outcome.totalHits,
        returned: outcome.hits.length,
      },
    };
    return payload;
  }
}

/**
 * Runs one BrokerCheck search and normalizes the upstream response.
 *
 * @param query   Search text (firm name, individual name, CRD, …). Must
 *                contain at least one non-whitespace character; it is sent
 *                upstream exactly as given.
 * @param mode    Which search endpoint to call; defaults to `'individual'`.
 * @param maxHits Requested row count; passed through `clampHits` before
 *                being sent as `nrows`.
 * @returns The normalized result, including the exact URL requested and
 *          the capture timestamp.
 * @throws RetrievalError `invalid-request` for a blank query or an unknown
 *         mode; the category chosen by `translateHttpError` when the HTTP
 *         call fails; `internal` when the body is not valid JSON.
 */
export async function fetchBrokerCheck(
  query: string,
  mode: SearchMode = 'individual',
  maxHits = 25,
): Promise<FinraBrokerCheckResult> {
  // Reject whitespace-only input too, so a blank search never reaches upstream.
  if (typeof query !== 'string' || query.trim() === '') {
    throw new RetrievalError('invalid-request', 'fetchBrokerCheck: query is required.');
  }
  // Own-property check: a plain index lookup also resolves inherited keys
  // such as "constructor" when an untyped caller passes an arbitrary string.
  if (!Object.prototype.hasOwnProperty.call(MODE_PATHS, mode)) {
    throw new RetrievalError('invalid-request', `fetchBrokerCheck: unknown mode "${mode}".`);
  }
  const path = MODE_PATHS[mode];
  const params = new URLSearchParams({
    query,
    hl: 'true',
    nrows: String(clampHits(maxHits)),
    start: '0',
    r: '25',
    sort: 'score desc',
    wt: 'json',
  });
  const url = `${SEARCH_BASE}/${path}?${params.toString()}`;
  await limiter.acquire();
  let res;
  try {
    res = await httpGet(url, {
      headers: { 'User-Agent': USER_AGENT, Accept: 'application/json' },
      maxBodyBytes: RESPONSE_MAX_BYTES,
    });
  } catch (err) {
    throw translateHttpError(err, url);
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(res.body);
  } catch (err) {
    throw new RetrievalError(
      'internal',
      `brokercheck JSON parse failed: ${err instanceof Error ? err.message : String(err)}`,
      { url },
    );
  }
  // `JSON.parse` may legally yield null or a primitive; treat any
  // non-object envelope as "no hits" instead of dereferencing it.
  const envelope = asRecord(asRecord(parsed).hits);
  const rawHits: unknown[] = Array.isArray(envelope.hits) ? envelope.hits : [];
  const hits: FinraBrokerCheckHit[] = [];
  for (const rawHit of rawHits) {
    const hit = toBrokerCheckHit(rawHit);
    if (hit) hits.push(hit);
  }
  return {
    query,
    mode,
    totalHits: readTotalHits(envelope.total, rawHits.length),
    hits,
    sourceUrl: url,
    capturedAt: new Date().toISOString(),
  };
}

/** Returns `value` when it is a non-null object, otherwise an empty record. */
function asRecord(value: unknown): Record<string, unknown> {
  return value !== null && typeof value === 'object'
    ? (value as Record<string, unknown>)
    : {};
}

/** Reads a total that is either a number or `{ value: number }`, else `fallback`. */
function readTotalHits(total: unknown, fallback: number): number {
  if (typeof total === 'number') return total;
  const nested = asRecord(total).value;
  return typeof nested === 'number' ? nested : fallback;
}

/** Normalizes one upstream hit; returns `undefined` for non-object entries. */
function toBrokerCheckHit(raw: unknown): FinraBrokerCheckHit | undefined {
  if (!raw || typeof raw !== 'object') return undefined;
  const fields = raw as Record<string, unknown>;
  return {
    id: typeof fields._id === 'string' ? fields._id : '',
    type: typeof fields._type === 'string' ? fields._type : '',
    score: typeof fields._score === 'number' ? fields._score : 0,
    source: asRecord(fields._source),
  };
}

/**
 * Resolves a caller-supplied mode value to a supported search mode.
 *
 * @param raw Untrusted value (typically `params.query.type`).
 * @returns `raw` when it is one of the keys defined on `MODE_PATHS`,
 *          otherwise the default `'individual'`.
 */
function parseMode(raw: unknown): SearchMode {
  // `in` also matches inherited keys ("constructor", "toString", …), which
  // would then be used to build the request path; require an own key.
  if (typeof raw === 'string' && Object.prototype.hasOwnProperty.call(MODE_PATHS, raw)) {
    return raw as SearchMode;
  }
  return 'individual';
}

/**
 * Normalizes a requested hit count to a whole number in the range 1–100.
 *
 * @param raw Untrusted value (typically `params.query.maxHits`).
 * @returns The floored value capped at 100, or the default of 25 when
 *          `raw` is not a finite number or floors to less than 1.
 */
function clampHits(raw: unknown): number {
  if (typeof raw !== 'number' || !Number.isFinite(raw)) return 25;
  // Floor before the lower-bound check so fractions such as 0.5 fall back
  // to the default instead of requesting zero rows.
  const whole = Math.floor(raw);
  if (whole < 1) return 25;
  return Math.min(whole, 100);
}

/**
 * Converts a failure thrown by the HTTP call into a `RetrievalError`.
 *
 * Mapping for `HttpError`:
 *   - timeout / network / aborted → `unavailable`
 *   - body-too-large              → `internal`
 *   - status 404                  → `no-content`
 *   - status 401 / 403            → `auth-failed`
 *   - status 429                  → `rate-limited`
 *   - status 400                  → `invalid-request`
 *   - any other status            → `internal`
 * Anything else (including an unrecognized category) becomes `internal`.
 *
 * @param err The value caught from the HTTP call.
 * @param url Request URL, attached to the resulting error.
 */
function translateHttpError(err: unknown, url: string): RetrievalError {
  const details = { url };

  if (err instanceof HttpError) {
    const kind = err.category;

    if (kind === 'timeout' || kind === 'network' || kind === 'aborted') {
      return new RetrievalError('unavailable', err.message, details);
    }
    if (kind === 'body-too-large') {
      return new RetrievalError('internal', err.message, details);
    }
    if (kind === 'status') {
      const code = err.status;
      if (code === 404) {
        return new RetrievalError('no-content', err.message, details);
      }
      if (code === 401 || code === 403) {
        return new RetrievalError('auth-failed', err.message, details);
      }
      if (code === 429) {
        return new RetrievalError('rate-limited', err.message, details);
      }
      if (code === 400) {
        return new RetrievalError('invalid-request', err.message, details);
      }
      return new RetrievalError('internal', err.message, details);
    }
  }

  // Non-HttpError values and unrecognized categories end up here.
  const text = err instanceof Error ? err.message : String(err);
  return new RetrievalError('internal', text, details);
}