// BID · Console
// Baseline · Intelligence · Decision
// src/tools/retrieval/connectors/iapd.ts 5,978 bytes · typescript
/**
 * Investment Adviser Public Disclosure (IAPD) connector.
 *
 * Returns investment-adviser registration records and Form ADV
 * filings. IAPD shares search infrastructure with FINRA BrokerCheck;
 * the IAPD-specific endpoints under api.brokercheck.finra.org/search
 * are the public ones used by the IAPD web UI at adviserinfo.sec.gov.
 *
 * No authentication required.
 *
 * Search modes (selected via `params.query.type`):
 *   - "firm"        — adviser firm lookup (default)
 *   - "individual"  — individual investment adviser representative lookup
 *
 * The query string is taken from params.entity.id (firm name, IARD #,
 * SEC #, individual name, or CRD). The optional `params.query.maxHits`
 * sets how many hits to request (default 25, capped at 100).
 */

import {
  RetrievalError,
  type FetchParams,
  type RawPayload,
  type RetrievalConnector,
} from '../interface.js';
import { httpGet, HttpError } from '../http-client.js';
import { RateLimiter } from '../rate-limiter.js';

// Value sent as the `User-Agent` header on every IAPD search request.
const USER_AGENT = 'MR mitchell.roy@sia-partners.com';
// Base URL for IAPD searches; the search mode ("firm" / "individual") is
// appended as the final path segment.
const SEARCH_BASE = 'https://api.brokercheck.finra.org/search/iapd';
// Upper bound on the response body size (16 MiB), passed to the HTTP client
// as `maxBodyBytes`.
const RESPONSE_MAX_BYTES = 16 * 1024 * 1024;

// Module-level limiter shared by every call to fetchIapd; acquired once
// before each outgoing request.
const limiter = new RateLimiter({ requestsPerSecond: 5, burstSize: 5 });

/**
 * Search mode. The value is used verbatim as the last path segment of the
 * search URL: "firm" for adviser firms, "individual" for adviser
 * representatives.
 */
type IapdMode = 'firm' | 'individual';

/**
 * One normalised search hit.
 *
 * Each field is copied from the corresponding underscore-prefixed key of the
 * raw hit and replaced by a neutral default when the raw value has the wrong
 * type.
 */
export interface IapdHit {
  /** Raw `_id`, or `''` when it is not a string. */
  readonly id: string;
  /** Raw `_type`, or `''` when it is not a string. */
  readonly type: string;
  /** Raw `_score`, or `0` when it is not a number. */
  readonly score: number;
  /** Raw `_source` object, or `{}` when it is missing or not an object. */
  readonly source: Readonly<Record<string, unknown>>;
}

/** Normalised outcome of a single IAPD search request. */
export interface IapdResult {
  /** The search text that was sent. */
  readonly query: string;
  /** The search mode that was used. */
  readonly mode: IapdMode;
  /**
   * Total match count reported by the response (`hits.total` as a number, or
   * `hits.total.value`); falls back to the number of raw hits returned when
   * neither is present.
   */
  readonly totalHits: number;
  /** The normalised hits; raw entries that are not objects are skipped. */
  readonly hits: readonly IapdHit[];
  /** Full request URL, including the query string. */
  readonly sourceUrl: string;
  /** ISO-8601 timestamp taken once the response has been parsed. */
  readonly capturedAt: string;
}

/**
 * Retrieval connector that exposes IAPD search through the common
 * `RetrievalConnector` interface.
 */
export class IapdConnector implements RetrievalConnector {
  readonly name = 'iapd';
  readonly authRequired = false;
  readonly rateLimit = { requestsPerSecond: 5, burstSize: 5 };

  /** Always resolves to `true`; no availability probe is performed. */
  async isAvailable(): Promise<boolean> {
    return true;
  }

  /**
   * Search IAPD for `params.entity.id` and wrap the normalised result as a
   * JSON payload.
   *
   * @param params Request parameters. `entity.id` carries the search text;
   *   `query.type` selects the mode and `query.maxHits` the row count.
   * @returns The serialised search result plus summary metadata.
   * @throws RetrievalError `invalid-request` when `entity.id` is missing or
   *   blank; otherwise whatever `fetchIapd` throws.
   */
  async fetch(params: FetchParams): Promise<RawPayload> {
    const searchText = params.entity?.id?.trim();
    if (!searchText) {
      throw new RetrievalError(
        'invalid-request',
        'iapd: entity.id (search query — firm name, IARD #, individual name, CRD) is required.',
      );
    }

    const options = params.query;
    const mode = parseMode(options?.type);
    const rowLimit = clampHits(options?.maxHits);
    const result = await fetchIapd(searchText, mode, rowLimit);

    const payload: RawPayload = {
      source: this.name,
      sourceUrl: result.sourceUrl,
      capturedAt: result.capturedAt,
      contentType: 'application/json',
      rawContent: JSON.stringify(result),
      metadata: {
        mode,
        totalHits: result.totalHits,
        returned: result.hits.length,
      },
    };
    return payload;
  }
}

/**
 * Run a single IAPD search and normalise the response into an `IapdResult`.
 *
 * Waits on the module-level rate limiter before issuing the request.
 *
 * @param query   Search text; must be a non-blank string. It is sent as
 *   given (not trimmed).
 * @param mode    Search endpoint to use: `'firm'` (default) or `'individual'`.
 * @param maxHits Requested row count; normalised by `clampHits` before being
 *   sent as `nrows`.
 * @returns The normalised hits together with the request URL and a capture
 *   timestamp.
 * @throws RetrievalError `invalid-request` for a blank query or an unknown
 *   mode; the category chosen by `translateHttpError` for a transport
 *   failure; `internal` when the body is not valid JSON or its root is not
 *   an object (for example a bare `null`).
 */
export async function fetchIapd(
  query: string,
  mode: IapdMode = 'firm',
  maxHits = 25,
): Promise<IapdResult> {
  // Reject whitespace-only input too: the connector trims before calling, so
  // direct callers get the same treatment instead of a search for blanks.
  if (typeof query !== 'string' || query.trim() === '') {
    throw new RetrievalError('invalid-request', 'fetchIapd: query is required.');
  }
  if (mode !== 'firm' && mode !== 'individual') {
    throw new RetrievalError('invalid-request', `fetchIapd: unknown mode "${mode}".`);
  }
  const params = new URLSearchParams({
    query,
    hl: 'true',
    nrows: String(clampHits(maxHits)),
    start: '0',
    // NOTE(review): `r` is a fixed value independent of maxHits; its meaning
    // is not established anywhere in this file — confirm before changing.
    r: '25',
    sort: 'score desc',
    wt: 'json',
  });
  const url = `${SEARCH_BASE}/${mode}?${params.toString()}`;
  await limiter.acquire();
  let res;
  try {
    res = await httpGet(url, {
      headers: { 'User-Agent': USER_AGENT, Accept: 'application/json' },
      maxBodyBytes: RESPONSE_MAX_BYTES,
    });
  } catch (err) {
    throw translateHttpError(err, url);
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(res.body);
  } catch (err) {
    throw new RetrievalError(
      'internal',
      `iapd JSON parse failed: ${err instanceof Error ? err.message : String(err)}`,
      { url },
    );
  }
  // JSON.parse succeeds on "null", "42", '"text"', ...; dereferencing a null
  // root would raise a bare TypeError, so report it as a RetrievalError.
  if (parsed === null || typeof parsed !== 'object') {
    throw new RetrievalError(
      'internal',
      `iapd: unexpected response shape (${parsed === null ? 'null' : typeof parsed}).`,
      { url },
    );
  }
  // A missing or non-object `hits` envelope is tolerated and yields no hits.
  const envelopeRaw = (parsed as { hits?: unknown }).hits;
  const envelope: { total?: unknown; hits?: unknown } =
    envelopeRaw !== null && typeof envelopeRaw === 'object' ? envelopeRaw : {};
  const rawHits: unknown[] = Array.isArray(envelope.hits) ? envelope.hits : [];

  const hits: IapdHit[] = [];
  for (const raw of rawHits) {
    const hit = toIapdHit(raw);
    if (hit) hits.push(hit);
  }
  return {
    query,
    mode,
    totalHits: readTotalHits(envelope.total, rawHits.length),
    hits,
    sourceUrl: url,
    capturedAt: new Date().toISOString(),
  };
}

/** Read a total that is either a plain number or `{ value: number }`; otherwise return `fallback`. */
function readTotalHits(total: unknown, fallback: number): number {
  if (typeof total === 'number') return total;
  if (total !== null && typeof total === 'object') {
    const value = (total as { value?: unknown }).value;
    if (typeof value === 'number') return value;
  }
  return fallback;
}

/** Convert one raw hit into an `IapdHit`, or `undefined` when the entry is not an object. */
function toIapdHit(raw: unknown): IapdHit | undefined {
  if (!raw || typeof raw !== 'object') return undefined;
  const h = raw as { _id?: unknown; _type?: unknown; _score?: unknown; _source?: unknown };
  return {
    id: typeof h._id === 'string' ? h._id : '',
    type: typeof h._type === 'string' ? h._type : '',
    score: typeof h._score === 'number' ? h._score : 0,
    source: h._source && typeof h._source === 'object'
      ? (h._source as Record<string, unknown>)
      : {},
  };
}

/**
 * Resolve an untyped mode value to an `IapdMode`.
 *
 * Only the exact string `'individual'` selects individual search; every
 * other value (including `'firm'`, `undefined`, and unknown strings) yields
 * `'firm'`.
 */
function parseMode(raw: unknown): IapdMode {
  return raw === 'individual' ? 'individual' : 'firm';
}

/**
 * Normalise a caller-supplied hit count to an integer in the range 1–100.
 *
 * @param raw Requested count; may be any value.
 * @returns 25 when `raw` is not a finite number greater than zero; otherwise
 *   `raw` rounded down and bounded to [1, 100].
 */
function clampHits(raw: unknown): number {
  const defaultHits = 25;
  const minHits = 1;
  const maxHits = 100;
  if (typeof raw !== 'number' || !Number.isFinite(raw) || raw <= 0) return defaultHits;
  // A positive fraction below 1 floors to 0, which would request zero rows;
  // lift it to the minimum instead.
  return Math.min(Math.max(Math.floor(raw), minHits), maxHits);
}

/**
 * Map a failure thrown by the HTTP client onto a `RetrievalError`.
 *
 * @param err Whatever was caught around the HTTP call.
 * @param url Request URL, attached to the resulting error as context.
 * @returns A `RetrievalError` whose category reflects the failure:
 *   timeout / network / aborted → `unavailable`; body-too-large → `internal`;
 *   HTTP 404 → `no-content`; 401 / 403 → `auth-failed`; 429 → `rate-limited`;
 *   400 → `invalid-request`; anything else → `internal`.
 */
function translateHttpError(err: unknown, url: string): RetrievalError {
  if (err instanceof HttpError) {
    const { category } = err;

    if (category === 'timeout' || category === 'network' || category === 'aborted') {
      return new RetrievalError('unavailable', err.message, { url });
    }
    if (category === 'body-too-large') {
      return new RetrievalError('internal', err.message, { url });
    }
    if (category === 'status') {
      switch (err.status) {
        case 404:
          return new RetrievalError('no-content', err.message, { url });
        case 401:
        case 403:
          return new RetrievalError('auth-failed', err.message, { url });
        case 429:
          return new RetrievalError('rate-limited', err.message, { url });
        case 400:
          return new RetrievalError('invalid-request', err.message, { url });
        default:
          return new RetrievalError('internal', err.message, { url });
      }
    }
    // An unrecognised category drops through to the generic fallback below.
  }

  const detail = err instanceof Error ? err.message : String(err);
  return new RetrievalError('internal', detail, { url });
}