BID · Console
Baseline · Intelligence · Decision
src/tools/retrieval/mock-connector.ts 4,329 bytes · typescript
/**
 * Mock retrieval connector — the only connector that ships with the
 * framework. It returns realistic-shaped UNSTRUCTURED payloads (HTML
 * tables, fixed-width text, narrative prose) so the parser layer has
 * something to do; the Source/Extraction agent does the real work.
 *
 * Swap or supplement this by adding files under
 * `src/tools/retrieval/connectors/` and registering them in the
 * dispatcher — see connectors/README.md for the contract.
 */

import { RetrievalError, type FetchParams, type RawPayload, type RetrievalConnector } from './interface.js';

/** Predicate deciding whether a canned document answers a given fetch request. */
type MockDocMatcher = (request: FetchParams) => boolean;

/**
 * Factory producing the canned payload. `capturedAt` is deliberately absent
 * from the result: `MockConnector.fetch` stamps it when the document is
 * actually returned.
 */
type MockDocBuilder = () => Omit<RawPayload, 'capturedAt'>;

/** One canned document: a request predicate paired with a payload factory. */
interface MockDoc {
  readonly matcher: MockDocMatcher;
  readonly build: MockDocBuilder;
}

/**
 * Builds a `MockDoc` matcher for a fixed list of accepted entity names.
 *
 * The returned predicate accepts a request when `entity.id`, or any entry of
 * `entity.aliases`, is strictly equal (case-sensitive) to one of `names`.
 * Keeping this in one place means all fixtures share the same match rule.
 *
 * @param names - Identifiers and display names the fixture answers to.
 * @returns A predicate usable as `MockDoc.matcher`.
 */
const matchesAnyName =
  (names: readonly string[]): MockDoc['matcher'] =>
  p =>
    names.some(name => p.entity.id === name || p.entity.aliases.includes(name));

/**
 * Canned documents served by the mock connector, one per fictional entity.
 *
 * Each entry pairs a name-based matcher with a factory that returns a fresh
 * payload object on every call. `MockConnector.fetch` uses the first entry
 * whose matcher accepts the request, so array order is significant if two
 * matchers could ever accept the same request.
 */
const MOCK_DOCS: readonly MockDoc[] = [
  // ACME: HTML annual report with a two-year table in U.S. dollars and a
  // footnote stating that the 2023 column was restated.
  {
    matcher: matchesAnyName(['ACME', 'ACME Corp', 'Acme']),
    build: () => ({
      source: 'mock',
      sourceUrl: 'https://example.invalid/mock/acme-2024-annual.html',
      contentType: 'text/html',
      rawContent: `<html><body>
        <h1>ACME Corp — Annual Report, Fiscal Year 2024</h1>
        <p>Filed: 2025-02-18. Period of report: 2024-12-31.</p>
        <table>
          <tr><th>Item</th><th>2024</th><th>2023</th></tr>
          <tr><td>Net revenues</td><td>$1,180.4 million</td><td>$1,094.7 million</td></tr>
          <tr><td>Cost of revenues</td><td>$612.2 million</td><td>$590.1 million</td></tr>
          <tr><td>Operating income</td><td>$298.5 million</td><td>$240.9 million</td></tr>
        </table>
        <p>Note 3 — Revenue Recognition: amounts above are reported in U.S. dollars.
        2023 figures have been restated to reflect ASC 606 reclassifications.</p>
      </body></html>`,
      metadata: { mock: true, entityKey: 'ACME', period: 'FY-2024' },
    }),
  },
  // GLOBEX: fixed-width plain-text statement with amounts in millions of EUR.
  {
    matcher: matchesAnyName(['GLOBEX', 'Globex SE', 'Globex']),
    build: () => ({
      source: 'mock',
      sourceUrl: 'https://example.invalid/mock/globex-2024-annual.txt',
      contentType: 'text/plain',
      rawContent: [
        'GLOBEX SE — Annual Report — Fiscal Year ended 31 December 2024',
        '',
        'Consolidated Statements of Operations (in millions of EUR)',
        '                                                  2024        2023',
        '  Total revenue ..............................   845.0       812.3',
        '  Operating expenses .........................   612.5       598.0',
        '  Profit before tax ..........................   188.7       170.2',
        '',
        'Amounts above are presented in millions of euros (EUR).',
      ].join('\n'),
      metadata: { mock: true, entityKey: 'GLOBEX', period: 'FY-2024' },
    }),
  },
  // INITECH: narrative prose only, no table. The two paragraphs quote
  // different sales totals ($402 million vs $410 million).
  // NOTE(review): presumably a deliberate conflict for downstream extraction
  // to resolve — confirm before "fixing" the fixture text.
  {
    matcher: matchesAnyName(['INITECH', 'Initech, Inc.', 'Initech']),
    build: () => ({
      source: 'mock',
      sourceUrl: 'https://example.invalid/mock/initech-2024-annual.html',
      contentType: 'text/html',
      rawContent: `<html><body>
        <h1>Initech, Inc. — Annual Report 2024</h1>
        <p>Filed: 2025-03-04.</p>
        <p>Sales for the year totaled approximately $402 million,
           compared with $385 million in the prior year.</p>
        <p>Sales for the year totaled $410 million per the management discussion
           narrative on page 14.</p>
      </body></html>`,
      metadata: { mock: true, entityKey: 'INITECH', period: 'FY-2024' },
    }),
  },
];

/**
 * Retrieval connector that serves the canned documents in `MOCK_DOCS`.
 *
 * It needs no credentials, always reports itself available, and never
 * performs network I/O.
 */
export class MockConnector implements RetrievalConnector {
  readonly name = 'mock';
  readonly authRequired = false;
  readonly rateLimit = { requestsPerSecond: 100, burstSize: 100 };

  /** Always resolves to `true`: there is no backing service that could be down. */
  async isAvailable(): Promise<boolean> {
    return true;
  }

  /**
   * Returns the first canned document whose matcher accepts `params`.
   *
   * The matchers in `MOCK_DOCS` look only at the entity id and aliases, so
   * `params.period` does not influence which document is chosen; it is only
   * echoed back in the error details.
   *
   * @param params - The fetch request; the entity id and aliases drive matching.
   * @returns The matched payload with `capturedAt` set to the current time
   *   as an ISO-8601 string.
   * @throws RetrievalError with code `'no-content'` when no document matches.
   */
  async fetch(params: FetchParams): Promise<RawPayload> {
    // Short artificial delay so callers still go through a real async hop.
    await new Promise(resolve => setTimeout(resolve, 5));

    for (const candidate of MOCK_DOCS) {
      if (candidate.matcher(params)) {
        const payload = candidate.build();
        return { ...payload, capturedAt: new Date().toISOString() };
      }
    }

    const { entity, period } = params;
    throw new RetrievalError(
      'no-content',
      `mock connector has no content for entity "${entity.id}"`,
      { entity: entity.id, aliases: entity.aliases, period },
    );
  }
}