/**
* Mock retrieval connector — the only connector that ships with the
* framework. It returns realistic-shaped UNSTRUCTURED payloads (HTML
* tables, fixed-width text, narrative prose) so the parser layer has
* something to do; the Source/Extraction agent does the real work.
*
* Swap or supplement this by adding files under
* `src/tools/retrieval/connectors/` and registering them in the
* dispatcher — see connectors/README.md for the contract.
*/
import { RetrievalError, type FetchParams, type RawPayload, type RetrievalConnector } from './interface.js';
/** A raw payload before the connector stamps it with `capturedAt`. */
type UnstampedPayload = Omit<RawPayload, 'capturedAt'>;

/**
 * One canned document served by the mock connector: a predicate that
 * decides whether a fetch request selects it, paired with a factory for
 * the payload to hand back.
 */
interface MockDoc {
  /** Returns `true` when this document should answer the given request. */
  readonly matcher: (request: FetchParams) => boolean;
  /** Produces the payload, minus the `capturedAt` field. */
  readonly build: () => UnstampedPayload;
}
/**
 * Creates a matcher that accepts a request when any of `names` is exactly
 * equal (case-sensitive) to the requested entity id, or is present in the
 * request's alias list.
 */
function matchesAnyOf(names: readonly string[]): (params: FetchParams) => boolean {
  return params =>
    names.some(name => params.entity.id === name || params.entity.aliases.includes(name));
}

/** ACME fixture body: an HTML table of USD figures plus a restatement note. */
const ACME_ANNUAL_HTML = `<html><body>
<h1>ACME Corp — Annual Report, Fiscal Year 2024</h1>
<p>Filed: 2025-02-18. Period of report: 2024-12-31.</p>
<table>
<tr><th>Item</th><th>2024</th><th>2023</th></tr>
<tr><td>Net revenues</td><td>$1,180.4 million</td><td>$1,094.7 million</td></tr>
<tr><td>Cost of revenues</td><td>$612.2 million</td><td>$590.1 million</td></tr>
<tr><td>Operating income</td><td>$298.5 million</td><td>$240.9 million</td></tr>
</table>
<p>Note 3 — Revenue Recognition: amounts above are reported in U.S. dollars.
2023 figures have been restated to reflect ASC 606 reclassifications.</p>
</body></html>`;

/** Globex fixture body: a plain-text statement with figures in millions of EUR. */
const GLOBEX_ANNUAL_TEXT = [
  'GLOBEX SE — Annual Report — Fiscal Year ended 31 December 2024',
  '',
  'Consolidated Statements of Operations (in millions of EUR)',
  ' 2024 2023',
  ' Total revenue .............................. 845.0 812.3',
  ' Operating expenses ......................... 612.5 598.0',
  ' Profit before tax .......................... 188.7 170.2',
  '',
  'Amounts above are presented in millions of euros (EUR).',
].join('\n');

/**
 * Initech fixture body: narrative HTML only, no table.
 * NOTE(review): the two paragraphs state different sales totals
 * (~$402 million vs $410 million). This is presumably deliberate, to give
 * downstream extraction a conflicting-values case — confirm before "fixing".
 */
const INITECH_ANNUAL_HTML = `<html><body>
<h1>Initech, Inc. — Annual Report 2024</h1>
<p>Filed: 2025-03-04.</p>
<p>Sales for the year totaled approximately $402 million,
compared with $385 million in the prior year.</p>
<p>Sales for the year totaled $410 million per the management discussion
narrative on page 14.</p>
</body></html>`;

/**
 * The canned documents the mock connector can serve, in lookup order.
 * Each `build()` call returns a new payload object (with a new `metadata`
 * object), so callers may mutate what they receive without affecting
 * later fetches.
 */
const MOCK_DOCS: readonly MockDoc[] = [
  {
    matcher: matchesAnyOf(['ACME', 'ACME Corp', 'Acme']),
    build() {
      return {
        source: 'mock',
        sourceUrl: 'https://example.invalid/mock/acme-2024-annual.html',
        contentType: 'text/html',
        rawContent: ACME_ANNUAL_HTML,
        metadata: { mock: true, entityKey: 'ACME', period: 'FY-2024' },
      };
    },
  },
  {
    matcher: matchesAnyOf(['GLOBEX', 'Globex SE', 'Globex']),
    build() {
      return {
        source: 'mock',
        sourceUrl: 'https://example.invalid/mock/globex-2024-annual.txt',
        contentType: 'text/plain',
        rawContent: GLOBEX_ANNUAL_TEXT,
        metadata: { mock: true, entityKey: 'GLOBEX', period: 'FY-2024' },
      };
    },
  },
  {
    matcher: matchesAnyOf(['INITECH', 'Initech, Inc.', 'Initech']),
    build() {
      return {
        source: 'mock',
        sourceUrl: 'https://example.invalid/mock/initech-2024-annual.html',
        contentType: 'text/html',
        rawContent: INITECH_ANNUAL_HTML,
        metadata: { mock: true, entityKey: 'INITECH', period: 'FY-2024' },
      };
    },
  },
];
/**
 * Retrieval connector backed entirely by the in-memory `MOCK_DOCS` table.
 * It needs no credentials and always reports itself as available.
 */
export class MockConnector implements RetrievalConnector {
  readonly name = 'mock';
  readonly authRequired = false;
  readonly rateLimit = { requestsPerSecond: 100, burstSize: 100 };

  /** Always resolves to `true`. */
  async isAvailable(): Promise<boolean> {
    return true;
  }

  /**
   * Returns the first mock document whose matcher accepts `params`, with
   * `capturedAt` set to the current time as an ISO-8601 string.
   *
   * @param params - The fetch request; its entity id, aliases and period
   *   are echoed in the error details when nothing matches.
   * @returns The matched document's payload plus a `capturedAt` timestamp.
   * @throws RetrievalError with code `'no-content'` when no mock document
   *   matches the request.
   */
  async fetch(params: FetchParams): Promise<RawPayload> {
    // Brief artificial delay before answering, so the call is genuinely async.
    const simulatedLatencyMs = 5;
    await new Promise(resolve => setTimeout(resolve, simulatedLatencyMs));

    const match = MOCK_DOCS.find(candidate => candidate.matcher(params));
    if (match) {
      return { ...match.build(), capturedAt: new Date().toISOString() };
    }

    const { entity, period } = params;
    throw new RetrievalError(
      'no-content',
      `mock connector has no content for entity "${entity.id}"`,
      { entity: entity.id, aliases: entity.aliases, period },
    );
  }
}