feat: add website enrichment crawler

This commit is contained in:
2026-06-04 20:29:23 +02:00
parent ca42c8d5a6
commit 1f6e31c01c
25 changed files with 3539 additions and 56 deletions

View File

@@ -0,0 +1,84 @@
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import path from "node:path";
import test from "node:test";
// Location of convex/leadDiscovery.ts, built from the directory the test
// process was started in (process.cwd()).
const leadDiscoveryPath = path.join(
  process.cwd(),
  "convex",
  "leadDiscovery.ts",
);
// The module is loaded once as plain UTF-8 text; the tests below inspect this
// text rather than importing or executing the module.
const leadDiscoverySource = readFileSync(leadDiscoveryPath, {
  encoding: "utf8",
});
/**
 * Reports whether `pattern` matches anywhere in `source`.
 *
 * The check goes through `String.prototype.search`, which always starts at
 * index 0 and restores `pattern.lastIndex` afterwards. `RegExp.prototype.test`
 * would instead resume from `lastIndex` for regexes with the `g` or `y` flag,
 * so calling it twice with the same global pattern could report a match the
 * first time and a miss the second time.
 *
 * @param source - Text to scan.
 * @param pattern - Regular expression to look for; it is not mutated.
 * @returns `true` when at least one match exists, otherwise `false`.
 */
function hasPattern(source: string, pattern: RegExp): boolean {
  return source.search(pattern) !== -1;
}
/**
 * Returns the first balanced `{ ... }` span that follows the declaration
 * `export const <name> = ` in the loaded leadDiscovery source text.
 *
 * Braces are counted character by character, so a `{` or `}` inside a string
 * literal, template literal, or comment is counted like any other brace.
 *
 * @param name - Identifier of the exported constant to locate.
 * @returns The source text from the opening brace through its matching
 *   closing brace, both inclusive.
 * @throws AssertionError when the declaration is missing, when no opening
 *   brace follows it, or when the braces never balance.
 */
function extractExportSource(name: string): string {
  const marker = `export const ${name} = `;
  const declarationIndex = leadDiscoverySource.indexOf(marker);
  assert.notEqual(declarationIndex, -1, `Expected declaration for ${name}`);

  const openBraceIndex = leadDiscoverySource.indexOf("{", declarationIndex);
  // Without this guard a missing brace (-1) would start the scan before the
  // beginning of the file and slice out unrelated text instead of failing.
  assert.notEqual(openBraceIndex, -1, `Expected opening brace for ${name}`);

  let depth = 0;
  let end = -1;
  for (let index = openBraceIndex; index < leadDiscoverySource.length; index++) {
    const char = leadDiscoverySource[index];
    if (char === "{") {
      depth += 1;
    } else if (char === "}") {
      depth -= 1;
      if (depth === 0) {
        // Matching close of the brace found at openBraceIndex.
        end = index;
        break;
      }
    }
  }
  assert.notEqual(end, -1, `Expected balanced braces for ${name}`);
  return leadDiscoverySource.slice(openBraceIndex, end + 1);
}
// Static source check: the text of startCampaignRun must contain a
// withIndex("by_type_and_status", (q) => ...) call whose callback chains
// q.eq("type", "campaign").eq("status", "running").
test("startCampaignRun checks active campaign runs via by_type_and_status", () => {
  const runningCampaignLookup =
    /withIndex\(\s*"by_type_and_status"\s*,\s*\(q\)\s*=>[\s\S]*?q\.eq\("type",\s*"campaign"\)\.eq\("status",\s*"running"\),?[\s\S]*?\)/;
  const startCampaignRunBody = extractExportSource("startCampaignRun");
  const usesIndexedLookup = hasPattern(
    startCampaignRunBody,
    runningCampaignLookup,
  );

  assert.equal(
    usesIndexedLookup,
    true,
    "Campaign starts should only consider running campaign-type runs as blockers",
  );
});
// Static source check: the text of persistDiscoveredLeads must not contain
// the literal "ctx.scheduler.runAfter".
test("persistDiscoveredLeads does not schedule website enrichment jobs directly", () => {
  const persistBody = extractExportSource("persistDiscoveredLeads");
  const schedulesDirectly = persistBody.includes("ctx.scheduler.runAfter");

  assert.equal(
    schedulesDirectly,
    false,
    "Lead persistence must not call runAfter",
  );
});
// Static source check on processCampaignRun: three markers must all be
// present, and their first occurrences must appear in the order
// persist -> queue enrichment -> event message.
test("processCampaignRun schedules website enrichment after lead persistence", () => {
  const body = extractExportSource("processCampaignRun");
  // Position of the first occurrence of `needle`, or -1 when absent.
  const locate = (needle: string): number => body.indexOf(needle);

  const persistAt = locate("internal.leadDiscovery.persistDiscoveredLeads");
  const enqueueAt = locate("internal.websiteEnrichment.queueLeadEnrichment");
  const eventAt = locate("Website-Kontaktanreicherung geplant.");

  // Presence checks come first so a missing marker is reported by name
  // before any ordering comparison runs.
  assert.notEqual(persistAt, -1, "processCampaignRun should persist discovered leads");
  assert.notEqual(enqueueAt, -1, "processCampaignRun should schedule website enrichment");
  assert.notEqual(eventAt, -1, "processCampaignRun should append enrichment schedule events");

  assert.ok(
    persistAt < enqueueAt,
    "processCampaignRun should schedule enrichment after persistence succeeds",
  );
  assert.ok(
    enqueueAt < eventAt,
    "processCampaignRun should append enrichment event after scheduling",
  );
});