/**
 * Static source assertions for the website-enrichment modules.
 *
 * These tests never execute the enrichment code. They read
 * `convex/websiteEnrichment.ts`, `convex/websiteEnrichmentAction.ts` and
 * `convex.json` relative to `process.cwd()` and assert on their text (via
 * regular expressions) and on their exported `const` names (via the TypeScript
 * parser).
 */
import assert from "node:assert/strict";
import { existsSync, readFileSync } from "node:fs";
import path from "node:path";
import test from "node:test";
import ts from "typescript";

/**
 * Reads a UTF-8 text file, returning an empty string when it does not exist.
 *
 * Reading eagerly with `readFileSync` alone would throw while this module is
 * being loaded, before any test is registered, so the dedicated "module
 * exists" assertions below could never report a missing file themselves.
 */
function readSourceIfPresent(filePath: string): string {
  return existsSync(filePath) ? readFileSync(filePath, "utf8") : "";
}

/** Parses TypeScript source text into a syntax tree with parent pointers set. */
function parseTypeScriptSource(fileName: string, source: string): ts.SourceFile {
  return ts.createSourceFile(
    fileName,
    source,
    ts.ScriptTarget.ES2022,
    true,
    ts.ScriptKind.TS,
  );
}

const convexConfigPath = path.join(process.cwd(), "convex.json");
const convexConfigSource = readSourceIfPresent(convexConfigPath);
const websiteEnrichmentPath = path.join(
  process.cwd(),
  "convex/websiteEnrichment.ts",
);
const actionPath = path.join(process.cwd(), "convex/websiteEnrichmentAction.ts");
const websiteEnrichmentSource = readSourceIfPresent(websiteEnrichmentPath);
const actionSource = readSourceIfPresent(actionPath);
const websiteEnrichmentSourceFile = parseTypeScriptSource(
  "websiteEnrichment.ts",
  websiteEnrichmentSource,
);
const actionSourceFile = parseTypeScriptSource(
  "websiteEnrichmentAction.ts",
  actionSource,
);

/**
 * Collects the identifier names declared by `export const ...` statements.
 *
 * The whole tree is walked, so nested variable statements are inspected as
 * well. Destructuring declarations (non-identifier binding names) are skipped.
 *
 * @param file Parsed source file to inspect.
 * @returns The set of exported `const` identifier names.
 */
function getExportedConstNames(file: ts.SourceFile): Set<string> {
  const names = new Set<string>();

  const visit = (node: ts.Node): void => {
    if (ts.isVariableStatement(node)) {
      const isExported =
        node.modifiers?.some(
          (mod) => mod.kind === ts.SyntaxKind.ExportKeyword,
        ) ?? false;
      const isConst = (node.declarationList.flags & ts.NodeFlags.Const) !== 0;

      if (isExported && isConst) {
        for (const declaration of node.declarationList.declarations) {
          if (ts.isIdentifier(declaration.name)) {
            names.add(declaration.name.text);
          }
        }
      }
    }
    // Always descend, whether or not this node contributed a name.
    ts.forEachChild(node, visit);
  };

  ts.forEachChild(file, visit);
  return names;
}

/**
 * Reports whether `pattern` matches anywhere in `source`.
 *
 * `RegExp.prototype.test` resumes from `lastIndex` for global/sticky regexes,
 * so the index is reset first to make the result independent of earlier calls
 * with the same regex object.
 */
function hasPattern(source: string, pattern: RegExp): boolean {
  pattern.lastIndex = 0;
  return pattern.test(source);
}

/**
 * Extracts `node.externalPackages` from the raw `convex.json` text.
 *
 * An empty source (missing file) is treated as an empty config object so the
 * caller's assertions, rather than a parse error, describe the problem.
 *
 * @returns The raw `externalPackages` value, or `undefined` when absent.
 */
function readExternalPackages(configSource: string): unknown {
  const config = JSON.parse(configSource || "{}") as {
    node?: { externalPackages?: unknown };
  } | null;
  return config?.node?.externalPackages;
}

test("website enrichment mutation module exists and has runtime assertions", () => {
  assert.equal(
    existsSync(websiteEnrichmentPath),
    true,
    "websiteEnrichment.ts should be present",
  );
  assert.equal(
    hasPattern(websiteEnrichmentSource, /^"use node";/m),
    false,
    "websiteEnrichment.ts should not declare a Node runtime",
  );
});

test("website enrichment action module exists and uses Node runtime", () => {
  assert.equal(
    existsSync(actionPath),
    true,
    "websiteEnrichmentAction.ts should be present",
  );
  assert.equal(
    hasPattern(actionSource, /^"use node";/m),
    true,
    "websiteEnrichmentAction.ts should declare Node runtime",
  );
});

test("module exports are split across mutations and action", () => {
  const mutationExports = getExportedConstNames(websiteEnrichmentSourceFile);
  const actionExports = getExportedConstNames(actionSourceFile);
  const requiredMutationExports = [
    "queueLeadEnrichment",
    "startLeadEnrichmentRun",
    "persistLeadEnrichmentResult",
    "finishLeadEnrichmentRun",
    "patchLeadFromWebsiteEnrichment",
  ];
  const requiredActionExports = ["processLeadEnrichment"];

  for (const exportName of requiredMutationExports) {
    assert.equal(
      mutationExports.has(exportName),
      true,
      `Expected mutation export in websiteEnrichment.ts: ${exportName}`,
    );
  }
  for (const exportName of requiredActionExports) {
    assert.equal(
      actionExports.has(exportName),
      true,
      `Expected action export in websiteEnrichmentAction.ts: ${exportName}`,
    );
  }
});

test("queueLeadEnrichment schedules internal.websiteEnrichmentAction.processLeadEnrichment", () => {
  assert.equal(
    hasPattern(
      websiteEnrichmentSource,
      /queueLeadEnrichment\s*=\s*internalMutation\([\s\S]*?ctx\.scheduler\.runAfter\(\s*0,\s*internal\.websiteEnrichmentAction\.processLeadEnrichment/,
    ),
    true,
    "Queue mutation should schedule action with runAfter(0, internal.websiteEnrichmentAction.processLeadEnrichment)",
  );
});

test("queueLeadEnrichment uses lead-aware run index and does not use fixed-size .take(50) windows", () => {
  // Isolate the text from `export const queueLeadEnrichment` up to (but not
  // including) the line that starts the next export.
  const queueBodyMatch = websiteEnrichmentSource.match(
    /export const queueLeadEnrichment[\s\S]*?(?=\nexport const startLeadEnrichmentRun)/,
  );
  assert.equal(
    queueBodyMatch !== null,
    true,
    "queueLeadEnrichment block should be parseable for source assertions",
  );
  const queueBody = queueBodyMatch?.[0] ?? "";

  assert.equal(
    hasPattern(
      queueBody,
      /withIndex\("by_type_and_status_and_leadId"[\s\S]*?eq\("type",\s*"website_enrichment"\)[\s\S]*?eq\("status",\s*"pending"\)[\s\S]*?eq\("leadId",\s*args\.leadId\)/,
    ),
    true,
    "Queue dedupe for pending runs should use direct type+status+leadId index.",
  );
  assert.equal(
    hasPattern(
      queueBody,
      /withIndex\("by_type_and_status_and_leadId"[\s\S]*?eq\("type",\s*"website_enrichment"\)[\s\S]*?eq\("status",\s*"running"\)[\s\S]*?eq\("leadId",\s*args\.leadId\)/,
    ),
    true,
    "Queue dedupe for running runs should use direct type+status+leadId index.",
  );
  assert.equal(
    hasPattern(queueBody, /take\(50\)/),
    false,
    "No fixed-size .take(50) window in dedupe queries.",
  );
});

test("website enrichment action uses Chromium desktop/mobile devices and runtime Playwright import", () => {
  assert.equal(
    hasPattern(
      actionSource,
      /import\s+type\s+\{[^\n]*BrowserContext[^\n]*\}\s+from\s+["']playwright-core["']/,
    ),
    true,
    "Action should import BrowserContext type for typed helper signatures",
  );
  assert.equal(
    hasPattern(actionSource, /loadPlaywrightModules\(\)/),
    true,
    "Action should load Playwright at runtime from inside action",
  );
  assert.equal(
    hasPattern(actionSource, /import\("playwright-core"\)/),
    true,
    "Action should use a dynamic import for playwright-core that Convex can detect as an external package",
  );
  assert.equal(
    hasPattern(actionSource, /import\("@sparticuz\/chromium-min"\)/),
    true,
    "Action should use a dynamic import for @sparticuz/chromium-min as the lightweight browser package",
  );
  assert.equal(
    hasPattern(actionSource, /TASK8_BROWSER_ASSET_URL/),
    true,
    "Action should reference TASK8_BROWSER_ASSET_URL when loading browser assets",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /TASK8_BROWSER_ASSET_URL[\s\S]{0,240}(throw|Error|required|missing|not configured|configured|konfiguriert|setze)/i,
    ),
    true,
    "Action should surface a clear error when the browser asset URL is not configured",
  );
  assert.equal(
    hasPattern(actionSource, /import\("@sparticuz\/chromium"\)/),
    false,
    "Action should not import the oversized @sparticuz/chromium package",
  );

  const externalPackages = readExternalPackages(convexConfigSource);
  assert.equal(
    Array.isArray(externalPackages),
    true,
    "convex.json should define node.externalPackages",
  );
  // The assertion above guarantees an array here; the fallback only satisfies
  // the type checker.
  const packageList: unknown[] = Array.isArray(externalPackages)
    ? externalPackages
    : [];
  assert.equal(
    packageList.includes("playwright-core"),
    true,
    "convex.json must include playwright-core in externalPackages",
  );
  assert.equal(
    packageList.includes("@sparticuz/chromium-min"),
    true,
    "convex.json should include @sparticuz/chromium-min for browser runtime",
  );
  assert.equal(
    packageList.includes("@sparticuz/chromium"),
    false,
    "convex.json should not include the oversized @sparticuz/chromium package",
  );

  assert.equal(
    hasPattern(actionSource, /serverlessChromium/),
    true,
    "Runtime bootstrap should still use a serverless Chromium wrapper object for launch config",
  );
  assert.equal(
    hasPattern(actionSource, /devices\["Desktop Chrome"\]/),
    true,
    "Desktop context should use Playwright Desktop Chrome device profile",
  );
  assert.equal(
    hasPattern(actionSource, /devices\["iPhone 11"\]/),
    true,
    "Mobile context should use Playwright iPhone 11 device profile",
  );
});

test("website enrichment action invalidates stale @sparticuz/chromium-min cache when source changes", () => {
  assert.equal(
    hasPattern(actionSource, /CHROMIUM_SOURCE_MARKER_FILE/),
    true,
    "Action should declare a temporary marker file path for Chromium executable source cache tracking.",
  );
  assert.equal(
    hasPattern(actionSource, /tmpdir\(\)/),
    true,
    "Action should derive temporary cache paths from os.tmpdir().",
  );
  assert.equal(
    hasPattern(actionSource, /getChromiumSourceMarker\(/),
    true,
    "Action should hash executable sources into a stable marker.",
  );
  assert.equal(
    hasPattern(actionSource, /clearChromiumCacheForSourceMismatch\(/),
    true,
    "Action should centralize cache invalidation in a dedicated helper.",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /rm\(CHROMIUM_EXECUTABLE_PATH,\s*\{ force: true, recursive: true \}\),/,
    ),
    true,
    "Action should remove /tmp/chromium when executable source changes.",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /rm\(CHROMIUM_PACK_PATH,\s*\{ force: true, recursive: true \}\),/,
    ),
    true,
    "Action should remove /tmp/chromium-pack when executable source changes.",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /clearChromiumCacheForSourceMismatch\(executableSource\)[\s\S]*?chromium\.executablePath\(executableSource\)/,
    ),
    true,
    "Action should clear stale cache before resolving Chromium executable path.",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /writeFile\([\s\S]*?CHROMIUM_SOURCE_MARKER_FILE,[\s\S]*?getChromiumSourceMarker\(executableSource\)/,
    ),
    true,
    "Action should persist the source marker after executable path resolution.",
  );
});

test("website enrichment action prepares Chromium AL2023 shared libraries for Convex runtime", () => {
  const hasChromiumHelpers =
    (hasPattern(actionSource, /inflate/) &&
      hasPattern(actionSource, /setupLambdaEnvironment/)) ||
    hasPattern(actionSource, /LD_LIBRARY_PATH/);
  assert.equal(
    hasChromiumHelpers,
    true,
    "Action should explicitly prepare chromium-min runtime environment for AL2023 shared libraries to avoid `/tmp/chromium: error while loading shared libraries: libnspr4.so` (inflate/setupLambdaEnvironment or LD_LIBRARY_PATH).",
  );

  const hasAl2023LibPath =
    hasPattern(
      actionSource,
      /path\.join\(\s*tmpdir\(\),\s*["']al2023["'],\s*["']lib["']\s*\)/,
    ) ||
    (hasPattern(actionSource, /LD_LIBRARY_PATH/) &&
      hasPattern(actionSource, /al2023\/lib/));
  const referencesRuntimeArchive = hasPattern(actionSource, /al2023\.tar\.br/);
  const referencesPackPath = hasPattern(actionSource, /CHROMIUM_PACK_PATH/);
  assert.equal(
    referencesRuntimeArchive && referencesPackPath && hasAl2023LibPath,
    true,
    "Action should reference al2023.tar.br, track CHROMIUM_PACK_PATH, and ensure /tmp/al2023/lib is prepared for Convex launch.",
  );

  const executableIndex = actionSource.indexOf(
    "const executablePath = await resolveChromiumExecutablePath(",
  );
  const launchIndex = actionSource.indexOf("chromium.launch({");
  // Largest first-occurrence offset among the setup markers; markers that are
  // absent contribute -1 and therefore never win.
  const setupIndex = Math.max(
    actionSource.indexOf("setupLambdaEnvironment("),
    actionSource.indexOf("LD_LIBRARY_PATH"),
    actionSource.indexOf("path.join(tmpdir(), \"al2023\", \"lib\")"),
  );
  assert.equal(
    executableIndex >= 0 &&
      setupIndex > executableIndex &&
      setupIndex < launchIndex,
    true,
    "Executable resolution and AL2023 shared-library setup should happen before chromium launch in the action runtime path.",
  );
});

test("processLeadEnrichment wraps Playwright bootstrap in protected try/catch", () => {
  assert.equal(
    hasPattern(
      actionSource,
      /try\s*\{[\s\S]*?const \{ playwrightCore, serverlessChromium \}\s*=\s*await loadPlaywrightModules\(\);[\s\S]*?const executablePath = await resolveChromiumExecutablePath\(\s*serverlessChromium,\s*\);[\s\S]*?browser = await playwrightCore\.chromium\.launch\([\s\S]*?executablePath,[\s\S]*?desktopContext = await browser\.newContext\([\s\S]*?mobileContext = await browser\.newContext\(/,
    ),
    true,
    "Playwright runtime bootstrap should use resolveChromiumExecutablePath() inside the action's try/catch-protected block",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /catch\s*\(error\)\s*\{[\s\S]*?finishLeadEnrichmentRun[\s\S]*?runs\.appendEvent[\s\S]*?patchLeadFromWebsiteEnrichment/,
    ),
    true,
    "Bootstrap failures should be handled by finish + error event + lead patch in catch",
  );
});

test("persistence caps candidates and links before writing", () => {
  assert.equal(
    hasPattern(actionSource, /MAX_PERSISTED_LINKS\s*=\s*120/),
    true,
    "Action should define MAX_PERSISTED_LINKS with value 120.",
  );
  assert.equal(
    hasPattern(actionSource, /MAX_PERSISTED_EMAIL_CANDIDATES\s*=\s*40/),
    true,
    "Action should define MAX_PERSISTED_EMAIL_CANDIDATES with value 40.",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /deduplicateCrawlLinks\(allLinks\)[\s\S]*?slice\([\s\S]*?MAX_PERSISTED_LINKS/,
    ),
    true,
    "Action should dedupe and cap link persistence at MAX_PERSISTED_LINKS.",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /validCandidates\.slice\([\s\S]*?MAX_PERSISTED_EMAIL_CANDIDATES/,
    ),
    true,
    "Action should cap candidate persistence at MAX_PERSISTED_EMAIL_CANDIDATES.",
  );
});

test("website enrichment process stores homepage screenshots in Convex storage as PNG", () => {
  assert.equal(
    hasPattern(actionSource, /ctx\.storage\.store\(/),
    true,
    "Action should store screenshot blobs via ctx.storage.store",
  );
  assert.equal(
    hasPattern(actionSource, /new\s+Blob\(\[[\s\S]*?SCREENSHOT_MIME_TYPE/),
    true,
    "Action should wrap screenshots in Blob with image/png MIME type",
  );
});

test("startLeadEnrichmentRun marks missing website lead with contact status reason", () => {
  assert.equal(
    hasPattern(
      websiteEnrichmentSource,
      /if \(!lead\.websiteUrl\)\s*\{[\s\S]*?status:\s*"failed"[\s\S]*?contactStatusReason:\s*"Website-URL fehlt für das Website-Enrichment\."/,
    ),
    true,
    "Missing websiteUrl should set a specific contactStatusReason on the lead",
  );
});

test("website enrichment persistence inserts all required evidence table rows", () => {
  const expectedTables = [
    "websiteCrawlPages",
    "websiteCrawlLinks",
    "websiteEmailCandidates",
    "websiteCrawlScreenshots",
    "websiteTechnicalChecks",
  ] as const;

  for (const tableName of expectedTables) {
    assert.equal(
      hasPattern(
        websiteEnrichmentSource,
        new RegExp(`ctx\\.db\\.insert\\(["']${tableName}["']`, "s"),
      ),
      true,
      `persistLeadEnrichmentResult should insert into ${tableName}`,
    );
  }
});

test("website enrichment flow uses TASK-7 email selection helper for lead patching", () => {
  assert.equal(
    hasPattern(actionSource, /getUsableContactEmailFromEntries\([\s\S]*?\)/),
    true,
    "Action should call getUsableContactEmailFromEntries",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /runMutation\(\s*internal\.websiteEnrichment\.patchLeadFromWebsiteEnrichment[\s\S]*?\{[\s\S]*?email:\s*usable\.email/,
    ),
    true,
    "Action should patch lead from usable email result",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /currentContactStatus\s*:\s*started\.lead\.contactStatus/,
    ),
    true,
    "Action should pass lead contact status to patchLeadFromWebsiteEnrichment",
  );
  assert.equal(
    hasPattern(
      websiteEnrichmentSource,
      /args\.currentContactStatus\s*===\s*\"missing_contact\"/,
    ),
    true,
    "Lead patch mutation should only set new status for missing_contact",
  );
});

test("failure handling marks run as failed and writes lead-facing reason", () => {
  assert.equal(
    hasPattern(
      actionSource,
      /runMutation\(\s*internal\.websiteEnrichment\.finishLeadEnrichmentRun[\s\S]*?status:\s*"failed"/,
    ),
    true,
    "Action should persist failed run state on fatal crawl errors",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /runMutation\(\s*api\.runs\.appendEvent[\s\S]*?level:\s*"error"[\s\S]*?message:\s*"Website-Enrichment fehlgeschlagen/,
    ),
    true,
    "Action should append a visible error event on failure",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /contactStatusReason:\s*`Website-Enrichment fehlgeschlagen:\s*\$\{errorSummary\}`/,
    ),
    true,
    "Action should patch the lead with an actionable failure reason",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /contactStatusReason:\s*"Website-Enrichment fehlgeschlagen: Ungültige Website-URL\."/,
    ),
    true,
    "Invalid-url failure should also update lead contact status reason",
  );
});

test("website enrichment enforces TASK-8 crawler limits and runtime timeboxes", () => {
  assert.equal(
    hasPattern(actionSource, /TASK8_CRAWL_TIMEOUT_MS/),
    true,
    "TASK8_CRAWL_TIMEOUT_MS environment override should be used",
  );
  assert.equal(
    hasPattern(actionSource, /DEFAULT_CRAWL_TIMEOUT_MS\s*=\s*60_000/),
    true,
    "Default crawl timeout should be 60s",
  );
  assert.equal(
    hasPattern(actionSource, /DEFAULT_CRAWL_MAX_PAGES\s*=\s*5/),
    true,
    "Default max crawl page count should be 5",
  );
});