/**
 * Static source-level regression tests for the website enrichment Convex
 * modules. The tests never execute the modules; they read their source text
 * (and convex.json) from the current working directory and assert on
 * exported names, string patterns and relative ordering of call sites.
 */
import assert from "node:assert/strict";
import { existsSync, readFileSync } from "node:fs";
import path from "node:path";
import test from "node:test";
import ts from "typescript";

/**
 * Reads a UTF-8 file, returning an empty string when the file does not exist.
 *
 * Reading at module load must not throw for a missing file: otherwise the
 * whole test module aborts before any test is registered and the dedicated
 * "module exists" assertions below could never report the problem.
 *
 * @param filePath Absolute path of the file to read.
 * @returns The file content, or `""` when the file is missing (ENOENT).
 * @throws Re-throws every read error other than ENOENT.
 */
function readSourceIfPresent(filePath: string): string {
  try {
    return readFileSync(filePath, "utf8");
  } catch (error: unknown) {
    if (error instanceof Error && "code" in error && error.code === "ENOENT") {
      return "";
    }
    throw error;
  }
}

const convexConfigPath = path.join(process.cwd(), "convex.json");
const convexConfigSource = readSourceIfPresent(convexConfigPath);

const websiteEnrichmentPath = path.join(
  process.cwd(),
  "convex/websiteEnrichment.ts",
);
const actionPath = path.join(process.cwd(), "convex/websiteEnrichmentAction.ts");

const websiteEnrichmentSource = readSourceIfPresent(websiteEnrichmentPath);
const actionSource = readSourceIfPresent(actionPath);

const websiteEnrichmentSourceFile = ts.createSourceFile(
  "websiteEnrichment.ts",
  websiteEnrichmentSource,
  ts.ScriptTarget.ES2022,
  true,
  ts.ScriptKind.TS,
);
const actionSourceFile = ts.createSourceFile(
  "websiteEnrichmentAction.ts",
  actionSource,
  ts.ScriptTarget.ES2022,
  true,
  ts.ScriptKind.TS,
);

/**
 * Collects the identifier names of every `export const` declaration found
 * while walking the given source file.
 *
 * Destructuring declarations (`export const { a } = ...`) are skipped because
 * only plain identifier bindings are recorded.
 *
 * @param file Parsed TypeScript source file.
 * @returns Set of exported const identifier names.
 */
function getExportedConstNames(file: ts.SourceFile): Set<string> {
  const names = new Set<string>();

  const visit = (node: ts.Node): void => {
    if (ts.isVariableStatement(node)) {
      const isExported =
        node.modifiers?.some(
          (mod) => mod.kind === ts.SyntaxKind.ExportKeyword,
        ) ?? false;
      const isConst = (node.declarationList.flags & ts.NodeFlags.Const) !== 0;

      if (isExported && isConst) {
        for (const declaration of node.declarationList.declarations) {
          if (ts.isIdentifier(declaration.name)) {
            names.add(declaration.name.text);
          }
        }
      }
    }
    // Always descend, whether or not this node contributed a name.
    ts.forEachChild(node, visit);
  };

  ts.forEachChild(file, visit);
  return names;
}

/**
 * Reports whether `pattern` matches anywhere in `source`.
 *
 * Uses `String.prototype.search`, which ignores and restores the regex's
 * `lastIndex`, so the result does not depend on earlier calls even when the
 * caller passes a regex carrying the `g` flag.
 *
 * @param source Text to search.
 * @param pattern Regular expression to look for.
 * @returns `true` when at least one match exists.
 */
function hasPattern(source: string, pattern: RegExp): boolean {
  return source.search(pattern) !== -1;
}

/**
 * Extracts the first brace-delimited block following `export const <name> = `.
 *
 * The scan counts raw `{` / `}` characters and is therefore a heuristic:
 * unbalanced braces inside strings, comments or regex literals of the
 * inspected source would shift the detected end.
 *
 * @param source Full module source text.
 * @param name Exported const name to locate.
 * @returns Source text from the opening brace through its matching closing
 *   brace (both inclusive).
 * @throws AssertionError when the declaration, its opening brace, or the
 *   matching closing brace cannot be found.
 */
function extractExportSource(source: string, name: string): string {
  const marker = `export const ${name} = `;
  const declarationIndex = source.indexOf(marker);
  assert.notEqual(declarationIndex, -1, `Expected declaration for ${name}`);

  const openBraceIndex = source.indexOf("{", declarationIndex);
  // Without this guard the scan would start at index -1 and could latch onto
  // an unrelated brace pair that appears before the declaration.
  assert.notEqual(openBraceIndex, -1, `Expected opening brace for ${name}`);

  let depth = 0;
  let end = -1;
  for (let index = openBraceIndex; index < source.length; index += 1) {
    const char = source[index];
    if (char === "{") {
      depth += 1;
    } else if (char === "}") {
      depth -= 1;
      if (depth === 0) {
        end = index;
        break;
      }
    }
  }

  assert.notEqual(end, -1, `Expected balanced braces for ${name}`);
  return source.slice(openBraceIndex, end + 1);
}

// The mutation module must exist and must not opt into the Node runtime.
test("website enrichment mutation module exists and has runtime assertions", () => {
  assert.equal(
    existsSync(websiteEnrichmentPath),
    true,
    "websiteEnrichment.ts should be present",
  );
  assert.equal(
    hasPattern(websiteEnrichmentSource, /^"use node";/m),
    false,
    "websiteEnrichment.ts should not declare a Node runtime",
  );
});

// The action module must exist and must opt into the Node runtime.
test("website enrichment action module exists and uses Node runtime", () => {
  assert.equal(
    existsSync(actionPath),
    true,
    "websiteEnrichmentAction.ts should be present",
  );
  assert.equal(
    hasPattern(actionSource, /^"use node";/m),
    true,
    "websiteEnrichmentAction.ts should declare Node runtime",
  );
});

// Mutations live in websiteEnrichment.ts, the action in websiteEnrichmentAction.ts.
test("module exports are split across mutations and action", () => {
  const mutationExports = getExportedConstNames(websiteEnrichmentSourceFile);
  const actionExports = getExportedConstNames(actionSourceFile);

  const requiredMutationExports = [
    "queueLeadEnrichment",
    "startLeadEnrichmentRun",
    "persistLeadEnrichmentResult",
    "finishLeadEnrichmentRun",
    "patchLeadFromWebsiteEnrichment",
  ];
  const requiredActionExports = ["processLeadEnrichment"];

  for (const exportName of requiredMutationExports) {
    assert.equal(
      mutationExports.has(exportName),
      true,
      `Expected mutation export in websiteEnrichment.ts: ${exportName}`,
    );
  }

  for (const exportName of requiredActionExports) {
    assert.equal(
      actionExports.has(exportName),
      true,
      `Expected action export in websiteEnrichmentAction.ts: ${exportName}`,
    );
  }
});

test("queueLeadEnrichment schedules internal.websiteEnrichmentAction.processLeadEnrichment", () => {
  assert.equal(
    hasPattern(
      websiteEnrichmentSource,
      /queueLeadEnrichment\s*=\s*internalMutation\([\s\S]*?ctx\.scheduler\.runAfter\(\s*0,\s*internal\.websiteEnrichmentAction\.processLeadEnrichment/,
    ),
    true,
    "Queue mutation should schedule action with runAfter(0, internal.websiteEnrichmentAction.processLeadEnrichment)",
  );
});

test("queueLeadEnrichment uses lead-aware run index and does not use fixed-size .take(50) windows", () => {
  // Isolate the queueLeadEnrichment declaration up to the next export.
  const queueBodyMatch = websiteEnrichmentSource.match(
    /export const queueLeadEnrichment[\s\S]*?(?=\nexport const startLeadEnrichmentRun)/,
  );
  assert.equal(
    queueBodyMatch !== null,
    true,
    "queueLeadEnrichment block should be parseable for source assertions",
  );
  const queueBody = queueBodyMatch?.[0] ?? "";

  assert.equal(
    hasPattern(
      queueBody,
      /withIndex\("by_type_and_status_and_leadId"[\s\S]*?eq\("type",\s*"website_enrichment"\)[\s\S]*?eq\("status",\s*"pending"\)[\s\S]*?eq\("leadId",\s*args\.leadId\)/,
    ),
    true,
    "Queue dedupe for pending runs should use direct type+status+leadId index.",
  );
  assert.equal(
    hasPattern(
      queueBody,
      /withIndex\("by_type_and_status_and_leadId"[\s\S]*?eq\("type",\s*"website_enrichment"\)[\s\S]*?eq\("status",\s*"running"\)[\s\S]*?eq\("leadId",\s*args\.leadId\)/,
    ),
    true,
    "Queue dedupe for running runs should use direct type+status+leadId index.",
  );
  assert.equal(
    hasPattern(queueBody, /take\(50\)/),
    false,
    "No fixed-size .take(50) window in dedupe queries.",
  );
});

test("website enrichment action uses Chromium desktop/mobile devices and runtime Playwright import", () => {
  assert.equal(
    hasPattern(
      actionSource,
      /import\s+type\s+\{[^\n]*BrowserContext[^\n]*\}\s+from\s+["']playwright-core["']/,
    ),
    true,
    "Action should import BrowserContext type for typed helper signatures",
  );
  assert.equal(
    hasPattern(actionSource, /loadPlaywrightModules\(\)/),
    true,
    "Action should load Playwright at runtime from inside action",
  );
  assert.equal(
    hasPattern(actionSource, /import\("playwright-core"\)/),
    true,
    "Action should use a dynamic import for playwright-core that Convex can detect as an external package",
  );
  assert.equal(
    hasPattern(actionSource, /import\("@sparticuz\/chromium-min"\)/),
    true,
    "Action should use a dynamic import for @sparticuz/chromium-min as the lightweight browser package",
  );
  assert.equal(
    hasPattern(actionSource, /TASK8_BROWSER_ASSET_URL/),
    true,
    "Action should reference TASK8_BROWSER_ASSET_URL when loading browser assets",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /TASK8_BROWSER_ASSET_URL[\s\S]{0,240}(throw|Error|required|missing|not configured|configured|konfiguriert|setze)/i,
    ),
    true,
    "Action should surface a clear error when the browser asset URL is not configured",
  );
  assert.equal(
    hasPattern(actionSource, /import\("@sparticuz\/chromium"\)/),
    false,
    "Action should not import the oversized @sparticuz/chromium package",
  );

  // convex.json is read leniently at load time, so check for it explicitly.
  assert.equal(
    existsSync(convexConfigPath),
    true,
    "convex.json should be present",
  );
  const externalPackages = JSON.parse(convexConfigSource).node?.externalPackages;
  assert.equal(
    Array.isArray(externalPackages),
    true,
    "convex.json should define node.externalPackages",
  );
  assert.equal(
    externalPackages?.includes("playwright-core"),
    true,
    "convex.json must include playwright-core in externalPackages",
  );
  assert.equal(
    externalPackages?.includes("@sparticuz/chromium-min"),
    true,
    "convex.json should include @sparticuz/chromium-min for browser runtime",
  );
  assert.equal(
    externalPackages?.includes("@sparticuz/chromium"),
    false,
    "convex.json should not include the oversized @sparticuz/chromium package",
  );

  assert.equal(
    hasPattern(actionSource, /serverlessChromium/),
    true,
    "Runtime bootstrap should still use a serverless Chromium wrapper object for launch config",
  );
  assert.equal(
    hasPattern(actionSource, /devices\["Desktop Chrome"\]/),
    true,
    "Desktop context should use Playwright Desktop Chrome device profile",
  );
  assert.equal(
    hasPattern(actionSource, /devices\["iPhone 11"\]/),
    true,
    "Mobile context should use Playwright iPhone 11 device profile",
  );
});

test("website enrichment action invalidates stale @sparticuz/chromium-min cache when source changes", () => {
  assert.equal(
    hasPattern(actionSource, /CHROMIUM_SOURCE_MARKER_FILE/),
    true,
    "Action should declare a temporary marker file path for Chromium executable source cache tracking.",
  );
  assert.equal(
    hasPattern(actionSource, /tmpdir\(\)/),
    true,
    "Action should derive temporary cache paths from os.tmpdir().",
  );
  assert.equal(
    hasPattern(actionSource, /getChromiumSourceMarker\(/),
    true,
    "Action should hash executable sources into a stable marker.",
  );
  assert.equal(
    hasPattern(actionSource, /clearChromiumCacheForSourceMismatch\(/),
    true,
    "Action should centralize cache invalidation in a dedicated helper.",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /rm\(CHROMIUM_EXECUTABLE_PATH,\s*\{ force: true, recursive: true \}\),/,
    ),
    true,
    "Action should remove /tmp/chromium when executable source changes.",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /rm\(CHROMIUM_PACK_PATH,\s*\{ force: true, recursive: true \}\),/,
    ),
    true,
    "Action should remove /tmp/chromium-pack when executable source changes.",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /clearChromiumCacheForSourceMismatch\(executableSource\)[\s\S]*?chromium\.executablePath\(executableSource\)/,
    ),
    true,
    "Action should clear stale cache before resolving Chromium executable path.",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /writeFile\([\s\S]*?CHROMIUM_SOURCE_MARKER_FILE,[\s\S]*?getChromiumSourceMarker\(executableSource\)/,
    ),
    true,
    "Action should persist the source marker after executable path resolution.",
  );
});

test("website enrichment action prepares Chromium AL2023 shared libraries for Convex runtime", () => {
  const hasChromiumHelpers =
    (hasPattern(actionSource, /inflate/) &&
      hasPattern(actionSource, /setupLambdaEnvironment/)) ||
    hasPattern(actionSource, /LD_LIBRARY_PATH/);
  assert.equal(
    hasChromiumHelpers,
    true,
    "Action should explicitly prepare chromium-min runtime environment for AL2023 shared libraries to avoid `/tmp/chromium: error while loading shared libraries: libnspr4.so` (inflate/setupLambdaEnvironment or LD_LIBRARY_PATH).",
  );

  const hasAl2023LibPath =
    hasPattern(
      actionSource,
      /path\.join\(\s*tmpdir\(\),\s*["']al2023["'],\s*["']lib["']\s*\)/,
    ) ||
    (hasPattern(actionSource, /LD_LIBRARY_PATH/) &&
      hasPattern(actionSource, /al2023\/lib/));
  const referencesRuntimeArchive = hasPattern(actionSource, /al2023\.tar\.br/);
  const referencesPackPath = hasPattern(actionSource, /CHROMIUM_PACK_PATH/);
  assert.equal(
    referencesRuntimeArchive && referencesPackPath && hasAl2023LibPath,
    true,
    "Action should reference al2023.tar.br, track CHROMIUM_PACK_PATH, and ensure /tmp/al2023/lib is prepared for Convex launch.",
  );

  // Ordering check: executable resolution < shared-library setup < launch.
  const executableIndex = actionSource.indexOf(
    "resolveChromiumExecutablePath(",
    actionSource.indexOf("export const processLeadEnrichment"),
  );
  const launchIndex = actionSource.indexOf("chromium.launch({");
  // Latest of the first occurrences of any accepted setup marker.
  const setupIndex = Math.max(
    actionSource.indexOf("setupLambdaEnvironment("),
    actionSource.indexOf("LD_LIBRARY_PATH"),
    actionSource.indexOf("path.join(tmpdir(), \"al2023\", \"lib\")"),
  );
  assert.equal(
    executableIndex >= 0 &&
      setupIndex > executableIndex &&
      setupIndex < launchIndex,
    true,
    "Executable resolution and AL2023 shared-library setup should happen before chromium launch in the action runtime path.",
  );
});

test("processLeadEnrichment wraps Playwright bootstrap in protected try/catch", () => {
  assert.equal(
    hasPattern(
      actionSource,
      /try\s*\{[\s\S]*?const \{ playwrightCore, serverlessChromium \}\s*=[\s\S]*?loadPlaywrightModules\(\)[\s\S]*?const executablePath = await withActionTimeout\([\s\S]*?resolveChromiumExecutablePath\(\s*serverlessChromium\s*\)[\s\S]*?browser = await withActionTimeout\([\s\S]*?playwrightCore\.chromium\.launch\([\s\S]*?executablePath,[\s\S]*?desktopContext = await withActionTimeout\([\s\S]*?browser\.newContext\([\s\S]*?mobileContext = await withActionTimeout\([\s\S]*?browser\.newContext\(/,
    ),
    true,
    "Playwright runtime bootstrap should use resolveChromiumExecutablePath() inside the action's try/catch-protected block",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /catch\s*\(error\)\s*\{[\s\S]*?finishLeadEnrichmentRun[\s\S]*?runs\.appendEvent[\s\S]*?patchLeadFromWebsiteEnrichment/,
    ),
    true,
    "Bootstrap failures should be handled by finish + error event + lead patch in catch",
  );
});

test("persistence caps candidates and links before writing", () => {
  assert.equal(
    hasPattern(actionSource, /MAX_PERSISTED_LINKS\s*=\s*120/),
    true,
    "Action should define MAX_PERSISTED_LINKS with value 120.",
  );
  assert.equal(
    hasPattern(actionSource, /MAX_PERSISTED_EMAIL_CANDIDATES\s*=\s*40/),
    true,
    "Action should define MAX_PERSISTED_EMAIL_CANDIDATES with value 40.",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /deduplicateCrawlLinks\(allLinks\)[\s\S]*?slice\([\s\S]*?MAX_PERSISTED_LINKS/,
    ),
    true,
    "Action should dedupe and cap link persistence at MAX_PERSISTED_LINKS.",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /validCandidates\.slice\([\s\S]*?MAX_PERSISTED_EMAIL_CANDIDATES/,
    ),
    true,
    "Action should cap candidate persistence at MAX_PERSISTED_EMAIL_CANDIDATES.",
  );
});

test("website enrichment process stores homepage screenshots in Convex storage as PNG", () => {
  assert.equal(
    hasPattern(actionSource, /ctx\.storage\.store\(/),
    true,
    "Action should store screenshot blobs via ctx.storage.store",
  );
  assert.equal(
    hasPattern(actionSource, /new\s+Blob\(\[[\s\S]*?SCREENSHOT_MIME_TYPE/),
    true,
    "Action should wrap screenshots in Blob with image/png MIME type",
  );
});

test("startLeadEnrichmentRun marks missing website lead with contact status reason", () => {
  assert.equal(
    hasPattern(
      websiteEnrichmentSource,
      /if \(!lead\.websiteUrl\)\s*\{[\s\S]*?status:\s*"failed"[\s\S]*?contactStatusReason:\s*"Website-URL fehlt für das Website-Enrichment\."/,
    ),
    true,
    "Missing websiteUrl should set a specific contactStatusReason on the lead",
  );
});

test("website enrichment persistence inserts all required evidence table rows", () => {
  const expectedTables = [
    "websiteCrawlPages",
    "websiteCrawlLinks",
    "websiteEmailCandidates",
    "websiteCrawlScreenshots",
    "websiteTechnicalChecks",
  ] as const;

  for (const tableName of expectedTables) {
    assert.equal(
      hasPattern(
        websiteEnrichmentSource,
        new RegExp(`ctx\\.db\\.insert\\(["']${tableName}["']`, "s"),
      ),
      true,
      `persistLeadEnrichmentResult should insert into ${tableName}`,
    );
  }
});

test("website enrichment flow uses TASK-7 email selection helper for lead patching", () => {
  assert.equal(
    hasPattern(actionSource, /getUsableContactEmailFromEntries\([\s\S]*?\)/),
    true,
    "Action should call getUsableContactEmailFromEntries",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /runMutation\(\s*internal\.websiteEnrichment\.patchLeadFromWebsiteEnrichment[\s\S]*?\{[\s\S]*?email:\s*usable\.email/,
    ),
    true,
    "Action should patch lead from usable email result",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /currentContactStatus\s*:\s*started\.lead\.contactStatus/,
    ),
    true,
    "Action should pass lead contact status to patchLeadFromWebsiteEnrichment",
  );
  assert.equal(
    hasPattern(
      websiteEnrichmentSource,
      /args\.currentContactStatus\s*===\s*\"missing_contact\"/,
    ),
    true,
    "Lead patch mutation should only set new status for missing_contact",
  );
});

test("failure handling marks run as failed and writes lead-facing reason", () => {
  assert.equal(
    hasPattern(
      actionSource,
      /runMutation\(\s*internal\.websiteEnrichment\.finishLeadEnrichmentRun[\s\S]*?status:\s*"failed"/,
    ),
    true,
    "Action should persist failed run state on fatal crawl errors",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /runMutation\(\s*api\.runs\.appendEvent[\s\S]*?level:\s*"error"[\s\S]*?message:\s*"Website-Enrichment fehlgeschlagen/,
    ),
    true,
    "Action should append a visible error event on failure",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /contactStatusReason:\s*`Website-Enrichment fehlgeschlagen:\s*\$\{errorSummary\}`/,
    ),
    true,
    "Action should patch the lead with an actionable failure reason",
  );
  assert.equal(
    hasPattern(
      actionSource,
      /contactStatusReason:\s*"Website-Enrichment fehlgeschlagen: Ungültige Website-URL\."/,
    ),
    true,
    "Invalid-url failure should also update lead contact status reason",
  );
});

test("website enrichment enforces TASK-8 crawler limits and runtime timeboxes", () => {
  assert.equal(
    // No `g` flag: a presence check needs no global matching state.
    hasPattern(actionSource, /TASK8_CRAWL_TIMEOUT_MS/),
    true,
    "TASK8_CRAWL_TIMEOUT_MS environment override should be used",
  );
  assert.equal(
    hasPattern(actionSource, /DEFAULT_CRAWL_TIMEOUT_MS\s*=\s*60_000/),
    true,
    "Default crawl timeout should be 60s",
  );
  assert.equal(
    hasPattern(actionSource, /DEFAULT_CRAWL_MAX_PAGES\s*=\s*5/),
    true,
    "Default max crawl page count should be 5",
  );
});

test("website enrichment guards long browser work before Convex action runtime aborts", () => {
  assert.equal(
    hasPattern(actionSource, /DEFAULT_ACTION_BUDGET_MS\s*=\s*120_000/),
    true,
    "Action should keep an overall runtime budget below the observed Convex abort window.",
  );
  assert.equal(
    hasPattern(actionSource, /TASK8_ACTION_BUDGET_MS/),
    true,
    "Action runtime budget should be configurable for manual tuning.",
  );
  assert.equal(
    hasPattern(actionSource, /function actionBudgetMs\(\)/),
    true,
    "Action should resolve a bounded runtime budget.",
  );
  assert.equal(
    hasPattern(actionSource, /function remainingActionBudgetMs\(/),
    true,
    "Action should calculate remaining runtime before long awaits.",
  );
  assert.equal(
    hasPattern(actionSource, /async function withActionTimeout/),
    true,
    "Action should wrap long promises so JS catch runs before Convex kills the runtime.",
  );

  const processBody = extractExportSource(actionSource, "processLeadEnrichment");
  assert.equal(
    hasPattern(processBody, /const actionStartedAt = Date\.now\(\)/),
    true,
    "processLeadEnrichment should track action start time.",
  );
  assert.equal(
    hasPattern(processBody, /const actionBudget = actionBudgetMs\(\)/),
    true,
    "processLeadEnrichment should resolve the action budget once.",
  );

  // Each long-running await is expected to appear after a withActionTimeout( call.
  const guardedPatterns = [
    /withActionTimeout\([\s\S]*loadPlaywrightModules\(\)/,
    /withActionTimeout\([\s\S]*resolveChromiumExecutablePath\(/,
    /withActionTimeout\([\s\S]*prepareChromiumSharedLibraries\(/,
    /withActionTimeout\([\s\S]*playwrightCore\.chromium\.launch\(/,
    /withActionTimeout\([\s\S]*crawlPage\(\s*desktopContext,\s*rootUrl/,
    /withActionTimeout\([\s\S]*captureHomepageScreenshot\(/,
  ];
  for (const pattern of guardedPatterns) {
    assert.equal(
      hasPattern(processBody, pattern),
      true,
      `Expected long await to be guarded by withActionTimeout: ${pattern}`,
    );
  }

  assert.equal(
    hasPattern(
      processBody,
      /Math\.min\(\s*timeoutMs,\s*remainingActionBudgetMs\(/,
    ),
    true,
    "Per-page crawl timeout should be capped by remaining action budget.",
  );
  assert.equal(
    hasPattern(
      processBody,
      /desktopContext\.request\.get\([\s\S]*timeout:\s*Math\.min\([\s\S]*remainingActionBudgetMs\(/,
    ),
    true,
    "Internal link checks should cap request timeouts by remaining action budget.",
  );
});

test("processLeadEnrichment schedules PageSpeed audit jobs after successful enrichment", () => {
  const processBody = extractExportSource(actionSource, "processLeadEnrichment");

  const persistIndex = processBody.indexOf(
    "internal.websiteEnrichment.persistLeadEnrichmentResult",
  );
  // Assert before using persistIndex as a search offset: indexOf(x, -1)
  // silently searches from the start and would hide a missing persist call.
  assert.notEqual(
    persistIndex,
    -1,
    "processLeadEnrichment should persist website enrichment result",
  );

  const queueIndex = processBody.indexOf(
    "internal.pageSpeed.queueLeadPageSpeedAudit",
    persistIndex,
  );
  const finishIndex = processBody.indexOf(
    "internal.websiteEnrichment.finishLeadEnrichmentRun",
    persistIndex,
  );
  assert.notEqual(
    queueIndex,
    -1,
    "processLeadEnrichment should queue PageSpeed audits",
  );
  assert.notEqual(
    finishIndex,
    -1,
    "processLeadEnrichment should finish enrichment run",
  );

  assert.equal(
    hasPattern(
      processBody,
      /runMutation\(\s*internal\.pageSpeed\.queueLeadPageSpeedAudit[\s\S]*leadId:\s*started\.lead\._id[\s\S]*parentRunId:\s*runId[\s\S]*\)/,
    ),
    true,
    "Queue call should pass lead ID and parent run ID",
  );
  assert.equal(
    queueIndex > persistIndex,
    true,
    "PageSpeed queueing should happen after persistence",
  );
  assert.equal(
    queueIndex < finishIndex,
    true,
    "PageSpeed queueing should happen before success finish",
  );
});

test("processLeadEnrichment records warning on PageSpeed queue failure and continues", () => {
  const processBody = extractExportSource(actionSource, "processLeadEnrichment");

  assert.equal(
    hasPattern(
      processBody,
      /try\s*\{[\s\S]*internal\.pageSpeed\.queueLeadPageSpeedAudit[\s\S]*\}\s*catch\s*\([^)]*\)\s*\{[\s\S]*api\.runs\.appendEvent[\s\S]*level:\s*"warning"/,
    ),
    true,
    "Queueing PageSpeed should be wrapped in warning-safe try/catch",
  );
  assert.equal(
    hasPattern(
      processBody,
      /PageSpeed-Analyse konnte nicht in die Warteschlange gesetzt werden\./,
    ),
    true,
    "Warning event should describe queue failure",
  );
});

test("processLeadEnrichment regression: queue PageSpeed on invalid URL failure when started lead exists", () => {
  const processBody = extractExportSource(actionSource, "processLeadEnrichment");

  const invalidUrlStart = processBody.indexOf("if (!rootUrl)");
  assert.notEqual(invalidUrlStart, -1, "Invalid URL guard should exist");

  const invalidUrlReturnNull = processBody.indexOf(
    "return null;",
    invalidUrlStart,
  );
  assert.notEqual(
    invalidUrlReturnNull,
    -1,
    "Invalid URL branch should return null",
  );

  // The queue call must sit between the guard and its `return null;`.
  const queueCallInInvalidUrl = processBody.indexOf(
    "internal.pageSpeed.queueLeadPageSpeedAudit",
    invalidUrlStart,
  );
  assert.equal(
    queueCallInInvalidUrl > invalidUrlStart &&
      queueCallInInvalidUrl < invalidUrlReturnNull,
    true,
    "Invalid URL failure path should queue PageSpeed before returning.",
  );

  const invalidUrlBranch = processBody.slice(
    invalidUrlStart,
    invalidUrlReturnNull,
  );
  assert.equal(
    hasPattern(
      invalidUrlBranch,
      /leadId:\s*started\.lead\._id[\s\S]*?parentRunId:\s*runId/,
    ),
    true,
    "Invalid URL queue payload should use started.lead._id and parentRunId runId.",
  );
});

test("processLeadEnrichment regression: queue PageSpeed in fatal catch path with started lead", () => {
  const processBody = extractExportSource(actionSource, "processLeadEnrichment");

  // The last `catch (error)` in the body is treated as the outer handler.
  const outerCatchStart = processBody.lastIndexOf("catch (error)");
  assert.notEqual(outerCatchStart, -1, "Outer catch block should exist");

  const startedGuard = processBody.indexOf("if (started)", outerCatchStart);
  assert.notEqual(
    startedGuard,
    -1,
    "Outer catch should guard lead patch by started check.",
  );

  const catchReturnNull = processBody.indexOf("return null;", outerCatchStart);
  assert.notEqual(
    catchReturnNull,
    -1,
    "Outer catch should return null on unrecoverable errors.",
  );

  const queueCallInCatch = processBody.indexOf(
    "internal.pageSpeed.queueLeadPageSpeedAudit",
    outerCatchStart,
  );
  assert.equal(
    queueCallInCatch > outerCatchStart &&
      queueCallInCatch > startedGuard &&
      queueCallInCatch < catchReturnNull,
    true,
    "Fatal catch path should queue PageSpeed before returning, while started lead exists.",
  );

  const catchBlock = processBody.slice(outerCatchStart, catchReturnNull);
  assert.equal(
    hasPattern(
      catchBlock,
      /leadId:\s*started\.lead\._id[\s\S]*?parentRunId:\s*runId/,
    ),
    true,
    "Catch-path PageSpeed queue payload should use started.lead._id and parentRunId runId.",
  );
});