// Original file metadata: TypeScript, 534 lines, 17 KiB.
import assert from "node:assert/strict";
|
|
import { existsSync, readFileSync } from "node:fs";
|
|
import path from "node:path";
|
|
import test from "node:test";
|
|
import ts from "typescript";
|
|
|
|
/**
 * Reads a UTF-8 text file, returning an empty string when the file does not exist.
 *
 * `readFileSync` throws for a missing file. Doing that at module load time would
 * abort this whole test file before any test is registered, so the `existsSync`
 * assertions in the tests could never report their own messages. With the empty
 * fallback the file still loads and the individual assertions fail instead.
 *
 * @param filePath Absolute path of the file to read.
 * @returns The file contents, or `""` when the file is absent.
 */
function readSourceIfPresent(filePath: string): string {
  return existsSync(filePath) ? readFileSync(filePath, "utf8") : "";
}

// convex.json is still read eagerly: no test checks for its existence separately,
// so a missing config keeps failing loudly at load time exactly as before.
const convexConfigPath = path.join(process.cwd(), "convex.json");
const convexConfigSource = readFileSync(convexConfigPath, "utf8");

// Paths are resolved against the current working directory of the test process.
const websiteEnrichmentPath = path.join(
  process.cwd(),
  "convex/websiteEnrichment.ts",
);
const actionPath = path.join(process.cwd(), "convex/websiteEnrichmentAction.ts");

// Raw source text of both modules; the tests assert on it with regular expressions.
const websiteEnrichmentSource = readSourceIfPresent(websiteEnrichmentPath);
const actionSource = readSourceIfPresent(actionPath);

// Parsed syntax trees of both modules, used to inspect their exported constants.
const websiteEnrichmentSourceFile = ts.createSourceFile(
  "websiteEnrichment.ts",
  websiteEnrichmentSource,
  ts.ScriptTarget.ES2022,
  true,
  ts.ScriptKind.TS,
);
const actionSourceFile = ts.createSourceFile(
  "websiteEnrichmentAction.ts",
  actionSource,
  ts.ScriptTarget.ES2022,
  true,
  ts.ScriptKind.TS,
);
|
|
|
|
/**
 * Collects the names bound by every `export const` statement found in `file`.
 *
 * The whole syntax tree is walked, so variable statements nested below the top
 * level are inspected as well. Only plain identifier declarations are recorded;
 * destructuring patterns are skipped.
 *
 * @param file Parsed source file to inspect.
 * @returns The set of exported constant names.
 */
function getExportedConstNames(file: ts.SourceFile) {
  const collected = new Set<string>();

  // True when the statement carries an `export` modifier and the Const node flag.
  const isExportedConst = (statement: ts.VariableStatement) => {
    const exported = statement.modifiers?.some(
      (modifier) => modifier.kind === ts.SyntaxKind.ExportKeyword,
    );
    if (!exported) {
      return false;
    }
    return (statement.declarationList.flags & ts.NodeFlags.Const) !== 0;
  };

  // Returns nothing on purpose: a truthy return value would stop `forEachChild`.
  const walk = (node: ts.Node): void => {
    if (ts.isVariableStatement(node) && isExportedConst(node)) {
      for (const { name } of node.declarationList.declarations) {
        if (ts.isIdentifier(name)) {
          collected.add(name.text);
        }
      }
    }

    // Children are visited exactly once, whether or not this node matched.
    ts.forEachChild(node, walk);
  };

  ts.forEachChild(file, walk);
  return collected;
}
|
|
|
|
/**
 * Reports whether `pattern` matches anywhere in `source`.
 *
 * `RegExp.prototype.test` resumes from `pattern.lastIndex` for global (`g`) and
 * sticky (`y`) expressions, so reusing such a pattern would give results that
 * depend on earlier calls. `lastIndex` is reset before the match so every call
 * starts from the beginning, and reset afterwards so the caller's regex is not
 * left in an advanced state.
 *
 * @param source Text to search.
 * @param pattern Regular expression to look for; any flags are accepted.
 * @returns `true` when the pattern matches, `false` otherwise.
 */
function hasPattern(source: string, pattern: RegExp): boolean {
  pattern.lastIndex = 0;
  const matched = pattern.test(source);
  pattern.lastIndex = 0;
  return matched;
}
|
|
|
|
// The mutation module must be on disk and must not contain a line starting
// with the `"use node";` directive.
test("website enrichment mutation module exists and has runtime assertions", () => {
  const modulePresent = existsSync(websiteEnrichmentPath);
  assert.equal(modulePresent, true, "websiteEnrichment.ts should be present");

  const declaresNodeRuntime = hasPattern(websiteEnrichmentSource, /^"use node";/m);
  assert.equal(
    declaresNodeRuntime,
    false,
    "websiteEnrichment.ts should not declare a Node runtime",
  );
});
|
|
|
|
// The action module must be on disk and must contain a line starting with the
// `"use node";` directive.
test("website enrichment action module exists and uses Node runtime", () => {
  const modulePresent = existsSync(actionPath);
  assert.equal(modulePresent, true, "websiteEnrichmentAction.ts should be present");

  const declaresNodeRuntime = hasPattern(actionSource, /^"use node";/m);
  assert.equal(
    declaresNodeRuntime,
    true,
    "websiteEnrichmentAction.ts should declare Node runtime",
  );
});
|
|
|
|
// Each module must export its expected constants: the mutations from
// websiteEnrichment.ts and the action from websiteEnrichmentAction.ts.
test("module exports are split across mutations and action", () => {
  const expectations = [
    {
      exported: getExportedConstNames(websiteEnrichmentSourceFile),
      required: [
        "queueLeadEnrichment",
        "startLeadEnrichmentRun",
        "persistLeadEnrichmentResult",
        "finishLeadEnrichmentRun",
        "patchLeadFromWebsiteEnrichment",
      ],
      describe: (exportName: string) =>
        `Expected mutation export in websiteEnrichment.ts: ${exportName}`,
    },
    {
      exported: getExportedConstNames(actionSourceFile),
      required: ["processLeadEnrichment"],
      describe: (exportName: string) =>
        `Expected action export in websiteEnrichmentAction.ts: ${exportName}`,
    },
  ];

  // Mutation exports are checked first, then action exports, in declaration order.
  for (const { exported, required, describe } of expectations) {
    for (const exportName of required) {
      assert.equal(exported.has(exportName), true, describe(exportName));
    }
  }
});
|
|
|
|
// The queue mutation source must contain a `ctx.scheduler.runAfter(0, ...)` call
// that targets the action's `processLeadEnrichment` reference.
test("queueLeadEnrichment schedules internal.websiteEnrichmentAction.processLeadEnrichment", () => {
  const schedulesAction =
    /queueLeadEnrichment\s*=\s*internalMutation\([\s\S]*?ctx\.scheduler\.runAfter\(\s*0,\s*internal\.websiteEnrichmentAction\.processLeadEnrichment/;

  assert.equal(
    hasPattern(websiteEnrichmentSource, schedulesAction),
    true,
    "Queue mutation should schedule action with runAfter(0, internal.websiteEnrichmentAction.processLeadEnrichment)",
  );
});
|
|
|
|
// Isolates the text of `queueLeadEnrichment` (everything up to the next
// `export const startLeadEnrichmentRun`) and checks its dedupe queries.
test("queueLeadEnrichment uses lead-aware run index and does not use fixed-size .take(50) windows", () => {
  const queueBlock =
    /export const queueLeadEnrichment[\s\S]*?(?=\nexport const startLeadEnrichmentRun)/;
  const queueBodyMatch = queueBlock.exec(websiteEnrichmentSource);
  assert.equal(
    queueBodyMatch !== null,
    true,
    "queueLeadEnrichment block should be parseable for source assertions",
  );

  // Falls back to an empty body so the remaining assertions still run and fail normally.
  const queueBody = queueBodyMatch === null ? "" : queueBodyMatch[0];

  const dedupeChecks: ReadonlyArray<readonly [RegExp, boolean, string]> = [
    [
      /withIndex\("by_type_and_status_and_leadId"[\s\S]*?eq\("type",\s*"website_enrichment"\)[\s\S]*?eq\("status",\s*"pending"\)[\s\S]*?eq\("leadId",\s*args\.leadId\)/,
      true,
      "Queue dedupe for pending runs should use direct type+status+leadId index.",
    ],
    [
      /withIndex\("by_type_and_status_and_leadId"[\s\S]*?eq\("type",\s*"website_enrichment"\)[\s\S]*?eq\("status",\s*"running"\)[\s\S]*?eq\("leadId",\s*args\.leadId\)/,
      true,
      "Queue dedupe for running runs should use direct type+status+leadId index.",
    ],
    [/take\(50\)/, false, "No fixed-size .take(50) window in dedupe queries."],
  ];

  for (const [pattern, expected, message] of dedupeChecks) {
    assert.equal(hasPattern(queueBody, pattern), expected, message);
  }
});
|
|
|
|
// Source-level checks on the action module's Playwright/Chromium references,
// plus the matching `node.externalPackages` entries in convex.json.
test("website enrichment action uses Chromium desktop/mobile devices and runtime Playwright import", () => {
  // Asserts that matching `pattern` against the action source yields `expected`.
  const expectInAction = (pattern: RegExp, expected: boolean, message: string) => {
    assert.equal(hasPattern(actionSource, pattern), expected, message);
  };

  expectInAction(
    /import\s+type\s+\{[^\n]*BrowserContext[^\n]*\}\s+from\s+["']playwright-core["']/,
    true,
    "Action should import BrowserContext type for typed helper signatures",
  );
  expectInAction(
    /loadPlaywrightModules\(\)/,
    true,
    "Action should load Playwright at runtime from inside action",
  );
  expectInAction(
    /import\("playwright-core"\)/,
    true,
    "Action should use a dynamic import for playwright-core that Convex can detect as an external package",
  );
  expectInAction(
    /import\("@sparticuz\/chromium-min"\)/,
    true,
    "Action should use a dynamic import for @sparticuz/chromium-min as the lightweight browser package",
  );
  expectInAction(
    /TASK8_BROWSER_ASSET_URL/,
    true,
    "Action should reference TASK8_BROWSER_ASSET_URL when loading browser assets",
  );
  expectInAction(
    /TASK8_BROWSER_ASSET_URL[\s\S]{0,240}(throw|Error|required|missing|not configured|configured|konfiguriert|setze)/i,
    true,
    "Action should surface a clear error when the browser asset URL is not configured",
  );
  expectInAction(
    /import\("@sparticuz\/chromium"\)/,
    false,
    "Action should not import the oversized @sparticuz/chromium package",
  );

  // convex.json checks run between the source checks, as in the original ordering.
  const externalPackages = JSON.parse(convexConfigSource).node?.externalPackages;
  assert.equal(
    Array.isArray(externalPackages),
    true,
    "convex.json should define node.externalPackages",
  );

  const packageExpectations = [
    ["playwright-core", true, "convex.json must include playwright-core in externalPackages"],
    [
      "@sparticuz/chromium-min",
      true,
      "convex.json should include @sparticuz/chromium-min for browser runtime",
    ],
    [
      "@sparticuz/chromium",
      false,
      "convex.json should not include the oversized @sparticuz/chromium package",
    ],
  ] as const;
  for (const [packageName, expected, message] of packageExpectations) {
    assert.equal(externalPackages?.includes(packageName), expected, message);
  }

  expectInAction(
    /serverlessChromium/,
    true,
    "Runtime bootstrap should still use a serverless Chromium wrapper object for launch config",
  );
  expectInAction(
    /devices\["Desktop Chrome"\]/,
    true,
    "Desktop context should use Playwright Desktop Chrome device profile",
  );
  expectInAction(
    /devices\["iPhone 11"\]/,
    true,
    "Mobile context should use Playwright iPhone 11 device profile",
  );
});
|
|
|
|
// Every pattern below must be present in the action source; they are checked in
// order and the first missing one fails the test with its message.
test("website enrichment action invalidates stale @sparticuz/chromium-min cache when source changes", () => {
  const requiredPatterns: ReadonlyArray<readonly [RegExp, string]> = [
    [
      /CHROMIUM_SOURCE_MARKER_FILE/,
      "Action should declare a temporary marker file path for Chromium executable source cache tracking.",
    ],
    [/tmpdir\(\)/, "Action should derive temporary cache paths from os.tmpdir()."],
    [
      /getChromiumSourceMarker\(/,
      "Action should hash executable sources into a stable marker.",
    ],
    [
      /clearChromiumCacheForSourceMismatch\(/,
      "Action should centralize cache invalidation in a dedicated helper.",
    ],
    [
      /rm\(CHROMIUM_EXECUTABLE_PATH,\s*\{ force: true, recursive: true \}\),/,
      "Action should remove /tmp/chromium when executable source changes.",
    ],
    [
      /rm\(CHROMIUM_PACK_PATH,\s*\{ force: true, recursive: true \}\),/,
      "Action should remove /tmp/chromium-pack when executable source changes.",
    ],
    [
      /clearChromiumCacheForSourceMismatch\(executableSource\)[\s\S]*?chromium\.executablePath\(executableSource\)/,
      "Action should clear stale cache before resolving Chromium executable path.",
    ],
    [
      /writeFile\([\s\S]*?CHROMIUM_SOURCE_MARKER_FILE,[\s\S]*?getChromiumSourceMarker\(executableSource\)/,
      "Action should persist the source marker after executable path resolution.",
    ],
  ];

  for (const [pattern, message] of requiredPatterns) {
    assert.equal(hasPattern(actionSource, pattern), true, message);
  }
});
|
|
|
|
// Checks that the action source references the AL2023 library setup and that a
// setup marker sits between executable resolution and the browser launch call.
test("website enrichment action prepares Chromium AL2023 shared libraries for Convex runtime", () => {
  const mentions = (pattern: RegExp) => hasPattern(actionSource, pattern);

  // Either the inflate + setupLambdaEnvironment pair or LD_LIBRARY_PATH is accepted.
  const usesLdLibraryPath = mentions(/LD_LIBRARY_PATH/);
  const usesInflateHelpers = mentions(/inflate/) && mentions(/setupLambdaEnvironment/);
  assert.equal(
    usesInflateHelpers || usesLdLibraryPath,
    true,
    "Action should explicitly prepare chromium-min runtime environment for AL2023 shared libraries to avoid `/tmp/chromium: error while loading shared libraries: libnspr4.so` (inflate/setupLambdaEnvironment or LD_LIBRARY_PATH).",
  );

  const joinsAl2023LibDir = mentions(
    /path\.join\(\s*tmpdir\(\),\s*["']al2023["'],\s*["']lib["']\s*\)/,
  );
  const hasAl2023LibPath =
    joinsAl2023LibDir || (usesLdLibraryPath && mentions(/al2023\/lib/));
  const referencesRuntimeArchive = mentions(/al2023\.tar\.br/);
  const referencesPackPath = mentions(/CHROMIUM_PACK_PATH/);
  assert.equal(
    referencesRuntimeArchive && referencesPackPath && hasAl2023LibPath,
    true,
    "Action should reference al2023.tar.br, track CHROMIUM_PACK_PATH, and ensure /tmp/al2023/lib is prepared for Convex launch.",
  );

  const executableIndex = actionSource.indexOf(
    "const executablePath = await resolveChromiumExecutablePath(",
  );
  const launchIndex = actionSource.indexOf("chromium.launch({");

  // Largest first-occurrence offset among the setup markers; -1 when none is present.
  const setupMarkers = [
    "setupLambdaEnvironment(",
    "LD_LIBRARY_PATH",
    "path.join(tmpdir(), \"al2023\", \"lib\")",
  ];
  const latestSetupIndex = Math.max(
    ...setupMarkers.map((marker) => actionSource.indexOf(marker)),
  );

  const setupSitsBetween =
    executableIndex >= 0 &&
    latestSetupIndex > executableIndex &&
    latestSetupIndex < launchIndex;
  assert.equal(
    setupSitsBetween,
    true,
    "Executable resolution and AL2023 shared-library setup should happen before chromium launch in the action runtime path.",
  );
});
|
|
|
|
// The bootstrap sequence must appear after a `try {`, and a `catch (error)` block
// must reference the run-finish, event-append and lead-patch calls in that order.
test("processLeadEnrichment wraps Playwright bootstrap in protected try/catch", () => {
  const bootstrapInsideTry =
    /try\s*\{[\s\S]*?const \{ playwrightCore, serverlessChromium \}\s*=\s*await loadPlaywrightModules\(\);[\s\S]*?const executablePath = await resolveChromiumExecutablePath\(\s*serverlessChromium,\s*\);[\s\S]*?browser = await playwrightCore\.chromium\.launch\([\s\S]*?executablePath,[\s\S]*?desktopContext = await browser\.newContext\([\s\S]*?mobileContext = await browser\.newContext\(/;
  const failureHandlingInCatch =
    /catch\s*\(error\)\s*\{[\s\S]*?finishLeadEnrichmentRun[\s\S]*?runs\.appendEvent[\s\S]*?patchLeadFromWebsiteEnrichment/;

  assert.equal(
    hasPattern(actionSource, bootstrapInsideTry),
    true,
    "Playwright runtime bootstrap should use resolveChromiumExecutablePath() inside the action's try/catch-protected block",
  );
  assert.equal(
    hasPattern(actionSource, failureHandlingInCatch),
    true,
    "Bootstrap failures should be handled by finish + error event + lead patch in catch",
  );
});
|
|
|
|
// The action source must define both persistence caps and apply them via `slice`.
test("persistence caps candidates and links before writing", () => {
  const requiredPatterns: ReadonlyArray<readonly [RegExp, string]> = [
    [
      /MAX_PERSISTED_LINKS\s*=\s*120/,
      "Action should define MAX_PERSISTED_LINKS with value 120.",
    ],
    [
      /MAX_PERSISTED_EMAIL_CANDIDATES\s*=\s*40/,
      "Action should define MAX_PERSISTED_EMAIL_CANDIDATES with value 40.",
    ],
    [
      /deduplicateCrawlLinks\(allLinks\)[\s\S]*?slice\([\s\S]*?MAX_PERSISTED_LINKS/,
      "Action should dedupe and cap link persistence at MAX_PERSISTED_LINKS.",
    ],
    [
      /validCandidates\.slice\([\s\S]*?MAX_PERSISTED_EMAIL_CANDIDATES/,
      "Action should cap candidate persistence at MAX_PERSISTED_EMAIL_CANDIDATES.",
    ],
  ];

  requiredPatterns.forEach(([pattern, message]) => {
    assert.equal(hasPattern(actionSource, pattern), true, message);
  });
});
|
|
|
|
// The action source must call `ctx.storage.store(` and build a `Blob` that
// references `SCREENSHOT_MIME_TYPE`.
test("website enrichment process stores homepage screenshots in Convex storage as PNG", () => {
  const storesBlob = hasPattern(actionSource, /ctx\.storage\.store\(/);
  assert.equal(
    storesBlob,
    true,
    "Action should store screenshot blobs via ctx.storage.store",
  );

  const wrapsInTypedBlob = hasPattern(
    actionSource,
    /new\s+Blob\(\[[\s\S]*?SCREENSHOT_MIME_TYPE/,
  );
  assert.equal(
    wrapsInTypedBlob,
    true,
    "Action should wrap screenshots in Blob with image/png MIME type",
  );
});
|
|
|
|
// The `if (!lead.websiteUrl)` branch of the mutation source must set a failed
// status and the exact German contact status reason.
test("startLeadEnrichmentRun marks missing website lead with contact status reason", () => {
  const missingWebsiteBranch =
    /if \(!lead\.websiteUrl\)\s*\{[\s\S]*?status:\s*"failed"[\s\S]*?contactStatusReason:\s*"Website-URL fehlt für das Website-Enrichment\."/;

  assert.equal(
    hasPattern(websiteEnrichmentSource, missingWebsiteBranch),
    true,
    "Missing websiteUrl should set a specific contactStatusReason on the lead",
  );
});
|
|
|
|
// The mutation source must contain a `ctx.db.insert("<table>"` call (single or
// double quoted) for each evidence table listed below.
test("website enrichment persistence inserts all required evidence table rows", () => {
  const expectedTables = [
    "websiteCrawlPages",
    "websiteCrawlLinks",
    "websiteEmailCandidates",
    "websiteCrawlScreenshots",
    "websiteTechnicalChecks",
  ] as const;

  expectedTables.forEach((tableName) => {
    const insertCall = new RegExp(`ctx\\.db\\.insert\\(["']${tableName}["']`, "s");
    assert.equal(
      hasPattern(websiteEnrichmentSource, insertCall),
      true,
      `persistLeadEnrichmentResult should insert into ${tableName}`,
    );
  });
});
|
|
|
|
// Three checks against the action source followed by one against the mutation
// source; each entry names the text it is matched against.
test("website enrichment flow uses TASK-7 email selection helper for lead patching", () => {
  const checks: ReadonlyArray<{ source: string; pattern: RegExp; message: string }> = [
    {
      source: actionSource,
      pattern: /getUsableContactEmailFromEntries\([\s\S]*?\)/,
      message: "Action should call getUsableContactEmailFromEntries",
    },
    {
      source: actionSource,
      pattern:
        /runMutation\(\s*internal\.websiteEnrichment\.patchLeadFromWebsiteEnrichment[\s\S]*?\{[\s\S]*?email:\s*usable\.email/,
      message: "Action should patch lead from usable email result",
    },
    {
      source: actionSource,
      pattern: /currentContactStatus\s*:\s*started\.lead\.contactStatus/,
      message: "Action should pass lead contact status to patchLeadFromWebsiteEnrichment",
    },
    {
      source: websiteEnrichmentSource,
      pattern: /args\.currentContactStatus\s*===\s*\"missing_contact\"/,
      message: "Lead patch mutation should only set new status for missing_contact",
    },
  ];

  for (const { source, pattern, message } of checks) {
    assert.equal(hasPattern(source, pattern), true, message);
  }
});
|
|
|
|
// The action source must record a failed run, append an error event, and write
// the German failure reasons onto the lead.
test("failure handling marks run as failed and writes lead-facing reason", () => {
  const expectInAction = (pattern: RegExp, message: string) => {
    assert.equal(hasPattern(actionSource, pattern), true, message);
  };

  expectInAction(
    /runMutation\(\s*internal\.websiteEnrichment\.finishLeadEnrichmentRun[\s\S]*?status:\s*"failed"/,
    "Action should persist failed run state on fatal crawl errors",
  );
  expectInAction(
    /runMutation\(\s*api\.runs\.appendEvent[\s\S]*?level:\s*"error"[\s\S]*?message:\s*"Website-Enrichment fehlgeschlagen/,
    "Action should append a visible error event on failure",
  );
  expectInAction(
    /contactStatusReason:\s*`Website-Enrichment fehlgeschlagen:\s*\$\{errorSummary\}`/,
    "Action should patch the lead with an actionable failure reason",
  );
  expectInAction(
    /contactStatusReason:\s*"Website-Enrichment fehlgeschlagen: Ungültige Website-URL\."/,
    "Invalid-url failure should also update lead contact status reason",
  );
});
|
|
|
|
// The action source must reference the timeout override variable and define the
// default crawl timeout (60_000) and default page limit (5).
test("website enrichment enforces TASK-8 crawler limits and runtime timeboxes", () => {
  assert.equal(
    // No `g` flag: a global regex makes `RegExp.prototype.test` stateful through
    // `lastIndex`, which adds nothing to a plain presence check.
    hasPattern(actionSource, /TASK8_CRAWL_TIMEOUT_MS/),
    true,
    "TASK8_CRAWL_TIMEOUT_MS environment override should be used",
  );
  assert.equal(
    hasPattern(actionSource, /DEFAULT_CRAWL_TIMEOUT_MS\s*=\s*60_000/),
    true,
    "Default crawl timeout should be 60s",
  );
  assert.equal(
    hasPattern(actionSource, /DEFAULT_CRAWL_MAX_PAGES\s*=\s*5/),
    true,
    "Default max crawl page count should be 5",
  );
});
|