feat: add website enrichment crawler
This commit is contained in:
@@ -8,6 +8,7 @@ import {
|
||||
isStalePendingAgentRun,
|
||||
getLeadDiscoveryContactStatus,
|
||||
getLeadDiscoveryPriority,
|
||||
shouldScheduleWebsiteEnrichment,
|
||||
} from "../lib/lead-discovery-run";
|
||||
|
||||
test("agent run guard ignores stale pending runs but blocks active runs", () => {
|
||||
@@ -180,6 +181,69 @@ test("lead discovery lead record stores valid email and sets contactStatus to ne
|
||||
assert.equal(record.contactPerson, undefined);
|
||||
});
|
||||
|
||||
// A lead with a website URL, a domain, and contactStatus "missing_contact" must be scheduled.
test("scheduling helper triggers website enrichment for missing contact leads with website data", () => {
  const shouldSchedule = shouldScheduleWebsiteEnrichment({
    websiteUrl: "https://www.example.de",
    websiteDomain: "example.de",
    contactStatus: "missing_contact",
  });

  assert.equal(shouldSchedule, true);
});
|
||||
|
||||
// A null URL together with an empty domain must not be scheduled, even for "missing_contact".
test("scheduling helper does not trigger without website data", () => {
  const shouldSchedule = shouldScheduleWebsiteEnrichment({
    websiteUrl: null,
    websiteDomain: "",
    contactStatus: "missing_contact",
  });

  assert.equal(shouldSchedule, false);
});
|
||||
|
||||
// contactStatus "new" must not be scheduled, although website data is present.
test("scheduling helper does not trigger when contact status is already usable", () => {
  const shouldSchedule = shouldScheduleWebsiteEnrichment({
    websiteUrl: "https://www.example.de",
    websiteDomain: "example.de",
    contactStatus: "new",
  });

  assert.equal(shouldSchedule, false);
});
|
||||
|
||||
// contactStatus "audit_ready" must not be scheduled, although website data is present.
test("scheduling helper does not trigger for audit-ready leads", () => {
  const shouldSchedule = shouldScheduleWebsiteEnrichment({
    websiteUrl: "https://www.example.de",
    websiteDomain: "example.de",
    contactStatus: "audit_ready",
  });

  assert.equal(shouldSchedule, false);
});
|
||||
|
||||
// "outreach_ready" and "do_not_contact" leads with website data must not be scheduled.
test("scheduling helper preserves existing contact-status behavior beyond TASK-7", () => {
  for (const contactStatus of ["outreach_ready", "do_not_contact"] as const) {
    const shouldSchedule = shouldScheduleWebsiteEnrichment({
      websiteUrl: "https://www.example.de",
      websiteDomain: "example.de",
      contactStatus,
    });

    assert.equal(shouldSchedule, false);
  }
});
|
||||
|
||||
test("lead discovery lead record stores normalized matching fields", () => {
|
||||
const record = buildLeadDiscoveryLeadRecord({
|
||||
campaignId: "campaign-1",
|
||||
|
||||
84
tests/lead-discovery-source.test.ts
Normal file
84
tests/lead-discovery-source.test.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
import assert from "node:assert/strict";
|
||||
import { readFileSync } from "node:fs";
|
||||
import path from "node:path";
|
||||
import test from "node:test";
|
||||
|
||||
// Location of the Convex lead-discovery module, relative to the working directory.
const leadDiscoveryPath = path.join(process.cwd(), "convex", "leadDiscovery.ts");

// Module text, read once at import time; every test below asserts against this string.
const leadDiscoverySource = readFileSync(leadDiscoveryPath, { encoding: "utf8" });
|
||||
|
||||
/**
 * Reports whether `pattern` matches anywhere in `source`.
 *
 * `RegExp.prototype.test` resumes from `lastIndex` for global (`g`) and sticky (`y`)
 * patterns, so a reused pattern object could otherwise give alternating results.
 * The cursor is rewound first so every call searches from the start of `source`.
 *
 * @param source - Text to search.
 * @param pattern - Regular expression to look for.
 * @returns `true` when the pattern matches, otherwise `false`.
 */
function hasPattern(source: string, pattern: RegExp) {
  pattern.lastIndex = 0;
  return pattern.test(source);
}
|
||||
|
||||
/**
 * Returns the text of the first brace-delimited body that follows
 * `export const <name> = ` in the loaded lead-discovery module source.
 *
 * Braces are counted purely textually: a brace inside a string, template literal,
 * regular expression, or comment is counted like any other brace.
 *
 * @param name - Identifier of the exported constant to locate.
 * @returns The slice from the opening `{` through its matching `}`, inclusive.
 * @throws AssertionError when the declaration is missing, when no `{` follows it,
 *   or when the braces never balance.
 */
function extractExportSource(name: string) {
  const marker = `export const ${name} = `;
  const declarationIndex = leadDiscoverySource.indexOf(marker);

  assert.notEqual(declarationIndex, -1, `Expected declaration for ${name}`);

  const openBraceIndex = leadDiscoverySource.indexOf("{", declarationIndex);

  // Without this guard the scan below would start at index -1, walk the file from
  // its beginning, and return an unrelated (typically empty) slice, letting
  // negative assertions on the result pass vacuously.
  assert.notEqual(openBraceIndex, -1, `Expected opening brace for ${name}`);

  let depth = 0;
  let end = -1;

  for (let index = openBraceIndex; index < leadDiscoverySource.length; index++) {
    const char = leadDiscoverySource[index];

    if (char === "{") {
      depth += 1;
    } else if (char === "}") {
      depth -= 1;
      // Depth returns to zero exactly at the brace that closes the first one.
      if (depth === 0) {
        end = index;
        break;
      }
    }
  }

  assert.notEqual(end, -1, `Expected balanced braces for ${name}`);
  return leadDiscoverySource.slice(openBraceIndex, end + 1);
}
|
||||
|
||||
// startCampaignRun must query by_type_and_status with type "campaign" and status "running".
test("startCampaignRun checks active campaign runs via by_type_and_status", () => {
  const runningCampaignLookup =
    /withIndex\(\s*"by_type_and_status"\s*,\s*\(q\)\s*=>[\s\S]*?q\.eq\("type",\s*"campaign"\)\.eq\("status",\s*"running"\),?[\s\S]*?\)/;
  const usesIndexedLookup = hasPattern(
    extractExportSource("startCampaignRun"),
    runningCampaignLookup,
  );

  assert.equal(
    usesIndexedLookup,
    true,
    "Campaign starts should only consider running campaign-type runs as blockers",
  );
});
|
||||
|
||||
// The body of persistDiscoveredLeads must not contain a ctx.scheduler.runAfter call.
test("persistDiscoveredLeads does not schedule website enrichment jobs directly", () => {
  const callsScheduler = extractExportSource("persistDiscoveredLeads").includes(
    "ctx.scheduler.runAfter",
  );

  assert.equal(callsScheduler, false, "Lead persistence must not call runAfter");
});
|
||||
|
||||
// Inside processCampaignRun the first occurrences must appear in this order:
// persist leads -> queue enrichment -> append the "geplant" event message.
test("processCampaignRun schedules website enrichment after lead persistence", () => {
  const body = extractExportSource("processCampaignRun");
  const positionOf = (needle: string) => body.indexOf(needle);

  const persistAt = positionOf("internal.leadDiscovery.persistDiscoveredLeads");
  const enqueueAt = positionOf("internal.websiteEnrichment.queueLeadEnrichment");
  const eventAt = positionOf("Website-Kontaktanreicherung geplant.");

  assert.notEqual(persistAt, -1, "processCampaignRun should persist discovered leads");
  assert.notEqual(enqueueAt, -1, "processCampaignRun should schedule website enrichment");
  assert.notEqual(eventAt, -1, "processCampaignRun should append enrichment schedule events");
  assert.ok(
    persistAt < enqueueAt,
    "processCampaignRun should schedule enrichment after persistence succeeds",
  );
  assert.ok(
    enqueueAt < eventAt,
    "processCampaignRun should append enrichment event after scheduling",
  );
});
|
||||
27
tests/runs-domain.test.ts
Normal file
27
tests/runs-domain.test.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
import assert from "node:assert/strict";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import test from "node:test";
|
||||
|
||||
// Reads one module from the `convex` directory under the working directory as UTF-8 text.
const readConvexModule = (fileName: string) =>
  readFileSync(join(process.cwd(), "convex", fileName), "utf8");

// Source texts inspected by the tests below; both are loaded once at import time.
const runsSource = readConvexModule("runs.ts");
const schemaSource = readConvexModule("schema.ts");
|
||||
|
||||
// runs.ts must contain an `if (args.type)` branch that queries the by_type index.
test("run listing supports type-only filtering", () => {
  const typeOnlyBranch =
    /if\s*\(\s*args\.type\s*\)\s*\{[\s\S]*?\.withIndex\(\s*"by_type"\s*,\s*\(q\)\s*=>\s*q\.eq\("type",\s*type\)\)/;

  assert.match(runsSource, typeOnlyBranch);
});
|
||||
|
||||
// schema.ts must declare `.index("by_type", ["type"])`.
test("agentRuns schema defines by_type index", () => {
  const byTypeIndexDeclaration = /\.index\("by_type",\s*\["type"\]\)/;

  assert.match(schemaSource, byTypeIndexDeclaration);
});
|
||||
291
tests/website-crawler.test.ts
Normal file
291
tests/website-crawler.test.ts
Normal file
@@ -0,0 +1,291 @@
|
||||
import assert from "node:assert/strict";
|
||||
import test from "node:test";
|
||||
|
||||
import {
|
||||
buildTechnicalChecks,
|
||||
isSameRegistrableHostishDomain,
|
||||
normalizeCrawlUrl,
|
||||
discoverRelevantSubpageUrls,
|
||||
extractContactSignalsFromHtmlLikeText,
|
||||
} from "../lib/website-crawler";
|
||||
import { getUsableContactEmailFromEntries } from "../lib/lead-discovery-google";
|
||||
|
||||
// Covers an absolute URL, a relative URL resolved against a base, and a mailto link.
test("normalizeCrawlUrl normalizes host and strips fragments while supporting relative links with base", () => {
  const absolute = normalizeCrawlUrl("https://WWW.Example.Com/path?x=1#kontakt", undefined);
  assert.equal(absolute, "https://example.com/path?x=1");

  const relative = normalizeCrawlUrl("/kontakt?lang=de#top", "https://www.example.de/start");
  assert.equal(relative, "https://example.de/kontakt?lang=de");

  const mailto = normalizeCrawlUrl("mailto:owner@example.de", "https://example.de");
  assert.equal(mailto, null);
});
|
||||
|
||||
// www/bare and protocol-relative variants count as the same domain; a blog subdomain does not.
test("isSameRegistrableHostishDomain treats www domain variants as same domain", () => {
  const wwwVersusBare = isSameRegistrableHostishDomain(
    "https://www.example.de/kontakt",
    "http://example.de",
  );
  assert.equal(wwwVersusBare, true);

  const protocolRelative = isSameRegistrableHostishDomain(
    "//example.de/contact",
    "https://www.example.de",
  );
  assert.equal(protocolRelative, true);

  const blogSubdomain = isSameRegistrableHostishDomain(
    "https://blog.example.de/kontakt",
    "https://example.de",
  );
  assert.equal(blogSubdomain, false);
});
|
||||
|
||||
// Nine mixed links go in; the expected result is the homepage plus four prioritized subpages.
test("discoverRelevantSubpageUrls keeps homepage first, prioritizes relevant categories, and is bounded", () => {
  const expectedSelection = [
    "https://example.de/",
    "https://example.de/kontakt",
    "https://example.de/impressum",
    "https://example.de/leistungen",
    "https://example.de/ueber-uns",
  ];

  const selection = discoverRelevantSubpageUrls(
    [
      "https://other.example.com/kontakt",
      "mailto:kontakt@example.de",
      "https://example.de/leistungen?source=seo",
      "/kontakt",
      "/angebot",
      "/impressum?x=1",
      "/ueber-uns",
      "/services?foo=bar",
      "/irrelevant",
    ],
    "https://www.example.de",
  );

  assert.deepEqual(selection, expectedSelection);
});
|
||||
|
||||
// Three query-string variants of /kontakt must collapse into a single entry.
test("discoverRelevantSubpageUrls deduplicates query variants before bounded selection", () => {
  const expectedSelection = [
    "https://example.de/",
    "https://example.de/kontakt",
    "https://example.de/ueber-uns",
  ];

  const selection = discoverRelevantSubpageUrls(
    [
      "https://example.de/kontakt?a=1",
      "/kontakt?a=2",
      "/kontakt?source=google",
      "https://example.de/ueber-uns?team=1",
    ],
    "https://www.example.de",
  );

  assert.deepEqual(selection, expectedSelection);
});
|
||||
|
||||
// mailto/tel/javascript links and links to other hosts must be absent from the result.
test("discoverRelevantSubpageUrls ignores cross-domain and non-navigational link schemes", () => {
  const expectedSelection = [
    "https://example.de/",
    "https://example.de/contact",
    "http://example.de/leistungen",
  ];

  const selection = discoverRelevantSubpageUrls(
    [
      "mailto:kontakt@example.de",
      "tel:+49 30 1234 567",
      "javascript:void(0)",
      "https://example.de/contact",
      "https://blog.example.de/impressum",
      "//other.de/team",
      "http://example.de/leistungen",
    ],
    "https://www.example.de/path",
  );

  assert.deepEqual(selection, expectedSelection);
});
|
||||
|
||||
// The markup offers a named mailto address and a generic info@ address; info@ must be chosen.
test("generic contact emails beat named emails when selected through TASK-7 rule helper", () => {
  const { emailCandidates } = extractContactSignalsFromHtmlLikeText(
    "<h1>Kontakt</h1><p>Schreiben Sie an <a href=\"mailto:owner@example.de\">Max Mustermann</a> oder info@example.de.</p>",
  );

  const selected = getUsableContactEmailFromEntries(emailCandidates);

  assert.equal(selected?.email, "info@example.de");
});
|
||||
|
||||
// A named address in plain prose must yield no usable email and a non-business first candidate.
test("named email without explicit business-contact context is not accepted by TASK-7 helper", () => {
  const { emailCandidates } = extractContactSignalsFromHtmlLikeText(
    "<p>Wir beantworten offene Fragen per max.mustermann@example.de und stehen Ihnen werktags zur Verfügung.</p>",
  );

  const selected = getUsableContactEmailFromEntries(emailCandidates);

  assert.equal(selected, null);
  // Read the first candidate only after the helper call, in the same order as before.
  assert.equal(emailCandidates[0]?.isBusinessContactAddress, false);
});
|
||||
|
||||
// Impressum fixture: a heading, a 320-character <script> filler, then five office addresses
// (two as mailto links, three as plain text). Every candidate must be flagged as a business
// contact, and the selected email must be one of the addresses present in the fixture.
test("extractContactSignalsFromHtmlLikeText marks Bock Impressum mailto candidates as business contact", () => {
  const impressumAddresses = [
    "chemnitz@bock-rechtsanwaelte.de",
    "aue@bock-rechtsanwaelte.de",
    "dresden@bock-rechtsanwaelte.de",
    "mittweida@bock-rechtsanwaelte.de",
    "meerane@bock-rechtsanwaelte.de",
  ];

  const signals = extractContactSignalsFromHtmlLikeText(
    "<p>Impressum</p>" +
      "<script>" +
      "x".repeat(320) +
      "</script>" +
      "<p>E-Mail: <a href=\"mailto:chemnitz@bock-rechtsanwaelte.de\">chemnitz@bock-rechtsanwaelte.de</a> oder <a href=\"mailto:aue@bock-rechtsanwaelte.de\">aue@bock-rechtsanwaelte.de</a></p>" +
      "<p>Weitere E-Mail-Adressen: dresden@bock-rechtsanwaelte.de, mittweida@bock-rechtsanwaelte.de, meerane@bock-rechtsanwaelte.de</p>",
  );

  const usable = getUsableContactEmailFromEntries(signals.emailCandidates);
  assert.equal(usable !== null, true);

  // The previous check here (`email === "chemnitz@…" || usable !== null`) could never fail
  // once the assertion above had passed. Verify the selected address instead.
  assert.equal(
    impressumAddresses.includes(usable?.email ?? ""),
    true,
    "Selected email should be one of the Impressum addresses in the fixture",
  );

  for (const candidate of signals.emailCandidates) {
    assert.equal(candidate.isBusinessContactAddress, true);
  }
});
|
||||
|
||||
// A mailto link whose visible label is the address itself must leave contactPerson as null.
test("email-labeled mailto links should not populate contactPerson", () => {
  const { emailCandidates } = extractContactSignalsFromHtmlLikeText(
    "<p>Impressum - E-Mail: <a href=\"mailto:chemnitz@bock-rechtsanwaelte.de\">chemnitz@bock-rechtsanwaelte.de</a></p>",
  );

  const match = emailCandidates.find(
    ({ email }) => email === "chemnitz@bock-rechtsanwaelte.de",
  );

  assert.equal(match?.contactPerson, null);
});
|
||||
|
||||
// A footer mailto link carrying `?subject=` must produce exactly one business-contact candidate.
test("extractContactSignalsFromHtmlLikeText parses mailto links with query parameters in contact context", () => {
  const { emailCandidates } = extractContactSignalsFromHtmlLikeText(
    '<footer><p><a href="mailto:info@example.de?subject=Anfrage">Jetzt schreiben</a></p></footer>',
  );

  assert.equal(emailCandidates.length, 1);

  const onlyCandidate = emailCandidates[0];
  assert.equal(onlyCandidate?.email, "info@example.de");
  assert.equal(onlyCandidate?.isBusinessContactAddress, true);
});
|
||||
|
||||
// "[at]", "(at) … punkt …" and a spaced "@" must each resolve to a plain address.
test("extractContactSignalsFromHtmlLikeText parses common obfuscations in visible text", () => {
  const { emailCandidates } = extractContactSignalsFromHtmlLikeText(
    "<p>Sie erreichen uns unter info [at] example.de, kontakt (at) example punkt de oder office @ example.de.</p>",
  );

  // Sorted so the comparison does not depend on extraction order.
  const sortedEmails = Array.from(emailCandidates, (entry) => entry.email).sort();

  assert.deepEqual(sortedEmails, [
    "info@example.de",
    "kontakt@example.de",
    "office@example.de",
  ]);
});
|
||||
|
||||
// Bare "at" / "dot" words in an ordinary sentence must not produce any candidate.
test("does not infer obfuscated emails from normal prose with bare at/dot", () => {
  const { emailCandidates } = extractContactSignalsFromHtmlLikeText(
    "<p>We are at example dot de for a workshop in the city center.</p>",
  );

  assert.equal(emailCandidates.length, 0);
});
|
||||
|
||||
// The same address linked twice must be reported as a single candidate.
test("deduplicates repeated mailto entries", () => {
  const { emailCandidates } = extractContactSignalsFromHtmlLikeText(
    "<p><a href=\"mailto:info@example.de\">info@example.de</a> and again <a href=\"mailto:info@example.de\">also</a></p>",
  );

  assert.equal(emailCandidates.length, 1);
});
|
||||
|
||||
// Three fixtures: a footer with info@, an Impressum paragraph with office@, and a named
// address in plain prose. The first two must be usable; the third must yield null.
test("TASK-7 keeps generic contact emails in footer/impressum usable and rejects named emails without context", () => {
  // Extracts candidates from the markup and runs them through the selection helper.
  const usableFrom = (html: string) =>
    getUsableContactEmailFromEntries(
      extractContactSignalsFromHtmlLikeText(html).emailCandidates,
    );

  const fromFooter = usableFrom(
    "<footer>Impressum: info@example.de für allgemeine Anfragen.</footer>",
  );
  assert.equal(fromFooter?.email, "info@example.de");

  const fromImpressum = usableFrom(
    "<p>Impressum der Firma – office@example.de ist die Hauptadresse.</p>",
  );
  assert.equal(fromImpressum?.email, "office@example.de");

  const fromNamedProse = usableFrom(
    "<p>Bitte wenden Sie sich an max.mustermann@example.de bei Fragen.</p>",
  );
  assert.equal(fromNamedProse, null);
});
|
||||
|
||||
// "Ansprechpartner:" followed by a linked name and an address must attach that name to the email.
test("extractContactSignalsFromHtmlLikeText captures contact-person from adjacent raw HTML context", () => {
  const { emailCandidates } = extractContactSignalsFromHtmlLikeText(
    "<p>Ansprechpartner: <a href=\"/team/max-mustermann\">Max Mustermann</a> – max.mustermann@example.de</p>",
  );

  const firstCandidate = emailCandidates[0];

  assert.equal(firstCandidate?.email, "max.mustermann@example.de");
  assert.equal(firstCandidate?.contactPerson, "Max Mustermann");
  assert.equal(firstCandidate?.isBusinessContactAddress, true);
});
|
||||
|
||||
// Fixture: an http final URL, a blank title, an empty meta description, contact wording in
// the visible text, and four links of which one same-host link returns 404.
test("technical checks detect protocol, missing metadata, contact path, and broken internal links", () => {
  const {
    https,
    finalUrl,
    missingTitle,
    missingMetaDescription,
    hasVisibleContactPath,
    brokenInternalLinks,
  } = buildTechnicalChecks({
    rootUrl: "https://www.example.de",
    finalUrl: "http://example.de/firma",
    title: " ",
    metaDescription: "",
    visibleText: "Wir freuen uns, wenn Sie uns kontaktieren. Hier geht es zum Kontaktformular.",
    links: [
      "/kontakt",
      { href: "/impressum", statusCode: 200 },
      { href: "https://example.de/broken", statusCode: 404 },
      { href: "https://partner.example.de/team", statusCode: 500 },
    ],
  });

  assert.equal(https, false);
  assert.equal(finalUrl, "http://example.de/firma");
  assert.equal(missingTitle, true);
  assert.equal(missingMetaDescription, true);
  assert.equal(hasVisibleContactPath, true);
  assert.deepEqual(brokenInternalLinks, ["https://example.de/broken"]);
});
|
||||
|
||||
// Three links fail, but only /broken-a is listed in checkedUrls, so only it may be reported.
test("technical checks only report broken links that are in the crawl-bounded checked URL set", () => {
  const { brokenInternalLinks } = buildTechnicalChecks({
    rootUrl: "https://www.example.de",
    finalUrl: "https://example.de",
    links: [
      { href: "/kontakt", statusCode: 200 },
      { href: "/broken-a", statusCode: 404 },
      { href: "/broken-b", statusCode: 500 },
      { href: "/outside", statusCode: 404 },
    ],
    checkedUrls: ["https://example.de/kontakt", "https://example.de/broken-a"],
  });

  assert.deepEqual(brokenInternalLinks, ["https://example.de/broken-a"]);
});
|
||||
|
||||
// Text containing "senden" and "Jetzt" without contact context must raise neither signal.
test("contact signals require contact-context and do not fire on generic words alone", () => {
  const { hasContactFormSignal, hasContactCtaSignal } = extractContactSignalsFromHtmlLikeText(
    "<p>Bitte warten Sie einen Moment, wir senden Ihnen gleich Infos.</p><span>Jetzt ist alles bereit.</span>",
  );

  assert.equal(hasContactFormSignal, false);
  assert.equal(hasContactCtaSignal, false);
});
|
||||
|
||||
// A "Kontaktformular" page with a <form> raises both signals; "Anfrage senden" raises only the CTA.
test("contact signals fire for explicit contact forms and Anfrage senden", () => {
  const withForm = extractContactSignalsFromHtmlLikeText(
    "<h1>Kontaktformular</h1><form><input name=\"name\"><button>Absenden</button></form>",
  );
  const withRequestCta = extractContactSignalsFromHtmlLikeText(
    "<p>Schreiben Sie uns eine Anfrage senden.</p>",
  );

  assert.equal(withForm.hasContactFormSignal, true);
  assert.equal(withForm.hasContactCtaSignal, true);
  assert.equal(withRequestCta.hasContactFormSignal, false);
  assert.equal(withRequestCta.hasContactCtaSignal, true);
});
|
||||
533
tests/website-enrichment-action.test.ts
Normal file
533
tests/website-enrichment-action.test.ts
Normal file
@@ -0,0 +1,533 @@
|
||||
import assert from "node:assert/strict";
|
||||
import { existsSync, readFileSync } from "node:fs";
|
||||
import path from "node:path";
|
||||
import test from "node:test";
|
||||
import ts from "typescript";
|
||||
|
||||
// Project files inspected by this suite, resolved against the working directory.
const convexConfigPath = path.join(process.cwd(), "convex.json");
const convexConfigSource = readFileSync(convexConfigPath, "utf8");

const websiteEnrichmentPath = path.join(process.cwd(), "convex/websiteEnrichment.ts");
const actionPath = path.join(process.cwd(), "convex/websiteEnrichmentAction.ts");

// Raw module texts, read once at import time.
const websiteEnrichmentSource = readFileSync(websiteEnrichmentPath, "utf8");
const actionSource = readFileSync(actionPath, "utf8");

// Parses module text into a TypeScript AST (ES2022 target, parent pointers set, TS script kind).
const parseModuleSource = (fileName: string, sourceText: string) =>
  ts.createSourceFile(fileName, sourceText, ts.ScriptTarget.ES2022, true, ts.ScriptKind.TS);

const websiteEnrichmentSourceFile = parseModuleSource(
  "websiteEnrichment.ts",
  websiteEnrichmentSource,
);
const actionSourceFile = parseModuleSource("websiteEnrichmentAction.ts", actionSource);
|
||||
|
||||
/**
 * Collects the identifier names bound by `export const` statements anywhere in `file`.
 *
 * Only plain identifier bindings are recorded; destructuring patterns are skipped.
 * Every node's children are visited, whether or not the node itself contributed names.
 *
 * @param file - Parsed source file to walk.
 * @returns The set of exported constant names.
 */
function getExportedConstNames(file: ts.SourceFile) {
  const collected = new Set<string>();

  const walk = (node: ts.Node): void => {
    if (ts.isVariableStatement(node)) {
      const hasExportModifier = node.modifiers?.some(
        ({ kind }) => kind === ts.SyntaxKind.ExportKeyword,
      );
      const declaredWithConst = (node.declarationList.flags & ts.NodeFlags.Const) !== 0;

      if (hasExportModifier && declaredWithConst) {
        for (const { name } of node.declarationList.declarations) {
          if (ts.isIdentifier(name)) {
            collected.add(name.text);
          }
        }
      }
    }

    // Descend in every case, matching the traversal performed for all node kinds.
    ts.forEachChild(node, walk);
  };

  ts.forEachChild(file, walk);
  return collected;
}
|
||||
|
||||
/**
 * Reports whether `pattern` matches anywhere in `source`.
 *
 * `RegExp.prototype.test` resumes from `lastIndex` for global (`g`) and sticky (`y`)
 * patterns, so a reused pattern object could otherwise give alternating results.
 * The cursor is rewound first so every call searches from the start of `source`.
 *
 * @param source - Text to search.
 * @param pattern - Regular expression to look for.
 * @returns `true` when the pattern matches, otherwise `false`.
 */
function hasPattern(source: string, pattern: RegExp) {
  pattern.lastIndex = 0;
  return pattern.test(source);
}
|
||||
|
||||
// The mutation module must exist and must not start any line with the "use node" directive.
// NOTE(review): the module text is already read with readFileSync at import time, so a
// missing file would fail before the existence assertion runs — confirm this is intended.
test("website enrichment mutation module exists and has runtime assertions", () => {
  const moduleIsPresent = existsSync(websiteEnrichmentPath);
  assert.equal(moduleIsPresent, true, "websiteEnrichment.ts should be present");

  const declaresNodeRuntime = hasPattern(websiteEnrichmentSource, /^"use node";/m);
  assert.equal(
    declaresNodeRuntime,
    false,
    "websiteEnrichment.ts should not declare a Node runtime",
  );
});
|
||||
|
||||
// The action module must exist and must carry the "use node" directive at the start of a line.
// NOTE(review): the module text is already read with readFileSync at import time, so a
// missing file would fail before the existence assertion runs — confirm this is intended.
test("website enrichment action module exists and uses Node runtime", () => {
  const moduleIsPresent = existsSync(actionPath);
  assert.equal(moduleIsPresent, true, "websiteEnrichmentAction.ts should be present");

  const declaresNodeRuntime = hasPattern(actionSource, /^"use node";/m);
  assert.equal(
    declaresNodeRuntime,
    true,
    "websiteEnrichmentAction.ts should declare Node runtime",
  );
});
|
||||
|
||||
// Five mutation exports are required in websiteEnrichment.ts and one action export in
// websiteEnrichmentAction.ts; mutation names are checked first, then the action name.
test("module exports are split across mutations and action", () => {
  const mutationExports = getExportedConstNames(websiteEnrichmentSourceFile);
  const actionExports = getExportedConstNames(actionSourceFile);

  const expectations = [
    {
      available: mutationExports,
      required: [
        "queueLeadEnrichment",
        "startLeadEnrichmentRun",
        "persistLeadEnrichmentResult",
        "finishLeadEnrichmentRun",
        "patchLeadFromWebsiteEnrichment",
      ],
      describeMissing: (exportName: string) =>
        `Expected mutation export in websiteEnrichment.ts: ${exportName}`,
    },
    {
      available: actionExports,
      required: ["processLeadEnrichment"],
      describeMissing: (exportName: string) =>
        `Expected action export in websiteEnrichmentAction.ts: ${exportName}`,
    },
  ];

  for (const { available, required, describeMissing } of expectations) {
    for (const exportName of required) {
      assert.equal(available.has(exportName), true, describeMissing(exportName));
    }
  }
});
|
||||
|
||||
// The queue mutation must call runAfter(0, …) with the processLeadEnrichment action reference.
test("queueLeadEnrichment schedules internal.websiteEnrichmentAction.processLeadEnrichment", () => {
  const schedulesAction = hasPattern(
    websiteEnrichmentSource,
    /queueLeadEnrichment\s*=\s*internalMutation\([\s\S]*?ctx\.scheduler\.runAfter\(\s*0,\s*internal\.websiteEnrichmentAction\.processLeadEnrichment/,
  );

  assert.equal(
    schedulesAction,
    true,
    "Queue mutation should schedule action with runAfter(0, internal.websiteEnrichmentAction.processLeadEnrichment)",
  );
});
|
||||
|
||||
// Isolates the text from `export const queueLeadEnrichment` up to the next export
// (startLeadEnrichmentRun) and checks both dedupe lookups plus the absence of take(50).
test("queueLeadEnrichment uses lead-aware run index and does not use fixed-size .take(50) windows", () => {
  const queueBlock = websiteEnrichmentSource.match(
    /export const queueLeadEnrichment[\s\S]*?(?=\nexport const startLeadEnrichmentRun)/,
  );
  assert.equal(
    queueBlock !== null,
    true,
    "queueLeadEnrichment block should be parseable for source assertions",
  );

  const queueBody = queueBlock?.[0] ?? "";
  const queueBodyHas = (pattern: RegExp) => hasPattern(queueBody, pattern);

  assert.equal(
    queueBodyHas(
      /withIndex\("by_type_and_status_and_leadId"[\s\S]*?eq\("type",\s*"website_enrichment"\)[\s\S]*?eq\("status",\s*"pending"\)[\s\S]*?eq\("leadId",\s*args\.leadId\)/,
    ),
    true,
    "Queue dedupe for pending runs should use direct type+status+leadId index.",
  );
  assert.equal(
    queueBodyHas(
      /withIndex\("by_type_and_status_and_leadId"[\s\S]*?eq\("type",\s*"website_enrichment"\)[\s\S]*?eq\("status",\s*"running"\)[\s\S]*?eq\("leadId",\s*args\.leadId\)/,
    ),
    true,
    "Queue dedupe for running runs should use direct type+status+leadId index.",
  );
  assert.equal(
    queueBodyHas(/take\(50\)/),
    false,
    "No fixed-size .take(50) window in dedupe queries.",
  );
});
|
||||
|
||||
// Source-level checks on the action module and convex.json: Playwright type import, dynamic
// imports, browser asset URL handling, external package list, and the two device profiles.
test("website enrichment action uses Chromium desktop/mobile devices and runtime Playwright import", () => {
  // Asserts whether the action source matches `pattern`, reporting `message` on mismatch.
  const expectActionSource = (pattern: RegExp, expected: boolean, message: string) => {
    assert.equal(hasPattern(actionSource, pattern), expected, message);
  };

  expectActionSource(
    /import\s+type\s+\{[^\n]*BrowserContext[^\n]*\}\s+from\s+["']playwright-core["']/,
    true,
    "Action should import BrowserContext type for typed helper signatures",
  );
  expectActionSource(
    /loadPlaywrightModules\(\)/,
    true,
    "Action should load Playwright at runtime from inside action",
  );
  expectActionSource(
    /import\("playwright-core"\)/,
    true,
    "Action should use a dynamic import for playwright-core that Convex can detect as an external package",
  );
  expectActionSource(
    /import\("@sparticuz\/chromium-min"\)/,
    true,
    "Action should use a dynamic import for @sparticuz/chromium-min as the lightweight browser package",
  );
  expectActionSource(
    /TASK8_BROWSER_ASSET_URL/,
    true,
    "Action should reference TASK8_BROWSER_ASSET_URL when loading browser assets",
  );
  expectActionSource(
    /TASK8_BROWSER_ASSET_URL[\s\S]{0,240}(throw|Error|required|missing|not configured|configured|konfiguriert|setze)/i,
    true,
    "Action should surface a clear error when the browser asset URL is not configured",
  );
  expectActionSource(
    /import\("@sparticuz\/chromium"\)/,
    false,
    "Action should not import the oversized @sparticuz/chromium package",
  );

  // convex.json must list the browser packages as Node external packages.
  const externalPackages = JSON.parse(convexConfigSource).node?.externalPackages;
  assert.equal(
    Array.isArray(externalPackages),
    true,
    "convex.json should define node.externalPackages",
  );
  assert.equal(
    externalPackages?.includes("playwright-core"),
    true,
    "convex.json must include playwright-core in externalPackages",
  );
  assert.equal(
    externalPackages?.includes("@sparticuz/chromium-min"),
    true,
    "convex.json should include @sparticuz/chromium-min for browser runtime",
  );
  assert.equal(
    externalPackages?.includes("@sparticuz/chromium"),
    false,
    "convex.json should not include the oversized @sparticuz/chromium package",
  );

  expectActionSource(
    /serverlessChromium/,
    true,
    "Runtime bootstrap should still use a serverless Chromium wrapper object for launch config",
  );
  expectActionSource(
    /devices\["Desktop Chrome"\]/,
    true,
    "Desktop context should use Playwright Desktop Chrome device profile",
  );
  expectActionSource(
    /devices\["iPhone 11"\]/,
    true,
    "Mobile context should use Playwright iPhone 11 device profile",
  );
});
|
||||
|
||||
// Each entry pairs a pattern the action source must contain with the message reported when
// it is absent. Entries are checked in order.
test("website enrichment action invalidates stale @sparticuz/chromium-min cache when source changes", () => {
  const requiredPatterns: Array<[RegExp, string]> = [
    [
      /CHROMIUM_SOURCE_MARKER_FILE/,
      "Action should declare a temporary marker file path for Chromium executable source cache tracking.",
    ],
    [/tmpdir\(\)/, "Action should derive temporary cache paths from os.tmpdir()."],
    [
      /getChromiumSourceMarker\(/,
      "Action should hash executable sources into a stable marker.",
    ],
    [
      /clearChromiumCacheForSourceMismatch\(/,
      "Action should centralize cache invalidation in a dedicated helper.",
    ],
    [
      /rm\(CHROMIUM_EXECUTABLE_PATH,\s*\{ force: true, recursive: true \}\),/,
      "Action should remove /tmp/chromium when executable source changes.",
    ],
    [
      /rm\(CHROMIUM_PACK_PATH,\s*\{ force: true, recursive: true \}\),/,
      "Action should remove /tmp/chromium-pack when executable source changes.",
    ],
    [
      /clearChromiumCacheForSourceMismatch\(executableSource\)[\s\S]*?chromium\.executablePath\(executableSource\)/,
      "Action should clear stale cache before resolving Chromium executable path.",
    ],
    [
      /writeFile\([\s\S]*?CHROMIUM_SOURCE_MARKER_FILE,[\s\S]*?getChromiumSourceMarker\(executableSource\)/,
      "Action should persist the source marker after executable path resolution.",
    ],
  ];

  for (const [pattern, message] of requiredPatterns) {
    assert.equal(hasPattern(actionSource, pattern), true, message);
  }
});
|
||||
|
||||
// Three source-level checks: (1) a shared-library preparation mechanism is referenced,
// (2) the AL2023 archive, pack path and lib directory are referenced, and (3) the latest of
// the setup markers' first occurrences lies between executable resolution and chromium launch.
test("website enrichment action prepares Chromium AL2023 shared libraries for Convex runtime", () => {
  const actionMentions = (pattern: RegExp) => hasPattern(actionSource, pattern);

  const usesInflateSetup =
    actionMentions(/inflate/) && actionMentions(/setupLambdaEnvironment/);
  const hasChromiumHelpers = usesInflateSetup || actionMentions(/LD_LIBRARY_PATH/);
  assert.equal(
    hasChromiumHelpers,
    true,
    "Action should explicitly prepare chromium-min runtime environment for AL2023 shared libraries to avoid `/tmp/chromium: error while loading shared libraries: libnspr4.so` (inflate/setupLambdaEnvironment or LD_LIBRARY_PATH).",
  );

  const joinsAl2023LibDir = actionMentions(
    /path\.join\(\s*tmpdir\(\),\s*["']al2023["'],\s*["']lib["']\s*\)/,
  );
  const hasAl2023LibPath =
    joinsAl2023LibDir ||
    (actionMentions(/LD_LIBRARY_PATH/) && actionMentions(/al2023\/lib/));

  const referencesRuntimeArchive = actionMentions(/al2023\.tar\.br/);
  const referencesPackPath = actionMentions(/CHROMIUM_PACK_PATH/);
  assert.equal(
    referencesRuntimeArchive && referencesPackPath && hasAl2023LibPath,
    true,
    "Action should reference al2023.tar.br, track CHROMIUM_PACK_PATH, and ensure /tmp/al2023/lib is prepared for Convex launch.",
  );

  const firstIndexOf = (needle: string) => actionSource.indexOf(needle);

  const executableIndex = firstIndexOf(
    "const executablePath = await resolveChromiumExecutablePath(",
  );
  const launchIndex = firstIndexOf("chromium.launch({");
  // Largest first-occurrence index among the three accepted setup markers (-1 when none appear).
  const latestSetupIndex = Math.max(
    firstIndexOf("setupLambdaEnvironment("),
    firstIndexOf("LD_LIBRARY_PATH"),
    firstIndexOf("path.join(tmpdir(), \"al2023\", \"lib\")"),
  );

  const setupSitsBetween =
    executableIndex >= 0 &&
    latestSetupIndex > executableIndex &&
    latestSetupIndex < launchIndex;
  assert.equal(
    setupSitsBetween,
    true,
    "Executable resolution and AL2023 shared-library setup should happen before chromium launch in the action runtime path.",
  );
});
|
||||
|
||||
/**
 * Static check on the action source text: the Playwright bootstrap sequence
 * must sit after a `try {`, and the `catch (error)` block must reference the
 * run-finish, event-append and lead-patch calls in that order.
 */
test("processLeadEnrichment wraps Playwright bootstrap in protected try/catch", () => {
  // try { -> load modules -> resolve executable -> launch -> two contexts.
  const protectedBootstrap =
    /try\s*\{[\s\S]*?const \{ playwrightCore, serverlessChromium \}\s*=\s*await loadPlaywrightModules\(\);[\s\S]*?const executablePath = await resolveChromiumExecutablePath\(\s*serverlessChromium,\s*\);[\s\S]*?browser = await playwrightCore\.chromium\.launch\([\s\S]*?executablePath,[\s\S]*?desktopContext = await browser\.newContext\([\s\S]*?mobileContext = await browser\.newContext\(/;
  // catch (error) { -> finish run -> append event -> patch lead.
  const failureHandling =
    /catch\s*\(error\)\s*\{[\s\S]*?finishLeadEnrichmentRun[\s\S]*?runs\.appendEvent[\s\S]*?patchLeadFromWebsiteEnrichment/;

  assert.equal(
    hasPattern(actionSource, protectedBootstrap),
    true,
    "Playwright runtime bootstrap should use resolveChromiumExecutablePath() inside the action's try/catch-protected block",
  );
  assert.equal(
    hasPattern(actionSource, failureHandling),
    true,
    "Bootstrap failures should be handled by finish + error event + lead patch in catch",
  );
});
|
||||
|
||||
/**
 * Static check on the action source text: both persistence caps are defined
 * with the expected values and are applied via `slice` before writing.
 */
test("persistence caps candidates and links before writing", () => {
  const expectations = [
    {
      pattern: /MAX_PERSISTED_LINKS\s*=\s*120/,
      message: "Action should define MAX_PERSISTED_LINKS with value 120.",
    },
    {
      pattern: /MAX_PERSISTED_EMAIL_CANDIDATES\s*=\s*40/,
      message:
        "Action should define MAX_PERSISTED_EMAIL_CANDIDATES with value 40.",
    },
    {
      pattern:
        /deduplicateCrawlLinks\(allLinks\)[\s\S]*?slice\([\s\S]*?MAX_PERSISTED_LINKS/,
      message:
        "Action should dedupe and cap link persistence at MAX_PERSISTED_LINKS.",
    },
    {
      pattern:
        /validCandidates\.slice\([\s\S]*?MAX_PERSISTED_EMAIL_CANDIDATES/,
      message:
        "Action should cap candidate persistence at MAX_PERSISTED_EMAIL_CANDIDATES.",
    },
  ];

  // Checked in declaration order; the first failing expectation aborts the test.
  for (const { pattern, message } of expectations) {
    assert.equal(hasPattern(actionSource, pattern), true, message);
  }
});
|
||||
|
||||
/**
 * Static check on the action source text: screenshots go through
 * `ctx.storage.store(` and are wrapped in a `Blob` that references
 * SCREENSHOT_MIME_TYPE.
 */
test("website enrichment process stores homepage screenshots in Convex storage as PNG", () => {
  const expectInAction = (pattern: RegExp, message: string) => {
    assert.equal(hasPattern(actionSource, pattern), true, message);
  };

  expectInAction(
    /ctx\.storage\.store\(/,
    "Action should store screenshot blobs via ctx.storage.store",
  );
  expectInAction(
    /new\s+Blob\(\[[\s\S]*?SCREENSHOT_MIME_TYPE/,
    "Action should wrap screenshots in Blob with image/png MIME type",
  );
});
|
||||
|
||||
/**
 * Static check on the website enrichment module source: the
 * `if (!lead.websiteUrl)` branch must set a failed status and the specific
 * German contactStatusReason text.
 */
test("startLeadEnrichmentRun marks missing website lead with contact status reason", () => {
  const missingWebsiteBranch =
    /if \(!lead\.websiteUrl\)\s*\{[\s\S]*?status:\s*"failed"[\s\S]*?contactStatusReason:\s*"Website-URL fehlt für das Website-Enrichment\."/;
  const branchPresent = hasPattern(websiteEnrichmentSource, missingWebsiteBranch);

  assert.equal(
    branchPresent,
    true,
    "Missing websiteUrl should set a specific contactStatusReason on the lead",
  );
});
|
||||
|
||||
/**
 * Static check on the website enrichment module source: there must be a
 * `ctx.db.insert("<table>"` call for every evidence table listed below.
 */
test("website enrichment persistence inserts all required evidence table rows", () => {
  const evidenceTables = [
    "websiteCrawlPages",
    "websiteCrawlLinks",
    "websiteEmailCandidates",
    "websiteCrawlScreenshots",
    "websiteTechnicalChecks",
  ] as const;

  evidenceTables.forEach((tableName) => {
    // Accepts either quote style around the table name.
    const insertCall = new RegExp(
      `ctx\\.db\\.insert\\(["']${tableName}["']`,
      "s",
    );
    assert.equal(
      hasPattern(websiteEnrichmentSource, insertCall),
      true,
      `persistLeadEnrichmentResult should insert into ${tableName}`,
    );
  });
});
|
||||
|
||||
/**
 * Static check across the action and the enrichment module sources: the
 * action calls the usable-email helper, patches the lead with its result and
 * forwards the lead's contact status; the mutation compares that status
 * against "missing_contact".
 */
test("website enrichment flow uses TASK-7 email selection helper for lead patching", () => {
  const expectations = [
    {
      source: actionSource,
      pattern: /getUsableContactEmailFromEntries\([\s\S]*?\)/,
      message: "Action should call getUsableContactEmailFromEntries",
    },
    {
      source: actionSource,
      pattern:
        /runMutation\(\s*internal\.websiteEnrichment\.patchLeadFromWebsiteEnrichment[\s\S]*?\{[\s\S]*?email:\s*usable\.email/,
      message: "Action should patch lead from usable email result",
    },
    {
      source: actionSource,
      pattern: /currentContactStatus\s*:\s*started\.lead\.contactStatus/,
      message:
        "Action should pass lead contact status to patchLeadFromWebsiteEnrichment",
    },
    {
      source: websiteEnrichmentSource,
      pattern: /args\.currentContactStatus\s*===\s*\"missing_contact\"/,
      message:
        "Lead patch mutation should only set new status for missing_contact",
    },
  ];

  // Checked in declaration order; the first failing expectation aborts the test.
  for (const { source, pattern, message } of expectations) {
    assert.equal(hasPattern(source, pattern), true, message);
  }
});
|
||||
|
||||
/**
 * Static check on the action source text: on failure the action finishes the
 * run as failed, appends an error-level event, and writes a failure reason to
 * the lead, both for generic errors and for an invalid URL.
 */
test("failure handling marks run as failed and writes lead-facing reason", () => {
  const failureExpectations = [
    {
      pattern:
        /runMutation\(\s*internal\.websiteEnrichment\.finishLeadEnrichmentRun[\s\S]*?status:\s*"failed"/,
      message: "Action should persist failed run state on fatal crawl errors",
    },
    {
      pattern:
        /runMutation\(\s*api\.runs\.appendEvent[\s\S]*?level:\s*"error"[\s\S]*?message:\s*"Website-Enrichment fehlgeschlagen/,
      message: "Action should append a visible error event on failure",
    },
    {
      pattern:
        /contactStatusReason:\s*`Website-Enrichment fehlgeschlagen:\s*\$\{errorSummary\}`/,
      message: "Action should patch the lead with an actionable failure reason",
    },
    {
      pattern:
        /contactStatusReason:\s*"Website-Enrichment fehlgeschlagen: Ungültige Website-URL\."/,
      message:
        "Invalid-url failure should also update lead contact status reason",
    },
  ];

  failureExpectations.forEach(({ pattern, message }) => {
    assert.equal(hasPattern(actionSource, pattern), true, message);
  });
});
|
||||
|
||||
/**
 * Static check on the action source text for the TASK-8 crawl limits: the
 * TASK8_CRAWL_TIMEOUT_MS override is referenced, and the default timeout and
 * default page cap are declared with the expected literal values.
 */
test("website enrichment enforces TASK-8 crawler limits and runtime timeboxes", () => {
  // Presence check only, so the pattern is deliberately non-global: a `g`
  // flag makes RegExp.prototype.test stateful through `lastIndex`, which is
  // a hazard as soon as a pattern object is reused.
  assert.equal(
    hasPattern(actionSource, /TASK8_CRAWL_TIMEOUT_MS/),
    true,
    "TASK8_CRAWL_TIMEOUT_MS environment override should be used",
  );
  // Matches the numeric-separator spelling `60_000` exactly.
  assert.equal(
    hasPattern(actionSource, /DEFAULT_CRAWL_TIMEOUT_MS\s*=\s*60_000/),
    true,
    "Default crawl timeout should be 60s",
  );
  assert.equal(
    hasPattern(actionSource, /DEFAULT_CRAWL_MAX_PAGES\s*=\s*5/),
    true,
    "Default max crawl page count should be 5",
  );
});
|
||||
163
tests/website-enrichment-schema.test.ts
Normal file
163
tests/website-enrichment-schema.test.ts
Normal file
@@ -0,0 +1,163 @@
|
||||
import assert from "node:assert/strict";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import test from "node:test";
|
||||
|
||||
import type { Doc } from "../convex/_generated/dataModel";
|
||||
import { RUN_EVENT_LEVELS, RUN_STATUSES, RUN_TYPES } from "../convex/domain";
|
||||
|
||||
/**
 * Resolves to `true` when the unions `A` and `B` have exactly the same
 * members and to `false` otherwise.
 *
 * Both one-sided differences are merged into a single union; that union is
 * `never` only when neither side has a member the other lacks. The tuple
 * wrapper keeps the conditional from distributing over the union.
 */
type ExactSetEquality<A, B> = [Exclude<A, B> | Exclude<B, A>] extends [never]
  ? true
  : false;
|
||||
|
||||
/** `true` when `undefined` is assignable to `T`, i.e. the value may be absent. */
type IsOptional<T> = undefined extends T ? true : false;

/** Logical inverse of `IsOptional`: `true` when `T` does not admit `undefined`. */
type IsRequired<T> = IsOptional<T> extends true ? false : true;
|
||||
|
||||
// Value domains as declared by the generated Convex document types.
type AgentRunType = Doc<"agentRuns">["type"];
type AgentRunStatus = Doc<"agentRuns">["status"];
type AgentRunEventLevel = Doc<"agentRunEvents">["level"];

// The same domains as derived from the constant tuples in convex/domain.
type RunTypeFromDomain = (typeof RUN_TYPES)[number];
type RunStatusFromDomain = (typeof RUN_STATUSES)[number];
type RunEventLevelFromDomain = (typeof RUN_EVENT_LEVELS)[number];

// Collapses to `never` if "website_enrichment" is not part of the run type
// union, which makes any assignment to this type a compile error.
type AssertWebsiteEnrichmentRunType = Extract<
  AgentRunType,
  "website_enrichment"
>;
|
||||
|
||||
// Each Extract<> below collapses to `never` when the literal is missing from
// the schema union, so assigning the literal to it fails to compile.
type AssertLeadCrawlPageKind = Extract<
  Doc<"websiteCrawlPages">["pageKind"],
  "homepage"
>;
type AssertCrawlViewportDesktop = Extract<
  Doc<"websiteCrawlScreenshots">["viewport"],
  "desktop"
>;
type AssertCrawlViewportMobile = Extract<
  Doc<"websiteCrawlScreenshots">["viewport"],
  "mobile"
>;

// Plain field-type lookups on the email candidate documents.
type AssertNormalizedEmailType =
  Doc<"websiteEmailCandidates">["normalizedEmail"];
type AssertAcceptedEmailFlag = Doc<"websiteEmailCandidates">["accepted"];

// Plain field-type lookups on the technical check documents.
type AssertTechnicalUsesHttps = Doc<"websiteTechnicalChecks">["usesHttps"];
type AssertTechnicalHasVisibleContactPath =
  Doc<"websiteTechnicalChecks">["hasVisibleContactPath"];
|
||||
|
||||
// `true` when the RUN_TYPES tuple contains "website_enrichment".
type AssertRunTypeInDomain = "website_enrichment" extends RunTypeFromDomain
  ? true
  : false;

// Each parity alias is `true` only when the schema-derived union and the
// domain-constant union have exactly the same members.
type AssertRunTypeEnumParity = ExactSetEquality<
  AgentRunType,
  RunTypeFromDomain
>;
type AssertRunStatusEnumParity = ExactSetEquality<
  AgentRunStatus,
  RunStatusFromDomain
>;
type AssertRunEventLevelEnumParity = ExactSetEquality<
  AgentRunEventLevel,
  RunEventLevelFromDomain
>;
|
||||
|
||||
// Raw text of convex/schema.ts, resolved against the current working
// directory and read once at module load.
const schemaPath = join(process.cwd(), "convex", "schema.ts");
const schemaSource = readFileSync(schemaPath, "utf8");
|
||||
|
||||
// Compile-time witnesses: each constant only type-checks when its annotated
// alias resolves to a type that accepts the assigned literal.

// Run type is present in the schema union and in the domain constants.
const _assertRunTypeSchemaHasWebsiteEnrichment: AssertWebsiteEnrichmentRunType =
  "website_enrichment";
const _assertRunTypeInDomainHasWebsiteEnrichment: AssertRunTypeInDomain = true;

// Schema unions and domain constant unions have identical members.
const _assertRunTypeEnumParity: AssertRunTypeEnumParity = true;
const _assertRunStatusEnumParity: AssertRunStatusEnumParity = true;
const _assertRunEventLevelEnumParity: AssertRunEventLevelEnumParity = true;

// Literal members expected on the crawl tables.
const _assertPageKindSchemaIncludesHomepage: AssertLeadCrawlPageKind =
  "homepage";
const _assertScreenshotViewportTypeDesktop: AssertCrawlViewportDesktop =
  "desktop";
const _assertScreenshotViewportTypeMobile: AssertCrawlViewportMobile =
  "mobile";
|
||||
// `runId` must admit `undefined` on every evidence table; otherwise
// IsOptional<> resolves to `false` and assigning `true` fails to compile.
const _assertRunIdOptionalOnPages: IsOptional<
  Doc<"websiteCrawlPages">["runId"]
> = true;
const _assertRunIdOptionalOnLinks: IsOptional<
  Doc<"websiteCrawlLinks">["runId"]
> = true;
const _assertRunIdOptionalOnEmailCandidates: IsOptional<
  Doc<"websiteEmailCandidates">["runId"]
> = true;
const _assertRunIdOptionalOnScreenshots: IsOptional<
  Doc<"websiteCrawlScreenshots">["runId"]
> = true;
const _assertRunIdOptionalOnTechnicalChecks: IsOptional<
  Doc<"websiteTechnicalChecks">["runId"]
> = true;
|
||||
// `createdAt` must not admit `undefined` on any evidence table; otherwise
// IsRequired<> resolves to `false` and assigning `true` fails to compile.
const _assertPagesHasCreatedAt: IsRequired<
  Doc<"websiteCrawlPages">["createdAt"]
> = true;
const _assertLinksHasCreatedAt: IsRequired<
  Doc<"websiteCrawlLinks">["createdAt"]
> = true;
const _assertEmailCandidatesHasCreatedAt: IsRequired<
  Doc<"websiteEmailCandidates">["createdAt"]
> = true;
const _assertScreenshotsHasCreatedAt: IsRequired<
  Doc<"websiteCrawlScreenshots">["createdAt"]
> = true;
const _assertTechnicalChecksHasCreatedAt: IsRequired<
  Doc<"websiteTechnicalChecks">["createdAt"]
> = true;
|
||||
// Email candidate fields: the normalized address accepts a string, and the
// `accepted` flag accepts both boolean values.
const _assertWebsiteEmailCandidatesNormalizedEmail: AssertNormalizedEmailType =
  "user@example.com";
const _assertEmailAcceptedTrue: AssertAcceptedEmailFlag = true;
const _assertEmailAcceptedFalse: AssertAcceptedEmailFlag = false;

// Screenshot rows must always carry a storage id.
const _assertScreenshotStorageIdRequired: IsRequired<
  Doc<"websiteCrawlScreenshots">["storageId"]
> = true;

// Technical check flags accept the boolean values assigned here.
const _assertTechnicalUsesHttpsTrue: AssertTechnicalUsesHttps = true;
const _assertTechnicalUsesHttpsFalse: AssertTechnicalUsesHttps = false;
const _assertTechnicalMissingTitleFalse: Doc<"websiteTechnicalChecks">["missingTitle"] =
  false;
const _assertTechnicalMissingMetaDescriptionTrue: Doc<"websiteTechnicalChecks">["missingMetaDescription"] =
  true;
const _assertTechnicalHasVisibleContactPathTrue: AssertTechnicalHasVisibleContactPath =
  true;
const _assertTechnicalHasVisibleContactPathFalse: AssertTechnicalHasVisibleContactPath =
  false;
|
||||
|
||||
// Convex index structure can't be asserted from Doc types safely; this test validates
|
||||
// field contracts and value domains that are practical to verify at compile/runtime.
|
||||
/**
 * Runtime counterpart of the compile-time witness constants: re-checks each
 * witness against the literal it was declared with, so the constants are
 * used and the test run reports the contract.
 */
test("website enrichment schema contracts are present", () => {
  const witnesses: ReadonlyArray<readonly [unknown, unknown]> = [
    [_assertRunTypeSchemaHasWebsiteEnrichment, "website_enrichment"],
    [_assertRunTypeInDomainHasWebsiteEnrichment, true],
    [_assertRunTypeEnumParity, true],
    [_assertRunStatusEnumParity, true],
    [_assertRunEventLevelEnumParity, true],
    [_assertPageKindSchemaIncludesHomepage, "homepage"],
    [_assertScreenshotViewportTypeDesktop, "desktop"],
    [_assertScreenshotViewportTypeMobile, "mobile"],
    [_assertRunIdOptionalOnPages, true],
    [_assertRunIdOptionalOnLinks, true],
    [_assertRunIdOptionalOnEmailCandidates, true],
    [_assertRunIdOptionalOnScreenshots, true],
    [_assertRunIdOptionalOnTechnicalChecks, true],
    [_assertPagesHasCreatedAt, true],
    [_assertLinksHasCreatedAt, true],
    [_assertEmailCandidatesHasCreatedAt, true],
    [_assertScreenshotsHasCreatedAt, true],
    [_assertTechnicalChecksHasCreatedAt, true],
    [_assertScreenshotStorageIdRequired, true],
    [_assertWebsiteEmailCandidatesNormalizedEmail, "user@example.com"],
    [_assertEmailAcceptedTrue, true],
    [_assertEmailAcceptedFalse, false],
    [_assertTechnicalUsesHttpsTrue, true],
    [_assertTechnicalUsesHttpsFalse, false],
    [_assertTechnicalMissingTitleFalse, false],
    [_assertTechnicalMissingMetaDescriptionTrue, true],
    [_assertTechnicalHasVisibleContactPathTrue, true],
    [_assertTechnicalHasVisibleContactPathFalse, false],
  ];

  // Checked in declaration order; the first mismatch aborts the test.
  for (const [actual, expected] of witnesses) {
    assert.equal(actual, expected);
  }
});
|
||||
|
||||
/**
 * Static check on the schema source text: the index named
 * `by_type_and_status_and_leadId` must exist and must itself be declared
 * over the fields `type`, `status`, `leadId`, in that order.
 */
test("agentRuns schema defines lead-aware active-run index", () => {
  assert.equal(
    schemaSource.includes("by_type_and_status_and_leadId"),
    true,
    "Schema should define the by_type_and_status_and_leadId index.",
  );

  // Ties the field list to this index's own `.index(name, fields)` call, so a
  // same-shaped field list on a different index cannot satisfy the check.
  // Tolerates either quote style, line wrapping and a trailing comma.
  const indexWithOrderedFields =
    /\.index\(\s*["']by_type_and_status_and_leadId["']\s*,\s*\[\s*["']type["']\s*,\s*["']status["']\s*,\s*["']leadId["']\s*,?\s*\]/;
  assert.equal(
    indexWithOrderedFields.test(schemaSource),
    true,
    "Schema should include by_type_and_status_and_leadId index fields in order.",
  );
});
|
||||
Reference in New Issue
Block a user