Fix audit generation and enrichment fallback
This commit is contained in:
@@ -17,7 +17,7 @@ import {
|
||||
getUsableContactEmailFromEntries,
|
||||
normalizeEmailAddress,
|
||||
} from "../lib/lead-discovery-google";
|
||||
import { api, internal } from "./_generated/api";
|
||||
import { internal } from "./_generated/api";
|
||||
import type { Doc, Id } from "./_generated/dataModel";
|
||||
import { internalAction, type ActionCtx } from "./_generated/server";
|
||||
|
||||
@@ -30,6 +30,17 @@ const ACTION_TIMEOUT_BUFFER_MS = 5_000;
|
||||
/** Cap applied to the de-duplicated crawl links before they are persisted. */
const MAX_PERSISTED_LINKS = 120;

/** Cap applied to the de-duplicated e-mail candidates before they are persisted. */
const MAX_PERSISTED_EMAIL_CANDIDATES = 40;

/** MIME type recorded for stored screenshots. */
const SCREENSHOT_MIME_TYPE = "image/png";

/** Maximum number of response-body bytes read per page by the browserless fetch fallback. */
const MAX_BROWSERLESS_PAGE_BYTES = 750_000;

/** Maximum length of an anchor's text kept by the browserless link extractor. */
const MAX_BROWSERLESS_LINK_TEXT_CHARS = 180;

/** Paths tried by the browserless crawl in addition to links discovered on the homepage. */
const BROWSERLESS_CRAWL_PATHS = ["/", "/kontakt", "/impressum", "/leistungen", "/ueber-uns"];

/** User-Agent header sent with browserless fetch requests. */
const BROWSERLESS_USER_AGENT =
  "Mozilla/5.0 (compatible; WebDevPipelineBot/1.0; +https://webdev-pipeline.local)";

// Chromium-related locations inside the OS temp directory.
// NOTE(review): their consumers are outside this excerpt; purpose inferred from the names only.
const CHROMIUM_SOURCE_MARKER_FILE = path.join(tmpdir(), "chromium-source.sha256");
const CHROMIUM_EXECUTABLE_PATH = path.join(tmpdir(), "chromium");
const CHROMIUM_PACK_PATH = path.join(tmpdir(), "chromium-pack");
|
||||
@@ -116,11 +127,41 @@ type ServerlessChromiumModule = {
|
||||
inflate: (filePath: string) => Promise<string>;
|
||||
setupLambdaEnvironment: (baseLibPath: string) => void;
|
||||
};
|
||||
/**
 * Structural type for anything that can be closed asynchronously.
 * In this file it is used for Playwright pages, browser contexts and the browser itself.
 */
type PlaywrightClosableResource = {
  close(): Promise<unknown>;
};
|
||||
|
||||
/**
 * Turns an arbitrary thrown value into a printable message.
 *
 * @param error - Any caught value.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function messageFromError(error: unknown) {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
|
||||
|
||||
/**
 * Reports whether a thrown value's message says that the page, context or
 * browser it refers to has already been closed.
 *
 * @param error - Any caught value; its message is obtained via `messageFromError`.
 * @returns `true` when the message matches one of the known "closed" phrases (case-insensitive).
 */
function isPlaywrightTargetClosedError(error: unknown) {
  const closedTargetPattern =
    /Target page, context or browser has been closed|Target closed|Browser has been closed|Context has been closed|Page has been closed/i;
  return closedTargetPattern.test(messageFromError(error));
}
|
||||
|
||||
/**
 * Closes a page/context/browser without ever throwing, so it is safe to call
 * from `finally` blocks.
 *
 * "Already closed" errors are ignored silently; any other close failure is
 * logged with `console.warn` and then swallowed.
 *
 * @param resource - The resource to close; a falsy value is a no-op.
 * @param label - Human-readable name used in the warning log.
 */
async function closePlaywrightResourceSafely(
  resource: PlaywrightClosableResource | null,
  label: string,
) {
  if (!resource) {
    return;
  }

  try {
    await resource.close();
  } catch (closeError) {
    // A target that is already gone is the desired end state, so only
    // unexpected failures are worth a log line.
    if (!isPlaywrightTargetClosedError(closeError)) {
      console.warn(`Playwright cleanup ignored failed close for ${label}.`, {
        error: messageFromError(closeError),
      });
    }
  }
}
|
||||
|
||||
function readPositiveIntEnv(key: string, fallback: number) {
|
||||
const raw = process.env[key]?.trim();
|
||||
if (!raw) {
|
||||
@@ -230,6 +271,280 @@ function isGenericBusinessEmail(email: string) {
|
||||
return GENERIC_EMAIL_LOCALS.has(base);
|
||||
}
|
||||
|
||||
/** Named character references understood by `decodeHtmlText` (keys are lower-case). */
const NAMED_HTML_ENTITIES: Record<string, string> = {
  nbsp: " ",
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  apos: "'",
};

/**
 * Converts the digits of a numeric character reference into its character.
 *
 * @param rawCode - The digits between `&#` / `&#x` and `;`.
 * @param radix - 10 for decimal references, 16 for hexadecimal ones.
 * @returns The decoded character, or `""` when the value is not a valid
 *   Unicode code point (not a number, negative, or above U+10FFFF).
 */
function decodeHtmlCodePoint(rawCode: string, radix: number) {
  const codePoint = Number.parseInt(rawCode, radix);
  // The range check guarantees String.fromCodePoint cannot throw.
  if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > 0x10ffff) {
    return "";
  }
  return String.fromCodePoint(codePoint);
}

/**
 * Decodes numeric character references and a small set of named entities
 * (`&nbsp; &amp; &lt; &gt; &quot; &apos;`, case-insensitive), then collapses
 * every whitespace run to a single space and trims the result.
 *
 * All references are decoded in ONE pass so that already-decoded output is
 * never decoded again: `&amp;lt;` becomes the literal text `&lt;`, not `<`.
 * Unknown entities are left untouched.
 *
 * @param input - Raw text or attribute value taken from HTML.
 * @returns The decoded, whitespace-normalised text.
 */
function decodeHtmlText(input: string) {
  return input
    .replace(
      /&(?:#(\d+)|#x([0-9a-f]+)|(nbsp|amp|lt|gt|quot|apos));/gi,
      (entity: string, decimal?: string, hex?: string, named?: string) => {
        if (decimal !== undefined) {
          return decodeHtmlCodePoint(decimal, 10);
        }
        if (hex !== undefined) {
          return decodeHtmlCodePoint(hex, 16);
        }
        if (named !== undefined) {
          return NAMED_HTML_ENTITIES[named.toLowerCase()] ?? entity;
        }
        return entity;
      },
    )
    // \s also matches U+00A0, so a decoded &#160; collapses like &nbsp;.
    .replace(/\s+/g, " ")
    .trim();
}

/**
 * Reduces an HTML fragment to plain label text: drops `<script>` and
 * `<style>` elements including their content, replaces every remaining tag
 * with a space, and decodes entities via `decodeHtmlText`.
 *
 * @param input - HTML fragment (for example the inner HTML of a heading or link).
 * @returns Plain text with normalised whitespace.
 */
function stripHtmlForLabel(input: string) {
  const withoutScripts = input.replace(/<script[\s\S]*?<\/script>/gi, " ");
  const withoutStyles = withoutScripts.replace(/<style[\s\S]*?<\/style>/gi, " ");
  const withoutTags = withoutStyles.replace(/<[^>]*>/g, " ");
  return decodeHtmlText(withoutTags);
}

/**
 * Reads one attribute value from a tag (or from a tag's attribute string).
 *
 * The attribute name must sit at the start of the string or follow
 * whitespace, so asking for `href` does not pick up `data-href`. Double-quoted,
 * single-quoted and unquoted values are supported. This is a regex heuristic,
 * not an HTML parser: text that looks like `name=value` inside another
 * attribute's quoted value can still match.
 *
 * @param tag - The tag source, e.g. `<meta name="x" content="y">` or ` href="/a"`.
 * @param attribute - Attribute name to look up (matched case-insensitively).
 * @returns The entity-decoded value, or `""` when absent or empty.
 */
function getHtmlAttribute(tag: string, attribute: string) {
  // Escape the name so it is always matched literally inside the pattern.
  const escapedName = attribute.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const match = new RegExp(
    `(?:^|\\s)${escapedName}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s>]+))`,
    "i",
  ).exec(tag);
  const value = match?.[1] ?? match?.[2] ?? match?.[3];
  return value ? decodeHtmlText(value) : "";
}
|
||||
|
||||
/**
 * Returns the plain-text content of the first `<tagName>…</tagName>` element
 * found in the HTML (case-insensitive, non-greedy).
 *
 * @param html - HTML source to search.
 * @param tagName - Element name; it is interpolated into a regular expression as-is.
 * @returns The label text produced by `stripHtmlForLabel`, or `""` when the
 *   element is missing or empty.
 */
function extractFirstTagText(html: string, tagName: string) {
  const elementPattern = new RegExp(
    `<${tagName}\\b[^>]*>([\\s\\S]*?)<\\/${tagName}>`,
    "i",
  );
  const innerHtml = elementPattern.exec(html)?.[1];
  if (!innerHtml) {
    return "";
  }
  return stripHtmlForLabel(innerHtml);
}
|
||||
|
||||
/**
 * Finds the page description in `<meta>` tags.
 *
 * Tags are scanned in document order; the first one whose `name` (or, when
 * `name` is empty, `property`) is `description`, `og:description` or
 * `twitter:description` AND whose `content` is non-empty wins.
 *
 * @param html - HTML source to search.
 * @returns The description text, or `""` when none is found.
 */
function extractMetaDescriptionFromHtml(html: string) {
  const descriptionKeyPattern = /^(description|og:description|twitter:description)$/i;

  for (const metaMatch of html.matchAll(/<meta\b[^>]*>/gi)) {
    const metaTag = metaMatch[0] ?? "";
    const key =
      getHtmlAttribute(metaTag, "name") || getHtmlAttribute(metaTag, "property");

    if (descriptionKeyPattern.test(key)) {
      const description = getHtmlAttribute(metaTag, "content");
      if (description) {
        return description;
      }
    }
  }

  return "";
}
|
||||
|
||||
/**
 * Collects the text of up to 12 non-empty `<h1>`–`<h3>` headings, in document order.
 *
 * @param html - HTML source to search.
 * @returns Heading texts as produced by `stripHtmlForLabel`.
 */
function extractHeadingsFromHtml(html: string) {
  const headings: string[] = [];

  for (const headingMatch of html.matchAll(/<h[1-3]\b[^>]*>([\s\S]*?)<\/h[1-3]>/gi)) {
    const text = stripHtmlForLabel(headingMatch[1] ?? "");
    if (text.length === 0) {
      continue;
    }
    headings.push(text);
    if (headings.length === 12) {
      break;
    }
  }

  return headings;
}
|
||||
|
||||
/**
 * Extracts every `<a>…</a>` link whose `href` can be normalised to a crawl URL.
 *
 * @param html - HTML source of the page.
 * @param finalUrl - URL of the page; passed to `normalizeCrawlUrl` together with each `href`.
 * @param rootUrl - Site root used to decide whether a link is internal.
 * @returns One entry per usable anchor: normalised `href`, label text cut to
 *   `MAX_BROWSERLESS_LINK_TEXT_CHARS`, and the `isInternal` flag.
 */
function extractAnchorLinksFromHtml(
  html: string,
  finalUrl: string,
  rootUrl: string,
) {
  const anchors: { href: string; text: string; isInternal: boolean }[] = [];

  for (const anchorMatch of html.matchAll(/<a\b([^>]*)>([\s\S]*?)<\/a>/gi)) {
    const rawHref = getHtmlAttribute(anchorMatch[1] ?? "", "href");
    const resolvedHref = normalizeCrawlUrl(rawHref, finalUrl);
    if (!resolvedHref) {
      // Anchors without a normalisable target are dropped.
      continue;
    }

    anchors.push({
      href: resolvedHref,
      text: stripHtmlForLabel(anchorMatch[2] ?? "").slice(
        0,
        MAX_BROWSERLESS_LINK_TEXT_CHARS,
      ),
      isInternal: isSameRegistrableHostishDomain(resolvedHref, rootUrl),
    });
  }

  return anchors;
}
|
||||
|
||||
/**
 * Builds the ordered, de-duplicated list of URLs for the browserless crawl.
 *
 * Order of precedence: the normalised root, then subpages selected by
 * `discoverRelevantSubpageUrls` from the homepage links, then the
 * `BROWSERLESS_CRAWL_PATHS` fallbacks resolved against the root.
 *
 * @param rootUrl - Site root; if it cannot be normalised the result is empty.
 * @param homepageLinks - Link targets found on the homepage.
 * @param maxPages - Collection stops once this many targets have been gathered.
 * @returns Normalised target URLs, root first.
 */
function makeBrowserlessCrawlTargets(
  rootUrl: string,
  homepageLinks: string[],
  maxPages: number,
) {
  const root = normalizeCrawlUrl(rootUrl);
  if (!root) {
    return [];
  }

  const discovered = discoverRelevantSubpageUrls(homepageLinks, root);
  const fallbacks = BROWSERLESS_CRAWL_PATHS.map((pathname) =>
    normalizeCrawlUrl(pathname, root),
  ).filter((url): url is string => url !== null);

  // A Set iterates in insertion order, so it serves as both the
  // de-duplication index and the ordered result.
  const uniqueTargets = new Set<string>();
  for (const candidate of [root, ...discovered, ...fallbacks]) {
    const normalized = normalizeCrawlUrl(candidate, root);
    if (!normalized || uniqueTargets.has(normalized)) {
      continue;
    }
    uniqueTargets.add(normalized);
    if (uniqueTargets.size >= maxPages) {
      break;
    }
  }

  return [...uniqueTargets];
}
|
||||
|
||||
/**
 * Picks a text decoder for a response based on the `charset` parameter of its
 * Content-Type header.
 *
 * @param contentType - Raw Content-Type header value, or `null` when absent.
 * @returns A decoder for the declared charset; UTF-8 when no charset is
 *   declared or the label is not supported by the runtime.
 */
function createBrowserlessTextDecoder(contentType: string | null) {
  const charsetLabel = /charset\s*=\s*["']?([^\s;"']+)/i.exec(contentType ?? "")?.[1];
  if (charsetLabel) {
    try {
      return new TextDecoder(charsetLabel);
    } catch {
      // TextDecoder throws a RangeError for unknown labels; fall through to UTF-8.
    }
  }
  return new TextDecoder();
}

/**
 * Reads at most `MAX_BROWSERLESS_PAGE_BYTES` bytes of a response body and
 * decodes them to text.
 *
 * The body is decoded with the charset declared in the response's
 * Content-Type header (for example ISO-8859-1), falling back to UTF-8.
 * Once the byte limit is reached the rest of the stream is cancelled.
 *
 * @param response - Fetch response whose body should be read.
 * @param signal - Optional abort signal, checked before every chunk read.
 * @returns The decoded (possibly truncated) body, or `""` when there is no body.
 * @throws Error when `signal` is already aborted before a chunk is read;
 *   errors from the underlying stream read propagate unchanged.
 */
async function readLimitedBrowserlessResponseText(
  response: Response,
  signal?: AbortSignal,
) {
  if (!response.body) {
    return "";
  }

  const reader = response.body.getReader();
  const chunks: Uint8Array[] = [];
  let totalBytes = 0;

  try {
    while (true) {
      if (signal?.aborted) {
        throw new Error("Website-Enrichment Fetch wurde abgebrochen.");
      }

      const { done, value } = await reader.read();
      if (done) {
        break;
      }

      if (!value) {
        continue;
      }

      // Keep only as many bytes of this chunk as still fit under the limit.
      const nextChunk = value.slice(
        0,
        Math.max(0, MAX_BROWSERLESS_PAGE_BYTES - totalBytes),
      );
      if (nextChunk.length > 0) {
        chunks.push(nextChunk);
        totalBytes += nextChunk.length;
      }

      if (totalBytes >= MAX_BROWSERLESS_PAGE_BYTES) {
        // Stop the download; a failing cancel is irrelevant at this point.
        await reader.cancel().catch(() => undefined);
        break;
      }
    }
  } finally {
    reader.releaseLock();
  }

  const output = new Uint8Array(totalBytes);
  let offset = 0;
  for (const chunk of chunks) {
    output.set(chunk, offset);
    offset += chunk.length;
  }

  return createBrowserlessTextDecoder(response.headers.get("content-type")).decode(
    output,
  );
}
|
||||
|
||||
/**
 * Downloads one page with plain `fetch` (no browser), following redirects and
 * aborting after `timeoutMs` (at least 1 ms).
 *
 * @param targetUrl - URL to request.
 * @param timeoutMs - Time budget for the request including reading the body.
 * @returns `{ finalUrl, html, status }` on success, or `null` when the
 *   status is 400 or higher, or a Content-Type is present that does not look
 *   textual (none of `text`, `html`, `xml`, `xhtml`).
 * @throws Whatever `fetch` or the body read rejects with (network errors, abort).
 */
async function fetchBrowserlessPage(targetUrl: string, timeoutMs: number) {
  const abortController = new AbortController();
  const abortTimer = setTimeout(
    () => abortController.abort(),
    Math.max(1, timeoutMs),
  );

  try {
    const response = await fetch(targetUrl, {
      headers: { "User-Agent": BROWSERLESS_USER_AGENT },
      redirect: "follow",
      signal: abortController.signal,
    });

    const contentType = response.headers.get("content-type") ?? "";
    const isErrorStatus = response.status >= 400;
    // A missing Content-Type is given the benefit of the doubt.
    const isNonTextual =
      Boolean(contentType) && !/text|html|xml|xhtml/i.test(contentType);

    if (isErrorStatus || isNonTextual) {
      // Release the connection without downloading the unwanted body.
      await response.body?.cancel().catch(() => undefined);
      return null;
    }

    const finalUrl =
      normalizeCrawlUrl(response.url || targetUrl, targetUrl) ?? targetUrl;
    const html = await readLimitedBrowserlessResponseText(
      response,
      abortController.signal,
    );

    return { finalUrl, html, status: response.status };
  } finally {
    clearTimeout(abortTimer);
  }
}
|
||||
|
||||
/**
 * Crawls a single page through the fetch-based fallback and turns the HTML
 * into a `PageResult`.
 *
 * @param targetUrl - URL to fetch; recorded as `sourceUrl`.
 * @param rootUrl - Site root, used for page-kind and internal-link classification.
 * @param timeoutMs - Time budget handed to `fetchBrowserlessPage`.
 * @returns The page result, or `null` when the page could not be fetched or
 *   its body is empty/whitespace only.
 */
async function crawlPageWithoutBrowser(
  targetUrl: string,
  rootUrl: string,
  timeoutMs: number,
) {
  const fetched = await fetchBrowserlessPage(targetUrl, timeoutMs);
  if (!fetched || !fetched.html.trim()) {
    return null;
  }

  const { finalUrl, html } = fetched;
  const signals = extractContactSignalsFromHtmlLikeText(html);
  const links = extractAnchorLinksFromHtml(html, finalUrl, rootUrl);

  // Candidates whose address cannot be normalised are dropped. None is marked
  // as accepted here; the caller sets that flag when persisting.
  const emailCandidates = signals.emailCandidates.flatMap((entry) => {
    const normalizedEmail = normalizeEmailAddress(entry.email);
    if (!normalizedEmail) {
      return [];
    }
    return [
      {
        email: normalizedEmail,
        emailSource: finalUrl,
        contactPerson: entry.contactPerson ?? null,
        isBusinessContactAddress: entry.isBusinessContactAddress,
        isGeneric: isGenericBusinessEmail(normalizedEmail),
        sourceUrl: finalUrl,
        accepted: false,
        normalizedEmail,
      },
    ];
  });

  return {
    sourceUrl: targetUrl,
    finalUrl,
    pageKind: makePageKind(finalUrl, rootUrl),
    title: extractFirstTagText(html, "title"),
    metaDescription: extractMetaDescriptionFromHtml(html),
    headings: extractHeadingsFromHtml(html),
    visibleText: signals.visibleText,
    links,
    emailCandidates,
    hasContactFormSignal: signals.hasContactFormSignal,
    hasContactCtaSignal: signals.hasContactCtaSignal,
  } satisfies PageResult;
}
|
||||
|
||||
async function loadPlaywrightModules() {
|
||||
const [playwrightCore, chromiumPackage] = await Promise.all([
|
||||
import("playwright-core"),
|
||||
@@ -327,7 +642,7 @@ async function captureHomepageScreenshot(
|
||||
mimeType: SCREENSHOT_MIME_TYPE,
|
||||
} satisfies StoredScreenshot;
|
||||
} finally {
|
||||
await page.close();
|
||||
await closePlaywrightResourceSafely(page, "homepage screenshot page");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -428,7 +743,7 @@ async function crawlPage(
|
||||
hasContactCtaSignal: signals.hasContactCtaSignal,
|
||||
} satisfies PageResult;
|
||||
} finally {
|
||||
await page.close();
|
||||
await closePlaywrightResourceSafely(page, "crawl page");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -458,9 +773,226 @@ function deduplicateCrawlLinks(links: PersistedCrawlLink[]) {
|
||||
return [...unique.values()];
|
||||
}
|
||||
|
||||
/**
 * Runs website enrichment for a lead using plain HTTP fetches instead of a
 * browser. The caller uses this path when no Chromium executable source is
 * configured.
 *
 * Steps, in order:
 *  1. log a warning event that the fallback is in use;
 *  2. crawl the homepage (a failure here fails the run);
 *  3. crawl up to `maxPages - 1` further targets; a subpage that cannot be
 *     loaded is logged as a warning and skipped;
 *  4. persist pages, links, e-mail candidates and technical checks (no screenshots);
 *  5. patch the lead with the usable contact, or with a "no contact found" reason;
 *  6. queue the PageSpeed audit (a failure here only produces a warning event);
 *  7. mark the run as succeeded and log the closing info event.
 *
 * @param ctx - Action context used to run the mutations.
 * @param args - Run/lead identifiers, the root URL, the per-page timeout, the
 *   page limit and the action time budget (`actionStartedAt`, `actionBudget`).
 * @returns The id of the processed run.
 * @throws Error when the homepage cannot be loaded; errors from the
 *   persistence mutations propagate to the caller.
 */
async function processLeadEnrichmentWithoutBrowser(
  ctx: ActionCtx,
  args: {
    runId: Id<"agentRuns">;
    lead: WebsiteLead;
    rootUrl: string;
    timeoutMs: number;
    maxPages: number;
    actionStartedAt: number;
    actionBudget: number;
  },
): Promise<Id<"agentRuns">> {
  const {
    runId,
    lead,
    rootUrl,
    timeoutMs,
    maxPages,
    actionStartedAt,
    actionBudget,
  } = args;

  await ctx.runMutation(internal.runs.appendEventInternal, {
    runId,
    level: "warning",
    message:
      "Chromium ist nicht konfiguriert; Website-Enrichment nutzt browserlosen Fetch-Fallback.",
    details: [{ label: "Lead", value: lead._id }],
  });

  const homepage = await withActionTimeout(
    crawlPageWithoutBrowser(
      rootUrl,
      rootUrl,
      Math.min(timeoutMs, remainingActionBudgetMs(actionStartedAt, actionBudget)),
    ),
    remainingActionBudgetMs(actionStartedAt, actionBudget),
    "Homepage browserlos crawlen",
  );
  if (!homepage) {
    throw new Error("Homepage konnte im browserlosen Fallback nicht geladen werden.");
  }

  const crawlTargets = makeBrowserlessCrawlTargets(
    rootUrl,
    homepage.links.map((link) => link.href),
    maxPages,
  );
  const crawledPages: PageResult[] = [homepage];
  const crawledUrls = new Set<string>();
  const normalizedHomepageUrl = normalizeCrawlUrl(homepage.finalUrl, rootUrl);
  if (normalizedHomepageUrl) {
    crawledUrls.add(normalizedHomepageUrl);
  }

  // The first target is the root itself, which was crawled above.
  for (const pageUrl of crawlTargets.slice(1)) {
    const normalizedTarget = normalizeCrawlUrl(pageUrl, rootUrl);
    if (!normalizedTarget || crawledUrls.has(normalizedTarget)) {
      continue;
    }

    let crawled: PageResult | null = null;
    try {
      crawled = await withActionTimeout(
        crawlPageWithoutBrowser(
          normalizedTarget,
          rootUrl,
          Math.min(
            timeoutMs,
            remainingActionBudgetMs(actionStartedAt, actionBudget),
          ),
        ),
        remainingActionBudgetMs(actionStartedAt, actionBudget),
        `Unterseite browserlos crawlen: ${normalizedTarget}`,
      );
    } catch (subpageError) {
      // Subpages are optional (several targets are guessed fallback paths), so a
      // network error or timeout on one of them must not throw away the
      // homepage data that was already collected.
      await ctx.runMutation(internal.runs.appendEventInternal, {
        runId,
        level: "warning",
        message:
          "Unterseite konnte im browserlosen Fallback nicht geladen werden und wird übersprungen.",
        details: [
          { label: "Lead", value: lead._id },
          { label: "URL", value: normalizedTarget },
          { label: "Fehler", value: messageFromError(subpageError) },
        ],
      });
      // NOTE(review): assumes remainingActionBudgetMs returns a value <= 0 once
      // the budget is used up; confirm against its definition.
      if (remainingActionBudgetMs(actionStartedAt, actionBudget) <= 0) {
        break;
      }
      continue;
    }

    if (crawled) {
      crawledPages.push(crawled);
      // Remember the post-redirect URL so another target that resolves to the
      // same page is not fetched again.
      const normalizedCrawledUrl = normalizeCrawlUrl(crawled.finalUrl, rootUrl);
      if (normalizedCrawledUrl) {
        crawledUrls.add(normalizedCrawledUrl);
      }
    }
  }

  const allLinks: PersistedCrawlLink[] = crawledPages.flatMap((page) =>
    page.links.map((link) => ({
      ...link,
      pageUrl: page.finalUrl,
    })),
  );
  const technicalInput = buildTechnicalChecks({
    rootUrl,
    finalUrl: homepage.finalUrl,
    title: homepage.title,
    metaDescription: homepage.metaDescription,
    visibleText: homepage.visibleText,
    checkedUrls: crawledPages.map((page) => page.finalUrl),
    links: allLinks.map((link) => link.href),
  });
  const validCandidates = deduplicateLeadEmailCandidates(
    crawledPages.flatMap((page) => page.emailCandidates),
  );
  const persistedLinks = deduplicateCrawlLinks(allLinks).slice(
    0,
    MAX_PERSISTED_LINKS,
  );
  const persistedCandidates = validCandidates.slice(
    0,
    MAX_PERSISTED_EMAIL_CANDIDATES,
  );
  // The usable contact is chosen from ALL valid candidates, not only from the
  // capped list that gets persisted.
  const usable = getUsableContactEmailFromEntries(
    validCandidates.map((candidate) => ({
      email: candidate.email,
      emailSource: candidate.emailSource,
      contactPerson: candidate.contactPerson,
      isBusinessContactAddress: candidate.isBusinessContactAddress,
    })),
  );

  await ctx.runMutation(internal.websiteEnrichment.persistLeadEnrichmentResult, {
    runId,
    leadId: lead._id,
    pages: crawledPages.map((page) => ({
      sourceUrl: page.sourceUrl,
      finalUrl: page.finalUrl,
      pageKind: page.pageKind,
      title: page.title,
      metaDescription: page.metaDescription,
      headings: page.headings,
      visibleTextExcerpt: trimExcerpt(page.visibleText),
      hasContactFormSignal: page.hasContactFormSignal,
      hasContactCtaSignal: page.hasContactCtaSignal,
    })),
    links: persistedLinks.map((link) => ({
      pageUrl: link.pageUrl,
      href: link.href,
      text: link.text,
      isInternal: link.isInternal,
    })),
    emailCandidates: persistedCandidates.map((candidate) => ({
      email: candidate.email,
      normalizedEmail: candidate.normalizedEmail,
      emailSource: candidate.emailSource,
      sourceUrl: candidate.sourceUrl,
      contactPerson: candidate.contactPerson ?? undefined,
      isBusinessContactAddress: candidate.isBusinessContactAddress,
      isGeneric: candidate.isGeneric,
      accepted: usable !== null && candidate.normalizedEmail === usable.email,
    })),
    // The fetch-based fallback cannot render pages, so there are no screenshots.
    screenshots: [],
    technicalChecks: [
      {
        sourceUrl: homepage.sourceUrl,
        finalUrl: homepage.finalUrl,
        usesHttps: technicalInput.https,
        missingTitle: technicalInput.missingTitle,
        missingMetaDescription: technicalInput.missingMetaDescription,
        hasVisibleContactPath: technicalInput.hasVisibleContactPath,
        brokenInternalLinkCount: technicalInput.brokenInternalLinks.length,
      },
    ],
  });

  if (usable) {
    await ctx.runMutation(internal.websiteEnrichment.patchLeadFromWebsiteEnrichment, {
      leadId: lead._id,
      email: usable.email,
      emailSource: usable.emailSource ?? undefined,
      contactPerson: usable.contactPerson ?? undefined,
      currentContactStatus: lead.contactStatus,
    });
  } else {
    await ctx.runMutation(internal.websiteEnrichment.patchLeadFromWebsiteEnrichment, {
      leadId: lead._id,
      currentContactStatus: lead.contactStatus,
      contactStatusReason:
        "Browserloses Website-Enrichment abgeschlossen, aber kein verwertbarer Kontakt gefunden.",
    });
  }

  // Queuing the PageSpeed audit is best-effort: a failure is recorded as a
  // warning event and does not fail the enrichment run.
  try {
    await ctx.runMutation(internal.pageSpeed.queueLeadPageSpeedAudit, {
      leadId: lead._id,
      parentRunId: runId,
    });
  } catch (pageSpeedQueueError) {
    await ctx.runMutation(internal.runs.appendEventInternal, {
      runId,
      level: "warning",
      message: "PageSpeed-Analyse konnte nicht in die Warteschlange gesetzt werden.",
      details: [
        { label: "Lead", value: lead._id },
        {
          label: "Fehler",
          value: messageFromError(pageSpeedQueueError),
          source: "pagespeed_queue",
        },
      ],
    });
  }

  await ctx.runMutation(internal.websiteEnrichment.finishLeadEnrichmentRun, {
    runId,
    status: "succeeded",
    currentStep: "website_enrichment",
    errors: 0,
  });

  await ctx.runMutation(internal.runs.appendEventInternal, {
    runId,
    level: "info",
    message: usable
      ? "Website-Enrichment browserlos mit nutzbarer E-Mail abgeschlossen."
      : "Website-Enrichment browserlos abgeschlossen, aber ohne nutzbare E-Mail.",
  });

  return runId;
}
|
||||
|
||||
export const processLeadEnrichment = internalAction({
|
||||
args: { runId: v.id("agentRuns") },
|
||||
handler: async (ctx, args) => {
|
||||
handler: async (ctx, args): Promise<Id<"agentRuns"> | null> => {
|
||||
let started: StartedLead | null = null;
|
||||
const runId = args.runId;
|
||||
const actionStartedAt = Date.now();
|
||||
@@ -486,7 +1018,7 @@ export const processLeadEnrichment = internalAction({
|
||||
parentRunId: runId,
|
||||
});
|
||||
} catch (pageSpeedQueueError) {
|
||||
await ctx.runMutation(api.runs.appendEvent, {
|
||||
await ctx.runMutation(internal.runs.appendEventInternal, {
|
||||
runId,
|
||||
level: "warning",
|
||||
message: "PageSpeed-Analyse konnte nicht in die Warteschlange gesetzt werden.",
|
||||
@@ -508,7 +1040,7 @@ export const processLeadEnrichment = internalAction({
|
||||
errorSummary: "Ungültige Website-URL.",
|
||||
errors: 1,
|
||||
});
|
||||
await ctx.runMutation(api.runs.appendEvent, {
|
||||
await ctx.runMutation(internal.runs.appendEventInternal, {
|
||||
runId,
|
||||
level: "error",
|
||||
message: "Website-Enrichment fehlgeschlagen: Ungültige Website-URL.",
|
||||
@@ -526,6 +1058,18 @@ export const processLeadEnrichment = internalAction({
|
||||
const timeoutMs = crawlTimeoutMs();
|
||||
const maxPages = crawlMaxPages();
|
||||
|
||||
if (!getChromiumExecutableSource()) {
|
||||
return await processLeadEnrichmentWithoutBrowser(ctx, {
|
||||
runId,
|
||||
lead: started.lead,
|
||||
rootUrl,
|
||||
timeoutMs,
|
||||
maxPages,
|
||||
actionStartedAt,
|
||||
actionBudget,
|
||||
});
|
||||
}
|
||||
|
||||
const { playwrightCore, serverlessChromium } =
|
||||
await withActionTimeout(
|
||||
loadPlaywrightModules(),
|
||||
@@ -803,7 +1347,7 @@ export const processLeadEnrichment = internalAction({
|
||||
parentRunId: runId,
|
||||
});
|
||||
} catch (pageSpeedQueueError) {
|
||||
await ctx.runMutation(api.runs.appendEvent, {
|
||||
await ctx.runMutation(internal.runs.appendEventInternal, {
|
||||
runId,
|
||||
level: "warning",
|
||||
message: "PageSpeed-Analyse konnte nicht in die Warteschlange gesetzt werden.",
|
||||
@@ -825,7 +1369,7 @@ export const processLeadEnrichment = internalAction({
|
||||
errors: 0,
|
||||
});
|
||||
|
||||
await ctx.runMutation(api.runs.appendEvent, {
|
||||
await ctx.runMutation(internal.runs.appendEventInternal, {
|
||||
runId,
|
||||
level: "info",
|
||||
message: usable
|
||||
@@ -846,7 +1390,7 @@ export const processLeadEnrichment = internalAction({
|
||||
errors: 1,
|
||||
});
|
||||
|
||||
await ctx.runMutation(api.runs.appendEvent, {
|
||||
await ctx.runMutation(internal.runs.appendEventInternal, {
|
||||
runId,
|
||||
level: "error",
|
||||
message: "Website-Enrichment fehlgeschlagen.",
|
||||
@@ -862,7 +1406,7 @@ export const processLeadEnrichment = internalAction({
|
||||
parentRunId: runId,
|
||||
});
|
||||
} catch (pageSpeedQueueError) {
|
||||
await ctx.runMutation(api.runs.appendEvent, {
|
||||
await ctx.runMutation(internal.runs.appendEventInternal, {
|
||||
runId,
|
||||
level: "warning",
|
||||
message: "PageSpeed-Analyse konnte nicht in die Warteschlange gesetzt werden.",
|
||||
@@ -886,13 +1430,19 @@ export const processLeadEnrichment = internalAction({
|
||||
return null;
|
||||
} finally {
|
||||
if (desktopContext) {
|
||||
await desktopContext.close();
|
||||
await closePlaywrightResourceSafely(
|
||||
desktopContext,
|
||||
"desktop browser context",
|
||||
);
|
||||
}
|
||||
if (mobileContext) {
|
||||
await mobileContext.close();
|
||||
await closePlaywrightResourceSafely(
|
||||
mobileContext,
|
||||
"mobile browser context",
|
||||
);
|
||||
}
|
||||
if (browser) {
|
||||
await browser.close();
|
||||
await closePlaywrightResourceSafely(browser, "browser");
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user