feat: add OpenRouter audit generation pipeline

This commit is contained in:
2026-06-05 11:06:01 +02:00
parent 370aeec2a0
commit 03cb65fde4
29 changed files with 5462 additions and 74 deletions

View File

@@ -23,6 +23,10 @@ import { internalAction, type ActionCtx } from "./_generated/server";
// Crawl defaults. NOTE(review): presumably the fallbacks for the per-page
// timeout and the page limit; their consumers are not visible in this excerpt.
const DEFAULT_CRAWL_TIMEOUT_MS = 60_000;
const DEFAULT_CRAWL_MAX_PAGES = 5;
// Fallback handed to readPositiveIntEnv for TASK8_ACTION_BUDGET_MS in actionBudgetMs().
const DEFAULT_ACTION_BUDGET_MS = 120_000;
// Lower and upper clamp applied to the configured budget in actionBudgetMs().
const MIN_ACTION_BUDGET_MS = 30_000;
const MAX_ACTION_BUDGET_MS = 140_000;
// Subtracted from the remaining budget in remainingActionBudgetMs(), so each
// step is given slightly less time than is nominally left.
const ACTION_TIMEOUT_BUFFER_MS = 5_000;
// NOTE(review): the names suggest caps on how many links / e-mail candidates
// are stored; confirm against the persistence code, which is not shown here.
const MAX_PERSISTED_LINKS = 120;
const MAX_PERSISTED_EMAIL_CANDIDATES = 40;
// NOTE(review): presumably the content type used for stored screenshots; verify at the call site.
const SCREENSHOT_MIME_TYPE = "image/png";
@@ -140,6 +144,47 @@ function crawlMaxPages() {
);
}
/**
 * Resolves the total time budget, in milliseconds, for one enrichment action.
 *
 * The value comes from `readPositiveIntEnv("TASK8_ACTION_BUDGET_MS", ...)`
 * with `DEFAULT_ACTION_BUDGET_MS` passed as its fallback argument, and is then
 * clamped into `[MIN_ACTION_BUDGET_MS, MAX_ACTION_BUDGET_MS]`.
 */
function actionBudgetMs() {
  const configuredMs = readPositiveIntEnv(
    "TASK8_ACTION_BUDGET_MS",
    DEFAULT_ACTION_BUDGET_MS,
  );
  // Apply the upper cap first, then the lower floor (same order as a nested clamp).
  const cappedMs = Math.min(MAX_ACTION_BUDGET_MS, configuredMs);
  return Math.max(MIN_ACTION_BUDGET_MS, cappedMs);
}
/**
 * Computes how many milliseconds a next step may still use.
 *
 * @param startedAt - `Date.now()` timestamp taken when the action began.
 * @param budgetMs - Total budget for the action in milliseconds.
 * @returns The budget minus the elapsed time and `ACTION_TIMEOUT_BUFFER_MS`,
 *   floored at 1_000 ms — so a step still gets one second even when the
 *   budget is already used up.
 */
function remainingActionBudgetMs(startedAt: number, budgetMs: number) {
  const elapsedMs = Date.now() - startedAt;
  const usableMs = budgetMs - elapsedMs - ACTION_TIMEOUT_BUFFER_MS;
  return Math.max(1_000, usableMs);
}
/**
 * Awaits `promise`, but stops waiting once `timeoutMs` has elapsed.
 *
 * The underlying operation is not cancelled when the timer wins; only the
 * caller stops waiting for it. A late rejection of `promise` is still
 * observed by `Promise.race`, so it does not surface as an unhandled rejection.
 *
 * @param promise - The operation to wait for.
 * @param timeoutMs - Maximum wait in milliseconds. Values below 1 (and NaN)
 *   are treated as 1; values above the largest supported timer delay
 *   (including `Infinity`) are capped to that delay.
 * @param label - Step name appended to the timeout error message.
 * @returns The value `promise` resolves to.
 * @throws Error with the "Zeitbudget ueberschritten" message when the timer
 *   fires first; otherwise whatever `promise` rejects with.
 */
async function withActionTimeout<T>(
  promise: Promise<T>,
  timeoutMs: number,
  label: string,
): Promise<T> {
  // Timer delays above 2^31-1 ms are coerced to 1 ms by the runtime, which
  // would turn a very large budget into an immediate timeout. Cap instead.
  const maxTimerDelayMs = 2_147_483_647;
  const delayMs = Number.isNaN(timeoutMs)
    ? 1
    : Math.min(maxTimerDelayMs, Math.max(1, timeoutMs));

  let timer: ReturnType<typeof setTimeout> | undefined;
  // The executor runs synchronously, so `timer` is set before the race starts.
  const deadline = new Promise<never>((_, reject) => {
    timer = setTimeout(() => {
      reject(
        new Error(`Website-Enrichment Zeitbudget ueberschritten: ${label}.`),
      );
    }, delayMs);
  });

  try {
    return await Promise.race([promise, deadline]);
  } finally {
    // Always release the timer so a finished step does not keep the event loop alive.
    clearTimeout(timer);
  }
}
function makePageKind(url: string, rootUrl: string): EnrichmentPageKind {
const normalizedRoot = normalizeCrawlUrl(rootUrl);
if (!normalizedRoot) {
@@ -418,6 +463,8 @@ export const processLeadEnrichment = internalAction({
handler: async (ctx, args) => {
let started: StartedLead | null = null;
const runId = args.runId;
const actionStartedAt = Date.now();
const actionBudget = actionBudgetMs();
let browser: Browser | null = null;
let desktopContext: BrowserContext | null = null;
let mobileContext: BrowserContext | null = null;
@@ -480,9 +527,15 @@ export const processLeadEnrichment = internalAction({
const maxPages = crawlMaxPages();
const { playwrightCore, serverlessChromium } =
await loadPlaywrightModules();
const executablePath = await resolveChromiumExecutablePath(
serverlessChromium,
await withActionTimeout(
loadPlaywrightModules(),
remainingActionBudgetMs(actionStartedAt, actionBudget),
"Playwright-Module laden",
);
const executablePath = await withActionTimeout(
resolveChromiumExecutablePath(serverlessChromium),
remainingActionBudgetMs(actionStartedAt, actionBudget),
"Chromium executable vorbereiten",
);
const prepareChromiumSharedLibraries = async (
@@ -502,21 +555,50 @@ export const processLeadEnrichment = internalAction({
chromiumRuntime.setupLambdaEnvironment(path.join(tmpdir(), "al2023", "lib"));
};
await prepareChromiumSharedLibraries(serverlessChromium);
browser = await playwrightCore.chromium.launch({
headless: true,
executablePath,
args: serverlessChromium.args,
});
await withActionTimeout(
prepareChromiumSharedLibraries(serverlessChromium),
remainingActionBudgetMs(actionStartedAt, actionBudget),
"Chromium-Bibliotheken vorbereiten",
);
browser = await withActionTimeout(
playwrightCore.chromium.launch({
headless: true,
executablePath,
args: serverlessChromium.args,
timeout: remainingActionBudgetMs(actionStartedAt, actionBudget),
}),
remainingActionBudgetMs(actionStartedAt, actionBudget),
"Chromium starten",
);
const { devices } = playwrightCore;
desktopContext = await browser.newContext({
...devices["Desktop Chrome"],
});
mobileContext = await browser.newContext({
...devices["iPhone 11"],
});
desktopContext = await withActionTimeout(
browser.newContext({
...devices["Desktop Chrome"],
}),
remainingActionBudgetMs(actionStartedAt, actionBudget),
"Desktop-Kontext erstellen",
);
mobileContext = await withActionTimeout(
browser.newContext({
...devices["iPhone 11"],
}),
remainingActionBudgetMs(actionStartedAt, actionBudget),
"Mobile-Kontext erstellen",
);
const homepage = await crawlPage(desktopContext, rootUrl, rootUrl, timeoutMs);
const homepage = await withActionTimeout(
crawlPage(
desktopContext,
rootUrl,
rootUrl,
Math.min(
timeoutMs,
remainingActionBudgetMs(actionStartedAt, actionBudget),
),
),
remainingActionBudgetMs(actionStartedAt, actionBudget),
"Homepage crawlen",
);
if (!homepage) {
throw new Error("Homepage konnte nicht geladen werden.");
}
@@ -529,7 +611,19 @@ export const processLeadEnrichment = internalAction({
const crawledPages: PageResult[] = [homepage];
for (const pageUrl of crawlTargets.slice(1)) {
const crawled = await crawlPage(desktopContext, pageUrl, rootUrl, timeoutMs);
const crawled = await withActionTimeout(
crawlPage(
desktopContext,
pageUrl,
rootUrl,
Math.min(
timeoutMs,
remainingActionBudgetMs(actionStartedAt, actionBudget),
),
),
remainingActionBudgetMs(actionStartedAt, actionBudget),
`Unterseite crawlen: ${pageUrl}`,
);
if (crawled) {
crawledPages.push(crawled);
}
@@ -552,7 +646,10 @@ export const processLeadEnrichment = internalAction({
for (const href of uniqueInternalLinks.slice(0, 30)) {
try {
const response = await desktopContext.request.get(href, {
timeout: Math.max(1_000, timeoutMs - 1_000),
timeout: Math.min(
Math.max(1_000, timeoutMs - 1_000),
remainingActionBudgetMs(actionStartedAt, actionBudget),
),
});
const status = response.status();
checkMap.set(href, {
@@ -567,19 +664,33 @@ export const processLeadEnrichment = internalAction({
}
}
const desktopScreenshot = await captureHomepageScreenshot(
ctx,
desktopContext,
homepage.finalUrl,
"desktop",
timeoutMs,
const desktopScreenshot = await withActionTimeout(
captureHomepageScreenshot(
ctx,
desktopContext,
homepage.finalUrl,
"desktop",
Math.min(
timeoutMs,
remainingActionBudgetMs(actionStartedAt, actionBudget),
),
),
remainingActionBudgetMs(actionStartedAt, actionBudget),
"Desktop-Screenshot erfassen",
);
const mobileScreenshot = await captureHomepageScreenshot(
ctx,
mobileContext,
homepage.finalUrl,
"mobile",
timeoutMs,
const mobileScreenshot = await withActionTimeout(
captureHomepageScreenshot(
ctx,
mobileContext,
homepage.finalUrl,
"mobile",
Math.min(
timeoutMs,
remainingActionBudgetMs(actionStartedAt, actionBudget),
),
),
remainingActionBudgetMs(actionStartedAt, actionBudget),
"Mobile-Screenshot erfassen",
);
const technicalInput = buildTechnicalChecks({