feat: add OpenRouter audit generation pipeline
This commit is contained in:
@@ -23,6 +23,10 @@ import { internalAction, type ActionCtx } from "./_generated/server";
|
||||
|
||||
// Crawl defaults. Presumably fallbacks for the crawl timeout / page-count
// settings; their consumers are outside this view — TODO confirm.
const DEFAULT_CRAWL_TIMEOUT_MS = 60_000;
const DEFAULT_CRAWL_MAX_PAGES = 5;

// Overall action time budget in milliseconds: the default value plus the
// lower and upper bounds the configured budget is clamped to.
const DEFAULT_ACTION_BUDGET_MS = 120_000;
const MIN_ACTION_BUDGET_MS = 30_000;
const MAX_ACTION_BUDGET_MS = 140_000;

// Safety margin subtracted from the remaining budget before it is handed to
// an individual step.
const ACTION_TIMEOUT_BUFFER_MS = 5_000;

// Caps on persisted data. NOTE(review): assumed to limit how many links and
// e-mail candidates are stored per lead; usage is not visible here.
const MAX_PERSISTED_LINKS = 120;
const MAX_PERSISTED_EMAIL_CANDIDATES = 40;

// MIME type used for screenshots (PNG).
const SCREENSHOT_MIME_TYPE = "image/png";
|
||||
@@ -140,6 +144,47 @@ function crawlMaxPages() {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Resolves the total time budget, in milliseconds, for one action run.
 *
 * The value comes from `readPositiveIntEnv("TASK8_ACTION_BUDGET_MS", ...)`
 * with `DEFAULT_ACTION_BUDGET_MS` passed as the fallback argument, and is
 * then clamped into `[MIN_ACTION_BUDGET_MS, MAX_ACTION_BUDGET_MS]`.
 *
 * @returns The clamped budget in milliseconds.
 */
function actionBudgetMs() {
  const configuredBudget = readPositiveIntEnv(
    "TASK8_ACTION_BUDGET_MS",
    DEFAULT_ACTION_BUDGET_MS,
  );
  // Apply the upper bound first, then the lower bound (same order as before).
  const cappedBudget = Math.min(MAX_ACTION_BUDGET_MS, configuredBudget);
  return Math.max(MIN_ACTION_BUDGET_MS, cappedBudget);
}
|
||||
|
||||
/**
 * Computes how many milliseconds of the action budget are still usable.
 *
 * The elapsed wall-clock time since `startedAt` and the
 * `ACTION_TIMEOUT_BUFFER_MS` safety margin are both subtracted from
 * `budgetMs`. The result never drops below 1000 ms, so a caller still gets a
 * one-second allowance even when the budget is already used up.
 *
 * @param startedAt - `Date.now()` timestamp taken when the action began.
 * @param budgetMs - Total budget for the action in milliseconds.
 * @returns Remaining usable milliseconds, at least 1000.
 */
function remainingActionBudgetMs(startedAt: number, budgetMs: number) {
  const minimumAllowanceMs = 1_000;
  const spentMs = Date.now() - startedAt;
  const usableMs = budgetMs - spentMs - ACTION_TIMEOUT_BUFFER_MS;
  return Math.max(minimumAllowanceMs, usableMs);
}
|
||||
|
||||
/**
 * Awaits `promise`, but rejects if it has not settled within `timeoutMs`.
 *
 * The input promise is raced against a timer. When the timer fires first, the
 * returned promise rejects with an `Error` whose message contains `label`.
 * The underlying operation is NOT cancelled; it simply stops being awaited.
 * The timer is always cleared once the race settles so it cannot keep the
 * process alive or fire later.
 *
 * @param promise - The operation to await.
 * @param timeoutMs - Maximum wait in milliseconds. Values below 1 (and NaN)
 *   are treated as 1 ms; values above the largest delay `setTimeout` supports
 *   are clamped to that maximum instead of collapsing to ~1 ms.
 * @param label - Human-readable step name included in the timeout error.
 * @returns The resolved value of `promise`.
 * @throws Error when the timeout elapses first; otherwise whatever `promise`
 *   rejects with.
 */
async function withActionTimeout<T>(
  promise: Promise<T>,
  timeoutMs: number,
  label: string,
): Promise<T> {
  // setTimeout stores the delay as a signed 32-bit integer; anything larger
  // (including Infinity) is replaced by 1 ms, which would trigger a bogus,
  // near-immediate timeout for very generous budgets.
  const maxTimerDelayMs = 2_147_483_647;
  const delayMs = Number.isNaN(timeoutMs)
    ? 1
    : Math.min(maxTimerDelayMs, Math.max(1, timeoutMs));

  let timer: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(() => {
      reject(
        new Error(`Website-Enrichment Zeitbudget ueberschritten: ${label}.`),
      );
    }, delayMs);
  });

  try {
    return await Promise.race([promise, deadline]);
  } finally {
    // Runs on success, failure and timeout alike.
    if (timer !== undefined) {
      clearTimeout(timer);
    }
  }
}
|
||||
|
||||
function makePageKind(url: string, rootUrl: string): EnrichmentPageKind {
|
||||
const normalizedRoot = normalizeCrawlUrl(rootUrl);
|
||||
if (!normalizedRoot) {
|
||||
@@ -418,6 +463,8 @@ export const processLeadEnrichment = internalAction({
|
||||
handler: async (ctx, args) => {
|
||||
let started: StartedLead | null = null;
|
||||
const runId = args.runId;
|
||||
const actionStartedAt = Date.now();
|
||||
const actionBudget = actionBudgetMs();
|
||||
let browser: Browser | null = null;
|
||||
let desktopContext: BrowserContext | null = null;
|
||||
let mobileContext: BrowserContext | null = null;
|
||||
@@ -480,9 +527,15 @@ export const processLeadEnrichment = internalAction({
|
||||
const maxPages = crawlMaxPages();
|
||||
|
||||
const { playwrightCore, serverlessChromium } =
|
||||
await loadPlaywrightModules();
|
||||
const executablePath = await resolveChromiumExecutablePath(
|
||||
serverlessChromium,
|
||||
await withActionTimeout(
|
||||
loadPlaywrightModules(),
|
||||
remainingActionBudgetMs(actionStartedAt, actionBudget),
|
||||
"Playwright-Module laden",
|
||||
);
|
||||
const executablePath = await withActionTimeout(
|
||||
resolveChromiumExecutablePath(serverlessChromium),
|
||||
remainingActionBudgetMs(actionStartedAt, actionBudget),
|
||||
"Chromium executable vorbereiten",
|
||||
);
|
||||
|
||||
const prepareChromiumSharedLibraries = async (
|
||||
@@ -502,21 +555,50 @@ export const processLeadEnrichment = internalAction({
|
||||
chromiumRuntime.setupLambdaEnvironment(path.join(tmpdir(), "al2023", "lib"));
|
||||
};
|
||||
|
||||
await prepareChromiumSharedLibraries(serverlessChromium);
|
||||
browser = await playwrightCore.chromium.launch({
|
||||
headless: true,
|
||||
executablePath,
|
||||
args: serverlessChromium.args,
|
||||
});
|
||||
await withActionTimeout(
|
||||
prepareChromiumSharedLibraries(serverlessChromium),
|
||||
remainingActionBudgetMs(actionStartedAt, actionBudget),
|
||||
"Chromium-Bibliotheken vorbereiten",
|
||||
);
|
||||
browser = await withActionTimeout(
|
||||
playwrightCore.chromium.launch({
|
||||
headless: true,
|
||||
executablePath,
|
||||
args: serverlessChromium.args,
|
||||
timeout: remainingActionBudgetMs(actionStartedAt, actionBudget),
|
||||
}),
|
||||
remainingActionBudgetMs(actionStartedAt, actionBudget),
|
||||
"Chromium starten",
|
||||
);
|
||||
const { devices } = playwrightCore;
|
||||
desktopContext = await browser.newContext({
|
||||
...devices["Desktop Chrome"],
|
||||
});
|
||||
mobileContext = await browser.newContext({
|
||||
...devices["iPhone 11"],
|
||||
});
|
||||
desktopContext = await withActionTimeout(
|
||||
browser.newContext({
|
||||
...devices["Desktop Chrome"],
|
||||
}),
|
||||
remainingActionBudgetMs(actionStartedAt, actionBudget),
|
||||
"Desktop-Kontext erstellen",
|
||||
);
|
||||
mobileContext = await withActionTimeout(
|
||||
browser.newContext({
|
||||
...devices["iPhone 11"],
|
||||
}),
|
||||
remainingActionBudgetMs(actionStartedAt, actionBudget),
|
||||
"Mobile-Kontext erstellen",
|
||||
);
|
||||
|
||||
const homepage = await crawlPage(desktopContext, rootUrl, rootUrl, timeoutMs);
|
||||
const homepage = await withActionTimeout(
|
||||
crawlPage(
|
||||
desktopContext,
|
||||
rootUrl,
|
||||
rootUrl,
|
||||
Math.min(
|
||||
timeoutMs,
|
||||
remainingActionBudgetMs(actionStartedAt, actionBudget),
|
||||
),
|
||||
),
|
||||
remainingActionBudgetMs(actionStartedAt, actionBudget),
|
||||
"Homepage crawlen",
|
||||
);
|
||||
if (!homepage) {
|
||||
throw new Error("Homepage konnte nicht geladen werden.");
|
||||
}
|
||||
@@ -529,7 +611,19 @@ export const processLeadEnrichment = internalAction({
|
||||
const crawledPages: PageResult[] = [homepage];
|
||||
|
||||
for (const pageUrl of crawlTargets.slice(1)) {
|
||||
const crawled = await crawlPage(desktopContext, pageUrl, rootUrl, timeoutMs);
|
||||
const crawled = await withActionTimeout(
|
||||
crawlPage(
|
||||
desktopContext,
|
||||
pageUrl,
|
||||
rootUrl,
|
||||
Math.min(
|
||||
timeoutMs,
|
||||
remainingActionBudgetMs(actionStartedAt, actionBudget),
|
||||
),
|
||||
),
|
||||
remainingActionBudgetMs(actionStartedAt, actionBudget),
|
||||
`Unterseite crawlen: ${pageUrl}`,
|
||||
);
|
||||
if (crawled) {
|
||||
crawledPages.push(crawled);
|
||||
}
|
||||
@@ -552,7 +646,10 @@ export const processLeadEnrichment = internalAction({
|
||||
for (const href of uniqueInternalLinks.slice(0, 30)) {
|
||||
try {
|
||||
const response = await desktopContext.request.get(href, {
|
||||
timeout: Math.max(1_000, timeoutMs - 1_000),
|
||||
timeout: Math.min(
|
||||
Math.max(1_000, timeoutMs - 1_000),
|
||||
remainingActionBudgetMs(actionStartedAt, actionBudget),
|
||||
),
|
||||
});
|
||||
const status = response.status();
|
||||
checkMap.set(href, {
|
||||
@@ -567,19 +664,33 @@ export const processLeadEnrichment = internalAction({
|
||||
}
|
||||
}
|
||||
|
||||
const desktopScreenshot = await captureHomepageScreenshot(
|
||||
ctx,
|
||||
desktopContext,
|
||||
homepage.finalUrl,
|
||||
"desktop",
|
||||
timeoutMs,
|
||||
const desktopScreenshot = await withActionTimeout(
|
||||
captureHomepageScreenshot(
|
||||
ctx,
|
||||
desktopContext,
|
||||
homepage.finalUrl,
|
||||
"desktop",
|
||||
Math.min(
|
||||
timeoutMs,
|
||||
remainingActionBudgetMs(actionStartedAt, actionBudget),
|
||||
),
|
||||
),
|
||||
remainingActionBudgetMs(actionStartedAt, actionBudget),
|
||||
"Desktop-Screenshot erfassen",
|
||||
);
|
||||
const mobileScreenshot = await captureHomepageScreenshot(
|
||||
ctx,
|
||||
mobileContext,
|
||||
homepage.finalUrl,
|
||||
"mobile",
|
||||
timeoutMs,
|
||||
const mobileScreenshot = await withActionTimeout(
|
||||
captureHomepageScreenshot(
|
||||
ctx,
|
||||
mobileContext,
|
||||
homepage.finalUrl,
|
||||
"mobile",
|
||||
Math.min(
|
||||
timeoutMs,
|
||||
remainingActionBudgetMs(actionStartedAt, actionBudget),
|
||||
),
|
||||
),
|
||||
remainingActionBudgetMs(actionStartedAt, actionBudget),
|
||||
"Mobile-Screenshot erfassen",
|
||||
);
|
||||
|
||||
const technicalInput = buildTechnicalChecks({
|
||||
|
||||
Reference in New Issue
Block a user