Fix MVP audit evidence pipeline

This commit is contained in:
2026-06-08 08:33:15 +02:00
parent a45b92ea0a
commit ff18fc202e
16 changed files with 771 additions and 52 deletions

View File

@@ -252,32 +252,49 @@ export const getAuditGenerationEvidence = internalQuery({
return null;
}
const runIdFilter = {
table: "by_runId" as const,
value: args.runId,
};
const leadIdFilter = {
table: "by_leadId" as const,
value: lead._id,
};
const latestSuccessfulEnrichmentRun = await ctx.db
.query("agentRuns")
.withIndex("by_type_and_status_and_leadId", (q) =>
q
.eq("type", "website_enrichment")
.eq("status", "succeeded")
.eq("leadId", lead._id),
)
.order("desc")
.take(1);
const enrichmentEvidenceRunId =
latestSuccessfulEnrichmentRun[0]?._id ?? args.runId;
const crawlPagesByRun = await ctx.db
.query("websiteCrawlPages")
.withIndex("by_runId", (q) => q.eq("runId", runIdFilter.value))
.withIndex("by_runId", (q) => q.eq("runId", enrichmentEvidenceRunId))
.order("desc")
.take(40);
const technicalChecksByRun = await ctx.db
.query("websiteTechnicalChecks")
.withIndex("by_runId", (q) => q.eq("runId", runIdFilter.value))
.withIndex("by_runId", (q) => q.eq("runId", enrichmentEvidenceRunId))
.order("desc")
.take(80);
const screenshotsByRun = await ctx.db
const auditCaptureScreenshotsByRun = await ctx.db
.query("websiteCrawlScreenshots")
.withIndex("by_runId", (q) => q.eq("runId", runIdFilter.value))
.withIndex("by_runId", (q) => q.eq("runId", args.runId))
.order("desc")
.take(20);
const enrichmentScreenshotsByRun =
enrichmentEvidenceRunId === args.runId
? []
: await ctx.db
.query("websiteCrawlScreenshots")
.withIndex("by_runId", (q) => q.eq("runId", enrichmentEvidenceRunId))
.order("desc")
.take(20);
const pageSpeedByRun = run.auditId
? await ctx.db
@@ -293,7 +310,7 @@ export const getAuditGenerationEvidence = internalQuery({
const crawlPages = crawlPagesByRun;
const technicalChecks = technicalChecksByRun;
const screenshots = screenshotsByRun;
const screenshots = [...auditCaptureScreenshotsByRun, ...enrichmentScreenshotsByRun];
return {
lead: {

View File

@@ -1,9 +1,9 @@
"use node";
import { join } from "node:path";
import { type DataContent, generateObject } from "ai";
import { createOpenRouterProvider } from "../lib/ai/openrouter-provider";
import { resolveModelProfile } from "../lib/ai/model-profiles";
import { loadLocalAuditSkillRegistry } from "../lib/ai/local-audit-skill-registry";
import {
auditClassificationSchema,
auditSummarySchema,
@@ -26,10 +26,7 @@ import {
type JinaReaderPageInput,
type ScreenshotOneRequest,
} from "../lib/external-audit-services";
import {
loadSkillsRegistry,
type AuditUsedSkill,
} from "../lib/skills-registry";
import { type AuditUsedSkill } from "../lib/skills-registry";
import { internal } from "./_generated/api";
import type { Id } from "./_generated/dataModel";
import {
@@ -455,11 +452,9 @@ async function appendRunEvent(
async function loadAuditSkillRegistry(
ctx: ActionCtx,
runId: Id<"agentRuns">,
): Promise<Awaited<ReturnType<typeof loadSkillsRegistry>>> {
): Promise<ReturnType<typeof loadLocalAuditSkillRegistry>> {
try {
return await loadSkillsRegistry(
join(process.cwd(), "v2_elemente", "skills.md"),
);
return loadLocalAuditSkillRegistry();
} catch (error) {
const safeErrorSummary = messageFromError(error);
try {

View File

@@ -6,6 +6,7 @@ import type { Doc, Id } from "./_generated/dataModel";
import type { MutationCtx, QueryCtx } from "./_generated/server";
// 30 days expressed in milliseconds (30 d * 24 h * 60 min * 60 s * 1000 ms).
// NOTE(review): presumably the audit age after which a review notice applies — confirm against callers.
export const AUDIT_REVIEW_NOTICE_AFTER_MS = 30 * 24 * 60 * 60 * 1000;
// Upper bound applied by getDetail to each evidence query (`take`) and to the
// number of headings returned per checked page.
const DETAIL_EVIDENCE_LIMIT = 50;
const auditStatus = v.union(
v.literal("draft"),
@@ -103,6 +104,73 @@ const latestGenerationStage = (stages: Doc<"auditGenerations">[]) => {
return [...stages].sort((a, b) => b.updatedAt - a.updatedAt)[0] ?? null;
};
/**
 * Reduces a URL-like string to a loose, case-insensitive comparison key so
 * that differently written references to the same page compare equal.
 *
 * The key is `hostname + pathname + search`, lowercased, with a leading
 * `www.` and any trailing slashes of the path removed. Scheme, port, hash and
 * credentials are intentionally not part of the key.
 *
 * @param value Absolute URL, schemeless host/path, or nullish/blank input.
 * @returns The comparison key, or `""` when the input is nullish or blank.
 */
const normalizeComparableAuditUrl = (value: string | null | undefined) => {
  const trimmed = value?.trim();
  if (!trimmed) {
    return "";
  }

  const normalizeParsedUrl = (parsedUrl: URL) => {
    const hostname = parsedUrl.hostname.toLowerCase().replace(/^www\./, "");
    const pathname = parsedUrl.pathname.replace(/\/+$/, "");
    return `${hostname}${pathname}${parsedUrl.search}`.toLowerCase();
  };

  const withHttps = `https://${trimmed}`;

  // A bare "host:port[/path]" string is itself a syntactically valid URL whose
  // *scheme* is the host name (e.g. "example.com:8080/about" parses with an
  // empty hostname and the path "8080/about"). Parsing it as-is would produce
  // a key that never matches the same page written with an explicit scheme,
  // so for that shape the https-prefixed form is tried first.
  // NOTE(review): numeric-only opaque URLs such as "tel:123" also match this
  // shape; audit URLs are assumed to be web pages — confirm against callers.
  const isBareHostWithPort = /^[^\s/?#:@]+:\d+(?:[/?#]|$)/.test(trimmed);
  const candidates = isBareHostWithPort
    ? [withHttps, trimmed]
    : [trimmed, withHttps];

  for (const candidate of candidates) {
    try {
      return normalizeParsedUrl(new URL(candidate));
    } catch {
      // Not parseable in this form; fall through to the next candidate.
    }
  }

  // Last resort for strings the URL parser rejects in every form: apply the
  // same cosmetic normalisation textually.
  return trimmed
    .toLowerCase()
    .replace(/^https?:\/\//, "")
    .replace(/^www\./, "")
    .replace(/\/+$/, "");
};
/**
 * Registers `value` in `target` under the normalised form of `url`.
 *
 * Nothing is written when the URL normalises to an empty key, or when an
 * entry already exists for that key — the first value stored for a key wins.
 *
 * @param target Map keyed by normalised URL; mutated in place.
 * @param url Raw URL to derive the key from; may be nullish.
 * @param value Entry to store when the key is free.
 */
const setIfPresent = <T>(
  target: Map<string, T>,
  url: string | null | undefined,
  value: T,
) => {
  const normalizedKey = normalizeComparableAuditUrl(url);
  if (!normalizedKey) {
    return;
  }
  if (target.has(normalizedKey)) {
    return;
  }
  target.set(normalizedKey, value);
};
/**
 * Looks up the first of `urls` whose normalised form is a key of `source`.
 *
 * Candidates are checked in the order given; candidates that normalise to an
 * empty key are ignored.
 *
 * @param source Map keyed by normalised URL.
 * @param urls Raw candidate URLs, highest priority first; entries may be nullish.
 * @returns The value stored for the first matching key, or `null` when no
 *   candidate matches (or the stored value is itself nullish).
 */
const findByUrl = <T>(source: Map<string, T>, ...urls: Array<string | null | undefined>) => {
  const matchedKey = urls
    .map((candidate) => normalizeComparableAuditUrl(candidate))
    .find((candidateKey) => candidateKey !== "" && source.has(candidateKey));

  if (matchedKey === undefined) {
    return null;
  }
  return source.get(matchedKey) ?? null;
};
/**
 * Builds the placeholder evidence record for a checked URL: every field except
 * `url` is `null`, and the two list fields are fresh empty arrays.
 *
 * @param url The checked URL, echoed back unchanged.
 * @returns A record carrying only `url`, with empty `headings` and `screenshots`.
 */
const fallbackCheckedPageEvidence = (url: string) => {
  return {
    // Identity of the checked page.
    url,
    sourceUrl: null,
    finalUrl: null,
    pageKind: null,
    // Page content fields.
    title: null,
    metaDescription: null,
    headings: [],
    visibleTextExcerpt: null,
    hasContactFormSignal: null,
    hasContactCtaSignal: null,
    // Technical-check fields.
    usesHttps: null,
    missingMetaDescription: null,
    brokenInternalLinkCount: null,
    // Attachments and timestamp.
    screenshots: [],
    createdAt: null,
  };
};
/**
 * Formats an epoch-millisecond timestamp as an ISO 8601 string.
 *
 * @param timestamp Epoch milliseconds; when `undefined`, `fallback` is used instead.
 * @param fallback Epoch milliseconds used only when `timestamp` is `undefined`.
 * @returns The `Date#toISOString()` rendering of the chosen instant.
 */
const toIsoDate = (timestamp: number | undefined, fallback: number) => {
  // `??` rather than `||` so a legitimate timestamp of 0 is not replaced.
  const epochMs = timestamp ?? fallback;
  const instant = new Date(epochMs);
  return instant.toISOString();
};
@@ -212,7 +280,127 @@ export const getDetail = query({
}
const lead = await ctx.db.get(audit.leadId);
return { audit, lead };
const latestSuccessfulEnrichmentRun = await ctx.db
.query("agentRuns")
.withIndex("by_type_and_status_and_leadId", (q) =>
q
.eq("type", "website_enrichment")
.eq("status", "succeeded")
.eq("leadId", audit.leadId),
)
.order("desc")
.take(1);
const enrichmentRunId = latestSuccessfulEnrichmentRun[0]?._id ?? null;
const crawlPages = enrichmentRunId
? await ctx.db
.query("websiteCrawlPages")
.withIndex("by_runId", (q) => q.eq("runId", enrichmentRunId))
.order("desc")
.take(DETAIL_EVIDENCE_LIMIT)
: [];
const technicalChecks = enrichmentRunId
? await ctx.db
.query("websiteTechnicalChecks")
.withIndex("by_runId", (q) => q.eq("runId", enrichmentRunId))
.order("desc")
.take(DETAIL_EVIDENCE_LIMIT)
: [];
const crawlScreenshots = enrichmentRunId
? await ctx.db
.query("websiteCrawlScreenshots")
.withIndex("by_runId", (q) => q.eq("runId", enrichmentRunId))
.order("desc")
.take(DETAIL_EVIDENCE_LIMIT)
: [];
const pagesByUrl = new Map<string, Doc<"websiteCrawlPages">>();
for (const page of crawlPages) {
setIfPresent(pagesByUrl, page.sourceUrl, page);
setIfPresent(pagesByUrl, page.finalUrl, page);
}
const checksByUrl = new Map<string, Doc<"websiteTechnicalChecks">>();
for (const checks of technicalChecks) {
setIfPresent(checksByUrl, checks.sourceUrl, checks);
setIfPresent(checksByUrl, checks.finalUrl, checks);
}
const screenshotsByUrl = new Map<
string,
Array<{
id: Id<"_storage">;
url: string;
viewport: Doc<"websiteCrawlScreenshots">["viewport"];
sourceUrl: string;
width: number;
height: number;
createdAt: number;
}>
>();
for (const screenshot of crawlScreenshots) {
const url = await ctx.storage.getUrl(screenshot.storageId);
if (!url) {
continue;
}
const key = normalizeComparableAuditUrl(screenshot.sourceUrl);
if (!key) {
continue;
}
const current = screenshotsByUrl.get(key) ?? [];
current.push({
id: screenshot.storageId,
url,
viewport: screenshot.viewport,
sourceUrl: screenshot.sourceUrl,
width: screenshot.width,
height: screenshot.height,
createdAt: screenshot.createdAt,
});
screenshotsByUrl.set(key, current);
}
const checkedPages = audit.checkedPages.map((checkedUrl) => {
const page = findByUrl(pagesByUrl, checkedUrl);
if (!page) {
return fallbackCheckedPageEvidence(checkedUrl);
}
const checks = findByUrl(checksByUrl, checkedUrl, page.sourceUrl, page.finalUrl);
const screenshots = [
...(
findByUrl(screenshotsByUrl, checkedUrl, page.sourceUrl, page.finalUrl) ?? []
),
].sort((a, b) => b.createdAt - a.createdAt);
return {
url: checkedUrl,
sourceUrl: page.sourceUrl,
finalUrl: page.finalUrl,
pageKind: page.pageKind,
title: page.title ?? null,
metaDescription: page.metaDescription ?? null,
headings: page.headings.slice(0, DETAIL_EVIDENCE_LIMIT),
visibleTextExcerpt: page.visibleTextExcerpt ?? null,
hasContactFormSignal: page.hasContactFormSignal,
hasContactCtaSignal: page.hasContactCtaSignal,
usesHttps: checks?.usesHttps ?? null,
missingMetaDescription: checks?.missingMetaDescription ?? null,
brokenInternalLinkCount: checks?.brokenInternalLinkCount ?? null,
screenshots,
createdAt: page.createdAt,
};
});
return {
audit,
lead,
sourceSummaries: {
checkedPages,
},
};
},
});