Fix MVP audit evidence pipeline
This commit is contained in:
@@ -252,32 +252,49 @@ export const getAuditGenerationEvidence = internalQuery({
|
||||
return null;
|
||||
}
|
||||
|
||||
const runIdFilter = {
|
||||
table: "by_runId" as const,
|
||||
value: args.runId,
|
||||
};
|
||||
const leadIdFilter = {
|
||||
table: "by_leadId" as const,
|
||||
value: lead._id,
|
||||
};
|
||||
|
||||
const latestSuccessfulEnrichmentRun = await ctx.db
|
||||
.query("agentRuns")
|
||||
.withIndex("by_type_and_status_and_leadId", (q) =>
|
||||
q
|
||||
.eq("type", "website_enrichment")
|
||||
.eq("status", "succeeded")
|
||||
.eq("leadId", lead._id),
|
||||
)
|
||||
.order("desc")
|
||||
.take(1);
|
||||
const enrichmentEvidenceRunId =
|
||||
latestSuccessfulEnrichmentRun[0]?._id ?? args.runId;
|
||||
|
||||
const crawlPagesByRun = await ctx.db
|
||||
.query("websiteCrawlPages")
|
||||
.withIndex("by_runId", (q) => q.eq("runId", runIdFilter.value))
|
||||
.withIndex("by_runId", (q) => q.eq("runId", enrichmentEvidenceRunId))
|
||||
.order("desc")
|
||||
.take(40);
|
||||
|
||||
const technicalChecksByRun = await ctx.db
|
||||
.query("websiteTechnicalChecks")
|
||||
.withIndex("by_runId", (q) => q.eq("runId", runIdFilter.value))
|
||||
.withIndex("by_runId", (q) => q.eq("runId", enrichmentEvidenceRunId))
|
||||
.order("desc")
|
||||
.take(80);
|
||||
|
||||
const screenshotsByRun = await ctx.db
|
||||
const auditCaptureScreenshotsByRun = await ctx.db
|
||||
.query("websiteCrawlScreenshots")
|
||||
.withIndex("by_runId", (q) => q.eq("runId", runIdFilter.value))
|
||||
.withIndex("by_runId", (q) => q.eq("runId", args.runId))
|
||||
.order("desc")
|
||||
.take(20);
|
||||
const enrichmentScreenshotsByRun =
|
||||
enrichmentEvidenceRunId === args.runId
|
||||
? []
|
||||
: await ctx.db
|
||||
.query("websiteCrawlScreenshots")
|
||||
.withIndex("by_runId", (q) => q.eq("runId", enrichmentEvidenceRunId))
|
||||
.order("desc")
|
||||
.take(20);
|
||||
|
||||
const pageSpeedByRun = run.auditId
|
||||
? await ctx.db
|
||||
@@ -293,7 +310,7 @@ export const getAuditGenerationEvidence = internalQuery({
|
||||
|
||||
const crawlPages = crawlPagesByRun;
|
||||
const technicalChecks = technicalChecksByRun;
|
||||
const screenshots = screenshotsByRun;
|
||||
const screenshots = [...auditCaptureScreenshotsByRun, ...enrichmentScreenshotsByRun];
|
||||
|
||||
return {
|
||||
lead: {
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
"use node";
|
||||
|
||||
import { join } from "node:path";
|
||||
import { type DataContent, generateObject } from "ai";
|
||||
import { createOpenRouterProvider } from "../lib/ai/openrouter-provider";
|
||||
import { resolveModelProfile } from "../lib/ai/model-profiles";
|
||||
import { loadLocalAuditSkillRegistry } from "../lib/ai/local-audit-skill-registry";
|
||||
import {
|
||||
auditClassificationSchema,
|
||||
auditSummarySchema,
|
||||
@@ -26,10 +26,7 @@ import {
|
||||
type JinaReaderPageInput,
|
||||
type ScreenshotOneRequest,
|
||||
} from "../lib/external-audit-services";
|
||||
import {
|
||||
loadSkillsRegistry,
|
||||
type AuditUsedSkill,
|
||||
} from "../lib/skills-registry";
|
||||
import { type AuditUsedSkill } from "../lib/skills-registry";
|
||||
import { internal } from "./_generated/api";
|
||||
import type { Id } from "./_generated/dataModel";
|
||||
import {
|
||||
@@ -455,11 +452,9 @@ async function appendRunEvent(
|
||||
async function loadAuditSkillRegistry(
|
||||
ctx: ActionCtx,
|
||||
runId: Id<"agentRuns">,
|
||||
): Promise<Awaited<ReturnType<typeof loadSkillsRegistry>>> {
|
||||
): Promise<ReturnType<typeof loadLocalAuditSkillRegistry>> {
|
||||
try {
|
||||
return await loadSkillsRegistry(
|
||||
join(process.cwd(), "v2_elemente", "skills.md"),
|
||||
);
|
||||
return loadLocalAuditSkillRegistry();
|
||||
} catch (error) {
|
||||
const safeErrorSummary = messageFromError(error);
|
||||
try {
|
||||
|
||||
190
convex/audits.ts
190
convex/audits.ts
@@ -6,6 +6,7 @@ import type { Doc, Id } from "./_generated/dataModel";
|
||||
import type { MutationCtx, QueryCtx } from "./_generated/server";
|
||||
|
||||
/**
 * Thirty days expressed in milliseconds (30 d * 24 h * 60 min * 60 s * 1000 ms).
 * NOTE(review): presumably the audit age after which a review notice applies — confirm against usages.
 */
export const AUDIT_REVIEW_NOTICE_AFTER_MS = 30 * 24 * 60 * 60 * 1000;
|
||||
/**
 * Upper bound on evidence items read for the audit detail view: it is passed to
 * `.take()` on the crawl-page, technical-check and screenshot queries and used to
 * cap the number of headings returned per page.
 */
const DETAIL_EVIDENCE_LIMIT = 50;
|
||||
|
||||
const auditStatus = v.union(
|
||||
v.literal("draft"),
|
||||
@@ -103,6 +104,73 @@ const latestGenerationStage = (stages: Doc<"auditGenerations">[]) => {
|
||||
return [...stages].sort((a, b) => b.updatedAt - a.updatedAt)[0] ?? null;
|
||||
};
|
||||
|
||||
/**
 * Reduces a URL-like string to a canonical key used to compare URLs for equality.
 *
 * The key is `hostname + pathname + search`, lower-cased, with a leading `www.`
 * and any trailing path slashes removed. Scheme, port, credentials and fragment
 * are not part of the key, so `https://www.Example.com/a/` and
 * `http://example.com/a` produce the same key.
 *
 * @param value - A full URL, a scheme-less host/path, or a nullish/blank value.
 * @returns The comparison key, or `""` when `value` is nullish or blank.
 */
const normalizeComparableAuditUrl = (value: string | null | undefined) => {
  const trimmed = value?.trim();
  if (!trimmed) {
    return "";
  }

  const normalizeParsedUrl = (parsedUrl: URL) => {
    const hostname = parsedUrl.hostname.toLowerCase().replace(/^www\./, "");
    const pathname = parsedUrl.pathname.replace(/\/+$/, "");
    return `${hostname}${pathname}${parsedUrl.search}`.toLowerCase();
  };

  const withHttps = `https://${trimmed}`;

  // Only trust an as-is parse when the input carries an explicit `scheme://`.
  // Scheme-less input containing a colon (e.g. `example.com:8080/path` or
  // `localhost:3000`) would otherwise be parsed with the host as the *scheme*,
  // leaving an empty hostname and a key such as `8080/path`.
  const hasExplicitScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed);
  const candidates = hasExplicitScheme ? [trimmed, withHttps] : [withHttps];

  for (const candidate of candidates) {
    try {
      return normalizeParsedUrl(new URL(candidate));
    } catch {
      // Not parseable in this form; fall through to the next candidate.
    }
  }

  // Last resort for input the URL parser rejects: best-effort textual cleanup.
  return trimmed
    .toLowerCase()
    .replace(/^https?:\/\//, "")
    .replace(/^www\./, "")
    .replace(/\/+$/, "");
};
|
||||
|
||||
/**
 * Stores `value` in `target` under the normalized form of `url`, keeping the
 * first entry seen: nothing is written when the URL normalizes to an empty key
 * or when that key is already present in the map.
 *
 * @param target - Map keyed by normalized URL; mutated in place.
 * @param url - Raw URL to normalize into the key.
 * @param value - Entry to store when the key is new.
 */
const setIfPresent = <T>(
  target: Map<string, T>,
  url: string | null | undefined,
  value: T,
) => {
  const normalizedKey = normalizeComparableAuditUrl(url);
  if (!normalizedKey || target.has(normalizedKey)) {
    return;
  }

  target.set(normalizedKey, value);
};
|
||||
|
||||
/**
 * Looks up the first of `urls` whose normalized form is a key in `source`.
 *
 * Candidates are tried in argument order; ones that normalize to an empty key
 * or are not in the map are skipped.
 *
 * @param source - Map keyed by normalized URL.
 * @param urls - Candidate URLs, highest priority first.
 * @returns The entry for the first matching candidate, or `null` if none match.
 */
const findByUrl = <T>(source: Map<string, T>, ...urls: Array<string | null | undefined>) => {
  for (const candidate of urls) {
    const normalizedKey = normalizeComparableAuditUrl(candidate);
    if (!normalizedKey || !source.has(normalizedKey)) {
      continue;
    }

    return source.get(normalizedKey) ?? null;
  }

  return null;
};
|
||||
|
||||
/**
 * Builds the placeholder evidence record for a checked URL that has no
 * matching crawl page: only `url` is populated, list fields are empty arrays
 * and every other field is `null`.
 *
 * @param url - The checked URL, echoed back unchanged.
 * @returns An evidence object with the same keys as a populated page entry.
 */
const fallbackCheckedPageEvidence = (url: string) => {
  return {
    url,
    sourceUrl: null,
    finalUrl: null,
    pageKind: null,
    title: null,
    metaDescription: null,
    headings: [],
    visibleTextExcerpt: null,
    hasContactFormSignal: null,
    hasContactCtaSignal: null,
    usesHttps: null,
    missingMetaDescription: null,
    brokenInternalLinkCount: null,
    screenshots: [],
    createdAt: null,
  };
};
|
||||
|
||||
/**
 * Formats an epoch-millisecond timestamp as an ISO-8601 string, substituting
 * `fallback` when `timestamp` is nullish (a timestamp of `0` is kept).
 *
 * @param timestamp - Epoch milliseconds, possibly missing.
 * @param fallback - Epoch milliseconds used when `timestamp` is absent.
 * @returns The chosen instant as produced by `Date.prototype.toISOString`.
 */
const toIsoDate = (timestamp: number | undefined, fallback: number) => {
  const epochMs = timestamp ?? fallback;
  const instant = new Date(epochMs);
  return instant.toISOString();
};
|
||||
@@ -212,7 +280,127 @@ export const getDetail = query({
|
||||
}
|
||||
|
||||
const lead = await ctx.db.get(audit.leadId);
|
||||
return { audit, lead };
|
||||
const latestSuccessfulEnrichmentRun = await ctx.db
|
||||
.query("agentRuns")
|
||||
.withIndex("by_type_and_status_and_leadId", (q) =>
|
||||
q
|
||||
.eq("type", "website_enrichment")
|
||||
.eq("status", "succeeded")
|
||||
.eq("leadId", audit.leadId),
|
||||
)
|
||||
.order("desc")
|
||||
.take(1);
|
||||
const enrichmentRunId = latestSuccessfulEnrichmentRun[0]?._id ?? null;
|
||||
|
||||
const crawlPages = enrichmentRunId
|
||||
? await ctx.db
|
||||
.query("websiteCrawlPages")
|
||||
.withIndex("by_runId", (q) => q.eq("runId", enrichmentRunId))
|
||||
.order("desc")
|
||||
.take(DETAIL_EVIDENCE_LIMIT)
|
||||
: [];
|
||||
const technicalChecks = enrichmentRunId
|
||||
? await ctx.db
|
||||
.query("websiteTechnicalChecks")
|
||||
.withIndex("by_runId", (q) => q.eq("runId", enrichmentRunId))
|
||||
.order("desc")
|
||||
.take(DETAIL_EVIDENCE_LIMIT)
|
||||
: [];
|
||||
const crawlScreenshots = enrichmentRunId
|
||||
? await ctx.db
|
||||
.query("websiteCrawlScreenshots")
|
||||
.withIndex("by_runId", (q) => q.eq("runId", enrichmentRunId))
|
||||
.order("desc")
|
||||
.take(DETAIL_EVIDENCE_LIMIT)
|
||||
: [];
|
||||
|
||||
const pagesByUrl = new Map<string, Doc<"websiteCrawlPages">>();
|
||||
for (const page of crawlPages) {
|
||||
setIfPresent(pagesByUrl, page.sourceUrl, page);
|
||||
setIfPresent(pagesByUrl, page.finalUrl, page);
|
||||
}
|
||||
|
||||
const checksByUrl = new Map<string, Doc<"websiteTechnicalChecks">>();
|
||||
for (const checks of technicalChecks) {
|
||||
setIfPresent(checksByUrl, checks.sourceUrl, checks);
|
||||
setIfPresent(checksByUrl, checks.finalUrl, checks);
|
||||
}
|
||||
|
||||
const screenshotsByUrl = new Map<
|
||||
string,
|
||||
Array<{
|
||||
id: Id<"_storage">;
|
||||
url: string;
|
||||
viewport: Doc<"websiteCrawlScreenshots">["viewport"];
|
||||
sourceUrl: string;
|
||||
width: number;
|
||||
height: number;
|
||||
createdAt: number;
|
||||
}>
|
||||
>();
|
||||
for (const screenshot of crawlScreenshots) {
|
||||
const url = await ctx.storage.getUrl(screenshot.storageId);
|
||||
if (!url) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const key = normalizeComparableAuditUrl(screenshot.sourceUrl);
|
||||
if (!key) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const current = screenshotsByUrl.get(key) ?? [];
|
||||
current.push({
|
||||
id: screenshot.storageId,
|
||||
url,
|
||||
viewport: screenshot.viewport,
|
||||
sourceUrl: screenshot.sourceUrl,
|
||||
width: screenshot.width,
|
||||
height: screenshot.height,
|
||||
createdAt: screenshot.createdAt,
|
||||
});
|
||||
screenshotsByUrl.set(key, current);
|
||||
}
|
||||
|
||||
const checkedPages = audit.checkedPages.map((checkedUrl) => {
|
||||
const page = findByUrl(pagesByUrl, checkedUrl);
|
||||
if (!page) {
|
||||
return fallbackCheckedPageEvidence(checkedUrl);
|
||||
}
|
||||
|
||||
const checks = findByUrl(checksByUrl, checkedUrl, page.sourceUrl, page.finalUrl);
|
||||
const screenshots = [
|
||||
...(
|
||||
findByUrl(screenshotsByUrl, checkedUrl, page.sourceUrl, page.finalUrl) ?? []
|
||||
),
|
||||
].sort((a, b) => b.createdAt - a.createdAt);
|
||||
|
||||
return {
|
||||
url: checkedUrl,
|
||||
sourceUrl: page.sourceUrl,
|
||||
finalUrl: page.finalUrl,
|
||||
pageKind: page.pageKind,
|
||||
title: page.title ?? null,
|
||||
metaDescription: page.metaDescription ?? null,
|
||||
headings: page.headings.slice(0, DETAIL_EVIDENCE_LIMIT),
|
||||
visibleTextExcerpt: page.visibleTextExcerpt ?? null,
|
||||
hasContactFormSignal: page.hasContactFormSignal,
|
||||
hasContactCtaSignal: page.hasContactCtaSignal,
|
||||
usesHttps: checks?.usesHttps ?? null,
|
||||
missingMetaDescription: checks?.missingMetaDescription ?? null,
|
||||
brokenInternalLinkCount: checks?.brokenInternalLinkCount ?? null,
|
||||
screenshots,
|
||||
createdAt: page.createdAt,
|
||||
};
|
||||
});
|
||||
|
||||
return {
|
||||
audit,
|
||||
lead,
|
||||
sourceSummaries: {
|
||||
checkedPages,
|
||||
},
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user