Fix MVP audit evidence pipeline
This commit is contained in:
190
convex/audits.ts
190
convex/audits.ts
@@ -6,6 +6,7 @@ import type { Doc, Id } from "./_generated/dataModel";
|
||||
import type { MutationCtx, QueryCtx } from "./_generated/server";
|
||||
|
||||
// 30 days expressed in milliseconds (30 d * 24 h * 60 min * 60 s * 1000 ms).
// NOTE(review): presumably the audit age after which a review notice applies; confirm against callers.
export const AUDIT_REVIEW_NOTICE_AFTER_MS = 30 * 24 * 60 * 60 * 1000;
|
||||
// Upper bound used by getDetail for each evidence read (crawl pages, technical checks,
// screenshots) and for the number of headings returned per checked page.
const DETAIL_EVIDENCE_LIMIT = 50;
|
||||
|
||||
const auditStatus = v.union(
|
||||
v.literal("draft"),
|
||||
@@ -103,6 +104,73 @@ const latestGenerationStage = (stages: Doc<"auditGenerations">[]) => {
|
||||
return [...stages].sort((a, b) => b.updatedAt - a.updatedAt)[0] ?? null;
|
||||
};
|
||||
|
||||
/**
 * Reduces a URL-like string to a canonical key so that two spellings of the
 * same page compare equal.
 *
 * The key is `hostname + pathname + search`, lower-cased, with a leading
 * `www.` and any trailing slashes on the path removed. Scheme, port,
 * credentials and fragment are not part of the key.
 *
 * @param value - Absolute URL, scheme-less `host[/path]` string, or nullish.
 * @returns The comparison key, or `""` when `value` is nullish or blank.
 */
const normalizeComparableAuditUrl = (value: string | null | undefined) => {
  const trimmed = value?.trim();
  if (!trimmed) {
    return "";
  }

  const normalizeParsedUrl = (parsedUrl: URL) => {
    const hostname = parsedUrl.hostname.toLowerCase().replace(/^www\./, "");
    const pathname = parsedUrl.pathname.replace(/\/+$/, "");
    return `${hostname}${pathname}${parsedUrl.search}`.toLowerCase();
  };

  const tryParse = (candidate: string) => {
    try {
      return new URL(candidate);
    } catch {
      return null;
    }
  };

  const direct = tryParse(trimmed);
  if (direct?.hostname) {
    return normalizeParsedUrl(direct);
  }

  // A scheme-less "host:port/path" parses successfully as a URL whose *scheme*
  // is the host name and whose hostname is empty. Treat a host-less parse as
  // "no scheme given" and retry as https so the real host ends up in the key.
  const assumedHttps = tryParse(`https://${trimmed}`);
  if (assumedHttps) {
    return normalizeParsedUrl(assumedHttps);
  }

  // Host-less URL that cannot be read as https either (e.g. "tel:+123"):
  // keep whatever the direct parse produced.
  if (direct) {
    return normalizeParsedUrl(direct);
  }

  // Nothing parses: fall back to plain string clean-up.
  return trimmed
    .toLowerCase()
    .replace(/^https?:\/\//, "")
    .replace(/^www\./, "")
    .replace(/\/+$/, "");
};
|
||||
|
||||
/**
 * Registers `value` in `target` under the normalized form of `url`.
 *
 * Does nothing when the URL normalizes to an empty key or when the key is
 * already present, so the first value stored for a key is kept.
 *
 * @param target - Map to insert into; mutated in place.
 * @param url - URL to derive the key from; may be nullish.
 * @param value - Value to store.
 */
const setIfPresent = <T>(
  target: Map<string, T>,
  url: string | null | undefined,
  value: T,
) => {
  const normalizedKey = normalizeComparableAuditUrl(url);
  if (!normalizedKey || target.has(normalizedKey)) {
    return;
  }

  target.set(normalizedKey, value);
};
|
||||
|
||||
/**
 * Looks up the first of `urls` whose normalized form is a key of `source`.
 *
 * Candidates are tried in argument order; nullish or blank candidates are
 * skipped.
 *
 * @param source - Map keyed by normalized URL.
 * @param urls - Candidate URLs, highest priority first.
 * @returns The stored value for the first matching key, or `null` if none match.
 */
const findByUrl = <T>(source: Map<string, T>, ...urls: Array<string | null | undefined>) => {
  for (let index = 0; index < urls.length; index += 1) {
    const candidateKey = normalizeComparableAuditUrl(urls[index]);
    if (!candidateKey || !source.has(candidateKey)) {
      continue;
    }

    return source.get(candidateKey) ?? null;
  }

  return null;
};
|
||||
|
||||
/**
 * Builds the placeholder evidence record for a checked URL.
 *
 * Only `url` is populated. List fields are empty arrays and every other
 * field is `null`.
 *
 * @param url - The checked URL, echoed back unchanged.
 * @returns An evidence object with no page, check or screenshot data.
 */
const fallbackCheckedPageEvidence = (url: string) => {
  return {
    url,
    // Page identity and content: nothing is known.
    sourceUrl: null,
    finalUrl: null,
    pageKind: null,
    title: null,
    metaDescription: null,
    headings: [],
    visibleTextExcerpt: null,
    // Contact signals.
    hasContactFormSignal: null,
    hasContactCtaSignal: null,
    // Technical checks.
    usesHttps: null,
    missingMetaDescription: null,
    brokenInternalLinkCount: null,
    screenshots: [],
    createdAt: null,
  };
};
|
||||
|
||||
/**
 * Formats an epoch-millisecond timestamp as an ISO 8601 string.
 *
 * `fallback` is used when `timestamp` is missing or does not represent a
 * valid date (NaN, infinite, or outside the range a `Date` can hold), so a
 * bad stored value does not throw while a usable fallback is available.
 *
 * @param timestamp - Preferred epoch milliseconds; may be undefined.
 * @param fallback - Epoch milliseconds to use when `timestamp` is unusable.
 * @returns The ISO 8601 representation of the chosen instant.
 * @throws RangeError if `fallback` is needed and is itself not a valid date.
 */
const toIsoDate = (timestamp: number | undefined, fallback: number) => {
  const preferred = new Date(timestamp ?? fallback);
  // An invalid Date reports NaN from getTime(); toISOString() would throw on it.
  const resolved = Number.isNaN(preferred.getTime()) ? new Date(fallback) : preferred;
  return resolved.toISOString();
};
|
||||
@@ -212,7 +280,127 @@ export const getDetail = query({
|
||||
}
|
||||
|
||||
const lead = await ctx.db.get(audit.leadId);
|
||||
return { audit, lead };
|
||||
const latestSuccessfulEnrichmentRun = await ctx.db
|
||||
.query("agentRuns")
|
||||
.withIndex("by_type_and_status_and_leadId", (q) =>
|
||||
q
|
||||
.eq("type", "website_enrichment")
|
||||
.eq("status", "succeeded")
|
||||
.eq("leadId", audit.leadId),
|
||||
)
|
||||
.order("desc")
|
||||
.take(1);
|
||||
const enrichmentRunId = latestSuccessfulEnrichmentRun[0]?._id ?? null;
|
||||
|
||||
const crawlPages = enrichmentRunId
|
||||
? await ctx.db
|
||||
.query("websiteCrawlPages")
|
||||
.withIndex("by_runId", (q) => q.eq("runId", enrichmentRunId))
|
||||
.order("desc")
|
||||
.take(DETAIL_EVIDENCE_LIMIT)
|
||||
: [];
|
||||
const technicalChecks = enrichmentRunId
|
||||
? await ctx.db
|
||||
.query("websiteTechnicalChecks")
|
||||
.withIndex("by_runId", (q) => q.eq("runId", enrichmentRunId))
|
||||
.order("desc")
|
||||
.take(DETAIL_EVIDENCE_LIMIT)
|
||||
: [];
|
||||
const crawlScreenshots = enrichmentRunId
|
||||
? await ctx.db
|
||||
.query("websiteCrawlScreenshots")
|
||||
.withIndex("by_runId", (q) => q.eq("runId", enrichmentRunId))
|
||||
.order("desc")
|
||||
.take(DETAIL_EVIDENCE_LIMIT)
|
||||
: [];
|
||||
|
||||
const pagesByUrl = new Map<string, Doc<"websiteCrawlPages">>();
|
||||
for (const page of crawlPages) {
|
||||
setIfPresent(pagesByUrl, page.sourceUrl, page);
|
||||
setIfPresent(pagesByUrl, page.finalUrl, page);
|
||||
}
|
||||
|
||||
const checksByUrl = new Map<string, Doc<"websiteTechnicalChecks">>();
|
||||
for (const checks of technicalChecks) {
|
||||
setIfPresent(checksByUrl, checks.sourceUrl, checks);
|
||||
setIfPresent(checksByUrl, checks.finalUrl, checks);
|
||||
}
|
||||
|
||||
const screenshotsByUrl = new Map<
|
||||
string,
|
||||
Array<{
|
||||
id: Id<"_storage">;
|
||||
url: string;
|
||||
viewport: Doc<"websiteCrawlScreenshots">["viewport"];
|
||||
sourceUrl: string;
|
||||
width: number;
|
||||
height: number;
|
||||
createdAt: number;
|
||||
}>
|
||||
>();
|
||||
for (const screenshot of crawlScreenshots) {
|
||||
const url = await ctx.storage.getUrl(screenshot.storageId);
|
||||
if (!url) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const key = normalizeComparableAuditUrl(screenshot.sourceUrl);
|
||||
if (!key) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const current = screenshotsByUrl.get(key) ?? [];
|
||||
current.push({
|
||||
id: screenshot.storageId,
|
||||
url,
|
||||
viewport: screenshot.viewport,
|
||||
sourceUrl: screenshot.sourceUrl,
|
||||
width: screenshot.width,
|
||||
height: screenshot.height,
|
||||
createdAt: screenshot.createdAt,
|
||||
});
|
||||
screenshotsByUrl.set(key, current);
|
||||
}
|
||||
|
||||
const checkedPages = audit.checkedPages.map((checkedUrl) => {
|
||||
const page = findByUrl(pagesByUrl, checkedUrl);
|
||||
if (!page) {
|
||||
return fallbackCheckedPageEvidence(checkedUrl);
|
||||
}
|
||||
|
||||
const checks = findByUrl(checksByUrl, checkedUrl, page.sourceUrl, page.finalUrl);
|
||||
const screenshots = [
|
||||
...(
|
||||
findByUrl(screenshotsByUrl, checkedUrl, page.sourceUrl, page.finalUrl) ?? []
|
||||
),
|
||||
].sort((a, b) => b.createdAt - a.createdAt);
|
||||
|
||||
return {
|
||||
url: checkedUrl,
|
||||
sourceUrl: page.sourceUrl,
|
||||
finalUrl: page.finalUrl,
|
||||
pageKind: page.pageKind,
|
||||
title: page.title ?? null,
|
||||
metaDescription: page.metaDescription ?? null,
|
||||
headings: page.headings.slice(0, DETAIL_EVIDENCE_LIMIT),
|
||||
visibleTextExcerpt: page.visibleTextExcerpt ?? null,
|
||||
hasContactFormSignal: page.hasContactFormSignal,
|
||||
hasContactCtaSignal: page.hasContactCtaSignal,
|
||||
usesHttps: checks?.usesHttps ?? null,
|
||||
missingMetaDescription: checks?.missingMetaDescription ?? null,
|
||||
brokenInternalLinkCount: checks?.brokenInternalLinkCount ?? null,
|
||||
screenshots,
|
||||
createdAt: page.createdAt,
|
||||
};
|
||||
});
|
||||
|
||||
return {
|
||||
audit,
|
||||
lead,
|
||||
sourceSummaries: {
|
||||
checkedPages,
|
||||
},
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user