feat: add OpenRouter audit generation pipeline
This commit is contained in:
565
lib/ai/audit-evidence.ts
Normal file
565
lib/ai/audit-evidence.ts
Normal file
@@ -0,0 +1,565 @@
|
||||
import {
|
||||
type SkillRegistryEntry,
|
||||
toAuditUsedSkill,
|
||||
type AuditUsedSkill,
|
||||
} from "../skills-registry";
|
||||
import {
|
||||
buildPageSpeedAuditInputs,
|
||||
type PageSpeedMinimalAuditResult,
|
||||
} from "../pagespeed-audit-input";
|
||||
|
||||
/** Skill registry entry as consumed by this module; a plain alias of `SkillRegistryEntry`. */
export type SkillRegistryEntryEvidence = SkillRegistryEntry;
|
||||
|
||||
/**
 * Lead (company) details that feed the `companyContext` lines of the audit
 * evidence. Every field is optional and may be `null`.
 */
export type AuditLeadEvidence = {
  companyName?: string | null;
  niche?: string | null;
  city?: string | null;
  websiteDomain?: string | null;
  websiteUrl?: string | null;
  address?: string | null;
  phone?: string | null;
  contactPerson?: string | null;
};
|
||||
|
||||
/**
 * One crawled page as input for evidence building. All fields are optional;
 * the builder prefers `finalUrl` over `sourceUrl` when deriving a page label.
 */
export type AuditCrawlPageEvidence = {
  sourceUrl?: string | null;
  finalUrl?: string | null;
  title?: string | null;
  metaDescription?: string | null;
  // Compared against "contact", "impressum" and "services" in this module.
  pageKind?: string | null;
  hasContactFormSignal?: boolean;
  hasContactCtaSignal?: boolean;
  // Only the presence of visible text is used here, not its content.
  visibleText?: string | null;
  visibleTextExcerpt?: string | null;
};
|
||||
|
||||
/**
 * Result of one technical check as input for evidence building.
 * All fields are optional; absent flags are treated as "no finding".
 */
export type AuditTechnicalCheckEvidence = {
  sourceUrl?: string | null;
  finalUrl?: string | null;
  // Only an explicit `false` is reported as a finding.
  usesHttps?: boolean;
  missingTitle?: boolean;
  missingMetaDescription?: boolean;
  hasVisibleContactPath?: boolean;
  // Treated as 0 when absent.
  brokenInternalLinkCount?: number;
};
|
||||
|
||||
/**
 * Screenshot metadata record. The named fields are copied into
 * `AuditEvidenceInput.screenshotReferences`; any additional properties
 * admitted by the index signature are not copied.
 */
export type AuditScreenshotEvidence = {
  storageId: string;
  viewport: string;
  sourceUrl: string;
  // NOTE(review): presumably a timestamp; unit is not visible here, confirm with the producer.
  capturedAt: number;
  width: number;
  height: number;
  mimeType: string;
  // Allows arbitrary extra properties on the record.
  [key: string]: unknown;
};
|
||||
|
||||
/**
 * Compact evidence bundle returned by `buildAuditEvidenceInput`.
 * The string lists hold sanitized, de-duplicated, length-capped lines.
 */
export type AuditEvidenceInput = {
  // "Label: value" lines derived from the lead.
  companyContext: string[];
  // One label per crawled page (title or URL path, optionally prefixed by page kind).
  checkedPages: string[];
  observedUxSignals: string[];
  observedContentSignals: string[];
  observedTechnicalSignals: string[];
  // Subset of screenshot metadata; extra properties of the source record are dropped.
  screenshotReferences: Array<{
    storageId: string;
    sourceUrl: string;
    viewport: string;
    width: number;
    height: number;
    mimeType: string;
    capturedAt: number;
  }>;
  pageSpeedCustomerImplications: string[];
  // At most one skill per category for which evidence was observed.
  selectedSkills: AuditUsedSkill[];
};
|
||||
|
||||
/**
 * Arguments of `buildAuditEvidenceInput`. Every source is optional; a
 * missing source is treated as empty.
 */
export type AuditEvidenceInputArgs = {
  lead?: AuditLeadEvidence;
  crawlPages?: readonly AuditCrawlPageEvidence[];
  technicalChecks?: readonly AuditTechnicalCheckEvidence[];
  screenshots?: readonly AuditScreenshotEvidence[];
  pageSpeedInputs?: readonly PageSpeedMinimalAuditResult[];
  skillRegistry?: readonly SkillRegistryEntryEvidence[];
};
|
||||
|
||||
// Maximum number of entries kept in each output list of the evidence bundle.
const COMPANY_CONTEXT_LIMIT = 8;
const CHECKED_PAGES_LIMIT = 8;
const UX_SIGNAL_LIMIT = 6;
const CONTENT_SIGNAL_LIMIT = 6;
const TECHNICAL_SIGNAL_LIMIT = 6;
const PAGESPEED_SIGNAL_LIMIT = 8;
const SCREENSHOT_REFERENCE_LIMIT = 8;
const SELECTED_SKILLS_LIMIT = 6;

// Rejection patterns used by sanitizeCustomerText; a match discards the whole line.
// None of them carries the `g` flag, so `.test()` is stateless.

// An http:// or https:// URL anywhere in the text.
const URL_PATTERN = /\bhttps?:\/\/[^\s<>"']+/i;
// A `{...}` or `[...]` span, i.e. something that looks like serialized data.
const JSON_BRACKET_PATTERN = /\{[^}]*\}|\[[^\]]*\]/;
// Tool and metric vocabulary as whole words.
const PAGESPEED_NOISE_PATTERN =
  /\b(?:raw\s*storage\s*id|rawstorageid|lighthouse|pagespeed|score)\b/i;
// Any run of 24 or more ASCII letters, digits, `_` or `-`.
// NOTE(review): this also matches long hyphenated slugs and long ASCII compound words.
const MACHINE_TOKEN_PATTERN = /\b[a-z\d_-]{24,}\b/i;
|
||||
|
||||
/**
 * Collapses every run of whitespace to a single space and trims both ends.
 *
 * @param input - Any value; only strings are processed.
 * @returns The normalized string, or `""` when `input` is not a string.
 */
function trimAndNormalize(input: unknown): string {
  return typeof input === "string" ? input.replace(/\s+/g, " ").trim() : "";
}
|
||||
|
||||
/**
 * Turns an arbitrary value into a short, customer-presentable line, or
 * rejects it entirely.
 *
 * The value is whitespace-normalized and stripped of `<...>` tags. The
 * result is discarded (empty string) when it contains a URL, a bracketed
 * `{...}`/`[...]` span, tool vocabulary, a long machine-like token, exceeds
 * `maxLength`, or contains no letter (a-z, ä, ö, ü, ß).
 *
 * @param value - Candidate text; non-strings are rejected.
 * @param maxLength - Maximum accepted length; longer text is dropped, not truncated.
 * @returns The cleaned text, or `""` when the value is rejected.
 */
function sanitizeCustomerText(value: unknown, maxLength = 180): string {
  const normalized = trimAndNormalize(value);
  if (normalized === "") {
    return "";
  }

  // Replace tags with a space so adjacent words do not fuse, then re-collapse.
  const text = normalized
    .replace(/<[^>]*>/g, " ")
    .replace(/\s{2,}/g, " ")
    .trim();

  const rejectPatterns = [
    URL_PATTERN,
    JSON_BRACKET_PATTERN,
    PAGESPEED_NOISE_PATTERN,
    MACHINE_TOKEN_PATTERN,
  ];

  const rejected =
    rejectPatterns.some((pattern) => pattern.test(text)) ||
    text.length > maxLength ||
    !/[a-zäöüß]/i.test(text);

  return rejected ? "" : text;
}
|
||||
|
||||
/**
 * Appends a sanitized line to `bucket` unless the bucket is full or already
 * holds the same line (compared case-insensitively).
 *
 * @param bucket - Target list; mutated in place.
 * @param input - Raw candidate line.
 * @param max - Maximum number of entries allowed in `bucket`.
 * @param sanitizer - Cleans the candidate; an empty result means "skip".
 */
function addUniqueCapped(
  bucket: string[],
  input: string,
  max: number,
  sanitizer = sanitizeCustomerText,
): void {
  const candidate = sanitizer(input);
  if (!candidate || bucket.length >= max) {
    return;
  }

  const key = candidate.toLowerCase();
  for (const existing of bucket) {
    if (existing.toLowerCase() === key) {
      return;
    }
  }

  bucket.push(candidate);
}
|
||||
|
||||
/**
 * Derives a short, human-readable page label from an absolute URL.
 *
 * The path is percent-decoded (so `/%C3%BCber-uns` reads as `über-uns`),
 * repeated slashes are collapsed, and leading/trailing slashes are removed so
 * that `/kontakt` and `/kontakt/` produce the same label. The root path maps
 * to the label "Startseite". The result is capped at 70 characters.
 *
 * @param urlLike - Absolute URL string.
 * @returns The label, or `""` when `urlLike` cannot be parsed as a URL.
 */
function compactPath(urlLike: string): string {
  let pathname: string;
  try {
    pathname = new URL(urlLike).pathname || "/";
  } catch {
    return "";
  }

  let readable = pathname;
  try {
    readable = decodeURIComponent(pathname);
  } catch {
    // Malformed percent sequence: keep the encoded form rather than failing.
  }

  const normalizedPath = readable
    .replace(/\s+/g, " ")
    .replace(/\/+/g, "/")
    .replace(/^\//, "")
    .replace(/\/$/, "")
    .trim();

  if (!normalizedPath) {
    return "Startseite";
  }

  return normalizedPath.slice(0, 70);
}
|
||||
|
||||
/**
 * Builds the display label for a checked page.
 *
 * The label is trimmed and capped at 100 characters, then prefixed with the
 * page kind (`"kind: label"`) when a kind is given. An empty label yields
 * `""` so callers can skip the page instead of emitting a bare `"kind: "`.
 *
 * @param pageKind - Page kind used as prefix; an empty string means no prefix.
 * @param pageLabel - Page title or path.
 * @returns The combined label, or `""` when there is no label text.
 */
function compactLabelForPage(pageKind: string, pageLabel: string): string {
  const label = pageLabel.trim().slice(0, 100);
  if (!label) {
    return "";
  }

  return pageKind ? `${pageKind}: ${label}` : label;
}
|
||||
|
||||
/**
 * Null-tolerant wrapper around `compactPath`.
 *
 * @param url - URL string, or a missing value.
 * @returns The compact path label, or `""` for a missing/empty URL.
 */
function toSafePath(url: string | null | undefined): string {
  return url ? compactPath(url) : "";
}
|
||||
|
||||
/**
 * Picks the best-matching skill of one category for a keyword text.
 *
 * Each skill in `category` scores `1 + min(hits, 5)`, where `hits` is the
 * number of evidence tokens (longer than three characters) that occur as a
 * substring of the skill's lower-cased `whenToUse`, plus `0.1` when the skill
 * has a truthy `version`. The highest score wins; ties go to the
 * lexicographically smaller lower-cased name, then to registry order.
 *
 * @param skills - Registry to choose from.
 * @param category - Category the skill must belong to.
 * @param evidenceText - Whitespace-separated keywords.
 * @returns The winning skill, or `null` when there are no usable tokens or
 *   no skill in the category.
 */
function selectTopSkill(
  skills: readonly SkillRegistryEntryEvidence[],
  category: string,
  evidenceText: string,
): AuditUsedSkill | null {
  const evidenceTokens = evidenceText
    .toLowerCase()
    .split(/\s+/)
    .filter((token) => token.length > 3);
  if (evidenceTokens.length === 0) {
    return null;
  }

  let best: {
    candidate: SkillRegistryEntryEvidence;
    score: number;
    name: string;
  } | null = null;

  for (const candidate of skills) {
    if (candidate.category !== category) {
      continue;
    }

    const haystack = candidate.whenToUse.toLowerCase();
    let hits = 0;
    for (const token of evidenceTokens) {
      if (haystack.includes(token)) {
        hits += 1;
      }
    }

    const score = 1 + Math.min(hits, 5) + (candidate.version ? 0.1 : 0);
    const name = candidate.name.toLowerCase();

    // Strict comparisons keep the earliest entry on a full tie.
    const beatsBest =
      best === null ||
      score > best.score ||
      (score === best.score && name.localeCompare(best.name) < 0);
    if (beatsBest) {
      best = { candidate, score, name };
    }
  }

  return best === null ? null : toAuditUsedSkill(best.candidate);
}
|
||||
|
||||
/**
 * Derives customer-facing observation lines and per-category evidence flags
 * from crawl pages and technical checks.
 *
 * Every line is added through `addUniqueCapped`, so each list is sanitized,
 * de-duplicated and capped at its category limit.
 *
 * @param crawlPages - Crawled pages to inspect.
 * @param technicalChecks - Technical check results to inspect.
 * @returns The UX, content and technical lines plus `evidenceText`, a set of
 *   booleans stating for which skill categories evidence was seen.
 */
function buildObservedSignals(
  crawlPages: readonly AuditCrawlPageEvidence[],
  technicalChecks: readonly AuditTechnicalCheckEvidence[],
): {
  ux: string[];
  content: string[];
  technical: string[];
  evidenceText: {
    design: boolean;
    ux: boolean;
    copy: boolean;
    seo: boolean;
  };
} {
  const uxSignals: string[] = [];
  const contentSignals: string[] = [];
  const technicalSignals: string[] = [];

  let uxEvidence = false;
  let copyEvidence = false;
  let seoEvidence = false;

  for (const page of crawlPages) {
    const title = trimAndNormalize(page.title ?? "");
    // Very short titles are not treated as copy evidence.
    if (title.length > 4) {
      copyEvidence = true;
      addUniqueCapped(
        contentSignals,
        `Seitentitel wurde erfasst: ${title}`,
        CONTENT_SIGNAL_LIMIT,
        (value) => sanitizeCustomerText(value, 150),
      );
    }

    if (page.hasContactFormSignal) {
      uxEvidence = true;
      addUniqueCapped(
        uxSignals,
        "Ein Kontaktformular wurde als potenzieller Einstiegspunkt erkannt.",
        UX_SIGNAL_LIMIT,
      );
    }

    if (page.hasContactCtaSignal) {
      uxEvidence = true;
      addUniqueCapped(
        uxSignals,
        "Ein klarer Call-to-Action scheint auf der Seite aktiv zu sein.",
        UX_SIGNAL_LIMIT,
      );
    }

    if (page.visibleText || page.visibleTextExcerpt) {
      copyEvidence = true;
      addUniqueCapped(
        contentSignals,
        "Sichtbarer Text wurde in der Crawl-Auswertung extrahiert.",
        CONTENT_SIGNAL_LIMIT,
      );
    }
  }

  for (const check of technicalChecks) {
    // Only an explicit `false` counts; `undefined` means "not checked".
    if (check.usesHttps === false) {
      uxEvidence = true;
      addUniqueCapped(
        technicalSignals,
        "Ein Teil der Seiten ist nicht per HTTPS erreichbar.",
        TECHNICAL_SIGNAL_LIMIT,
      );
      addUniqueCapped(
        uxSignals,
        "Die sichere Übertragung der Seite ist nicht durchgängig verifiziert.",
        UX_SIGNAL_LIMIT,
      );
    }

    if (check.missingMetaDescription) {
      seoEvidence = true;
      addUniqueCapped(
        technicalSignals,
        "Fehlende Meta-Beschreibungen können die Auffindbarkeit schwächen.",
        TECHNICAL_SIGNAL_LIMIT,
      );
      addUniqueCapped(
        contentSignals,
        "Meta-Informationen sind teilweise nicht vollständig vorhanden.",
        CONTENT_SIGNAL_LIMIT,
      );
    }

    if (check.missingTitle) {
      seoEvidence = true;
      addUniqueCapped(
        technicalSignals,
        "Einige Seiten besitzen keinen aussagekräftigen Titel.",
        TECHNICAL_SIGNAL_LIMIT,
      );
      addUniqueCapped(
        contentSignals,
        "Seitentitel fehlen auf ausgewählten Seiten.",
        CONTENT_SIGNAL_LIMIT,
      );
    }

    if (check.hasVisibleContactPath) {
      uxEvidence = true;
      addUniqueCapped(
        uxSignals,
        "Ein klarer Kontaktpfad scheint bereits vorhanden zu sein.",
        UX_SIGNAL_LIMIT,
      );
    }

    const brokenLinks = check.brokenInternalLinkCount ?? 0;
    if (brokenLinks > 0) {
      // Fix: this branch emits a UX signal, so it must also raise the UX
      // evidence flag like every other branch that does.
      uxEvidence = true;
      // The reported count is capped at 10.
      addUniqueCapped(
        technicalSignals,
        `Es wurden ${Math.min(brokenLinks, 10)} interne Verlinkungen mit Fehlerstatus erkannt.`,
        TECHNICAL_SIGNAL_LIMIT,
      );
      addUniqueCapped(
        uxSignals,
        "Nutzer könnten durch interne Linkfehler im Fluss abbrechen.",
        UX_SIGNAL_LIMIT,
      );
    }
  }

  // Any crawled page or technical check counts as design evidence.
  const designEvidence = crawlPages.length > 0 || technicalChecks.length > 0;

  const hasKeyPage = crawlPages.some(
    (page) =>
      page.pageKind === "contact" ||
      page.pageKind === "impressum" ||
      page.pageKind === "services",
  );
  if (hasKeyPage) {
    seoEvidence = true;
    uxEvidence = true;
  }

  return {
    ux: uxSignals,
    content: contentSignals,
    technical: technicalSignals,
    evidenceText: {
      design: designEvidence,
      ux: uxEvidence,
      copy: copyEvidence,
      seo: seoEvidence,
    },
  };
}
|
||||
|
||||
/**
 * Selects at most one skill per category for which evidence was observed.
 *
 * Categories are visited in a fixed order (design, ux, copy, seo, marketing,
 * offer). For each flagged category the best registry match for that
 * category's keyword text is taken via `selectTopSkill`.
 *
 * @param skillRegistry - Skills available for selection.
 * @param evidence - Per-category flags; only truthy categories are considered.
 * @returns The selected skills in category order, capped at `SELECTED_SKILLS_LIMIT`.
 */
function extractSkills(
  skillRegistry: readonly SkillRegistryEntryEvidence[],
  evidence: {
    design: boolean;
    ux: boolean;
    copy: boolean;
    seo: boolean;
    marketing: boolean;
    offer: boolean;
  },
): AuditUsedSkill[] {
  // Category plus keyword text, in selection order.
  const categoryKeywords = [
    ["design", "visuale layout seite struktur design hierarchie conversion"],
    ["ux", "kontakt formular cta nutzer flow conversion pfad"],
    ["copy", "text klarheit copy headline ton local"],
    ["seo", "local auffindbarkeit meta seo impressum kontakt"],
    ["marketing", "positionierung unterscheidung angebot"],
    ["offer", "angebot text preis rahmen"],
  ] as const;

  const selected: AuditUsedSkill[] = [];
  for (const [category, keywords] of categoryKeywords) {
    if (!evidence[category]) {
      continue;
    }

    const match = selectTopSkill(skillRegistry, category, keywords);
    if (match) {
      selected.push(match);
    }
  }

  if (selected.length > SELECTED_SKILLS_LIMIT) {
    selected.splice(SELECTED_SKILLS_LIMIT);
  }

  return selected;
}
|
||||
|
||||
/**
 * Assembles the compact evidence bundle for audit generation from the lead,
 * crawl pages, technical checks, screenshots, PageSpeed inputs and the skill
 * registry.
 *
 * All text lists are sanitized, de-duplicated and capped. Lead fields that
 * are missing or blank are skipped, and pages with neither a usable title
 * nor a parsable URL path are skipped, so no label-only lines such as
 * "Firma:" or "Seite:" are emitted.
 *
 * @param args - Evidence sources; every source is optional and defaults to empty.
 * @returns The evidence bundle.
 */
export function buildAuditEvidenceInput(
  args: AuditEvidenceInputArgs,
): AuditEvidenceInput {
  const lead: AuditLeadEvidence = args.lead ?? {};
  const crawlPages = args.crawlPages ?? [];
  const technicalChecks = args.technicalChecks ?? [];
  const screenshots = args.screenshots ?? [];
  const pageSpeedInputs = args.pageSpeedInputs ?? [];
  const skillRegistry = args.skillRegistry ?? [];

  // Company context: one "Label: value" line per populated lead field.
  // Values the sanitizer rejects (for example http(s) URLs) are still dropped.
  const leadFields: ReadonlyArray<readonly [string, string | null | undefined]> = [
    ["Firma", lead.companyName],
    ["Sparte", lead.niche],
    ["Ort", lead.city],
    ["Adresse", lead.address],
    ["Domain", lead.websiteDomain],
    ["Kontaktperson", lead.contactPerson],
    ["Telefon", lead.phone],
    ["Website", lead.websiteUrl],
  ];
  const companyContext: string[] = [];
  for (const [label, value] of leadFields) {
    // The label alone would pass the sanitizer, so blank values are skipped here.
    if (typeof value !== "string" || value.trim() === "") {
      continue;
    }
    addUniqueCapped(companyContext, `${label}: ${value}`, COMPANY_CONTEXT_LIMIT);
  }

  // Checked pages: prefer the sanitized title, fall back to the URL path.
  const checkedPages: string[] = [];
  for (const page of crawlPages) {
    const title = sanitizeCustomerText(page.title ?? "", 90);
    const pageLabel = title || toSafePath(page.finalUrl ?? page.sourceUrl ?? "");
    if (!pageLabel) {
      // Nothing presentable for this page; avoid emitting a bare "kind:" line.
      continue;
    }

    const label = compactLabelForPage(page.pageKind ?? "Seite", pageLabel);
    if (!label || label === page.pageKind) {
      continue;
    }

    addUniqueCapped(checkedPages, label, CHECKED_PAGES_LIMIT);
  }

  if (checkedPages.length === 0 && lead.companyName) {
    addUniqueCapped(
      checkedPages,
      `Website-Startseite analysiert: ${lead.companyName}`,
      CHECKED_PAGES_LIMIT,
    );
  }

  const signals = buildObservedSignals(crawlPages, technicalChecks);

  const pageSpeedCustomerImplications: string[] = [];
  for (const implication of buildPageSpeedAuditInputs(pageSpeedInputs)
    .customerImplications) {
    addUniqueCapped(
      pageSpeedCustomerImplications,
      implication,
      PAGESPEED_SIGNAL_LIMIT,
      sanitizeCustomerText,
    );
  }

  // Marketing and offer skills are never selected from this evidence.
  const selectedSkills = extractSkills(skillRegistry, {
    ...signals.evidenceText,
    marketing: false,
    offer: false,
  });

  // Copy only the known fields; negative dimensions are clamped to 0.
  const screenshotReferences = screenshots
    .slice(0, SCREENSHOT_REFERENCE_LIMIT)
    .map((screenshot) => ({
      storageId: screenshot.storageId,
      sourceUrl: screenshot.sourceUrl,
      viewport: screenshot.viewport,
      width: Math.max(screenshot.width, 0),
      height: Math.max(screenshot.height, 0),
      mimeType: screenshot.mimeType,
      capturedAt: Number(screenshot.capturedAt),
    }));

  return {
    companyContext,
    checkedPages,
    observedUxSignals: signals.ux,
    observedContentSignals: signals.content,
    observedTechnicalSignals: signals.technical,
    screenshotReferences,
    pageSpeedCustomerImplications,
    selectedSkills,
  };
}
|
||||
Reference in New Issue
Block a user