Files
webdev-pipeline/lib/ai/audit-evidence.ts

566 lines
14 KiB
TypeScript

import {
type SkillRegistryEntry,
toAuditUsedSkill,
type AuditUsedSkill,
} from "../skills-registry";
import {
buildPageSpeedAuditInputs,
type PageSpeedMinimalAuditResult,
} from "../pagespeed-audit-input";
/**
 * Alias of the imported `SkillRegistryEntry` type, exported under an
 * evidence-specific name and used for the skill-registry parameters in this module.
 */
export type SkillRegistryEntryEvidence = SkillRegistryEntry;
/**
 * Lead/company details that `buildAuditEvidenceInput` turns into labelled
 * company-context lines. Every field is optional and may also be `null`.
 */
export type AuditLeadEvidence = {
/** Rendered as the "Firma: …" line; also used for the fallback checked-page entry. */
companyName?: string | null;
/** Rendered as the "Sparte: …" line. */
niche?: string | null;
/** Rendered as the "Ort: …" line. */
city?: string | null;
/** Rendered as the "Domain: …" line. */
websiteDomain?: string | null;
/** Rendered as the "Website: …" line (subject to the same text sanitizing as all lines). */
websiteUrl?: string | null;
/** Rendered as the "Adresse: …" line. */
address?: string | null;
/** Rendered as the "Telefon: …" line. */
phone?: string | null;
/** Rendered as the "Kontaktperson: …" line. */
contactPerson?: string | null;
};
/**
 * Per-page crawl result consumed when deriving checked-page labels and
 * observed UX/content signals. Every field is optional.
 */
export type AuditCrawlPageEvidence = {
/** URL that was requested; used for the page path only when `finalUrl` is null/undefined. */
sourceUrl?: string | null;
/** Preferred URL for deriving the page path in `buildAuditEvidenceInput`. */
finalUrl?: string | null;
/** Page title; used in checked-page labels and in a content signal when longer than 4 characters. */
title?: string | null;
/** Meta description text of the page. */
metaDescription?: string | null;
/** Page classification; "contact", "impressum" and "services" are the values this module checks for. */
pageKind?: string | null;
/** When truthy, a contact-form UX signal is recorded. */
hasContactFormSignal?: boolean;
/** When truthy, a call-to-action UX signal is recorded. */
hasContactCtaSignal?: boolean;
/** Extracted visible text; only its presence (truthiness) is evaluated in this module. */
visibleText?: string | null;
/** Excerpt of the visible text; only its presence (truthiness) is evaluated in this module. */
visibleTextExcerpt?: string | null;
};
/**
 * Result of a technical check that `buildObservedSignals` converts into
 * technical, UX and content signal lines. Every field is optional.
 */
export type AuditTechnicalCheckEvidence = {
/** URL that was requested for the check. */
sourceUrl?: string | null;
/** URL after the check resolved (presumably after redirects — confirm with the producer). */
finalUrl?: string | null;
/** Only an explicit `false` produces the "not reachable via HTTPS" signals. */
usesHttps?: boolean;
/** When truthy, missing-title signals are recorded. */
missingTitle?: boolean;
/** When truthy, missing-meta-description signals are recorded. */
missingMetaDescription?: boolean;
/** When truthy, a "contact path present" UX signal is recorded. */
hasVisibleContactPath?: boolean;
/** Values above 0 produce broken-link signals; the reported number is capped at 10. */
brokenInternalLinkCount?: number;
};
/**
 * Screenshot metadata accepted by `buildAuditEvidenceInput`. Only the named
 * fields are copied into the output; the index signature lets callers pass
 * records carrying additional properties.
 */
export type AuditScreenshotEvidence = {
/** Storage identifier of the stored screenshot. */
storageId: string;
/** Viewport label of the capture. */
viewport: string;
/** URL the screenshot was taken from. */
sourceUrl: string;
/** Capture timestamp (unit not visible here — presumably epoch milliseconds; confirm with the producer). */
capturedAt: number;
/** Image width; negative values are clamped to 0 in the output. */
width: number;
/** Image height; negative values are clamped to 0 in the output. */
height: number;
/** MIME type of the image. */
mimeType: string;
/** Any further properties are allowed and ignored by this module. */
[key: string]: unknown;
};
/**
 * Output of `buildAuditEvidenceInput`: sanitized, de-duplicated and capped
 * evidence lists plus screenshot references and the selected skills.
 */
export type AuditEvidenceInput = {
/** Labelled lead lines such as "Firma: …" or "Ort: …". */
companyContext: string[];
/** Labels of the crawled pages (page kind plus title or path). */
checkedPages: string[];
/** UX-related observations derived from crawl pages and technical checks. */
observedUxSignals: string[];
/** Content-related observations derived from crawl pages and technical checks. */
observedContentSignals: string[];
/** Technical observations derived from the technical checks. */
observedTechnicalSignals: string[];
/** Named screenshot fields copied from the input, limited in count. */
screenshotReferences: Array<{
storageId: string;
sourceUrl: string;
viewport: string;
width: number;
height: number;
mimeType: string;
capturedAt: number;
}>;
/** Customer implications returned by `buildPageSpeedAuditInputs`, passed through the text sanitizer. */
pageSpeedCustomerImplications: string[];
/** At most one skill per evidence category, as chosen by `extractSkills`. */
selectedSkills: AuditUsedSkill[];
};
/**
 * Arguments of `buildAuditEvidenceInput`. Every property is optional; a
 * missing property is treated as an empty object / empty list.
 */
export type AuditEvidenceInputArgs = {
/** Lead/company details for the company-context lines. */
lead?: AuditLeadEvidence;
/** Crawled pages used for checked-page labels and observed signals. */
crawlPages?: readonly AuditCrawlPageEvidence[];
/** Technical check results used for observed signals. */
technicalChecks?: readonly AuditTechnicalCheckEvidence[];
/** Screenshot metadata to reference in the output. */
screenshots?: readonly AuditScreenshotEvidence[];
/** PageSpeed results forwarded to `buildPageSpeedAuditInputs`. */
pageSpeedInputs?: readonly PageSpeedMinimalAuditResult[];
/** Skill registry entries from which skills are selected. */
skillRegistry?: readonly SkillRegistryEntryEvidence[];
};
// Maximum number of entries kept in each output list.
const COMPANY_CONTEXT_LIMIT = 8;
const CHECKED_PAGES_LIMIT = 8;
const UX_SIGNAL_LIMIT = 6;
const CONTENT_SIGNAL_LIMIT = 6;
const TECHNICAL_SIGNAL_LIMIT = 6;
const PAGESPEED_SIGNAL_LIMIT = 8;
const SCREENSHOT_REFERENCE_LIMIT = 8;
const SELECTED_SKILLS_LIMIT = 6;
// Rejection patterns used by sanitizeCustomerText. None of them carries the
// `g` or `y` flag, so repeated `.test()` calls keep no state between inputs.
// Matches an http:// or https:// URL anywhere in the text.
const URL_PATTERN = /\bhttps?:\/\/[^\s<>"']+/i;
// Matches a `{…}` or `[…]` segment, i.e. text that looks like embedded JSON.
const JSON_BRACKET_PATTERN = /\{[^}]*\}|\[[^\]]*\]/;
// Matches tool/metric vocabulary: "raw storage id", "lighthouse", "pagespeed", "score".
const PAGESPEED_NOISE_PATTERN =
/\b(?:raw\s*storage\s*id|rawstorageid|lighthouse|pagespeed|score)\b/i;
// Matches an unbroken run of 24+ letters, digits, underscores or hyphens
// (presumably IDs/hashes; note that very long compound words match as well).
const MACHINE_TOKEN_PATTERN = /\b[a-z\d_-]{24,}\b/i;
/**
 * Collapses every run of whitespace into a single space and trims both ends.
 *
 * @param input - Any value; only strings are processed.
 * @returns The normalized string, or "" when `input` is not a string.
 */
function trimAndNormalize(input: unknown): string {
  return typeof input === "string" ? input.replace(/\s+/g, " ").trim() : "";
}
/**
 * Produces a customer-presentable version of a text, or "" when the text
 * must not be shown.
 *
 * The value is whitespace-normalized and stripped of HTML-like tags. The
 * result is rejected (returned as "") when it contains an http(s) URL, a
 * bracketed JSON-like segment, PageSpeed/tooling vocabulary or a long
 * machine-like token, when it is longer than `maxLength`, or when it holds
 * no letter from a-z/ä/ö/ü/ß.
 *
 * @param value - Candidate text; non-strings are treated as empty.
 * @param maxLength - Longest accepted result; longer texts are dropped, not truncated.
 * @returns The cleaned text, or "" if it was empty or rejected.
 */
function sanitizeCustomerText(value: unknown, maxLength = 180): string {
  const normalized = trimAndNormalize(value);
  if (!normalized) {
    return "";
  }

  // Tags become spaces, so the whitespace has to be squeezed once more.
  const cleaned = normalized
    .replace(/<[^>]*>/g, " ")
    .replace(/\s{2,}/g, " ")
    .trim();

  const rejectionPatterns = [
    URL_PATTERN,
    JSON_BRACKET_PATTERN,
    PAGESPEED_NOISE_PATTERN,
    MACHINE_TOKEN_PATTERN,
  ];
  const isRejected =
    rejectionPatterns.some((pattern) => pattern.test(cleaned)) ||
    cleaned.length > maxLength ||
    !/[a-zäöüß]/i.test(cleaned);

  return isRejected ? "" : cleaned;
}
/**
 * Appends a sanitized line to `bucket` unless it is empty, already present
 * (compared case-insensitively) or the bucket has reached `max` entries.
 *
 * @param bucket - Target list; mutated in place.
 * @param input - Raw candidate line.
 * @param max - Maximum number of entries the bucket may hold.
 * @param sanitizer - Cleans the candidate; an empty result skips the line.
 */
function addUniqueCapped(
  bucket: string[],
  input: string,
  max: number,
  sanitizer = sanitizeCustomerText,
): void {
  const cleaned = sanitizer(input);
  if (!cleaned || !(bucket.length < max)) {
    return;
  }

  const key = cleaned.toLowerCase();
  for (const existing of bucket) {
    if (existing.toLowerCase() === key) {
      return;
    }
  }
  bucket.push(cleaned);
}
/**
 * Reduces an absolute URL to a short, readable path label.
 *
 * @param urlLike - Absolute URL string.
 * @returns "Startseite" for the root path, otherwise the path without its
 *   leading slash, with repeated slashes collapsed and cut to 70 characters;
 *   "" when `urlLike` cannot be parsed as a URL.
 */
function compactPath(urlLike: string): string {
  let pathname: string;
  try {
    pathname = new URL(urlLike).pathname;
  } catch {
    // Not a parseable absolute URL.
    return "";
  }

  const collapsed = (pathname || "/").replace(/\/+/g, "/").trim();
  const isRoot = collapsed === "" || collapsed === "/";
  return isRoot ? "Startseite" : collapsed.replace(/^\//, "").slice(0, 70);
}
/**
 * Builds the display label for a checked page.
 *
 * @param pageKind - Page classification used as a prefix; may be empty.
 * @param pageLabel - Title or path of the page.
 * @returns `pageLabel` cut to 100 characters (without prefix) when it is
 *   longer than that; otherwise "<pageKind>: <pageLabel>", or just
 *   `pageLabel` when `pageKind` is empty.
 */
function compactLabelForPage(pageKind: string, pageLabel: string): string {
  const isOverlong = pageLabel.length > 100;
  if (isOverlong) {
    return pageLabel.slice(0, 100);
  }
  return pageKind ? `${pageKind}: ${pageLabel}` : pageLabel;
}
/**
 * Null-safe wrapper around `compactPath`.
 *
 * @param url - URL string, or a missing/empty value.
 * @returns The compact path label, or "" when `url` is null, undefined or empty.
 */
function toSafePath(url: string | null | undefined): string {
  return url ? compactPath(url) : "";
}
/**
 * Picks the best-matching skill of one category for a set of evidence keywords.
 *
 * Each skill of the category scores 1, plus 1 per evidence keyword found in
 * its `whenToUse` text (at most 5), plus 0.1 when it has a `version`. The
 * highest score wins; ties go to the lower-cased name that sorts first, and
 * remaining ties to the entry that comes first in `skills`.
 *
 * @param skills - Registry entries to choose from.
 * @param category - Category the skill must belong to.
 * @param evidenceText - Whitespace-separated keywords; words of 3 or fewer characters are ignored.
 * @returns The winning skill converted with `toAuditUsedSkill`, or `null`
 *   when no keyword remains or the category has no entries.
 */
function selectTopSkill(
  skills: readonly SkillRegistryEntryEvidence[],
  category: string,
  evidenceText: string,
): AuditUsedSkill | null {
  const keywords = evidenceText
    .toLowerCase()
    .split(/\s+/)
    .filter((word) => word.length > 3);
  if (keywords.length === 0) {
    return null;
  }

  let best: {
    entry: SkillRegistryEntryEvidence;
    score: number;
    sortName: string;
  } | null = null;

  for (const entry of skills) {
    if (entry.category !== category) {
      continue;
    }

    const haystack = entry.whenToUse.toLowerCase();
    let hits = 0;
    for (const keyword of keywords) {
      if (haystack.includes(keyword)) {
        hits += 1;
      }
    }
    const score = 1 + Math.min(hits, 5) + (entry.version ? 0.1 : 0);
    const sortName = entry.name.toLowerCase();

    // Strict comparisons keep the earliest entry when score and name tie.
    const beatsBest =
      best === null ||
      score > best.score ||
      (score === best.score && sortName.localeCompare(best.sortName) < 0);
    if (beatsBest) {
      best = { entry, score, sortName };
    }
  }

  return best === null ? null : toAuditUsedSkill(best.entry);
}
/**
 * Derives customer-facing observation lines from crawl pages and technical
 * checks, together with flags that record which skill categories have evidence.
 *
 * Lines are de-duplicated and capped per bucket by `addUniqueCapped`, so the
 * order in which observations are recorded decides which ones survive.
 *
 * @param crawlPages - Crawled pages (title, contact signals, visible text, page kind).
 * @param technicalChecks - Technical check results (HTTPS, meta data, contact path, broken links).
 * @returns The UX, content and technical lines plus the `evidenceText`
 *   flags for the design, ux, copy and seo categories.
 */
function buildObservedSignals(
  crawlPages: readonly AuditCrawlPageEvidence[],
  technicalChecks: readonly AuditTechnicalCheckEvidence[],
): {
  ux: string[];
  content: string[];
  technical: string[];
  evidenceText: {
    design: boolean;
    ux: boolean;
    copy: boolean;
    seo: boolean;
  };
} {
  const ux: string[] = [];
  const content: string[] = [];
  const technical: string[] = [];
  const flags = { design: false, ux: false, copy: false, seo: false };

  // One recorder per bucket keeps the matching limit next to its list.
  const noteUx = (line: string): void =>
    addUniqueCapped(ux, line, UX_SIGNAL_LIMIT);
  const noteContent = (line: string): void =>
    addUniqueCapped(content, line, CONTENT_SIGNAL_LIMIT);
  const noteTechnical = (line: string): void =>
    addUniqueCapped(technical, line, TECHNICAL_SIGNAL_LIMIT);

  for (const page of crawlPages) {
    const pageTitle = trimAndNormalize(page.title ?? "");
    if (pageTitle.length > 4) {
      flags.copy = true;
      // Title lines use a tighter length limit than the default sanitizer.
      addUniqueCapped(
        content,
        `Seitentitel wurde erfasst: ${pageTitle}`,
        CONTENT_SIGNAL_LIMIT,
        (value) => sanitizeCustomerText(value, 150),
      );
    }

    if (page.hasContactFormSignal) {
      flags.ux = true;
      noteUx("Ein Kontaktformular wurde als potenzieller Einstiegspunkt erkannt.");
    }

    if (page.hasContactCtaSignal) {
      flags.ux = true;
      noteUx("Ein klarer Call-to-Action scheint auf der Seite aktiv zu sein.");
    }

    if (page.visibleText || page.visibleTextExcerpt) {
      flags.copy = true;
      noteContent("Sichtbarer Text wurde in der Crawl-Auswertung extrahiert.");
    }
  }

  for (const check of technicalChecks) {
    // Only an explicit `false` counts; an unknown HTTPS state is not reported.
    if (check.usesHttps === false) {
      flags.ux = true;
      noteTechnical("Ein Teil der Seiten ist nicht per HTTPS erreichbar.");
      noteUx("Die sichere Übertragung der Seite ist nicht durchgängig verifiziert.");
    }

    if (check.missingMetaDescription) {
      flags.seo = true;
      noteTechnical("Fehlende Meta-Beschreibungen können die Auffindbarkeit schwächen.");
      noteContent("Meta-Informationen sind teilweise nicht vollständig vorhanden.");
    }

    if (check.missingTitle) {
      flags.seo = true;
      noteTechnical("Einige Seiten besitzen keinen aussagekräftigen Titel.");
      noteContent("Seitentitel fehlen auf ausgewählten Seiten.");
    }

    if (check.hasVisibleContactPath) {
      flags.ux = true;
      noteUx("Ein klarer Kontaktpfad scheint bereits vorhanden zu sein.");
    }

    const brokenLinkCount = check.brokenInternalLinkCount ?? 0;
    if (brokenLinkCount > 0) {
      // The number shown to the customer never exceeds 10.
      noteTechnical(
        `Es wurden ${Math.min(brokenLinkCount, 10)} interne Verlinkungen mit Fehlerstatus erkannt.`,
      );
      noteUx("Nutzer könnten durch interne Linkfehler im Fluss abbrechen.");
    }
  }

  // Any crawled page or technical check counts as design evidence.
  flags.design = crawlPages.length > 0 || technicalChecks.length > 0;

  // These page kinds count as both SEO and UX evidence.
  const keyPageKinds = ["contact", "impressum", "services"];
  const hasKeyPage = crawlPages.some((page) =>
    keyPageKinds.includes(page.pageKind ?? ""),
  );
  if (hasKeyPage) {
    flags.seo = true;
    flags.ux = true;
  }

  return { ux, content, technical, evidenceText: flags };
}
/**
 * Selects at most one skill per category for which evidence exists.
 *
 * Categories are visited in the fixed order design, ux, copy, seo,
 * marketing, offer. For each category flagged in `evidence`, the
 * category's keyword string is passed to `selectTopSkill`.
 *
 * @param skillRegistry - Registry entries to choose from.
 * @param evidence - Per-category flags; only categories set to `true` are considered.
 * @returns The chosen skills in category order, at most `SELECTED_SKILLS_LIMIT` entries.
 */
function extractSkills(
  skillRegistry: readonly SkillRegistryEntryEvidence[],
  evidence: {
    design: boolean;
    ux: boolean;
    copy: boolean;
    seo: boolean;
    marketing: boolean;
    offer: boolean;
  },
): AuditUsedSkill[] {
  // Category paired with the keywords matched against each skill's usage text.
  const keywordsByCategory = [
    ["design", "visuale layout seite struktur design hierarchie conversion"],
    ["ux", "kontakt formular cta nutzer flow conversion pfad"],
    ["copy", "text klarheit copy headline ton local"],
    ["seo", "local auffindbarkeit meta seo impressum kontakt"],
    ["marketing", "positionierung unterscheidung angebot"],
    ["offer", "angebot text preis rahmen"],
  ] as const;

  const picked: AuditUsedSkill[] = [];
  for (const [category, keywords] of keywordsByCategory) {
    if (!evidence[category]) {
      continue;
    }
    const skill = selectTopSkill(skillRegistry, category, keywords);
    if (skill) {
      picked.push(skill);
    }
  }

  return picked.slice(0, SELECTED_SKILLS_LIMIT);
}
/**
 * Assembles the sanitized evidence bundle for an audit from lead data, crawl
 * results, technical checks, screenshots, PageSpeed results and the skill
 * registry.
 *
 * All text lists are passed through the customer-text sanitizer,
 * de-duplicated case-insensitively and capped at their respective limits.
 * Lead fields and pages that carry no usable value are skipped, so the
 * output never contains label-only lines such as "Firma:" or "Seite:".
 *
 * @param args - Evidence sources; every property is optional and defaults to empty.
 * @returns The evidence lists, screenshot references and selected skills.
 */
export function buildAuditEvidenceInput(
  args: AuditEvidenceInputArgs,
): AuditEvidenceInput {
  const lead: AuditLeadEvidence = args.lead ?? {};
  const crawlPages = args.crawlPages ?? [];
  const technicalChecks = args.technicalChecks ?? [];
  const screenshots = args.screenshots ?? [];
  const pageSpeedInputs = args.pageSpeedInputs ?? [];
  const skillRegistry = args.skillRegistry ?? [];

  const companyContext: string[] = [];
  const addLeadContext = (
    label: string,
    value: string | null | undefined,
  ): void => {
    const labelOnly = `${label}:`;
    addUniqueCapped(
      companyContext,
      `${labelOnly} ${value ?? ""}`,
      COMPANY_CONTEXT_LIMIT,
      (raw) => {
        // The whole line is sanitized (the sanitizer requires a letter, which
        // e.g. a bare phone number lacks); a line that collapses to its label
        // means the value was missing, blank or fully stripped.
        const cleaned = sanitizeCustomerText(raw);
        return cleaned === labelOnly ? "" : cleaned;
      },
    );
  };
  addLeadContext("Firma", lead.companyName);
  addLeadContext("Sparte", lead.niche);
  addLeadContext("Ort", lead.city);
  addLeadContext("Adresse", lead.address);
  addLeadContext("Domain", lead.websiteDomain);
  addLeadContext("Kontaktperson", lead.contactPerson);
  addLeadContext("Telefon", lead.phone);
  // The sanitizer rejects any text containing an http(s) URL, so this line
  // only survives for scheme-less website values.
  addLeadContext("Website", lead.websiteUrl);

  const checkedPages: string[] = [];
  for (const page of crawlPages) {
    // `||` lets an empty finalUrl fall back to the source URL as well.
    const safePath = toSafePath(page.finalUrl || page.sourceUrl || "");
    const title = sanitizeCustomerText(page.title ?? "", 90);
    const pageLabel = title || safePath;
    if (!pageLabel) {
      // Neither a usable title nor a path: skip instead of emitting "Seite:".
      continue;
    }
    addUniqueCapped(
      checkedPages,
      compactLabelForPage(page.pageKind ?? "Seite", pageLabel),
      CHECKED_PAGES_LIMIT,
    );
  }

  // Fallback entry when no crawled page produced a label; a blank company
  // name is ignored so the entry cannot end in a dangling colon.
  const companyName = trimAndNormalize(lead.companyName);
  if (checkedPages.length === 0 && companyName) {
    addUniqueCapped(
      checkedPages,
      `Website-Startseite analysiert: ${companyName}`,
      CHECKED_PAGES_LIMIT,
    );
  }

  const signals = buildObservedSignals(crawlPages, technicalChecks);

  const pageSpeedCustomerImplications: string[] = [];
  const pageSpeedOutput = buildPageSpeedAuditInputs(pageSpeedInputs);
  for (const implication of pageSpeedOutput.customerImplications) {
    addUniqueCapped(
      pageSpeedCustomerImplications,
      implication,
      PAGESPEED_SIGNAL_LIMIT,
      sanitizeCustomerText,
    );
  }

  // Marketing and offer skills are never selected from this evidence.
  const selectedSkills = extractSkills(skillRegistry, {
    ...signals.evidenceText,
    marketing: false,
    offer: false,
  });

  // Copy only the named fields; extra properties on the input are dropped.
  const screenshotReferences = screenshots
    .slice(0, SCREENSHOT_REFERENCE_LIMIT)
    .map((screenshot) => ({
      storageId: screenshot.storageId,
      sourceUrl: screenshot.sourceUrl,
      viewport: screenshot.viewport,
      width: Math.max(screenshot.width, 0),
      height: Math.max(screenshot.height, 0),
      mimeType: screenshot.mimeType,
      capturedAt: Number(screenshot.capturedAt),
    }));

  return {
    companyContext,
    checkedPages,
    observedUxSignals: signals.ux,
    observedContentSignals: signals.content,
    observedTechnicalSignals: signals.technical,
    screenshotReferences,
    pageSpeedCustomerImplications,
    selectedSkills,
  };
}