Files
pitchfast/lib/ai/audit-evidence.ts

692 lines
18 KiB
TypeScript

import {
type SkillRegistryEntry,
toAuditUsedSkill,
type AuditUsedSkill,
} from "../skills-registry";
import {
buildPageSpeedAuditInputs,
type PageSpeedMinimalAuditResult,
} from "../pagespeed-audit-input";
/** Skill registry entry as accepted by this module; a plain alias of `SkillRegistryEntry`. */
export type SkillRegistryEntryEvidence = SkillRegistryEntry;
/**
 * Lead details that may accompany an audit request.
 *
 * Every field is optional and may also be `null`. `buildAuditEvidenceInput`
 * turns each field into one labelled line of `companyContext`, and uses
 * `websiteDomain` / `websiteUrl` as one indicator that a website exists.
 */
export type AuditLeadEvidence = {
companyName?: string | null;
niche?: string | null;
city?: string | null;
websiteDomain?: string | null;
websiteUrl?: string | null;
address?: string | null;
phone?: string | null;
contactPerson?: string | null;
};
/**
 * Evidence collected for one crawled page.
 *
 * All fields are optional. When a page label is derived, `finalUrl` is
 * preferred over `sourceUrl`. The title, the two contact signals and the
 * presence of visible text feed the observed signals in
 * `buildObservedSignals`.
 */
export type AuditCrawlPageEvidence = {
sourceUrl?: string | null;
finalUrl?: string | null;
title?: string | null;
// Not read by the code visible in this section of the file.
metaDescription?: string | null;
// Compared against "contact", "impressum" and "services" in buildObservedSignals.
pageKind?: string | null;
hasContactFormSignal?: boolean;
hasContactCtaSignal?: boolean;
// Only the presence of either text field is checked here; the text itself is not copied.
visibleText?: string | null;
visibleTextExcerpt?: string | null;
};
/**
 * Result of the technical checks for one URL.
 *
 * `buildObservedSignals` emits signals when `usesHttps` is exactly `false`,
 * when `missingTitle`, `missingMetaDescription` or `hasVisibleContactPath`
 * is truthy, and when `brokenInternalLinkCount` is greater than zero.
 */
export type AuditTechnicalCheckEvidence = {
// The two URL fields are not read by the code visible in this section of the file.
sourceUrl?: string | null;
finalUrl?: string | null;
usesHttps?: boolean;
missingTitle?: boolean;
missingMetaDescription?: boolean;
hasVisibleContactPath?: boolean;
// A missing count is treated as 0.
brokenInternalLinkCount?: number;
};
/**
 * Metadata of one stored screenshot.
 *
 * The index signature allows callers to pass records with additional
 * properties; `buildAuditEvidenceInput` copies only the named fields into
 * `screenshotReferences`.
 */
export type AuditScreenshotEvidence = {
storageId: string;
// Compared against "desktop" and "mobile" when skill inputs are resolved.
viewport: string;
sourceUrl: string;
// NOTE(review): presumably an epoch timestamp; the unit is not visible here — confirm.
capturedAt: number;
width: number;
height: number;
mimeType: string;
[key: string]: unknown;
};
/**
 * Return value of `buildAuditEvidenceInput`: sanitized, de-duplicated and
 * size-capped evidence lists plus screenshot references and selected skills.
 */
export type AuditEvidenceInput = {
// Labelled lead lines such as "Firma: …" or "Ort: …".
companyContext: string[];
// One label per crawled page (page kind plus title or URL path).
checkedPages: string[];
observedUxSignals: string[];
observedContentSignals: string[];
observedTechnicalSignals: string[];
// Present only when non-empty markdown was supplied.
externalMarkdown?: string;
screenshotReferences: Array<{
storageId: string;
sourceUrl: string;
viewport: string;
width: number;
height: number;
mimeType: string;
capturedAt: number;
}>;
pageSpeedCustomerImplications: string[];
selectedSkills: AuditUsedSkill[];
};
/**
 * Arguments of `buildAuditEvidenceInput`.
 *
 * Every property is optional; a missing lead is treated as an empty object
 * and every missing list as an empty list.
 */
export type AuditEvidenceInputArgs = {
lead?: AuditLeadEvidence;
crawlPages?: readonly AuditCrawlPageEvidence[];
technicalChecks?: readonly AuditTechnicalCheckEvidence[];
screenshots?: readonly AuditScreenshotEvidence[];
pageSpeedInputs?: readonly PageSpeedMinimalAuditResult[];
skillRegistry?: readonly SkillRegistryEntryEvidence[];
// Whitespace-collapsed and length-capped by sanitizeExternalMarkdown before use.
externalMarkdown?: string;
};
// Upper bounds applied while the audit evidence is assembled.
/** Maximum number of lines kept in `companyContext`. */
const COMPANY_CONTEXT_LIMIT = 8;
/** Maximum number of labels kept in `checkedPages`. */
const CHECKED_PAGES_LIMIT = 8;
/** Maximum number of observed UX signals. */
const UX_SIGNAL_LIMIT = 6;
/** Maximum number of observed content signals. */
const CONTENT_SIGNAL_LIMIT = 6;
/** Maximum number of observed technical signals. */
const TECHNICAL_SIGNAL_LIMIT = 6;
/** Maximum number of PageSpeed customer implications. */
const PAGESPEED_SIGNAL_LIMIT = 8;
/** Maximum number of screenshots copied into `screenshotReferences`. */
const SCREENSHOT_REFERENCE_LIMIT = 8;
/** Maximum number of entries in `selectedSkills`. */
const SELECTED_SKILLS_LIMIT = 6;
/** Maximum length of the external markdown, as counted by `String.prototype.slice` (UTF-16 code units). */
const EXTERNAL_MARKDOWN_LIMIT = 4_000;
/**
 * Maps each listed skill id to its position in the list (0 comes first).
 * `selectV3Skills` sorts skills whose id is listed here ahead of all other
 * skills, in this order, before the skill cap is applied.
 */
const V3_LOCAL_AUDIT_PRIORITY = new Map(
[
"visual-design",
"contact-conversion",
"local-seo-basics",
"performance-experience",
"mobile-usability",
"conversion-copy",
"first-impression-clarity",
"trust-signals",
"accessibility-basics",
].map((id, index) => [id, index] as const),
);
// Patterns used by sanitizeCustomerText; a match on any of them rejects the whole text.
/** An `http://` or `https://` URL (case-insensitive) running up to whitespace, `<`, `>` or a quote. */
const URL_PATTERN = /\bhttps?:\/\/[^\s<>"']+/i;
/** A `{…}` or `[…]` pair with no closing bracket of the same kind in between. */
const JSON_BRACKET_PATTERN = /\{[^}]*\}|\[[^\]]*\]/;
/** The whole words "raw storage id" (whitespace optional), "lighthouse", "pagespeed" or "score", case-insensitive. */
const PAGESPEED_NOISE_PATTERN =
/\b(?:raw\s*storage\s*id|rawstorageid|lighthouse|pagespeed|score)\b/i;
/** A run of 24 or more ASCII letters, digits, underscores or hyphens between word boundaries. */
const MACHINE_TOKEN_PATTERN = /\b[a-z\d_-]{24,}\b/i;
/**
 * Collapses every run of whitespace into a single space and trims both ends.
 *
 * @param input Any value; only strings are processed.
 * @returns The normalized string, or `""` when `input` is not a string.
 */
function trimAndNormalize(input: unknown): string {
  return typeof input === "string" ? input.replace(/\s+/g, " ").trim() : "";
}
/**
 * Produces a customer-safe single-line text, or `""` when the value must not
 * be shown.
 *
 * The value is whitespace-normalized and stripped of `<…>` tags. The result
 * is rejected (empty string) when it contains a URL, a bracketed `{…}`/`[…]`
 * fragment, a PageSpeed noise word or a long machine-like token, when it is
 * longer than `maxLength`, or when it contains no letter (a-z, ä, ö, ü, ß).
 *
 * @param value Candidate text; non-strings are rejected.
 * @param maxLength Longest accepted text; longer texts are rejected, not cut.
 * @returns The cleaned text, or `""` when it was rejected.
 */
function sanitizeCustomerText(value: unknown, maxLength = 180): string {
  const normalized = trimAndNormalize(value);
  if (!normalized) {
    return "";
  }

  // Replace tags by a space, then collapse the gaps this may have opened.
  const text = normalized
    .replace(/<[^>]*>/g, " ")
    .replace(/\s{2,}/g, " ")
    .trim();

  const forbiddenPatterns = [
    URL_PATTERN,
    JSON_BRACKET_PATTERN,
    PAGESPEED_NOISE_PATTERN,
    MACHINE_TOKEN_PATTERN,
  ];
  const isRejected =
    forbiddenPatterns.some((pattern) => pattern.test(text)) ||
    text.length > maxLength ||
    !/[a-zäöüß]/i.test(text);

  return isRejected ? "" : text;
}
/**
 * Collapses whitespace in externally supplied markdown and caps its length.
 *
 * The cut never leaves half of a surrogate pair behind: when the limit falls
 * between the two code units of an astral character (e.g. an emoji), the
 * dangling high surrogate is dropped so the result stays well-formed UTF-16.
 *
 * @param value Raw markdown; anything but a string yields `undefined`.
 * @param maxLength Maximum length in UTF-16 code units; defaults to
 *   `EXTERNAL_MARKDOWN_LIMIT`.
 * @returns The capped text, or `undefined` when nothing usable remains.
 */
function sanitizeExternalMarkdown(
  value: unknown,
  maxLength: number = EXTERNAL_MARKDOWN_LIMIT,
): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const markdown = value.replace(/\s+/g, " ").trim();
  if (!markdown) {
    return undefined;
  }

  // A negative limit would make slice() count from the end; clamp it to 0.
  let capped = markdown.slice(0, Math.max(0, maxLength));
  if (capped.length < markdown.length) {
    const lastUnit = capped.charCodeAt(capped.length - 1);
    const endsWithHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
    if (endsWithHighSurrogate) {
      capped = capped.slice(0, -1);
    }
  }
  return capped || undefined;
}
/**
 * Appends the sanitized form of `input` to `bucket` unless the sanitizer
 * rejects it, the bucket already holds `max` entries, or an entry that is
 * equal ignoring case is already present.
 *
 * @param bucket List that is mutated in place.
 * @param input Raw text to add.
 * @param max Maximum number of entries `bucket` may hold.
 * @param sanitizer Cleans the input; an empty result means "skip".
 */
function addUniqueCapped(
  bucket: string[],
  input: string,
  max: number,
  sanitizer = sanitizeCustomerText,
): void {
  const cleaned = sanitizer(input);
  if (!cleaned) {
    return;
  }
  // Written as a negated "<" so a NaN limit behaves like a full bucket.
  if (!(bucket.length < max)) {
    return;
  }

  const wanted = cleaned.toLowerCase();
  for (const existing of bucket) {
    if (existing.toLowerCase() === wanted) {
      return;
    }
  }
  bucket.push(cleaned);
}
/**
 * Turns an absolute URL into a short, human-readable path label.
 *
 * - Unparsable input yields `""`.
 * - The root path yields `"Startseite"`.
 * - Otherwise the path is returned without its leading slash, with repeated
 *   slashes collapsed, percent-decoded and cut to 70 characters.
 *
 * `URL.pathname` is always percent-encoded, so without decoding a path such
 * as `/über-uns` would surface as `%C3%BCber-uns`. If the path contains a
 * malformed escape sequence, the still-encoded path is used instead.
 *
 * @param urlLike Absolute URL string.
 * @returns The label, or `""` when `urlLike` cannot be parsed as a URL.
 */
function compactPath(urlLike: string): string {
  let pathname: string;
  try {
    pathname = new URL(urlLike).pathname;
  } catch {
    return "";
  }

  const normalizedPath = (pathname || "/").replace(/\/+/g, "/").trim();
  if (!normalizedPath || normalizedPath === "/") {
    return "Startseite";
  }

  const encodedPath = normalizedPath.replace(/^\//, "");
  let readablePath = encodedPath;
  try {
    const decoded = decodeURIComponent(encodedPath).replace(/\s+/g, " ").trim();
    // Keep the encoded form if decoding leaves nothing but whitespace.
    if (decoded) {
      readablePath = decoded;
    }
  } catch {
    // decodeURIComponent throws URIError on malformed escapes; keep the raw path.
  }
  return readablePath.slice(0, 70);
}
/**
 * Builds the display label for a checked page.
 *
 * @param pageKind Page kind used as prefix; an empty string means no prefix.
 * @param pageLabel Title or path of the page.
 * @returns `pageLabel` cut to 100 characters (without prefix) when it is
 *   longer than that, otherwise `"<pageKind>: <pageLabel>"` or the bare label.
 */
function compactLabelForPage(pageKind: string, pageLabel: string): string {
  const maxLabelLength = 100;
  const isOverlong = pageLabel.length > maxLabelLength;
  if (isOverlong) {
    return pageLabel.slice(0, maxLabelLength);
  }
  return pageKind ? `${pageKind}: ${pageLabel}` : pageLabel;
}
/**
 * Null-safe wrapper around `compactPath`.
 *
 * @param url URL string, or an empty/missing value.
 * @returns `""` for a missing or empty URL, otherwise `compactPath(url)`.
 */
function toSafePath(url: string | null | undefined): string {
  return url ? compactPath(url) : "";
}
/**
 * Picks the best-matching registry skill of one category.
 *
 * Evidence words longer than three characters are looked up as substrings in
 * each candidate's lower-cased `whenToUse` text. A candidate scores
 * `1 + min(matches, 5)` plus `0.1` when it has a truthy `version`. The highest
 * score wins; ties go to the name that sorts first via `localeCompare`, then
 * to the earlier registry entry.
 *
 * @param skills Registry entries to choose from.
 * @param category Category a candidate must have.
 * @param evidenceText Whitespace-separated evidence words.
 * @returns The winning skill converted by `toAuditUsedSkill`, or `null` when
 *   there are no usable evidence words or no candidate in the category.
 */
function selectTopSkill(
  skills: readonly SkillRegistryEntryEvidence[],
  category: string,
  evidenceText: string,
): AuditUsedSkill | null {
  const keywords = evidenceText
    .toLowerCase()
    .split(/\s+/)
    .filter((word) => word.length > 3);
  if (keywords.length === 0) {
    return null;
  }

  let best: {
    entry: SkillRegistryEntryEvidence;
    score: number;
    name: string;
  } | null = null;

  for (const entry of skills) {
    if (entry.category !== category) {
      continue;
    }
    const haystack = entry.whenToUse.toLowerCase();
    let hits = 0;
    for (const keyword of keywords) {
      if (haystack.includes(keyword)) {
        hits += 1;
      }
    }
    const score = 1 + Math.min(hits, 5) + (entry.version ? 0.1 : 0);
    const name = entry.name.toLowerCase();

    // Strict comparisons keep the earliest entry among full ties.
    const beatsBest =
      best === null ||
      score > best.score ||
      (score === best.score && name.localeCompare(best.name) < 0);
    if (beatsBest) {
      best = { entry, score, name };
    }
  }

  return best === null ? null : toAuditUsedSkill(best.entry);
}
/**
 * Flags describing which kinds of evidence are available for the current
 * audit. They are computed in `buildAuditEvidenceInput` and consulted by
 * `v3SkillApplies` / `hasRequiredV3Input`.
 */
type SkillInputAvailability = {
websiteExists: boolean;
hasDesktopScreenshot: boolean;
hasMobileScreenshot: boolean;
hasMarkdown: boolean;
hasPageSpeed: boolean;
hasDom: boolean;
};
/**
 * Reports whether one named skill input is available.
 *
 * @param input Input name: "desktop_screenshot", "mobile_screenshot",
 *   "markdown", "pagespeed" or "dom".
 * @param availability Availability flags of the current audit.
 * @returns The matching flag; `false` for any other input name.
 */
function hasRequiredV3Input(input: string, availability: SkillInputAvailability) {
  // A Map (not an object literal) so names like "constructor" never match.
  const flagByInput = new Map<string, keyof SkillInputAvailability>([
    ["desktop_screenshot", "hasDesktopScreenshot"],
    ["mobile_screenshot", "hasMobileScreenshot"],
    ["markdown", "hasMarkdown"],
    ["pagespeed", "hasPageSpeed"],
    ["dom", "hasDom"],
  ]);
  const flag = flagByInput.get(input);
  return flag === undefined ? false : availability[flag];
}
/**
 * Decides whether a skill may be used for the current audit.
 *
 * The skill's `appliesWhen` condition (default "website_exists") must hold
 * and every input listed in `skill.inputs` must be available. An unknown
 * condition never applies.
 *
 * @param skill Registry entry to test.
 * @param availability Availability flags of the current audit.
 * @returns `true` when the condition and all required inputs are satisfied.
 */
function v3SkillApplies(
  skill: SkillRegistryEntryEvidence,
  availability: SkillInputAvailability,
) {
  const condition = skill.appliesWhen ?? "website_exists";

  let conditionMet: boolean;
  if (condition === "always") {
    conditionMet = true;
  } else if (condition === "website_exists") {
    conditionMet = availability.websiteExists;
  } else if (condition === "has_mobile_screenshot") {
    conditionMet = availability.hasMobileScreenshot;
  } else if (condition === "has_pagespeed") {
    conditionMet = availability.hasPageSpeed;
  } else {
    conditionMet = false;
  }
  if (!conditionMet) {
    return false;
  }

  const requiredInputs = skill.inputs ?? [];
  return requiredInputs.every((input) =>
    hasRequiredV3Input(input, availability),
  );
}
/**
 * Selects the registry skills that have an id but no category and that
 * apply to the current audit, capped at `SELECTED_SKILLS_LIMIT`.
 *
 * @param skillRegistry All registry entries, in registry order.
 * @param availability Availability flags of the current audit.
 * @returns The selected skills converted by `toAuditUsedSkill`.
 */
function selectV3Skills(
  skillRegistry: readonly SkillRegistryEntryEvidence[],
  availability: SkillInputAvailability,
) {
  const eligible: Array<{
    skill: SkillRegistryEntryEvidence;
    registryIndex: number;
  }> = [];
  skillRegistry.forEach((skill, registryIndex) => {
    const isCategoryFreeEntry = Boolean(skill.id) && !skill.category;
    if (isCategoryFreeEntry && v3SkillApplies(skill, availability)) {
      eligible.push({ skill, registryIndex });
    }
  });

  // Keep core local-audit coverage inside the cap; otherwise preserve registry order.
  eligible.sort((left, right) => {
    const leftRank = V3_LOCAL_AUDIT_PRIORITY.get(left.skill.id ?? "");
    const rightRank = V3_LOCAL_AUDIT_PRIORITY.get(right.skill.id ?? "");
    if (leftRank === undefined && rightRank === undefined) {
      return left.registryIndex - right.registryIndex;
    }
    // An unranked skill counts as infinitely low priority next to a ranked one.
    return (
      (leftRank ?? Number.POSITIVE_INFINITY) -
      (rightRank ?? Number.POSITIVE_INFINITY)
    );
  });

  return eligible
    .slice(0, SELECTED_SKILLS_LIMIT)
    .map(({ skill }) => toAuditUsedSkill(skill));
}
/**
 * Derives customer-readable UX, content and technical signals from crawl
 * pages and technical checks, plus flags stating which evidence categories
 * were observed.
 *
 * Each list is de-duplicated and capped by `addUniqueCapped`, so the order
 * in which signals are added decides which ones survive the cap.
 *
 * @param crawlPages Crawled pages to inspect.
 * @param technicalChecks Technical check results to inspect.
 * @returns The three signal lists and the evidence flags (`evidenceText`).
 */
function buildObservedSignals(
  crawlPages: readonly AuditCrawlPageEvidence[],
  technicalChecks: readonly AuditTechnicalCheckEvidence[],
): {
  ux: string[];
  content: string[];
  technical: string[];
  evidenceText: {
    design: boolean;
    ux: boolean;
    copy: boolean;
    seo: boolean;
  };
} {
  const ux: string[] = [];
  const content: string[] = [];
  const technical: string[] = [];
  const observed = { design: false, ux: false, copy: false, seo: false };

  const noteUx = (line: string): void =>
    addUniqueCapped(ux, line, UX_SIGNAL_LIMIT);
  const noteContent = (line: string): void =>
    addUniqueCapped(content, line, CONTENT_SIGNAL_LIMIT);
  const noteTechnical = (line: string): void =>
    addUniqueCapped(technical, line, TECHNICAL_SIGNAL_LIMIT);

  for (const page of crawlPages) {
    const pageTitle = trimAndNormalize(page.title ?? "");
    if (pageTitle.length > 4) {
      observed.copy = true;
      // Title lines use a tighter length limit than the default sanitizer.
      addUniqueCapped(
        content,
        `Seitentitel wurde erfasst: ${pageTitle}`,
        CONTENT_SIGNAL_LIMIT,
        (value) => sanitizeCustomerText(value, 150),
      );
    }

    if (page.hasContactFormSignal) {
      observed.ux = true;
      noteUx("Ein Kontaktformular wurde als potenzieller Einstiegspunkt erkannt.");
    }

    if (page.hasContactCtaSignal) {
      observed.ux = true;
      noteUx("Ein klarer Call-to-Action scheint auf der Seite aktiv zu sein.");
    }

    const hasText = Boolean(page.visibleText || page.visibleTextExcerpt);
    if (hasText) {
      observed.copy = true;
      noteContent("Sichtbarer Text wurde in der Crawl-Auswertung extrahiert.");
    }
  }

  for (const check of technicalChecks) {
    // Only an explicit `false` counts; a missing value is not a finding.
    if (check.usesHttps === false) {
      observed.ux = true;
      noteTechnical("Ein Teil der Seiten ist nicht per HTTPS erreichbar.");
      noteUx("Die sichere Übertragung der Seite ist nicht durchgängig verifiziert.");
    }

    if (check.missingMetaDescription) {
      observed.seo = true;
      noteTechnical("Fehlende Meta-Beschreibungen können die Auffindbarkeit schwächen.");
      noteContent("Meta-Informationen sind teilweise nicht vollständig vorhanden.");
    }

    if (check.missingTitle) {
      observed.seo = true;
      noteTechnical("Einige Seiten besitzen keinen aussagekräftigen Titel.");
      noteContent("Seitentitel fehlen auf ausgewählten Seiten.");
    }

    if (check.hasVisibleContactPath) {
      observed.ux = true;
      noteUx("Ein klarer Kontaktpfad scheint bereits vorhanden zu sein.");
    }

    const brokenLinkCount = check.brokenInternalLinkCount ?? 0;
    if (brokenLinkCount > 0) {
      // The reported number is capped at 10.
      const reportedCount = Math.min(brokenLinkCount, 10);
      noteTechnical(
        `Es wurden ${reportedCount} interne Verlinkungen mit Fehlerstatus erkannt.`,
      );
      noteUx("Nutzer könnten durch interne Linkfehler im Fluss abbrechen.");
    }
  }

  // Any crawl or technical data at all counts as design evidence.
  observed.design = crawlPages.length > 0 || technicalChecks.length > 0;

  const structuralKinds = new Set<unknown>(["contact", "impressum", "services"]);
  const hasStructuralPage = crawlPages.some((page) =>
    structuralKinds.has(page.pageKind),
  );
  if (hasStructuralPage) {
    observed.seo = true;
    observed.ux = true;
  }

  return {
    ux,
    content,
    technical,
    evidenceText: observed,
  };
}
/**
 * Assembles the skills used for the audit.
 *
 * The skills chosen by `selectV3Skills` come first. After them, for every
 * category whose evidence flag is set, the best category match from
 * `selectTopSkill` is appended. The result is cut to
 * `SELECTED_SKILLS_LIMIT` entries.
 *
 * @param skillRegistry All registry entries.
 * @param evidence Flags stating which evidence categories were observed.
 * @param availability Availability flags of the current audit.
 * @returns At most `SELECTED_SKILLS_LIMIT` skills.
 */
function extractSkills(
  skillRegistry: readonly SkillRegistryEntryEvidence[],
  evidence: {
    design: boolean;
    ux: boolean;
    copy: boolean;
    seo: boolean;
    marketing: boolean;
    offer: boolean;
  },
  availability: SkillInputAvailability,
): AuditUsedSkill[] {
  // Category with the keyword text matched against each skill's `whenToUse`.
  const categoryKeywords = [
    ["design", "visuale layout seite struktur design hierarchie conversion"],
    ["ux", "kontakt formular cta nutzer flow conversion pfad"],
    ["copy", "text klarheit copy headline ton local"],
    ["seo", "local auffindbarkeit meta seo impressum kontakt"],
    ["marketing", "positionierung unterscheidung angebot"],
    ["offer", "angebot text preis rahmen"],
  ] as const;

  const picked: AuditUsedSkill[] = [
    ...selectV3Skills(skillRegistry, availability),
  ];

  for (const [category, keywords] of categoryKeywords) {
    if (!evidence[category]) {
      continue;
    }
    const topSkill = selectTopSkill(skillRegistry, category, keywords);
    if (topSkill) {
      picked.push(topSkill);
    }
  }

  return picked.slice(0, SELECTED_SKILLS_LIMIT);
}
/**
 * Builds the sanitized, size-capped evidence bundle for an audit.
 *
 * - `companyContext`: one labelled line per lead field that has a value.
 *   Fields without a value are skipped so that no bare label such as
 *   "Firma:" ends up in the evidence.
 * - `checkedPages`: one label per crawled page that has a usable title or a
 *   parsable URL. If nothing remains and the company name is known, a single
 *   fallback line is added.
 * - Observed signals, PageSpeed implications, screenshot references and
 *   selected skills are derived from the remaining arguments.
 *
 * @param args Raw evidence; every property is optional.
 * @returns The assembled evidence input.
 */
export function buildAuditEvidenceInput(
  args: AuditEvidenceInputArgs,
): AuditEvidenceInput {
  const lead: AuditLeadEvidence = args.lead ?? {};
  const crawlPages = args.crawlPages ?? [];
  const technicalChecks = args.technicalChecks ?? [];
  const screenshots = args.screenshots ?? [];
  const pageSpeedInputs = args.pageSpeedInputs ?? [];
  const skillRegistry = args.skillRegistry ?? [];
  const externalMarkdown = sanitizeExternalMarkdown(args.externalMarkdown);

  // Negative dimensions are clamped to 0 and capturedAt is coerced to a number.
  const screenshotReferences = screenshots
    .slice(0, SCREENSHOT_REFERENCE_LIMIT)
    .map((screenshot) => ({
      storageId: screenshot.storageId,
      sourceUrl: screenshot.sourceUrl,
      viewport: screenshot.viewport,
      width: Math.max(screenshot.width, 0),
      height: Math.max(screenshot.height, 0),
      mimeType: screenshot.mimeType,
      capturedAt: Number(screenshot.capturedAt),
    }));

  // NOTE(review): sanitizeCustomerText rejects any text containing an
  // http(s) URL, so a "Website" value with a scheme never reaches the output.
  const leadFields: ReadonlyArray<readonly [string, string | null | undefined]> = [
    ["Firma", lead.companyName],
    ["Sparte", lead.niche],
    ["Ort", lead.city],
    ["Adresse", lead.address],
    ["Domain", lead.websiteDomain],
    ["Kontaktperson", lead.contactPerson],
    ["Telefon", lead.phone],
    ["Website", lead.websiteUrl],
  ];
  const companyContext: string[] = [];
  for (const [label, rawValue] of leadFields) {
    addUniqueCapped(
      companyContext,
      `${label}: ${rawValue ?? ""}`,
      COMPANY_CONTEXT_LIMIT,
      (value) => {
        const line = sanitizeCustomerText(value);
        // A line that is only the label means the field had no usable value.
        return line === `${label}:` ? "" : line;
      },
    );
  }

  const checkedPages: string[] = [];
  for (const page of crawlPages) {
    // `||` so an empty finalUrl does not hide a usable sourceUrl.
    const safePath = toSafePath(page.finalUrl || page.sourceUrl || "");
    const title = sanitizeCustomerText(page.title ?? "", 90);
    const pageLabel = title || safePath;
    if (!pageLabel) {
      // Neither title nor path: the label would be just the page kind.
      continue;
    }
    addUniqueCapped(
      checkedPages,
      compactLabelForPage(page.pageKind ?? "Seite", pageLabel),
      CHECKED_PAGES_LIMIT,
    );
  }
  if (checkedPages.length === 0 && lead.companyName) {
    addUniqueCapped(
      checkedPages,
      `Website-Startseite analysiert: ${lead.companyName}`,
      CHECKED_PAGES_LIMIT,
    );
  }

  const signals = buildObservedSignals(crawlPages, technicalChecks);

  const pageSpeedInputsOutput = buildPageSpeedAuditInputs(pageSpeedInputs);
  const pageSpeedCustomerImplications: string[] = [];
  for (const implication of pageSpeedInputsOutput.customerImplications) {
    addUniqueCapped(
      pageSpeedCustomerImplications,
      implication,
      PAGESPEED_SIGNAL_LIMIT,
      sanitizeCustomerText,
    );
  }

  const availability: SkillInputAvailability = {
    websiteExists:
      Boolean(lead.websiteDomain || lead.websiteUrl) ||
      crawlPages.length > 0 ||
      screenshots.length > 0,
    hasDesktopScreenshot: screenshots.some(
      (screenshot) => screenshot.viewport === "desktop",
    ),
    hasMobileScreenshot: screenshots.some(
      (screenshot) => screenshot.viewport === "mobile",
    ),
    hasMarkdown:
      Boolean(externalMarkdown) ||
      crawlPages.some((page) =>
        Boolean(page.visibleText || page.visibleTextExcerpt),
      ),
    hasPageSpeed:
      pageSpeedInputsOutput.customerImplications.length > 0 ||
      pageSpeedInputs.some((input) => input.status === "succeeded"),
    hasDom: crawlPages.length > 0 || technicalChecks.length > 0,
  };
  // Marketing and offer evidence is not derived here, so both stay false.
  const selectedSkills = extractSkills(
    skillRegistry,
    { ...signals.evidenceText, marketing: false, offer: false },
    availability,
  );

  return {
    companyContext,
    checkedPages,
    observedUxSignals: signals.ux,
    observedContentSignals: signals.content,
    observedTechnicalSignals: signals.technical,
    ...(externalMarkdown ? { externalMarkdown } : {}),
    screenshotReferences,
    pageSpeedCustomerImplications,
    selectedSkills,
  };
}