import {
  type SkillRegistryEntry,
  toAuditUsedSkill,
  type AuditUsedSkill,
} from "../skills-registry";
import {
  buildPageSpeedAuditInputs,
  type PageSpeedMinimalAuditResult,
} from "../pagespeed-audit-input";

/** Alias for a skill registry entry as consumed by the evidence builder. */
export type SkillRegistryEntryEvidence = SkillRegistryEntry;

/** Lead (company) fields that may be turned into company-context lines. */
export type AuditLeadEvidence = {
  companyName?: string | null;
  niche?: string | null;
  city?: string | null;
  websiteDomain?: string | null;
  websiteUrl?: string | null;
  address?: string | null;
  phone?: string | null;
  contactPerson?: string | null;
};

/** Per-page crawl result fields read by the evidence builder. */
export type AuditCrawlPageEvidence = {
  sourceUrl?: string | null;
  finalUrl?: string | null;
  title?: string | null;
  metaDescription?: string | null;
  pageKind?: string | null;
  hasContactFormSignal?: boolean;
  hasContactCtaSignal?: boolean;
  visibleText?: string | null;
  visibleTextExcerpt?: string | null;
};

/** Per-page technical check fields read by the evidence builder. */
export type AuditTechnicalCheckEvidence = {
  sourceUrl?: string | null;
  finalUrl?: string | null;
  usesHttps?: boolean;
  missingTitle?: boolean;
  missingMetaDescription?: boolean;
  hasVisibleContactPath?: boolean;
  brokenInternalLinkCount?: number;
};

/** Screenshot record; extra keys are tolerated but not copied to the output. */
export type AuditScreenshotEvidence = {
  storageId: string;
  viewport: string;
  sourceUrl: string;
  capturedAt: number;
  width: number;
  height: number;
  mimeType: string;
  [key: string]: unknown;
};

/** Result of {@link buildAuditEvidenceInput}. */
export type AuditEvidenceInput = {
  companyContext: string[];
  checkedPages: string[];
  observedUxSignals: string[];
  observedContentSignals: string[];
  observedTechnicalSignals: string[];
  externalMarkdown?: string;
  screenshotReferences: Array<{
    storageId: string;
    sourceUrl: string;
    viewport: string;
    width: number;
    height: number;
    mimeType: string;
    capturedAt: number;
  }>;
  pageSpeedCustomerImplications: string[];
  selectedSkills: AuditUsedSkill[];
};

/** Arguments of {@link buildAuditEvidenceInput}; every field is optional. */
export type AuditEvidenceInputArgs = {
  lead?: AuditLeadEvidence;
  crawlPages?: readonly AuditCrawlPageEvidence[];
  technicalChecks?: readonly AuditTechnicalCheckEvidence[];
  screenshots?: readonly AuditScreenshotEvidence[];
  pageSpeedInputs?: readonly PageSpeedMinimalAuditResult[];
  skillRegistry?: readonly SkillRegistryEntryEvidence[];
  externalMarkdown?: string;
};

// Maximum number of entries kept per output bucket.
const COMPANY_CONTEXT_LIMIT = 8;
const CHECKED_PAGES_LIMIT = 8;
const UX_SIGNAL_LIMIT = 6;
const CONTENT_SIGNAL_LIMIT = 6;
const TECHNICAL_SIGNAL_LIMIT = 6;
const PAGESPEED_SIGNAL_LIMIT = 8;
const SCREENSHOT_REFERENCE_LIMIT = 8;
const SELECTED_SKILLS_LIMIT = 6;
// Maximum number of characters kept from the external markdown.
const EXTERNAL_MARKDOWN_LIMIT = 4_000;

// Skill id -> sort rank; a lower rank is selected first when the cap applies.
const V3_LOCAL_AUDIT_PRIORITY = new Map(
  [
    "visual-design",
    "contact-conversion",
    "local-seo-basics",
    "performance-experience",
    "mobile-usability",
    "conversion-copy",
    "first-impression-clarity",
    "trust-signals",
    "accessibility-basics",
  ].map((id, index) => [id, index] as const),
);

// Rejection patterns used by sanitizeCustomerText. None of them is global,
// so repeated `.test()` calls carry no `lastIndex` state.
const URL_PATTERN = /\bhttps?:\/\/[^\s<>"']+/i;
const JSON_BRACKET_PATTERN = /\{[^}]*\}|\[[^\]]*\]/;
const PAGESPEED_NOISE_PATTERN =
  /\b(?:raw\s*storage\s*id|rawstorageid|lighthouse|pagespeed|score)\b/i;
const MACHINE_TOKEN_PATTERN = /\b[a-z\d_-]{24,}\b/i;

/** Collapses whitespace runs to single spaces and trims; non-strings yield "". */
function trimAndNormalize(input: unknown): string {
  if (typeof input !== "string") {
    return "";
  }
  return input.replace(/\s+/g, " ").trim();
}

/**
 * Returns a cleaned version of `value`, or "" when the text is rejected.
 *
 * Tags of the form `<...>` are replaced by spaces. The text is rejected (not
 * truncated) when it contains an http(s) URL, a `{...}` or `[...]` group, a
 * PageSpeed noise word, a run of 24+ characters from `[a-z0-9_-]`, is longer
 * than `maxLength`, or contains no letter from `a-z`/`äöüß`.
 */
function sanitizeCustomerText(value: unknown, maxLength = 180): string {
  const normalized = trimAndNormalize(value);
  if (!normalized) {
    return "";
  }

  const text = normalized
    .replace(/<[^>]*>/g, " ")
    .replace(/\s{2,}/g, " ")
    .trim();

  const rejected =
    URL_PATTERN.test(text) ||
    JSON_BRACKET_PATTERN.test(text) ||
    PAGESPEED_NOISE_PATTERN.test(text) ||
    MACHINE_TOKEN_PATTERN.test(text) ||
    text.length > maxLength ||
    !/[a-zäöüß]/i.test(text);

  return rejected ? "" : text;
}

/**
 * Collapses whitespace in the markdown and caps it at EXTERNAL_MARKDOWN_LIMIT
 * characters. Returns `undefined` for non-strings and blank input.
 */
function sanitizeExternalMarkdown(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const markdown = value.replace(/\s+/g, " ").trim();
  return markdown ? markdown.slice(0, EXTERNAL_MARKDOWN_LIMIT) : undefined;
}

/**
 * Sanitizes `input` and appends it to `bucket` unless the result is empty,
 * already present (case-insensitive comparison), or `bucket` already holds
 * `max` entries. Mutates `bucket` in place.
 */
function addUniqueCapped(
  bucket: string[],
  input: string,
  max: number,
  sanitizer = sanitizeCustomerText,
): void {
  const candidate = sanitizer(input);
  if (!candidate || bucket.length >= max) {
    return;
  }
  const needle = candidate.toLowerCase();
  if (bucket.some((line) => line.toLowerCase() === needle)) {
    return;
  }
  bucket.push(candidate);
}

/**
 * Adds a `"<label>: <value>"` line to the company context.
 *
 * Missing, null and blank values are skipped so that a bare label such as
 * `"Firma:"` never ends up in the context.
 */
function addCompanyContextLine(
  bucket: string[],
  label: string,
  value: string | null | undefined,
): void {
  const normalized = trimAndNormalize(value);
  if (!normalized) {
    return;
  }
  addUniqueCapped(bucket, `${label}: ${normalized}`, COMPANY_CONTEXT_LIMIT);
}

/**
 * Reduces a URL to its path without the leading slash (max. 70 characters).
 * The root path maps to "Startseite"; input the URL constructor rejects
 * maps to "".
 */
function compactPath(urlLike: string): string {
  try {
    const parsed = new URL(urlLike);
    const normalizedPath = (parsed.pathname || "/").replace(/\/+/g, "/").trim();
    if (!normalizedPath || normalizedPath === "/") {
      return "Startseite";
    }
    return normalizedPath.replace(/^\//, "").slice(0, 70);
  } catch {
    // Not an absolute URL the constructor accepts: no usable path.
    return "";
  }
}

/**
 * Builds the display label for a page. Labels longer than 100 characters are
 * cut to 100 and returned without the page-kind prefix.
 */
function compactLabelForPage(pageKind: string, pageLabel: string): string {
  if (pageLabel.length > 100) {
    return pageLabel.slice(0, 100);
  }
  return pageKind ? `${pageKind}: ${pageLabel}` : pageLabel;
}

/** Null-safe wrapper around compactPath. */
function toSafePath(url: string | null | undefined): string {
  return url ? compactPath(url) : "";
}

/**
 * Picks the best skill of `category` for the given evidence keywords.
 *
 * Score = 1 + number of evidence tokens (longer than 3 characters) found in
 * the skill's `whenToUse` text (capped at 5) + 0.1 when the skill has a
 * truthy `version`. Ties are broken by lower-cased name. Returns `null` when
 * there are no usable tokens or no skill in the category.
 */
function selectTopSkill(
  skills: readonly SkillRegistryEntryEvidence[],
  category: string,
  evidenceText: string,
): AuditUsedSkill | null {
  const evidenceTokens = evidenceText
    .toLowerCase()
    .split(/\s+/)
    .filter((token) => token.length > 3);
  if (evidenceTokens.length === 0) {
    return null;
  }

  const scored = skills
    .filter((skill) => skill.category === category)
    .map((candidate) => {
      const whenToUseText = candidate.whenToUse.toLowerCase();
      const matchCount = evidenceTokens.filter((token) =>
        whenToUseText.includes(token),
      ).length;
      return {
        candidate,
        score: 1 + Math.min(matchCount, 5) + (candidate.version ? 0.1 : 0),
        name: candidate.name.toLowerCase(),
      };
    })
    .sort((a, b) =>
      b.score !== a.score ? b.score - a.score : a.name.localeCompare(b.name),
    );

  const best = scored[0];
  return best ? toAuditUsedSkill(best.candidate) : null;
}

/** Which kinds of input are available for the current audit. */
type SkillInputAvailability = {
  websiteExists: boolean;
  hasDesktopScreenshot: boolean;
  hasMobileScreenshot: boolean;
  hasMarkdown: boolean;
  hasPageSpeed: boolean;
  hasDom: boolean;
};

/** Maps a skill input name to its availability flag; unknown names are unavailable. */
function hasRequiredV3Input(input: string, availability: SkillInputAvailability) {
  switch (input) {
    case "desktop_screenshot":
      return availability.hasDesktopScreenshot;
    case "mobile_screenshot":
      return availability.hasMobileScreenshot;
    case "markdown":
      return availability.hasMarkdown;
    case "pagespeed":
      return availability.hasPageSpeed;
    case "dom":
      return availability.hasDom;
    default:
      return false;
  }
}

/**
 * True when the skill's `appliesWhen` condition (default "website_exists")
 * holds and every input it lists is available.
 */
function v3SkillApplies(
  skill: SkillRegistryEntryEvidence,
  availability: SkillInputAvailability,
) {
  const appliesWhen = skill.appliesWhen ?? "website_exists";
  const applies =
    appliesWhen === "always" ||
    (appliesWhen === "website_exists" && availability.websiteExists) ||
    (appliesWhen === "has_mobile_screenshot" &&
      availability.hasMobileScreenshot) ||
    (appliesWhen === "has_pagespeed" && availability.hasPageSpeed);
  if (!applies) {
    return false;
  }
  return (skill.inputs ?? []).every((input) =>
    hasRequiredV3Input(input, availability),
  );
}

/**
 * Selects up to SELECTED_SKILLS_LIMIT applicable skills that have an `id`
 * and no `category`.
 */
function selectV3Skills(
  skillRegistry: readonly SkillRegistryEntryEvidence[],
  availability: SkillInputAvailability,
) {
  return skillRegistry
    .map((skill, registryIndex) => ({ skill, registryIndex }))
    .filter(({ skill }) => skill.id && !skill.category)
    .filter(({ skill }) => v3SkillApplies(skill, availability))
    .sort((a, b) => {
      // Keep core local-audit coverage inside the cap; otherwise preserve registry order.
      const aPriority = V3_LOCAL_AUDIT_PRIORITY.get(a.skill.id ?? "");
      const bPriority = V3_LOCAL_AUDIT_PRIORITY.get(b.skill.id ?? "");
      if (aPriority !== undefined || bPriority !== undefined) {
        return (
          (aPriority ?? Number.POSITIVE_INFINITY) -
          (bPriority ?? Number.POSITIVE_INFINITY)
        );
      }
      return a.registryIndex - b.registryIndex;
    })
    .slice(0, SELECTED_SKILLS_LIMIT)
    .map(({ skill }) => toAuditUsedSkill(skill));
}

/**
 * Derives the customer-facing UX, content and technical signal lines from the
 * crawl pages and technical checks, plus one flag per skill category stating
 * whether any evidence for that category was seen.
 */
function buildObservedSignals(
  crawlPages: readonly AuditCrawlPageEvidence[],
  technicalChecks: readonly AuditTechnicalCheckEvidence[],
): {
  ux: string[];
  content: string[];
  technical: string[];
  evidenceText: {
    design: boolean;
    ux: boolean;
    copy: boolean;
    seo: boolean;
  };
} {
  const uxSignals: string[] = [];
  const contentSignals: string[] = [];
  const technicalSignals: string[] = [];

  let uxEvidence = false;
  let copyEvidence = false;
  let seoEvidence = false;

  for (const page of crawlPages) {
    const title = trimAndNormalize(page.title ?? "");
    if (title.length > 4) {
      copyEvidence = true;
      addUniqueCapped(
        contentSignals,
        `Seitentitel wurde erfasst: ${title}`,
        CONTENT_SIGNAL_LIMIT,
        (value) => sanitizeCustomerText(value, 150),
      );
    }

    if (page.hasContactFormSignal) {
      uxEvidence = true;
      addUniqueCapped(
        uxSignals,
        "Ein Kontaktformular wurde als potenzieller Einstiegspunkt erkannt.",
        UX_SIGNAL_LIMIT,
      );
    }

    if (page.hasContactCtaSignal) {
      uxEvidence = true;
      addUniqueCapped(
        uxSignals,
        "Ein klarer Call-to-Action scheint auf der Seite aktiv zu sein.",
        UX_SIGNAL_LIMIT,
      );
    }

    if (page.visibleText || page.visibleTextExcerpt) {
      copyEvidence = true;
      addUniqueCapped(
        contentSignals,
        "Sichtbarer Text wurde in der Crawl-Auswertung extrahiert.",
        CONTENT_SIGNAL_LIMIT,
      );
    }
  }

  for (const check of technicalChecks) {
    // Only an explicit `false` counts; an unknown HTTPS state is not reported.
    if (check.usesHttps === false) {
      uxEvidence = true;
      addUniqueCapped(
        technicalSignals,
        "Ein Teil der Seiten ist nicht per HTTPS erreichbar.",
        TECHNICAL_SIGNAL_LIMIT,
      );
      addUniqueCapped(
        uxSignals,
        "Die sichere Übertragung der Seite ist nicht durchgängig verifiziert.",
        UX_SIGNAL_LIMIT,
      );
    }

    if (check.missingMetaDescription) {
      seoEvidence = true;
      addUniqueCapped(
        technicalSignals,
        "Fehlende Meta-Beschreibungen können die Auffindbarkeit schwächen.",
        TECHNICAL_SIGNAL_LIMIT,
      );
      addUniqueCapped(
        contentSignals,
        "Meta-Informationen sind teilweise nicht vollständig vorhanden.",
        CONTENT_SIGNAL_LIMIT,
      );
    }

    if (check.missingTitle) {
      seoEvidence = true;
      addUniqueCapped(
        technicalSignals,
        "Einige Seiten besitzen keinen aussagekräftigen Titel.",
        TECHNICAL_SIGNAL_LIMIT,
      );
      addUniqueCapped(
        contentSignals,
        "Seitentitel fehlen auf ausgewählten Seiten.",
        CONTENT_SIGNAL_LIMIT,
      );
    }

    if (check.hasVisibleContactPath) {
      uxEvidence = true;
      addUniqueCapped(
        uxSignals,
        "Ein klarer Kontaktpfad scheint bereits vorhanden zu sein.",
        UX_SIGNAL_LIMIT,
      );
    }

    const brokenLinks = check.brokenInternalLinkCount ?? 0;
    if (brokenLinks > 0) {
      // The reported count is capped at 10.
      addUniqueCapped(
        technicalSignals,
        `Es wurden ${Math.min(brokenLinks, 10)} interne Verlinkungen mit Fehlerstatus erkannt.`,
        TECHNICAL_SIGNAL_LIMIT,
      );
      addUniqueCapped(
        uxSignals,
        "Nutzer könnten durch interne Linkfehler im Fluss abbrechen.",
        UX_SIGNAL_LIMIT,
      );
    }
  }

  // Any crawl or technical data at all counts as design evidence.
  const designEvidence = crawlPages.length > 0 || technicalChecks.length > 0;

  const hasKeyPageKind = crawlPages.some(
    (page) =>
      page.pageKind === "contact" ||
      page.pageKind === "impressum" ||
      page.pageKind === "services",
  );
  if (hasKeyPageKind) {
    seoEvidence = true;
    uxEvidence = true;
  }

  return {
    ux: uxSignals,
    content: contentSignals,
    technical: technicalSignals,
    evidenceText: {
      design: designEvidence,
      ux: uxEvidence,
      copy: copyEvidence,
      seo: seoEvidence,
    },
  };
}

/**
 * Combines the id-based skills from selectV3Skills with at most one
 * category-based skill per evidenced category, capped at
 * SELECTED_SKILLS_LIMIT entries in total.
 */
function extractSkills(
  skillRegistry: readonly SkillRegistryEntryEvidence[],
  evidence: {
    design: boolean;
    ux: boolean;
    copy: boolean;
    seo: boolean;
    marketing: boolean;
    offer: boolean;
  },
  availability: SkillInputAvailability,
): AuditUsedSkill[] {
  const selected: AuditUsedSkill[] = selectV3Skills(skillRegistry, availability);

  const categoryOrder = ["design", "ux", "copy", "seo", "marketing", "offer"] as const;
  // Keyword text per category, matched against each skill's `whenToUse`.
  const evidenceText = {
    design: "visuale layout seite struktur design hierarchie conversion",
    ux: "kontakt formular cta nutzer flow conversion pfad",
    copy: "text klarheit copy headline ton local",
    seo: "local auffindbarkeit meta seo impressum kontakt",
    marketing: "positionierung unterscheidung angebot",
    offer: "angebot text preis rahmen",
  };

  for (const category of categoryOrder) {
    if (!evidence[category]) {
      continue;
    }
    const match = selectTopSkill(skillRegistry, category, evidenceText[category]);
    if (match) {
      selected.push(match);
    }
  }

  if (selected.length > SELECTED_SKILLS_LIMIT) {
    selected.length = SELECTED_SKILLS_LIMIT;
  }
  return selected;
}

/**
 * Builds the sanitized, size-capped evidence input for an audit.
 *
 * @param args Lead data, crawl pages, technical checks, screenshots, PageSpeed
 *   results, skill registry and external markdown; missing fields are treated
 *   as empty.
 * @returns Company-context lines, checked-page labels, observed UX, content
 *   and technical signals, screenshot references, PageSpeed customer
 *   implications and the selected skills. `externalMarkdown` is only present
 *   when non-blank markdown was supplied.
 */
export function buildAuditEvidenceInput(
  args: AuditEvidenceInputArgs,
): AuditEvidenceInput {
  const lead = args.lead ?? {};
  const crawlPages = args.crawlPages ?? [];
  const technicalChecks = args.technicalChecks ?? [];
  const screenshots = args.screenshots ?? [];
  const pageSpeedInputs = args.pageSpeedInputs ?? [];
  const skillRegistry = args.skillRegistry ?? [];
  const externalMarkdown = sanitizeExternalMarkdown(args.externalMarkdown);

  // Copy only the known fields (dropping any extra keys) and normalize them.
  const screenshotReferences = screenshots
    .slice(0, SCREENSHOT_REFERENCE_LIMIT)
    .map((screenshot) => ({
      storageId: screenshot.storageId,
      sourceUrl: screenshot.sourceUrl,
      viewport: screenshot.viewport,
      width: Math.max(screenshot.width, 0),
      height: Math.max(screenshot.height, 0),
      mimeType: screenshot.mimeType,
      capturedAt: Number(screenshot.capturedAt),
    }));

  const companyContext: string[] = [];
  addCompanyContextLine(companyContext, "Firma", lead.companyName);
  addCompanyContextLine(companyContext, "Sparte", lead.niche);
  addCompanyContextLine(companyContext, "Ort", lead.city);
  addCompanyContextLine(companyContext, "Adresse", lead.address);
  addCompanyContextLine(companyContext, "Domain", lead.websiteDomain);
  addCompanyContextLine(companyContext, "Kontaktperson", lead.contactPerson);
  addCompanyContextLine(companyContext, "Telefon", lead.phone);
  // NOTE(review): sanitizeCustomerText rejects any text containing an http(s)
  // URL, so a website URL with a scheme never makes it into the context.
  addCompanyContextLine(companyContext, "Website", lead.websiteUrl);

  const checkedPages: string[] = [];
  for (const page of crawlPages) {
    const safePath = toSafePath(page.finalUrl ?? page.sourceUrl ?? "");
    const title = sanitizeCustomerText(page.title ?? "", 90);
    const pageLabel = title || safePath;
    if (!pageLabel) {
      // Neither a usable title nor a parseable URL: skip the page instead of
      // emitting a bare "<kind>:" label.
      continue;
    }
    addUniqueCapped(
      checkedPages,
      compactLabelForPage(page.pageKind ?? "Seite", pageLabel),
      CHECKED_PAGES_LIMIT,
    );
  }

  if (checkedPages.length === 0 && lead.companyName) {
    addUniqueCapped(
      checkedPages,
      `Website-Startseite analysiert: ${lead.companyName}`,
      CHECKED_PAGES_LIMIT,
    );
  }

  const signals = buildObservedSignals(crawlPages, technicalChecks);

  const pageSpeedInputsOutput = buildPageSpeedAuditInputs(pageSpeedInputs);
  const pageSpeedCustomerImplications: string[] = [];
  for (const implication of pageSpeedInputsOutput.customerImplications) {
    addUniqueCapped(
      pageSpeedCustomerImplications,
      implication,
      PAGESPEED_SIGNAL_LIMIT,
      sanitizeCustomerText,
    );
  }

  const availability: SkillInputAvailability = {
    websiteExists:
      Boolean(lead.websiteDomain || lead.websiteUrl) ||
      crawlPages.length > 0 ||
      screenshots.length > 0,
    hasDesktopScreenshot: screenshots.some(
      (screenshot) => screenshot.viewport === "desktop",
    ),
    hasMobileScreenshot: screenshots.some(
      (screenshot) => screenshot.viewport === "mobile",
    ),
    hasMarkdown:
      Boolean(externalMarkdown) ||
      crawlPages.some((page) =>
        Boolean(page.visibleText || page.visibleTextExcerpt),
      ),
    hasPageSpeed:
      pageSpeedInputsOutput.customerImplications.length > 0 ||
      pageSpeedInputs.some((input) => input.status === "succeeded"),
    hasDom: crawlPages.length > 0 || technicalChecks.length > 0,
  };

  const selectedSkills = extractSkills(
    skillRegistry,
    {
      ...signals.evidenceText,
      marketing: false,
      offer: false,
    },
    availability,
  );

  return {
    companyContext,
    checkedPages,
    observedUxSignals: signals.ux,
    observedContentSignals: signals.content,
    observedTechnicalSignals: signals.technical,
    ...(externalMarkdown ? { externalMarkdown } : {}),
    screenshotReferences,
    pageSpeedCustomerImplications,
    selectedSkills,
  };
}