Improve audit pipeline and outreach review

2026-06-08 22:16:32 +02:00
parent ff18fc202e
commit 1695110e0a
34 changed files with 2792 additions and 238 deletions
--- a/convex/auditGenerationAction.ts
+++ b/convex/auditGenerationAction.ts
@@ -4,15 +4,20 @@ import { type DataContent, generateObject } from "ai";
 import { createOpenRouterProvider } from "../lib/ai/openrouter-provider";
 import { resolveModelProfile } from "../lib/ai/model-profiles";
 import { loadLocalAuditSkillRegistry } from "../lib/ai/local-audit-skill-registry";
+import { buildCustomerTonePromptSection } from "../lib/ai/customer-tone-guidelines";
 import {
  auditClassificationSchema,
+  auditEvidenceVerificationSchema,
  auditSummarySchema,
+  auditSpecialistResultSchema,
  callScriptSchema,
  emailDraftSchema,
  emailSubjectSchema,
  followUpDraftSchema,
  publicAuditTextSchema,
  qualityReviewSchema,
+  type AuditSpecialistFinding,
+  type AuditSpecialistResult,
 } from "../lib/ai/schemas";
 import {
  validateCustomerFacingCopy,
@@ -320,6 +325,47 @@ const terminalLeadContactStatuses = [
  "replied",
 ] as const;

+/**
+ * Static configuration for the specialist audit stages.
+ *
+ * Each entry provides the `stage` identifier, a display `title` and a German
+ * `focus` description. `buildSpecialistPrompt` interpolates `title` and
+ * `focus` into the specialist prompt; the optional `guidance` text (currently
+ * only set for `critiqueSpecialist`) is added as an extra prompt line.
+ *
+ * The `as const` assertion keeps the `stage` values as string literals so the
+ * `SpecialistStage` union can be derived from this array.
+ */
+const specialistStageConfigs = [
+  {
+    stage: "localSeoSpecialist",
+    title: "Local SEO Specialist",
+    focus:
+      "NAP, Ort-Leistung-Relevanz, Title/Meta/H1, lokale Vertrauenssignale und Impressum-/Kontaktklarheit.",
+  },
+  {
+    stage: "conversionUxSpecialist",
+    title: "Conversion UX Specialist",
+    focus:
+      "Kontaktpfad, CTA-Sichtbarkeit, Click-to-call, Formularreibung und mobile Handlungsfähigkeit.",
+  },
+  {
+    stage: "visualTrustSpecialist",
+    title: "Visual Trust Specialist",
+    focus:
+      "Erster visueller Eindruck, Hierarchie, Lesbarkeit, Bild-/Team-/Vertrauenssignale aus Screenshots.",
+  },
+  {
+    stage: "critiqueSpecialist",
+    title: "Impeccable Critique Specialist",
+    focus:
+      "Designkritik nach critique/impeccable: visuelle Hierarchie, Informationsarchitektur, kognitive Last, Nielsen-Heuristiken, AI-Slop-/Template-Indizien und persona-nahe Reibung.",
+    // Only this stage carries extra guidance; it is read via an `in` check in buildSpecialistPrompt.
+    guidance:
+      "Nutze fuer passende Befunde skillId impeccable-critique. Liefere keine Heuristik-Score-Tabelle, sondern konkrete, evidence-gebundene Findings. Markenfit, Emotion oder AI-Slop nur behaupten, wenn Screenshot/Text/DOM es stuetzen.",
+  },
+  {
+    stage: "performanceAccessibilitySpecialist",
+    title: "Performance Accessibility Specialist",
+    focus:
+      "Mobile Ladeerfahrung, PageSpeed-Auswirkungen, Tap-Ziele, Kontrast, Labels und einfache Barrieren.",
+  },
+] as const;
+
+/** Union of the literal `stage` identifiers declared in `specialistStageConfigs`. */
+type SpecialistStage = (typeof specialistStageConfigs)[number]["stage"];
+/**
+ * A specialist finding paired with a synthetic id.
+ *
+ * The id (assigned by `toVerifierCandidates`, e.g. `finding-1`) is what the
+ * evidence verifier is asked to return, so verified ids can be mapped back to
+ * the original, unmodified finding.
+ */
+type VerifierCandidate = {
+  findingId: string;
+  finding: AuditSpecialistFinding;
+};
+
 function toAuditGenerationProfileMessage(stage: string, runId: Id<"agentRuns">) {
  return {
    level: "info" as const,
@@ -374,14 +420,118 @@ function buildMultimodalPrompt(evidence: AuditEvidence, withScreenshots = false)
  ].join("\n");
 }

+/**
+ * Renders the evidence ledger as plain text for inclusion in a prompt.
+ *
+ * Only the first 24 ledger entries are rendered. Each entry becomes one line
+ * of ` | `-separated `key=value` fields; the `url` field is left out when the
+ * entry has no source URL.
+ *
+ * @param evidence Audit evidence whose `evidenceLedger` is formatted.
+ * @returns One line per ledger entry, joined with newlines.
+ */
+function formatEvidenceLedger(evidence: AuditEvidence) {
+  const renderedEntries: string[] = [];
+
+  for (const entry of evidence.evidenceLedger.slice(0, 24)) {
+    const fields = [
+      `id=${entry.id}`,
+      `type=${entry.type}`,
+      `label=${entry.label}`,
+    ];
+    // The URL is optional; skip the field entirely instead of emitting `url=`.
+    if (entry.sourceUrl) {
+      fields.push(`url=${entry.sourceUrl}`);
+    }
+    fields.push(`summary=${entry.summary}`);
+
+    renderedEntries.push(fields.join(" | "));
+  }
+
+  return renderedEntries.join("\n");
+}
+
+/**
+ * Builds the user prompt for a single specialist audit stage.
+ *
+ * The prompt combines the specialist's title and focus, the optional
+ * stage-specific guidance, the fixed output rules, the observed signals from
+ * the evidence and the formatted evidence ledger.
+ *
+ * Stages without `guidance` contribute an empty entry; empty entries are
+ * filtered out before joining so the prompt does not contain a stray blank
+ * line (matching how the sibling formatters drop their optional fields).
+ *
+ * @param evidence Audit evidence supplying context, signals and the ledger.
+ * @param config One entry of `specialistStageConfigs`.
+ * @returns The newline-joined prompt text.
+ */
+function buildSpecialistPrompt(
+  evidence: AuditEvidence,
+  config: (typeof specialistStageConfigs)[number],
+) {
+  return [
+    `Du bist ${config.title} für lokale Website-Audits.`,
+    `Fokus: ${config.focus}`,
+    // `guidance` only exists on some config entries, hence the `in` narrowing.
+    "guidance" in config ? config.guidance : "",
+    "Erzeuge nur Befunde, die mit evidenceRefs aus dem Evidence-Ledger belegt sind.",
+    "Nutze keine Unknown-/Unbekannt-Werte als Kundenbefund. Wenn Belege fehlen, liefere keinen Befund.",
+    "Jeder Befund braucht skillId, claim, recommendation, customerBenefit, severity, confidence, evidenceRefs, applies und unknowns.",
+    "Jede evidenceRef braucht id, type, label und sourceUrl; nutze die Ledger-URL oder einen leeren String.",
+    "Antworte mit status, findings und notes; wenn nichts belegt ist, nutze findings: [] und erklaerende notes.",
+    `Unternehmenskontext: ${evidence.companyContext.join(" | ")}`,
+    `Prüfseiten: ${evidence.checkedPages.join(" ; ")}`,
+    `UX-Signale: ${evidence.observedUxSignals.join(" ; ")}`,
+    `Content-Signale: ${evidence.observedContentSignals.join(" ; ")}`,
+    `Technische Signale: ${evidence.observedTechnicalSignals.join(" ; ")}`,
+    `PageSpeed-Folgen: ${evidence.pageSpeedCustomerImplications.join(" ; ")}`,
+    `Evidence-Ledger:\n${formatEvidenceLedger(evidence)}`,
+  ]
+    // Every other entry has a fixed non-empty prefix, so this only removes the missing guidance.
+    .filter(Boolean)
+    .join("\n");
+}
+
+/**
+ * Wraps specialist findings as verifier candidates with synthetic ids.
+ *
+ * At most the first 12 findings are kept. Ids are 1-based and follow the
+ * input order (`finding-1`, `finding-2`, ...).
+ *
+ * @param findings Findings to hand to the evidence verifier.
+ * @returns One candidate per retained finding.
+ */
+function toVerifierCandidates(
+  findings: readonly AuditSpecialistFinding[],
+): VerifierCandidate[] {
+  const candidates: VerifierCandidate[] = [];
+
+  for (const [position, finding] of findings.slice(0, 12).entries()) {
+    candidates.push({ findingId: `finding-${position + 1}`, finding });
+  }
+
+  return candidates;
+}
+
+/**
+ * Renders one verifier candidate as a block of `key=value` lines.
+ *
+ * The confidence is shown as a rounded percentage and evidence refs are
+ * listed as `id (type, label)`. The `unknowns` line is only present when the
+ * finding lists at least one unknown.
+ *
+ * @param candidate Finding plus its synthetic id.
+ * @returns The newline-joined description of the candidate.
+ */
+function formatVerifierCandidate(candidate: VerifierCandidate) {
+  const finding = candidate.finding;
+  const confidencePercent = Math.round(finding.confidence * 100);
+  const evidenceSummary = finding.evidenceRefs
+    .map((ref) => `${ref.id} (${ref.type}, ${ref.label})`)
+    .join("; ");
+
+  const lines = [
+    `id=${candidate.findingId}`,
+    `skillId=${finding.skillId}`,
+    `claim=${finding.claim}`,
+    `recommendation=${finding.recommendation}`,
+    `customerBenefit=${finding.customerBenefit}`,
+    `severity=${finding.severity}`,
+    `confidence=${confidencePercent}%`,
+    `evidenceRefs=${evidenceSummary}`,
+  ];
+  // Unknowns are optional context; omit the line when there are none.
+  if (finding.unknowns.length > 0) {
+    lines.push(`unknowns=${finding.unknowns.join("; ")}`);
+  }
+
+  return lines.join("\n");
+}
+
+/**
+ * Builds the user prompt for the evidence verification stage.
+ *
+ * The prompt consists of the fixed verifier instructions, the formatted
+ * evidence ledger and the candidate findings (separated by blank lines). The
+ * instructions ask the model to answer with finding ids rather than full
+ * findings.
+ *
+ * @param candidates Findings to verify, each with its synthetic id.
+ * @param evidence Audit evidence whose ledger the findings are checked against.
+ * @returns The newline-joined prompt text.
+ */
+function buildEvidenceVerifierPrompt(
+  candidates: readonly VerifierCandidate[],
+  evidence: AuditEvidence,
+) {
+  const instructions = [
+    "Du bist EvidenceQA und verifizierst Audit-Befunde.",
+    "Behalte nur Befunde, die konkrete evidenceRefs besitzen, nicht generisch sind und keine Unknown-Werte als Claim nutzen.",
+    "Lege widersprüchliche CTA/Kontakt/Meta-Aussagen in contradictions offen.",
+    "Antworte mit verifiedFindingIds, rejectedFindings, contradictions und notes.",
+    "verifiedFindingIds enthaelt nur IDs aus den unten gelisteten Befunden.",
+    "Gib keine vollstaendigen verified Findings zurueck; die Anwendung uebernimmt die Originalbefunde anhand der IDs.",
+    "Ein rejectedFinding braucht findingId, skillId, claim und rejectionReason.",
+  ];
+  const ledgerSection = `Evidence-Ledger:\n${formatEvidenceLedger(evidence)}`;
+  const renderedCandidates = candidates
+    .map((candidate) => formatVerifierCandidate(candidate))
+    .join("\n\n");
+  const candidateSection = `Befunde zur Prüfung:\n${renderedCandidates}`;
+
+  return [...instructions, ledgerSection, candidateSection].join("\n");
+}
+
+/**
+ * Renders verified findings as a numbered, human-readable text block.
+ *
+ * Each finding produces five lines (headline with skill id and claim,
+ * recommendation, benefit, severity with rounded confidence percentage, and
+ * the evidence refs as `type:label`). Findings are separated by a blank line.
+ *
+ * @param findings Findings to render, in display order.
+ * @returns The formatted text; an empty string when there are no findings.
+ */
+function formatVerifiedFindings(findings: readonly AuditSpecialistFinding[]) {
+  const sections: string[] = [];
+
+  findings.forEach((finding, position) => {
+    const confidencePercent = Math.round(finding.confidence * 100);
+    const evidenceLabels = finding.evidenceRefs
+      .map((ref) => `${ref.type}:${ref.label}`)
+      .join(", ");
+
+    const sectionLines = [
+      `${position + 1}. [${finding.skillId}] ${finding.claim}`,
+      `Empfehlung: ${finding.recommendation}`,
+      `Nutzen: ${finding.customerBenefit}`,
+      `Priorität: ${finding.severity}; Sicherheit: ${confidencePercent}%`,
+      `Belege: ${evidenceLabels}`,
+    ];
+    sections.push(sectionLines.join("\n"));
+  });
+
+  return sections.join("\n\n");
+}
+
 function buildGermanCopyPrompt(
  internalFindings: string,
  multimodalSummary: string,
+  evidence: AuditEvidence,
 ) {
  return [
    "Du bist Senior-Redakteur für lokale Kundengewinnung.",
-    "Erstelle kundenrelevante Texte in deutscher Sprache, im Ich-Ich Kontext,",
-    "mit Beobachtung und konkretem Vorschlag in jedem Stück.",
+    "Erstelle kundenrelevante Texte in deutscher Sprache und nutze ausschließlich verifizierte Befunde als fachliche Grundlage.",
+    "Vermeide mechanische Wiederholungen wie 'Ich habe beobachtet' oder 'Ich schlage vor'.",
+    "PublicSummary und PublicBody dürfen auditartig bleiben, sollen aber natürlich und konkret klingen.",
+    buildCustomerTonePromptSection(),
+    `Lead-/Unternehmenskontext: ${evidence.companyContext.join(" | ")}`,
+    `Geprüfte Seiten: ${evidence.checkedPages.join(" ; ")}`,
    `Interne Befunde: ${internalFindings}`,
    `Multimodale Zusammenfassung: ${multimodalSummary}`,
    "Liefer bitte alle Felder als validiertes JSON gemäß Schema.",
@@ -395,6 +545,10 @@ function buildQualityReviewPrompt(
  return [
    "Du bist Qualitätssicherungs-Engine für Kundenkommunikation.",
    "Prüfe Inhalte auf deutsche Sprache, Tonalität, Beobachtung/Suggestion und klare, faktennahe Inhalte.",
+    "Prüfe besonders die E-Mail: Klingt sie wie eine echte Erstmail von Matthias?",
+    "Würde ein lokaler Betrieb sie als hilfreichen Hinweis lesen, nicht als KI-Verkaufstext?",
+    "Ist jede konkrete Behauptung in der E-Mail durch verified findings / verifizierte Befunde gedeckt?",
+    buildCustomerTonePromptSection(),
    `Interne Befunde: ${internalFindings}`,
    `Öffentliche Zusammenfassung: ${germanCopy.publicSummary}`,
    `Öffentlicher Text: ${germanCopy.publicBody}`,
@@ -412,13 +566,40 @@ function toSkillSummaries(
    version?: string;
    source?: string;
  }>,
+  registry: Array<{
+    id?: string;
+    name: string;
+    purpose?: string;
+    instructions?: string;
+    requiredInput?: string;
+    expectedOutput?: string;
+    category?: string;
+    version?: string;
+    source?: string;
+  }> = [],
 ) {
  return skills.slice(0, 6).map((skill) => ({
    name: skill.name,
-    purpose: "Erkenntnisbasiertes Hilfsmodul für die Audit-Bearbeitung.",
-    summary: `${skill.name}${skill.version ? ` (${skill.version})` : ""}${
-      skill.category ? ` aus ${skill.category}` : ""
-    }.`,
+    purpose:
+      registry.find(
+        (candidate) =>
+          (skill.id && candidate.id === skill.id) ||
+          candidate.name === skill.name,
+      )?.purpose ??
+      registry.find(
+        (candidate) =>
+          (skill.id && candidate.id === skill.id) ||
+          candidate.name === skill.name,
+      )?.instructions ??
+      "Zweckbeschreibung nicht verfügbar.",
+    summary: [
+      skill.name,
+      skill.version ? `Version ${skill.version}` : "",
+      skill.category ? `Kategorie ${skill.category}` : "",
+      skill.source ? `Quelle ${skill.source}` : "",
+    ]
+      .filter(Boolean)
+      .join(" · "),
  }));
 }

@@ -966,7 +1147,13 @@ async function persistAuditStage({
  runId: Id<"agentRuns">;
  leadId: Id<"leads">;
  auditId?: Id<"audits">;
-  stage: "classification" | "multimodalAudit" | "germanCopy" | "qualityReview";
+  stage:
+    | "classification"
+    | SpecialistStage
+    | "evidenceVerifier"
+    | "multimodalAudit"
+    | "germanCopy"
+    | "qualityReview";
  modelProfile: string;
  modelId: string;
  prompt: string;
@@ -1051,8 +1238,15 @@ export const processAuditGeneration = internalAction({
    };
    let qualityPassed = false;
    let errors = 0;
-    let currentStep: "audit_generation" | "classification" | "multimodalAudit" | "germanCopy" | "qualityReview" =
-      "audit_generation";
+    let currentStep:
+      | "audit_generation"
+      | "classification"
+      | SpecialistStage
+      | "evidenceVerifier"
+      | "multimodalAudit"
+      | "germanCopy"
+      | "qualityReview" = "audit_generation";
+    let verifiedFindings: AuditSpecialistFinding[] = [];

    try {
      started = await ctx.runMutation(internal.auditGeneration.startAuditGenerationRun, {
@@ -1244,6 +1438,205 @@ export const processAuditGeneration = internalAction({
        return null;
      }

+      // Stage 1b: specialist fan-out and evidence verification (runs before the Stage 2 multimodal audit)
+      const specialistSystemPrompt =
+        "Du bist ein spezialisierter Website-Audit-Agent. Antworte ausschließlich als JSON gemäß Schema.";
+      const specialistResults = await Promise.all(
+        specialistStageConfigs.map(async (config): Promise<AuditSpecialistResult> => {
+          const specialistPrompt = buildSpecialistPrompt(evidenceInput, config);
+          const safeSpecialistPrompt = sanitizeAndCapString(
+            specialistPrompt,
+            MAX_PROMPT_BYTES,
+          );
+          currentStep = config.stage;
+
+          await persistAuditStage({
+            ctx,
+            runId: args.runId,
+            leadId: started!.lead._id,
+            ...(auditId ? { auditId } : {}),
+            stage: config.stage,
+            modelProfile: "classification",
+            modelId: classificationProfile.modelId,
+            prompt: safeSpecialistPrompt ?? "",
+            systemPrompt: specialistSystemPrompt,
+            status: "running",
+          });
+
+          try {
+            const specialistResult = await generateObject({
+              model: provider(classificationProfile.modelId),
+              system: specialistSystemPrompt,
+              schema: auditSpecialistResultSchema,
+              prompt: safeSpecialistPrompt ?? "",
+              temperature: classificationProfile.temperature,
+              maxOutputTokens: classificationProfile.maxTokens,
+            });
+
+            await persistAuditStage({
+              ctx,
+              runId: args.runId,
+              leadId: started!.lead._id,
+              ...(auditId ? { auditId } : {}),
+              stage: config.stage,
+              modelProfile: "classification",
+              modelId: classificationProfile.modelId,
+              prompt: safeSpecialistPrompt ?? "",
+              systemPrompt: specialistSystemPrompt,
+              rawResponse: sanitizeAndCapString(
+                safeStringify(specialistResult.object),
+                MAX_RAW_RESPONSE_BYTES,
+              ),
+              parsedJson: sanitizeAndCapParsedJson(specialistResult.object),
+              ...withStageUsage(specialistResult.usage),
+              status: "succeeded",
+              finishReason: specialistResult.finishReason,
+            });
+            await recordOpenRouterUsage(ctx, {
+              runId: args.runId,
+              leadId: started!.lead._id,
+              ...(auditId ? { auditId } : {}),
+              usage: specialistResult.usage,
+            });
+
+            return specialistResult.object;
+          } catch (error) {
+            const safeErrorSummary = messageFromError(error);
+            await persistAuditStage({
+              ctx,
+              runId: args.runId,
+              leadId: started!.lead._id,
+              ...(auditId ? { auditId } : {}),
+              stage: config.stage,
+              modelProfile: "classification",
+              modelId: classificationProfile.modelId,
+              prompt: safeSpecialistPrompt ?? "",
+              systemPrompt: specialistSystemPrompt,
+              status: "failed",
+              errorSummary: safeErrorSummary,
+            });
+            await appendRunEvent(ctx, {
+              runId: args.runId,
+              level: "warning",
+              message: `${config.title} konnte keine Befunde liefern.`,
+              details: [{ label: "Fehler", value: safeErrorSummary }],
+            });
+            return {
+              status: "failed",
+              findings: [],
+              notes: [safeErrorSummary],
+            };
+          }
+        }),
+      );
+
+      const specialistFindings = specialistResults.flatMap((result) =>
+        result.findings.filter((finding) => finding.applies),
+      );
+      const verifierCandidates = toVerifierCandidates(specialistFindings);
+      const verifierPrompt = buildEvidenceVerifierPrompt(
+        verifierCandidates,
+        evidenceInput,
+      );
+      const safeVerifierPrompt = sanitizeAndCapString(
+        verifierPrompt,
+        MAX_PROMPT_BYTES,
+      );
+      const verifierSystemPrompt =
+        "Du bist EvidenceQA. Verifiziere Befunde streng gegen belegte Evidence-Refs.";
+      currentStep = "evidenceVerifier";
+
+      await persistAuditStage({
+        ctx,
+        runId: args.runId,
+        leadId: started.lead._id,
+        ...(auditId ? { auditId } : {}),
+        stage: "evidenceVerifier",
+        modelProfile: "classification",
+        modelId: classificationProfile.modelId,
+        prompt: safeVerifierPrompt ?? "",
+        systemPrompt: verifierSystemPrompt,
+        status: "running",
+      });
+
+      try {
+        const verifierResult = await generateObject({
+          model: provider(classificationProfile.modelId),
+          system: verifierSystemPrompt,
+          schema: auditEvidenceVerificationSchema,
+          prompt: safeVerifierPrompt ?? "",
+          temperature: classificationProfile.temperature,
+          maxOutputTokens: classificationProfile.maxTokens,
+        });
+        const verifiedFindingIds = new Set(
+          verifierResult.object.verifiedFindingIds,
+        );
+        verifiedFindings = verifierCandidates
+          .filter((candidate) => verifiedFindingIds.has(candidate.findingId))
+          .map((candidate) => candidate.finding);
+
+        await persistAuditStage({
+          ctx,
+          runId: args.runId,
+          leadId: started.lead._id,
+          ...(auditId ? { auditId } : {}),
+          stage: "evidenceVerifier",
+          modelProfile: "classification",
+          modelId: classificationProfile.modelId,
+          prompt: safeVerifierPrompt ?? "",
+          systemPrompt: verifierSystemPrompt,
+          rawResponse: sanitizeAndCapString(
+            safeStringify(verifierResult.object),
+            MAX_RAW_RESPONSE_BYTES,
+          ),
+          parsedJson: sanitizeAndCapParsedJson(verifierResult.object),
+          ...withStageUsage(verifierResult.usage),
+          status: "succeeded",
+          finishReason: verifierResult.finishReason,
+        });
+        await recordOpenRouterUsage(ctx, {
+          runId: args.runId,
+          leadId: started.lead._id,
+          ...(auditId ? { auditId } : {}),
+          usage: verifierResult.usage,
+        });
+      } catch (error) {
+        errors += 1;
+        const safeErrorSummary = messageFromError(error);
+        await persistAuditStage({
+          ctx,
+          runId: args.runId,
+          leadId: started.lead._id,
+          ...(auditId ? { auditId } : {}),
+          stage: "evidenceVerifier",
+          modelProfile: "classification",
+          modelId: classificationProfile.modelId,
+          prompt: safeVerifierPrompt ?? "",
+          systemPrompt: verifierSystemPrompt,
+          status: "failed",
+          errorSummary: safeErrorSummary,
+        });
+        await ctx.runMutation(internal.auditGeneration.finishAuditGenerationRun, {
+          runId: args.runId,
+          status: "failed",
+          errors,
+          errorSummary: "Evidence-Verifikation konnte nicht abgeschlossen werden.",
+          currentStep: "evidenceVerifier",
+        });
+        return null;
+      }
+
+      if (verifiedFindings.length === 0) {
+        await ctx.runMutation(internal.auditGeneration.finishAuditGenerationRun, {
+          runId: args.runId,
+          status: "failed",
+          errors: errors + 1,
+          errorSummary: "Keine belegten Audit-Befunde nach Evidence-Verifikation.",
+          currentStep: "evidenceVerifier",
+        });
+        return null;
+      }
+
      // Stage 2: multimodal audit summary
      const multimodalSystemPrompt =
        "Du bist Prüfanalyst für Conversion-Optimierung mit Fokus auf lokale Unternehmen.";
@@ -1454,9 +1847,11 @@ export const processAuditGeneration = internalAction({
      // Stage 3: german copy generation
      const germanSystemPrompt =
        "Du bist fachlicher Texter für lokale Unternehmen im B2B-Kontext.";
+      const verifiedFindingsText = formatVerifiedFindings(verifiedFindings);
      const germanPrompt = buildGermanCopyPrompt(
-        classificationSummary,
+        verifiedFindingsText,
        multimodalSummary,
+        evidenceInput,
      );
      const safeGermanPrompt = sanitizeAndCapString(germanPrompt, MAX_PROMPT_BYTES);

@@ -1623,7 +2018,7 @@ export const processAuditGeneration = internalAction({

      // Stage 4: final quality review
      const qualityPrompt = buildQualityReviewPrompt(
-        classificationSummary,
+        verifiedFindingsText,
        germanCopyOutput,
      );
      const safeQualityPrompt = sanitizeAndCapString(qualityPrompt, MAX_PROMPT_BYTES);
@@ -1641,7 +2036,7 @@ export const processAuditGeneration = internalAction({
          maxOutputTokens: qualityReviewProfile.maxTokens,
        });

-        qualityPassed = guardResult.passed;
+        qualityPassed = qualityResult.object.isValid && guardResult.passed;

        const qualityPayload = {
          isValid: qualityResult.object.isValid && guardResult.passed,
@@ -1776,7 +2171,7 @@ export const processAuditGeneration = internalAction({
          usedSkills: evidenceInput.selectedSkills
            .slice(0, 6)
            .map(toPersistedUsedSkill),
-          skillSummaries: toSkillSummaries(evidenceInput.selectedSkills),
+          skillSummaries: toSkillSummaries(evidenceInput.selectedSkills, skillRegistry),
        },
      );

@@ -1784,6 +2179,28 @@ export const processAuditGeneration = internalAction({
        auditId = persistedAuditId;
      }

+      if (auditId) {
+        await ctx.runMutation(internal.auditGeneration.replaceAuditFindings, {
+          auditId,
+          runId: args.runId,
+          findings: verifiedFindings.slice(0, 12).map((finding) => ({
+            skillId: finding.skillId,
+            claim: finding.claim,
+            recommendation: finding.recommendation,
+            customerBenefit: finding.customerBenefit,
+            severity: finding.severity,
+            confidence: finding.confidence,
+            evidenceRefs: finding.evidenceRefs.slice(0, 6).map((ref) => ({
+              id: ref.id,
+              type: ref.type,
+              label: ref.label,
+              ...(ref.sourceUrl ? { sourceUrl: ref.sourceUrl } : {}),
+            })),
+            reviewStatus: "pending" as const,
+          })),
+        });
+      }
+
      await ctx.runMutation(internal.outreach.upsertFromAuditGeneration, {
        leadId: started.lead._id,
        ...(auditId ? { auditId } : {}),