Externalize audit pipeline services

This commit is contained in:
2026-06-07 23:06:31 +02:00
parent 470fb0f348
commit a45b92ea0a
42 changed files with 3141 additions and 247 deletions

View File

@@ -60,6 +60,7 @@ export type AuditEvidenceInput = {
observedUxSignals: string[];
observedContentSignals: string[];
observedTechnicalSignals: string[];
externalMarkdown?: string;
screenshotReferences: Array<{
storageId: string;
sourceUrl: string;
@@ -80,6 +81,7 @@ export type AuditEvidenceInputArgs = {
screenshots?: readonly AuditScreenshotEvidence[];
pageSpeedInputs?: readonly PageSpeedMinimalAuditResult[];
skillRegistry?: readonly SkillRegistryEntryEvidence[];
externalMarkdown?: string;
};
const COMPANY_CONTEXT_LIMIT = 8;
@@ -90,6 +92,20 @@ const TECHNICAL_SIGNAL_LIMIT = 6;
const PAGESPEED_SIGNAL_LIMIT = 8;
const SCREENSHOT_REFERENCE_LIMIT = 8;
const SELECTED_SKILLS_LIMIT = 6;
const EXTERNAL_MARKDOWN_LIMIT = 4_000;
/**
 * Rank of each core local-audit skill id, keyed by id.
 * The rank is the id's position in the list below, so a lower number sorts earlier.
 */
const V3_LOCAL_AUDIT_PRIORITY: Map<string, number> = new Map(
  Array.from(
    [
      "visual-design",
      "contact-conversion",
      "local-seo-basics",
      "performance-experience",
      "mobile-usability",
      "conversion-copy",
      "first-impression-clarity",
      "trust-signals",
      "accessibility-basics",
    ],
    (skillId, rank): [string, number] => [skillId, rank],
  ),
);
const URL_PATTERN = /\bhttps?:\/\/[^\s<>"']+/i;
const JSON_BRACKET_PATTERN = /\{[^}]*\}|\[[^\]]*\]/;
@@ -140,6 +156,19 @@ function sanitizeCustomerText(value: unknown, maxLength = 180): string {
return text;
}
/**
 * Flattens and caps externally supplied markdown.
 *
 * Every run of whitespace, line breaks included, becomes a single space; the result is
 * trimmed and cut to at most EXTERNAL_MARKDOWN_LIMIT characters.
 *
 * @param value - Unvalidated input; anything that is not a string is rejected.
 * @returns The flattened, capped text, or `undefined` when nothing usable remains.
 */
function sanitizeExternalMarkdown(value: unknown): string | undefined {
  const flattened =
    typeof value === "string" ? value.replace(/\s+/g, " ").trim() : "";
  return flattened.length > 0
    ? flattened.slice(0, EXTERNAL_MARKDOWN_LIMIT)
    : undefined;
}
function addUniqueCapped(
bucket: string[],
input: string,
@@ -233,6 +262,77 @@ function selectTopSkill(
return toAuditUsedSkill(scored[0]!.candidate);
}
// Flags describing which audit inputs are available; read by v3SkillApplies and hasRequiredV3Input.
type SkillInputAvailability = {
// Gate for skills whose appliesWhen is "website_exists".
websiteExists: boolean;
// Satisfies the "desktop_screenshot" skill input.
hasDesktopScreenshot: boolean;
// Satisfies the "mobile_screenshot" skill input and the "has_mobile_screenshot" condition.
hasMobileScreenshot: boolean;
// Satisfies the "markdown" skill input.
hasMarkdown: boolean;
// Satisfies the "pagespeed" skill input and the "has_pagespeed" condition.
hasPageSpeed: boolean;
// Satisfies the "dom" skill input.
hasDom: boolean;
};
/**
 * Tells whether one declared skill input is currently available.
 *
 * @param input - Input name as written in the skill definition.
 * @param availability - Availability flags for the current audit.
 * @returns The matching availability flag; `false` for any input name that is not recognised.
 */
function hasRequiredV3Input(input: string, availability: SkillInputAvailability) {
  // A Map is used instead of an object literal so names such as "constructor" cannot resolve.
  const flagKey = new Map<string, keyof SkillInputAvailability>([
    ["desktop_screenshot", "hasDesktopScreenshot"],
    ["mobile_screenshot", "hasMobileScreenshot"],
    ["markdown", "hasMarkdown"],
    ["pagespeed", "hasPageSpeed"],
    ["dom", "hasDom"],
  ]).get(input);
  return flagKey === undefined ? false : availability[flagKey];
}
/**
 * Decides whether a v3 skill may run for the current audit.
 *
 * The skill's `appliesWhen` condition (defaulting to "website_exists") must hold, and
 * every input the skill declares must be available. An unrecognised condition never applies.
 *
 * @param skill - Registry entry to evaluate.
 * @param availability - Availability flags for the current audit.
 * @returns `true` only when both the condition and all declared inputs are satisfied.
 */
function v3SkillApplies(
  skill: SkillRegistryEntryEvidence,
  availability: SkillInputAvailability,
) {
  let conditionMet: boolean;
  switch (skill.appliesWhen ?? "website_exists") {
    case "always":
      conditionMet = true;
      break;
    case "website_exists":
      conditionMet = availability.websiteExists;
      break;
    case "has_mobile_screenshot":
      conditionMet = availability.hasMobileScreenshot;
      break;
    case "has_pagespeed":
      conditionMet = availability.hasPageSpeed;
      break;
    default:
      conditionMet = false;
  }
  if (!conditionMet) {
    return false;
  }

  for (const input of skill.inputs ?? []) {
    if (!hasRequiredV3Input(input, availability)) {
      return false;
    }
  }
  return true;
}
/**
 * Picks the v3 skills to use for an audit.
 *
 * Candidates are registry entries that have an id, carry no category, and pass
 * v3SkillApplies. Prioritised ids come first in priority order; the rest keep their
 * registry order. The list is then cut to SELECTED_SKILLS_LIMIT entries.
 *
 * @param skillRegistry - All registry entries, in registry order.
 * @param availability - Availability flags for the current audit.
 * @returns The selected skills converted with toAuditUsedSkill.
 */
function selectV3Skills(
  skillRegistry: readonly SkillRegistryEntryEvidence[],
  availability: SkillInputAvailability,
) {
  const candidates = skillRegistry
    .map((skill, registryIndex) => ({ skill, registryIndex }))
    .filter(
      ({ skill }) =>
        skill.id && !skill.category && v3SkillApplies(skill, availability),
    );

  const rankOf = (candidate: (typeof candidates)[number]): number =>
    V3_LOCAL_AUDIT_PRIORITY.get(candidate.skill.id ?? "") ??
    Number.POSITIVE_INFINITY;

  // Sorting happens on the freshly built candidate array, never on the caller's registry.
  candidates.sort((left, right) => {
    const leftRank = rankOf(left);
    const rightRank = rankOf(right);
    const neitherPrioritised =
      leftRank === Number.POSITIVE_INFINITY &&
      rightRank === Number.POSITIVE_INFINITY;
    // Unprioritised skills fall back to registry order; otherwise the lower rank wins.
    return neitherPrioritised
      ? left.registryIndex - right.registryIndex
      : leftRank - rightRank;
  });

  return candidates
    .slice(0, SELECTED_SKILLS_LIMIT)
    .map((candidate) => toAuditUsedSkill(candidate.skill));
}
function buildObservedSignals(
crawlPages: readonly AuditCrawlPageEvidence[],
technicalChecks: readonly AuditTechnicalCheckEvidence[],
@@ -403,8 +503,12 @@ function extractSkills(
marketing: boolean;
offer: boolean;
},
availability: SkillInputAvailability,
): AuditUsedSkill[] {
const selected: AuditUsedSkill[] = [];
const selected: AuditUsedSkill[] = selectV3Skills(
skillRegistry,
availability,
);
const categoryOrder = ["design", "ux", "copy", "seo", "marketing", "offer"] as const;
const evidenceText = {
design:
@@ -450,6 +554,7 @@ export function buildAuditEvidenceInput(
const screenshots = args.screenshots ?? [];
const pageSpeedInputs = args.pageSpeedInputs ?? [];
const skillRegistry = args.skillRegistry ?? [];
const externalMarkdown = sanitizeExternalMarkdown(args.externalMarkdown);
const companyContext: string[] = [];
const checkedPages: string[] = [];
@@ -542,6 +647,26 @@ export function buildAuditEvidenceInput(
...signals.evidenceText,
marketing: false,
offer: false,
}, {
websiteExists:
Boolean(lead.websiteDomain || lead.websiteUrl) ||
crawlPages.length > 0 ||
screenshots.length > 0,
hasDesktopScreenshot: screenshots.some(
(screenshot) => screenshot.viewport === "desktop",
),
hasMobileScreenshot: screenshots.some(
(screenshot) => screenshot.viewport === "mobile",
),
hasMarkdown:
Boolean(externalMarkdown) ||
crawlPages.some((page) =>
Boolean(page.visibleText || page.visibleTextExcerpt),
),
hasPageSpeed:
pageSpeedInputsOutput.customerImplications.length > 0 ||
pageSpeedInputs.some((input) => input.status === "succeeded"),
hasDom: crawlPages.length > 0 || technicalChecks.length > 0,
});
return {
@@ -550,6 +675,7 @@ export function buildAuditEvidenceInput(
observedUxSignals: signals.ux,
observedContentSignals: signals.content,
observedTechnicalSignals: signals.technical,
...(externalMarkdown ? { externalMarkdown } : {}),
screenshotReferences: screenshotReferences.map((reference) => ({
...reference,
width: Math.max(reference.width, 0),

View File

@@ -0,0 +1,233 @@
/**
 * Usage counters and unit prices per external provider, as read by
 * estimateExternalAuditCostUsd. Every field is optional; that function counts
 * missing, negative, or non-finite numbers as 0.
 */
export type ExternalAuditUsageInput = {
// Token counts, each priced per one million tokens.
openRouter?: {
inputTokens?: number;
outputTokens?: number;
inputUsdPerMillionTokens?: number;
outputUsdPerMillionTokens?: number;
};
// Number of screenshots multiplied by a flat per-screenshot price.
screenshotOne?: {
screenshots?: number;
usdPerScreenshot?: number;
};
// Priced per request and per page; the two amounts are added together.
jina?: {
requests?: number;
pages?: number;
usdPerRequest?: number;
usdPerPage?: number;
};
// Not priced: the estimate always reports 0 USD for PageSpeed.
pageSpeed?: {
requests?: number;
};
};
/** Cost estimate in USD returned by estimateExternalAuditCostUsd. */
export type ExternalAuditCostEstimate = {
// Per-provider amounts, each rounded to six decimal places.
byProvider: {
openRouter: number;
screenshotOne: number;
jina: number;
pageSpeed: number;
};
// Sum of the per-provider amounts, rounded to six decimal places.
totalUsd: number;
};
/** The two viewport presets for which buildScreenshotOneRequests produces a request. */
export type ScreenshotOneViewport = "desktop" | "mobile";
/** One prepared ScreenshotOne request. */
export type ScreenshotOneRequest = {
viewport: ScreenshotOneViewport;
// Complete request URL; buildScreenshotOneRequests puts the access key into its query string.
url: string;
};
/** Arguments for buildScreenshotOneRequests. */
export type BuildScreenshotOneRequestsInput = {
accessKey: string;
// Page to capture; must be an http or https URL.
targetUrl: string;
// Overrides the default ScreenshotOne endpoint when given.
endpoint?: string;
};
/** Site paths for which a Jina Reader URL is prepared. */
export type JinaReaderPagePath = "/" | "/kontakt" | "/impressum" | "/leistungen" | "/ueber-uns";
/** Markdown supplied for one page; buildJinaReaderAuditInput matches it to a prepared page by URL. */
export type JinaReaderPageInput = {
url: string;
markdown: string;
};
/** Result of buildJinaReaderAuditInput. */
export type JinaReaderAuditInput = {
// One entry per prepared path, with the page's own URL and its Jina Reader URL.
pages: Array<{
path: JinaReaderPagePath;
sourceUrl: string;
readerUrl: string;
}>;
// The readerUrl of every entry in `pages`, in the same order.
readerUrls: string[];
// Per-page sections joined with "---" separators and capped in length.
markdown: string;
};
/** Arguments for buildJinaReaderAuditInput. */
export type BuildJinaReaderAuditInputOptions = {
// Any http or https URL of the site; path, query, and fragment are discarded.
baseUrl: string;
// Markdown already available per page URL; defaults to an empty list.
pages?: JinaReaderPageInput[];
// Upper bound applied to the combined markdown.
maxMarkdownChars: number;
};
// Default endpoint used by buildScreenshotOneRequests when the caller passes none.
const SCREENSHOT_ONE_ENDPOINT = "https://api.screenshotone.com/take";
// Prefix placed in front of a page URL to form its Jina Reader URL.
const JINA_READER_PREFIX = "https://r.jina.ai/";
// Paths prepared by buildJinaReaderAuditInput, in output order.
const JINA_PAGE_PATHS: JinaReaderPagePath[] = [
"/",
"/kontakt",
"/impressum",
"/leistungen",
"/ueber-uns",
];
/**
 * Rounds a USD amount to six decimal places.
 *
 * @param value - Amount in USD.
 * @returns The amount rounded to the nearest millionth of a dollar.
 */
function roundUsd(value: number): number {
  const unitsPerUsd = 1_000_000;
  // The epsilon nudge keeps values sitting just below a rounding boundary from rounding down.
  const nudged = value + Number.EPSILON;
  return Math.round(nudged * unitsPerUsd) / unitsPerUsd;
}
/**
 * Coerces an optional usage figure into a safe, non-negative number.
 *
 * @param value - Raw figure; may be missing, negative, NaN, or infinite.
 * @returns `value` when it is a finite number of at least 0, otherwise 0.
 */
function nonNegativeOrZero(value: number | undefined): number {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return 0;
  }
  return Math.max(0, value);
}
/**
 * Estimates what the external services used by one audit cost, in USD.
 *
 * Missing, negative, or non-finite counts and prices are treated as 0. Each provider
 * amount and the total are rounded to six decimal places. PageSpeed is always 0.
 *
 * @param usage - Usage counters and unit prices per provider.
 * @returns Per-provider amounts and their rounded sum.
 */
export function estimateExternalAuditCostUsd(
  usage: ExternalAuditUsageInput,
): ExternalAuditCostEstimate {
  const { openRouter: llm, screenshotOne: shots, jina: reader } = usage;

  const inputTokenUsd =
    (nonNegativeOrZero(llm?.inputTokens) / 1_000_000) *
    nonNegativeOrZero(llm?.inputUsdPerMillionTokens);
  const outputTokenUsd =
    (nonNegativeOrZero(llm?.outputTokens) / 1_000_000) *
    nonNegativeOrZero(llm?.outputUsdPerMillionTokens);
  const openRouterUsd = roundUsd(inputTokenUsd + outputTokenUsd);

  const screenshotOneUsd = roundUsd(
    nonNegativeOrZero(shots?.screenshots) *
      nonNegativeOrZero(shots?.usdPerScreenshot),
  );

  const perRequestUsd =
    nonNegativeOrZero(reader?.requests) * nonNegativeOrZero(reader?.usdPerRequest);
  const perPageUsd =
    nonNegativeOrZero(reader?.pages) * nonNegativeOrZero(reader?.usdPerPage);
  const jinaUsd = roundUsd(perRequestUsd + perPageUsd);

  // PageSpeed usage is not priced.
  const pageSpeedUsd = 0;

  return {
    byProvider: {
      openRouter: openRouterUsd,
      screenshotOne: screenshotOneUsd,
      jina: jinaUsd,
      pageSpeed: pageSpeedUsd,
    },
    totalUsd: roundUsd(openRouterUsd + screenshotOneUsd + jinaUsd + pageSpeedUsd),
  };
}
/**
 * Builds the ScreenshotOne request URLs for one page: a desktop and a mobile capture.
 *
 * Both captures are full-page with cookie banners, ads, and trackers blocked.
 * The access key is placed in the query string of every returned URL.
 *
 * @param accessKey - ScreenshotOne access key.
 * @param targetUrl - Page to capture; must be an http or https URL.
 * @param endpoint - API endpoint; defaults to SCREENSHOT_ONE_ENDPOINT.
 * @returns One request per viewport, desktop first.
 * @throws Error when `targetUrl` cannot be parsed or is not http/https.
 */
export function buildScreenshotOneRequests({
  accessKey,
  targetUrl,
  endpoint = SCREENSHOT_ONE_ENDPOINT,
}: BuildScreenshotOneRequestsInput): ScreenshotOneRequest[] {
  let captureTarget: string;
  try {
    captureTarget = parseWebUrl(targetUrl, "target URL").toString();
  } catch {
    throw new Error("Invalid target URL for ScreenshotOne request. Only http and https URLs are supported.");
  }

  const presets: ReadonlyArray<{
    viewport: ScreenshotOneViewport;
    width: number;
    height: number;
    scale: number;
  }> = [
    { viewport: "desktop", width: 1280, height: 900, scale: 1 },
    { viewport: "mobile", width: 390, height: 844, scale: 2 },
  ];

  const requests: ScreenshotOneRequest[] = [];
  for (const preset of presets) {
    const requestUrl = new URL(endpoint);
    // Parameters are applied in this fixed order so the generated URL is stable.
    const query: Array<[string, string]> = [
      ["access_key", accessKey],
      ["url", captureTarget],
      ["viewport_width", String(preset.width)],
      ["viewport_height", String(preset.height)],
      ["device_scale_factor", String(preset.scale)],
      ["full_page", "true"],
      ["block_cookie_banners", "true"],
      ["block_ads", "true"],
      ["block_trackers", "true"],
    ];
    for (const [key, paramValue] of query) {
      requestUrl.searchParams.set(key, paramValue);
    }
    requests.push({ viewport: preset.viewport, url: requestUrl.toString() });
  }
  return requests;
}
/**
 * Prepares the Jina Reader URLs for a site and merges the markdown already available.
 *
 * For each path in JINA_PAGE_PATHS the page URL and its reader URL are derived from the
 * site root. Supplied markdown is matched to those pages by normalized URL; a page
 * without markdown still gets a "Source:" section with an empty body.
 *
 * @param baseUrl - Any http or https URL of the site.
 * @param pages - Markdown per page URL; defaults to none.
 * @param maxMarkdownChars - Cap applied to the combined markdown.
 * @returns The prepared pages, their reader URLs, and the combined, capped markdown.
 * @throws Error when `baseUrl` is invalid, or when a page URL cannot be parsed or is not http/https.
 */
export function buildJinaReaderAuditInput({
  baseUrl,
  pages = [],
  maxMarkdownChars,
}: BuildJinaReaderAuditInputOptions): JinaReaderAuditInput {
  const siteRoot = normalizeBaseUrl(baseUrl);

  // When two inputs normalize to the same URL, the later one wins.
  const markdownByUrl = new Map<string, string>();
  for (const page of pages) {
    markdownByUrl.set(normalizeComparableUrl(page.url), page.markdown);
  }

  const preparedPages: JinaReaderAuditInput["pages"] = [];
  for (const path of JINA_PAGE_PATHS) {
    const sourceUrl = new URL(path, siteRoot).toString();
    preparedPages.push({ path, sourceUrl, readerUrl: toJinaReaderUrl(sourceUrl) });
  }

  const sections: string[] = [];
  for (const { sourceUrl } of preparedPages) {
    const body = markdownByUrl.get(normalizeComparableUrl(sourceUrl)) ?? "";
    sections.push(`Source: ${sourceUrl}\n\n${body.trim()}`);
  }

  return {
    pages: preparedPages,
    readerUrls: preparedPages.map(({ readerUrl }) => readerUrl),
    markdown: capMarkdown(sections.join("\n\n---\n\n"), maxMarkdownChars),
  };
}
/**
 * Reduces a site URL to its root: fragment and query removed, path set to "/".
 *
 * @param baseUrl - Any http or https URL of the site.
 * @returns The parsed root URL.
 * @throws Error when `baseUrl` cannot be parsed or is not http/https.
 */
function normalizeBaseUrl(baseUrl: string): URL {
  let root: URL;
  try {
    root = parseWebUrl(baseUrl, "base URL");
  } catch {
    throw new Error("Invalid base URL for Jina Reader input. Only http and https URLs are supported.");
  }
  root.hash = "";
  root.search = "";
  root.pathname = "/";
  return root;
}
/**
 * Produces a canonical string for comparing page URLs.
 *
 * The fragment is dropped and one trailing slash is removed from any path other than "/".
 * The query string is kept.
 *
 * @param url - http or https page URL.
 * @returns The canonical URL string.
 * @throws When `url` cannot be parsed or is not http/https.
 */
function normalizeComparableUrl(url: string): string {
  const parsed = parseWebUrl(url, "page URL");
  parsed.hash = "";
  const { pathname } = parsed;
  const hasRemovableSlash = pathname.length > 1 && pathname.endsWith("/");
  if (hasRemovableSlash) {
    parsed.pathname = pathname.slice(0, -1);
  }
  return parsed.toString();
}
/**
 * Converts a page URL into its Jina Reader URL.
 *
 * Only scheme, host, path, and query are carried over; the fragment is left out.
 *
 * @param sourceUrl - http or https page URL.
 * @returns JINA_READER_PREFIX followed by the rebuilt page URL.
 * @throws When `sourceUrl` cannot be parsed or is not http/https.
 */
function toJinaReaderUrl(sourceUrl: string): string {
  const { protocol, host, pathname, search } = parseWebUrl(sourceUrl, "source URL");
  return [JINA_READER_PREFIX, protocol, "//", host, pathname, search].join("");
}
/**
 * Parses a string as a URL and accepts only the http and https schemes.
 *
 * @param value - Text to parse.
 * @param label - Name of the value, used in the error message.
 * @returns The parsed URL.
 * @throws TypeError from the URL constructor when `value` is not a valid URL.
 * @throws Error when the scheme is neither http nor https.
 */
function parseWebUrl(value: string, label: string): URL {
  const parsed = new URL(value);
  const isWebScheme = ["http:", "https:"].includes(parsed.protocol);
  if (isWebScheme) {
    return parsed;
  }
  throw new Error(`Invalid ${label}. Only http and https URLs are supported.`);
}
/**
 * Limits markdown to at most `maxMarkdownChars` UTF-16 code units.
 *
 * Text that already fits is returned unchanged. Longer text is cut and ends with a
 * "[truncated to N chars]" marker whenever that marker fits inside the limit. When the
 * limit is too small for the marker, the text is cut without it so the result never
 * exceeds the limit. The cut never leaves half of a surrogate pair behind.
 *
 * @param markdown - Text to cap.
 * @param maxMarkdownChars - Maximum length of the result.
 * @returns The capped text; an empty string for a NaN limit or a limit below 1.
 */
function capMarkdown(markdown: string, maxMarkdownChars: number): string {
  if (markdown.length <= maxMarkdownChars) {
    return markdown;
  }
  // Reached for NaN too (every comparison with NaN is false), as well as 0 and negatives.
  if (!(maxMarkdownChars >= 1)) {
    return "";
  }

  const limit = Math.floor(maxMarkdownChars);
  const cutAt = (end: number): string => {
    // Step back one unit if the cut would strand a high surrogate at the end.
    const lastUnit = markdown.charCodeAt(end - 1);
    const strandsHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
    return markdown.slice(0, strandsHighSurrogate ? end - 1 : end);
  };

  const suffix = `[truncated to ${maxMarkdownChars} chars]`;
  if (suffix.length > limit) {
    // The marker alone would overshoot the limit, so honour the limit and omit it.
    return cutAt(limit);
  }
  return `${cutAt(limit - suffix.length)}${suffix}`;
}

View File

@@ -5,10 +5,11 @@ export type IntegrationReadinessDefinition = {
| "google"
| "pagespeed"
| "openrouter"
| "playwright"
| "screenshotone"
| "smtp"
| "convex_jobs"
| "rybbit";
| "rybbit"
| "jina";
label: string;
requiredEnv: string[];
errorSurface: string;
@@ -39,10 +40,10 @@ export const integrationReadinessDefinitions: IntegrationReadinessDefinition[] =
errorSurface: "Audit-Generierungsruns zeigen Modell- und Guard-Fehler.",
},
{
id: "playwright",
label: "Playwright",
requiredEnv: ["TASK8_BROWSER_ASSET_URL"],
errorSurface: "Website-Enrichment-Runs zeigen Browser- und Crawl-Fehler.",
id: "screenshotone",
label: "ScreenshotOne",
requiredEnv: ["SCREENSHOTONE_API_KEY"],
errorSurface: "Screenshot-Erfassung zeigt API-, Quota- und Rendering-Fehler.",
},
{
id: "smtp",
@@ -62,6 +63,12 @@ export const integrationReadinessDefinitions: IntegrationReadinessDefinition[] =
requiredEnv: ["RYBBIT_API_URL", "RYBBIT_API_KEY", "NEXT_PUBLIC_RYBBIT_SITE_ID"],
errorSurface: "Analytics zeigt API-Fehler als nicht blockierende Meldung.",
},
{
id: "jina",
label: "Jina",
requiredEnv: [],
errorSurface: "Optionaler Fetch-/Reader-Fallback zeigt Fehler im Audit-Quellenkontext.",
},
];
export function getIntegrationReadiness(

View File

@@ -13,20 +13,27 @@ export const SKILL_CATEGORIES = [
export type SkillCategory = (typeof SKILL_CATEGORIES)[number];
export type SkillRegistryEntry = {
id?: string;
name: string;
title?: string;
purpose: string;
whenToUse: string;
whenNotToUse: string;
requiredInput: string;
expectedOutput: string;
category: SkillCategory;
category?: SkillCategory;
appliesWhen?: string;
inputs?: string[];
outputs?: string;
instructions?: string;
version?: string;
source?: string;
};
export type AuditUsedSkill = {
id?: string;
name: string;
category: SkillCategory;
category?: SkillCategory;
version?: string;
source?: string;
};
@@ -51,6 +58,7 @@ const REQUIRED_FIELDS: ParsedFieldName[] = [
];
const FIELD_LABELS_RE = /^(Purpose|When to use|When not to use|Required input|Expected output|Category|Version|Source):\s*(.*?)\s*$/;
const V3_META_BLOCK_RE = /```yaml\s*\n([\s\S]*?)\n```\s*\n?([\s\S]*)$/;
function normalizeCategory(value: string): SkillCategory {
const normalized = value.toLowerCase();
@@ -129,6 +137,108 @@ function parseSection(lines: string[], sectionIndex: number): SkillRegistryEntry
};
}
/**
 * Parses a list value taken from a v3 skill meta block.
 *
 * Accepts a bracketed, comma-separated list (`[a, b]`) or a single bare value.
 * Entries are trimmed, one pair of matching single or double quotes around an entry
 * is removed, and empty entries are dropped. Commas inside quotes are not supported.
 *
 * @param value - Raw text found after the key.
 * @returns The list entries; empty when no entry remains.
 */
function parseV3List(value: string): string[] {
  // Quoted entries are valid in a YAML list; without this step the quote characters
  // would become part of the entry and it would never equal a known input name.
  const unquote = (entry: string): string => {
    const text = entry.trim();
    const opener = text.charAt(0);
    const isQuoted =
      text.length >= 2 &&
      (opener === '"' || opener === "'") &&
      text.endsWith(opener);
    return isQuoted ? text.slice(1, -1).trim() : text;
  };

  const trimmed = value.trim();
  const isBracketed = trimmed.startsWith("[") && trimmed.endsWith("]");
  const rawEntries = isBracketed ? trimmed.slice(1, -1).split(",") : [trimmed];
  return rawEntries.map(unquote).filter(Boolean);
}
/**
 * Reads `key: value` lines from the body of a v3 skill meta block.
 *
 * Only keys made of lowercase letters and underscores are recognised; every other line
 * is ignored. Values are trimmed, and a repeated key keeps its last value.
 *
 * @param metaSource - Text between the opening and closing fence of the meta block.
 * @returns Recognised keys mapped to their trimmed values.
 */
function parseV3MetaBlock(metaSource: string): Record<string, string> {
  const values: Record<string, string> = {};
  const linePattern = /^([a-z_]+):\s*(.*?)\s*$/;
  for (const rawLine of metaSource.split("\n")) {
    const parsed = linePattern.exec(rawLine.trim());
    if (parsed === null) {
      continue;
    }
    const [, key, rawValue] = parsed;
    values[key] = rawValue.trim();
  }
  return values;
}
/**
 * Parses one registry section written in the v3 format: a fenced yaml meta block
 * followed by free-text instructions.
 *
 * @param rawBody - Section text without its heading line.
 * @param sectionIndex - 1-based section number, used in error messages.
 * @returns The parsed entry, or `null` when the section has no meta block or the block has no id.
 * @throws Error when a required meta field is empty or missing, or when no instructions follow the block.
 */
function parseV3Section(
  rawBody: string,
  sectionIndex: number,
): SkillRegistryEntry | null {
  const blockMatch = rawBody.match(V3_META_BLOCK_RE);
  if (blockMatch === null) {
    return null;
  }
  const [, metaSource, instructionSource] = blockMatch;

  const meta = parseV3MetaBlock(metaSource);
  if (!meta.id) {
    // A meta block without an id is not treated as a v3 skill section.
    return null;
  }

  const missingField = ["id", "title", "applies_when", "inputs", "outputs"].find(
    (field) => !meta[field],
  );
  if (missingField !== undefined) {
    throw new Error(
      `Missing required v3 field "${missingField}" for skill section ${sectionIndex}.`,
    );
  }

  const instructions = instructionSource.trim();
  if (instructions.length === 0) {
    throw new Error(`Missing instructions for v3 skill "${meta.id}".`);
  }

  const inputs = parseV3List(meta.inputs);
  // The legacy-shaped fields are filled from the v3 values so both shapes are populated.
  return {
    id: meta.id,
    name: meta.title,
    title: meta.title,
    purpose: instructions,
    whenToUse: meta.applies_when,
    whenNotToUse: "Use only when applies_when and inputs match.",
    requiredInput: inputs.join(", "),
    expectedOutput: meta.outputs,
    appliesWhen: meta.applies_when,
    inputs,
    outputs: meta.outputs,
    instructions,
  };
}
/**
 * Appends a parsed skill to the registry result and rejects duplicates.
 *
 * Names and ids are compared after trimming and lowercasing. Nothing is recorded when
 * a duplicate is found.
 *
 * @param entries - Result list; receives `parsed` on success.
 * @param names - Normalized names seen so far.
 * @param ids - Normalized ids seen so far.
 * @param parsed - Entry to add.
 * @throws Error when the name or the id is already present.
 */
function addParsedEntry(
  entries: SkillRegistryEntry[],
  names: Set<string>,
  ids: Set<string>,
  parsed: SkillRegistryEntry,
) {
  const nameKey = parsed.name.trim().toLowerCase();
  if (names.has(nameKey)) {
    throw new Error(`Duplicate skill name "${parsed.name}" in skills registry.`);
  }

  const idKey = parsed.id ? parsed.id.trim().toLowerCase() : undefined;
  if (idKey !== undefined && ids.has(idKey)) {
    throw new Error(`Duplicate skill id "${parsed.id}" in skills registry.`);
  }

  // Both checks passed, so the bookkeeping can be updated.
  if (idKey !== undefined) {
    ids.add(idKey);
  }
  names.add(nameKey);
  entries.push(parsed);
}
/**
 * Checks whether a section body contains at least one line matching FIELD_LABELS_RE.
 *
 * @param source - Section body text.
 * @returns `true` as soon as one trimmed line matches, otherwise `false`.
 */
function hasLegacyFieldLabels(source: string): boolean {
  for (const line of source.split("\n")) {
    if (FIELD_LABELS_RE.test(line.trim())) {
      return true;
    }
  }
  return false;
}
export function parseSkillsRegistry(source: string): SkillRegistryEntry[] {
const normalized = source.replace(/\r\n/g, "\n");
const rawSections = normalized
@@ -138,6 +248,45 @@ export function parseSkillsRegistry(source: string): SkillRegistryEntry[] {
const entries: SkillRegistryEntry[] = [];
const names = new Set<string>();
const ids = new Set<string>();
const v3Entries: SkillRegistryEntry[] = [];
for (let index = 0; index < rawSections.length; index += 1) {
const rawSection = rawSections[index];
const lines = rawSection
.split("\n")
.map((line) => line.trimEnd())
.filter((line, lineIndex) => line.length > 0 || lineIndex === 0);
const sectionBody = lines.slice(1).join("\n");
const parsed = parseV3Section(sectionBody, index + 1);
if (parsed && parsed.id !== "kebab-case-id") {
v3Entries.push(parsed);
}
}
if (v3Entries.length > 0) {
for (let index = 0; index < rawSections.length; index += 1) {
const rawSection = rawSections[index];
const lines = rawSection
.split("\n")
.map((line) => line.trimEnd())
.filter((line, lineIndex) => line.length > 0 || lineIndex === 0);
const sectionTitle = lines.at(0) ?? "";
const sectionBody = lines.slice(1).join("\n");
const sectionLines = [`## ${sectionTitle}`, ...lines.slice(1)];
const parsed = parseV3Section(sectionBody, index + 1);
if (parsed) {
if (parsed.id !== "kebab-case-id") {
addParsedEntry(entries, names, ids, parsed);
}
continue;
}
if (hasLegacyFieldLabels(sectionBody)) {
addParsedEntry(entries, names, ids, parseSection(sectionLines, index + 1));
}
}
return entries;
}
for (let index = 0; index < rawSections.length; index += 1) {
const rawSection = rawSections[index];
@@ -146,16 +295,10 @@ export function parseSkillsRegistry(source: string): SkillRegistryEntry[] {
.map((line) => line.trimEnd())
.filter((line, lineIndex) => line.length > 0 || lineIndex === 0);
const sectionLines = [`## ${lines.at(0) ?? ""}`, ...lines.slice(1)];
const sectionTitle = lines.at(0) ?? "";
const sectionLines = [`## ${sectionTitle}`, ...lines.slice(1)];
const parsed = parseSection(sectionLines, index + 1);
const normalizedName = parsed.name.trim().toLowerCase();
if (names.has(normalizedName)) {
throw new Error(`Duplicate skill name "${parsed.name}" in skills registry.`);
}
names.add(normalizedName);
entries.push(parsed);
addParsedEntry(entries, names, ids, parsed);
}
return entries;
@@ -169,10 +312,24 @@ export async function loadSkillsRegistry(
}
export function toAuditUsedSkill(skill: SkillRegistryEntry): AuditUsedSkill {
return {
const usedSkill: AuditUsedSkill = {
name: skill.name,
category: skill.category,
version: skill.version,
source: skill.source,
};
if (skill.id) {
usedSkill.id = skill.id;
}
if (skill.category) {
usedSkill.category = skill.category;
}
if (!skill.version) {
delete usedSkill.version;
}
if (!skill.source) {
delete usedSkill.source;
}
return usedSkill;
}