Externalize audit pipeline services

This commit is contained in:
2026-06-07 23:06:31 +02:00
parent 470fb0f348
commit a45b92ea0a
42 changed files with 3141 additions and 247 deletions

View File

@@ -0,0 +1,233 @@
/**
 * Usage figures and unit prices per external provider, as consumed by
 * `estimateExternalAuditCostUsd`. Every provider and every field is optional;
 * the estimator treats missing or non-finite numbers as 0 and clamps negatives to 0.
 */
export type ExternalAuditUsageInput = {
// Token counts plus USD prices quoted per one million tokens.
openRouter?: {
inputTokens?: number;
outputTokens?: number;
inputUsdPerMillionTokens?: number;
outputUsdPerMillionTokens?: number;
};
// Number of screenshots taken and the USD price of one screenshot.
screenshotOne?: {
screenshots?: number;
usdPerScreenshot?: number;
};
// Request and page counts, each with its own USD unit price; the estimator adds both products.
jina?: {
requests?: number;
pages?: number;
usdPerRequest?: number;
usdPerPage?: number;
};
// Carries a request count only; `estimateExternalAuditCostUsd` does not read it
// and always reports a pageSpeed cost of 0.
pageSpeed?: {
requests?: number;
};
};
/**
 * Result of `estimateExternalAuditCostUsd`: a USD amount per provider plus
 * their sum, each rounded to six decimal places by the estimator.
 */
export type ExternalAuditCostEstimate = {
byProvider: {
openRouter: number;
screenshotOne: number;
jina: number;
pageSpeed: number;
};
totalUsd: number;
};
/** Viewport presets for which `buildScreenshotOneRequests` produces a request. */
export type ScreenshotOneViewport = "desktop" | "mobile";
/** One prepared screenshot request: the viewport it targets and the full request URL. */
export type ScreenshotOneRequest = {
viewport: ScreenshotOneViewport;
// Endpoint URL with all query parameters (including the access key) already applied.
url: string;
};
/** Options accepted by `buildScreenshotOneRequests`. */
export type BuildScreenshotOneRequestsInput = {
// Sent as the `access_key` query parameter of every request URL.
accessKey: string;
// Page to capture; must parse as an http or https URL.
targetUrl: string;
// Optional override of the API endpoint; defaults to `SCREENSHOT_ONE_ENDPOINT`.
endpoint?: string;
};
/** Site-relative paths that `buildJinaReaderAuditInput` resolves against the base URL. */
export type JinaReaderPagePath = "/" | "/kontakt" | "/impressum" | "/leistungen" | "/ueber-uns";
/** Markdown content for one page, identified by the page's URL. */
export type JinaReaderPageInput = {
url: string;
markdown: string;
};
/** Output of `buildJinaReaderAuditInput`. */
export type JinaReaderAuditInput = {
// One entry per audited path: the path, its absolute URL, and the matching Jina Reader URL.
pages: Array<{
path: JinaReaderPagePath;
sourceUrl: string;
readerUrl: string;
}>;
// The `readerUrl` of every entry in `pages`, in the same order.
readerUrls: string[];
// All page sections joined into one string and capped to the configured length.
markdown: string;
};
/** Options accepted by `buildJinaReaderAuditInput`. */
export type BuildJinaReaderAuditInputOptions = {
// Site URL; only its scheme and host are kept (path, query and hash are reset).
baseUrl: string;
// Markdown fetched so far; defaults to an empty list when omitted.
pages?: JinaReaderPageInput[];
// Upper bound passed to `capMarkdown` for the combined markdown.
maxMarkdownChars: number;
};
// Default endpoint used by `buildScreenshotOneRequests` when no `endpoint` is supplied.
const SCREENSHOT_ONE_ENDPOINT = "https://api.screenshotone.com/take";
// Prefix that `toJinaReaderUrl` puts in front of a source URL to form the reader URL.
const JINA_READER_PREFIX = "https://r.jina.ai/";
// Paths audited on every site, in the order they appear in the generated output.
const JINA_PAGE_PATHS: JinaReaderPagePath[] = [
"/",
"/kontakt",
"/impressum",
"/leistungen",
"/ueber-uns",
];
/**
 * Rounds a USD amount to six decimal places.
 *
 * The value is nudged by `Number.EPSILON` before scaling — presumably to keep
 * amounts that sit a hair below a rounding boundary (due to binary
 * floating-point representation) from rounding down.
 *
 * @param value - Amount in USD.
 * @returns The amount rounded to the nearest millionth of a dollar.
 */
function roundUsd(value: number): number {
  const scale = 1_000_000;
  const scaled = (value + Number.EPSILON) * scale;
  return Math.round(scaled) / scale;
}
/**
 * Sanitises an optional numeric input for cost arithmetic.
 *
 * @param value - Possibly missing or invalid number.
 * @returns `0` for anything that is not a finite number (including `undefined`,
 *   `NaN` and ±`Infinity`) and for negative numbers; otherwise the value itself.
 */
function nonNegativeOrZero(value: number | undefined): number {
  if (typeof value !== "number") {
    return 0;
  }
  if (!Number.isFinite(value)) {
    return 0;
  }
  // Math.max also normalises -0 to +0.
  return Math.max(0, value);
}
/**
 * Estimates what one audit costs in USD across the external providers.
 *
 * Every count and price is passed through `nonNegativeOrZero`, so missing,
 * non-finite or negative inputs contribute nothing. Each provider subtotal and
 * the grand total are rounded with `roundUsd`.
 *
 * @param usage - Usage counts and unit prices per provider.
 * @returns Per-provider subtotals and their sum. The `pageSpeed` subtotal is
 *   always 0; `usage.pageSpeed` is not read.
 */
export function estimateExternalAuditCostUsd(
  usage: ExternalAuditUsageInput,
): ExternalAuditCostEstimate {
  const { openRouter: llmUsage, screenshotOne: shotUsage, jina: readerUsage } = usage;
  const tokensPerMillion = 1_000_000;

  // OpenRouter: prices are quoted per one million tokens.
  const inputTokenCost =
    (nonNegativeOrZero(llmUsage?.inputTokens) / tokensPerMillion) *
    nonNegativeOrZero(llmUsage?.inputUsdPerMillionTokens);
  const outputTokenCost =
    (nonNegativeOrZero(llmUsage?.outputTokens) / tokensPerMillion) *
    nonNegativeOrZero(llmUsage?.outputUsdPerMillionTokens);
  const openRouter = roundUsd(inputTokenCost + outputTokenCost);

  // ScreenshotOne: flat price per screenshot.
  const screenshotOne = roundUsd(
    nonNegativeOrZero(shotUsage?.screenshots) * nonNegativeOrZero(shotUsage?.usdPerScreenshot),
  );

  // Jina: per-request and per-page charges are added together.
  const requestCost =
    nonNegativeOrZero(readerUsage?.requests) * nonNegativeOrZero(readerUsage?.usdPerRequest);
  const pageCost =
    nonNegativeOrZero(readerUsage?.pages) * nonNegativeOrZero(readerUsage?.usdPerPage);
  const jina = roundUsd(requestCost + pageCost);

  // PageSpeed is accounted as free.
  const pageSpeed = 0;

  const byProvider = { openRouter, screenshotOne, jina, pageSpeed };
  const totalUsd = roundUsd(openRouter + screenshotOne + jina + pageSpeed);
  return { byProvider, totalUsd };
}
/**
 * Builds the ScreenshotOne request URLs for a page: one for a desktop viewport
 * (1280x900, scale 1) and one for a mobile viewport (390x844, scale 2).
 *
 * Each request asks for a full-page capture with cookie banners, ads and
 * trackers blocked. Query parameters are applied with `searchParams.set`, so
 * parameters already present on `endpoint` are kept unless they share a name.
 *
 * @param accessKey - Value sent as the `access_key` query parameter.
 * @param targetUrl - Page to capture; must be an http or https URL.
 * @param endpoint - API endpoint; defaults to `SCREENSHOT_ONE_ENDPOINT`.
 * @returns The desktop request followed by the mobile request.
 * @throws Error when `targetUrl` cannot be parsed or is not http/https. An
 *   unparsable `endpoint` is not wrapped and surfaces as the error raised by `new URL`.
 */
export function buildScreenshotOneRequests({
  accessKey,
  targetUrl,
  endpoint = SCREENSHOT_ONE_ENDPOINT,
}: BuildScreenshotOneRequestsInput): ScreenshotOneRequest[] {
  let captureTarget: string;
  try {
    captureTarget = parseWebUrl(targetUrl, "target URL").toString();
  } catch {
    throw new Error("Invalid target URL for ScreenshotOne request. Only http and https URLs are supported.");
  }

  // [viewport, width, height, device scale factor]
  const presets: ReadonlyArray<readonly [ScreenshotOneViewport, number, number, number]> = [
    ["desktop", 1280, 900, 1],
    ["mobile", 390, 844, 2],
  ];

  const requests: ScreenshotOneRequest[] = [];
  for (const [viewport, width, height, scale] of presets) {
    const captureUrl = new URL(endpoint);
    // Order matters only for the serialized query string; keep it stable.
    const query: Array<[string, string]> = [
      ["access_key", accessKey],
      ["url", captureTarget],
      ["viewport_width", String(width)],
      ["viewport_height", String(height)],
      ["device_scale_factor", String(scale)],
      ["full_page", "true"],
      ["block_cookie_banners", "true"],
      ["block_ads", "true"],
      ["block_trackers", "true"],
    ];
    for (const [key, value] of query) {
      captureUrl.searchParams.set(key, value);
    }
    requests.push({ viewport, url: captureUrl.toString() });
  }
  return requests;
}
/**
 * Prepares the Jina Reader input for a site audit.
 *
 * For every path in `JINA_PAGE_PATHS` the absolute source URL and its reader
 * URL are derived from the site root. Markdown supplied in `pages` is matched
 * to those URLs after normalisation (hash dropped, trailing slash ignored);
 * paths without supplied markdown get an empty section. Sections are headed by
 * `Source: <url>`, joined with `---` separators and capped via `capMarkdown`.
 *
 * @param baseUrl - Site URL; only its scheme and host are used.
 * @param pages - Markdown per page URL; defaults to none. If two entries
 *   normalise to the same URL, the later one wins.
 * @param maxMarkdownChars - Cap handed to `capMarkdown` for the combined markdown.
 * @returns Page descriptors, their reader URLs, and the combined markdown.
 * @throws Error when `baseUrl` is not a valid http/https URL. An invalid
 *   `pages[].url` propagates whatever `normalizeComparableUrl` throws.
 */
export function buildJinaReaderAuditInput({
  baseUrl,
  pages = [],
  maxMarkdownChars,
}: BuildJinaReaderAuditInputOptions): JinaReaderAuditInput {
  const siteRoot = normalizeBaseUrl(baseUrl);

  const markdownByUrl = new Map<string, string>();
  for (const { url, markdown } of pages) {
    markdownByUrl.set(normalizeComparableUrl(url), markdown);
  }

  const auditPages: JinaReaderAuditInput["pages"] = [];
  const readerUrls: string[] = [];
  const sections: string[] = [];
  for (const path of JINA_PAGE_PATHS) {
    const sourceUrl = new URL(path, siteRoot).toString();
    const readerUrl = toJinaReaderUrl(sourceUrl);
    auditPages.push({ path, sourceUrl, readerUrl });
    readerUrls.push(readerUrl);

    const body = markdownByUrl.get(normalizeComparableUrl(sourceUrl)) ?? "";
    sections.push(`Source: ${sourceUrl}\n\n${body.trim()}`);
  }

  const combined = sections.join("\n\n---\n\n");
  return {
    pages: auditPages,
    readerUrls,
    markdown: capMarkdown(combined, maxMarkdownChars),
  };
}
/**
 * Reduces a site URL to its root: same scheme and host, path `/`, no query
 * string and no hash.
 *
 * @param baseUrl - Site URL to normalise.
 * @returns A `URL` pointing at the site root.
 * @throws Error with a Jina-specific message when `baseUrl` cannot be parsed
 *   or is not http/https.
 */
function normalizeBaseUrl(baseUrl: string): URL {
  let root: URL;
  try {
    root = parseWebUrl(baseUrl, "base URL");
  } catch {
    throw new Error("Invalid base URL for Jina Reader input. Only http and https URLs are supported.");
  }
  root.pathname = "/";
  root.search = "";
  root.hash = "";
  return root;
}
/**
 * Produces a canonical string form of a page URL for equality comparison.
 *
 * The hash is removed and a single trailing slash is stripped from any path
 * other than the root `/`. The query string is kept.
 *
 * @param url - Page URL; must be http or https.
 * @returns The canonical URL string.
 * @throws Whatever `parseWebUrl` throws for an unparsable or non-web URL.
 */
function normalizeComparableUrl(url: string): string {
  const parsed = parseWebUrl(url, "page URL");
  parsed.hash = "";

  const { pathname } = parsed;
  const isRoot = pathname === "/";
  if (!isRoot && pathname.endsWith("/")) {
    parsed.pathname = pathname.slice(0, -1);
  }
  return parsed.toString();
}
/**
 * Converts a page URL into its Jina Reader URL by prefixing it with
 * `JINA_READER_PREFIX`.
 *
 * Only scheme, host, path and query string of the source URL are carried over;
 * the hash and any credentials are left out.
 *
 * @param sourceUrl - Page URL; must be http or https.
 * @returns The reader URL.
 * @throws Whatever `parseWebUrl` throws for an unparsable or non-web URL.
 */
function toJinaReaderUrl(sourceUrl: string): string {
  const { protocol, host, pathname, search } = parseWebUrl(sourceUrl, "source URL");
  return [JINA_READER_PREFIX, protocol, "//", host, pathname, search].join("");
}
/**
 * Parses a string as a URL and accepts it only when the scheme is http or https.
 *
 * @param value - Text to parse.
 * @param label - Human-readable name of the value, used in the error message.
 * @returns The parsed `URL`.
 * @throws Error naming `label` when the scheme is not http/https. Text that is
 *   not a URL at all fails inside the `URL` constructor, whose error is not wrapped.
 */
function parseWebUrl(value: string, label: string): URL {
  const parsed = new URL(value);
  const isWebScheme = parsed.protocol === "http:" || parsed.protocol === "https:";
  if (isWebScheme) {
    return parsed;
  }
  throw new Error(`Invalid ${label}. Only http and https URLs are supported.`);
}
/**
 * Caps `markdown` at `maxMarkdownChars` characters (UTF-16 code units, as
 * counted by `String.prototype.length`).
 *
 * Text within the limit is returned unchanged. Longer text is cut and finished
 * with a `[truncated to N chars]` marker; the marker counts toward the limit,
 * so the result is never longer than the limit.
 *
 * If the limit is too small to hold the marker, the text is cut to the limit
 * without a marker rather than returning a marker that exceeds the cap.
 * A `NaN` or negative limit is treated as 0 and a fractional limit is rounded
 * down.
 *
 * @param markdown - Text to cap.
 * @param maxMarkdownChars - Maximum length of the returned string.
 * @returns `markdown` itself, or a truncated version no longer than the limit.
 */
function capMarkdown(markdown: string, maxMarkdownChars: number): string {
  // Normalise the limit so the comparisons and slices below are well defined.
  const limit = Number.isNaN(maxMarkdownChars) ? 0 : Math.max(0, Math.floor(maxMarkdownChars));
  if (markdown.length <= limit) {
    return markdown;
  }

  const suffix = `[truncated to ${limit} chars]`;
  if (suffix.length > limit) {
    // The marker alone would exceed the cap; a bare cut respects it.
    return markdown.slice(0, limit);
  }
  return `${markdown.slice(0, limit - suffix.length)}${suffix}`;
}