// Commit: Externalize audit pipeline services
// New file: lib/external-audit-services.ts (233 lines added; hunk @@ -0,0 +1,233 @@)
|
||||
/**
 * Usage counters and unit prices for the external services of an audit run.
 *
 * Every provider section and every field is optional;
 * `estimateExternalAuditCostUsd` counts a missing, negative, or non-finite
 * number as 0.
 */
export type ExternalAuditUsageInput = {
  /** OpenRouter token usage and per-million-token prices. */
  openRouter?: {
    /** Number of input tokens consumed. */
    inputTokens?: number;
    /** Number of output tokens produced. */
    outputTokens?: number;
    /** Price in USD for one million input tokens. */
    inputUsdPerMillionTokens?: number;
    /** Price in USD for one million output tokens. */
    outputUsdPerMillionTokens?: number;
  };
  /** ScreenshotOne usage: screenshot count and price per screenshot. */
  screenshotOne?: {
    /** Number of screenshots taken. */
    screenshots?: number;
    /** Price in USD for a single screenshot. */
    usdPerScreenshot?: number;
  };
  /** Jina usage; request-based and page-based costs are added together. */
  jina?: {
    /** Number of requests, billed at `usdPerRequest`. */
    requests?: number;
    /** Number of pages, billed at `usdPerPage`. */
    pages?: number;
    /** Price in USD per request. */
    usdPerRequest?: number;
    /** Price in USD per page. */
    usdPerPage?: number;
  };
  /** PageSpeed usage; `estimateExternalAuditCostUsd` always reports its cost as 0. */
  pageSpeed?: {
    /** Number of PageSpeed requests (not used in the cost calculation). */
    requests?: number;
  };
};
|
||||
|
||||
/**
 * Result of `estimateExternalAuditCostUsd`: the estimated cost in USD for
 * each provider plus the overall total.
 */
export type ExternalAuditCostEstimate = {
  /** Estimated cost in USD, keyed by provider. */
  byProvider: {
    /** Cost of OpenRouter input and output tokens. */
    openRouter: number;
    /** Cost of ScreenshotOne screenshots. */
    screenshotOne: number;
    /** Cost of Jina requests and pages. */
    jina: number;
    /** Cost of PageSpeed; the estimator always sets this to 0. */
    pageSpeed: number;
  };
  /** Sum of all provider costs in USD. */
  totalUsd: number;
};
|
||||
|
||||
/** Viewport presets for which a ScreenshotOne request is generated. */
export type ScreenshotOneViewport =
  | "desktop"
  | "mobile";
|
||||
|
||||
/** One ready-to-call ScreenshotOne request, as built by `buildScreenshotOneRequests`. */
export type ScreenshotOneRequest = {
  /** Viewport preset this request was built for. */
  viewport: ScreenshotOneViewport;
  /** Full request URL including all query parameters (the access key is part of it). */
  url: string;
};
|
||||
|
||||
/** Options accepted by `buildScreenshotOneRequests`. */
export type BuildScreenshotOneRequestsInput = {
  /** ScreenshotOne access key; sent as the `access_key` query parameter. */
  accessKey: string;
  /** Page to capture; must be an absolute http or https URL. */
  targetUrl: string;
  /** API endpoint override; defaults to `SCREENSHOT_ONE_ENDPOINT` when omitted. */
  endpoint?: string;
};
|
||||
|
||||
/**
 * Site paths that are prepared for the Jina Reader audit input.
 * The non-root paths are German page slugs: contact, legal notice,
 * services, and about us.
 */
export type JinaReaderPagePath =
  | "/"
  | "/kontakt"
  | "/impressum"
  | "/leistungen"
  | "/ueber-uns";
|
||||
|
||||
/** Markdown content already available for one page, supplied by the caller. */
export type JinaReaderPageInput = {
  /** Absolute http(s) URL of the page; matched against the prepared source URLs. */
  url: string;
  /** Markdown text for that page (presumably the Jina Reader output — confirm with the caller). */
  markdown: string;
};
|
||||
|
||||
/** Output of `buildJinaReaderAuditInput`. */
export type JinaReaderAuditInput = {
  /** One entry per prepared path, in the order of `JINA_PAGE_PATHS`. */
  pages: {
    /** Site-relative path of the page. */
    path: JinaReaderPagePath;
    /** Absolute URL of the page on the audited site. */
    sourceUrl: string;
    /** Jina Reader URL that wraps `sourceUrl`. */
    readerUrl: string;
  }[];
  /** The `readerUrl` of every entry in `pages`, in the same order. */
  readerUrls: string[];
  /** Combined markdown of all pages, capped to the configured maximum length. */
  markdown: string;
};
|
||||
|
||||
/** Options accepted by `buildJinaReaderAuditInput`. */
export type BuildJinaReaderAuditInputOptions = {
  /** Any http(s) URL of the audited site; its path, query, and fragment are discarded. */
  baseUrl: string;
  /** Markdown already available per page; defaults to an empty list when omitted. */
  pages?: JinaReaderPageInput[];
  /** Maximum length, in characters, of the combined markdown. */
  maxMarkdownChars: number;
};
|
||||
|
||||
/** Default ScreenshotOne endpoint used when the caller does not override it. */
const SCREENSHOT_ONE_ENDPOINT = "https://api.screenshotone.com/take";

/** Prefix placed in front of a page URL to form its Jina Reader URL. */
const JINA_READER_PREFIX = "https://r.jina.ai/";
|
||||
/** Paths prepared for every audit, in the order they appear in the combined output. */
const JINA_PAGE_PATHS: JinaReaderPagePath[] = ["/", "/kontakt", "/impressum", "/leistungen", "/ueber-uns"];
|
||||
|
||||
/**
 * Rounds a USD amount to six decimal places.
 *
 * `Number.EPSILON` is added before scaling, presumably to counter binary
 * floating-point representation error at half steps.
 *
 * @param value - Amount in USD.
 * @returns The amount rounded to micro-dollar precision.
 */
function roundUsd(value: number): number {
  const microsPerUsd = 1_000_000;
  const scaled = (value + Number.EPSILON) * microsPerUsd;
  return Math.round(scaled) / microsPerUsd;
}
|
||||
|
||||
/**
 * Sanitises an optional numeric input for cost arithmetic.
 *
 * @param value - Candidate number; may be `undefined`.
 * @returns `value` when it is a finite number greater than zero, otherwise 0
 *   (covers `undefined`, `NaN`, infinities, and negative numbers).
 */
function nonNegativeOrZero(value: number | undefined): number {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return 0;
  }
  return Math.max(0, value);
}
|
||||
|
||||
/**
 * Estimates the USD cost of an audit run from usage counters and unit prices.
 *
 * Every counter and price passes through `nonNegativeOrZero`, so missing,
 * negative, or non-finite values contribute nothing. PageSpeed is always
 * reported as 0.
 *
 * @param usage - Per-provider usage counts and unit prices; all fields optional.
 * @returns Per-provider costs and their total, each rounded with `roundUsd`.
 */
export function estimateExternalAuditCostUsd(
  usage: ExternalAuditUsageInput,
): ExternalAuditCostEstimate {
  const tokensPerPriceUnit = 1_000_000;
  const llm = usage.openRouter;
  const shots = usage.screenshotOne;
  const reader = usage.jina;

  // OpenRouter: token counts are priced per million tokens.
  const inputTokenCost =
    (nonNegativeOrZero(llm?.inputTokens) / tokensPerPriceUnit) *
    nonNegativeOrZero(llm?.inputUsdPerMillionTokens);
  const outputTokenCost =
    (nonNegativeOrZero(llm?.outputTokens) / tokensPerPriceUnit) *
    nonNegativeOrZero(llm?.outputUsdPerMillionTokens);

  // ScreenshotOne: flat price per screenshot.
  const screenshotCost =
    nonNegativeOrZero(shots?.screenshots) * nonNegativeOrZero(shots?.usdPerScreenshot);

  // Jina: request-based and page-based charges are added together.
  const jinaRequestCost =
    nonNegativeOrZero(reader?.requests) * nonNegativeOrZero(reader?.usdPerRequest);
  const jinaPageCost =
    nonNegativeOrZero(reader?.pages) * nonNegativeOrZero(reader?.usdPerPage);

  const byProvider = {
    openRouter: roundUsd(inputTokenCost + outputTokenCost),
    screenshotOne: roundUsd(screenshotCost),
    jina: roundUsd(jinaRequestCost + jinaPageCost),
    pageSpeed: 0,
  };

  // The total is built from the already-rounded provider amounts.
  const totalUsd = roundUsd(
    byProvider.openRouter + byProvider.screenshotOne + byProvider.jina + byProvider.pageSpeed,
  );

  return { byProvider, totalUsd };
}
|
||||
|
||||
/**
 * Builds one ScreenshotOne request URL per viewport preset (desktop and mobile).
 *
 * @param input.accessKey - Sent as the `access_key` query parameter.
 * @param input.targetUrl - Page to capture; must be an absolute http(s) URL.
 * @param input.endpoint - API endpoint; defaults to `SCREENSHOT_ONE_ENDPOINT`.
 * @returns The desktop request followed by the mobile request.
 * @throws Error when `targetUrl` cannot be parsed or is not http/https.
 */
export function buildScreenshotOneRequests({
  accessKey,
  targetUrl,
  endpoint = SCREENSHOT_ONE_ENDPOINT,
}: BuildScreenshotOneRequestsInput): ScreenshotOneRequest[] {
  let pageUrl: string;
  try {
    pageUrl = parseWebUrl(targetUrl, "target URL").toString();
  } catch {
    // Any parse or scheme failure is reported with one uniform message.
    throw new Error("Invalid target URL for ScreenshotOne request. Only http and https URLs are supported.");
  }

  // [viewport, width, height, device scale factor]
  const presets: Array<[ScreenshotOneViewport, number, number, number]> = [
    ["desktop", 1280, 900, 1],
    ["mobile", 390, 844, 2],
  ];

  const requests: ScreenshotOneRequest[] = [];
  for (const [viewport, width, height, scale] of presets) {
    const request = new URL(endpoint);
    const params: Array<[string, string]> = [
      ["access_key", accessKey],
      ["url", pageUrl],
      ["viewport_width", String(width)],
      ["viewport_height", String(height)],
      ["device_scale_factor", String(scale)],
      ["full_page", "true"],
      ["block_cookie_banners", "true"],
      ["block_ads", "true"],
      ["block_trackers", "true"],
    ];
    // `set` overwrites a parameter that the endpoint URL may already carry.
    for (const [key, value] of params) {
      request.searchParams.set(key, value);
    }
    requests.push({ viewport, url: request.toString() });
  }
  return requests;
}
|
||||
|
||||
/**
 * Prepares the Jina Reader input for an audit: the source and reader URLs for
 * every path in `JINA_PAGE_PATHS`, plus one combined markdown document.
 *
 * Supplied pages are matched to the prepared source URLs after both sides go
 * through `normalizeComparableUrl`. A path with no supplied markdown still
 * gets its `Source:` header, with an empty body.
 *
 * @param options.baseUrl - Any http(s) URL of the site; only its root is used.
 * @param options.pages - Markdown already available per page (default: none).
 *   If two entries normalise to the same URL, the later one wins.
 * @param options.maxMarkdownChars - Cap applied to the combined markdown.
 * @returns Prepared pages, their reader URLs, and the capped markdown.
 * @throws Error when `baseUrl` or any supplied page URL is not a valid http(s) URL.
 */
export function buildJinaReaderAuditInput({
  baseUrl,
  pages = [],
  maxMarkdownChars,
}: BuildJinaReaderAuditInputOptions): JinaReaderAuditInput {
  const siteRoot = normalizeBaseUrl(baseUrl);

  // Index the supplied markdown by comparable URL.
  const markdownByUrl = new Map<string, string>();
  for (const { url, markdown } of pages) {
    markdownByUrl.set(normalizeComparableUrl(url), markdown);
  }

  const preparedPages: JinaReaderAuditInput["pages"] = [];
  const sections: string[] = [];
  for (const path of JINA_PAGE_PATHS) {
    const sourceUrl = new URL(path, siteRoot).toString();
    preparedPages.push({ path, sourceUrl, readerUrl: toJinaReaderUrl(sourceUrl) });

    const body = markdownByUrl.get(normalizeComparableUrl(sourceUrl)) ?? "";
    sections.push(`Source: ${sourceUrl}\n\n${body.trim()}`);
  }

  return {
    pages: preparedPages,
    readerUrls: preparedPages.map(({ readerUrl }) => readerUrl),
    markdown: capMarkdown(sections.join("\n\n---\n\n"), maxMarkdownChars),
  };
}
|
||||
|
||||
/**
 * Reduces a site URL to its root: the path becomes "/", and the query string
 * and fragment are removed. Other URL components are left as parsed.
 *
 * @param baseUrl - Absolute http(s) URL of the site.
 * @returns A new `URL` pointing at the site root.
 * @throws Error when `baseUrl` cannot be parsed or is not http/https.
 */
function normalizeBaseUrl(baseUrl: string): URL {
  let root: URL;
  try {
    root = parseWebUrl(baseUrl, "base URL");
  } catch {
    // Any parse or scheme failure is reported with one uniform message.
    throw new Error("Invalid base URL for Jina Reader input. Only http and https URLs are supported.");
  }

  root.pathname = "/";
  root.search = "";
  root.hash = "";
  return root;
}
|
||||
|
||||
/**
 * Produces a canonical string for comparing page URLs: the fragment is removed
 * and a single trailing slash is stripped from any non-root path. The query
 * string is kept.
 *
 * @param url - Absolute http(s) URL.
 * @returns The canonical URL string.
 * @throws When `url` cannot be parsed or is not http/https (from `parseWebUrl`).
 */
function normalizeComparableUrl(url: string): string {
  const comparable = parseWebUrl(url, "page URL");
  comparable.hash = "";

  const { pathname } = comparable;
  const isRoot = pathname === "/";
  if (!isRoot && pathname.endsWith("/")) {
    comparable.pathname = pathname.substring(0, pathname.length - 1);
  }

  return comparable.href;
}
|
||||
|
||||
/**
 * Wraps a page URL in the Jina Reader prefix.
 *
 * Only the scheme, host, path, and query of the source URL are carried over;
 * the fragment and any credentials are not.
 *
 * @param sourceUrl - Absolute http(s) URL of the page.
 * @returns `JINA_READER_PREFIX` followed by the rebuilt source URL.
 * @throws When `sourceUrl` cannot be parsed or is not http/https (from `parseWebUrl`).
 */
function toJinaReaderUrl(sourceUrl: string): string {
  const { protocol, host, pathname, search } = parseWebUrl(sourceUrl, "source URL");
  return [JINA_READER_PREFIX, protocol, "//", host, pathname, search].join("");
}
|
||||
|
||||
/**
 * Parses `value` as an absolute URL and accepts only the http and https schemes.
 *
 * Both failure modes carry the caller's `label` in the message, so the caller
 * can tell which URL was rejected.
 *
 * @param value - Candidate URL string.
 * @param label - Human-readable name of the input, used in error messages.
 * @returns The parsed `URL`.
 * @throws TypeError when `value` cannot be parsed as an absolute URL.
 * @throws Error when the URL uses a scheme other than http or https.
 */
function parseWebUrl(value: string, label: string): URL {
  const message = `Invalid ${label}. Only http and https URLs are supported.`;

  let url: URL;
  try {
    url = new URL(value);
  } catch {
    // `new URL` throws a bare TypeError that does not say which input was bad.
    // Keep the TypeError class for existing handlers, but attach the label.
    throw new TypeError(message);
  }

  if (url.protocol !== "http:" && url.protocol !== "https:") {
    throw new Error(message);
  }
  return url;
}
|
||||
|
||||
/**
 * Limits `markdown` to at most `maxMarkdownChars` UTF-16 code units.
 *
 * When the text is cut, it ends with a `[truncated to N chars]` marker and the
 * marker counts towards the limit. If the limit is too small to hold the
 * marker, the text is hard-cut without one, so the result never exceeds the
 * limit.
 *
 * @param markdown - Text to cap.
 * @param maxMarkdownChars - Maximum result length. Fractions are floored;
 *   `NaN` and negative values are treated as 0; `Infinity` disables the cap.
 * @returns The original text when it fits, otherwise the truncated text.
 */
function capMarkdown(markdown: string, maxMarkdownChars: number): string {
  const limit = Number.isNaN(maxMarkdownChars) ? 0 : Math.max(0, Math.floor(maxMarkdownChars));
  if (markdown.length <= limit) {
    return markdown;
  }

  const suffix = `[truncated to ${limit} chars]`;
  const suffixFits = suffix.length <= limit;
  const keep = suffixFits ? limit - suffix.length : limit;

  let head = markdown.slice(0, keep);
  // Do not end on the first half of a surrogate pair: that would leave a
  // broken character at the cut point.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    head = head.slice(0, -1);
  }

  return suffixFits ? `${head}${suffix}` : head;
}
|
||||
Reference in New Issue
Block a user