234 lines
6.6 KiB
TypeScript
234 lines
6.6 KiB
TypeScript
/**
 * Usage figures and unit prices for the external services consumed by an audit.
 *
 * Every provider section and every field is optional. When this object is
 * passed to `estimateExternalAuditCostUsd`, missing, negative, or non-finite
 * numbers are treated as 0.
 */
export type ExternalAuditUsageInput = {
  /** OpenRouter token counts and prices, expressed in USD per one million tokens. */
  openRouter?: {
    inputTokens?: number;
    outputTokens?: number;
    inputUsdPerMillionTokens?: number;
    outputUsdPerMillionTokens?: number;
  };
  /** ScreenshotOne screenshot count and the USD price of one screenshot. */
  screenshotOne?: {
    screenshots?: number;
    usdPerScreenshot?: number;
  };
  /** Jina usage; request-based and page-based charges are added together. */
  jina?: {
    requests?: number;
    pages?: number;
    usdPerRequest?: number;
    usdPerPage?: number;
  };
  /**
   * PageSpeed usage. NOTE: `estimateExternalAuditCostUsd` currently reports
   * this provider as 0 and does not read `requests`.
   */
  pageSpeed?: {
    requests?: number;
  };
};
|
|
|
|
/**
 * Result of `estimateExternalAuditCostUsd`: the estimated USD cost for each
 * provider plus the overall total.
 */
export type ExternalAuditCostEstimate = {
  /** Estimated USD cost per provider. */
  byProvider: {
    openRouter: number;
    screenshotOne: number;
    jina: number;
    pageSpeed: number;
  };
  /** Sum of all provider costs in USD. */
  totalUsd: number;
};
|
|
|
|
/** Names of the viewport presets a ScreenshotOne request can be built for. */
export type ScreenshotOneViewport =
  | "desktop"
  | "mobile";
|
|
|
|
/** One ready-to-send ScreenshotOne request, as produced by `buildScreenshotOneRequests`. */
export type ScreenshotOneRequest = {
  /** Viewport preset this request was built for. */
  viewport: ScreenshotOneViewport;
  /**
   * Complete request URL including all query parameters. The access key is
   * embedded as a query parameter, so treat this value as a secret.
   */
  url: string;
};
|
|
|
|
/** Arguments accepted by `buildScreenshotOneRequests`. */
export type BuildScreenshotOneRequestsInput = {
  /** ScreenshotOne access key; sent as the `access_key` query parameter. */
  accessKey: string;
  /** Page to capture. Must be an absolute http or https URL. */
  targetUrl: string;
  /** API endpoint override; the module's default endpoint is used when omitted. */
  endpoint?: string;
};
|
|
|
|
/** Site-relative paths of the pages that are read through Jina Reader. */
export type JinaReaderPagePath =
  | "/"
  | "/kontakt"
  | "/impressum"
  | "/leistungen"
  | "/ueber-uns";
|
|
|
|
/** Markdown content already fetched for a single page. */
export type JinaReaderPageInput = {
  /** Absolute http(s) URL of the page the markdown belongs to. */
  url: string;
  /** Markdown body of that page. */
  markdown: string;
};
|
|
|
|
/** Output of `buildJinaReaderAuditInput`. */
export type JinaReaderAuditInput = {
  /** One entry per audited path, in the order of the module's fixed path list. */
  pages: Array<{
    /** Site-relative path of the page. */
    path: JinaReaderPagePath;
    /** Absolute URL of the page on the audited site. */
    sourceUrl: string;
    /** Jina Reader URL that wraps `sourceUrl`. */
    readerUrl: string;
  }>;
  /** The `readerUrl` of every entry in `pages`, in the same order. */
  readerUrls: string[];
  /** Combined markdown of all pages, capped to the requested maximum length. */
  markdown: string;
};
|
|
|
|
/** Arguments accepted by `buildJinaReaderAuditInput`. */
export type BuildJinaReaderAuditInputOptions = {
  /** Any http(s) URL on the audited site; only its origin is used. */
  baseUrl: string;
  /** Markdown already fetched per page; treated as empty when omitted. */
  pages?: JinaReaderPageInput[];
  /** Maximum length of the combined markdown. */
  maxMarkdownChars: number;
};
|
|
|
|
/** Default ScreenshotOne endpoint used when the caller supplies no override. */
const SCREENSHOT_ONE_ENDPOINT = "https://api.screenshotone.com/take";

/** Prefix placed in front of a source URL to form its Jina Reader URL. */
const JINA_READER_PREFIX = "https://r.jina.ai/";
|
|
/**
 * Fixed list of paths that are audited, in output order.
 * Declared readonly so this shared module-level list cannot be mutated.
 */
const JINA_PAGE_PATHS: readonly JinaReaderPagePath[] = [
  "/",
  "/kontakt",
  "/impressum",
  "/leistungen",
  "/ueber-uns",
];
|
|
|
|
/**
 * Rounds a USD amount to six decimal places.
 *
 * `Number.EPSILON` is added before scaling; presumably this nudges values that
 * sit just below a rounding boundary because of binary floating-point
 * representation so that they round up as expected.
 *
 * @param value Amount in USD.
 * @returns The amount rounded to the nearest millionth of a dollar.
 */
function roundUsd(value: number): number {
  const scale = 1_000_000;
  return Math.round((value + Number.EPSILON) * scale) / scale;
}
|
|
|
|
/**
 * Sanitizes an optional numeric input.
 *
 * @param value Possibly missing number.
 * @returns `value` when it is a finite, non-negative number; otherwise 0
 *   (covers `undefined`, `NaN`, infinities, and negative numbers).
 */
function nonNegativeOrZero(value: number | undefined): number {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return 0;
  }
  return Math.max(0, value);
}
|
|
|
|
/**
 * Estimates the USD cost of the external services used by an audit.
 *
 * Each input number is sanitized with `nonNegativeOrZero`, so missing,
 * negative, or non-finite values contribute 0. Every provider subtotal and the
 * total are rounded with `roundUsd`.
 *
 * @param usage Usage counts and unit prices per provider.
 * @returns Per-provider costs and their sum, in USD.
 */
export function estimateExternalAuditCostUsd(
  usage: ExternalAuditUsageInput,
): ExternalAuditCostEstimate {
  // Token prices are quoted per one million tokens, hence the division.
  const openRouter = roundUsd(
    (nonNegativeOrZero(usage.openRouter?.inputTokens) / 1_000_000) *
      nonNegativeOrZero(usage.openRouter?.inputUsdPerMillionTokens) +
      (nonNegativeOrZero(usage.openRouter?.outputTokens) / 1_000_000) *
        nonNegativeOrZero(usage.openRouter?.outputUsdPerMillionTokens),
  );

  const screenshotOne = roundUsd(
    nonNegativeOrZero(usage.screenshotOne?.screenshots) *
      nonNegativeOrZero(usage.screenshotOne?.usdPerScreenshot),
  );

  // Jina is charged per request and per page; both parts are added.
  const jina = roundUsd(
    nonNegativeOrZero(usage.jina?.requests) * nonNegativeOrZero(usage.jina?.usdPerRequest) +
      nonNegativeOrZero(usage.jina?.pages) * nonNegativeOrZero(usage.jina?.usdPerPage),
  );

  // PageSpeed is always costed at 0; `usage.pageSpeed.requests` is not read.
  const pageSpeed = 0;

  return {
    byProvider: {
      openRouter,
      screenshotOne,
      jina,
      pageSpeed,
    },
    totalUsd: roundUsd(openRouter + screenshotOne + jina + pageSpeed),
  };
}
|
|
|
|
/**
 * Builds one ScreenshotOne request URL per viewport preset (desktop, mobile)
 * for the given target page.
 *
 * @param input.accessKey Access key, sent as the `access_key` query parameter.
 * @param input.targetUrl Page to capture; must be an absolute http(s) URL.
 * @param input.endpoint API endpoint; defaults to `SCREENSHOT_ONE_ENDPOINT`.
 * @returns One request per viewport, desktop first, then mobile.
 * @throws Error when `targetUrl` or `endpoint` is not a valid http(s) URL.
 */
export function buildScreenshotOneRequests({
  accessKey,
  targetUrl,
  endpoint = SCREENSHOT_ONE_ENDPOINT,
}: BuildScreenshotOneRequestsInput): ScreenshotOneRequest[] {
  let normalizedTargetUrl: string;
  try {
    normalizedTargetUrl = parseWebUrl(targetUrl, "target URL").toString();
  } catch {
    throw new Error("Invalid target URL for ScreenshotOne request. Only http and https URLs are supported.");
  }

  // Validate the endpoint once, up front, so a bad override fails with a
  // descriptive message instead of a bare "Invalid URL" from inside the loop.
  let endpointHref: string;
  try {
    endpointHref = parseWebUrl(endpoint, "endpoint").toString();
  } catch {
    throw new Error("Invalid endpoint for ScreenshotOne request. Only http and https URLs are supported.");
  }

  const viewports: ReadonlyArray<{
    viewport: ScreenshotOneViewport;
    width: number;
    height: number;
    scale: number;
  }> = [
    { viewport: "desktop", width: 1280, height: 900, scale: 1 },
    { viewport: "mobile", width: 390, height: 844, scale: 2 },
  ];

  return viewports.map(({ viewport, width, height, scale }) => {
    // A fresh URL per viewport keeps the query parameters of each request independent.
    const requestUrl = new URL(endpointHref);
    requestUrl.searchParams.set("access_key", accessKey);
    requestUrl.searchParams.set("url", normalizedTargetUrl);
    requestUrl.searchParams.set("viewport_width", String(width));
    requestUrl.searchParams.set("viewport_height", String(height));
    requestUrl.searchParams.set("device_scale_factor", String(scale));
    requestUrl.searchParams.set("full_page", "true");
    requestUrl.searchParams.set("block_cookie_banners", "true");
    requestUrl.searchParams.set("block_ads", "true");
    requestUrl.searchParams.set("block_trackers", "true");

    return {
      viewport,
      url: requestUrl.toString(),
    };
  });
}
|
|
|
|
/**
 * Prepares the Jina Reader input for an audit: the source and reader URLs of
 * every path in `JINA_PAGE_PATHS`, plus one combined markdown document.
 *
 * Supplied pages are matched to the fixed paths by normalized URL. A path with
 * no matching page still gets a `Source:` header with an empty body; supplied
 * pages that match no path are ignored. If several pages normalize to the same
 * URL, the last one wins.
 *
 * @param options.baseUrl Any http(s) URL on the audited site; reduced to its root.
 * @param options.pages Markdown already fetched per page; defaults to none.
 * @param options.maxMarkdownChars Cap applied to the combined markdown.
 * @returns Page descriptors, their reader URLs, and the capped markdown.
 * @throws Error when `baseUrl` or any `pages[].url` is not a valid http(s) URL.
 */
export function buildJinaReaderAuditInput({
  baseUrl,
  pages = [],
  maxMarkdownChars,
}: BuildJinaReaderAuditInputOptions): JinaReaderAuditInput {
  const normalizedBaseUrl = normalizeBaseUrl(baseUrl);

  // Explicit tuple typing keeps the Map entries from widening to string[].
  const pagesByUrl = new Map<string, string>(
    pages.map((page): [string, string] => [normalizeComparableUrl(page.url), page.markdown]),
  );

  const preparedPages = JINA_PAGE_PATHS.map((path) => {
    const sourceUrl = new URL(path, normalizedBaseUrl).toString();
    const readerUrl = toJinaReaderUrl(sourceUrl);
    return {
      path,
      sourceUrl,
      readerUrl,
    };
  });

  const markdown = preparedPages
    .map((page) => {
      const pageMarkdown = pagesByUrl.get(normalizeComparableUrl(page.sourceUrl)) ?? "";
      return `Source: ${page.sourceUrl}\n\n${pageMarkdown.trim()}`;
    })
    .join("\n\n---\n\n");

  return {
    pages: preparedPages,
    readerUrls: preparedPages.map((page) => page.readerUrl),
    markdown: capMarkdown(markdown, maxMarkdownChars),
  };
}
|
|
|
|
/**
 * Reduces a site URL to its root: fragment and query are cleared and the path
 * is reset to "/".
 *
 * @param baseUrl Any absolute http(s) URL on the site.
 * @returns The root URL of that site.
 * @throws Error when `baseUrl` cannot be parsed or is not http/https.
 */
function normalizeBaseUrl(baseUrl: string): URL {
  try {
    const url = parseWebUrl(baseUrl, "base URL");
    url.hash = "";
    url.search = "";
    url.pathname = "/";
    return url;
  } catch {
    // Any parse or protocol failure is reported with one context-specific message.
    throw new Error("Invalid base URL for Jina Reader input. Only http and https URLs are supported.");
  }
}
|
|
|
|
/**
 * Produces a canonical string for comparing page URLs: the fragment is dropped
 * and a single trailing slash is removed from any path other than the root.
 * The query string is kept.
 *
 * @param url Absolute http(s) URL.
 * @returns The canonical URL string.
 * @throws When `url` cannot be parsed or is not http/https (from `parseWebUrl`).
 */
function normalizeComparableUrl(url: string): string {
  const normalized = parseWebUrl(url, "page URL");
  normalized.hash = "";

  const hasTrailingSlash = normalized.pathname !== "/" && normalized.pathname.endsWith("/");
  if (hasTrailingSlash) {
    normalized.pathname = normalized.pathname.slice(0, -1);
  }

  return normalized.toString();
}
|
|
|
|
/**
 * Builds the Jina Reader URL for a source page by appending the page's
 * protocol, host, path, and query to `JINA_READER_PREFIX`.
 * The fragment and any credentials in the source URL are not carried over.
 *
 * @param sourceUrl Absolute http(s) URL of the page to read.
 * @returns The corresponding Jina Reader URL.
 * @throws When `sourceUrl` cannot be parsed or is not http/https (from `parseWebUrl`).
 */
function toJinaReaderUrl(sourceUrl: string): string {
  const { protocol, host, pathname, search } = parseWebUrl(sourceUrl, "source URL");
  return `${JINA_READER_PREFIX}${protocol}//${host}${pathname}${search}`;
}
|
|
|
|
/**
 * Parses an absolute URL and accepts it only when its scheme is http or https.
 *
 * @param value Text to parse as an absolute URL.
 * @param label Human-readable name of the value, used in error messages.
 * @returns The parsed URL.
 * @throws TypeError when `value` cannot be parsed as a URL.
 * @throws Error when the URL uses a scheme other than http or https.
 */
function parseWebUrl(value: string, label: string): URL {
  const message = `Invalid ${label}. Only http and https URLs are supported.`;

  let url: URL;
  try {
    url = new URL(value);
  } catch {
    // Keep the TypeError class the URL constructor throws, but add the label so
    // callers can tell which input was malformed.
    throw new TypeError(message);
  }

  if (url.protocol !== "http:" && url.protocol !== "https:") {
    throw new Error(message);
  }
  return url;
}
|
|
|
|
/**
 * Limits markdown to at most `maxMarkdownChars` UTF-16 code units.
 *
 * Text that already fits is returned unchanged. Longer text is cut and a
 * `[truncated to N chars]` marker is appended, with the marker counted against
 * the limit. When the limit is too small to hold the marker, the text is
 * hard-truncated without it so the result never exceeds the limit. A negative
 * or NaN limit yields an empty string. Cuts never split a surrogate pair.
 *
 * @param markdown Text to cap.
 * @param maxMarkdownChars Maximum allowed length of the result.
 * @returns The original text, or a truncated version no longer than the limit.
 */
function capMarkdown(markdown: string, maxMarkdownChars: number): string {
  if (markdown.length <= maxMarkdownChars) {
    return markdown;
  }

  // NaN and negative limits fail the `> 0` test and collapse to 0.
  const limit = maxMarkdownChars > 0 ? Math.floor(maxMarkdownChars) : 0;

  // Takes the first `length` code units, dropping a trailing high surrogate so
  // the cut never leaves half of a surrogate pair behind.
  const takePrefix = (length: number): string => {
    const lastIndex = length - 1;
    const lastUnit = lastIndex >= 0 ? markdown.charCodeAt(lastIndex) : Number.NaN;
    const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
    return markdown.slice(0, endsOnHighSurrogate ? lastIndex : length);
  };

  const suffix = `[truncated to ${maxMarkdownChars} chars]`;
  if (suffix.length > limit) {
    // The marker alone would exceed the limit; honour the limit instead.
    return takePrefix(limit);
  }

  return `${takePrefix(limit - suffix.length)}${suffix}`;
}
|