feat: add OpenRouter audit generation pipeline
This commit is contained in:
482
lib/ai/german-copy-guard.ts
Normal file
482
lib/ai/german-copy-guard.ts
Normal file
@@ -0,0 +1,482 @@
|
||||
/**
 * Frequent German function words (pronouns, articles, auxiliaries,
 * prepositions). `hasGermanAnchor` counts how many tokens of a text are
 * members of this set.
 *
 * NOTE(review): `tokenizeWords` only yields tokens of three or more letters,
 * so the two-letter entries ("du", "er") cannot be hit as the tokenizer
 * currently stands. They are kept so the list stays complete if the
 * tokenizer is ever relaxed.
 */
const GERMAN_MARKERS = new Set([
  // Personal and possessive pronouns
  "ich", "mich", "mir", "mein", "meine", "wir", "du", "sie", "er",
  // Articles and demonstratives
  "der", "die", "das", "eine", "einer", "einem", "dieser", "diese", "dieses",
  // Auxiliaries
  "ist", "sind", "waren", "hat", "habe", "haben",
  // Conjunctions, negation, prepositions
  "und", "nicht", "mit", "wenn", "für", "bei",
]);
|
||||
|
||||
/**
 * Frequent English words plus a few product-specific English terms.
 * `hasGermanAnchor` counts how many tokens of a text are members of this set
 * to decide whether the text reads as English rather than German.
 */
const ENGLISH_MARKERS = new Set([
  // Articles, pronouns, determiners
  "the", "you", "your", "we", "our", "this", "that", "it",
  // Verbs and auxiliaries
  "is", "are", "was", "were", "have", "has", "will", "can",
  // Conjunctions and prepositions
  "and", "to", "of", "in", "for", "on", "with", "from", "if",
  // Domain vocabulary
  "quick", "audit", "bad", "website", "report",
]);
|
||||
|
||||
/**
 * Patterns that indicate the text describes an observation. A text counts as
 * containing an observation when at least one pattern matches.
 */
const OBSERVATION_TOKENS = [
  // "ich"/"mir" followed within 80 characters (same line) by a perception participle.
  /\b(mir|ich)\b[^\n]{0,80}\b(aufgefallen|festgestellt|bemerkt|beobachtet|gesehen|sichtbar)\b/i,
  // Stand-alone perception verbs. "feststell" is a stem, so it is followed by
  // `\w*` to also match "feststelle", "feststellen", "feststellt"; with a bare
  // trailing `\b` it could only ever match the non-word "feststell".
  /\b(erkennt|zeigt|sichtbar|feststell\w*|finde|fällt)\b/i,
  // Fixed phrases.
  /\b(ich sehe|ich habe gesehen|bei der Prüfung)\b/i,
];
|
||||
|
||||
/**
 * Patterns that indicate the text contains a suggestion or recommendation.
 * A text counts as containing a suggestion when at least one pattern matches.
 */
const SUGGESTION_TOKENS = [
  // Recommendation verbs and modal phrasing. "schlage" already covers
  // "schlage vor" and "empfehle" already covers "ich empfehle", so the longer
  // variants are not listed separately.
  /\b(empfehle|empfiehlt|vorschlage|vorschlagen|schlage|könnte helfen|kannst|können wir|sollte|sollten|ich könnte|ich würde)\b/i,
  // Action verbs. "setzen" is the infinitive that matches the rest of the
  // list; the previously listed "setzten" (past tense) is kept so nothing
  // that matched before stops matching.
  /\b(ergänzt|ergänzen|anpassen|optimieren|verbessern|prüfen|einbauen|einzusetzen|setzen|setzten)\b/i,
];
|
||||
|
||||
/**
 * Marketing buzzwords that make copy read as generic machine-written text.
 * A text is flagged when at least one pattern matches.
 *
 * German adjective stems are followed by `\w*` / `\p{L}*` so inflected forms
 * ("maßgeschneiderte", "nahtlose", "zukunftsweisenden") are caught as well.
 */
const AI_SLOP_TOKENS = [
  /\bmaßgeschneider\w*/i,
  /\bnahtlos\w*/i,
  /\bstate[- ]of[- ]the[- ]art\b/i,
  /\bgame[- ]?changer\b/i,
  /\bsynerg(?:ie|istisch)\w*/i,
  // A trailing `\b` after "ä" only holds when an ASCII word character follows,
  // so the stand-alone word "revolutionär" was never matched. Accept the bare
  // word and any letter suffix instead.
  /\brevolutionär\p{L}*/iu,
  /\bnext level\b/i,
  /\bzukunftsweisend\w*/i,
  /\bdigital transformieren\b/i,
  /\boutstanding\b/i,
  /\bhebt.{0,20}Sichtbarkeit\b/i,
];
|
||||
|
||||
/**
 * Patterns for accusatory or harshly judgemental wording aimed at the reader.
 * A text is flagged when at least one pattern matches.
 *
 * The negative words are stems: each is followed by `\w*` so that
 * "katastrophal", "schlechte", "veralteten", "desaströs" etc. match. With a
 * bare trailing `\b`, stems such as "katastroph" or "desastr" could never
 * match an actual word.
 */
const HOSTILE_TOKENS = [
  // Address pronoun followed within 80 characters (same sentence) by a negative stem.
  /\b(Ihr|Ihre|Sie|eure|euer)\b[^\n.!?]{0,80}\b(katastroph|schlecht|veraltet|unprofessionell|unbrauchbar|mangelhaft|chaotisch|desastr|desaster|skrupellos)\w*/i,
  // "ist/sind total|absolut <negative>"
  /\b(ist|sind)\s+(?:total|absolut)\s+(?:schlecht|kaputt|katastroph)\w*/i,
  /\babsolut unprofessionell\w*/i,
];
|
||||
|
||||
// A score keyword ("pagespeed", "lighthouse", "score") followed on the same
// line, within 120 characters, by a one- or two-digit number (optionally with
// decimals and a percent sign).
const SCORE_KEYWORD_BEFORE_NUMBER =
  /\b(?:pagespeed|lighthouse|score)\b[^\n]{0,120}\b\d{1,2}(?:[.,]\d+)?%?/i;

// The mirrored order: the number first, the score keyword within 120 characters after it.
const SCORE_NUMBER_BEFORE_KEYWORD =
  /\b\d{1,2}(?:[.,]\d+)?%?[^\n]{0,120}\b(?:pagespeed|lighthouse|score)\b/i;

/** Patterns that detect technical score values quoted next to a score keyword. */
const SCORE_CONTEXT_TOKENS = [
  SCORE_KEYWORD_BEFORE_NUMBER,
  SCORE_NUMBER_BEFORE_KEYWORD,
];
|
||||
|
||||
/**
 * Patterns that detect price or currency mentions. A text is flagged when at
 * least one pattern matches.
 *
 * The currency word must not be glued to further letters. Otherwise a number
 * followed by "europäischen" or "Europa" is reported as a price
 * ("in 3 europäischen Ländern"). The currency word is matched
 * case-insensitively so "EURO" and "Euros" are still recognised.
 */
const PRICE_PATTERNS = [
  // Amount followed by a currency marker: "500 Euro", "49€", "120 EUR".
  /\b\d{1,4}\s*(?:€|(?:euros?|eur)(?!\p{L}))/iu,
  // Currency marker followed by an amount: "€ 49", "EUR 1200".
  /(?:€|(?<!\p{L})(?:euros?|eur))\s*\d{1,4}(?:[.,]\d{1,2})?/iu,
  // "Preis"/"Preise"/"Kosten" followed almost directly by a number.
  /\b(?:preis|preise|kosten)\b[^a-z]{0,5}\d{1,4}\s*(?:€|EUR|Euro|euro)?/i,
];
|
||||
|
||||
/**
 * Patterns that detect raw technical artifacts in a text: internal
 * identifiers, model names, JSON-like or bracketed fragments, and hex ids.
 * A text is flagged when at least one pattern matches.
 */
const RAW_TECH_PATTERNS = [
  // Internal identifier labels
  /\braw\s*storage\s*id\b/i,
  /\bstorage[_-]?id\b/i,
  /\bmodel[_-]?id\b/i,

  // Model family names, optionally followed by a version suffix such as "-4"
  /\b(?:gpt|claude|gemini|llama|mistral|qwen|mixtral|deepseek|phi|sonar|gemma)\b[-\w]*/i,

  // "{ ... : ... }" on a single line
  /\{[^\n]{0,240}:[^\n]{0,240}\}/,

  // "[ ... ]" on a single line
  /\[[^\n]{0,240}\]/,

  // Stand-alone 24-character hexadecimal string
  /\b[0-9a-f]{24}\b/i,
];
|
||||
|
||||
/** One rule violation found in a validated text. */
export type GermanCopyGuardIssue = {
  /** Name of the field the violation was found in (e.g. "emailBody"). */
  field: string;
  /** Machine-readable rule identifier (e.g. "not_german"). */
  rule: string;
  /** Human-readable description of the violation. */
  message: string;
};
|
||||
|
||||
/** Outcome of a validation run. */
export type GermanCopyGuardResult = {
  /** The validators in this file set this to `true` exactly when `issues` is empty. */
  passed: boolean;
  /** All violations collected during the run, in the order they were found. */
  issues: GermanCopyGuardIssue[];
};
|
||||
|
||||
/** Audit text as validated by `validateAuditCopy`. */
export type AuditCopy = {
  /** Reported under the field name "auditSummary". */
  summary: string;
  /** Reported under the field name "auditBody". */
  body: string;
};
|
||||
|
||||
/** Email text as validated by `validateEmailCopy`. */
export type EmailCopy = {
  /** Reported under the field name "emailSubject". */
  subject: string;
  /** Reported under the field name "emailBody". */
  body: string;
};
|
||||
|
||||
/** Call script as validated by `validateCallScriptCopy`. */
export type CallScriptCopy = {
  /** First line of the call. */
  openingLine: string;
  /** Script lines between the opening and the closing line. */
  callScript: string[];
  /** Last line of the call. */
  closeLine: string;
};
|
||||
|
||||
/** Follow-up text as validated by `validateFollowUpCopy`. */
export type FollowUpCopy = {
  /** Reported under the field name "followUp". */
  message: string;
};
|
||||
|
||||
/**
 * Bundle of optional texts accepted by `validateCustomerFacingCopy`.
 * Fields that are left out are not validated.
 */
export type GermanCustomerCopy = {
  auditSummary?: string;
  auditBody?: string;
  emailSubject?: string;
  emailBody?: string;
  callScript?: CallScriptCopy;
  followUp?: string;
};
|
||||
|
||||
/** Per-field switches for `validateTextField`. Every flag is off when omitted. */
type ValidationOptions = {
  /** Report "missing_ich_form" when no first-person word is found. */
  requireIchForm?: boolean;
  /** Report "missing_observation_or_suggestion" unless both are detected. */
  requireObservationAndSuggestion?: boolean;
  /** Skip all checks when the trimmed text is shorter than 6 characters. */
  skipIfTooShort?: boolean;
};
|
||||
|
||||
/**
 * Appends one violation record to `issues` (the array is mutated in place).
 *
 * @param issues - Collector the new record is pushed onto.
 * @param field - Name of the field the violation belongs to.
 * @param rule - Machine-readable rule identifier.
 * @param message - Human-readable description.
 */
function addIssue(
  issues: GermanCopyGuardIssue[],
  field: string,
  rule: string,
  message: string,
) {
  const entry = { field, rule, message };
  issues.push(entry);
}
|
||||
|
||||
/**
 * Splits a text into lower-cased word tokens.
 *
 * Only runs of at least three letters from a-z, ä, ö, ü, ß are returned;
 * shorter words, digits and punctuation are dropped.
 *
 * @param value - Text to tokenize.
 * @returns The tokens in order of appearance, or an empty array if none match.
 */
function tokenizeWords(value: string): string[] {
  const lowered = value.toLowerCase();
  const found = lowered.match(/[a-zäöüß]{3,}/giu);
  if (found === null) {
    return [];
  }
  return Array.from(found, (token) => token.toLowerCase());
}
|
||||
|
||||
/**
 * Heuristic check whether a text can be accepted as German.
 *
 * Decision order:
 * 1. No word tokens at all: accepted.
 * 2. The text contains ä, ö, ü or ß: accepted.
 * 3. At least one token is a German marker: accepted.
 * 4. No token is an English marker: accepted.
 * 5. Otherwise (English markers only): rejected when the text has four or
 *    fewer tokens, or when English markers make up at least 20% of the tokens.
 *
 * @param value - Text to inspect.
 * @returns `false` only when the text looks English according to the rules above.
 */
function hasGermanAnchor(value: string): boolean {
  const words = tokenizeWords(value);
  if (words.length === 0) {
    return true;
  }

  if (/[äöüß]/i.test(value)) {
    return true;
  }

  let germanHits = 0;
  let englishHits = 0;
  for (const word of words) {
    if (GERMAN_MARKERS.has(word)) {
      germanHits += 1;
    }
    if (ENGLISH_MARKERS.has(word)) {
      englishHits += 1;
    }
  }

  if (germanHits >= 1 || englishHits === 0) {
    return true;
  }

  // From here on: no German marker, at least one English marker.
  if (words.length <= 4) {
    return false;
  }

  return englishHits / words.length < 0.2;
}
|
||||
|
||||
/**
 * Checks whether a text contains a first-person singular word.
 *
 * Recognised words: "ich", "mich", "mir" and the possessive "mein" in all of
 * its inflections ("meine", "meinem", "meinen", "meiner", "meines"). The
 * forms "meinen" and "meines" were previously missing, so a text such as
 * "Nach meinen Beobachtungen ..." was reported as not written in Ich-Form.
 *
 * @param value - Text to inspect.
 * @returns `true` when at least one first-person word occurs as a whole word.
 */
function hasIchForm(value: string): boolean {
  return /\b(?:ich|mich|mir|mein(?:e[mnrs]?)?)\b/i.test(value);
}
|
||||
|
||||
/** Returns `true` when any pattern in `OBSERVATION_TOKENS` matches `value`. */
function hasObservation(value: string): boolean {
  for (const pattern of OBSERVATION_TOKENS) {
    if (pattern.test(value)) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/** Returns `true` when any pattern in `SUGGESTION_TOKENS` matches `value`. */
function hasSuggestion(value: string): boolean {
  for (const pattern of SUGGESTION_TOKENS) {
    if (pattern.test(value)) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/** Returns `true` when any pattern in `AI_SLOP_TOKENS` matches `value`. */
function hasAiSlop(value: string): boolean {
  for (const pattern of AI_SLOP_TOKENS) {
    if (pattern.test(value)) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/** Returns `true` when any pattern in `HOSTILE_TOKENS` matches `value`. */
function hasHostileTone(value: string): boolean {
  for (const pattern of HOSTILE_TOKENS) {
    if (pattern.test(value)) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/** Returns `true` when any pattern in `SCORE_CONTEXT_TOKENS` matches `value`. */
function hasScoreArtifact(value: string): boolean {
  for (const pattern of SCORE_CONTEXT_TOKENS) {
    if (pattern.test(value)) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/** Returns `true` when any pattern in `PRICE_PATTERNS` matches `value`. */
function hasPrice(value: string): boolean {
  for (const pattern of PRICE_PATTERNS) {
    if (pattern.test(value)) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/** Returns `true` when any pattern in `RAW_TECH_PATTERNS` matches `value`. */
function hasRawArtifact(value: string): boolean {
  for (const pattern of RAW_TECH_PATTERNS) {
    if (pattern.test(value)) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/**
 * Runs every copy rule against one text and appends a record to `issues` for
 * each rule that is violated.
 *
 * Rules are evaluated in a fixed order, which is also the order of the
 * appended records: language, Ich-Form (optional), score artifacts, prices,
 * AI buzzwords, hostile tone, raw technical artifacts, and finally
 * observation + suggestion (optional).
 *
 * @param issues - Collector that violations are appended to (mutated).
 * @param field - Field name stored on every appended record.
 * @param value - Text to validate.
 * @param options - Optional switches; see `ValidationOptions`.
 */
function validateTextField(
  issues: GermanCopyGuardIssue[],
  field: string,
  value: string,
  options: ValidationOptions = {},
) {
  // Very short texts can be exempted entirely.
  const tooShortToCheck = value.trim().length < 6;
  if (options.skipIfTooShort && tooShortToCheck) {
    return;
  }

  const findings: Array<{ violated: boolean | undefined; rule: string; message: string }> = [
    {
      violated: !hasGermanAnchor(value),
      rule: "not_german",
      message: "Text wirkt nicht ausreichend deutsch.",
    },
    {
      violated: options.requireIchForm && !hasIchForm(value),
      rule: "missing_ich_form",
      message: "Text sollte in Ich-Form geschrieben sein.",
    },
    {
      violated: hasScoreArtifact(value),
      rule: "pagespeed_score_artifact",
      message: "Technische Score-/PageSpeed-Werte sollten nicht im Kunden-Text erscheinen.",
    },
    {
      violated: hasPrice(value),
      rule: "price_mention",
      message: "Preis- oder Währungsangaben sollten im Kunden-Text vermieden werden.",
    },
    {
      violated: hasAiSlop(value),
      rule: "generic_ai_slop",
      message: "Generische KI-Slop-Formulierungen erkannt.",
    },
    {
      violated: hasHostileTone(value),
      rule: "hostile_tone",
      message: "Anklagende oder negativ wertende Sprache wurde erkannt.",
    },
    {
      violated: hasRawArtifact(value),
      rule: "raw_technical_artifact",
      message: "Technische Artefakte im Text erkannt.",
    },
    {
      // Both parts must be present; missing either one is a violation.
      violated:
        options.requireObservationAndSuggestion &&
        !(hasObservation(value) && hasSuggestion(value)),
      rule: "missing_observation_or_suggestion",
      message: "Beobachtung und Vorschlag sollten im gleichen Text erkennbar sein.",
    },
  ];

  for (const finding of findings) {
    if (finding.violated) {
      addIssue(issues, field, finding.rule, finding.message);
    }
  }
}
|
||||
|
||||
/**
 * Validates a single call-script line after trimming it.
 * Lines that are missing or blank are skipped without reporting anything.
 *
 * @param issues - Collector that violations are appended to (mutated).
 * @param linePrefix - Field name stored on appended records.
 * @param scriptLine - Raw line text.
 * @param options - Switches forwarded to `validateTextField`.
 */
function validateCallScriptText(
  issues: GermanCopyGuardIssue[],
  linePrefix: string,
  scriptLine: string,
  options: ValidationOptions,
) {
  const trimmed = scriptLine?.trim();
  if (trimmed) {
    validateTextField(issues, linePrefix, trimmed, options);
  }
}
|
||||
|
||||
/**
 * Validates the summary and body of an audit.
 * Both texts must be in Ich-Form and contain an observation and a suggestion.
 *
 * @param audit - Audit texts to validate.
 * @returns Collected violations; `passed` is `true` when there are none.
 */
export function validateAuditCopy(audit: AuditCopy): GermanCopyGuardResult {
  const issues: GermanCopyGuardIssue[] = [];
  const { summary, body } = audit;

  validateTextField(issues, "auditSummary", summary, {
    requireIchForm: true,
    requireObservationAndSuggestion: true,
  });
  validateTextField(issues, "auditBody", body, {
    requireIchForm: true,
    requireObservationAndSuggestion: true,
  });

  const passed = issues.length === 0;
  return { passed, issues };
}
|
||||
|
||||
/**
 * Validates the subject and body of an email.
 * The subject is only checked when it has at least 6 characters after
 * trimming; the body must be in Ich-Form and contain an observation and a
 * suggestion.
 *
 * @param email - Email texts to validate.
 * @returns Collected violations; `passed` is `true` when there are none.
 */
export function validateEmailCopy(email: EmailCopy): GermanCopyGuardResult {
  const issues: GermanCopyGuardIssue[] = [];
  const { subject, body } = email;

  validateTextField(issues, "emailSubject", subject, { skipIfTooShort: true });
  validateTextField(issues, "emailBody", body, {
    requireIchForm: true,
    requireObservationAndSuggestion: true,
  });

  const passed = issues.length === 0;
  return { passed, issues };
}
|
||||
|
||||
/**
 * Validates a call script.
 *
 * The opening and closing lines must be in Ich-Form; the lines in between are
 * checked without that requirement. Observation and suggestion are required
 * once across the whole script (all non-blank lines joined by a space), not
 * per line.
 *
 * Missing values are tolerated at runtime: `validateCallScriptText` already
 * skips nullish lines, so the line list and the concatenation step treat a
 * missing `callScript` array or a nullish line the same way instead of
 * throwing.
 *
 * @param script - Call script to validate.
 * @returns Collected violations; `passed` is `true` when there are none.
 */
export function validateCallScriptCopy(script: CallScriptCopy): GermanCopyGuardResult {
  const issues: GermanCopyGuardIssue[] = [];
  // Defensive: the static type says this is always an array, but the
  // per-line helper is written to survive incomplete input, so do the same here.
  const bodyLines: string[] = script.callScript ?? [];

  validateCallScriptText(issues, "callScript.openingLine", script.openingLine, {
    requireIchForm: true,
  });
  validateCallScriptText(issues, "callScript.closeLine", script.closeLine, {
    requireIchForm: true,
  });

  bodyLines.forEach((line, index) => {
    validateCallScriptText(issues, `callScript.callScript[${index}]`, line, {
      requireIchForm: false,
    });
  });

  // Lines are joined untrimmed (as before); only blank or missing lines are dropped.
  const scriptConcatenated = [script.openingLine, ...bodyLines, script.closeLine]
    .filter((line) => typeof line === "string" && line.trim().length > 0)
    .join(" ");

  if (!hasObservation(scriptConcatenated) || !hasSuggestion(scriptConcatenated)) {
    addIssue(
      issues,
      "callScript",
      "missing_observation_or_suggestion",
      "Beobachtung und Vorschlag sollten im Call-Script erkennbar sein.",
    );
  }

  return { passed: issues.length === 0, issues };
}
|
||||
|
||||
/**
 * Validates a follow-up message. The message must be in Ich-Form and contain
 * an observation and a suggestion.
 *
 * @param followUp - Follow-up text to validate.
 * @returns Collected violations; `passed` is `true` when there are none.
 */
export function validateFollowUpCopy(followUp: FollowUpCopy): GermanCopyGuardResult {
  const issues: GermanCopyGuardIssue[] = [];
  const rules: ValidationOptions = {
    requireIchForm: true,
    requireObservationAndSuggestion: true,
  };

  validateTextField(issues, "followUp", followUp.message, rules);

  const passed = issues.length === 0;
  return { passed, issues };
}
|
||||
|
||||
/**
 * Validates every text present in a customer-facing bundle.
 *
 * Fields are processed in the order auditSummary, auditBody, emailSubject,
 * emailBody, callScript, followUp; omitted fields are skipped. All long-form
 * texts must be in Ich-Form and contain an observation and a suggestion. The
 * email subject is only checked when it has at least 6 characters after
 * trimming, and the call script is delegated to `validateCallScriptCopy`.
 *
 * @param input - Bundle of optional texts.
 * @returns Collected violations; `passed` is `true` when there are none.
 */
export function validateCustomerFacingCopy(input: GermanCustomerCopy): GermanCopyGuardResult {
  const issues: GermanCopyGuardIssue[] = [];
  const { auditSummary, auditBody, emailSubject, emailBody, callScript, followUp } = input;

  const longFormRules: ValidationOptions = {
    requireIchForm: true,
    requireObservationAndSuggestion: true,
  };

  // Plain text fields that come before the call script, in reporting order.
  const leadingFields: Array<[string, string | undefined, ValidationOptions]> = [
    ["auditSummary", auditSummary, longFormRules],
    ["auditBody", auditBody, longFormRules],
    ["emailSubject", emailSubject, { skipIfTooShort: true }],
    ["emailBody", emailBody, longFormRules],
  ];

  for (const [field, text, rules] of leadingFields) {
    if (text !== undefined) {
      validateTextField(issues, field, text, rules);
    }
  }

  if (callScript) {
    const scriptResult = validateCallScriptCopy({
      openingLine: callScript.openingLine,
      callScript: [...callScript.callScript],
      closeLine: callScript.closeLine,
    });
    issues.push(...scriptResult.issues);
  }

  if (followUp !== undefined) {
    validateTextField(issues, "followUp", followUp, longFormRules);
  }

  const passed = issues.length === 0;
  return { passed, issues };
}
|
||||
Reference in New Issue
Block a user