Files
webdev-pipeline/convex/leadDiscovery.ts

681 lines
20 KiB
TypeScript

import { v } from "convex/values";
import {
GOOGLE_PLACES_FIELD_MASK,
buildGeocodingUrl,
getBlacklistLookupValues,
getBlacklistMatches,
getCandidateEmailValues,
getPlacesSearchSpec,
normalizeDomain,
normalizePhone,
normalizeText,
normalizePlacesResponse,
parseGeocodingResponse,
} from "../lib/lead-discovery-google";
import {
buildLeadDiscoveryLeadRecord,
buildLeadDiscoveryCounters,
getLeadDiscoveryPriority,
shouldScheduleWebsiteEnrichment,
} from "../lib/lead-discovery-run";
import { calculateNextRunAt } from "../lib/campaign-scheduling";
import { internal } from "./_generated/api";
import { Doc, Id } from "./_generated/dataModel";
import { internalAction, internalMutation } from "./_generated/server";
// Document type of a row in the "campaigns" table.
type CampaignDoc = Doc<"campaigns">;
// Validator for a field that holds either a string or an explicit null.
const nullableString = v.union(v.string(), v.null());
// Validator for a field that holds either a number or an explicit null.
const nullableNumber = v.union(v.number(), v.null());
// Argument validator for one discovery candidate. It is the element type of
// `persistDiscoveredLeads`' `candidates` argument; in this file the candidates
// come from `normalizePlacesResponse`.
const candidateValidator = v.object({
// Required identification fields.
placeId: v.string(),
businessName: v.string(),
address: v.string(),
// Fields that must be present but may be null.
websiteUrl: nullableString,
websiteDomain: nullableString,
phone: nullableString,
rating: nullableNumber,
userRatingCount: nullableNumber,
businessStatus: nullableString,
googleTypes: v.array(v.string()),
googlePrimaryType: nullableString,
googleMapsUrl: nullableString,
// Optional contact fields: each may be omitted entirely or, where nullable, set to null.
email: v.optional(nullableString),
emailSource: v.optional(nullableString),
contactPerson: v.optional(nullableString),
isBusinessContactAddress: v.optional(v.boolean()),
// Optional list of further contact e-mail entries; `email` is mandatory per entry.
contactEmails: v.optional(
v.array(
v.object({
email: v.string(),
emailSource: v.optional(nullableString),
contactPerson: v.optional(nullableString),
isBusinessContactAddress: v.optional(v.boolean()),
}),
),
),
// Provenance: only the literal "google_places" is accepted as provider.
sourceProvider: v.literal("google_places"),
// Numeric fetch timestamp; presumably epoch milliseconds (the caller passes Date.now()
// into normalizePlacesResponse) — TODO confirm against that helper.
sourceFetchedAt: v.number(),
});
// Validator for one label/value entry in the optional `details` array of a run event
// (see `appendRunEvent`). `source` is optional; this file uses values such as
// "google_places", "google_geocoding" and "blacklist".
const eventDetailValidator = v.object({
label: v.string(),
value: v.string(),
source: v.optional(v.string()),
});
/**
 * Reads a mandatory environment variable.
 *
 * The value is trimmed first; a missing, empty, or whitespace-only value is
 * treated as "not set".
 *
 * @param key - Name of the environment variable.
 * @returns The trimmed, non-empty value.
 * @throws Error with the message `<key> ist nicht gesetzt.` when no usable value exists.
 */
function getRequiredEnv(key: string) {
  const raw = process.env[key];
  const trimmed = typeof raw === "string" ? raw.trim() : "";
  if (trimmed.length > 0) {
    return trimmed;
  }
  throw new Error(`${key} ist nicht gesetzt.`);
}
/**
 * Turns an arbitrary thrown value into a printable message.
 *
 * @param error - Whatever was caught.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function messageFromError(error: unknown) {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
/**
 * Determines the niche label for a campaign.
 *
 * For campaigns in "custom" category mode, or whose category is "Anderes",
 * the trimmed custom search term is used when it is non-empty. In every other
 * case the campaign's category is returned.
 *
 * @param campaign - The campaign document.
 * @returns The custom search term or the category.
 */
function getCampaignNiche(campaign: CampaignDoc) {
  const prefersCustomTerm =
    campaign.categoryMode === "custom" || campaign.category === "Anderes";
  if (!prefersCustomTerm) {
    return campaign.category;
  }
  const customTerm = campaign.customSearchTerm?.trim();
  // Empty or missing custom terms fall back to the category.
  return customTerm || campaign.category;
}
/**
 * Performs an HTTP request and returns the parsed JSON body.
 *
 * @param url - Request URL.
 * @param init - Optional `fetch` options (method, headers, body, ...).
 * @returns The parsed JSON payload of a successful (`response.ok`) response.
 * @throws Error containing the HTTP status and at most the first 500 characters
 *   of the response body when the response is not OK.
 */
async function fetchJson(url: string, init?: RequestInit) {
  const response = await fetch(url, init);
  if (response.ok) {
    return await response.json();
  }
  const bodyText = await response.text();
  // Cap the body excerpt so a large error page cannot bloat the message.
  const excerpt = bodyText.slice(0, 500);
  throw new Error(
    `Google API request failed with HTTP ${response.status}: ${excerpt}`,
  );
}
/**
 * Executes the lead-discovery step for a single campaign run.
 *
 * Flow:
 * 1. `startCampaignRun` marks the run as running. If it returns null, the
 *    action returns null without doing anything else.
 * 2. The campaign coordinates are taken from the campaign document, or geocoded
 *    from the postal code and cached on the campaign when they are missing.
 * 3. Google Places is queried with the search spec built for the campaign.
 * 4. The normalized candidates are persisted via `persistDiscoveredLeads`.
 * 5. Every lead in the returned enrichment queue is handed to
 *    `websiteEnrichment.queueLeadEnrichment`, with one run event per lead.
 * 6. The run is finished as "succeeded".
 *
 * Any error inside steps 2-6 is recorded as an error event and the run is
 * finished as "failed"; the action then returns null.
 *
 * @param args.runId - The agent run to process.
 * @returns The result of `persistDiscoveredLeads` on success, otherwise null.
 */
export const processCampaignRun = internalAction({
  args: {
    runId: v.id("agentRuns"),
  },
  handler: async (ctx, args) => {
    type PersistResult = {
      leadsFound: number;
      leadsCreated: number;
      skippedDuplicates: number;
      skippedBlacklisted: number;
      errors: number;
      websiteEnrichmentQueue: Array<{
        leadId: Id<"leads">;
        companyName: string;
        website: string;
      }>;
    };

    const started: {
      campaign: CampaignDoc;
      runId: Id<"agentRuns">;
    } | null = await ctx.runMutation(internal.leadDiscovery.startCampaignRun, {
      runId: args.runId,
    });
    if (!started) {
      return null;
    }

    // Kept outside the try block so the failure path can still report leads
    // that were already committed before a later step threw.
    let persisted: PersistResult | null = null;

    try {
      // The Places key is always needed, so verify it before any other API call.
      const placesApiKey = getRequiredEnv("GOOGLE_PLACES_API_KEY");
      const campaign = started.campaign;
      let latitude = campaign.latitude;
      let longitude = campaign.longitude;
      if (typeof latitude !== "number" || typeof longitude !== "number") {
        // The geocoding key is only required when coordinates actually have to
        // be looked up; runs that use the cached geocode must not fail on it.
        const geocodingApiKey = getRequiredEnv("GOOGLE_GEOCODING_API_KEY");
        const fetchedAt = Date.now();
        const geocodingUrl = buildGeocodingUrl({
          postalCode: campaign.postalCode,
          apiKey: geocodingApiKey,
        });
        const geocodingJson = await fetchJson(geocodingUrl);
        const geocoding = parseGeocodingResponse(geocodingJson, fetchedAt);
        latitude = geocoding.latitude;
        longitude = geocoding.longitude;
        await ctx.runMutation(internal.leadDiscovery.cacheCampaignGeocode, {
          campaignId: campaign._id,
          latitude,
          longitude,
          geocodedAt: geocoding.fetchedAt,
          geocodingPlaceId: geocoding.placeId,
          geocodingFormattedAddress: geocoding.formattedAddress,
        });
        await ctx.runMutation(internal.leadDiscovery.appendRunEvent, {
          runId: args.runId,
          level: "info",
          message: "PLZ geocodiert.",
          details: [
            { label: "PLZ", value: campaign.postalCode, source: "google_geocoding" },
            {
              label: "Koordinaten",
              value: `${latitude}, ${longitude}`,
              source: "google_geocoding",
            },
          ],
        });
      } else {
        await ctx.runMutation(internal.leadDiscovery.appendRunEvent, {
          runId: args.runId,
          level: "info",
          message: "Geocoding-Cache der Kampagne verwendet.",
          details: [
            { label: "PLZ", value: campaign.postalCode },
            { label: "Koordinaten", value: `${latitude}, ${longitude}` },
          ],
        });
      }

      const searchSpec = getPlacesSearchSpec({
        categoryMode: campaign.categoryMode,
        category: campaign.category,
        customSearchTerm: campaign.customSearchTerm,
        postalCode: campaign.postalCode,
        radiusKm: campaign.radiusKm,
        latitude,
        longitude,
      });
      const placesJson = await fetchJson(
        `https://places.googleapis.com/v1/places:${searchSpec.endpoint}`,
        {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            "X-Goog-Api-Key": placesApiKey,
            "X-Goog-FieldMask": GOOGLE_PLACES_FIELD_MASK,
          },
          body: JSON.stringify(searchSpec.body),
        },
      );
      const candidates = normalizePlacesResponse(placesJson, Date.now());
      if (candidates.length === 0) {
        // An empty result is not an error; the run continues and finishes normally.
        await ctx.runMutation(internal.leadDiscovery.appendRunEvent, {
          runId: args.runId,
          level: "warning",
          message: "Google Places lieferte keine Ergebnisse.",
          details: [
            { label: "Suchtyp", value: searchSpec.searchType, source: "google_places" },
            { label: "Kategorie", value: getCampaignNiche(campaign), source: "google_places" },
          ],
        });
      }

      const result: PersistResult = await ctx.runMutation(
        internal.leadDiscovery.persistDiscoveredLeads,
        {
          runId: args.runId,
          campaignId: campaign._id,
          maxNewLeads: campaign.maxNewLeadsPerRun,
          niche: getCampaignNiche(campaign),
          postalCode: campaign.postalCode,
          candidates,
        },
      );
      persisted = result;

      for (const enrichment of result.websiteEnrichmentQueue) {
        await ctx.runMutation(internal.websiteEnrichment.queueLeadEnrichment, {
          leadId: enrichment.leadId,
          parentRunId: args.runId,
        });
        await ctx.runMutation(internal.leadDiscovery.appendRunEvent, {
          runId: args.runId,
          level: "info",
          message: "Website-Kontaktanreicherung geplant.",
          details: [
            {
              label: "Unternehmen",
              value: enrichment.companyName,
              source: "google_places",
            },
            {
              label: "Website",
              value: enrichment.website,
              source: "google_places",
            },
          ],
        });
      }

      await ctx.runMutation(internal.leadDiscovery.finishCampaignRun, {
        runId: args.runId,
        status: "succeeded",
        currentStep: "lead_discovery",
        counters: buildLeadDiscoveryCounters({
          leadsFound: result.leadsFound,
          leadsCreated: result.leadsCreated,
          errors: result.errors,
        }),
      });
      return result;
    } catch (error) {
      const errorSummary = messageFromError(error);
      try {
        await ctx.runMutation(internal.leadDiscovery.appendRunEvent, {
          runId: args.runId,
          level: "error",
          message: "Lead-Recherche fehlgeschlagen.",
          details: [{ label: "Fehler", value: errorSummary }],
        });
      } catch (loggingError) {
        // Writing the event is best effort. The run must still be finished
        // below: startCampaignRun cancels every new run while any campaign run
        // is still in status "running".
        console.error(
          `Could not record failure event for run ${args.runId}: ${messageFromError(loggingError)}`,
        );
      }
      await ctx.runMutation(internal.leadDiscovery.finishCampaignRun, {
        runId: args.runId,
        status: "failed",
        currentStep: "lead_discovery",
        errorSummary,
        counters: buildLeadDiscoveryCounters({
          // Leads persisted before the failure are committed, so report them
          // instead of zeros; the failure itself counts as one more error.
          leadsFound: persisted?.leadsFound ?? 0,
          leadsCreated: persisted?.leadsCreated ?? 0,
          errors: (persisted?.errors ?? 0) + 1,
        }),
      });
      return null;
    }
  },
});
/**
 * Moves a pending campaign run into the "running" state.
 *
 * Returns null without starting when:
 * - the run does not exist, has no campaign, or is not "pending";
 * - another run of type "campaign" is already "running" (this run is then
 *   marked "canceled" and a warning event is written);
 * - the campaign document is missing (this run is then marked "failed").
 *
 * Otherwise the run is marked "running", the campaign's `lastRunAt` and
 * `nextRunAt` are updated, and a start event is written.
 *
 * @param args.runId - The agent run to start.
 * @returns `{ runId, campaign }` when the run was started, otherwise null.
 */
export const startCampaignRun = internalMutation({
  args: {
    runId: v.id("agentRuns"),
  },
  handler: async (ctx, args) => {
    const { runId } = args;
    const timestamp = Date.now();

    // Shared patch for both early-exit paths that end the run right away.
    const closeRun = (status: "canceled" | "failed", errorSummary: string) =>
      ctx.db.patch(runId, {
        status,
        currentStep: "lead_discovery",
        errorSummary,
        finishedAt: timestamp,
        updatedAt: timestamp,
      });

    const run = await ctx.db.get(runId);
    if (run === null || !run.campaignId || run.status !== "pending") {
      return null;
    }

    const [otherRunningRun] = await ctx.db
      .query("agentRuns")
      .withIndex("by_type_and_status", (q) =>
        q.eq("type", "campaign").eq("status", "running"),
      )
      .take(1);
    if (otherRunningRun !== undefined) {
      await closeRun("canceled", "Ein anderer Agentenlauf ist bereits aktiv.");
      await ctx.db.insert("agentRunEvents", {
        runId,
        level: "warning",
        message: "Lauf nicht gestartet, weil ein anderer Agentenlauf aktiv ist.",
        createdAt: timestamp,
      });
      return null;
    }

    const campaign = await ctx.db.get(run.campaignId);
    if (campaign === null) {
      await closeRun("failed", "Kampagne nicht gefunden.");
      return null;
    }

    await ctx.db.patch(runId, {
      status: "running",
      currentStep: "lead_discovery",
      startedAt: timestamp,
      updatedAt: timestamp,
    });

    const nextRunAt = calculateNextRunAt({
      recurrence: campaign.recurrence,
      status: campaign.status,
      lastRunAt: timestamp,
      now: timestamp,
    });
    await ctx.db.patch(campaign._id, {
      lastRunAt: timestamp,
      nextRunAt,
      updatedAt: timestamp,
    });

    await ctx.db.insert("agentRunEvents", {
      runId,
      level: "info",
      message: "Lead-Recherche gestartet.",
      details: [
        { label: "Kampagne", value: campaign.name },
        { label: "PLZ", value: campaign.postalCode },
      ],
      createdAt: timestamp,
    });

    return { runId, campaign };
  },
});
/**
 * Stores a geocoding result on the campaign document.
 *
 * Writes latitude, longitude, the geocoding timestamp, the geocoding place ID
 * and the formatted address, and refreshes `updatedAt`.
 *
 * @param args.campaignId - Campaign to patch.
 */
export const cacheCampaignGeocode = internalMutation({
  args: {
    campaignId: v.id("campaigns"),
    latitude: v.number(),
    longitude: v.number(),
    geocodedAt: v.number(),
    geocodingPlaceId: v.string(),
    geocodingFormattedAddress: v.string(),
  },
  handler: async (ctx, args) => {
    // Every argument except the target ID maps 1:1 onto a campaign field.
    const { campaignId, ...geocodeFields } = args;
    await ctx.db.patch(campaignId, {
      ...geocodeFields,
      updatedAt: Date.now(),
    });
  },
});
/**
 * Appends one event to the log of an agent run.
 *
 * @param args.runId - Run the event belongs to.
 * @param args.level - Severity: "info", "warning" or "error".
 * @param args.message - Event text.
 * @param args.details - Optional label/value entries; left out of the stored
 *   row when not provided.
 */
export const appendRunEvent = internalMutation({
  args: {
    runId: v.id("agentRuns"),
    level: v.union(v.literal("info"), v.literal("warning"), v.literal("error")),
    message: v.string(),
    details: v.optional(v.array(eventDetailValidator)),
  },
  handler: async (ctx, args) => {
    const { runId, level, message, details } = args;
    await ctx.db.insert("agentRunEvents", {
      runId,
      level,
      message,
      ...(details !== undefined ? { details } : {}),
      createdAt: Date.now(),
    });
  },
});
/**
 * Persists discovery candidates as leads for one campaign run.
 *
 * Candidates are processed in order. For each candidate the handler:
 * - stops the loop once `maxNewLeads` leads have been created;
 * - skips candidates with a blank business name (counted in `errors`);
 * - skips hard duplicates (same place ID, website domain, or e-mail as an existing lead);
 * - skips candidates that match a blacklist entry;
 * - otherwise inserts a lead, flagging it as "possible_duplicate" when an existing
 *   lead shares the phone number or the company-name/address pair.
 * Every outcome is recorded as a run event, followed by one summary event.
 *
 * @returns Counters (`leadsFound`, `leadsCreated`, `skippedDuplicates`,
 *   `skippedBlacklisted`, `errors`) and the `websiteEnrichmentQueue` of newly
 *   created leads for which `shouldScheduleWebsiteEnrichment` returned true.
 */
export const persistDiscoveredLeads = internalMutation({
args: {
runId: v.id("agentRuns"),
campaignId: v.id("campaigns"),
maxNewLeads: v.number(),
niche: v.string(),
postalCode: v.string(),
candidates: v.array(candidateValidator),
},
handler: async (ctx, args) => {
// `now` is passed into the lead record builder; run events read Date.now() separately.
const now = Date.now();
let leadsCreated = 0;
let skippedDuplicates = 0;
let skippedBlacklisted = 0;
let errors = 0;
const websiteEnrichmentQueue: Array<{
leadId: Id<"leads">;
companyName: string;
website: string;
}> = [];
for (const candidate of args.candidates) {
// The limit is checked at the top of each iteration, so the limit event is only
// written when at least one more candidate remains after the limit was reached.
if (leadsCreated >= args.maxNewLeads) {
await ctx.db.insert("agentRunEvents", {
runId: args.runId,
level: "info",
message: "Lead-Limit des Laufs erreicht.",
details: [{ label: "Limit", value: String(args.maxNewLeads) }],
createdAt: Date.now(),
});
break;
}
// A candidate without a usable name cannot become a lead; count it as an error.
if (!candidate.businessName.trim()) {
errors += 1;
await ctx.db.insert("agentRunEvents", {
runId: args.runId,
level: "warning",
message: "Google-Places-Ergebnis ohne Unternehmensname übersprungen.",
details: [{ label: "Place ID", value: candidate.placeId }],
createdAt: Date.now(),
});
continue;
}
// NOTE(review): the place ID is normalized with normalizeDomain, not a dedicated
// helper. Google place IDs are presumably case-sensitive opaque strings — confirm
// that this normalization is intended and matches how existing rows were written.
const normalizedPlaceId = normalizeDomain(candidate.placeId);
const normalizedDomain = normalizeDomain(candidate.websiteDomain);
const normalizedEmails = getCandidateEmailValues(candidate);
const normalizedPhone = normalizePhone(candidate.phone);
const normalizedCompanyName = normalizeText(candidate.businessName);
const normalizedAddress = normalizeText(candidate.address);
// Hard duplicate checks: each lookup only needs to know whether any matching lead exists.
const duplicateByPlaceId = normalizedPlaceId
? await ctx.db
.query("leads")
.withIndex("by_normalizedGooglePlaceId", (q) =>
q.eq("normalizedGooglePlaceId", normalizedPlaceId),
)
.take(1)
: [];
const duplicateByDomain = normalizedDomain
? await ctx.db
.query("leads")
.withIndex("by_websiteDomain", (q) => q.eq("websiteDomain", normalizedDomain))
.take(1)
: [];
// One lookup per candidate e-mail; any hit makes the candidate a duplicate.
const duplicateByEmailRows = [];
for (const email of normalizedEmails) {
const rows = await ctx.db
.query("leads")
.withIndex("by_normalizedEmail", (q) => q.eq("normalizedEmail", email))
.take(1);
duplicateByEmailRows.push(...rows);
}
if (
duplicateByPlaceId.length > 0 ||
duplicateByDomain.length > 0 ||
duplicateByEmailRows.length > 0
) {
skippedDuplicates += 1;
await ctx.db.insert("agentRunEvents", {
runId: args.runId,
level: "info",
message: "Doppelter Lead übersprungen.",
details: [
{ label: "Unternehmen", value: candidate.businessName, source: "google_places" },
{ label: "Place ID", value: candidate.placeId, source: "google_places" },
],
createdAt: Date.now(),
});
continue;
}
// Soft duplicate checks: a hit does not skip the candidate; the new lead is
// inserted and flagged as a possible duplicate further below.
const probableDuplicateByPhone = normalizedPhone
? await ctx.db
.query("leads")
.withIndex("by_normalizedPhone", (q) =>
q.eq("normalizedPhone", normalizedPhone),
)
.take(1)
: [];
const probableDuplicateByAddress = normalizedCompanyName && normalizedAddress
? await ctx.db
.query("leads")
.withIndex("by_normalizedCompanyName_and_normalizedAddress", (q) =>
q
.eq("normalizedCompanyName", normalizedCompanyName)
.eq("normalizedAddress", normalizedAddress),
)
.take(1)
: [];
// A phone match takes precedence over a name/address match.
const probableDuplicateLead =
probableDuplicateByPhone[0] ?? probableDuplicateByAddress[0] ?? null;
// Blacklist: fetch at most one entry per lookup value, then let getBlacklistMatches
// decide which of the fetched rows actually apply to this candidate.
const blacklistRows = [];
for (const lookup of getBlacklistLookupValues(candidate)) {
const rows = await ctx.db
.query("blacklistEntries")
.withIndex("by_type_and_normalizedValue", (q) =>
q
.eq("type", lookup.type)
.eq("normalizedValue", lookup.normalizedValue),
)
.take(1);
blacklistRows.push(...rows);
}
const blacklistMatches = getBlacklistMatches(candidate, blacklistRows);
if (blacklistMatches.length > 0) {
skippedBlacklisted += 1;
await ctx.db.insert("agentRunEvents", {
runId: args.runId,
level: "warning",
message: "Gesperrter Lead übersprungen.",
details: blacklistMatches.map((match) => ({
label: match.type,
value: match.value,
source: "blacklist",
})),
createdAt: Date.now(),
});
continue;
}
// Build the base record, then overlay normalized lookup fields, priority and
// duplicate markers before inserting.
const lead = buildLeadDiscoveryLeadRecord({
campaignId: args.campaignId,
runId: args.runId,
niche: args.niche,
postalCode: args.postalCode,
candidate,
now,
});
// NOTE(review): `??` only falls through on null/undefined, so an empty-string
// websiteUrl would hide a non-empty websiteDomain here — confirm that
// normalizePlacesResponse never emits "" for websiteUrl.
const hasWebsite = Boolean(candidate.websiteUrl ?? candidate.websiteDomain);
const priorityResult = getLeadDiscoveryPriority({
isDuplicate: !!probableDuplicateLead,
hasWebsite,
hasWebsiteSignal: false, // plain Google-Places website hint maps to medium priority.
});
const isDuplicateCandidate = !!probableDuplicateLead;
// Normalized fields are only set when non-empty; otherwise they keep whatever
// the record builder produced.
if (normalizedPlaceId) {
lead.normalizedGooglePlaceId = normalizedPlaceId;
}
if (normalizedPhone !== "") {
lead.normalizedPhone = normalizedPhone;
}
if (normalizedCompanyName !== "") {
lead.normalizedCompanyName = normalizedCompanyName;
}
if (normalizedAddress !== "") {
lead.normalizedAddress = normalizedAddress;
}
lead.priority = priorityResult.priority;
lead.priorityReason = priorityResult.reason;
if (isDuplicateCandidate) {
lead.duplicateStatus = "possible_duplicate";
lead.duplicateReason = `Möglicher Dublettenkandidat zu Lead ${probableDuplicateLead._id}`;
lead.duplicateOfLeadId = probableDuplicateLead._id;
}
const leadId = await ctx.db.insert("leads", lead);
leadsCreated += 1;
// Collect leads for website enrichment; this mutation only returns the queue.
if (shouldScheduleWebsiteEnrichment(lead)) {
websiteEnrichmentQueue.push({
leadId,
companyName: lead.companyName,
website: lead.websiteDomain ?? lead.websiteUrl ?? "unbekannt",
});
}
await ctx.db.insert("agentRunEvents", {
runId: args.runId,
level: "info",
message: "Lead aus Google Places gespeichert.",
details: [
{ label: "Unternehmen", value: candidate.businessName, source: "google_places" },
{ label: "Place ID", value: candidate.placeId, source: "google_places" },
],
createdAt: Date.now(),
});
}
// Summary event; "Gefunden" is the number of candidates received, not the number processed.
await ctx.db.insert("agentRunEvents", {
runId: args.runId,
level: "info",
message: "Lead-Recherche abgeschlossen.",
details: [
{ label: "Gefunden", value: String(args.candidates.length) },
{ label: "Gespeichert", value: String(leadsCreated) },
{ label: "Dubletten übersprungen", value: String(skippedDuplicates) },
{ label: "Sperrliste übersprungen", value: String(skippedBlacklisted) },
],
createdAt: Date.now(),
});
return {
leadsFound: args.candidates.length,
leadsCreated,
skippedDuplicates,
skippedBlacklisted,
errors,
websiteEnrichmentQueue,
};
},
});
/**
 * Writes the final state of an agent run.
 *
 * Always sets `status`, `counters`, `updatedAt` and `finishedAt`.
 * `currentStep` and `errorSummary` are only written when they were provided,
 * so existing values on the run stay untouched otherwise.
 *
 * @param args.runId - Run to finish.
 * @param args.status - Terminal status: "succeeded", "failed" or "canceled".
 * @param args.currentStep - Optional step label to store.
 * @param args.errorSummary - Optional error text to store.
 * @param args.counters - Final counters of the run.
 */
export const finishCampaignRun = internalMutation({
  args: {
    runId: v.id("agentRuns"),
    status: v.union(v.literal("succeeded"), v.literal("failed"), v.literal("canceled")),
    currentStep: v.optional(v.string()),
    errorSummary: v.optional(v.string()),
    counters: v.object({
      leadsFound: v.number(),
      leadsCreated: v.number(),
      auditsCreated: v.number(),
      outreachPrepared: v.number(),
      errors: v.number(),
    }),
  },
  handler: async (ctx, args) => {
    const { runId, status, counters, currentStep, errorSummary } = args;
    await ctx.db.patch(runId, {
      status,
      updatedAt: Date.now(),
      finishedAt: Date.now(),
      counters,
      // Optional fields are spread in only when present so absent arguments
      // never overwrite stored values.
      ...(currentStep !== undefined ? { currentStep } : {}),
      ...(errorSummary !== undefined ? { errorSummary } : {}),
    });
  },
});