Files
pitchfast/convex/leadDiscovery.ts

705 lines
21 KiB
TypeScript

import { v } from "convex/values";
import {
getBlacklistLookupValues,
getBlacklistMatches,
getCandidateEmailValues,
getUsableContactEmail,
normalizeDomain,
normalizePhone,
normalizeText,
} from "../lib/lead-discovery-google";
import {
LOCAL_BUSINESS_DATA_HOST,
getLocalBusinessSearchSpec,
normalizeLocalBusinessSearchResponse,
} from "../lib/lead-discovery-local-business";
import {
buildLeadDiscoveryLeadRecord,
buildLeadDiscoveryCounters,
getLeadDiscoveryPriority,
} from "../lib/lead-discovery-run";
import { calculateNextRunAt } from "../lib/campaign-scheduling";
import { internal } from "./_generated/api";
import { Doc, Id } from "./_generated/dataModel";
import { internalAction, internalMutation } from "./_generated/server";
/** A document from the `campaigns` table. */
type CampaignDoc = Doc<"campaigns">;

/**
 * Shape of the patch written to an already-stored lead when a rediscovered
 * duplicate candidate supplies contact data. `updatedAt` is always required;
 * every contact-related field is optional and only set when a value exists.
 */
type DuplicateEmailBackfillPatch = {
  updatedAt: number;
} & Partial<
  Pick<
    Doc<"leads">,
    | "normalizedEmail"
    | "email"
    | "emailSource"
    | "contactPerson"
    | "contactStatus"
    | "contactStatusReason"
  >
>;
// Reusable primitives: a value that is either present or explicitly null.
const nullableString = v.union(v.string(), v.null());
const nullableNumber = v.union(v.number(), v.null());

// A string field that may be omitted entirely, or be present as string/null.
const optionalNullableString = v.optional(nullableString);

/** One additional contact address attached to a discovery candidate. */
const contactEmailValidator = v.object({
  email: v.string(),
  emailSource: optionalNullableString,
  contactPerson: optionalNullableString,
  isBusinessContactAddress: v.optional(v.boolean()),
});

/** Providers a candidate may originate from. */
const sourceProviderValidator = v.union(
  v.literal("google_places"),
  v.literal("local_business_data"),
);

/**
 * Argument validator for a single business candidate handed to
 * `persistDiscoveredLeads`.
 */
const candidateValidator = v.object({
  // Identity
  placeId: v.string(),
  sourceBusinessId: optionalNullableString,
  businessName: v.string(),
  address: v.string(),

  // Web presence and phone
  websiteUrl: nullableString,
  websiteDomain: nullableString,
  phone: nullableString,

  // Listing metadata
  rating: nullableNumber,
  userRatingCount: nullableNumber,
  businessStatus: nullableString,
  googleTypes: v.array(v.string()),
  googlePrimaryType: nullableString,
  googleMapsUrl: nullableString,

  // Primary contact plus optional list of further addresses
  email: optionalNullableString,
  emailSource: optionalNullableString,
  contactPerson: optionalNullableString,
  isBusinessContactAddress: v.optional(v.boolean()),
  contactEmails: v.optional(v.array(contactEmailValidator)),

  // Provenance
  sourceProvider: sourceProviderValidator,
  sourceFetchedAt: v.number(),
});

/** Validator for one label/value entry in a run event's `details` list. */
const eventDetailValidator = v.object({
  label: v.string(),
  value: v.string(),
  source: v.optional(v.string()),
});
/**
 * Reads an environment variable and returns its value with surrounding
 * whitespace removed.
 *
 * @param key Name of the environment variable.
 * @returns The trimmed, non-empty value.
 * @throws Error when the variable is unset or blank after trimming.
 */
function getRequiredEnv(key: string) {
  const trimmed = process.env[key]?.trim();
  if (trimmed) {
    return trimmed;
  }
  throw new Error(`${key} ist nicht gesetzt.`);
}
/**
 * Produces a printable message for any thrown value.
 *
 * @param error The caught value; may be an `Error` or anything else.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function messageFromError(error: unknown) {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
/**
 * Determines the niche label for a campaign.
 *
 * When the campaign is in "custom" category mode, or its category is the
 * catch-all "Anderes", the trimmed custom search term is used if it is
 * non-empty. In every other case the campaign's category is returned.
 *
 * @param campaign The campaign document.
 * @returns The custom search term or the category.
 */
function getCampaignNiche(campaign: CampaignDoc) {
  const usesCustomTerm =
    campaign.categoryMode === "custom" || campaign.category === "Anderes";
  if (!usesCustomTerm) {
    return campaign.category;
  }
  const customTerm = campaign.customSearchTerm?.trim();
  return customTerm ? customTerm : campaign.category;
}
/**
 * Performs an HTTP request and returns the parsed JSON body.
 *
 * @param url Request URL.
 * @param init Optional `fetch` options.
 * @returns The parsed JSON body of a response whose `ok` flag is set.
 * @throws Error containing the HTTP status and the first 500 characters of
 *   the response body when `ok` is not set.
 */
async function fetchJson(url: string, init?: RequestInit) {
  const response = await fetch(url, init);
  if (response.ok) {
    return await response.json();
  }
  const body = await response.text();
  // Cap the echoed body so the error message stays bounded.
  const excerpt = body.slice(0, 500);
  throw new Error(
    `Local Business Data request failed with HTTP ${response.status}: ${excerpt}`,
  );
}
/**
 * Executes the lead-discovery step of one campaign run.
 *
 * Flow:
 *  1. `startCampaignRun` moves the run to "running"; if it returns null the
 *     action stops without doing anything else.
 *  2. The Local Business Data API is queried with the campaign's search spec.
 *  3. A usage event is recorded (one request, one lookup per candidate).
 *  4. A warning event is appended when no candidates came back.
 *  5. Candidates are handed to `persistDiscoveredLeads`.
 *  6. The run is finished as "succeeded" with the resulting counters.
 *
 * Any error thrown in steps 2-6 is recorded as an error event and the run is
 * finished as "failed" with zeroed lead counters and one error.
 *
 * NOTE(review): if appending the error event itself throws, the
 * `finishCampaignRun` call in the catch block is never reached.
 *
 * @returns The persistence counters on success, otherwise null.
 */
export const processCampaignRun = internalAction({
  args: {
    runId: v.id("agentRuns"),
  },
  handler: async (ctx, args) => {
    const { runId } = args;

    // Explicit annotation: the result type is spelled out rather than
    // inferred from the generated `internal` API object.
    const started: {
      campaign: CampaignDoc;
      runId: Id<"agentRuns">;
    } | null = await ctx.runMutation(internal.leadDiscovery.startCampaignRun, {
      runId,
    });
    if (!started) {
      return null;
    }

    try {
      const apiKey = getRequiredEnv("LOCAL_BUSINESS_DATA_API_KEY");
      const { campaign } = started;

      const searchSpec = getLocalBusinessSearchSpec({
        categoryMode: campaign.categoryMode,
        category: campaign.category,
        customSearchTerm: campaign.customSearchTerm,
        postalCode: campaign.postalCode,
        maxNewLeads: campaign.maxNewLeadsPerRun,
      });

      const requestHeaders = {
        "X-RapidAPI-Key": apiKey,
        "X-RapidAPI-Host": LOCAL_BUSINESS_DATA_HOST,
      };
      const rawResponse = await fetchJson(searchSpec.url, {
        method: "GET",
        headers: requestHeaders,
      });
      const candidates = normalizeLocalBusinessSearchResponse(
        rawResponse,
        Date.now(),
      );

      await ctx.runMutation(internal.usageEvents.recordUsageEvent, {
        provider: "local_business_data",
        operation: "lead_lookup",
        runId,
        estimatedCostUsd: 0,
        callCounts: {
          requests: 1,
          lookups: candidates.length,
        },
      });

      const niche = getCampaignNiche(campaign);

      // An empty result is not an error; it is surfaced as a warning and the
      // run still goes through persistence and finishes as succeeded.
      if (candidates.length === 0) {
        await ctx.runMutation(internal.leadDiscovery.appendRunEvent, {
          runId,
          level: "warning",
          message: "Local Business Data lieferte keine Ergebnisse.",
          details: [
            {
              label: "Suchquery",
              value: searchSpec.query,
              source: "local_business_data",
            },
            {
              label: "Kategorie",
              value: niche,
              source: "local_business_data",
            },
          ],
        });
      }

      const result: {
        leadsFound: number;
        leadsCreated: number;
        skippedDuplicates: number;
        skippedBlacklisted: number;
        errors: number;
      } = await ctx.runMutation(internal.leadDiscovery.persistDiscoveredLeads, {
        runId,
        campaignId: campaign._id,
        maxNewLeads: campaign.maxNewLeadsPerRun,
        niche,
        postalCode: campaign.postalCode,
        candidates,
      });

      const { leadsFound, leadsCreated, errors } = result;
      await ctx.runMutation(internal.leadDiscovery.finishCampaignRun, {
        runId,
        status: "succeeded",
        currentStep: "lead_discovery",
        counters: buildLeadDiscoveryCounters({ leadsFound, leadsCreated, errors }),
      });

      return result;
    } catch (error) {
      const errorSummary = messageFromError(error);

      await ctx.runMutation(internal.leadDiscovery.appendRunEvent, {
        runId,
        level: "error",
        message: "Lead-Recherche fehlgeschlagen.",
        details: [{ label: "Fehler", value: errorSummary }],
      });

      await ctx.runMutation(internal.leadDiscovery.finishCampaignRun, {
        runId,
        status: "failed",
        currentStep: "lead_discovery",
        errorSummary,
        counters: buildLeadDiscoveryCounters({
          leadsFound: 0,
          leadsCreated: 0,
          errors: 1,
        }),
      });

      return null;
    }
  },
});
/**
 * Moves a pending campaign run into the "running" state.
 *
 * Returns null without starting when:
 *  - the run does not exist, has no campaign, or is not "pending";
 *  - another run of type "campaign" is already "running" (this run is then
 *    marked "canceled" and a warning event is written);
 *  - the referenced campaign no longer exists (this run is marked "failed").
 *
 * On success the run is set to "running", the campaign's `lastRunAt` and
 * `nextRunAt` are updated, and an info event is written.
 *
 * @returns `{ runId, campaign }` on success, otherwise null. The returned
 *   campaign is the document as read before its schedule fields are patched.
 */
export const startCampaignRun = internalMutation({
  args: {
    runId: v.id("agentRuns"),
  },
  handler: async (ctx, args) => {
    const { runId } = args;
    const now = Date.now();
    const currentStep = "lead_discovery";

    const run = await ctx.db.get(runId);
    if (!run) {
      return null;
    }
    const { campaignId } = run;
    if (!campaignId || run.status !== "pending") {
      return null;
    }

    // Only one campaign run may be active at a time.
    const [otherRunningRun] = await ctx.db
      .query("agentRuns")
      .withIndex("by_type_and_status", (q) =>
        q.eq("type", "campaign").eq("status", "running"),
      )
      .take(1);
    if (otherRunningRun !== undefined) {
      await ctx.db.patch(runId, {
        status: "canceled",
        currentStep,
        errorSummary: "Ein anderer Agentenlauf ist bereits aktiv.",
        finishedAt: now,
        updatedAt: now,
      });
      await ctx.db.insert("agentRunEvents", {
        runId,
        level: "warning",
        message: "Lauf nicht gestartet, weil ein anderer Agentenlauf aktiv ist.",
        createdAt: now,
      });
      return null;
    }

    const campaign = await ctx.db.get(campaignId);
    if (!campaign) {
      await ctx.db.patch(runId, {
        status: "failed",
        currentStep,
        errorSummary: "Kampagne nicht gefunden.",
        finishedAt: now,
        updatedAt: now,
      });
      return null;
    }

    await ctx.db.patch(runId, {
      status: "running",
      currentStep,
      startedAt: now,
      updatedAt: now,
    });

    // Advance the campaign's schedule as of this start time.
    const nextRunAt = calculateNextRunAt({
      recurrence: campaign.recurrence,
      status: campaign.status,
      lastRunAt: now,
      now,
    });
    await ctx.db.patch(campaign._id, {
      lastRunAt: now,
      nextRunAt,
      updatedAt: now,
    });

    await ctx.db.insert("agentRunEvents", {
      runId,
      level: "info",
      message: "Lead-Recherche gestartet.",
      details: [
        { label: "Kampagne", value: campaign.name },
        { label: "PLZ", value: campaign.postalCode },
      ],
      createdAt: now,
    });

    return { runId, campaign };
  },
});
/**
 * Stores a geocoding result on a campaign document and bumps `updatedAt`.
 *
 * All arguments other than `campaignId` are written to the campaign under
 * the same field names.
 */
export const cacheCampaignGeocode = internalMutation({
  args: {
    campaignId: v.id("campaigns"),
    latitude: v.number(),
    longitude: v.number(),
    geocodedAt: v.number(),
    geocodingPlaceId: v.string(),
    geocodingFormattedAddress: v.string(),
  },
  handler: async (ctx, args) => {
    // Everything except the target id is geocode payload for the patch.
    const { campaignId, ...geocodeFields } = args;
    await ctx.db.patch(campaignId, {
      ...geocodeFields,
      updatedAt: Date.now(),
    });
  },
});
/**
 * Appends one event to a run's event log.
 *
 * The event carries the given level, message and optional details, and is
 * stamped with the current time as `createdAt`.
 */
export const appendRunEvent = internalMutation({
  args: {
    runId: v.id("agentRuns"),
    level: v.union(v.literal("info"), v.literal("warning"), v.literal("error")),
    message: v.string(),
    details: v.optional(v.array(eventDetailValidator)),
  },
  handler: async (ctx, args) => {
    const { runId, ...eventFields } = args;
    await ctx.db.insert("agentRunEvents", {
      runId,
      ...eventFields,
      createdAt: Date.now(),
    });
  },
});
/**
 * Persists the candidates of one discovery run as leads.
 *
 * Candidates are processed in order. For each one:
 *  1. Processing stops (with an info event) once `maxNewLeads` leads have
 *     been created in this call.
 *  2. Candidates with a blank business name are counted as errors and skipped.
 *  3. A hard duplicate (same place id, source business id, website domain or
 *     any candidate e-mail) is skipped. If the matching stored lead has no
 *     e-mail yet and is not blocked, a usable candidate e-mail is backfilled.
 *  4. A candidate matching a blacklist entry is skipped with a warning event.
 *  5. Otherwise a lead is inserted. A match on phone, or on company name plus
 *     address, does not block the insert; the new lead is flagged as a
 *     possible duplicate of the matched lead instead.
 *
 * A final summary event is always written.
 *
 * @returns Counters: `leadsFound` (number of candidates received),
 *   `leadsCreated`, `skippedDuplicates`, `skippedBlacklisted`, `errors`.
 */
export const persistDiscoveredLeads = internalMutation({
  args: {
    runId: v.id("agentRuns"),
    campaignId: v.id("campaigns"),
    maxNewLeads: v.number(),
    niche: v.string(),
    postalCode: v.string(),
    candidates: v.array(candidateValidator),
  },
  handler: async (ctx, args) => {
    const now = Date.now();
    let leadsCreated = 0;
    let skippedDuplicates = 0;
    let skippedBlacklisted = 0;
    let errors = 0;

    for (const candidate of args.candidates) {
      // Enforce the per-run cap before touching the next candidate.
      if (leadsCreated >= args.maxNewLeads) {
        await ctx.db.insert("agentRunEvents", {
          runId: args.runId,
          level: "info",
          message: "Lead-Limit des Laufs erreicht.",
          details: [{ label: "Limit", value: String(args.maxNewLeads) }],
          createdAt: Date.now(),
        });
        break;
      }

      // A lead without a name is unusable; count it as an error.
      if (!candidate.businessName.trim()) {
        errors += 1;
        await ctx.db.insert("agentRunEvents", {
          runId: args.runId,
          level: "warning",
          message: "Lead-Recherche-Ergebnis ohne Unternehmensname übersprungen.",
          details: [{ label: "Place ID", value: candidate.placeId }],
          createdAt: Date.now(),
        });
        continue;
      }

      // Normalized lookup keys.
      // NOTE(review): the place id and source business id are normalized with
      // `normalizeDomain`; confirm that helper is intended for opaque ids.
      const normalizedPlaceId = normalizeDomain(candidate.placeId);
      const normalizedSourceBusinessId = normalizeDomain(
        candidate.sourceBusinessId ?? candidate.placeId,
      );
      const normalizedDomain = normalizeDomain(candidate.websiteDomain);
      const normalizedEmails = getCandidateEmailValues(candidate);
      const normalizedPhone = normalizePhone(candidate.phone);
      const normalizedCompanyName = normalizeText(candidate.businessName);
      const normalizedAddress = normalizeText(candidate.address);

      // --- Hard duplicate detection -------------------------------------
      // Each lookup is skipped when its key is empty.
      const duplicateByPlaceId = normalizedPlaceId
        ? await ctx.db
            .query("leads")
            .withIndex("by_normalizedGooglePlaceId", (q) =>
              q.eq("normalizedGooglePlaceId", normalizedPlaceId),
            )
            .take(1)
        : [];
      const duplicateByDomain = normalizedDomain
        ? await ctx.db
            .query("leads")
            .withIndex("by_websiteDomain", (q) => q.eq("websiteDomain", normalizedDomain))
            .take(1)
        : [];
      const duplicateBySourceBusinessId = normalizedSourceBusinessId
        ? await ctx.db
            .query("leads")
            .withIndex("by_normalizedSourceBusinessId", (q) =>
              q.eq("normalizedSourceBusinessId", normalizedSourceBusinessId),
            )
            .take(1)
        : [];
      const duplicateByEmailRows = [];
      for (const email of normalizedEmails) {
        const rows = await ctx.db
          .query("leads")
          .withIndex("by_normalizedEmail", (q) => q.eq("normalizedEmail", email))
          .take(1);
        duplicateByEmailRows.push(...rows);
      }

      if (
        duplicateByPlaceId.length > 0 ||
        duplicateBySourceBusinessId.length > 0 ||
        duplicateByDomain.length > 0 ||
        duplicateByEmailRows.length > 0
      ) {
        skippedDuplicates += 1;

        // Backfill target: only leads matched by id or domain qualify, in
        // that preference order. A match by e-mail alone yields no target.
        const duplicateLeadForEmailBackfill =
          duplicateBySourceBusinessId[0] ??
          duplicateByPlaceId[0] ??
          duplicateByDomain[0] ??
          null;
        const usableEmail = getUsableContactEmail(candidate);

        // Backfill only when the stored lead has no e-mail, is not blocked or
        // marked do-not-contact, and no stored lead already uses any of the
        // candidate's e-mail addresses.
        if (
          duplicateLeadForEmailBackfill &&
          usableEmail &&
          !duplicateLeadForEmailBackfill.email &&
          duplicateLeadForEmailBackfill.contactStatus !== "do_not_contact" &&
          duplicateLeadForEmailBackfill.blacklistStatus !== "blocked" &&
          duplicateLeadForEmailBackfill.priority !== "blocked" &&
          duplicateByEmailRows.length === 0
        ) {
          const emailBackfillPatch: DuplicateEmailBackfillPatch = {
            normalizedEmail: usableEmail.email,
            email: usableEmail.email,
            updatedAt: now,
          };
          // Optional fields are added only when the candidate provides them.
          if (usableEmail.emailSource !== null) {
            emailBackfillPatch.emailSource = usableEmail.emailSource;
          }
          if (usableEmail.contactPerson !== null) {
            emailBackfillPatch.contactPerson = usableEmail.contactPerson;
          }
          // A lead that was parked for lack of contact data becomes "new".
          if (duplicateLeadForEmailBackfill.contactStatus === "missing_contact") {
            emailBackfillPatch.contactStatus = "new";
            emailBackfillPatch.contactStatusReason =
              "E-Mail bei erneutem Local-Business-Data-Lauf ergänzt.";
          }
          await ctx.db.patch(
            duplicateLeadForEmailBackfill._id,
            emailBackfillPatch,
          );
          await ctx.db.insert("agentRunEvents", {
            runId: args.runId,
            level: "info",
            message: "E-Mail für bestehenden Lead ergänzt.",
            details: [
              {
                label: "Unternehmen",
                value: duplicateLeadForEmailBackfill.companyName,
                source: candidate.sourceProvider,
              },
              {
                label: "E-Mail-Quelle",
                value: usableEmail.emailSource ?? "local_business_data",
                source: candidate.sourceProvider,
              },
            ],
            createdAt: Date.now(),
          });
        }

        await ctx.db.insert("agentRunEvents", {
          runId: args.runId,
          level: "info",
          message: "Doppelter Lead übersprungen.",
          details: [
            {
              label: "Unternehmen",
              value: candidate.businessName,
              source: candidate.sourceProvider,
            },
            {
              label: "Source ID",
              value: candidate.sourceBusinessId ?? candidate.placeId,
              source: candidate.sourceProvider,
            },
          ],
          createdAt: Date.now(),
        });
        continue;
      }

      // --- Soft duplicate detection -------------------------------------
      // These matches do not block the insert; they only flag the new lead.
      const probableDuplicateByPhone = normalizedPhone
        ? await ctx.db
            .query("leads")
            .withIndex("by_normalizedPhone", (q) =>
              q.eq("normalizedPhone", normalizedPhone),
            )
            .take(1)
        : [];
      const probableDuplicateByAddress = normalizedCompanyName && normalizedAddress
        ? await ctx.db
            .query("leads")
            .withIndex("by_normalizedCompanyName_and_normalizedAddress", (q) =>
              q
                .eq("normalizedCompanyName", normalizedCompanyName)
                .eq("normalizedAddress", normalizedAddress),
            )
            .take(1)
        : [];
      const probableDuplicateLead =
        probableDuplicateByPhone[0] ?? probableDuplicateByAddress[0] ?? null;

      // --- Blacklist check ----------------------------------------------
      const blacklistRows = [];
      for (const lookup of getBlacklistLookupValues(candidate)) {
        const rows = await ctx.db
          .query("blacklistEntries")
          .withIndex("by_type_and_normalizedValue", (q) =>
            q
              .eq("type", lookup.type)
              .eq("normalizedValue", lookup.normalizedValue),
          )
          .take(1);
        blacklistRows.push(...rows);
      }
      const blacklistMatches = getBlacklistMatches(candidate, blacklistRows);
      if (blacklistMatches.length > 0) {
        skippedBlacklisted += 1;
        await ctx.db.insert("agentRunEvents", {
          runId: args.runId,
          level: "warning",
          message: "Gesperrter Lead übersprungen.",
          details: blacklistMatches.map((match) => ({
            label: match.type,
            value: match.value,
            source: "blacklist",
          })),
          createdAt: Date.now(),
        });
        continue;
      }

      // --- Build and insert the new lead --------------------------------
      const lead = buildLeadDiscoveryLeadRecord({
        campaignId: args.campaignId,
        runId: args.runId,
        niche: args.niche,
        postalCode: args.postalCode,
        candidate,
        now,
      });

      // `||` rather than `??`: an empty-string websiteUrl must not mask a
      // populated websiteDomain (`??` only falls through on null/undefined).
      const hasWebsite = Boolean(candidate.websiteUrl || candidate.websiteDomain);
      const priorityResult = getLeadDiscoveryPriority({
        isDuplicate: probableDuplicateLead !== null,
        hasWebsite,
        hasWebsiteSignal: false,
      });

      // Normalized keys are written only when non-empty.
      if (normalizedPlaceId) {
        lead.normalizedGooglePlaceId = normalizedPlaceId;
      }
      if (normalizedSourceBusinessId) {
        lead.normalizedSourceBusinessId = normalizedSourceBusinessId;
      }
      if (normalizedPhone !== "") {
        lead.normalizedPhone = normalizedPhone;
      }
      if (normalizedCompanyName !== "") {
        lead.normalizedCompanyName = normalizedCompanyName;
      }
      if (normalizedAddress !== "") {
        lead.normalizedAddress = normalizedAddress;
      }

      lead.priority = priorityResult.priority;
      lead.priorityReason = priorityResult.reason;

      if (probableDuplicateLead) {
        lead.duplicateStatus = "possible_duplicate";
        lead.duplicateReason = `Möglicher Dublettenkandidat zu Lead ${probableDuplicateLead._id}`;
        lead.duplicateOfLeadId = probableDuplicateLead._id;
      }

      await ctx.db.insert("leads", lead);
      leadsCreated += 1;

      await ctx.db.insert("agentRunEvents", {
        runId: args.runId,
        level: "info",
        message: "Lead aus Local Business Data gespeichert.",
        details: [
          {
            label: "Unternehmen",
            value: candidate.businessName,
            source: candidate.sourceProvider,
          },
          {
            label: "Source ID",
            value: candidate.sourceBusinessId ?? candidate.placeId,
            source: candidate.sourceProvider,
          },
        ],
        createdAt: Date.now(),
      });
    }

    // Summary event for the whole batch.
    await ctx.db.insert("agentRunEvents", {
      runId: args.runId,
      level: "info",
      message: "Lead-Recherche abgeschlossen.",
      details: [
        { label: "Gefunden", value: String(args.candidates.length) },
        { label: "Gespeichert", value: String(leadsCreated) },
        { label: "Dubletten übersprungen", value: String(skippedDuplicates) },
        { label: "Sperrliste übersprungen", value: String(skippedBlacklisted) },
      ],
      createdAt: Date.now(),
    });

    return {
      leadsFound: args.candidates.length,
      leadsCreated,
      skippedDuplicates,
      skippedBlacklisted,
      errors,
    };
  },
});
/**
 * Writes the terminal state of a run.
 *
 * Always sets `status`, `counters`, `updatedAt` and `finishedAt`.
 * `currentStep` and `errorSummary` are included in the patch only when the
 * caller supplied them, so omitted values leave the stored fields untouched.
 */
export const finishCampaignRun = internalMutation({
  args: {
    runId: v.id("agentRuns"),
    status: v.union(v.literal("succeeded"), v.literal("failed"), v.literal("canceled")),
    currentStep: v.optional(v.string()),
    errorSummary: v.optional(v.string()),
    counters: v.object({
      leadsFound: v.number(),
      leadsCreated: v.number(),
      auditsCreated: v.number(),
      outreachPrepared: v.number(),
      errors: v.number(),
    }),
  },
  handler: async (ctx, args) => {
    const patch: {
      status: typeof args.status;
      updatedAt: number;
      finishedAt: number;
      counters: typeof args.counters;
      currentStep?: string;
      errorSummary?: string;
    } = {
      status: args.status,
      updatedAt: Date.now(),
      finishedAt: Date.now(),
      counters: args.counters,
      // Optional fields are spread in only when defined, so the patch never
      // carries an explicit `undefined` for them.
      ...(args.currentStep === undefined ? {} : { currentStep: args.currentStep }),
      ...(args.errorSummary === undefined ? {} : { errorSummary: args.errorSummary }),
    };
    await ctx.db.patch(args.runId, patch);
  },
});