feat: integrate google lead discovery

This commit is contained in:
2026-06-04 15:25:01 +02:00
parent 585c4eeb2a
commit 15d8bfeb66
10 changed files with 1696 additions and 22 deletions

View File

@@ -1,9 +1,10 @@
--- ---
id: TASK-6 id: TASK-6
title: Integrate Google Geocoding and Places lead discovery title: Integrate Google Geocoding and Places lead discovery
status: To Do status: Done
assignee: [] assignee: []
created_date: '2026-06-03 19:12' created_date: '2026-06-03 19:12'
updated_date: '2026-06-04 13:24'
labels: labels:
- mvp - mvp
- integrations - integrations
@@ -24,19 +25,35 @@ Connect the campaign runner to Google Geocoding and Google Places. The system ge
## Acceptance Criteria ## Acceptance Criteria
<!-- AC:BEGIN --> <!-- AC:BEGIN -->
- [ ] #1 German PLZ values are geocoded to coordinates and cached on the campaign or run - [x] #1 German PLZ values are geocoded to coordinates and cached on the campaign or run
- [ ] #2 Google Places searches use category mappings or custom niche text plus configured radius - [x] #2 Google Places searches use category mappings or custom niche text plus configured radius
- [ ] #3 Lead records store Place ID, business name, address, category, website, phone, rating metadata for internal use, and source timestamps where available - [x] #3 Lead records store Place ID, business name, address, category, website, phone, rating metadata for internal use, and source timestamps where available
- [ ] #4 Runs respect max new leads and never start if another agent run is already active - [x] #4 Runs respect max new leads and never start if another agent run is already active
- [ ] #5 API failures, empty results, skipped duplicates, and skipped blacklisted entities are visible in run logs - [x] #5 API failures, empty results, skipped duplicates, and skipped blacklisted entities are visible in run logs
<!-- AC:END --> <!-- AC:END -->
## Implementation Plan ## Implementation Plan
<!-- SECTION:PLAN:BEGIN --> <!-- SECTION:PLAN:BEGIN -->
1. Define category-to-Places-query mappings for the initial MVP categories. 1. Add failing helper tests for Google category/query mapping, response parsing, duplicate/blacklist decisions, and source metadata.
2. Add Google Geocoding integration with Germany-focused requests. 2. Implement pure lead discovery helpers with GOOGLE_GEOCODING_API_KEY and GOOGLE_PLACES_API_KEY contract.
3. Add Google Places search integration using stored campaign settings. 3. Add failing Convex/schema tests or type checks for campaign requestRun guard, scheduled processing, geocode caching, and lead source persistence.
4. Persist discovered leads with source metadata and run linkage. 4. Implement Convex leadDiscovery processing, run transitions, logging, limits, duplicate and blacklist skips.
5. Add run-level logging for success, empty, duplicate, blacklisted, and error cases. 5. Run pnpm test, pnpm exec tsc -p tsconfig.json, pnpm lint; review and fix findings.
<!-- SECTION:PLAN:END --> <!-- SECTION:PLAN:END -->
## Implementation Notes
<!-- SECTION:NOTES:BEGIN -->
Starting TASK-6 after TASK-5 completion. Adjusted plan: reuse campaigns.requestRun and existing campaign run status UI; use split GOOGLE_GEOCODING_API_KEY and GOOGLE_PLACES_API_KEY from .env.local; no outreach or audit creation in this task.
Implemented TASK-6 subagent-driven and test-driven. Worker 1 built pure Google discovery helpers with RED/GREEN tests. Spec reviewer requested website URL persistence; fixed with TDD mapper and re-review approved. Code-quality reviewer requested exact blacklist lookups and moving campaign timestamp updates to actual run start; fixed and re-review approved. Final verification: npx convex codegen passed; pnpm exec tsc -p tsconfig.json passed; pnpm test passed 51/51; pnpm lint passed with only existing generated BetterAuth warnings.
Bugfix after manual UI test: campaigns.requestRun previously treated any pending run as active forever, so old Task-5 pending runs blocked new lead discovery starts. Added TDD coverage for stale pending runs, a 10-minute pending grace period, and automatic cancellation/logging of stale pending runs before creating a new run. Verification: pnpm exec tsc -p tsconfig.json passed; pnpm test passed 52/52; pnpm lint passed with only existing generated BetterAuth warnings; npx convex codegen passed.
<!-- SECTION:NOTES:END -->
## Final Summary
<!-- SECTION:FINAL_SUMMARY:BEGIN -->
TASK-6 shipped Google Geocoding and Places lead discovery wired into the existing campaign run flow. It geocodes German PLZ values, caches coordinates, searches Places with preset mappings or custom text plus radius, stores Google source-backed lead metadata, respects per-run limits and active-run guards, logs failures/empty/duplicate/blacklist outcomes, and includes the stale-pending-run cleanup discovered during manual UI testing.
<!-- SECTION:FINAL_SUMMARY:END -->

View File

@@ -13,6 +13,7 @@ import type * as blacklist from "../blacklist.js";
import type * as campaigns from "../campaigns.js"; import type * as campaigns from "../campaigns.js";
import type * as domain from "../domain.js"; import type * as domain from "../domain.js";
import type * as http from "../http.js"; import type * as http from "../http.js";
import type * as leadDiscovery from "../leadDiscovery.js";
import type * as leads from "../leads.js"; import type * as leads from "../leads.js";
import type * as outreach from "../outreach.js"; import type * as outreach from "../outreach.js";
import type * as runs from "../runs.js"; import type * as runs from "../runs.js";
@@ -31,6 +32,7 @@ declare const fullApi: ApiFromModules<{
campaigns: typeof campaigns; campaigns: typeof campaigns;
domain: typeof domain; domain: typeof domain;
http: typeof http; http: typeof http;
leadDiscovery: typeof leadDiscovery;
leads: typeof leads; leads: typeof leads;
outreach: typeof outreach; outreach: typeof outreach;
runs: typeof runs; runs: typeof runs;

View File

@@ -13,8 +13,10 @@ import {
validateCampaignCreateInput, validateCampaignCreateInput,
validateCampaignUpdateInput, validateCampaignUpdateInput,
} from "../lib/campaign-validation"; } from "../lib/campaign-validation";
import { canStartAgentRun, isStalePendingAgentRun } from "../lib/lead-discovery-run";
import { normalizeListLimit } from "./domain"; import { normalizeListLimit } from "./domain";
import { internal } from "./_generated/api";
import { Doc } from "./_generated/dataModel"; import { Doc } from "./_generated/dataModel";
import { mutation, query, QueryCtx } from "./_generated/server"; import { mutation, query, QueryCtx } from "./_generated/server";
@@ -284,6 +286,41 @@ export const requestRun = mutation({
throw new Error("Kampagne nicht gefunden."); throw new Error("Kampagne nicht gefunden.");
} }
const possiblyActiveRuns = [
...(await ctx.db
.query("agentRuns")
.withIndex("by_status", (q) => q.eq("status", "pending"))
.take(20)),
...(await ctx.db
.query("agentRuns")
.withIndex("by_status", (q) => q.eq("status", "running"))
.take(1)),
];
const stalePendingRuns = possiblyActiveRuns.filter((run) =>
isStalePendingAgentRun(run, now),
);
for (const staleRun of stalePendingRuns) {
await ctx.db.patch(staleRun._id, {
status: "canceled",
currentStep: "lead_discovery",
errorSummary: "Ausstehender Lauf wurde nach Timeout automatisch abgebrochen.",
finishedAt: now,
updatedAt: now,
});
await ctx.db.insert("agentRunEvents", {
runId: staleRun._id,
level: "warning",
message: "Ausstehender Lauf wurde nach Timeout automatisch abgebrochen.",
details: [{ label: "Alter Status", value: "pending" }],
createdAt: now,
});
}
if (!canStartAgentRun(possiblyActiveRuns, now)) {
throw new Error("Es läuft bereits ein Agentenlauf.");
}
const runId = await ctx.db.insert("agentRuns", { const runId = await ctx.db.insert("agentRuns", {
type: "campaign", type: "campaign",
campaignId: args.id, campaignId: args.id,
@@ -299,17 +336,8 @@ export const requestRun = mutation({
updatedAt: now, updatedAt: now,
}); });
const nextRunAt = calculateNextRunAt({ await ctx.scheduler.runAfter(0, internal.leadDiscovery.processCampaignRun, {
recurrence: campaign.recurrence, runId,
status: campaign.status,
lastRunAt: now,
now,
});
await ctx.db.patch(args.id, {
lastRunAt: now,
nextRunAt,
updatedAt: now,
}); });
return runId; return runId;

544
convex/leadDiscovery.ts Normal file
View File

@@ -0,0 +1,544 @@
import { v } from "convex/values";
import {
GOOGLE_PLACES_FIELD_MASK,
buildGeocodingUrl,
getBlacklistLookupValues,
getBlacklistMatches,
getPlacesSearchSpec,
normalizePlacesResponse,
parseGeocodingResponse,
} from "../lib/lead-discovery-google";
import {
buildLeadDiscoveryLeadRecord,
buildLeadDiscoveryCounters,
} from "../lib/lead-discovery-run";
import { calculateNextRunAt } from "../lib/campaign-scheduling";
import { internal } from "./_generated/api";
import { Doc, Id } from "./_generated/dataModel";
import { internalAction, internalMutation } from "./_generated/server";
// Shorthand for the document type of the "campaigns" table.
type CampaignDoc = Doc<"campaigns">;
// Validators for argument fields that carry either a value or an explicit null.
const nullableString = v.union(v.string(), v.null());
const nullableNumber = v.union(v.number(), v.null());
// Argument validator for a single discovery candidate; used as the element
// type of `persistDiscoveredLeads`' `candidates` argument. Missing optional
// data is represented as null rather than by omitting the field.
const candidateValidator = v.object({
placeId: v.string(),
businessName: v.string(),
address: v.string(),
websiteUrl: nullableString,
websiteDomain: nullableString,
phone: nullableString,
rating: nullableNumber,
userRatingCount: nullableNumber,
businessStatus: nullableString,
googleTypes: v.array(v.string()),
googlePrimaryType: nullableString,
googleMapsUrl: nullableString,
// Only Google Places is accepted as a source provider here.
sourceProvider: v.literal("google_places"),
sourceFetchedAt: v.number(),
});
// Argument validator for one label/value detail attached to a run event;
// `source` is optional and, where set in this file, names the origin of the
// value (e.g. "google_places", "google_geocoding", "blacklist").
const eventDetailValidator = v.object({
label: v.string(),
value: v.string(),
source: v.optional(v.string()),
});
/**
 * Reads a mandatory environment variable and returns it with surrounding
 * whitespace removed.
 *
 * @param key Name of the environment variable.
 * @returns The trimmed, non-empty value.
 * @throws Error when the variable is unset or contains only whitespace.
 */
function getRequiredEnv(key: string) {
  const raw = process.env[key];
  const trimmed = raw === undefined ? "" : raw.trim();

  if (trimmed.length > 0) {
    return trimmed;
  }

  throw new Error(`${key} ist nicht gesetzt.`);
}
/**
 * Turns an arbitrary thrown value into a printable message.
 *
 * @param error The caught value; may be anything.
 * @returns `error.message` for Error instances, otherwise `String(error)`.
 */
function messageFromError(error: unknown) {
  if (error instanceof Error) {
    return error.message;
  }

  return String(error);
}
/**
 * Picks the niche label recorded for a campaign's leads and log entries.
 *
 * Campaigns in custom mode, or with the category "Anderes", use the trimmed
 * custom search term; when that term is missing or blank the category is
 * used instead. All other campaigns use their category.
 *
 * @param campaign The campaign document.
 * @returns The niche text for this campaign.
 */
function getCampaignNiche(campaign: CampaignDoc) {
  const usesCustomTerm =
    campaign.categoryMode === "custom" || campaign.category === "Anderes";

  if (!usesCustomTerm) {
    return campaign.category;
  }

  const customTerm = campaign.customSearchTerm?.trim();
  return customTerm ? customTerm : campaign.category;
}
/**
 * Performs an HTTP request and returns the parsed JSON body.
 *
 * @param url Request URL.
 * @param init Optional fetch options (method, headers, body, ...).
 * @returns The parsed JSON payload of a successful (2xx) response.
 * @throws Error for non-OK responses; the message contains the HTTP status
 *   and at most the first 500 characters of the response body.
 */
async function fetchJson(url: string, init?: RequestInit) {
  const response = await fetch(url, init);

  if (response.ok) {
    return await response.json();
  }

  const body = await response.text();
  const excerpt = body.slice(0, 500);
  throw new Error(
    `Google API request failed with HTTP ${response.status}: ${excerpt}`,
  );
}
/**
 * Internal action that executes lead discovery for one queued campaign run.
 *
 * Steps:
 * 1. `startCampaignRun` moves the run to "running"; when it returns null the
 *    run was not startable and the action ends without doing anything.
 * 2. The campaign's coordinates are taken from the campaign document or, when
 *    absent, fetched from Google Geocoding and cached on the campaign.
 * 3. Google Places is queried with the search spec derived from the campaign.
 * 4. Candidates are persisted (`persistDiscoveredLeads`) and the run is
 *    finished as "succeeded".
 *
 * Any error thrown along the way is logged as a run event and the run is
 * finished as "failed".
 *
 * @returns The persistence counters on success, otherwise null.
 */
export const processCampaignRun = internalAction({
  args: {
    runId: v.id("agentRuns"),
  },
  handler: async (ctx, args) => {
    const started: {
      campaign: CampaignDoc;
      runId: Id<"agentRuns">;
    } | null = await ctx.runMutation(internal.leadDiscovery.startCampaignRun, {
      runId: args.runId,
    });

    if (!started) {
      return null;
    }

    try {
      const geocodingApiKey = getRequiredEnv("GOOGLE_GEOCODING_API_KEY");
      const placesApiKey = getRequiredEnv("GOOGLE_PLACES_API_KEY");
      const campaign = started.campaign;
      const fetchedAt = Date.now();
      let latitude = campaign.latitude;
      let longitude = campaign.longitude;

      if (typeof latitude !== "number" || typeof longitude !== "number") {
        // No cached coordinates yet: geocode the postal code and store the
        // result on the campaign so later runs can skip this request.
        const geocodingUrl = buildGeocodingUrl({
          postalCode: campaign.postalCode,
          apiKey: geocodingApiKey,
        });
        const geocodingJson = await fetchJson(geocodingUrl);
        const geocoding = parseGeocodingResponse(geocodingJson, fetchedAt);
        latitude = geocoding.latitude;
        longitude = geocoding.longitude;

        await ctx.runMutation(internal.leadDiscovery.cacheCampaignGeocode, {
          campaignId: campaign._id,
          latitude,
          longitude,
          geocodedAt: geocoding.fetchedAt,
          geocodingPlaceId: geocoding.placeId,
          geocodingFormattedAddress: geocoding.formattedAddress,
        });
        await ctx.runMutation(internal.leadDiscovery.appendRunEvent, {
          runId: args.runId,
          level: "info",
          message: "PLZ geocodiert.",
          details: [
            { label: "PLZ", value: campaign.postalCode, source: "google_geocoding" },
            {
              label: "Koordinaten",
              value: `${latitude}, ${longitude}`,
              source: "google_geocoding",
            },
          ],
        });
      } else {
        await ctx.runMutation(internal.leadDiscovery.appendRunEvent, {
          runId: args.runId,
          level: "info",
          message: "Geocoding-Cache der Kampagne verwendet.",
          details: [
            { label: "PLZ", value: campaign.postalCode },
            { label: "Koordinaten", value: `${latitude}, ${longitude}` },
          ],
        });
      }

      const searchSpec = getPlacesSearchSpec({
        categoryMode: campaign.categoryMode,
        category: campaign.category,
        customSearchTerm: campaign.customSearchTerm,
        postalCode: campaign.postalCode,
        radiusKm: campaign.radiusKm,
        latitude,
        longitude,
      });
      const placesJson = await fetchJson(
        `https://places.googleapis.com/v1/places:${searchSpec.endpoint}`,
        {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            "X-Goog-Api-Key": placesApiKey,
            "X-Goog-FieldMask": GOOGLE_PLACES_FIELD_MASK,
          },
          body: JSON.stringify(searchSpec.body),
        },
      );
      const candidates = normalizePlacesResponse(placesJson, Date.now());

      if (candidates.length === 0) {
        await ctx.runMutation(internal.leadDiscovery.appendRunEvent, {
          runId: args.runId,
          level: "warning",
          message: "Google Places lieferte keine Ergebnisse.",
          details: [
            { label: "Suchtyp", value: searchSpec.searchType, source: "google_places" },
            { label: "Kategorie", value: getCampaignNiche(campaign), source: "google_places" },
          ],
        });
      }

      // An empty candidate list is still persisted so the summary event and
      // counters are written for the run.
      const result: {
        leadsFound: number;
        leadsCreated: number;
        skippedDuplicates: number;
        skippedBlacklisted: number;
        errors: number;
      } = await ctx.runMutation(internal.leadDiscovery.persistDiscoveredLeads, {
        runId: args.runId,
        campaignId: campaign._id,
        maxNewLeads: campaign.maxNewLeadsPerRun,
        niche: getCampaignNiche(campaign),
        postalCode: campaign.postalCode,
        candidates,
      });

      await ctx.runMutation(internal.leadDiscovery.finishCampaignRun, {
        runId: args.runId,
        status: "succeeded",
        currentStep: "lead_discovery",
        counters: buildLeadDiscoveryCounters({
          leadsFound: result.leadsFound,
          leadsCreated: result.leadsCreated,
          errors: result.errors,
        }),
      });

      return result;
    } catch (error) {
      const errorSummary = messageFromError(error);

      // Writing the error event is best-effort: if it throws, the run must
      // still be finalized below. A run left in "running" would block every
      // later run, because new runs are refused while one is running.
      try {
        await ctx.runMutation(internal.leadDiscovery.appendRunEvent, {
          runId: args.runId,
          level: "error",
          message: "Lead-Recherche fehlgeschlagen.",
          details: [{ label: "Fehler", value: errorSummary }],
        });
      } catch (logError) {
        console.error(
          "Could not record lead discovery failure event:",
          messageFromError(logError),
        );
      }

      await ctx.runMutation(internal.leadDiscovery.finishCampaignRun, {
        runId: args.runId,
        status: "failed",
        currentStep: "lead_discovery",
        errorSummary,
        counters: buildLeadDiscoveryCounters({
          leadsFound: 0,
          leadsCreated: 0,
          errors: 1,
        }),
      });

      return null;
    }
  },
});
/**
 * Internal mutation that transitions a pending campaign run to "running".
 *
 * The run is only started when it exists, is linked to a campaign, is still
 * "pending", no other run has status "running", and the campaign document
 * exists. On start, the campaign's `lastRunAt` / `nextRunAt` are updated and
 * a start event is logged.
 *
 * @returns `{ runId, campaign }` when the run was started; null when it was
 *   skipped, canceled (another run is active) or failed (campaign missing).
 */
export const startCampaignRun = internalMutation({
  args: {
    runId: v.id("agentRuns"),
  },
  handler: async (ctx, args) => {
    const now = Date.now();
    const run = await ctx.db.get(args.runId);

    if (!run || !run.campaignId || run.status !== "pending") {
      return null;
    }

    const activeRunning = await ctx.db
      .query("agentRuns")
      .withIndex("by_status", (q) => q.eq("status", "running"))
      .take(1);

    if (activeRunning.length > 0) {
      await ctx.db.patch(args.runId, {
        status: "canceled",
        currentStep: "lead_discovery",
        errorSummary: "Ein anderer Agentenlauf ist bereits aktiv.",
        finishedAt: now,
        updatedAt: now,
      });
      await ctx.db.insert("agentRunEvents", {
        runId: args.runId,
        level: "warning",
        message: "Lauf nicht gestartet, weil ein anderer Agentenlauf aktiv ist.",
        createdAt: now,
      });
      return null;
    }

    const campaign = await ctx.db.get(run.campaignId);

    if (!campaign) {
      await ctx.db.patch(args.runId, {
        status: "failed",
        currentStep: "lead_discovery",
        errorSummary: "Kampagne nicht gefunden.",
        finishedAt: now,
        updatedAt: now,
      });
      // Record the failure in the run log as well, mirroring the canceled
      // branch above, so it is visible alongside the other run events.
      await ctx.db.insert("agentRunEvents", {
        runId: args.runId,
        level: "error",
        message: "Lauf fehlgeschlagen, weil die Kampagne nicht gefunden wurde.",
        createdAt: now,
      });
      return null;
    }

    await ctx.db.patch(args.runId, {
      status: "running",
      currentStep: "lead_discovery",
      startedAt: now,
      updatedAt: now,
    });
    // Campaign scheduling timestamps are advanced only once the run has
    // actually started.
    await ctx.db.patch(campaign._id, {
      lastRunAt: now,
      nextRunAt: calculateNextRunAt({
        recurrence: campaign.recurrence,
        status: campaign.status,
        lastRunAt: now,
        now,
      }),
      updatedAt: now,
    });
    await ctx.db.insert("agentRunEvents", {
      runId: args.runId,
      level: "info",
      message: "Lead-Recherche gestartet.",
      details: [
        { label: "Kampagne", value: campaign.name },
        { label: "PLZ", value: campaign.postalCode },
      ],
      createdAt: now,
    });

    return { runId: args.runId, campaign };
  },
});
/**
 * Internal mutation that stores a geocoding result on a campaign document
 * and refreshes the campaign's `updatedAt` timestamp.
 */
export const cacheCampaignGeocode = internalMutation({
  args: {
    campaignId: v.id("campaigns"),
    latitude: v.number(),
    longitude: v.number(),
    geocodedAt: v.number(),
    geocodingPlaceId: v.string(),
    geocodingFormattedAddress: v.string(),
  },
  handler: async (ctx, args) => {
    // Everything except the id is written to the campaign unchanged.
    const { campaignId, ...geocode } = args;

    await ctx.db.patch(campaignId, {
      ...geocode,
      updatedAt: Date.now(),
    });
  },
});
/**
 * Internal mutation that appends one event to a run's log, stamped with the
 * current time. `details` is only written when the caller supplied it.
 */
export const appendRunEvent = internalMutation({
  args: {
    runId: v.id("agentRuns"),
    level: v.union(v.literal("info"), v.literal("warning"), v.literal("error")),
    message: v.string(),
    details: v.optional(v.array(eventDetailValidator)),
  },
  handler: async (ctx, args) => {
    const { runId, level, message, details } = args;
    const event = { runId, level, message, createdAt: Date.now() };

    await ctx.db.insert(
      "agentRunEvents",
      details === undefined ? event : { ...event, details },
    );
  },
});
/**
 * Internal mutation that stores discovery candidates as leads for a run.
 *
 * Candidates are processed in order. Processing stops once `maxNewLeads`
 * leads were created. A candidate is skipped when it has no business name
 * (counted as an error), when a lead with the same Google Place ID or website
 * domain already exists (duplicate), or when it matches a blacklist entry.
 * Every decision is written to the run's event log, followed by a summary
 * event.
 *
 * @returns Counters: `leadsFound` (number of candidates received),
 *   `leadsCreated`, `skippedDuplicates`, `skippedBlacklisted`, `errors`.
 */
export const persistDiscoveredLeads = internalMutation({
  args: {
    runId: v.id("agentRuns"),
    campaignId: v.id("campaigns"),
    maxNewLeads: v.number(),
    niche: v.string(),
    postalCode: v.string(),
    candidates: v.array(candidateValidator),
  },
  handler: async (ctx, args) => {
    const { runId, campaignId, maxNewLeads, niche, postalCode, candidates } = args;
    const now = Date.now();
    const tally = {
      leadsCreated: 0,
      skippedDuplicates: 0,
      skippedBlacklisted: 0,
      errors: 0,
    };

    // Writes one entry to this run's event log.
    const logEvent = async (
      level: "info" | "warning" | "error",
      message: string,
      details: Array<{ label: string; value: string; source?: string }>,
    ) => {
      await ctx.db.insert("agentRunEvents", {
        runId,
        level,
        message,
        details,
        createdAt: Date.now(),
      });
    };

    for (const candidate of candidates) {
      if (tally.leadsCreated >= maxNewLeads) {
        await logEvent("info", "Lead-Limit des Laufs erreicht.", [
          { label: "Limit", value: String(maxNewLeads) },
        ]);
        break;
      }

      if (!candidate.businessName.trim()) {
        tally.errors += 1;
        await logEvent(
          "warning",
          "Google-Places-Ergebnis ohne Unternehmensname übersprungen.",
          [{ label: "Place ID", value: candidate.placeId }],
        );
        continue;
      }

      // Duplicate check: same Place ID, or same website domain when known.
      const placeIdHits = await ctx.db
        .query("leads")
        .withIndex("by_googlePlaceId", (q) =>
          q.eq("googlePlaceId", candidate.placeId),
        )
        .take(1);
      const domain = candidate.websiteDomain;
      const domainHits = domain
        ? await ctx.db
            .query("leads")
            .withIndex("by_websiteDomain", (q) => q.eq("websiteDomain", domain))
            .take(1)
        : [];
      const isDuplicate = placeIdHits.length > 0 || domainHits.length > 0;

      if (isDuplicate) {
        tally.skippedDuplicates += 1;
        await logEvent("info", "Doppelter Lead übersprungen.", [
          { label: "Unternehmen", value: candidate.businessName, source: "google_places" },
          { label: "Place ID", value: candidate.placeId, source: "google_places" },
        ]);
        continue;
      }

      // Blacklist check: one exact index lookup per normalized value.
      const blacklistRows = [];
      for (const lookup of getBlacklistLookupValues(candidate)) {
        const hits = await ctx.db
          .query("blacklistEntries")
          .withIndex("by_type_and_normalizedValue", (q) =>
            q
              .eq("type", lookup.type)
              .eq("normalizedValue", lookup.normalizedValue),
          )
          .take(1);
        blacklistRows.push(...hits);
      }
      const blacklistMatches = getBlacklistMatches(candidate, blacklistRows);

      if (blacklistMatches.length > 0) {
        tally.skippedBlacklisted += 1;
        await logEvent(
          "warning",
          "Gesperrter Lead übersprungen.",
          blacklistMatches.map((match) => ({
            label: match.type,
            value: match.value,
            source: "blacklist",
          })),
        );
        continue;
      }

      await ctx.db.insert(
        "leads",
        buildLeadDiscoveryLeadRecord({
          campaignId,
          runId,
          niche,
          postalCode,
          candidate,
          now,
        }),
      );
      tally.leadsCreated += 1;
      await logEvent("info", "Lead aus Google Places gespeichert.", [
        { label: "Unternehmen", value: candidate.businessName, source: "google_places" },
        { label: "Place ID", value: candidate.placeId, source: "google_places" },
      ]);
    }

    await logEvent("info", "Lead-Recherche abgeschlossen.", [
      { label: "Gefunden", value: String(candidates.length) },
      { label: "Gespeichert", value: String(tally.leadsCreated) },
      { label: "Dubletten übersprungen", value: String(tally.skippedDuplicates) },
      { label: "Sperrliste übersprungen", value: String(tally.skippedBlacklisted) },
    ]);

    return {
      leadsFound: candidates.length,
      ...tally,
    };
  },
});
/**
 * Internal mutation that writes a run's terminal status and counters and
 * stamps `finishedAt` / `updatedAt`. `currentStep` and `errorSummary` are
 * only written when provided.
 */
export const finishCampaignRun = internalMutation({
  args: {
    runId: v.id("agentRuns"),
    status: v.union(v.literal("succeeded"), v.literal("failed"), v.literal("canceled")),
    currentStep: v.optional(v.string()),
    errorSummary: v.optional(v.string()),
    counters: v.object({
      leadsFound: v.number(),
      leadsCreated: v.number(),
      auditsCreated: v.number(),
      outreachPrepared: v.number(),
      errors: v.number(),
    }),
  },
  handler: async (ctx, args) => {
    const { runId, status, counters, currentStep, errorSummary } = args;

    await ctx.db.patch(runId, {
      status,
      updatedAt: Date.now(),
      finishedAt: Date.now(),
      counters,
      // Optional fields are left untouched on the document when absent.
      ...(currentStep === undefined ? {} : { currentStep }),
      ...(errorSummary === undefined ? {} : { errorSummary }),
    });
  },
});

View File

@@ -6,6 +6,7 @@ import { mutation, query } from "./_generated/server";
export const create = mutation({ export const create = mutation({
args: { args: {
campaignId: v.optional(v.id("campaigns")), campaignId: v.optional(v.id("campaigns")),
discoveryRunId: v.optional(v.id("agentRuns")),
companyName: v.string(), companyName: v.string(),
niche: v.optional(v.string()), niche: v.optional(v.string()),
address: v.optional(v.string()), address: v.optional(v.string()),
@@ -13,6 +14,14 @@ export const create = mutation({
postalCode: v.optional(v.string()), postalCode: v.optional(v.string()),
googlePlaceId: v.optional(v.string()), googlePlaceId: v.optional(v.string()),
googleMapsUrl: v.optional(v.string()), googleMapsUrl: v.optional(v.string()),
googlePrimaryType: v.optional(v.string()),
googleTypes: v.optional(v.array(v.string())),
googleRating: v.optional(v.number()),
googleUserRatingCount: v.optional(v.number()),
googleBusinessStatus: v.optional(v.string()),
sourceProvider: v.optional(v.literal("google_places")),
sourceFetchedAt: v.optional(v.number()),
websiteUrl: v.optional(v.string()),
websiteDomain: v.optional(v.string()), websiteDomain: v.optional(v.string()),
phone: v.optional(v.string()), phone: v.optional(v.string()),
email: v.optional(v.string()), email: v.optional(v.string()),

View File

@@ -125,6 +125,9 @@ export default defineSchema({
region: v.optional(v.string()), region: v.optional(v.string()),
latitude: v.optional(v.number()), latitude: v.optional(v.number()),
longitude: v.optional(v.number()), longitude: v.optional(v.number()),
geocodedAt: v.optional(v.number()),
geocodingPlaceId: v.optional(v.string()),
geocodingFormattedAddress: v.optional(v.string()),
radiusKm: v.number(), radiusKm: v.number(),
maxNewLeadsPerRun: v.number(), maxNewLeadsPerRun: v.number(),
maxAuditsPerRun: v.number(), maxAuditsPerRun: v.number(),
@@ -148,6 +151,7 @@ export default defineSchema({
leads: defineTable({ leads: defineTable({
campaignId: v.optional(v.id("campaigns")), campaignId: v.optional(v.id("campaigns")),
discoveryRunId: v.optional(v.id("agentRuns")),
companyName: v.string(), companyName: v.string(),
niche: v.optional(v.string()), niche: v.optional(v.string()),
address: v.optional(v.string()), address: v.optional(v.string()),
@@ -155,6 +159,14 @@ export default defineSchema({
postalCode: v.optional(v.string()), postalCode: v.optional(v.string()),
googlePlaceId: v.optional(v.string()), googlePlaceId: v.optional(v.string()),
googleMapsUrl: v.optional(v.string()), googleMapsUrl: v.optional(v.string()),
googlePrimaryType: v.optional(v.string()),
googleTypes: v.optional(v.array(v.string())),
googleRating: v.optional(v.number()),
googleUserRatingCount: v.optional(v.number()),
googleBusinessStatus: v.optional(v.string()),
sourceProvider: v.optional(v.literal("google_places")),
sourceFetchedAt: v.optional(v.number()),
websiteUrl: v.optional(v.string()),
websiteDomain: v.optional(v.string()), websiteDomain: v.optional(v.string()),
phone: v.optional(v.string()), phone: v.optional(v.string()),
email: v.optional(v.string()), email: v.optional(v.string()),
@@ -169,6 +181,7 @@ export default defineSchema({
updatedAt: v.number(), updatedAt: v.number(),
}) })
.index("by_campaignId", ["campaignId"]) .index("by_campaignId", ["campaignId"])
.index("by_discoveryRunId", ["discoveryRunId"])
.index("by_contactStatus", ["contactStatus"]) .index("by_contactStatus", ["contactStatus"])
.index("by_googlePlaceId", ["googlePlaceId"]) .index("by_googlePlaceId", ["googlePlaceId"])
.index("by_websiteDomain", ["websiteDomain"]) .index("by_websiteDomain", ["websiteDomain"])

View File

@@ -0,0 +1,453 @@
/**
 * Comma-separated list of place fields requested from Google Places; sent as
 * the `X-Goog-FieldMask` header by the lead discovery action.
 */
export const GOOGLE_PLACES_FIELD_MASK = [
  "id",
  "displayName",
  "formattedAddress",
  "websiteUri",
  "nationalPhoneNumber",
  "internationalPhoneNumber",
  "rating",
  "userRatingCount",
  "businessStatus",
  "types",
  "primaryType",
  "googleMapsUri",
]
  .map((field) => `places.${field}`)
  .join(",");
// Subset of campaign fields needed to build a Places search request.
type CampaignLike = {
categoryMode?: "preset" | "custom";
category?: string | null;
customSearchTerm?: string | null;
postalCode: string;
radiusKm: number;
latitude?: number;
longitude?: number;
};
// Request body built for the `searchNearby` endpoint: results are restricted
// to a circle around the given center.
type PlacesNearbyBody = {
includedTypes: string[];
maxResultCount: number;
locationRestriction: {
circle: {
center: {
latitude: number;
longitude: number;
};
// Filled from `ensureRadiusMeters`, i.e. the radius in meters.
radius: number;
};
};
};
// Request body built for the `searchText` endpoint: free-text query with an
// optional circular location bias.
type PlacesTextBody = {
textQuery: string;
maxResultCount: number;
locationBias?: {
circle: {
center: {
latitude: number;
longitude: number;
};
// Filled from `ensureRadiusMeters`, i.e. the radius in meters.
radius: number;
};
};
};
type PlacesSearchBody = PlacesNearbyBody | PlacesTextBody;
// Result of `getPlacesSearchSpec`: which endpoint to call and with what body.
export type PlacesSearchSpec = {
searchType: "nearby" | "text";
endpoint: "searchNearby" | "searchText";
body: PlacesSearchBody;
};
// Value sent as `maxResultCount` in every Places search request.
const GOOGLE_PLACES_MAX_RESULTS = 20;
// Maps preset campaign categories (German labels) to the place type sent as
// `includedTypes` in a nearby search. Categories not listed here fall back to
// a text search. Both "Café" and "Cafe" spellings are accepted.
const PRESET_CATEGORY_TYPES: Record<string, string> = {
Anwalt: "lawyer",
Restaurant: "restaurant",
Café: "cafe",
Cafe: "cafe",
Friseur: "hair_salon",
Zahnarzt: "dentist",
Physiotherapie: "physiotherapist",
};
/**
 * Converts a radius in kilometers to whole meters.
 *
 * @param radiusKm Radius in kilometers.
 * @returns The radius in meters, rounded to the nearest integer.
 * @throws Error when `radiusKm` is not a finite number.
 */
function ensureRadiusMeters(radiusKm: number) {
  const isUsable = typeof radiusKm === "number" && Number.isFinite(radiusKm);

  if (isUsable) {
    return Math.round(radiusKm * 1000);
  }

  throw new Error("Radius must be a finite number.");
}
/**
 * Trims a search term, treating null and undefined as the empty string.
 *
 * @param value Raw term, possibly missing.
 * @returns The trimmed term, or "" when no value was given.
 */
function normalizeCustomSearchTerm(value?: string | null) {
  return value == null ? "" : value.trim();
}
/**
 * Derives the Google Places request (endpoint and body) for a campaign.
 *
 * - Preset campaigns whose category has an entry in `PRESET_CATEGORY_TYPES`
 *   use a nearby search restricted to a circle around the campaign's
 *   coordinates.
 * - All other campaigns use a text search. The query is the custom search
 *   term (custom mode or category "Anderes") or the category, followed by
 *   the postal code and "Deutschland"; when no term is available the query
 *   falls back to "Unternehmen". Coordinates, when present, are added as a
 *   location bias.
 *
 * @param campaignLike Campaign fields relevant for the search.
 * @returns The search type, endpoint name and request body.
 * @throws Error when a nearby search is required but coordinates are missing,
 *   or when the radius is not a finite number.
 */
export function getPlacesSearchSpec(campaignLike: CampaignLike): PlacesSearchSpec {
  const category = normalizeCustomSearchTerm(campaignLike.category);
  const isCustomSearch =
    campaignLike.categoryMode === "custom" || category === "Anderes";

  // Own-property lookup only: the `in` operator would also match inherited
  // keys such as "toString" or "constructor" and treat them as presets.
  const presetType =
    campaignLike.categoryMode !== "custom" &&
    Object.prototype.hasOwnProperty.call(PRESET_CATEGORY_TYPES, category)
      ? PRESET_CATEGORY_TYPES[category]
      : undefined;

  if (presetType !== undefined) {
    const latitude = campaignLike.latitude;
    const longitude = campaignLike.longitude;

    if (typeof latitude !== "number" || typeof longitude !== "number") {
      throw new Error("Nearby places search requires latitude and longitude.");
    }

    return {
      searchType: "nearby",
      endpoint: "searchNearby",
      body: {
        includedTypes: [presetType],
        maxResultCount: GOOGLE_PLACES_MAX_RESULTS,
        locationRestriction: {
          circle: {
            center: {
              latitude,
              longitude,
            },
            radius: ensureRadiusMeters(campaignLike.radiusKm),
          },
        },
      },
    };
  }

  const baseTerm = isCustomSearch
    ? normalizeCustomSearchTerm(campaignLike.customSearchTerm)
    : category;
  const locationQuerySuffix = campaignLike.postalCode
    ? `${campaignLike.postalCode} Deutschland`
    : "Deutschland";
  const fallbackTerm = baseTerm
    ? `${baseTerm} in ${locationQuerySuffix}`
    : `Unternehmen in ${locationQuerySuffix}`;

  const textBody: PlacesTextBody = {
    textQuery: fallbackTerm,
    maxResultCount: GOOGLE_PLACES_MAX_RESULTS,
  };

  // The text query already names the postal code; coordinates only bias the
  // ranking and are therefore optional here.
  if (
    typeof campaignLike.latitude === "number" &&
    typeof campaignLike.longitude === "number"
  ) {
    textBody.locationBias = {
      circle: {
        center: {
          latitude: campaignLike.latitude,
          longitude: campaignLike.longitude,
        },
        radius: ensureRadiusMeters(campaignLike.radiusKm),
      },
    };
  }

  return {
    searchType: "text",
    endpoint: "searchText",
    body: textBody,
  };
}
// Shape of the Geocoding JSON response as far as `parseGeocodingResponse`
// reads it. Coordinates are typed `unknown` because they are validated at
// runtime before use.
type LegacyGeocodingResponse = {
status: string;
results?: Array<{
geometry?: {
location?: {
lat?: unknown;
lng?: unknown;
};
};
formatted_address?: string;
place_id?: string;
}>;
};
// Validated geocoding result; `fetchedAt` is the timestamp passed in by the
// caller of `parseGeocodingResponse`.
export type GeocodingCoordinates = {
latitude: number;
longitude: number;
formattedAddress: string;
placeId: string;
fetchedAt: number;
};
/**
 * Builds the Google Geocoding request URL for a German postal code.
 *
 * The request is narrowed to Germany through the address text, the
 * `components` filter and the `region` parameter, and asks for German
 * (`language=de`) results.
 *
 * @param postalCode Postal code to geocode.
 * @param apiKey Geocoding API key, sent as the `key` query parameter.
 * @returns The complete request URL.
 */
export function buildGeocodingUrl({
  postalCode,
  apiKey,
}: {
  postalCode: string;
  apiKey: string;
}): string {
  const query = new URLSearchParams([
    ["address", `${postalCode}, Deutschland`],
    ["components", `country:DE|postal_code:${postalCode}`],
    ["language", "de"],
    ["region", "de"],
    ["key", apiKey],
  ]);

  return `https://maps.googleapis.com/maps/api/geocode/json?${query.toString()}`;
}
/**
 * Validates a Geocoding response and extracts the first result.
 *
 * @param response Parsed JSON body of the Geocoding request.
 * @param fetchedAt Timestamp to attach to the result.
 * @returns Coordinates, formatted address and place id of the first result,
 *   together with `fetchedAt`.
 * @throws Error when the status is not "OK", when there is no result, or when
 *   the first result lacks finite coordinates, a formatted address or a
 *   place id.
 */
export function parseGeocodingResponse(
  response: LegacyGeocodingResponse,
  fetchedAt: number,
) {
  if (!response || response.status !== "OK") {
    throw new Error(`Geocoding failed with status "${response?.status ?? "unknown"}".`);
  }

  const [topResult] = response.results ?? [];
  if (!topResult) {
    throw new Error("Geocoding returned no results.");
  }

  const location = topResult.geometry?.location;
  const latitude = location?.lat;
  const longitude = location?.lng;

  // Checks run in a fixed order so the first missing piece is reported.
  if (!(typeof latitude === "number" && Number.isFinite(latitude))) {
    throw new Error("Geocoding result is missing latitude.");
  }
  if (!(typeof longitude === "number" && Number.isFinite(longitude))) {
    throw new Error("Geocoding result is missing longitude.");
  }

  const formattedAddress = topResult.formatted_address;
  if (!formattedAddress) {
    throw new Error("Geocoding result is missing formatted address.");
  }

  const placeId = topResult.place_id;
  if (!placeId) {
    throw new Error("Geocoding result is missing place id.");
  }

  return { latitude, longitude, formattedAddress, placeId, fetchedAt };
}
// A place's display name is accepted either as a plain string or as an
// object carrying the name in `text`.
type GooglePlaceDisplayName =
| string
| {
text?: string;
};
// One place entry of a Places response, limited to the fields read by
// `normalizePlacesResponse`. Every field is optional.
type GooglePlaceApiPlace = {
id?: string;
displayName?: GooglePlaceDisplayName;
formattedAddress?: string;
websiteUri?: string;
nationalPhoneNumber?: string;
internationalPhoneNumber?: string;
rating?: number;
userRatingCount?: number;
businessStatus?: string;
types?: string[];
primaryType?: string;
googleMapsUri?: string;
};
// Top-level Places response; `places` may be absent or null.
export type GooglePlacesApiResponse = {
places?: GooglePlaceApiPlace[] | null;
};
// Normalized place produced by `normalizePlacesResponse`. Optional source
// data is represented as null instead of being omitted.
export type GooglePlaceCandidate = {
placeId: string;
businessName: string;
address: string;
websiteUrl: string | null;
websiteDomain: string | null;
phone: string | null;
rating: number | null;
userRatingCount: number | null;
businessStatus: string | null;
googleTypes: string[];
googlePrimaryType: string | null;
googleMapsUrl: string | null;
sourceProvider: "google_places";
sourceFetchedAt: number;
};
/**
 * Extracts a trimmed display name from either accepted representation.
 *
 * @param value A plain string, an object with a `text` field, or nothing.
 * @returns The trimmed name, or "" when no name is available.
 */
function normalizeDisplayName(value?: GooglePlaceDisplayName) {
  const text = typeof value === "string" ? value : value?.text;
  return text?.trim() ?? "";
}
/**
 * Coerces a loosely typed value to a finite number.
 *
 * @param value A number, a numeric string, or anything else.
 * @returns The finite number; for non-blank strings the result of
 *   `Number.parseFloat` when finite; otherwise null.
 */
function normalizeNumber(value: unknown) {
  switch (typeof value) {
    case "number":
      return Number.isFinite(value) ? value : null;
    case "string": {
      if (value.trim().length === 0) {
        return null;
      }
      const parsed = Number.parseFloat(value);
      return Number.isFinite(parsed) ? parsed : null;
    }
    default:
      return null;
  }
}
/**
 * Extracts the lower-cased host name of a website URL, without a leading
 * "www.".
 *
 * @param input Absolute URL, possibly missing.
 * @returns The domain, or null when the input is empty, cannot be parsed as
 *   a URL, or has no host name (e.g. `mailto:` links).
 */
function normalizeWebsiteDomain(input?: string | null) {
  if (!input) {
    return null;
  }

  let host: string;
  try {
    host = new URL(input).hostname.toLowerCase();
  } catch {
    return null;
  }

  const domain = host.replace(/^www\./, "");
  // URLs without an authority part parse successfully but have an empty host
  // name; report them as "no domain" instead of returning "".
  return domain.length > 0 ? domain : null;
}
/** Returns the trimmed string, or null for non-strings and blank strings. */
function trimmedOrNull(value: unknown): string | null {
  if (typeof value !== "string") {
    return null;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : null;
}

/**
 * Converts a Google Places response into normalized lead candidates.
 *
 * Places without an id are dropped. Optional text fields that are missing or
 * blank become null, the national phone number is preferred over the
 * international one, and only string entries of `types` are kept.
 *
 * @param response Parsed JSON body of a Places search request.
 * @param fetchedAt Timestamp stored as `sourceFetchedAt` on every candidate.
 * @returns One candidate per place that has an id; an empty array when the
 *   response contains no `places` array.
 */
export function normalizePlacesResponse(
  response: GooglePlacesApiResponse,
  fetchedAt: number,
): GooglePlaceCandidate[] {
  const places = Array.isArray(response?.places) ? response.places : [];

  return places.flatMap((place) => {
    if (!place || !place.id) {
      return [];
    }

    const websiteUrl = trimmedOrNull(place.websiteUri);
    const candidate: GooglePlaceCandidate = {
      placeId: place.id,
      businessName: normalizeDisplayName(place.displayName),
      address: place.formattedAddress?.trim() ?? "",
      websiteUrl,
      websiteDomain: normalizeWebsiteDomain(websiteUrl),
      // A blank national number must not hide a usable international one.
      phone:
        trimmedOrNull(place.nationalPhoneNumber) ??
        trimmedOrNull(place.internationalPhoneNumber),
      rating: normalizeNumber(place.rating),
      userRatingCount: normalizeNumber(place.userRatingCount),
      businessStatus: trimmedOrNull(place.businessStatus),
      // The payload is untyped JSON at runtime; keep only string entries.
      googleTypes: Array.isArray(place.types)
        ? place.types.filter((type): type is string => typeof type === "string")
        : [],
      googlePrimaryType: trimmedOrNull(place.primaryType),
      googleMapsUrl: trimmedOrNull(place.googleMapsUri),
      sourceProvider: "google_places",
      sourceFetchedAt: fetchedAt,
    };

    return [candidate];
  });
}
/**
 * Minimal view of an already stored lead, carrying only the two fields that
 * `isDuplicateCandidate` compares against a new candidate.
 */
export type ExistingLeadLike = {
googlePlaceId?: string | null;
websiteDomain?: string | null;
};
/**
 * A blacklist entry as evaluated by `getBlacklistMatches`.
 * Rows of type "email" are accepted by the type but never match a candidate
 * there, because candidates carry no email address.
 */
export type BlacklistRow = {
type: "domain" | "email" | "phone" | "company" | "google_place_id";
// Original value; for phone rows it is re-normalized to digits when matching.
value: string;
// Pre-normalized value compared directly against the candidate's values.
normalizedValue: string;
};
/**
 * One (type, normalized value) pair derived from a candidate by
 * `getBlacklistLookupValues`. There is no "email" variant.
 */
export type BlacklistLookupValue = {
type: "domain" | "phone" | "company" | "google_place_id";
normalizedValue: string;
};
/**
 * Trims and lower-cases a value and strips a leading "www.".
 *
 * Despite its name, this file also applies it to place ids, company names
 * and phone strings as a generic comparison normalizer.
 *
 * @param value Text to normalize, or nothing.
 * @returns The normalized text, or `""` for `null`/`undefined`.
 */
function normalizeDomain(value?: string | null) {
  if (value == null) {
    return "";
  }
  const lowered = value.trim().toLowerCase();
  return lowered.replace(/^www\./, "");
}
/**
 * Reduces a phone string to its digits.
 *
 * @param value Phone number in any formatting, or nothing.
 * @returns Only the digit characters of `value`; `""` when the input is
 *   empty, `null` or `undefined`.
 */
function normalizePhone(value?: string | null) {
  return value ? value.replace(/\D+/g, "") : "";
}
/**
 * Removes unusable and repeated lookup values while keeping first-seen order.
 *
 * @param values Candidate lookup values, possibly with empty or repeated
 *   entries.
 * @returns The entries whose `normalizedValue` is non-empty, with later
 *   repeats of the same `type` + `normalizedValue` pair dropped.
 */
function uniqueLookupValues(values: BlacklistLookupValue[]) {
  const seenKeys = new Set<string>();
  const unique: BlacklistLookupValue[] = [];
  for (const entry of values) {
    if (!entry.normalizedValue) {
      continue;
    }
    const key = `${entry.type}:${entry.normalizedValue}`;
    if (seenKeys.has(key)) {
      continue;
    }
    seenKeys.add(key);
    unique.push(entry);
  }
  return unique;
}
/**
 * Lists the normalized values of a candidate that can be looked up in the
 * blacklist.
 *
 * The phone number contributes two values: its digits only, and its
 * trimmed/lower-cased text. Empty values and exact repeats are removed by
 * `uniqueLookupValues`.
 *
 * @param candidate Normalized place candidate.
 * @returns Lookup values in the order place id, domain, company, phone
 *   (digits), phone (text).
 */
export function getBlacklistLookupValues(
  candidate: GooglePlaceCandidate,
): BlacklistLookupValue[] {
  // `normalizeDomain` doubles as the generic trim + lower-case normalizer.
  const placeIdValue = normalizeDomain(candidate.placeId);
  const domainValue = normalizeDomain(candidate.websiteDomain);
  const companyValue = normalizeDomain(candidate.businessName);
  const phoneDigits = normalizePhone(candidate.phone);
  const phoneText = normalizeDomain(candidate.phone);

  const lookupValues: BlacklistLookupValue[] = [
    { type: "google_place_id", normalizedValue: placeIdValue },
    { type: "domain", normalizedValue: domainValue },
    { type: "company", normalizedValue: companyValue },
    { type: "phone", normalizedValue: phoneDigits },
    { type: "phone", normalizedValue: phoneText },
  ];
  return uniqueLookupValues(lookupValues);
}
/**
 * Tells whether a candidate is already represented among existing leads.
 *
 * A candidate is a duplicate when its place id or its website domain equals
 * that of any existing lead. Both sides are passed through `normalizeDomain`
 * first, so the comparison ignores case and surrounding whitespace. An empty
 * candidate value never matches.
 *
 * @param candidate Normalized place candidate.
 * @param existing Leads to compare against.
 * @returns `true` on the first place-id or domain match, else `false`.
 */
export function isDuplicateCandidate(
  candidate: GooglePlaceCandidate,
  existing: ExistingLeadLike[],
): boolean {
  const wantedPlaceId = normalizeDomain(candidate.placeId);
  const wantedDomain = normalizeDomain(candidate.websiteDomain);

  for (const lead of existing) {
    const leadPlaceId = normalizeDomain(lead.googlePlaceId);
    const leadDomain = normalizeDomain(lead.websiteDomain);
    if (wantedPlaceId !== "" && leadPlaceId === wantedPlaceId) {
      return true;
    }
    if (wantedDomain !== "" && leadDomain === wantedDomain) {
      return true;
    }
  }
  return false;
}
/**
 * Returns every blacklist row that applies to the candidate.
 *
 * Place id, domain and company rows match when the row's `normalizedValue`
 * equals the candidate's normalized value of the same kind. Phone rows match
 * when the candidate's digits equal either the row's `normalizedValue` or the
 * digits of the row's raw `value`. Rows with an empty `normalizedValue`, and
 * rows of any other type (e.g. "email"), never match.
 *
 * @param candidate Normalized place candidate.
 * @param blacklistRows Blacklist entries to test.
 * @returns The matching rows, in their original order.
 */
export function getBlacklistMatches(
  candidate: GooglePlaceCandidate,
  blacklistRows: BlacklistRow[],
) {
  // Candidate values that are compared by plain equality, keyed by row type.
  const exactValueByType = new Map<BlacklistRow["type"], string>([
    ["google_place_id", normalizeDomain(candidate.placeId)],
    ["domain", normalizeDomain(candidate.websiteDomain)],
    ["company", normalizeDomain(candidate.businessName)],
  ]);
  const phoneDigits = normalizePhone(candidate.phone);

  return blacklistRows.filter((row) => {
    if (!row.normalizedValue) {
      return false;
    }
    if (row.type === "phone") {
      if (phoneDigits === "") {
        return false;
      }
      return (
        row.normalizedValue === phoneDigits ||
        normalizePhone(row.value) === phoneDigits
      );
    }
    const expected = exactValueByType.get(row.type);
    return (
      expected !== undefined &&
      expected !== "" &&
      row.normalizedValue === expected
    );
  });
}

166
lib/lead-discovery-run.ts Normal file
View File

@@ -0,0 +1,166 @@
import type { GooglePlaceCandidate } from "./lead-discovery-google";
/**
 * Minimal view of an agent run used by the start guard
 * (`canStartAgentRun` / `isStalePendingAgentRun`).
 */
type AgentRunLike = {
// The guard reacts only to "running" and "pending".
status: string;
// Last update timestamp; a missing or non-positive value is never treated as stale.
updatedAt?: number;
};
/** Counter values a discovery run passes to `buildLeadDiscoveryCounters`. */
type LeadDiscoveryCounterInput = {
leadsFound: number;
leadsCreated: number;
errors: number;
};
/** Contact routes inspected by `getLeadDiscoveryContactStatus`. */
type LeadDiscoveryContactInput = {
websiteDomain?: string | null;
phone?: string | null;
};
/**
 * Input of `buildLeadDiscoveryLeadRecord`. The id types are generic so the
 * caller's own string-based id types flow through to the built record.
 */
type LeadDiscoveryLeadRecordInput<TCampaignId extends string, TRunId extends string> = {
campaignId: TCampaignId;
runId: TRunId;
niche: string;
postalCode: string;
candidate: GooglePlaceCandidate;
// Written to both `createdAt` and `updatedAt` of the record.
now: number;
};
/**
 * Maps a nullable string to an optional one.
 *
 * @param value String or `null`.
 * @returns `value` unchanged (not trimmed) when it contains non-whitespace
 *   characters, otherwise `undefined`.
 */
function optionalString(value: string | null) {
  if (value == null || value.trim().length === 0) {
    return undefined;
  }
  return value;
}
/**
 * Maps a nullable number to an optional one.
 *
 * @param value Number or `null`.
 * @returns `value` when it is a finite number, otherwise `undefined`.
 */
function optionalNumber(value: number | null) {
  if (typeof value !== "number") {
    return undefined;
  }
  return Number.isFinite(value) ? value : undefined;
}
/** Age after which a still-pending agent run counts as stale (10 minutes, in ms). */
export const PENDING_AGENT_RUN_GRACE_MS = 10 * 60 * 1000;

/**
 * Tells whether a run has been pending for longer than the grace period.
 *
 * @param run Run to inspect.
 * @param now Current time, on the same scale as `run.updatedAt`.
 * @returns `true` only for a run whose status is "pending", whose
 *   `updatedAt` is a positive number, and whose age strictly exceeds
 *   `PENDING_AGENT_RUN_GRACE_MS`.
 */
export function isStalePendingAgentRun(run: AgentRunLike, now: number) {
  if (run.status !== "pending") {
    return false;
  }
  const lastUpdate = typeof run.updatedAt === "number" ? run.updatedAt : 0;
  if (lastUpdate <= 0) {
    // Without a usable timestamp the run cannot be proven stale.
    return false;
  }
  return now - lastUpdate > PENDING_AGENT_RUN_GRACE_MS;
}
/**
 * Decides whether a new agent run may start.
 *
 * @param runs Known runs to check.
 * @param now Current time used for the staleness check; defaults to
 *   `Date.now()`.
 * @returns `false` as soon as any run is "running", or is "pending" and not
 *   stale according to `isStalePendingAgentRun`; otherwise `true`.
 */
export function canStartAgentRun(runs: AgentRunLike[], now = Date.now()) {
  for (const run of runs) {
    if (run.status === "running") {
      return false;
    }
    if (run.status === "pending" && !isStalePendingAgentRun(run, now)) {
      return false;
    }
  }
  return true;
}
/**
 * Builds the full counter object for a lead discovery run.
 *
 * @param input Counts gathered during discovery.
 * @returns The given counts plus `auditsCreated` and `outreachPrepared`,
 *   both fixed at 0.
 */
export function buildLeadDiscoveryCounters(input: LeadDiscoveryCounterInput) {
  const { leadsFound, leadsCreated, errors } = input;
  return {
    leadsFound,
    leadsCreated,
    auditsCreated: 0,
    outreachPrepared: 0,
    errors,
  };
}
/**
 * Classifies a lead by whether it offers any contact route.
 *
 * A route counts only when it contains non-whitespace text. This mirrors
 * `optionalString`, which omits blank values from the stored lead record, so
 * a lead is never marked "new" while its record holds no contact data.
 *
 * @param input Website domain and phone of the lead.
 * @returns "new" when a website domain or phone is present, otherwise
 *   "missing_contact".
 */
export function getLeadDiscoveryContactStatus(
  input: LeadDiscoveryContactInput,
): "new" | "missing_contact" {
  const hasText = (value?: string | null) =>
    typeof value === "string" && value.trim().length > 0;

  return hasText(input.websiteDomain) || hasText(input.phone)
    ? "new"
    : "missing_contact";
}
/**
 * Builds the lead record for a newly discovered place candidate.
 *
 * Required fields are always set. Optional fields are added only when the
 * candidate supplies a usable value (per `optionalString` /
 * `optionalNumber`), so the record never carries keys holding `undefined`.
 * New records start with priority "medium", duplicate status "unique" and
 * blacklist status "clear".
 *
 * @param input Campaign/run ids, search context, candidate and timestamp.
 * @returns The lead record; `createdAt` and `updatedAt` both equal
 *   `input.now`.
 */
export function buildLeadDiscoveryLeadRecord<
  TCampaignId extends string,
  TRunId extends string,
>(input: LeadDiscoveryLeadRecordInput<TCampaignId, TRunId>) {
  const { candidate, now } = input;

  const lead: {
    campaignId: TCampaignId;
    discoveryRunId: TRunId;
    companyName: string;
    niche: string;
    address: string;
    postalCode: string;
    googlePlaceId: string;
    googleMapsUrl?: string;
    googlePrimaryType?: string;
    googleTypes: string[];
    googleRating?: number;
    googleUserRatingCount?: number;
    googleBusinessStatus?: string;
    sourceProvider: "google_places";
    sourceFetchedAt: number;
    websiteUrl?: string;
    websiteDomain?: string;
    phone?: string;
    priority: "medium";
    contactStatus: "new" | "missing_contact";
    duplicateStatus: "unique";
    blacklistStatus: "clear";
    createdAt: number;
    updatedAt: number;
  } = {
    campaignId: input.campaignId,
    discoveryRunId: input.runId,
    companyName: candidate.businessName,
    niche: input.niche,
    address: candidate.address,
    postalCode: input.postalCode,
    googlePlaceId: candidate.placeId,
    googleTypes: candidate.googleTypes,
    sourceProvider: candidate.sourceProvider,
    sourceFetchedAt: candidate.sourceFetchedAt,
    priority: "medium",
    contactStatus: getLeadDiscoveryContactStatus({
      websiteDomain: candidate.websiteDomain,
      phone: candidate.phone,
    }),
    duplicateStatus: "unique",
    blacklistStatus: "clear",
    createdAt: now,
    updatedAt: now,
  };

  // Optional fields, listed in the order in which they are added to the record.
  const optionalFields = {
    googleMapsUrl: optionalString(candidate.googleMapsUrl),
    googlePrimaryType: optionalString(candidate.googlePrimaryType),
    googleRating: optionalNumber(candidate.rating),
    googleUserRatingCount: optionalNumber(candidate.userRatingCount),
    googleBusinessStatus: optionalString(candidate.businessStatus),
    websiteUrl: optionalString(candidate.websiteUrl),
    websiteDomain: optionalString(candidate.websiteDomain),
    phone: optionalString(candidate.phone),
  };
  for (const [field, fieldValue] of Object.entries(optionalFields)) {
    // Absent values are skipped so the key is not created at all.
    if (fieldValue !== undefined) {
      Object.assign(lead, { [field]: fieldValue });
    }
  }
  return lead;
}

View File

@@ -0,0 +1,334 @@
import assert from "node:assert/strict";
import test from "node:test";
import {
GOOGLE_PLACES_FIELD_MASK,
buildGeocodingUrl,
getBlacklistMatches,
getBlacklistLookupValues,
getPlacesSearchSpec,
isDuplicateCandidate,
normalizePlacesResponse,
parseGeocodingResponse,
} from "../lib/lead-discovery-google";
// Preset category "Anwalt" with coordinates must produce a nearby search
// restricted to type "lawyer", with the 12 km radius expressed as 12,000 m
// around the given center.
test("places search spec maps known presets to nearby search and converts radius to meters", () => {
const nearbySpec = getPlacesSearchSpec({
categoryMode: "preset",
category: "Anwalt",
postalCode: "10115",
latitude: 52.52,
longitude: 13.405,
radiusKm: 12,
});
assert.equal(nearbySpec.searchType, "nearby");
assert.equal(nearbySpec.endpoint, "searchNearby");
// Repeats the assertions above as a throwing guard (presumably so the
// type checker narrows `nearbySpec` before `body` is read).
if (nearbySpec.searchType !== "nearby" || nearbySpec.endpoint !== "searchNearby") {
throw new Error("Expected nearby search spec for preset category.");
}
const nearbyBody = nearbySpec.body as {
includedTypes: string[];
locationRestriction: {
circle: {
center: {
latitude: number;
longitude: number;
};
radius: number;
};
};
};
assert.deepEqual(nearbyBody.includedTypes, ["lawyer"]);
assert.equal(
nearbyBody.locationRestriction.circle.radius,
12_000,
);
assert.equal(nearbyBody.locationRestriction.circle.center.latitude, 52.52);
assert.equal(nearbyBody.locationRestriction.circle.center.longitude, 13.405);
});
// Custom search terms, and preset categories that yield no nearby-search
// spec ("Handwerk" here), must fall back to text search with a query of the
// form "<term> in <postal code> Deutschland".
test("places search spec uses text search for custom/Anderes and includes query context", () => {
const customSpec = getPlacesSearchSpec({
categoryMode: "custom",
category: "Anderes",
customSearchTerm: "Barber Shop für Hunde",
postalCode: "80331",
latitude: 48.137,
longitude: 11.575,
radiusKm: 5,
});
assert.equal(customSpec.searchType, "text");
assert.equal(customSpec.endpoint, "searchText");
if (customSpec.searchType !== "text" || customSpec.endpoint !== "searchText") {
throw new Error("Expected text search spec for custom/Anderes.");
}
const customBody = customSpec.body as {
textQuery: string;
locationBias?: {
circle: {
center: { latitude: number; longitude: number };
radius: number;
};
};
};
assert.equal(
customBody.textQuery,
"Barber Shop für Hunde in 80331 Deutschland",
);
// With coordinates present, the radius (5 km -> 5,000 m) is sent as a
// location bias circle.
assert.deepEqual(customBody.locationBias, {
circle: {
center: { latitude: 48.137, longitude: 11.575 },
radius: 5_000,
},
});
// Preset mode: the query is built from the category name and the custom
// search term is ignored. No coordinates are supplied in this call.
const handwerkSpec = getPlacesSearchSpec({
categoryMode: "preset",
category: "Handwerk",
customSearchTerm: "ignored",
postalCode: "80331",
radiusKm: 5,
});
assert.equal(handwerkSpec.searchType, "text");
assert.equal(handwerkSpec.endpoint, "searchText");
if (handwerkSpec.searchType !== "text" || handwerkSpec.endpoint !== "searchText") {
throw new Error("Expected text search spec for unmapped preset category.");
}
const handwerkBody = handwerkSpec.body as { textQuery: string };
assert.equal(handwerkBody.textQuery, "Handwerk in 80331 Deutschland");
});
// The geocoding URL must target the Google Geocoding JSON endpoint and carry
// the address, a country/postal-code components filter, German language and
// region, and the API key.
test("geocoding URL includes API key, DE region, and components filter", () => {
const url = new URL(
buildGeocodingUrl({ postalCode: "40210", apiKey: "geocode-key-123" }),
);
assert.equal(
url.origin + url.pathname,
"https://maps.googleapis.com/maps/api/geocode/json",
);
assert.equal(url.searchParams.get("address"), "40210, Deutschland");
assert.equal(url.searchParams.get("components"), "country:DE|postal_code:40210");
assert.equal(url.searchParams.get("language"), "de");
assert.equal(url.searchParams.get("region"), "de");
assert.equal(url.searchParams.get("key"), "geocode-key-123");
});
// An OK response yields coordinates, address, place id and the supplied
// fetch timestamp; a ZERO_RESULTS response must throw.
test("geocoding parser extracts location from OK response and rejects ZERO_RESULTS", () => {
const ok = parseGeocodingResponse(
{
status: "OK",
results: [
{
formatted_address: "Berlin, 10115 Berlin, Deutschland",
place_id: "place-id-1",
geometry: {
location: {
lat: 52.5170365,
lng: 13.3888599,
},
},
},
],
},
1717480000000,
);
assert.equal(ok.latitude, 52.5170365);
assert.equal(ok.longitude, 13.3888599);
assert.equal(ok.formattedAddress, "Berlin, 10115 Berlin, Deutschland");
assert.equal(ok.placeId, "place-id-1");
assert.equal(ok.fetchedAt, 1717480000000);
// The thrown error must mention either the status code or "no geocoding
// results" (case-insensitive).
assert.throws(
() =>
parseGeocodingResponse(
{ status: "ZERO_RESULTS", results: [] },
1717480000123,
),
(error: unknown) =>
error instanceof Error &&
/ZERO_RESULTS|no geocoding results/i.test(error.message),
);
});
// A fully populated place must normalize to a candidate that keeps the raw
// website URL, derives the lower-cased domain without "www.", and prefers
// the national phone number over the international one.
test("places normalization maps source metadata and normalizes website domain", () => {
const normalized = normalizePlacesResponse(
{
places: [
{
id: "place-1",
displayName: { text: "Beispiel Café" },
formattedAddress: "Hauptstraße 1, 60311 Frankfurt am Main, Deutschland",
websiteUri: "https://www.Example.De/some-path",
nationalPhoneNumber: "+49 30 123456",
internationalPhoneNumber: "+49 49 654321",
rating: 4.6,
userRatingCount: 42,
businessStatus: "OPERATIONAL",
types: ["restaurant", "cafe"],
primaryType: "restaurant",
googleMapsUri: "https://maps.google.com/place-id-1",
},
],
},
1717480001000,
);
assert.equal(normalized.length, 1);
assert.deepEqual(normalized[0], {
placeId: "place-1",
businessName: "Beispiel Café",
address: "Hauptstraße 1, 60311 Frankfurt am Main, Deutschland",
websiteUrl: "https://www.Example.De/some-path",
websiteDomain: "example.de",
phone: "+49 30 123456",
rating: 4.6,
userRatingCount: 42,
businessStatus: "OPERATIONAL",
googleTypes: ["restaurant", "cafe"],
googlePrimaryType: "restaurant",
googleMapsUrl: "https://maps.google.com/place-id-1",
sourceProvider: "google_places",
sourceFetchedAt: 1717480001000,
});
// Pins the exact field mask string, which names every place field the
// fixture above supplies.
assert.equal(
GOOGLE_PLACES_FIELD_MASK,
"places.id,places.displayName,places.formattedAddress,places.websiteUri,places.nationalPhoneNumber,places.internationalPhoneNumber,places.rating,places.userRatingCount,places.businessStatus,places.types,places.primaryType,places.googleMapsUri",
);
});
// Three cases: match by place id alone, match by website domain alone, and
// no match when neither value is known.
test("duplicate detection uses placeId and websiteDomain", () => {
const existingLeads = [
{ googlePlaceId: "dup-1", websiteDomain: "other.de" },
{ googlePlaceId: "other-2", websiteDomain: "example.de" },
];
// Same place id, candidate has no website: duplicate.
assert.equal(
isDuplicateCandidate(
{
placeId: "dup-1",
businessName: "Test",
address: "A",
websiteUrl: null,
websiteDomain: null,
phone: null,
rating: null,
userRatingCount: null,
businessStatus: null,
googleTypes: [],
googlePrimaryType: null,
googleMapsUrl: null,
sourceProvider: "google_places",
sourceFetchedAt: 0,
},
existingLeads,
),
true,
);
// Unknown place id but a known website domain: duplicate.
assert.equal(
isDuplicateCandidate(
{
placeId: "none",
businessName: "Test",
address: "A",
websiteUrl: "https://www.example.de",
websiteDomain: "example.de",
phone: null,
rating: null,
userRatingCount: null,
businessStatus: null,
googleTypes: [],
googlePrimaryType: null,
googleMapsUrl: null,
sourceProvider: "google_places",
sourceFetchedAt: 0,
},
existingLeads,
),
true,
);
// Unknown place id and unknown domain: not a duplicate.
assert.equal(
isDuplicateCandidate(
{
placeId: "none",
businessName: "Test",
address: "A",
websiteUrl: "https://www.new.de",
websiteDomain: "new.de",
phone: null,
rating: null,
userRatingCount: null,
businessStatus: null,
googleTypes: [],
googlePrimaryType: null,
googleMapsUrl: null,
sourceProvider: "google_places",
sourceFetchedAt: 0,
},
existingLeads,
),
false,
);
});
// Checks both the lookup values derived from a candidate and which
// blacklist rows match it.
test("blacklist matches include google_place_id, domain, company and phone", () => {
const candidate = {
placeId: "place-blacklisted",
businessName: "Muster GmbH",
address: "A",
websiteUrl: "https://www.Blocked.de",
websiteDomain: "blocked.de",
phone: "+49 30 555 123",
rating: null,
userRatingCount: null,
businessStatus: null,
googleTypes: [],
googlePrimaryType: null,
googleMapsUrl: null,
sourceProvider: "google_places" as const,
sourceFetchedAt: 0,
};
// The phone appears twice: once as digits only, once as lower-cased text.
assert.deepEqual(getBlacklistLookupValues(candidate), [
{ type: "google_place_id", normalizedValue: "place-blacklisted" },
{ type: "domain", normalizedValue: "blocked.de" },
{ type: "company", normalizedValue: "muster gmbh" },
{ type: "phone", normalizedValue: "4930555123" },
{ type: "phone", normalizedValue: "+49 30 555 123" },
]);
const matches = getBlacklistMatches(
candidate,
[
{
type: "google_place_id",
value: "place-blacklisted",
normalizedValue: "place-blacklisted",
},
{ type: "domain", value: "blocked.de", normalizedValue: "blocked.de" },
{ type: "company", value: "Muster GmbH", normalizedValue: "muster gmbh" },
{ type: "phone", value: "+49 30 555 123", normalizedValue: "4930555123" },
// normalizedValue is not digits-only here; the row still matches through
// the digits of its raw `value`.
{
type: "phone",
value: "+49 30 555 123",
normalizedValue: "+49 30 555 123",
},
// The last two rows must not match: an email row, and a different phone.
{ type: "email", value: "x@example.de", normalizedValue: "x@example.de" },
{ type: "phone", value: "+49 30 999 999", normalizedValue: "4930999999" },
],
);
const matchTypes = matches.map((match) => match.type).sort();
assert.deepEqual(
matchTypes,
["company", "domain", "google_place_id", "phone", "phone"].sort(),
);
});

View File

@@ -0,0 +1,108 @@
import assert from "node:assert/strict";
import test from "node:test";
import {
buildLeadDiscoveryLeadRecord,
buildLeadDiscoveryCounters,
canStartAgentRun,
isStalePendingAgentRun,
getLeadDiscoveryContactStatus,
} from "../lib/lead-discovery-run";
// A new run may start when only finished or stale-pending runs exist, and is
// blocked by a fresh pending run or by any running run.
test("agent run guard ignores stale pending runs but blocks active runs", () => {
const now = Date.UTC(2026, 5, 4, 13, 20, 0);
assert.equal(canStartAgentRun([{ status: "succeeded" }], now), true);
// Pending for 20 minutes: beyond the grace period, so ignored.
assert.equal(
canStartAgentRun([{ status: "pending", updatedAt: now - 20 * 60 * 1000 }], now),
true,
);
// Pending for 30 seconds: still within the grace period, so blocking.
assert.equal(
canStartAgentRun([{ status: "pending", updatedAt: now - 30_000 }], now),
false,
);
assert.equal(
canStartAgentRun([{ status: "failed" }, { status: "running" }], now),
false,
);
});
// Staleness applies only to pending runs older than the grace period:
// 11 minutes is stale, 9 minutes is not, and a running run never is.
test("stale pending runs are older than the discovery startup grace period", () => {
const now = Date.UTC(2026, 5, 4, 13, 20, 0);
assert.equal(
isStalePendingAgentRun({ status: "pending", updatedAt: now - 11 * 60 * 1000 }, now),
true,
);
assert.equal(
isStalePendingAgentRun({ status: "pending", updatedAt: now - 9 * 60 * 1000 }, now),
false,
);
assert.equal(
isStalePendingAgentRun({ status: "running", updatedAt: now - 60 * 60 * 1000 }, now),
false,
);
});
// The counter object passes the three supplied counts through and includes
// `auditsCreated` and `outreachPrepared`, both set to 0.
test("lead discovery counters preserve audit and outreach counters", () => {
const counters = buildLeadDiscoveryCounters({
leadsFound: 12,
leadsCreated: 3,
errors: 1,
});
assert.deepEqual(counters, {
leadsFound: 12,
leadsCreated: 3,
auditsCreated: 0,
outreachPrepared: 0,
errors: 1,
});
});
// A lead with neither website domain nor phone is "missing_contact"; either
// one alone is enough for "new".
test("lead discovery contact status separates leads without any contact route", () => {
assert.equal(
getLeadDiscoveryContactStatus({ websiteDomain: null, phone: null }),
"missing_contact",
);
assert.equal(
getLeadDiscoveryContactStatus({ websiteDomain: "example.de", phone: null }),
"new",
);
assert.equal(
getLeadDiscoveryContactStatus({ websiteDomain: null, phone: "030 123" }),
"new",
);
});
// The built record keeps the raw website URL next to the normalized domain,
// carries the rating metadata over, and takes `sourceFetchedAt` from the
// candidate rather than from `now`.
test("lead discovery lead record keeps raw website url and normalized domain", () => {
const record = buildLeadDiscoveryLeadRecord({
campaignId: "campaign-1",
runId: "run-1",
niche: "Restaurant",
postalCode: "10115",
now: 1717480000000,
candidate: {
placeId: "place-1",
businessName: "Beispiel GmbH",
address: "Hauptstraße 1",
websiteUrl: "https://www.example.de/path",
websiteDomain: "example.de",
phone: "+49 30 123",
rating: 4.5,
userRatingCount: 12,
businessStatus: "OPERATIONAL",
googleTypes: ["restaurant"],
googlePrimaryType: "restaurant",
googleMapsUrl: "https://maps.google.com/place-1",
sourceProvider: "google_places",
sourceFetchedAt: 1717480001000,
},
});
assert.equal(record.websiteUrl, "https://www.example.de/path");
assert.equal(record.websiteDomain, "example.de");
assert.equal(record.googleRating, 4.5);
assert.equal(record.googleUserRatingCount, 12);
// Differs from `now` above, which shows the value comes from the candidate.
assert.equal(record.sourceFetchedAt, 1717480001000);
});