Files
pitchfast/lib/lead-discovery-google.ts

696 lines
18 KiB
TypeScript

/**
 * Comma-separated list of place fields, each prefixed with `places.`.
 *
 * NOTE(review): presumably sent as the field mask of Places requests; the
 * call site is not visible in this part of the file.
 */
export const GOOGLE_PLACES_FIELD_MASK = [
  "places.id",
  "places.displayName",
  "places.formattedAddress",
  "places.websiteUri",
  "places.nationalPhoneNumber",
  "places.internationalPhoneNumber",
  "places.rating",
  "places.userRatingCount",
  "places.businessStatus",
  "places.types",
  "places.primaryType",
  "places.googleMapsUri",
].join(",");
/**
 * Campaign fields that `getPlacesSearchSpec` reads to build a Places search.
 */
type CampaignLike = {
// "custom" always results in a text search.
categoryMode?: "preset" | "custom";
// Category label; trimmed before use. "Anderes" is handled like a custom search.
category?: string | null;
// Free-text term used as the query base when the search is custom.
customSearchTerm?: string | null;
// Appended to the text query as "<postalCode> Deutschland".
postalCode: string;
// Search radius in kilometres; converted to metres for the request body.
radiusKm: number;
// Search centre: required for nearby searches, optional bias for text searches.
latitude?: number;
longitude?: number;
};
/**
 * Request body that `getPlacesSearchSpec` builds for the "searchNearby" endpoint.
 */
type PlacesNearbyBody = {
// Filled with the single mapped type of the preset category.
includedTypes: string[];
maxResultCount: number;
// Circle the search is restricted to.
locationRestriction: {
circle: {
center: {
latitude: number;
longitude: number;
};
// Filled from `ensureRadiusMeters`, i.e. kilometres * 1000, rounded.
radius: number;
};
};
};
/**
 * Request body that `getPlacesSearchSpec` builds for the "searchText" endpoint.
 */
type PlacesTextBody = {
// Query of the form "<term> in <postalCode> Deutschland".
textQuery: string;
maxResultCount: number;
// Only set when the campaign carries numeric coordinates.
locationBias?: {
circle: {
center: {
latitude: number;
longitude: number;
};
// Filled from `ensureRadiusMeters`, i.e. kilometres * 1000, rounded.
radius: number;
};
};
};
// Either of the two request bodies produced by `getPlacesSearchSpec`.
type PlacesSearchBody = PlacesNearbyBody | PlacesTextBody;
/**
 * Result of `getPlacesSearchSpec`: which kind of search to run, the endpoint
 * name to call, and the request body to send.
 */
export type PlacesSearchSpec = {
// "nearby" is paired with "searchNearby", "text" with "searchText".
searchType: "nearby" | "text";
endpoint: "searchNearby" | "searchText";
body: PlacesSearchBody;
};
// Value written to `maxResultCount` in both the nearby and the text search body.
const GOOGLE_PLACES_MAX_RESULTS = 20;
// Maps preset category labels to the type string placed in `includedTypes`
// of a nearby search. Categories without an entry fall back to a text search.
const PRESET_CATEGORY_TYPES: Record<string, string> = {
Anwalt: "lawyer",
Restaurant: "restaurant",
// Both spellings map to the same type.
Café: "cafe",
Cafe: "cafe",
Friseur: "hair_salon",
Zahnarzt: "dentist",
Physiotherapie: "physiotherapist",
};
/**
 * Converts a radius in kilometres to whole metres.
 *
 * @param radiusKm - Radius in kilometres; must be a finite, non-negative number.
 * @returns The radius in metres, rounded to the nearest integer.
 * @throws Error if `radiusKm` is not a finite number or is negative.
 */
function ensureRadiusMeters(radiusKm: number) {
  // The runtime type check guards against untyped callers (e.g. parsed JSON).
  if (typeof radiusKm !== "number" || !Number.isFinite(radiusKm)) {
    throw new Error("Radius must be a finite number.");
  }
  // A circle cannot have a negative radius; fail here with a clear message
  // instead of putting a meaningless value into the request body.
  if (radiusKm < 0) {
    throw new Error("Radius must not be negative.");
  }
  return Math.round(radiusKm * 1000);
}
/**
 * Trims a search term.
 *
 * @param value - Raw term; may be missing.
 * @returns The trimmed term, or "" when `value` is null or undefined.
 */
function normalizeCustomSearchTerm(value?: string | null) {
  if (value === null || value === undefined) {
    return "";
  }
  return value.trim();
}
/**
 * Decides which Places search to run for a campaign and builds its request body.
 *
 * - A preset category with an entry in `PRESET_CATEGORY_TYPES` (and a mode
 *   other than "custom") yields a "nearby" search restricted to a circle
 *   around the campaign coordinates.
 * - Everything else yields a "text" search for
 *   "<term> in <postalCode> Deutschland". The term is the custom search term
 *   when the mode is "custom" or the category is "Anderes", otherwise the
 *   category itself, and "Unternehmen" when the term is empty. Coordinates,
 *   when present, are added as a location bias.
 *
 * @param campaignLike - Campaign fields describing category and location.
 * @returns The search type, endpoint name and request body.
 * @throws Error if a nearby search is required but the coordinates are
 *   missing or not finite, or if the radius is rejected by
 *   `ensureRadiusMeters`.
 */
export function getPlacesSearchSpec(campaignLike: CampaignLike): PlacesSearchSpec {
  const category = normalizeCustomSearchTerm(campaignLike.category);
  const isCustomSearch =
    campaignLike.categoryMode === "custom" || category === "Anderes";

  // Own-property lookup only: the `in` operator also matches inherited keys
  // such as "constructor" or "toString", which would turn those category
  // strings into a nearby search with a non-string `includedTypes` entry.
  const presetType =
    campaignLike.categoryMode !== "custom" &&
    Object.prototype.hasOwnProperty.call(PRESET_CATEGORY_TYPES, category)
      ? PRESET_CATEGORY_TYPES[category]
      : undefined;

  const { latitude, longitude } = campaignLike;

  if (presetType !== undefined) {
    // NaN/Infinity would serialize to `null` in JSON, so they count as missing.
    if (
      typeof latitude !== "number" ||
      !Number.isFinite(latitude) ||
      typeof longitude !== "number" ||
      !Number.isFinite(longitude)
    ) {
      throw new Error("Nearby places search requires latitude and longitude.");
    }
    return {
      searchType: "nearby",
      endpoint: "searchNearby",
      body: {
        includedTypes: [presetType],
        maxResultCount: GOOGLE_PLACES_MAX_RESULTS,
        locationRestriction: {
          circle: {
            center: {
              latitude,
              longitude,
            },
            radius: ensureRadiusMeters(campaignLike.radiusKm),
          },
        },
      },
    };
  }

  const baseTerm = isCustomSearch
    ? normalizeCustomSearchTerm(campaignLike.customSearchTerm)
    : category;
  const locationQuerySuffix = campaignLike.postalCode
    ? `${campaignLike.postalCode} Deutschland`
    : "Deutschland";
  // An empty term falls back to a generic "Unternehmen" (businesses) query.
  const fallbackTerm = baseTerm
    ? `${baseTerm} in ${locationQuerySuffix}`
    : `Unternehmen in ${locationQuerySuffix}`;

  const textBody: PlacesTextBody = {
    textQuery: fallbackTerm,
    maxResultCount: GOOGLE_PLACES_MAX_RESULTS,
  };

  // The bias is optional: without usable coordinates the query text alone
  // (which contains the postal code) locates the search.
  if (
    typeof latitude === "number" &&
    Number.isFinite(latitude) &&
    typeof longitude === "number" &&
    Number.isFinite(longitude)
  ) {
    textBody.locationBias = {
      circle: {
        center: {
          latitude,
          longitude,
        },
        radius: ensureRadiusMeters(campaignLike.radiusKm),
      },
    };
  }

  return {
    searchType: "text",
    endpoint: "searchText",
    body: textBody,
  };
}
/**
 * Response shape consumed by `parseGeocodingResponse`.
 * NOTE(review): presumably the JSON returned by the URL that
 * `buildGeocodingUrl` creates — confirm at the call site.
 */
type LegacyGeocodingResponse = {
// Only the value "OK" is accepted by `parseGeocodingResponse`.
status: string;
// Only the first element is read.
results?: Array<{
geometry?: {
location?: {
// Typed as unknown; validated as finite numbers at runtime.
lat?: unknown;
lng?: unknown;
};
};
formatted_address?: string;
place_id?: string;
}>;
};
/**
 * Geocoding result with the same fields that `parseGeocodingResponse` returns.
 */
export type GeocodingCoordinates = {
latitude: number;
longitude: number;
formattedAddress: string;
placeId: string;
// Caller-supplied timestamp; its unit is not visible in this file section.
fetchedAt: number;
};
/**
 * Builds the geocoding request URL for a German postal code.
 *
 * The postal code is sent both as free-text address ("<postalCode>, Deutschland")
 * and as a component filter ("country:DE|postal_code:<postalCode>"); language
 * and region are fixed to "de".
 *
 * @param postalCode - Postal code to look up.
 * @param apiKey - API key added as the `key` query parameter.
 * @returns The complete URL as a string, with all parameters URL-encoded.
 */
export function buildGeocodingUrl({
  postalCode,
  apiKey,
}: {
  postalCode: string;
  apiKey: string;
}): string {
  const queryParameters: Array<[string, string]> = [
    ["address", `${postalCode}, Deutschland`],
    ["components", `country:DE|postal_code:${postalCode}`],
    ["language", "de"],
    ["region", "de"],
    ["key", apiKey],
  ];
  const geocodingUrl = new URL("https://maps.googleapis.com/maps/api/geocode/json");
  for (const [name, value] of queryParameters) {
    geocodingUrl.searchParams.set(name, value);
  }
  return geocodingUrl.toString();
}
/**
 * Validates a geocoding response and extracts the first result.
 *
 * @param response - Parsed geocoding response.
 * @param fetchedAt - Timestamp copied unchanged into the result.
 * @returns Latitude, longitude, formatted address and place id of the first
 *   result, together with `fetchedAt`.
 * @throws Error if the status is not "OK", if there is no result, or if the
 *   first result lacks a finite latitude/longitude, a formatted address or a
 *   place id.
 */
export function parseGeocodingResponse(
  response: LegacyGeocodingResponse,
  fetchedAt: number,
) {
  // Covers both a missing response and a non-"OK" status.
  const status = response?.status;
  if (status !== "OK") {
    throw new Error(`Geocoding failed with status "${status ?? "unknown"}".`);
  }

  const topResult = response.results?.[0];
  if (!topResult) {
    throw new Error("Geocoding returned no results.");
  }

  const coordinates = topResult.geometry?.location;
  const latitude = coordinates?.lat;
  const longitude = coordinates?.lng;
  const { formatted_address: formattedAddress, place_id: placeId } = topResult;

  if (typeof latitude !== "number" || !Number.isFinite(latitude)) {
    throw new Error("Geocoding result is missing latitude.");
  }
  if (typeof longitude !== "number" || !Number.isFinite(longitude)) {
    throw new Error("Geocoding result is missing longitude.");
  }
  if (!formattedAddress) {
    throw new Error("Geocoding result is missing formatted address.");
  }
  if (!placeId) {
    throw new Error("Geocoding result is missing place id.");
  }

  return { latitude, longitude, formattedAddress, placeId, fetchedAt };
}
// Display name of a place: either a plain string or an object carrying the
// name in `text`. `normalizeDisplayName` accepts both forms.
type GooglePlaceDisplayName =
| string
| {
text?: string;
};
/**
 * One e-mail address attached to a candidate, with optional provenance.
 */
type GooglePlaceContactEmailSource = {
email: string;
// Passed through unchanged by the helpers in this file.
emailSource?: string | null;
contactPerson?: string | null;
// When true, the address may be chosen even if it is not a generic mailbox
// (see `getUsableContactEmailFromEntries`).
isBusinessContactAddress?: boolean;
};
// Identifies which provider produced a candidate.
export type LeadDiscoverySourceProvider = "google_places" | "local_business_data";
/**
 * A single place as read by `normalizePlacesResponse`. The field names
 * mirror the entries of `GOOGLE_PLACES_FIELD_MASK`; every field is optional.
 */
type GooglePlaceApiPlace = {
// Places without an id are dropped during normalization.
id?: string;
displayName?: GooglePlaceDisplayName;
formattedAddress?: string;
websiteUri?: string;
// The national number is preferred over the international one.
nationalPhoneNumber?: string;
internationalPhoneNumber?: string;
rating?: number;
userRatingCount?: number;
businessStatus?: string;
types?: string[];
primaryType?: string;
googleMapsUri?: string;
};
/**
 * Search response wrapper; a missing or non-array `places` is treated as an
 * empty result by `normalizePlacesResponse`.
 */
export type GooglePlacesApiResponse = {
places?: GooglePlaceApiPlace[] | null;
};
/**
 * Normalized lead candidate used by the duplicate and blacklist helpers in
 * this file.
 */
export type GooglePlaceCandidate = {
placeId: string;
// When absent, helpers in this file fall back to `placeId`.
sourceBusinessId?: string | null;
businessName: string;
address: string;
websiteUrl: string | null;
// Lower-cased host of `websiteUrl` without a leading "www.".
websiteDomain: string | null;
phone: string | null;
// Primary e-mail and its metadata; not set by `normalizePlacesResponse`.
email?: string | null;
emailSource?: string | null;
contactPerson?: string | null;
isBusinessContactAddress?: boolean;
// Additional e-mail entries, considered after the primary e-mail.
contactEmails?: GooglePlaceContactEmailSource[];
rating: number | null;
userRatingCount: number | null;
businessStatus: string | null;
googleTypes: string[];
googlePrimaryType: string | null;
googleMapsUrl: string | null;
sourceProvider: LeadDiscoverySourceProvider;
// Timestamp supplied by the caller of the normalizer.
sourceFetchedAt: number;
};
/**
 * Extracts a trimmed display name.
 *
 * @param value - Plain string, or an object with an optional `text` field.
 * @returns The trimmed name, or "" when no text is available.
 */
function normalizeDisplayName(value?: GooglePlaceDisplayName) {
  const rawName = typeof value === "string" ? value : value?.text;
  return rawName?.trim() ?? "";
}
/**
 * Coerces a value to a finite number.
 *
 * Finite numbers are returned as-is; non-blank strings are parsed with
 * `Number.parseFloat` (so a numeric prefix such as "12abc" yields 12).
 *
 * @param value - Any value.
 * @returns The finite number, or null when none can be derived.
 */
function normalizeNumber(value: unknown) {
  switch (typeof value) {
    case "number":
      return Number.isFinite(value) ? value : null;
    case "string": {
      if (value.trim().length === 0) {
        return null;
      }
      const parsed = Number.parseFloat(value);
      return Number.isFinite(parsed) ? parsed : null;
    }
    default:
      return null;
  }
}
/**
 * Derives a comparable domain from a website URL.
 *
 * @param input - Absolute URL; may be missing.
 * @returns The lower-cased host name without one leading "www.", or null
 *   when the input is empty or cannot be parsed as a URL.
 */
function normalizeWebsiteDomain(input?: string | null) {
  if (!input) {
    return null;
  }
  let hostname: string;
  try {
    hostname = new URL(input).hostname;
  } catch {
    // Not an absolute URL (for example, the scheme is missing).
    return null;
  }
  const lowered = hostname.toLowerCase();
  return lowered.startsWith("www.") ? lowered.slice("www.".length) : lowered;
}
// Local parts (the text before "@") that mark an address as a generic
// business mailbox; consulted by `isGenericBusinessEmail`.
const GENERIC_BUSINESS_EMAIL_LOCAL_PARTS = new Set([
"info",
"kontakt",
"hello",
"hallo",
"office",
"post",
"service",
"team",
"anfrage",
]);
/**
 * Normalizes free text for comparison: trims, lower-cases and collapses
 * every run of whitespace into a single space.
 *
 * @param value - Raw text; may be missing.
 * @returns The normalized text, or "" when `value` is null or undefined.
 */
export function normalizeText(value?: string | null) {
  if (value === null || value === undefined) {
    return "";
  }
  const lowered = value.trim().toLowerCase();
  return lowered.replace(/\s+/g, " ");
}
/**
 * Normalizes and validates an e-mail address.
 *
 * The value is trimmed and lower-cased. It is accepted only if it has
 * exactly one "@", a local part made of `a-z 0-9 . _ % + -`, and a domain
 * that contains a dot and no whitespace.
 *
 * @param value - Raw address; may be missing.
 * @returns The normalized address, or null when it is missing or invalid.
 */
export function normalizeEmailAddress(value?: string | null) {
  const valueTrimmed = value?.trim().toLowerCase();
  if (!valueTrimmed) {
    return null;
  }
  const parts = valueTrimmed.split("@");
  // Exactly one "@": reading only the first two segments would let values
  // such as "a@b.com@c.com" pass and be returned unchanged.
  if (parts.length !== 2) {
    return null;
  }
  const [localPart, domain] = parts;
  if (!localPart || !domain) {
    return null;
  }
  if (!/^[a-z0-9._%+-]+$/.test(localPart)) {
    return null;
  }
  if (!/^[^\s@]+\.[^\s@]+$/.test(domain)) {
    return null;
  }
  return valueTrimmed;
}
/**
 * The e-mail selected for a candidate, with its metadata.
 */
export type UsableContactEmail = {
// Normalized (trimmed, lower-cased) address.
email: string;
emailSource: string | null;
contactPerson: string | null;
};
/**
 * Internal working form of an entry inside `getUsableContactEmailFromEntries`.
 */
type ParsedContactEmail = {
email: string;
emailSource: string | null;
contactPerson: string | null;
// True only when the input flag was exactly `true`.
isBusinessContactAddress: boolean;
// True when the local part is one of the generic mailbox names.
isGeneric: boolean;
};
/**
 * Input entry accepted by `getUsableContactEmailFromEntries`.
 */
type ContactEmailRuleInput = {
email: string;
emailSource?: string | null;
contactPerson?: string | null;
isBusinessContactAddress?: boolean;
};
/**
 * Picks the e-mail address to use from a list of entries.
 *
 * Entries whose address fails `normalizeEmailAddress` are ignored. Among the
 * rest, the first generic business mailbox wins; if there is none, the first
 * entry flagged as a business contact address is used.
 *
 * @param entries - Candidate entries in priority order; may be undefined.
 * @returns The chosen address with its source and contact person, or null
 *   when no entry qualifies.
 */
export function getUsableContactEmailFromEntries(
  entries: ContactEmailRuleInput[] | undefined,
) {
  if (!Array.isArray(entries) || entries.length === 0) {
    return null;
  }

  const validEntries = entries.flatMap((entry): ParsedContactEmail[] => {
    const email = normalizeEmailAddress(entry.email);
    if (!email) {
      return [];
    }
    return [
      {
        email,
        emailSource: entry.emailSource ?? null,
        contactPerson: entry.contactPerson ?? null,
        isBusinessContactAddress: entry.isBusinessContactAddress === true,
        isGeneric: isGenericBusinessEmail(email),
      },
    ];
  });

  // Generic mailboxes take precedence over flagged business contact addresses.
  const chosen =
    validEntries.find((entry) => entry.isGeneric) ??
    validEntries.find((entry) => entry.isBusinessContactAddress);
  if (!chosen) {
    return null;
  }

  const { email, emailSource, contactPerson } = chosen;
  return { email, emailSource, contactPerson };
}
/**
 * Collects all e-mail entries of a candidate into a new array.
 *
 * The primary `email` (when non-empty) comes first, followed by the entries
 * of `contactEmails` in their original order. Addresses are not normalized
 * or validated here.
 *
 * @param candidate - Candidate whose e-mail fields are read.
 * @returns The combined list; empty when the candidate has no e-mail data.
 */
function getCandidateEmailMetadata(candidate: GooglePlaceCandidate) {
  const primaryEntries: GooglePlaceContactEmailSource[] = candidate.email
    ? [
        {
          email: candidate.email,
          emailSource: candidate.emailSource,
          contactPerson: candidate.contactPerson,
          isBusinessContactAddress: candidate.isBusinessContactAddress,
        },
      ]
    : [];
  const additionalEntries = Array.isArray(candidate.contactEmails)
    ? candidate.contactEmails
    : [];
  return [...primaryEntries, ...additionalEntries];
}
/**
 * Returns every valid e-mail address of a candidate in normalized form.
 *
 * @param candidate - Candidate whose primary and additional e-mails are read.
 * @returns Normalized addresses in entry order; invalid ones are dropped and
 *   duplicates are kept.
 */
export function getCandidateEmailValues(candidate: GooglePlaceCandidate) {
  const normalizedEmails: string[] = [];
  for (const entry of getCandidateEmailMetadata(candidate)) {
    const normalized = normalizeEmailAddress(entry.email);
    if (normalized !== null) {
      normalizedEmails.push(normalized);
    }
  }
  return normalizedEmails;
}
/**
 * Returns the local part of an address without any "+tag" suffix.
 *
 * @param email - E-mail address (not validated here).
 * @returns The text before the first "@", cut at the first "+".
 */
function splitEmailLocalPart(email: string) {
  const atIndex = email.indexOf("@");
  const localPart = atIndex === -1 ? email : email.slice(0, atIndex);
  const plusIndex = localPart.indexOf("+");
  return plusIndex === -1 ? localPart : localPart.slice(0, plusIndex);
}
/**
 * Tells whether an address is a generic business mailbox.
 *
 * @param email - E-mail address.
 * @returns True when the lower-cased local part (without "+tag") is listed
 *   in `GENERIC_BUSINESS_EMAIL_LOCAL_PARTS`.
 */
function isGenericBusinessEmail(email: string) {
  const localPart = splitEmailLocalPart(email);
  return GENERIC_BUSINESS_EMAIL_LOCAL_PARTS.has(localPart.toLowerCase());
}
/**
 * Selects the e-mail address to contact a candidate with.
 *
 * Gathers the candidate's primary and additional e-mail entries and applies
 * the selection rules of `getUsableContactEmailFromEntries`.
 *
 * @param candidate - Candidate to inspect.
 * @returns The chosen address with metadata, or null when none qualifies.
 */
export function getUsableContactEmail(
  candidate: GooglePlaceCandidate,
): UsableContactEmail | null {
  const ruleInputs = getCandidateEmailMetadata(candidate).map(
    ({ email, emailSource, contactPerson, isBusinessContactAddress }) => ({
      email,
      emailSource,
      contactPerson,
      isBusinessContactAddress,
    }),
  );
  return getUsableContactEmailFromEntries(ruleInputs);
}
/**
 * Converts a Places search response into lead candidates.
 *
 * Places without an id are skipped. Optional text fields are trimmed, and a
 * field that is absent, empty or whitespace-only becomes null. The national
 * phone number is preferred; the international one is used when the national
 * number is missing or blank. Every candidate gets `sourceProvider`
 * "google_places" and the given `fetchedAt`.
 *
 * @param response - Search response; a missing or non-array `places` yields [].
 * @param fetchedAt - Timestamp stored as `sourceFetchedAt` on each candidate.
 * @returns One candidate per place that has an id, in response order.
 */
export function normalizePlacesResponse(
  response: GooglePlacesApiResponse,
  fetchedAt: number,
): GooglePlaceCandidate[] {
  // Trimmed text, or null when there is nothing usable. Without this, a
  // blank field would surface as "" in a `string | null` property, and a
  // blank national number would hide the international one.
  const textOrNull = (value?: string | null): string | null => {
    const trimmed = value?.trim();
    return trimmed ? trimmed : null;
  };

  const places = Array.isArray(response?.places) ? response.places : [];
  return places.flatMap((place) => {
    if (!place || !place.id) {
      return [];
    }
    const websiteUrl = textOrNull(place.websiteUri);
    const candidate: GooglePlaceCandidate = {
      placeId: place.id,
      businessName: normalizeDisplayName(place.displayName),
      address: place.formattedAddress?.trim() ?? "",
      websiteUrl,
      websiteDomain: normalizeWebsiteDomain(websiteUrl),
      phone:
        textOrNull(place.nationalPhoneNumber) ??
        textOrNull(place.internationalPhoneNumber),
      rating: normalizeNumber(place.rating),
      userRatingCount: normalizeNumber(place.userRatingCount),
      businessStatus: textOrNull(place.businessStatus),
      googleTypes: Array.isArray(place.types) ? place.types : [],
      googlePrimaryType: textOrNull(place.primaryType),
      googleMapsUrl: textOrNull(place.googleMapsUri),
      sourceProvider: "google_places",
      sourceFetchedAt: fetchedAt,
    };
    return [candidate];
  });
}
/**
 * Fields of an already stored lead that the duplicate checks compare against.
 * All fields are optional; missing values never match.
 */
export type ExistingLeadLike = {
googlePlaceId?: string | null;
sourceBusinessId?: string | null;
websiteDomain?: string | null;
email?: string | null;
// Used together with `address` by `isProbableDuplicateCandidate`.
companyName?: string | null;
address?: string | null;
phone?: string | null;
};
/**
 * A blacklist entry as matched by `getBlacklistMatches`.
 */
export type BlacklistRow = {
type: "domain" | "email" | "phone" | "company" | "google_place_id" | "source_business_id";
// Original value; for "phone" rows it is additionally compared after phone normalization.
value: string;
// Compared as-is against the normalized candidate value; empty rows never match.
normalizedValue: string;
};
/**
 * A (type, normalized value) pair produced by `getBlacklistLookupValues`.
 */
export type BlacklistLookupValue = {
type: "domain" | "email" | "phone" | "company" | "google_place_id" | "source_business_id";
normalizedValue: string;
};
/**
 * Normalizes a domain-like value for comparison: trims, lower-cases and
 * removes one leading "www.". In this file it is also applied to place ids
 * and source business ids.
 *
 * @param value - Raw value; may be missing.
 * @returns The normalized value, or "" when `value` is null or undefined.
 */
export function normalizeDomain(value?: string | null) {
  if (value === null || value === undefined) {
    return "";
  }
  const lowered = value.trim().toLowerCase();
  return lowered.startsWith("www.") ? lowered.slice("www.".length) : lowered;
}
/**
 * Reduces a phone number to its digits for comparison.
 *
 * All non-digit characters are removed; a leading "00" (international
 * prefix) is then dropped, so "+49 30 1" and "0049 30 1" compare equal.
 *
 * @param value - Raw phone number; may be missing.
 * @returns The digit string, or "" when `value` is empty or missing.
 */
export function normalizePhone(value?: string | null) {
  if (!value) {
    return "";
  }
  const digitsOnly = value.replace(/\D+/g, "");
  return digitsOnly.startsWith("00") ? digitsOnly.slice(2) : digitsOnly;
}
/**
 * Removes empty and repeated lookup values.
 *
 * Two values are the same when both `type` and `normalizedValue` match; the
 * first occurrence is kept and the input order is preserved.
 *
 * @param values - Lookup values, possibly with empty or duplicate entries.
 * @returns A new array containing the original objects that were kept.
 */
function uniqueLookupValues(values: BlacklistLookupValue[]) {
  const seenKeys = new Set<string>();
  const uniqueValues: BlacklistLookupValue[] = [];
  for (const entry of values) {
    if (!entry.normalizedValue) {
      continue;
    }
    const dedupeKey = `${entry.type}:${entry.normalizedValue}`;
    if (seenKeys.has(dedupeKey)) {
      continue;
    }
    seenKeys.add(dedupeKey);
    uniqueValues.push(entry);
  }
  return uniqueValues;
}
/**
 * Builds the set of (type, normalized value) pairs under which a candidate
 * could appear on the blacklist.
 *
 * Order of the produced values: place id, source business id (falling back
 * to the place id), website domain, company name, phone as digits, phone
 * trimmed and lower-cased, then every valid e-mail address. Empty values and
 * repeats are removed.
 *
 * @param candidate - Candidate to derive lookup values from.
 * @returns De-duplicated lookup values.
 */
export function getBlacklistLookupValues(
  candidate: GooglePlaceCandidate,
): BlacklistLookupValue[] {
  const { placeId, sourceBusinessId, websiteDomain, businessName, phone } = candidate;

  const lookups: BlacklistLookupValue[] = [
    { type: "google_place_id", normalizedValue: normalizeDomain(placeId) },
    {
      type: "source_business_id",
      normalizedValue: normalizeDomain(sourceBusinessId ?? placeId),
    },
    { type: "domain", normalizedValue: normalizeDomain(websiteDomain) },
    { type: "company", normalizedValue: normalizeText(businessName) },
    { type: "phone", normalizedValue: normalizePhone(phone) },
    // Second phone form: only trimmed and lower-cased.
    // NOTE(review): presumably matches rows stored without digit
    // normalization — confirm against how blacklist rows are written.
    { type: "phone", normalizedValue: normalizeDomain(phone) },
  ];

  for (const email of getCandidateEmailValues(candidate)) {
    lookups.push({ type: "email", normalizedValue: email });
  }

  return uniqueLookupValues(lookups);
}
/**
 * Checks whether a candidate matches an existing lead on a strong identifier.
 *
 * A lead matches when it shares the candidate's place id, source business id
 * (the candidate falls back to its place id), website domain, or any valid
 * e-mail address. Empty candidate values never match.
 *
 * @param candidate - Candidate to check.
 * @param existing - Already known leads.
 * @returns True when at least one existing lead matches.
 */
export function isDuplicateCandidate(
  candidate: GooglePlaceCandidate,
  existing: ExistingLeadLike[],
): boolean {
  const placeId = normalizeDomain(candidate.placeId);
  const sourceBusinessId = normalizeDomain(
    candidate.sourceBusinessId ?? candidate.placeId,
  );
  const domain = normalizeDomain(candidate.websiteDomain);
  const emails = new Set(getCandidateEmailValues(candidate));

  for (const entry of existing) {
    if (placeId !== "" && normalizeDomain(entry.googlePlaceId) === placeId) {
      return true;
    }
    if (
      sourceBusinessId !== "" &&
      normalizeDomain(entry.sourceBusinessId) === sourceBusinessId
    ) {
      return true;
    }
    if (domain !== "" && normalizeDomain(entry.websiteDomain) === domain) {
      return true;
    }
    const entryEmail = normalizeEmailAddress(entry.email);
    if (entryEmail !== null && emails.has(entryEmail)) {
      return true;
    }
  }
  return false;
}
/**
 * Checks whether a candidate looks like an existing lead based on weaker
 * signals.
 *
 * A lead matches when it has the same normalized company name and address as
 * the candidate (both non-empty), or the same non-empty normalized phone
 * number.
 *
 * @param candidate - Candidate to check.
 * @param existing - Already known leads.
 * @returns True when at least one existing lead matches.
 */
export function isProbableDuplicateCandidate(
  candidate: GooglePlaceCandidate,
  existing: ExistingLeadLike[],
): boolean {
  const company = normalizeText(candidate.businessName);
  const address = normalizeText(candidate.address);
  const phone = normalizePhone(candidate.phone);
  const canMatchCompanyAndAddress = company !== "" && address !== "";
  const canMatchPhone = phone !== "";

  return existing.some((entry) => {
    const sameCompanyAndAddress =
      canMatchCompanyAndAddress &&
      normalizeText(entry.companyName) === company &&
      normalizeText(entry.address) === address;
    if (sameCompanyAndAddress) {
      return true;
    }
    return canMatchPhone && normalizePhone(entry.phone) === phone;
  });
}
/**
 * Returns the blacklist rows that apply to a candidate.
 *
 * Each row is compared by its `type` against the matching normalized
 * candidate value: place id, source business id (falling back to the place
 * id), website domain, company name, phone or e-mail. Phone rows match on
 * either their `normalizedValue` or their digit-normalized `value`. Rows
 * with an empty `normalizedValue`, and empty candidate values, never match.
 *
 * @param candidate - Candidate to check.
 * @param blacklistRows - Blacklist entries to test.
 * @returns The matching rows, in input order.
 */
export function getBlacklistMatches(
  candidate: GooglePlaceCandidate,
  blacklistRows: BlacklistRow[],
) {
  const candidatePlaceId = normalizeDomain(candidate.placeId);
  const candidateSourceBusinessId = normalizeDomain(
    candidate.sourceBusinessId ?? candidate.placeId,
  );
  const candidateDomain = normalizeDomain(candidate.websiteDomain);
  const candidateCompany = normalizeText(candidate.businessName);
  const candidatePhone = normalizePhone(candidate.phone);
  // Normalized once up front instead of once per "email" row.
  const candidateEmails = new Set(getCandidateEmailValues(candidate));

  return blacklistRows.filter((row) => {
    if (!row.normalizedValue) {
      return false;
    }
    switch (row.type) {
      case "google_place_id":
        return candidatePlaceId !== "" && row.normalizedValue === candidatePlaceId;
      case "source_business_id":
        return (
          candidateSourceBusinessId !== "" &&
          row.normalizedValue === candidateSourceBusinessId
        );
      case "domain":
        return candidateDomain !== "" && row.normalizedValue === candidateDomain;
      case "company":
        return (
          candidateCompany !== "" && row.normalizedValue === candidateCompany
        );
      case "phone":
        // Also try the digit-normalized raw value in case the stored
        // normalized form differs from what `normalizePhone` produces.
        return (
          candidatePhone !== "" &&
          (row.normalizedValue === candidatePhone ||
            normalizePhone(row.value) === candidatePhone)
        );
      case "email":
        return candidateEmails.has(row.normalizedValue);
      default:
        return false;
    }
  });
}