// File: pitchfast/tests/lead-discovery-google.test.ts
// (Listing metadata carried over from the file viewer: 700 lines, 19 KiB, TypeScript.)

import assert from "node:assert/strict";
import test from "node:test";
import {
GOOGLE_PLACES_FIELD_MASK,
buildGeocodingUrl,
getUsableContactEmail,
getUsableContactEmailFromEntries,
getBlacklistMatches,
getBlacklistLookupValues,
getPlacesSearchSpec,
isProbableDuplicateCandidate,
isDuplicateCandidate,
normalizeEmailAddress,
normalizePlacesResponse,
parseGeocodingResponse,
} from "../lib/lead-discovery-google";
// A preset category ("Anwalt") is expected to yield a Nearby Search request
// restricted to a circle whose radius is given in meters (12 km -> 12_000).
test("places search spec maps known presets to nearby search and converts radius to meters", () => {
  // Shape of the request body this test inspects.
  type NearbyRequestBody = {
    includedTypes: string[];
    locationRestriction: {
      circle: {
        center: { latitude: number; longitude: number };
        radius: number;
      };
    };
  };

  const spec = getPlacesSearchSpec({
    categoryMode: "preset",
    category: "Anwalt",
    postalCode: "10115",
    latitude: 52.52,
    longitude: 13.405,
    radiusKm: 12,
  });

  assert.equal(spec.searchType, "nearby");
  assert.equal(spec.endpoint, "searchNearby");
  // Repeats the two checks above as a guard; presumably present so the
  // compiler narrows `spec` before `body` is read.
  if (spec.searchType !== "nearby" || spec.endpoint !== "searchNearby") {
    throw new Error("Expected nearby search spec for preset category.");
  }

  const body = spec.body as NearbyRequestBody;
  assert.deepEqual(body.includedTypes, ["lawyer"]);

  const { center, radius } = body.locationRestriction.circle;
  assert.equal(radius, 12_000);
  assert.equal(center.latitude, 52.52);
  assert.equal(center.longitude, 13.405);
});
// Custom search terms and preset categories without a nearby-search mapping
// ("Handwerk") are both expected to produce a Text Search request.
test("places search spec uses text search for custom/Anderes and includes query context", () => {
  type Circle = {
    center: { latitude: number; longitude: number };
    radius: number;
  };
  type TextRequestBody = {
    textQuery: string;
    locationBias?: { circle: Circle };
  };

  // --- custom category with coordinates -----------------------------------
  const custom = getPlacesSearchSpec({
    categoryMode: "custom",
    category: "Anderes",
    customSearchTerm: "Barber Shop für Hunde",
    postalCode: "80331",
    latitude: 48.137,
    longitude: 11.575,
    radiusKm: 5,
  });
  assert.equal(custom.searchType, "text");
  assert.equal(custom.endpoint, "searchText");
  if (custom.searchType !== "text" || custom.endpoint !== "searchText") {
    throw new Error("Expected text search spec for custom/Anderes.");
  }

  const customRequest = custom.body as TextRequestBody;
  // The query combines the custom term with postal code and country.
  assert.equal(customRequest.textQuery, "Barber Shop für Hunde in 80331 Deutschland");
  // Coordinates and radius (5 km -> 5_000) appear as a location bias.
  assert.deepEqual(customRequest.locationBias, {
    circle: {
      center: { latitude: 48.137, longitude: 11.575 },
      radius: 5_000,
    },
  });

  // --- preset category, no coordinates -------------------------------------
  const handwerk = getPlacesSearchSpec({
    categoryMode: "preset",
    category: "Handwerk",
    customSearchTerm: "ignored",
    postalCode: "80331",
    radiusKm: 5,
  });
  assert.equal(handwerk.searchType, "text");
  assert.equal(handwerk.endpoint, "searchText");
  if (handwerk.searchType !== "text" || handwerk.endpoint !== "searchText") {
    throw new Error("Expected text search spec for unmapped preset category.");
  }

  // The category name is used in the query; the custom term is not.
  const handwerkRequest = handwerk.body as Pick<TextRequestBody, "textQuery">;
  assert.equal(handwerkRequest.textQuery, "Handwerk in 80331 Deutschland");
});
// Parses the generated geocoding URL and checks its endpoint plus every
// query parameter the test cares about.
test("geocoding URL includes API key, DE region, and components filter", () => {
  const rawUrl = buildGeocodingUrl({ postalCode: "40210", apiKey: "geocode-key-123" });
  const parsed = new URL(rawUrl);

  assert.equal(
    `${parsed.origin}${parsed.pathname}`,
    "https://maps.googleapis.com/maps/api/geocode/json",
  );

  // [parameter name, expected value], checked in this order.
  const expectedQuery: ReadonlyArray<readonly [string, string]> = [
    ["address", "40210, Deutschland"],
    ["components", "country:DE|postal_code:40210"],
    ["language", "de"],
    ["region", "de"],
    ["key", "geocode-key-123"],
  ];
  for (const [param, expected] of expectedQuery) {
    assert.equal(parsed.searchParams.get(param), expected);
  }
});
// Covers both outcomes of the geocoding parser: an "OK" payload is mapped to
// a location record, and a "ZERO_RESULTS" payload makes it throw.
test("geocoding parser extracts location from OK response and rejects ZERO_RESULTS", () => {
  const { latitude, longitude, formattedAddress, placeId, fetchedAt } = parseGeocodingResponse(
    {
      status: "OK",
      results: [
        {
          formatted_address: "Berlin, 10115 Berlin, Deutschland",
          place_id: "place-id-1",
          geometry: {
            location: { lat: 52.5170365, lng: 13.3888599 },
          },
        },
      ],
    },
    1717480000000,
  );

  assert.equal(latitude, 52.5170365);
  assert.equal(longitude, 13.3888599);
  assert.equal(formattedAddress, "Berlin, 10115 Berlin, Deutschland");
  assert.equal(placeId, "place-id-1");
  // The second argument is echoed back as the fetch timestamp.
  assert.equal(fetchedAt, 1717480000000);

  // Accepts any Error whose message mentions the status or "no geocoding results".
  const reportsNoResults = (error: unknown): boolean =>
    error instanceof Error && /ZERO_RESULTS|no geocoding results/i.test(error.message);

  const parseEmptyResponse = () =>
    parseGeocodingResponse({ status: "ZERO_RESULTS", results: [] }, 1717480000123);

  assert.throws(parseEmptyResponse, reportsNoResults);
});
// Feeds one Places API result through the normalizer and compares the full
// normalized record, then pins the exact field mask constant.
test("places normalization maps source metadata and normalizes website domain", () => {
  const fetchedAt = 1717480001000;

  // Expected output: the website URL is kept verbatim, the domain is
  // lower-cased without "www.", and the national phone number is used.
  const expectedLead = {
    placeId: "place-1",
    businessName: "Beispiel Café",
    address: "Hauptstraße 1, 60311 Frankfurt am Main, Deutschland",
    websiteUrl: "https://www.Example.De/some-path",
    websiteDomain: "example.de",
    phone: "+49 30 123456",
    rating: 4.6,
    userRatingCount: 42,
    businessStatus: "OPERATIONAL",
    googleTypes: ["restaurant", "cafe"],
    googlePrimaryType: "restaurant",
    googleMapsUrl: "https://maps.google.com/place-id-1",
    sourceProvider: "google_places",
    sourceFetchedAt: fetchedAt,
  };

  const leads = normalizePlacesResponse(
    {
      places: [
        {
          id: "place-1",
          displayName: { text: "Beispiel Café" },
          formattedAddress: "Hauptstraße 1, 60311 Frankfurt am Main, Deutschland",
          websiteUri: "https://www.Example.De/some-path",
          nationalPhoneNumber: "+49 30 123456",
          internationalPhoneNumber: "+49 49 654321",
          rating: 4.6,
          userRatingCount: 42,
          businessStatus: "OPERATIONAL",
          types: ["restaurant", "cafe"],
          primaryType: "restaurant",
          googleMapsUri: "https://maps.google.com/place-id-1",
        },
      ],
    },
    fetchedAt,
  );

  assert.equal(leads.length, 1);
  assert.deepEqual(leads[0], expectedLead);

  // The field mask must list exactly the fields read above.
  assert.equal(
    GOOGLE_PLACES_FIELD_MASK,
    "places.id,places.displayName,places.formattedAddress,places.websiteUri,places.nationalPhoneNumber,places.internationalPhoneNumber,places.rating,places.userRatingCount,places.businessStatus,places.types,places.primaryType,places.googleMapsUri",
  );
});
// Exercises isDuplicateCandidate against two stored leads. Besides place id
// and website domain, the last scenarios also cover e-mail based matching.
test("duplicate detection uses placeId and websiteDomain", () => {
  const storedLeads = [
    { googlePlaceId: "dup-1", websiteDomain: "other.de", email: "blocked@example.de" },
    { googlePlaceId: "other-2", websiteDomain: "example.de", email: "blocked@example.de" },
  ];

  // Fields shared by every scenario; each call overrides only what it varies.
  const blankCandidate = {
    placeId: "none",
    businessName: "Test",
    address: "A",
    websiteUrl: null,
    websiteDomain: null,
    phone: null,
    rating: null,
    userRatingCount: null,
    businessStatus: null,
    googleTypes: [],
    googlePrimaryType: null,
    googleMapsUrl: null,
    sourceProvider: "google_places" as const,
    sourceFetchedAt: 0,
  };

  // Same place id as a stored lead -> duplicate.
  assert.equal(
    isDuplicateCandidate({ ...blankCandidate, placeId: "dup-1" }, storedLeads),
    true,
  );

  // Unknown place id, but the website domain is already stored -> duplicate.
  assert.equal(
    isDuplicateCandidate(
      {
        ...blankCandidate,
        websiteUrl: "https://www.example.de",
        websiteDomain: "example.de",
      },
      storedLeads,
    ),
    true,
  );

  // Neither the place id nor the domain is known -> not a duplicate.
  assert.equal(
    isDuplicateCandidate(
      {
        ...blankCandidate,
        websiteUrl: "https://www.new.de",
        websiteDomain: "new.de",
      },
      storedLeads,
    ),
    false,
  );

  // Only `websiteDomain` differs from stored data here (the URL still points
  // at example.de) and the contact e-mail is not a stored one -> not a duplicate.
  assert.equal(
    isDuplicateCandidate(
      {
        ...blankCandidate,
        websiteUrl: "https://www.example.de",
        websiteDomain: "new.de",
        contactEmails: [{ email: "Owner@Example.De", isBusinessContactAddress: false }],
      },
      storedLeads,
    ),
    false,
  );

  // Entirely new domain and contact e-mail -> not a duplicate.
  assert.equal(
    isDuplicateCandidate(
      {
        ...blankCandidate,
        websiteUrl: "https://www.new.de",
        websiteDomain: "new.de",
        contactEmails: [{ email: "newlead@new.de" }],
      },
      storedLeads,
    ),
    false,
  );

  // The candidate e-mail equals a stored e-mail apart from letter case -> duplicate.
  assert.equal(
    isDuplicateCandidate(
      {
        ...blankCandidate,
        websiteUrl: "https://www.example.de",
        websiteDomain: "new.de",
        email: "Blocked@Example.De",
      },
      storedLeads,
    ),
    true,
  );
});
// Exercises isProbableDuplicateCandidate against a single stored lead using
// three candidates: same name+address, same phone in another notation, and
// a candidate that shares nothing with the stored lead.
test("probable duplicates are detected by normalized company+address or normalized phone", () => {
  const storedLeads = [
    {
      googlePlaceId: "dup-1",
      companyName: "Muster GmbH",
      address: "Hauptstraße 1, 60311 Frankfurt am Main",
      phone: "+49 30 123456",
    },
  ];

  // Fields that are identical (and irrelevant) across all three candidates.
  const sharedFields = {
    websiteUrl: null,
    websiteDomain: null,
    rating: null,
    userRatingCount: null,
    businessStatus: null,
    googleTypes: [],
    googlePrimaryType: null,
    googleMapsUrl: null,
    sourceProvider: "google_places" as const,
    sourceFetchedAt: 0,
  };

  // Different place id, but company name and address equal the stored lead.
  assert.equal(
    isProbableDuplicateCandidate(
      {
        ...sharedFields,
        placeId: "none-1",
        businessName: "Muster GmbH",
        address: "Hauptstraße 1, 60311 Frankfurt am Main",
        phone: null,
      },
      storedLeads,
    ),
    true,
  );

  // Name and address differ; the phone is the stored number written with a
  // "0049" prefix instead of "+49".
  assert.equal(
    isProbableDuplicateCandidate(
      {
        ...sharedFields,
        placeId: "none-2",
        businessName: "Other GmbH",
        address: "Nebenstraße 9",
        phone: "0049 30 123456",
      },
      storedLeads,
    ),
    true,
  );

  // Name, address and phone all differ from the stored lead.
  assert.equal(
    isProbableDuplicateCandidate(
      {
        ...sharedFields,
        placeId: "none-3",
        businessName: "Different GmbH",
        address: "Musterallee 5",
        phone: "+49 89 999999",
      },
      storedLeads,
    ),
    false,
  );
});
// Verifies the blacklist lookup values derived from a candidate and the
// blacklist entries that match it. Although the title lists four types, the
// assertions below also cover e-mail lookups and matches.
test("blacklist matches include google_place_id, domain, company and phone", () => {
  const candidate = {
    placeId: "place-blacklisted",
    businessName: "Muster GmbH",
    address: "A",
    websiteUrl: "https://www.Blocked.de",
    websiteDomain: "blocked.de",
    phone: "+49 30 555 123",
    email: "Info@Blocked.De",
    contactEmails: [{ email: "Hello@blocked.de", isBusinessContactAddress: false }],
    rating: null,
    userRatingCount: null,
    businessStatus: null,
    googleTypes: [],
    googlePrimaryType: null,
    googleMapsUrl: null,
    sourceProvider: "google_places" as const,
    sourceFetchedAt: 0,
  };

  // Expected lookup values, in order: place id, domain, lower-cased company,
  // the phone as digits only and as written, then both e-mails lower-cased.
  assert.deepEqual(getBlacklistLookupValues(candidate), [
    { type: "google_place_id", normalizedValue: "place-blacklisted" },
    { type: "domain", normalizedValue: "blocked.de" },
    { type: "company", normalizedValue: "muster gmbh" },
    { type: "phone", normalizedValue: "4930555123" },
    { type: "phone", normalizedValue: "+49 30 555 123" },
    { type: "email", normalizedValue: "info@blocked.de" },
    { type: "email", normalizedValue: "hello@blocked.de" },
  ]);

  const matches = getBlacklistMatches(
    candidate,
    [
      {
        type: "google_place_id",
        value: "place-blacklisted",
        normalizedValue: "place-blacklisted",
      },
      { type: "domain", value: "blocked.de", normalizedValue: "blocked.de" },
      { type: "company", value: "Muster GmbH", normalizedValue: "muster gmbh" },
      { type: "phone", value: "+49 30 555 123", normalizedValue: "4930555123" },
      {
        type: "phone",
        value: "+49 30 555 123",
        normalizedValue: "+49 30 555 123",
      },
      // Decoys: same types as real matches, but values the candidate does not have.
      { type: "email", value: "x@example.de", normalizedValue: "x@example.de" },
      { type: "phone", value: "+49 30 999 999", normalizedValue: "4930999999" },
      {
        type: "email",
        value: "Info@Blocked.De",
        normalizedValue: "info@blocked.de",
      },
    ],
  );

  // Sorted on both sides so the assertion does not depend on match order.
  const matchTypes = matches.map((match) => match.type).sort();
  assert.deepEqual(
    matchTypes,
    ["company", "domain", "google_place_id", "phone", "phone", "email"].sort(),
  );

  // Comparing types alone cannot tell a real match from a decoy of the same
  // type, so also pin exactly which entries matched.
  const matchedEntries = matches
    .map((match) => `${match.type}:${match.normalizedValue}`)
    .sort();
  assert.deepEqual(
    matchedEntries,
    [
      "company:muster gmbh",
      "domain:blocked.de",
      "email:info@blocked.de",
      "google_place_id:place-blacklisted",
      "phone:+49 30 555 123",
      "phone:4930555123",
    ].sort(),
  );
});
// For a candidate with no website and no e-mail, the lookup values are
// expected to contain only the place id, the lower-cased company name and the
// two phone forms.
// NOTE(review): the place id ("place-company-spaces") suggests the business
// name was meant to contain irregular spacing — confirm the fixture.
test("company normalization for blacklist lookup uses text normalization", () => {
  const lead = {
    placeId: "place-company-spaces",
    businessName: "Muster GmbH",
    address: "A",
    websiteUrl: null,
    websiteDomain: null,
    phone: "+49 30 555 123",
    rating: null,
    userRatingCount: null,
    businessStatus: null,
    googleTypes: [],
    googlePrimaryType: null,
    googleMapsUrl: null,
    sourceProvider: "google_places" as const,
    sourceFetchedAt: 0,
  };

  const expectedLookups = [
    { type: "google_place_id", normalizedValue: "place-company-spaces" },
    { type: "company", normalizedValue: "muster gmbh" },
    { type: "phone", normalizedValue: "4930555123" },
    { type: "phone", normalizedValue: "+49 30 555 123" },
  ];

  const lookups = getBlacklistLookupValues(lead);
  assert.deepEqual(lookups, expectedLookups);
});
// A company blacklist entry should match a candidate whose business name
// differs from it only by letter case and whitespace.
test("company blacklist matching supports whitespace-normalized names", () => {
  const candidate = {
    placeId: "place-company-spaces-2",
    // Deliberately padded and multi-spaced so the whitespace handling named in
    // the title is actually exercised.
    // NOTE(review): assumes company normalization trims and collapses runs of
    // whitespace — confirm against the library.
    businessName: "  Muster   GmbH ",
    address: "A",
    websiteUrl: null,
    websiteDomain: null,
    phone: null,
    rating: null,
    userRatingCount: null,
    businessStatus: null,
    googleTypes: [],
    googlePrimaryType: null,
    googleMapsUrl: null,
    sourceProvider: "google_places" as const,
    sourceFetchedAt: 0,
  };

  const matches = getBlacklistMatches(candidate, [
    { type: "company", value: "Muster GmbH", normalizedValue: "muster gmbh" },
  ]);

  assert.equal(matches.length, 1);
  // The length check above guarantees index 0 exists.
  assert.equal(matches[0]!.normalizedValue, "muster gmbh");
});
// Table of raw inputs and the value normalizeEmailAddress is expected to
// return for each; `null` marks an input that must be rejected.
test("email normalization strips whitespace, lowercases, and rejects malformed addresses", () => {
  const expectations: ReadonlyArray<readonly [string, string | null]> = [
    [" INFO@Example.DE ", "info@example.de"], // surrounding spaces, upper case
    ["hello@domain", null], // domain without a dot
    ["no-at-symbol", null],
    ["@missing-local.com", null],
    ["name@", null],
    ["", null],
    ["näm@beispiel.de", null], // non-ASCII character in the local part
  ];

  for (const [raw, normalized] of expectations) {
    assert.equal(normalizeEmailAddress(raw), normalized);
  }
});
// Three scenarios for getUsableContactEmail on otherwise identical leads:
// a generic alias among several entries, a named address without the
// business-contact flag, and the same named address with the flag set.
test("usable email helper prefers generic business aliases and requires explicit metadata for named contacts", () => {
  // Everything except place id, address and contact e-mails is constant.
  const bakery = {
    businessName: "Bäckerei",
    websiteUrl: null,
    websiteDomain: null,
    phone: null,
    rating: null,
    userRatingCount: null,
    businessStatus: null,
    googleTypes: [],
    googlePrimaryType: null,
    googleMapsUrl: null,
    sourceProvider: "google_places" as const,
    sourceFetchedAt: 0,
  };

  // The "Hello@" alias is expected to win, lower-cased, even though a later
  // entry is flagged as a business contact address.
  const fromMixedEntries = getUsableContactEmail({
    ...bakery,
    placeId: "place-1",
    address: "Musterweg 1",
    contactEmails: [
      { email: "müller@bäckerei.de", isBusinessContactAddress: false },
      { email: "Hello@Bäckerei.De", isBusinessContactAddress: false },
      { email: "owner@Bäckerei.De", isBusinessContactAddress: true },
    ],
  });
  assert.deepEqual(fromMixedEntries, {
    email: "hello@bäckerei.de",
    emailSource: null,
    contactPerson: null,
  });

  // "owner@" without the business-contact flag yields no usable e-mail.
  const fromUnflaggedOwner = getUsableContactEmail({
    ...bakery,
    placeId: "place-2",
    address: "Musterweg 2",
    contactEmails: [{ email: "owner@Bäckerei.De", isBusinessContactAddress: false }],
  });
  assert.equal(fromUnflaggedOwner, null);

  // The same address with the flag set is accepted and lower-cased.
  const fromFlaggedOwner = getUsableContactEmail({
    ...bakery,
    placeId: "place-3",
    address: "Musterweg 3",
    contactEmails: [{ email: "owner@Bäckerei.De", isBusinessContactAddress: true }],
  });
  assert.deepEqual(fromFlaggedOwner, {
    email: "owner@bäckerei.de",
    emailSource: null,
    contactPerson: null,
  });
});
// Exercises getUsableContactEmailFromEntries directly with plain entry lists:
// a generic alias among several entries, a lone unflagged named address, and
// a syntactically invalid address that carries the business-contact flag.
test("standalone contact-email rule helper rejects invalid entries and prefers generic aliases", () => {
  // Small builder to keep the entry lists readable.
  const entry = (email: string, isBusinessContactAddress: boolean) => ({
    email,
    isBusinessContactAddress,
  });

  // "hello@" is expected to be chosen although "support@" precedes it.
  const pickedGeneric = getUsableContactEmailFromEntries([
    entry("owner@firma.de", false),
    entry("support@firma.de", false),
    entry("hello@firma.de", false),
  ]);
  assert.deepEqual(pickedGeneric, {
    email: "hello@firma.de",
    emailSource: null,
    contactPerson: null,
  });

  // A named address without the flag is not usable.
  const unflaggedNamed = getUsableContactEmailFromEntries([entry("owner@firma.de", false)]);
  assert.equal(unflaggedNamed, null);

  // The flag does not rescue an address with no "@".
  const malformed = getUsableContactEmailFromEntries([entry("no-at-symbol", true)]);
  assert.equal(malformed, null);
});