feat: add website enrichment crawler

This commit is contained in:
2026-06-04 20:29:23 +02:00
parent ca42c8d5a6
commit 1f6e31c01c
25 changed files with 3539 additions and 56 deletions

View File

@@ -17,6 +17,7 @@ import {
buildLeadDiscoveryLeadRecord,
buildLeadDiscoveryCounters,
getLeadDiscoveryPriority,
shouldScheduleWebsiteEnrichment,
} from "../lib/lead-discovery-run";
import { calculateNextRunAt } from "../lib/campaign-scheduling";
@@ -214,6 +215,11 @@ export const processCampaignRun = internalAction({
skippedDuplicates: number;
skippedBlacklisted: number;
errors: number;
websiteEnrichmentQueue: Array<{
leadId: Id<"leads">;
companyName: string;
website: string;
}>;
} = await ctx.runMutation(internal.leadDiscovery.persistDiscoveredLeads, {
runId: args.runId,
campaignId: campaign._id,
@@ -223,6 +229,31 @@ export const processCampaignRun = internalAction({
candidates,
});
// For every entry the persist mutation returned in websiteEnrichmentQueue:
// 1) call the queueLeadEnrichment mutation for that lead, and
// 2) append an info-level event to the current run naming the company and website.
// Both mutations are awaited one after the other, so entries are handled
// sequentially in queue order.
for (const enrichment of result.websiteEnrichmentQueue) {
// Only the lead id and the current run id (as parentRunId) are passed on here;
// what the mutation does with them is defined elsewhere — presumably it
// schedules the actual crawl (TODO confirm in websiteEnrichment).
await ctx.runMutation(internal.websiteEnrichment.queueLeadEnrichment, {
leadId: enrichment.leadId,
parentRunId: args.runId,
});
// Run-log entry; the German message reads "Website contact enrichment scheduled."
await ctx.runMutation(internal.leadDiscovery.appendRunEvent, {
runId: args.runId,
level: "info",
message: "Website-Kontaktanreicherung geplant.",
details: [
{
label: "Unternehmen",
value: enrichment.companyName,
// NOTE(review): source is hard-coded to "google_places" for both details —
// confirm that every queued lead really originates from Google Places.
source: "google_places",
},
{
label: "Website",
value: enrichment.website,
source: "google_places",
},
],
});
}
await ctx.runMutation(internal.leadDiscovery.finishCampaignRun, {
runId: args.runId,
status: "succeeded",
@@ -275,7 +306,9 @@ export const startCampaignRun = internalMutation({
const activeRunning = await ctx.db
.query("agentRuns")
.withIndex("by_status", (q) => q.eq("status", "running"))
.withIndex("by_type_and_status", (q) =>
q.eq("type", "campaign").eq("status", "running"),
)
.take(1);
if (activeRunning.length > 0) {
@@ -390,6 +423,11 @@ export const persistDiscoveredLeads = internalMutation({
let skippedDuplicates = 0;
let skippedBlacklisted = 0;
let errors = 0;
// Accumulates one entry per lead that should receive website enrichment.
// The array is filled while candidates are processed and is handed back to the
// caller as part of this mutation's result object.
const websiteEnrichmentQueue: Array<{
leadId: Id<"leads">;
companyName: string;
website: string;
}> = [];
for (const candidate of args.candidates) {
if (leadsCreated >= args.maxNewLeads) {
@@ -556,8 +594,15 @@ export const persistDiscoveredLeads = internalMutation({
lead.duplicateOfLeadId = probableDuplicateLead._id;
}
await ctx.db.insert("leads", lead);
const leadId = await ctx.db.insert("leads", lead);
leadsCreated += 1;
// Remember this lead for website enrichment when the imported
// shouldScheduleWebsiteEnrichment helper accepts it (its criteria live in
// ../lib/lead-discovery-run and are not visible here).
if (shouldScheduleWebsiteEnrichment(lead)) {
websiteEnrichmentQueue.push({
leadId,
companyName: lead.companyName,
// Prefer the domain, then the full URL; "unbekannt" (German for "unknown")
// is the placeholder when neither is set.
website: lead.websiteDomain ?? lead.websiteUrl ?? "unbekannt",
});
}
await ctx.db.insert("agentRunEvents", {
runId: args.runId,
level: "info",
@@ -589,6 +634,7 @@ export const persistDiscoveredLeads = internalMutation({
skippedDuplicates,
skippedBlacklisted,
errors,
websiteEnrichmentQueue,
};
},
});