feat: add website enrichment crawler
This commit is contained in:
113
convex/schema.ts
113
convex/schema.ts
@@ -1,6 +1,11 @@
|
||||
import { defineSchema, defineTable } from "convex/server";
|
||||
import { v } from "convex/values";
|
||||
import { tables as authTables } from "./betterAuth/schema";
|
||||
import {
|
||||
RUN_EVENT_LEVELS,
|
||||
RUN_STATUSES,
|
||||
RUN_TYPES,
|
||||
} from "./domain";
|
||||
|
||||
const campaignStatus = v.union(v.literal("active"), v.literal("paused"));
|
||||
const leadPriority = v.union(
|
||||
@@ -75,24 +80,19 @@ const blacklistType = v.union(
|
||||
v.literal("company"),
|
||||
v.literal("google_place_id"),
|
||||
);
|
||||
const runType = v.union(
|
||||
v.literal("campaign"),
|
||||
v.literal("lead_discovery"),
|
||||
v.literal("audit"),
|
||||
v.literal("outreach"),
|
||||
v.literal("lifecycle"),
|
||||
);
|
||||
const runStatus = v.union(
|
||||
v.literal("pending"),
|
||||
v.literal("running"),
|
||||
v.literal("succeeded"),
|
||||
v.literal("failed"),
|
||||
v.literal("canceled"),
|
||||
const websiteEnrichmentPageKind = v.union(
|
||||
v.literal("homepage"),
|
||||
v.literal("contact"),
|
||||
v.literal("impressum"),
|
||||
v.literal("services"),
|
||||
v.literal("about"),
|
||||
v.literal("team"),
|
||||
v.literal("other"),
|
||||
);
|
||||
const runType = v.union(...RUN_TYPES.map((type) => v.literal(type)));
|
||||
const runStatus = v.union(...RUN_STATUSES.map((status) => v.literal(status)));
|
||||
const runEventLevel = v.union(
|
||||
v.literal("info"),
|
||||
v.literal("warning"),
|
||||
v.literal("error"),
|
||||
...RUN_EVENT_LEVELS.map((level) => v.literal(level)),
|
||||
);
|
||||
const screenshotViewport = v.union(v.literal("desktop"), v.literal("mobile"));
|
||||
const settingsValue = v.union(v.string(), v.number(), v.boolean(), v.null());
|
||||
@@ -255,6 +255,85 @@ export default defineSchema({
|
||||
.index("by_auditId_and_viewport", ["auditId", "viewport"])
|
||||
.index("by_storageId", ["storageId"]),
|
||||
|
||||
websiteCrawlPages: defineTable({
|
||||
leadId: v.id("leads"),
|
||||
runId: v.optional(v.id("agentRuns")),
|
||||
sourceUrl: v.string(),
|
||||
finalUrl: v.string(),
|
||||
pageKind: websiteEnrichmentPageKind,
|
||||
title: v.optional(v.string()),
|
||||
metaDescription: v.optional(v.string()),
|
||||
headings: v.array(v.string()),
|
||||
visibleTextExcerpt: v.optional(v.string()),
|
||||
hasContactFormSignal: v.boolean(),
|
||||
hasContactCtaSignal: v.boolean(),
|
||||
createdAt: v.number(),
|
||||
})
|
||||
.index("by_leadId", ["leadId"])
|
||||
.index("by_runId", ["runId"])
|
||||
.index("by_leadId_and_createdAt", ["leadId", "createdAt"]),
|
||||
|
||||
websiteCrawlLinks: defineTable({
|
||||
leadId: v.id("leads"),
|
||||
runId: v.optional(v.id("agentRuns")),
|
||||
pageUrl: v.string(),
|
||||
href: v.string(),
|
||||
text: v.optional(v.string()),
|
||||
isInternal: v.boolean(),
|
||||
isBroken: v.optional(v.boolean()),
|
||||
createdAt: v.number(),
|
||||
})
|
||||
.index("by_leadId", ["leadId"])
|
||||
.index("by_runId", ["runId"]),
|
||||
|
||||
websiteEmailCandidates: defineTable({
|
||||
leadId: v.id("leads"),
|
||||
runId: v.optional(v.id("agentRuns")),
|
||||
email: v.string(),
|
||||
normalizedEmail: v.string(),
|
||||
emailSource: v.string(),
|
||||
sourceUrl: v.string(),
|
||||
contactPerson: v.optional(v.string()),
|
||||
isBusinessContactAddress: v.boolean(),
|
||||
isGeneric: v.boolean(),
|
||||
accepted: v.boolean(),
|
||||
createdAt: v.number(),
|
||||
})
|
||||
.index("by_leadId", ["leadId"])
|
||||
.index("by_normalizedEmail", ["normalizedEmail"])
|
||||
.index("by_runId", ["runId"]),
|
||||
|
||||
websiteCrawlScreenshots: defineTable({
|
||||
leadId: v.id("leads"),
|
||||
runId: v.optional(v.id("agentRuns")),
|
||||
storageId: v.id("_storage"),
|
||||
viewport: screenshotViewport,
|
||||
sourceUrl: v.string(),
|
||||
capturedAt: v.number(),
|
||||
width: v.number(),
|
||||
height: v.number(),
|
||||
mimeType: v.string(),
|
||||
createdAt: v.number(),
|
||||
})
|
||||
.index("by_leadId", ["leadId"])
|
||||
.index("by_runId", ["runId"])
|
||||
.index("by_storageId", ["storageId"]),
|
||||
|
||||
websiteTechnicalChecks: defineTable({
|
||||
leadId: v.id("leads"),
|
||||
runId: v.optional(v.id("agentRuns")),
|
||||
sourceUrl: v.string(),
|
||||
finalUrl: v.optional(v.string()),
|
||||
usesHttps: v.boolean(),
|
||||
missingTitle: v.boolean(),
|
||||
missingMetaDescription: v.boolean(),
|
||||
hasVisibleContactPath: v.boolean(),
|
||||
brokenInternalLinkCount: v.number(),
|
||||
createdAt: v.number(),
|
||||
})
|
||||
.index("by_leadId", ["leadId"])
|
||||
.index("by_runId", ["runId"]),
|
||||
|
||||
outreachRecords: defineTable({
|
||||
leadId: v.id("leads"),
|
||||
auditId: v.optional(v.id("audits")),
|
||||
@@ -309,7 +388,9 @@ export default defineSchema({
|
||||
updatedAt: v.number(),
|
||||
})
|
||||
.index("by_status", ["status"])
|
||||
.index("by_type", ["type"])
|
||||
.index("by_type_and_status", ["type", "status"])
|
||||
.index("by_type_and_status_and_leadId", ["type", "status", "leadId"])
|
||||
.index("by_campaignId_and_updatedAt", ["campaignId", "updatedAt"])
|
||||
.index("by_campaignId_and_status", ["campaignId", "status"])
|
||||
.index("by_auditId", ["auditId"]),
|
||||
|
||||
Reference in New Issue
Block a user