feat: add website enrichment crawler
This commit is contained in:
163
tests/website-enrichment-schema.test.ts
Normal file
163
tests/website-enrichment-schema.test.ts
Normal file
@@ -0,0 +1,163 @@
|
||||
import assert from "node:assert/strict";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import test from "node:test";
|
||||
|
||||
import type { Doc } from "../convex/_generated/dataModel";
|
||||
import { RUN_EVENT_LEVELS, RUN_STATUSES, RUN_TYPES } from "../convex/domain";
|
||||
|
||||
/**
 * Compile-time check that two union types have exactly the same members.
 *
 * Resolves to `true` only when nothing remains of `A` after excluding `B`
 * and nothing remains of `B` after excluding `A`; resolves to `false` in
 * every other case. Each `Exclude` result is compared against `never`
 * inside a one-element tuple.
 */
type ExactSetEquality<A, B> = [Exclude<A, B>] extends [never]
  ? [Exclude<B, A>] extends [never]
    ? true
    : false
  : false;
|
||||
|
||||
/** Resolves to `true` when `undefined` is not assignable to `T`, `false` otherwise. */
type IsRequired<T> = undefined extends T ? false : true;

/** Resolves to `true` when `undefined` is assignable to `T`, `false` otherwise. */
type IsOptional<T> = undefined extends T ? true : false;
|
||||
|
||||
// Field types read from the generated Convex document types.
type AgentRunType = Doc<"agentRuns">["type"];
type AgentRunStatus = Doc<"agentRuns">["status"];
type AgentRunEventLevel = Doc<"agentRunEvents">["level"];

// Resolves to `never` when no member of AgentRunType is assignable to
// "website_enrichment", so assigning that literal to it fails to compile.
type AssertWebsiteEnrichmentRunType = Extract<AgentRunType, "website_enrichment">;

// Element types of the constant lists imported from ../convex/domain.
type RunTypeFromDomain = (typeof RUN_TYPES)[number];
type RunStatusFromDomain = (typeof RUN_STATUSES)[number];
type RunEventLevelFromDomain = (typeof RUN_EVENT_LEVELS)[number];
|
||||
|
||||
// `Extract` yields `never` when the requested literal is not part of the
// field's type, which turns a missing enum member into a compile error at
// the point where the literal is assigned.
type AssertLeadCrawlPageKind = Extract<
  Doc<"websiteCrawlPages">["pageKind"],
  "homepage"
>;
type AssertCrawlViewportDesktop = Extract<
  Doc<"websiteCrawlScreenshots">["viewport"],
  "desktop"
>;
type AssertCrawlViewportMobile = Extract<
  Doc<"websiteCrawlScreenshots">["viewport"],
  "mobile"
>;

// Plain field-type lookups; indexing a field that does not exist on the
// document type is itself a compile error.
type AssertNormalizedEmailType = Doc<"websiteEmailCandidates">["normalizedEmail"];
type AssertAcceptedEmailFlag = Doc<"websiteEmailCandidates">["accepted"];
type AssertTechnicalUsesHttps = Doc<"websiteTechnicalChecks">["usesHttps"];
type AssertTechnicalHasVisibleContactPath = Doc<"websiteTechnicalChecks">["hasVisibleContactPath"];
|
||||
|
||||
// `true` when "website_enrichment" is assignable to the element type of
// RUN_TYPES, `false` otherwise.
type AssertRunTypeInDomain = "website_enrichment" extends (typeof RUN_TYPES)[number]
  ? true
  : false;
|
||||
|
||||
// Each alias is `true` only when the schema field type and the matching
// domain list element type have exactly the same members.
type AssertRunTypeEnumParity = ExactSetEquality<AgentRunType, RunTypeFromDomain>;
type AssertRunStatusEnumParity = ExactSetEquality<
  AgentRunStatus,
  RunStatusFromDomain
>;
type AssertRunEventLevelEnumParity = ExactSetEquality<
  AgentRunEventLevel,
  RunEventLevelFromDomain
>;
|
||||
|
||||
// Raw UTF-8 text of convex/schema.ts, read synchronously once when this
// module loads. The path is resolved against the current working directory.
// NOTE(review): this presumably requires the test runner to be started from
// the repository root — confirm against the test script.
const schemaSource = readFileSync(
  join(process.cwd(), "convex", "schema.ts"),
  "utf8",
);
|
||||
|
||||
// Compile-time assertions: each constant only type-checks when its alias
// resolves to a type that accepts the assigned literal, so a mismatch shows
// up as a compile error on the corresponding line.
const _assertRunTypeSchemaHasWebsiteEnrichment: AssertWebsiteEnrichmentRunType =
  "website_enrichment";
const _assertRunTypeInDomainHasWebsiteEnrichment: AssertRunTypeInDomain = true;
const _assertRunTypeEnumParity: AssertRunTypeEnumParity = true;
const _assertRunStatusEnumParity: AssertRunStatusEnumParity = true;
const _assertRunEventLevelEnumParity: AssertRunEventLevelEnumParity = true;
const _assertPageKindSchemaIncludesHomepage: AssertLeadCrawlPageKind =
  "homepage";
const _assertScreenshotViewportTypeDesktop: AssertCrawlViewportDesktop = "desktop";
const _assertScreenshotViewportTypeMobile: AssertCrawlViewportMobile = "mobile";
|
||||
// `runId` must accept `undefined` on every crawl table: each IsOptional
// alias resolves to `false` otherwise, and assigning `true` stops compiling.
const _assertRunIdOptionalOnPages: IsOptional<Doc<"websiteCrawlPages">["runId"]> =
  true;
const _assertRunIdOptionalOnLinks: IsOptional<Doc<"websiteCrawlLinks">["runId"]> =
  true;
const _assertRunIdOptionalOnEmailCandidates: IsOptional<
  Doc<"websiteEmailCandidates">["runId"]
> = true;
const _assertRunIdOptionalOnScreenshots: IsOptional<
  Doc<"websiteCrawlScreenshots">["runId"]
> = true;
const _assertRunIdOptionalOnTechnicalChecks: IsOptional<
  Doc<"websiteTechnicalChecks">["runId"]
> = true;

// `createdAt` must not accept `undefined` on the same tables.
const _assertPagesHasCreatedAt: IsRequired<Doc<"websiteCrawlPages">["createdAt"]> =
  true;
const _assertLinksHasCreatedAt: IsRequired<Doc<"websiteCrawlLinks">["createdAt"]> =
  true;
const _assertEmailCandidatesHasCreatedAt: IsRequired<
  Doc<"websiteEmailCandidates">["createdAt"]
> = true;
const _assertScreenshotsHasCreatedAt: IsRequired<
  Doc<"websiteCrawlScreenshots">["createdAt"]
> = true;
const _assertTechnicalChecksHasCreatedAt: IsRequired<
  Doc<"websiteTechnicalChecks">["createdAt"]
> = true;
|
||||
// Value-domain assertions: each assignment compiles only when the field's
// type accepts the literal. Where both `true` and `false` are assigned to
// the same alias, the field type has to accept either value.
const _assertWebsiteEmailCandidatesNormalizedEmail: AssertNormalizedEmailType = "user@example.com";
const _assertEmailAcceptedTrue: AssertAcceptedEmailFlag = true;
const _assertEmailAcceptedFalse: AssertAcceptedEmailFlag = false;

// The screenshot's `storageId` must not accept `undefined`.
const _assertScreenshotStorageIdRequired: IsRequired<
  Doc<"websiteCrawlScreenshots">["storageId"]
> = true;

const _assertTechnicalUsesHttpsTrue: AssertTechnicalUsesHttps = true;
const _assertTechnicalUsesHttpsFalse: AssertTechnicalUsesHttps = false;
const _assertTechnicalMissingTitleFalse: Doc<"websiteTechnicalChecks">["missingTitle"] =
  false;
const _assertTechnicalMissingMetaDescriptionTrue: Doc<"websiteTechnicalChecks">["missingMetaDescription"] =
  true;
const _assertTechnicalHasVisibleContactPathTrue: AssertTechnicalHasVisibleContactPath =
  true;
const _assertTechnicalHasVisibleContactPathFalse: AssertTechnicalHasVisibleContactPath =
  false;
|
||||
|
||||
// Convex index structure can't be asserted from Doc types safely; this test validates
// field contracts and value domains that are practical to verify at compile/runtime.
//
// The constants checked here are typed so that a schema mismatch already
// fails compilation; the runtime assertions below re-check the values each
// constant was assigned.
test("website enrichment schema contracts are present", () => {
  // Run type and enum parity between the schema and the domain lists.
  assert.equal(_assertRunTypeSchemaHasWebsiteEnrichment, "website_enrichment");
  assert.equal(_assertRunTypeInDomainHasWebsiteEnrichment, true);
  assert.equal(_assertRunTypeEnumParity, true);
  assert.equal(_assertRunStatusEnumParity, true);
  assert.equal(_assertRunEventLevelEnumParity, true);

  // Page kind and screenshot viewport literals.
  assert.equal(_assertPageKindSchemaIncludesHomepage, "homepage");
  assert.equal(_assertScreenshotViewportTypeDesktop, "desktop");
  assert.equal(_assertScreenshotViewportTypeMobile, "mobile");

  // `runId` is optional on every crawl table.
  assert.equal(_assertRunIdOptionalOnPages, true);
  assert.equal(_assertRunIdOptionalOnLinks, true);
  assert.equal(_assertRunIdOptionalOnEmailCandidates, true);
  assert.equal(_assertRunIdOptionalOnScreenshots, true);
  assert.equal(_assertRunIdOptionalOnTechnicalChecks, true);

  // `createdAt` and the screenshot `storageId` are required.
  assert.equal(_assertPagesHasCreatedAt, true);
  assert.equal(_assertLinksHasCreatedAt, true);
  assert.equal(_assertEmailCandidatesHasCreatedAt, true);
  assert.equal(_assertScreenshotsHasCreatedAt, true);
  assert.equal(_assertTechnicalChecksHasCreatedAt, true);
  assert.equal(_assertScreenshotStorageIdRequired, true);

  // Email candidate fields.
  assert.equal(_assertWebsiteEmailCandidatesNormalizedEmail, "user@example.com");
  assert.equal(_assertEmailAcceptedTrue, true);
  assert.equal(_assertEmailAcceptedFalse, false);

  // Technical check flags.
  assert.equal(_assertTechnicalUsesHttpsTrue, true);
  assert.equal(_assertTechnicalUsesHttpsFalse, false);
  assert.equal(_assertTechnicalMissingTitleFalse, false);
  assert.equal(_assertTechnicalMissingMetaDescriptionTrue, true);
  assert.equal(_assertTechnicalHasVisibleContactPathTrue, true);
  assert.equal(_assertTechnicalHasVisibleContactPathFalse, false);
});
|
||||
|
||||
// Text-level check of convex/schema.ts: the index name and its ordered field
// list must both appear somewhere in the schema source. The two checks are
// independent; neither verifies that the field list belongs to that index.
test("agentRuns schema defines lead-aware active-run index", () => {
  // Matches the ordered field list ["type", "status", "leadId"] while
  // tolerating formatter differences: single or double quotes, arbitrary
  // whitespace or line breaks between elements, and a trailing comma.
  const leadAwareIndexFields =
    /\[\s*(["'])type\1\s*,\s*(["'])status\2\s*,\s*(["'])leadId\3\s*,?\s*\]/;

  assert.match(
    schemaSource,
    leadAwareIndexFields,
    "Schema should include by_type_and_status_and_leadId index fields in order.",
  );
  assert.ok(
    schemaSource.includes('by_type_and_status_and_leadId'),
    "Schema should define the by_type_and_status_and_leadId index.",
  );
});
|
||||
Reference in New Issue
Block a user