diff --git a/apps/web/scripts/drain-review-queue.ts b/apps/web/scripts/drain-review-queue.ts index ffc63d5..22472e1 100644 --- a/apps/web/scripts/drain-review-queue.ts +++ b/apps/web/scripts/drain-review-queue.ts @@ -143,7 +143,7 @@ type ProfileRow = { previous_company_names: string[] | null; }; -type SponsorRow = { town_city: string | null; route: string }; +type SponsorRow = { route: string }; type StrategyOutcome = | { action: 'swap'; reason: string; s_e?: number; s_p?: number } @@ -180,26 +180,26 @@ async function loadProfiles( return new Map(rows.map((r) => [r.company_number, r])); } -/** Picks the most common (town_city, route) tuple per organisation_name. - * HMRC publishes one row per worker, so an org with mixed routes/locations - * picks the dominant pairing — same heuristic the inline scorer will use. */ +/** Picks the most common route per organisation_name. HMRC publishes one + * row per worker, so an org with mixed routes picks the dominant one — + * same heuristic the inline scorer will use. (The 2026-06 feed dropped + * town/county, so the locality tiebreak is inert.) 
*/ async function loadSponsors( orgNames: string[], ): Promise> { if (orgNames.length === 0) return new Map(); const rows = (await sql` SELECT DISTINCT ON (organisation_name) - organisation_name, town_city, route + organisation_name, route FROM ( - SELECT organisation_name, town_city, route, COUNT(*) AS n + SELECT organisation_name, route, COUNT(*) AS n FROM hmrc_skilled_workers WHERE organisation_name = ANY(${orgNames}) - GROUP BY organisation_name, town_city, route + GROUP BY organisation_name, route ) ranked - ORDER BY organisation_name, n DESC, route, town_city NULLS LAST + ORDER BY organisation_name, n DESC, route `) as { organisation_name: string; - town_city: string | null; route: string; }[]; return new Map(rows.map((r) => [r.organisation_name, r])); @@ -285,7 +285,7 @@ function profileRowToFullProfile(row: ProfileRow): CHFullProfile { } function sponsorRowToScorerSponsor(row: SponsorRow): ScorerSponsor { - return { route: row.route, townCity: row.town_city }; + return { route: row.route, townCity: null }; } // ───────────────────────────────────────────────────────────────────────────── diff --git a/apps/web/scripts/generate-sitemap.ts b/apps/web/scripts/generate-sitemap.ts index b96f4e3..2dde220 100644 --- a/apps/web/scripts/generate-sitemap.ts +++ b/apps/web/scripts/generate-sitemap.ts @@ -6,7 +6,7 @@ import { hmrcSkilledWorkers, } from '@ss/db'; import { Glob } from 'bun'; -import { eq } from 'drizzle-orm'; +import { eq, sql } from 'drizzle-orm'; import { db } from '../src/db.server'; @@ -30,9 +30,13 @@ async function generate() { } // Single pass over all rows; LEFT JOIN keeps HMRC entries without a CH match. + // One URL per (org, rating, route) group: multi-licence siblings 301 to the + // canonical min(hash) page, so only that hash belongs in the sitemap. + // updatedAt is constant per org (mapping PK is organisation_name), so adding + // it to GROUP BY never splits a group — it just keeps drizzle's Date mapping. 
const allRows = await db .select({ - hash: hmrcSkilledWorkers.hash, + hash: sql`min(${hmrcSkilledWorkers.hash})`, nameSlug: hmrcSkilledWorkers.nameSlug, updatedAt: companiesHouseProfiles.updatedAt, }) @@ -51,7 +55,14 @@ async function generate() { hmrcCompanyMapping.companyNumber, ), ) - .orderBy(hmrcSkilledWorkers.hash); + .groupBy( + hmrcSkilledWorkers.organisationName, + hmrcSkilledWorkers.nameSlug, + hmrcSkilledWorkers.typeRating, + hmrcSkilledWorkers.route, + companiesHouseProfiles.updatedAt, + ) + .orderBy(sql`min(${hmrcSkilledWorkers.hash})`); const entries = new Map( allRows.map((row) => [ diff --git a/apps/web/scripts/ingest-hmrc-csv.ts b/apps/web/scripts/ingest-hmrc-csv.ts index 4cbd947..0b7c6d9 100644 --- a/apps/web/scripts/ingest-hmrc-csv.ts +++ b/apps/web/scripts/ingest-hmrc-csv.ts @@ -5,14 +5,16 @@ import { slugify } from '../src/utils'; import { setGitHubOutput } from './ci-utils'; const EXPECTED_COLUMNS = [ + 'Sponsor Licence Number', 'Organisation Name', - 'Town/City', - 'County', - 'Type & Rating', - 'Route', + 'TierRating', + 'Migrant Classification', + 'Sponsor Status', ] as const; const BATCH_SIZE = 500; +// Must agree with sponsor_licence_number varchar(64) in packages/db/src/schema.ts +const LICENCE_MAX_LEN = 64; const force = process.argv.includes('--force'); const url = process.argv.filter((a) => !a.startsWith('--'))[2]; @@ -85,18 +87,20 @@ console.log(`Validated schema: ${records.length} records found`); // Step 4: Create staging table console.log('Creating staging table...'); await sql`DROP TABLE IF EXISTS "hmrc_skilled_workers_staging"`; -await sql` +// sql.query (not the tagged template): DDL can't take $n params, and the +// licence width interpolates from LICENCE_MAX_LEN so DDL and guard can't drift +await sql.query(` CREATE TABLE "hmrc_skilled_workers_staging" ( "id" serial PRIMARY KEY NOT NULL, "hash" varchar(11) NOT NULL UNIQUE, "organisation_name" varchar(255) NOT NULL, "name_slug" varchar(255) NOT NULL, - "town_city" 
varchar(100), - "county" varchar(100), + "sponsor_licence_number" varchar(${LICENCE_MAX_LEN}), + "sponsor_status" varchar(64), "type_rating" varchar(100) NOT NULL, "route" varchar(100) NOT NULL ) -`; +`); // Step 5: Bulk insert into staging table console.log( @@ -110,16 +114,15 @@ function clean(val: string | undefined): string | null { return trimmed; } +/** Mint the stable URL id from the licence-based row identity. Licence is a + * durable per-sponsor key, so hashes survive company renames and future + * ingests — org name is deliberately excluded. */ function computeHash( - orgName: string, - townCity: string | null, - county: string | null, + licence: string, typeRating: string, route: string, ): string { - const input = [orgName, townCity ?? '', county ?? '', typeRating, route].join( - '|', - ); + const input = [licence, typeRating, route].join('|'); const bytes = new Bun.CryptoHasher('sha256').update(input).digest(); // Take first 8 bytes (64 bits), encode as base64url, trim to 11 chars return Buffer.from(bytes.slice(0, 8)).toString('base64url').slice(0, 11); @@ -130,36 +133,72 @@ type CleanedRow = { hash: string; orgName: string; nameSlug: string; - townCity: string | null; - county: string | null; + licence: string; + status: string | null; typeRating: string; route: string; }; -const seen = new Set(); +const seen = new Map(); const dedupedRows: CleanedRow[] = []; +// Licence is the hash backbone: blank values collide distinct orgs into one +// hash (silently dropped by dedup), and >LICENCE_MAX_LEN chars aborts the batched INSERT +// mid-ingest with no row context. Fail fast naming the rows instead. 
+const invalidRows: string[] = []; -for (const r of records) { +for (const [i, r] of records.entries()) { + const rowNum = i + 2; // 1-based, after the header row + const licence = r['Sponsor Licence Number'].trim(); const orgName = r['Organisation Name'].trim(); - const townCity = clean(r['Town/City']); - const county = clean(r.County); - const typeRating = r['Type & Rating'].trim(); - const route = r.Route.trim(); - const hash = computeHash(orgName, townCity, county, typeRating, route); + const typeRating = r.TierRating.trim(); + const route = r['Migrant Classification'].trim(); + const status = clean(r['Sponsor Status']); + + if (!licence || licence.length > LICENCE_MAX_LEN) { + invalidRows.push( + `row ${rowNum} ("${orgName || '?'}"): bad Sponsor Licence Number ${JSON.stringify(licence)}`, + ); + continue; + } + if (status && status.length > 64) { + invalidRows.push( + `row ${rowNum} ("${orgName}"): Sponsor Status exceeds 64 chars (${status.length})`, + ); + continue; + } + + const hash = computeHash(licence, typeRating, route); const nameSlug = slugify(orgName) || hash; - if (!seen.has(hash)) { - seen.add(hash); - dedupedRows.push({ + const previous = seen.get(hash); + if (!previous) { + const row: CleanedRow = { hash, orgName, nameSlug, - townCity, - county, + licence, + status, typeRating, route, - }); + }; + seen.set(hash, row); + dedupedRows.push(row); + } else if (previous.orgName !== orgName || previous.status !== status) { + // Same licence|rating|route with a different identity: keeping either row + // picks an arbitrary name (and therefore CH mapping). Upstream anomaly. 
+ invalidRows.push( + `row ${rowNum} ("${orgName}"): conflicts with earlier "${previous.orgName}" sharing licence|rating|route (${hash})`, + ); + } +} + +if (invalidRows.length > 0) { + console.error(`Row validation failed for ${invalidRows.length} row(s):`); + for (const line of invalidRows.slice(0, 10)) console.error(` ${line}`); + if (invalidRows.length > 10) { + console.error(` …and ${invalidRows.length - 10} more`); } + process.exit(1); } console.log( @@ -181,15 +220,15 @@ for (let i = 0; i < dedupedRows.length; i += BATCH_SIZE) { r.hash, r.orgName, r.nameSlug, - r.townCity, - r.county, + r.licence, + r.status, r.typeRating, r.route, ); } await sql.query( - `INSERT INTO "hmrc_skilled_workers_staging" ("hash", "organisation_name", "name_slug", "town_city", "county", "type_rating", "route") VALUES ${placeholders.join(', ')}`, + `INSERT INTO "hmrc_skilled_workers_staging" ("hash", "organisation_name", "name_slug", "sponsor_licence_number", "sponsor_status", "type_rating", "route") VALUES ${placeholders.join(', ')}`, values, ); @@ -203,7 +242,7 @@ console.log('Building indexes on staging table...'); await Promise.all([ sql`CREATE INDEX "stg_idx_hmrc_org_name" ON "hmrc_skilled_workers_staging" USING btree ("organisation_name")`, sql`CREATE INDEX "stg_idx_hmrc_name_slug" ON "hmrc_skilled_workers_staging" USING btree ("name_slug")`, - sql`CREATE INDEX "stg_idx_hmrc_town_city" ON "hmrc_skilled_workers_staging" USING btree ("town_city")`, + sql`CREATE INDEX "stg_idx_hmrc_licence" ON "hmrc_skilled_workers_staging" USING btree ("sponsor_licence_number")`, sql`CREATE INDEX "stg_idx_hmrc_route" ON "hmrc_skilled_workers_staging" USING btree ("route")`, sql`CREATE INDEX "stg_idx_hmrc_org_name_trgm" ON "hmrc_skilled_workers_staging" USING gin ("organisation_name" gin_trgm_ops)`, ]); @@ -217,7 +256,7 @@ await sql.transaction([ sql`ALTER TABLE "hmrc_skilled_workers_staging" RENAME TO "hmrc_skilled_workers"`, sql`ALTER INDEX "stg_idx_hmrc_org_name" RENAME TO 
"idx_hmrc_org_name"`, sql`ALTER INDEX "stg_idx_hmrc_name_slug" RENAME TO "idx_hmrc_name_slug"`, - sql`ALTER INDEX "stg_idx_hmrc_town_city" RENAME TO "idx_hmrc_town_city"`, + sql`ALTER INDEX "stg_idx_hmrc_licence" RENAME TO "idx_hmrc_licence"`, sql`ALTER INDEX "stg_idx_hmrc_route" RENAME TO "idx_hmrc_route"`, sql`ALTER INDEX "stg_idx_hmrc_org_name_trgm" RENAME TO "idx_hmrc_org_name_trgm"`, sql`ALTER INDEX "hmrc_skilled_workers_staging_hash_key" RENAME TO "hmrc_skilled_workers_hash_unique"`, diff --git a/apps/web/scripts/seed-companies-house.ts b/apps/web/scripts/seed-companies-house.ts index 5604b2d..8baf4fc 100644 --- a/apps/web/scripts/seed-companies-house.ts +++ b/apps/web/scripts/seed-companies-house.ts @@ -60,15 +60,13 @@ async function fetchApi(path: string): Promise { return res.json(); } -// Get only org names that aren't already cached, plus a representative -// town_city/county per org for the locality tiebreaker in the verification -// pipeline. selectDistinctOn(orgName) collapses multi-row sponsors (one per +// Get only org names that aren't already cached. The 2026-06 HMRC feed +// dropped town/county, so the resolver's locality tiebreak runs inert. +// selectDistinctOn(orgName) collapses multi-row sponsors (one per // route/rating) to a single representative row. const uncached = await db .selectDistinctOn([hmrcSkilledWorkers.organisationName], { organisationName: hmrcSkilledWorkers.organisationName, - townCity: hmrcSkilledWorkers.townCity, - county: hmrcSkilledWorkers.county, }) .from(hmrcSkilledWorkers) .leftJoin( @@ -137,7 +135,7 @@ for (const row of uncached) { // point users at the wrong CH entity. See docs/hmrc-ch-mapping-fix.md. 
const result = await resolveOneSponsor( orgName, - { townCity: row.townCity, county: row.county }, + { townCity: null, county: null }, throttledFetchApi, ); diff --git a/apps/web/src/api/cache-headers.ts b/apps/web/src/api/cache-headers.ts index 9d9bc86..b25b503 100644 --- a/apps/web/src/api/cache-headers.ts +++ b/apps/web/src/api/cache-headers.ts @@ -9,6 +9,13 @@ import { getRequestUrl, setResponseHeader } from '@tanstack/start-server-core'; export const LONG_EDGE_CACHE = 's-maxage=2592000, stale-while-revalidate=604800'; +/** + * Short edge TTL for negative lookups (row not found). A missing hash can + * come back to life (licence reinstated by a later ingest), so a long-cached + * null would strand the URL — 5 minutes absorbs crawler storms without that. + */ +export const SHORT_EDGE_CACHE = 's-maxage=300, stale-while-revalidate=60'; + /** * Attach a `Cache-Control` header to the current response only when the * request is a server-fn RPC invocation (`/_serverFn/…`). Prevents the @@ -23,3 +30,14 @@ export const setRpcCacheControl = createIsomorphicFn() } }) .client(() => {}); + +/** + * Attach a `Cache-Control` header to the current SSR document response. + * Complement of `setRpcCacheControl` for route loaders that need to override + * a routeRule default on specific outcomes (e.g. short-cache a 404 document). 
+ */ +export const setSsrCacheControl = createIsomorphicFn() + .server((value: string) => { + setResponseHeader('Cache-Control', value); + }) + .client(() => {}); diff --git a/apps/web/src/api/companiesHouse.ts b/apps/web/src/api/companiesHouse.ts index 76b06a0..c6a8307 100644 --- a/apps/web/src/api/companiesHouse.ts +++ b/apps/web/src/api/companiesHouse.ts @@ -1,9 +1,4 @@ -import { - companiesHouseProfiles, - hmrcCompanyMapping, - hmrcSkilledWorkers, - sicCodes, -} from '@ss/db'; +import { companiesHouseProfiles, hmrcCompanyMapping, sicCodes } from '@ss/db'; import { queryOptions } from '@tanstack/react-query'; import { createServerFn } from '@tanstack/react-start'; import { setResponseHeader } from '@tanstack/react-start/server'; @@ -219,24 +214,13 @@ const getCompanyProfile = createServerFn() // top-hit logic that was silently mapping new sponsors to wrong CH // entities. See docs/hmrc-ch-mapping-fix.md "Phase 3 — on-demand // resolver hardening". - const [hmrcRow] = await db - .select({ - townCity: hmrcSkilledWorkers.townCity, - county: hmrcSkilledWorkers.county, - }) - .from(hmrcSkilledWorkers) - .where(eq(hmrcSkilledWorkers.organisationName, companyName)) - .limit(1); - console.log( `[Profile] no mapping, resolving via CH for: "${companyName}"`, ); + // HMRC no longer publishes town/county, so the locality tiebreak is inert. const result = await resolveOneSponsor( companyName, - { - townCity: hmrcRow?.townCity ?? null, - county: hmrcRow?.county ?? null, - }, + { townCity: null, county: null }, async (path) => { const r = await fetchFromApi(path); return r.ok ? 
r.data : null; diff --git a/apps/web/src/api/hmrc.ts b/apps/web/src/api/hmrc.ts index 31c9a61..bb9ef53 100644 --- a/apps/web/src/api/hmrc.ts +++ b/apps/web/src/api/hmrc.ts @@ -1,10 +1,18 @@ -import { hmrcSkilledWorkers } from '@ss/db'; +import { + companiesHouseProfiles, + hmrcCompanyMapping, + hmrcSkilledWorkers, +} from '@ss/db'; import { queryOptions } from '@tanstack/react-query'; import { createServerFn } from '@tanstack/react-start'; -import { desc, eq, sql } from 'drizzle-orm'; +import { asc, desc, eq, sql } from 'drizzle-orm'; import { db } from '../db.server'; -import { LONG_EDGE_CACHE, setRpcCacheControl } from './cache-headers'; +import { + LONG_EDGE_CACHE, + SHORT_EDGE_CACHE, + setRpcCacheControl, +} from './cache-headers'; const PAGE_SIZE = 50; @@ -32,16 +40,25 @@ export const searchHmrc = createServerFn() THEN 1.0 + word_similarity(${query}, ${hmrcSkilledWorkers.organisationName}) ELSE word_similarity(${query}, ${hmrcSkilledWorkers.organisationName}) END`; - const rows = await db + // One row per (org, rating, route): the same org can hold several licences + // with otherwise identical feed data (888 groups in the 2026-06 feed), and + // the cards show nothing that distinguishes them. min(hash) is the + // canonical slugId — the detail loader 301s the siblings to it. + // Grouping happens in the subquery, BEFORE the CH joins, so the joins stay + // PK probes on the returned window only and ranking/LIMIT are unaffected. 
+ const grouped = db .select({ - slugId: hmrcSkilledWorkers.hash, + slugId: sql`min(${hmrcSkilledWorkers.hash})`.as('slug_id'), organisationName: hmrcSkilledWorkers.organisationName, nameSlug: hmrcSkilledWorkers.nameSlug, - townCity: hmrcSkilledWorkers.townCity, - county: hmrcSkilledWorkers.county, + sponsorLicenceNumbers: sql< + string[] + >`coalesce(array_agg(distinct ${hmrcSkilledWorkers.sponsorLicenceNumber} order by ${hmrcSkilledWorkers.sponsorLicenceNumber}) filter (where ${hmrcSkilledWorkers.sponsorLicenceNumber} is not null), '{}')`.as( + 'sponsor_licence_numbers', + ), typeRating: hmrcSkilledWorkers.typeRating, route: hmrcSkilledWorkers.route, - score: scoreExpr, + score: scoreExpr.as('score'), }) .from(hmrcSkilledWorkers) .where( @@ -51,9 +68,56 @@ export const searchHmrc = createServerFn() OR similarity(${query}, ${hmrcSkilledWorkers.organisationName}) > 0.5 )`, ) - .orderBy(desc(scoreExpr), sql`${hmrcSkilledWorkers.organisationName} ASC`) + .groupBy( + hmrcSkilledWorkers.organisationName, + hmrcSkilledWorkers.nameSlug, + hmrcSkilledWorkers.typeRating, + hmrcSkilledWorkers.route, + ) + .orderBy( + desc(scoreExpr), + sql`${hmrcSkilledWorkers.organisationName} ASC`, + // Unique tiebreak: groups tie on score AND name, and unstable tie + // order across page fetches duplicates/drops rows at OFFSET boundaries + sql`min(${hmrcSkilledWorkers.hash}) ASC`, + ) .limit(PAGE_SIZE + 1) - .offset(offset); + .offset(offset) + .as('g'); + + // Listing location is CH-sourced (HMRC dropped town/county from the feed). 
+ const rows = await db + .select({ + slugId: grouped.slugId, + organisationName: grouped.organisationName, + nameSlug: grouped.nameSlug, + sponsorLicenceNumbers: grouped.sponsorLicenceNumbers, + locality: sql< + string | null + >`COALESCE(${companiesHouseProfiles.locality}, ${companiesHouseProfiles.addressLine2})`, + region: companiesHouseProfiles.region, + typeRating: grouped.typeRating, + route: grouped.route, + score: grouped.score, + }) + .from(grouped) + .leftJoin( + hmrcCompanyMapping, + eq(hmrcCompanyMapping.organisationName, grouped.organisationName), + ) + .leftJoin( + companiesHouseProfiles, + eq( + companiesHouseProfiles.companyNumber, + hmrcCompanyMapping.companyNumber, + ), + ) + // Joins don't guarantee order preservation; re-sort the ≤51-row window + .orderBy( + desc(grouped.score), + asc(grouped.organisationName), + asc(grouped.slugId), + ); const hasMore = rows.length > PAGE_SIZE; return { @@ -64,17 +128,33 @@ export const searchHmrc = createServerFn() /** * Server fn returning a single `hmrc_skilled_workers` row keyed by its stable - * `hash` slug id. Returns `null` when no matching row exists. + * `hash` slug id. Returns `null` when no matching row exists. Also returns the + * group canonical: multi-licence orgs have one row per licence with identical + * (org, rating, route) — search lists only min(hash), and the loader 301s the + * sibling hashes to `canonicalSlugId`. `sponsorLicenceNumbers` carries every + * licence in the group so the canonical page shows all of them. 
*/ const getHmrcBySlugId = createServerFn() .inputValidator((input: unknown) => input as { slugId: string }) .handler(async ({ data: { slugId } }) => { + const groupFilter = sql` + h2.organisation_name = ${hmrcSkilledWorkers.organisationName} + AND h2.type_rating = ${hmrcSkilledWorkers.typeRating} + AND h2.route = ${hmrcSkilledWorkers.route}`; const [row] = await db .select({ slugId: hmrcSkilledWorkers.hash, + canonicalSlugId: sql`( + SELECT min(h2.hash) FROM hmrc_skilled_workers h2 WHERE ${groupFilter} + )`, organisationName: hmrcSkilledWorkers.organisationName, - townCity: hmrcSkilledWorkers.townCity, - county: hmrcSkilledWorkers.county, + // The loader 301s slug mismatches onto this (renames leave stale-slug + // URLs serving 200 with a self-referential canonical otherwise) + nameSlug: hmrcSkilledWorkers.nameSlug, + sponsorLicenceNumbers: sql`( + SELECT coalesce(array_agg(distinct h2.sponsor_licence_number order by h2.sponsor_licence_number) filter (where h2.sponsor_licence_number is not null), '{}') + FROM hmrc_skilled_workers h2 WHERE ${groupFilter} + )`, typeRating: hmrcSkilledWorkers.typeRating, route: hmrcSkilledWorkers.route, }) @@ -82,9 +162,11 @@ const getHmrcBySlugId = createServerFn() .where(eq(hmrcSkilledWorkers.hash, slugId)) .limit(1); - // slugId is a content hash of the row — (slugId → data) is immutable, so - // cache aggressively without tag-based invalidation - setRpcCacheControl(LONG_EDGE_CACHE); + // Found rows cache long: the hash is licence-based, so data behind it only + // changes via ingest, and the post-ingest sitemap deploy purges the edge. + // Nulls cache short — a licence can be reinstated under the same hash, and + // a 30-day-cached null would 301-loop the revived URL against itself. + setRpcCacheControl(row ? LONG_EDGE_CACHE : SHORT_EDGE_CACHE); return row ?? 
null; }); @@ -122,8 +204,13 @@ export const sponsorCountQueryOptions = queryOptions({ /** * Server fn returning `hmrc_skilled_workers` rows whose `name_slug` matches * the given slug. Fallback for stale `/company/$id/$slug` URLs: when the hash - * lookup 404s, the loader checks whether the name still maps to a current row - * and 301s to its new hash. Capped at 2 since callers only branch on 0 / 1 / many. + * lookup 404s, the loader 301s to the slug's first row — and also scans the + * matches for the requested hash itself, which detects a stale cached null + * (licence reinstated under the same hash). Uncapped: rows are per LICENCE + * (not per rating/route group) and namesake slugs pool orgs, so any cap could + * hide the requested hash from the containment scan; rows per slug are + * naturally tiny (max 8 across 126k slugs). + * Ordered by hash so the multi-match 301 always picks the same canonical row. * Not wrapped in queryOptions — only the loader calls it, and the redirect * moves the user off this page so there's no second reader for the result. */ @@ -138,7 +225,7 @@ export const getHmrcBySlug = createServerFn() }) .from(hmrcSkilledWorkers) .where(eq(hmrcSkilledWorkers.nameSlug, slug)) - .limit(2); + .orderBy(asc(hmrcSkilledWorkers.hash)); return rows; }); diff --git a/apps/web/src/components/HmrcCard.tsx b/apps/web/src/components/HmrcCard.tsx index 6f3dc6f..6369f8c 100644 --- a/apps/web/src/components/HmrcCard.tsx +++ b/apps/web/src/components/HmrcCard.tsx @@ -1,7 +1,7 @@ import { Link } from '@tanstack/react-router'; import type { HmrcRow } from '../api/hmrc'; -import { titleCase } from '../utils'; +import { formatLocation, titleCase } from '../utils'; import RatingIcon from './RatingIcon'; import UnionJackLens from './UnionJackLens'; @@ -86,7 +86,7 @@ export default function HmrcCard({

- {[row.townCity, row.county].filter(Boolean).map(titleCase).join(', ')} + {formatLocation(row.locality, row.region)}

{titleCase(row.route)} diff --git a/apps/web/src/components/HmrcResults.tsx b/apps/web/src/components/HmrcResults.tsx index 9fb5ed4..1eec8c1 100644 --- a/apps/web/src/components/HmrcResults.tsx +++ b/apps/web/src/components/HmrcResults.tsx @@ -6,7 +6,7 @@ import { useVirtualTextLayout } from 'virtual-text-layout'; import { useHmrcSearch } from '../hooks/useHmrcSearch'; import { useResultsKeyboardNav } from '../hooks/useResultsKeyboardNav'; -import { titleCase } from '../utils'; +import { formatLocation, titleCase } from '../utils'; import HmrcCard from './HmrcCard'; import SkeletonCards from './SkeletonCards'; @@ -39,8 +39,7 @@ export default function HmrcResults({ search }: { search: string }) { letterSpacing: -0.4, // heading-card utility }, { - getText: (row) => - [row.townCity, row.county].filter(Boolean).map(titleCase).join(', '), + getText: (row) => formatLocation(row.locality, row.region), font: '14px Geist', // text-sm lineHeight: 20, }, diff --git a/apps/web/src/components/McpTools.tsx b/apps/web/src/components/McpTools.tsx index b537d04..e24969f 100644 --- a/apps/web/src/components/McpTools.tsx +++ b/apps/web/src/components/McpTools.tsx @@ -4,7 +4,7 @@ import '@mcp-b/global'; import { companyProfileQueryOptions } from '../api/companiesHouse'; import { searchHmrcQueryOptions } from '../api/hmrc'; -import { titleCase } from '../utils'; +import { formatLocation, titleCase } from '../utils'; /** * Registers browser-side MCP tools with `navigator.modelContext` (via @@ -24,7 +24,7 @@ export function McpTools() { ctx.registerTool({ name: 'search_uk_visa_sponsors', description: - 'Search for UK companies licensed to sponsor skilled worker visas. Returns company name, location, visa route, and sponsor rating.', + 'Search for UK companies licensed to sponsor skilled worker visas. 
Returns company name, location, visa route, sponsor rating, and sponsor licence numbers.', inputSchema: { type: 'object', properties: { @@ -77,12 +77,10 @@ export function McpTools() { const formatted = result.rows.map((row) => ({ name: titleCase(row.organisationName), - location: [row.townCity, row.county] - .filter(Boolean) - .map(titleCase) - .join(', '), + location: formatLocation(row.locality, row.region), visaRoute: titleCase(row.route), rating: titleCase(row.typeRating), + sponsorLicenceNumbers: row.sponsorLicenceNumbers, })); return { @@ -120,7 +118,7 @@ export function McpTools() { ctx.registerTool({ name: 'get_uk_visa_sponsor_details', description: - 'Get detailed information about a specific UK visa sponsor by company name, combining HMRC sponsorship data (location, visa routes, sponsor ratings) with Companies House registration data (company number, status, incorporation date, registered address, industry/SIC descriptions). Use the exact name returned by search_uk_visa_sponsors for best results.', + 'Get detailed information about a specific UK visa sponsor by company name, combining HMRC sponsorship data (location, visa routes, sponsor ratings, sponsor licence numbers) with Companies House registration data (company number, status, incorporation date, registered address, industry/SIC descriptions). Use the exact name returned by search_uk_visa_sponsors for best results.', inputSchema: { type: 'object', properties: { @@ -207,15 +205,13 @@ export function McpTools() { .map((row) => ({ visaRoute: titleCase(row.route), rating: titleCase(row.typeRating), + // Per-row, not top-level: licences vary by (rating, route) group + sponsorLicenceNumbers: row.sponsorLicenceNumbers, })); const details = { name: titleCase(top.organisationName), - location: - [top.townCity, top.county] - .filter(Boolean) - .map(titleCase) - .join(', ') || null, + location: formatLocation(top.locality, top.region) || null, sponsorship, companiesHouse: profile ? 
{ diff --git a/apps/web/src/lib/phase5/sql.ts b/apps/web/src/lib/phase5/sql.ts index f70cabe..79ecd19 100644 --- a/apps/web/src/lib/phase5/sql.ts +++ b/apps/web/src/lib/phase5/sql.ts @@ -116,9 +116,9 @@ function toExistingMapping(row: RawMappingRow): ExistingMapping { } /** Build a `lookupSponsor` matching `SweepDeps['lookupSponsor']`. Pulls - * `town_city` / `county` / `route` from `hmrc_skilled_workers` — the - * locality fields feed the resolver's tiebreak, the route feeds the inline - * scorer's route-type hard gate. */ + * `route` from `hmrc_skilled_workers` for the inline scorer's route-type + * hard gate. Locality is always null since the 2026-06 HMRC feed dropped + * town/county — the resolver's geographic tiebreak is inert. */ export function makeLookupSponsor(sql: Sql): SweepDeps['lookupSponsor'] { return async (organisationName) => { // `ORDER BY id ASC` for deterministic row selection — `hmrc_skilled_workers` @@ -127,20 +127,18 @@ export function makeLookupSponsor(sql: Sql): SweepDeps['lookupSponsor'] { // resolver's tiebreak depends on Postgres's storage order, which can // shift between runs. (CodeRabbit PR #85, comment 2.) const rows = (await sql` - SELECT town_city, county, route + SELECT route FROM hmrc_skilled_workers WHERE organisation_name = ${organisationName} ORDER BY id ASC LIMIT 1 `) as { - town_city: string | null; - county: string | null; route: string | null; }[]; const first = rows[0]; return { - townCity: first?.town_city ?? null, - county: first?.county ?? null, + townCity: null, + county: null, route: first?.route ?? 
null, } satisfies SweepSponsor; }; diff --git a/apps/web/src/routes/company.$id.$slug.tsx b/apps/web/src/routes/company.$id.$slug.tsx index 77a4fc1..116c3f4 100644 --- a/apps/web/src/routes/company.$id.$slug.tsx +++ b/apps/web/src/routes/company.$id.$slug.tsx @@ -9,6 +9,7 @@ import { import { ExternalLink, MapPin } from 'lucide-react'; import { useEffect, useRef, useState } from 'react'; +import { SHORT_EDGE_CACHE, setSsrCacheControl } from '../api/cache-headers'; import { companyProfileQueryOptions } from '../api/companiesHouse'; import { flagStateQueryOptions } from '../api/flags'; import { getHmrcBySlug, hmrcBySlugIdQueryOptions } from '../api/hmrc'; @@ -34,6 +35,24 @@ import { buildCompanyJsonLd, ratingPhrase } from '../utils/jsonld'; // Grammatical "A, B and C" joiner for the former-names sentence in the summary. const listFormatter = new Intl.ListFormat('en-GB', { type: 'conjunction' }); +/** + * Display location for a CH registered-office address. Mirrors searchHmrc's + * COALESCE(locality, address_line_2) + region so the detail page agrees with + * the listing card on whether a sponsor has a location. + */ +function registeredLocation( + address?: { + address_line_2?: string; + locality?: string; + region?: string; + } | null, +) { + return formatLocation( + address?.locality ?? address?.address_line_2, + address?.region, + ); +} + // Canonical key for company-name equality (case, punctuation, LTD/LIMITED). 
function normalizeName(name: string): string { return name @@ -52,13 +71,28 @@ export const Route = createFileRoute('/company/$id/$slug')({ middlewares: [stripSearchParams({ search: '' })], }, loader: async ({ params, context: { queryClient } }) => { - const sponsor = await queryClient.ensureQueryData( + let sponsor = await queryClient.ensureQueryData( hmrcBySlugIdQueryOptions(params.id), ); if (!sponsor) { const matches = await getHmrcBySlug({ data: { slug: params.slug } }); - if (matches.length === 1) { + if (matches.some((m) => m.slugId === params.id)) { + // The (uncached) slug lookup sees this very hash, so the cached null + // is stale — licence reinstated under the same hash by a later + // ingest. Drop the entry and refetch: invalidateQueries never + // refetches an observer-less query, and ensureQueryData would just + // return the cached null again. + queryClient.removeQueries({ + queryKey: hmrcBySlugIdQueryOptions(params.id).queryKey, + }); + sponsor = await queryClient.ensureQueryData( + hmrcBySlugIdQueryOptions(params.id), + ); + } else if (matches.length > 0) { + // 301 to the slug's canonical (hash-ordered first) row so stale + // URLs land on a real page and keep link equity. Safe on client + // navs too: getHmrcBySlug reads the DB uncached. throw redirect({ to: '/company/$id/$slug', params: { id: matches[0].slugId, slug: params.slug }, @@ -66,14 +100,41 @@ export const Route = createFileRoute('/company/$id/$slug')({ statusCode: 301, }); } - if (matches.length > 1) { - throw redirect({ - to: '/', - search: { search: matches[0].organisationName }, - statusCode: 302, - }); + if (!sponsor) { + // Best effort: keep the 404 document short-lived at the edge (a + // reinstated licence can revive the URL). The static /company/** + // routeRule header may still win at the edge — verify on deploy; + // the post-ingest deploy purge bounds the damage either way. 
+ setSsrCacheControl(SHORT_EDGE_CACHE); + throw notFound(); } - throw notFound(); + } + + // Canonicalize on SSR only. Server loaders read the DB in-process, so the + // redirect decision is always fresh; client navs read RQ/edge caches whose + // canonicalSlugId/nameSlug can be stale (rename, removed sibling) — acting + // on those loops redirects or bounces correct URLs onto stale slugs. + // Crawlers only ever see SSR, so the SEO-relevant 301s are unaffected; + // client navs simply render under the URL they were given. + // Truthiness guards: a cached pre-deploy row may predate these fields, and + // `undefined !== params.id` would 301 to /company/undefined/undefined. + if ( + import.meta.env.SSR && + ((sponsor.canonicalSlugId && sponsor.canonicalSlugId !== params.id) || + (sponsor.nameSlug && sponsor.nameSlug !== params.slug)) + ) { + // One canonical URL per page: sibling licence hashes 301 onto the + // group's min-hash row, and stale slugs (post-rename) onto the current + // slug — otherwise near-duplicate 200s accumulate in the index. + throw redirect({ + to: '/company/$id/$slug', + params: { + id: sponsor.canonicalSlugId || params.id, + slug: sponsor.nameSlug || params.slug, + }, + search: (prev) => ({ search: prev.search ?? '' }), + statusCode: 301, + }); } const profile = await queryClient.ensureQueryData( @@ -89,8 +150,6 @@ export const Route = createFileRoute('/company/$id/$slug')({ | { sponsor: { organisationName: string; - townCity?: string | null; - county?: string | null; typeRating: string; route: string; }; @@ -124,7 +183,7 @@ export const Route = createFileRoute('/company/$id/$slug')({ ? titleCase(loaderData.sponsor.organisationName) : ''; const location = loaderData - ? formatLocation(loaderData.sponsor.townCity, loaderData.sponsor.county) + ? 
registeredLocation(loaderData.profile?.registered_office_address) : ''; const industry = loaderData?.profile?.sicDescriptions ?.map((sic) => sic.description) @@ -228,6 +287,9 @@ function CompanyDetail() { return () => observer.disconnect(); }, []); + // Router match-cache can replay loaderData from an older bundle (SWR render + // on revisit); tolerate the field's absence instead of crashing on .length + const licenceNumbers = sponsor.sponsorLicenceNumbers ?? []; const hmrcName = titleCase(sponsor.organisationName); // Lead with the Companies House current name; HMRC may hold a stale former name. const displayName = profile?.company_name @@ -241,7 +303,9 @@ function CompanyDetail() { const alsoRegisteredAs = normalizeName(sponsor.organisationName) !== currentKey ? hmrcName : null; const displayRoute = titleCase(sponsor.route); - const displayLocation = formatLocation(sponsor.townCity, sponsor.county); + const displayLocation = registeredLocation( + profile?.registered_office_address, + ); const industry = profile?.sicDescriptions ?.map((s) => s.description) .join(', '); @@ -332,6 +396,19 @@ function CompanyDetail() { {titleCase(sponsor.typeRating)}

+ {/* No CH profile → the second card never renders; surface the licence here instead */} + {!profile && licenceNumbers.length > 0 && ( +
+
+ Sponsor Licence {licenceNumbers.length > 1 ? 'Nos.' : 'No.'} +
+
+ + {licenceNumbers.join(', ')} + +
+
+ )} @@ -384,6 +461,20 @@ function CompanyDetail() { )} + {licenceNumbers.length > 0 && ( +
+
+ Sponsor Licence{' '} + {licenceNumbers.length > 1 ? 'Nos.' : 'No.'} +
+
+ + {licenceNumbers.join(', ')} + +
+
+ )} + {formatAddress(profile.registered_office_address) && (
diff --git a/docs/hmrc-csv-format-change.md b/docs/hmrc-csv-format-change.md new file mode 100644 index 0000000..cb4f7a5 --- /dev/null +++ b/docs/hmrc-csv-format-change.md @@ -0,0 +1,192 @@ +# Plan: Adapt HMRC ingestion to the new gov.uk CSV format (+ CH-sourced location) + +## Context + +The gov.uk "Worker and Temporary Worker" sponsor CSV changed format on 2026-06-09, which +hard-failed the ingestion GitHub Action (schema-validation guard, working as designed — +exit 1 before touching the DB). The feed: + +- **Dropped** `Town/City`, `County` (HMRC can't guarantee location correctness — it's + Companies House's domain, which is *why* they removed it). +- **Renamed** `Type & Rating` → `TierRating` and `Route` → `Migrant Classification` + (verified: the value strings are **byte-for-byte identical** — same 9 / 17 distinct + values, trailing spaces and all). +- **Added** `Sponsor Licence Number` (a stable per-sponsor ID) and `Sponsor Status` + (currently single-valued: "Licensed and Fully Active"). + +**Decisions locked with the user:** + +1. Ingest stays **true to HMRC** — clean atomic swap, **no carry-forward** of old + town/county or slugs. Row `hash` (the `/company/$id/$slug` URL id) therefore changes. +2. **Adopt** the Sponsor Licence Number (and Status) into the table. +3. Confirmed Companies House API exposes **no** sponsor licence number — so the licence is + a within-dataset stable key, *not* a CH join key. CH joins still go via org name. +4. Listing-page location now comes from Companies House via a **query-time `LEFT JOIN`** + (not denormalized). Verified live: ~86% of rows (121,265 / 141,264) resolve to a CH + locality; `address_line_2` fallback adds only ~850. The join is ~free — location is + pure display (never in `WHERE`/`ORDER BY`), so it's PK probes on the ≤50 returned rows, + dominated by the existing trigram scan. 
+ +**Outcome:** ingestion green again on the new feed; listing/detail location sourced from +CH; licence number captured; the HMRC table reflects exactly what HMRC publishes. + +--- + +## Changes by area + +### 1. Ingestion script — `apps/web/scripts/ingest-hmrc-csv.ts` + +- **`EXPECTED_COLUMNS`** (lines 7-13) → `['Sponsor Licence Number', 'Organisation Name', + 'TierRating', 'Migrant Classification', 'Sponsor Status']`. Keep the guard — it now also + catches the *next* format change. +- **Column mapping at parse** (dedup loop, lines 142-163): keep internal DB names so the + matching pipeline is untouched — `TierRating` → `type_rating`, `Migrant Classification` + → `route`. Read `Sponsor Licence Number` → `sponsor_licence_number`, `Sponsor Status` → + `sponsor_status`. Drop all `Town/City` / `County` reads. +- **New hash basis** — `computeHash(licence, typeRating, route)` (replaces lines 113-126): + `base64url(sha256(licence | type_rating | route))[:11]`. Rationale: + - Stable: licence is durable, so **future ingests no longer churn URLs — this is the + last hash migration** (the old town/county inputs were the churny part). + - Disambiguates the 903 org-names that map to >1 licence. + - Excludes org name → company renames don't change the URL (`nameSlug` is cosmetic; the + loader resolves by hash). + - Keep the `UNIQUE(hash)` constraint as a collision guard; dedup on the same key. +- **`nameSlug`** unchanged: `slugify(orgName) || hash` (line 149). +- **Staging DDL** (lines 88-99): drop `town_city`, `county`; add + `sponsor_licence_number varchar(20)`, `sponsor_status varchar(64)`. Update the + `INSERT`/placeholder builder (lines 169-199) and `CleanedRow` type (129-137) to match. +- **Indexes** (lines 203-209): drop `stg_idx_hmrc_town_city`; keep org_name / name_slug / + route / org_name_trgm; add `stg_idx_hmrc_licence` on `sponsor_licence_number`. +- **Atomic-swap renames** (lines 215-224): drop the `town_city` index rename; add the + licence index rename. 
+- Checksum guard + `hmrc_ingestion_meta` insert unchanged. + +### 2. DB schema + migration — `packages/db/src/schema.ts` + +- `hmrcSkilledWorkers` (lines 17-39): remove `townCity`, `county`; add + `sponsorLicenceNumber varchar(20)` (subsequently widened to `varchar(64)` by migration `0027_widen-sponsor-licence`), `sponsorStatus varchar(64)`. Remove + `idx_hmrc_town_city`; add `idx_hmrc_licence`. +- Generate a Drizzle migration (`bun run` the drizzle-kit generate flow from root). +- **Lockstep invariant:** `schema.ts` ≡ the ingest script's `CREATE TABLE` DDL ≡ the + migration result must all agree. The ingest atomic-swap rebuilds the table from its own + DDL, but the migration must run *first* so the live table has the new columns before any + deployed query references them (avoids a SELECT on a not-yet-existing column). + +### 3. Search query — `apps/web/src/api/hmrc.ts` + +- `searchHmrc` (18-63): add + `.leftJoin(hmrcCompanyMapping, eq(hmrcCompanyMapping.organisationName, hmrcSkilledWorkers.organisationName))` + then `.leftJoin(companiesHouseProfiles, eq(companiesHouseProfiles.companyNumber, hmrcCompanyMapping.companyNumber))`. + Replace the `townCity`/`county` selects with + `location: sql\`COALESCE(${companiesHouseProfiles.locality}, ${companiesHouseProfiles.addressLine2})\``. + Score/order/limit unchanged (they reference only `hmrcSkilledWorkers`, so ranking and + `LIMIT` pushdown are unaffected). +- `getHmrcBySlugId` (69-90): drop `townCity`/`county` (detail page sources location from + the CH profile it already loads). Optionally also select `sponsorStatus` for a future + status badge. +- `HmrcRow` type (157) updates automatically; drop town/county, add `location`. +- `getHmrcBySlug` fallback (130-143) unchanged. + +### 4. UI + +- **`HmrcCard.tsx:88-90`** — replace `[row.townCity, row.county]…join(', ')` with + `titleCase(row.location ?? '')` (single token, e.g. "London"). +- **`HmrcResults.tsx:41-46`** (pretext field 2 `getText`) — change to + `(row) => titleCase(row.location ?? '')`. 
Keep `font:'14px Geist'`, `lineHeight:20`, + `fixedHeight:62`. ⚠️ **`CLAUDE.md` invariant** — `useCardMetrics` must stay in sync with + card CSS; the measured *text* changes (now a single CH token, shorter → less wrapping) + but the font/height config is unchanged, so `fixedHeight` stays 62. +- **`company.$id.$slug.tsx`** — the loader already fetches the CH `profile` + (lines 79-81). Source location from it: `displayLocation` (244) and head() (127-128) → + `formatLocation(profile?.registered_office_address?.locality, …?.region)`. Detail page + keeps its richer two-part display (town + region), now CH-sourced. Remove `townCity`/ + `county` from the loaderData `sponsor` type (90-96) and all usages. +- **`McpTools.tsx:80,215`** — update location formatting to use the new `location` field + from the search row. +- **`utils.ts` `formatLocation` (87-102)** — keep as-is (generic two-arg joiner), now fed + CH locality/region on the detail page. + +### 5. Matching pipeline — compile-safe, accept tiebreak loss + +`route` is preserved (renamed at ingest), so the route-type hard gate is unaffected. Only +the town/county *DB reads* break: + +- **`apps/web/src/lib/phase5/sql.ts` `makeLookupSponsor` (122-147)** — SELECT drops + `town_city, county`, keeps `route`; return `{ townCity: null, county: null, route }`. +- **`apps/web/src/api/companiesHouse.ts` on-demand resolver (222-244)** — remove the + `hmrcRow` SELECT of `townCity`/`county`; pass `{ townCity: null, county: null }` to + `resolveOneSponsor`. +- Leave the resolver/scorer **types and logic intact** (`pipeline.ts pickByLocality`, + `score-candidate.ts`, `compare-candidates.ts`, `resolve-sponsor.ts`, `sweep.ts`) — they + now operate on null locality, so the geographic tiebreak is inert. Existing phase5 tests + pass mock localities directly, so they still pass. 
+- **Documented degradation (accepted):** without HMRC town, ambiguous same-name orgs lose + the `pickByLocality` tiebreak → more `human_review` / a growing review queue. Acceptable + per the premise (HMRC location was unreliable); the 88% already-mapped orgs keep their + `company_number`, so steady-state location coverage holds. Optional later: replace the + geo tiebreak with a non-geo signal (company status / name-similarity margin) — out of + scope here. + +### 6. Sitemap + SEO + +- `apps/web/scripts/generate-sitemap.ts` — no change (selects `hash`, `nameSlug`, both + retained). After the swap all hashes change; the workflow's `data-changed`-gated + `sitemap:generate` + PR steps regenerate them. +- **One-time URL churn:** every detail URL changes once. The existing loader fallback + (`company.$id.$slug.tsx:59-77`) absorbs it — 301 for single-row orgs (~90%, unique + `name_slug`), 302→search for multi-route orgs. Because the new hash is licence-based and + stable, this is a one-time event, not recurring. +- **Optional SEO safeguard (recommend):** change the multi-match branch (69-75) from + 302→search to **301→the canonical (first) row**, so old multi-route URLs land on a real + page and retain link equity instead of bouncing to search. + +### 7. Workflow / dev scripts + +- `.github/workflows/hmrc-ingestion.yaml` — no change; it succeeds once the script handles + the new columns. (The `actions/checkout@v4` Node-20 deprecation warning is unrelated + housekeeping.) +- **Follow-up (non-blocking, local-only):** `generate-hmrc-seed-sql.ts` parses the old CSV + columns and will break on the new format / old fixture; update it or refresh + `apps/web/data/2026-03-31-Worker.csv`. Re-check `seed-companies-house.ts` for town/county + reads. + +--- + +## Verification + +1. **Lint/type/test** (from repo root): `bun lint:fix && bun lint`, the monorepo + type-check (HmrcRow ripple → HmrcCard/HmrcResults/McpTools/detail page), `bun test` + (phase5 tests should stay green). +2. 
**Ingest dry-run on a branch/staging DB** against the live new CSV with `--force`: + confirm schema validation passes, ~140,876 unique rows, no town/county columns, + `sponsor_licence_number`/`sponsor_status` populated, hashes minted, UNIQUE holds. +3. **Listing**: run search locally, confirm cards show CH locality (e.g. "Checkout LTD" → + "London"), ~86% populated / ~14% blank; confirm the join doesn't regress search latency + (EXPLAIN: trigram scan dominates, joins are PK nested-loops on the returned window). +4. **Detail**: confirm location now renders from the CH profile. +5. **Sitemap**: `bun run sitemap:generate` → new hashes in output. +6. **Redirects**: hit an old `/company/OLDHASH/slug` → 301 (single-row) or 302/301 + (multi-route, depending on the optional safeguard). +7. **Deploy order**: migration (adds columns) → deploy code → trigger ingest (`--force`, + repopulates licence/status + new hashes) → sitemap regen PR. + +--- + +## Open items to confirm before implementing + +- **Hash basis = `licence|type_rating|route`** (recommended; makes this the last URL + migration). Alternative would be `org|type|route`, but that re-churns on renames and + collides on the 903 multi-licence names. +- **Optional 301→canonical multi-match safeguard** (§6) — include or skip? +- **Matching tiebreak degradation** accepted as-is (§5) — confirm OK to defer a + replacement signal. + +--- + +## Source data reference (2026-06-09 feed) + +- New columns: `Sponsor Licence Number, Organisation Name, TierRating, Migrant Classification, Sponsor Status` +- Old columns: `Organisation Name, Town/City, County, Type & Rating, Route` +- New feed: 141,806 rows → ~140,876 unique `(org, type, route)` +- Live DB coverage at planning time: 141,264 rows / 126,420 orgs; 111,320 orgs mapped to a + company number; 121,265 rows (~86%) resolve to a CH locality (or `address_line_2`). 
diff --git a/packages/db/migrations/0025_add-sponsor-licence.sql b/packages/db/migrations/0025_add-sponsor-licence.sql new file mode 100644 index 0000000..2d0558c --- /dev/null +++ b/packages/db/migrations/0025_add-sponsor-licence.sql @@ -0,0 +1,3 @@ +ALTER TABLE "hmrc_skilled_workers" ADD COLUMN IF NOT EXISTS "sponsor_licence_number" varchar(20);--> statement-breakpoint +ALTER TABLE "hmrc_skilled_workers" ADD COLUMN IF NOT EXISTS "sponsor_status" varchar(64);--> statement-breakpoint +CREATE INDEX IF NOT EXISTS "idx_hmrc_licence" ON "hmrc_skilled_workers" USING btree ("sponsor_licence_number"); \ No newline at end of file diff --git a/packages/db/migrations/0026_drop-town-county.sql b/packages/db/migrations/0026_drop-town-county.sql new file mode 100644 index 0000000..9b58977 --- /dev/null +++ b/packages/db/migrations/0026_drop-town-county.sql @@ -0,0 +1,3 @@ +DROP INDEX IF EXISTS "idx_hmrc_town_city";--> statement-breakpoint +ALTER TABLE "hmrc_skilled_workers" DROP COLUMN IF EXISTS "town_city";--> statement-breakpoint +ALTER TABLE "hmrc_skilled_workers" DROP COLUMN IF EXISTS "county"; \ No newline at end of file diff --git a/packages/db/migrations/0027_widen-sponsor-licence.sql b/packages/db/migrations/0027_widen-sponsor-licence.sql new file mode 100644 index 0000000..9d56afc --- /dev/null +++ b/packages/db/migrations/0027_widen-sponsor-licence.sql @@ -0,0 +1 @@ +ALTER TABLE "hmrc_skilled_workers" ALTER COLUMN "sponsor_licence_number" SET DATA TYPE varchar(64); \ No newline at end of file diff --git a/packages/db/migrations/meta/0025_snapshot.json b/packages/db/migrations/meta/0025_snapshot.json new file mode 100644 index 0000000..f67a688 --- /dev/null +++ b/packages/db/migrations/meta/0025_snapshot.json @@ -0,0 +1,946 @@ +{ + "id": "f72258ad-cc70-4aa0-9556-2ef319a1aca2", + "prevId": "986c92c9-3ee8-4ca9-bd17-323cc05a3d99", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.ch_stream_state": { + "name": "ch_stream_state", + "schema": "", + 
"columns": { + "key": { + "name": "key", + "type": "varchar(50)", + "primaryKey": true, + "notNull": true + }, + "last_timepoint": { + "name": "last_timepoint", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.companies_house_profile_cache": { + "name": "companies_house_profile_cache", + "schema": "", + "columns": { + "key": { + "name": "key", + "type": "varchar(50)", + "primaryKey": true, + "notNull": true + }, + "last_trail_id": { + "name": "last_trail_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.companies_house_profile_trails": { + "name": "companies_house_profile_trails", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "company_number": { + "name": "company_number", + "type": "varchar(20)", + "primaryKey": false, + "notNull": true + }, + "column_name": { + "name": "column_name", + "type": "varchar(100)", + "primaryKey": false, + "notNull": true + }, + "old_value": { + "name": "old_value", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "new_value": { + "name": "new_value", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + 
"idx_ch_trail_company_number": { + "name": "idx_ch_trail_company_number", + "columns": [ + { + "expression": "company_number", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ch_trail_created_at": { + "name": "idx_ch_trail_created_at", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.companies_house_profiles": { + "name": "companies_house_profiles", + "schema": "", + "columns": { + "company_number": { + "name": "company_number", + "type": "varchar(20)", + "primaryKey": true, + "notNull": true + }, + "company_name": { + "name": "company_name", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + "company_status": { + "name": "company_status", + "type": "varchar(50)", + "primaryKey": false, + "notNull": false + }, + "company_type": { + "name": "company_type", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "date_of_creation": { + "name": "date_of_creation", + "type": "date", + "primaryKey": false, + "notNull": false + }, + "address_line_1": { + "name": "address_line_1", + "type": "varchar(255)", + "primaryKey": false, + "notNull": false + }, + "address_line_2": { + "name": "address_line_2", + "type": "varchar(255)", + "primaryKey": false, + "notNull": false + }, + "locality": { + "name": "locality", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "region": { + "name": "region", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "postal_code": { + "name": "postal_code", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + 
"country": { + "name": "country", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "sic_codes": { + "name": "sic_codes", + "type": "text[]", + "primaryKey": false, + "notNull": false, + "default": "'{}'::text[]" + }, + "accounts_next_made_up_to": { + "name": "accounts_next_made_up_to", + "type": "date", + "primaryKey": false, + "notNull": false + }, + "accounts_last_made_up_to": { + "name": "accounts_last_made_up_to", + "type": "date", + "primaryKey": false, + "notNull": false + }, + "accounts_overdue": { + "name": "accounts_overdue", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "jurisdiction": { + "name": "jurisdiction", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "has_been_liquidated": { + "name": "has_been_liquidated", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "has_insolvency_history": { + "name": "has_insolvency_history", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "has_charges": { + "name": "has_charges", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "previous_company_names": { + "name": "previous_company_names", + "type": "text[]", + "primaryKey": false, + "notNull": false, + "default": "'{}'::text[]" + }, + "confirmation_statement_last_made_up_to": { + "name": "confirmation_statement_last_made_up_to", + "type": "date", + "primaryKey": false, + "notNull": false + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_ch_company_name": { + "name": "idx_ch_company_name", + "columns": [ + { + "expression": "company_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ch_company_status": { + "name": "idx_ch_company_status", + "columns": [ + { + "expression": "company_status", + 
"isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ch_company_type": { + "name": "idx_ch_company_type", + "columns": [ + { + "expression": "company_type", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ch_sic_codes": { + "name": "idx_ch_sic_codes", + "columns": [ + { + "expression": "sic_codes", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + }, + "idx_ch_jurisdiction": { + "name": "idx_ch_jurisdiction", + "columns": [ + { + "expression": "jurisdiction", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ch_previous_names": { + "name": "idx_ch_previous_names", + "columns": [ + { + "expression": "previous_company_names", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.hmrc_company_mapping": { + "name": "hmrc_company_mapping", + "schema": "", + "columns": { + "organisation_name": { + "name": "organisation_name", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "company_number": { + "name": "company_number", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "is_public_body": { + "name": "is_public_body", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "match_method": { + "name": "match_method", + "type": "varchar(32)", + "primaryKey": false, + "notNull": false + }, + "match_score": { + "name": "match_score", + 
"type": "numeric(4, 3)", + "primaryKey": false, + "notNull": false + }, + "query_used": { + "name": "query_used", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "verified_at": { + "name": "verified_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_mapping_method_verified": { + "name": "idx_mapping_method_verified", + "columns": [ + { + "expression": "match_method", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "verified_at", + "isExpression": false, + "asc": true, + "nulls": "first" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.hmrc_company_mapping_audit": { + "name": "hmrc_company_mapping_audit", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "organisation_name": { + "name": "organisation_name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "old_company_number": { + "name": "old_company_number", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "new_company_number": { + "name": "new_company_number", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "old_match_method": { + "name": "old_match_method", + "type": "varchar(32)", + "primaryKey": false, + "notNull": false + }, + "new_match_method": { + "name": "new_match_method", + "type": "varchar(32)", + "primaryKey": false, + "notNull": false + }, + "changed_at": { + "name": "changed_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "changed_by": { + "name": "changed_by", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, 
+ "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.hmrc_company_mapping_review_queue": { + "name": "hmrc_company_mapping_review_queue", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "organisation_name": { + "name": "organisation_name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "reason": { + "name": "reason", + "type": "varchar(40)", + "primaryKey": false, + "notNull": true + }, + "existing_company_number": { + "name": "existing_company_number", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "existing_match_method": { + "name": "existing_match_method", + "type": "varchar(32)", + "primaryKey": false, + "notNull": false + }, + "existing_match_score": { + "name": "existing_match_score", + "type": "numeric(4, 3)", + "primaryKey": false, + "notNull": false + }, + "proposed_company_number": { + "name": "proposed_company_number", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "proposed_match_method": { + "name": "proposed_match_method", + "type": "varchar(32)", + "primaryKey": false, + "notNull": false + }, + "proposed_match_score": { + "name": "proposed_match_score", + "type": "numeric(4, 3)", + "primaryKey": false, + "notNull": false + }, + "proposed_query_used": { + "name": "proposed_query_used", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "ch_search_results_top5": { + "name": "ch_search_results_top5", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "detected_by": { + "name": "detected_by", + "type": "varchar(100)", + "primaryKey": false, + "notNull": true + }, + "detected_at": { + "name": "detected_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "resolved_at": { + "name": "resolved_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + 
"resolved_by": { + "name": "resolved_by", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "resolution": { + "name": "resolution", + "type": "varchar(40)", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_review_queue_unresolved": { + "name": "idx_review_queue_unresolved", + "columns": [ + { + "expression": "detected_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"hmrc_company_mapping_review_queue\".\"resolved_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_review_queue_org": { + "name": "idx_review_queue_org", + "columns": [ + { + "expression": "organisation_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "ux_review_queue_unresolved_org_reason": { + "name": "ux_review_queue_unresolved_org_reason", + "columns": [ + { + "expression": "organisation_name", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "reason", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "where": "\"hmrc_company_mapping_review_queue\".\"resolved_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.hmrc_ingestion_meta": { + "name": "hmrc_ingestion_meta", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "csv_url": { + "name": "csv_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "checksum": { + "name": "checksum", + "type": "varchar(64)", + "primaryKey": false, + "notNull": true + }, + "record_count": { + "name": "record_count", + "type": "integer", + "primaryKey": false, + "notNull": 
true + }, + "ingested_at": { + "name": "ingested_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.hmrc_skilled_workers": { + "name": "hmrc_skilled_workers", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "hash": { + "name": "hash", + "type": "varchar(11)", + "primaryKey": false, + "notNull": true + }, + "organisation_name": { + "name": "organisation_name", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + "name_slug": { + "name": "name_slug", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + "town_city": { + "name": "town_city", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "county": { + "name": "county", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "sponsor_licence_number": { + "name": "sponsor_licence_number", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "sponsor_status": { + "name": "sponsor_status", + "type": "varchar(64)", + "primaryKey": false, + "notNull": false + }, + "type_rating": { + "name": "type_rating", + "type": "varchar(100)", + "primaryKey": false, + "notNull": true + }, + "route": { + "name": "route", + "type": "varchar(100)", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "idx_hmrc_org_name": { + "name": "idx_hmrc_org_name", + "columns": [ + { + "expression": "organisation_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_hmrc_name_slug": { + "name": "idx_hmrc_name_slug", + "columns": [ + { + "expression": "name_slug", + "isExpression": false, + "asc": true, + "nulls": "last" + } 
+ ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_hmrc_town_city": { + "name": "idx_hmrc_town_city", + "columns": [ + { + "expression": "town_city", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_hmrc_licence": { + "name": "idx_hmrc_licence", + "columns": [ + { + "expression": "sponsor_licence_number", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_hmrc_route": { + "name": "idx_hmrc_route", + "columns": [ + { + "expression": "route", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_hmrc_org_name_trgm": { + "name": "idx_hmrc_org_name_trgm", + "columns": [ + { + "expression": "\"organisation_name\" gin_trgm_ops", + "asc": true, + "isExpression": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "hmrc_skilled_workers_hash_unique": { + "name": "hmrc_skilled_workers_hash_unique", + "nullsNotDistinct": false, + "columns": ["hash"] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.sic_codes": { + "name": "sic_codes", + "schema": "", + "columns": { + "code": { + "name": "code", + "type": "varchar(10)", + "primaryKey": true, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + } + }, + "enums": {}, + "schemas": {}, + "sequences": {}, + "roles": {}, + "policies": {}, + 
"views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} diff --git a/packages/db/migrations/meta/0026_snapshot.json b/packages/db/migrations/meta/0026_snapshot.json new file mode 100644 index 0000000..9fff993 --- /dev/null +++ b/packages/db/migrations/meta/0026_snapshot.json @@ -0,0 +1,919 @@ +{ + "id": "45b567be-162a-44fc-adf8-d57fcdc67921", + "prevId": "f72258ad-cc70-4aa0-9556-2ef319a1aca2", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.ch_stream_state": { + "name": "ch_stream_state", + "schema": "", + "columns": { + "key": { + "name": "key", + "type": "varchar(50)", + "primaryKey": true, + "notNull": true + }, + "last_timepoint": { + "name": "last_timepoint", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.companies_house_profile_cache": { + "name": "companies_house_profile_cache", + "schema": "", + "columns": { + "key": { + "name": "key", + "type": "varchar(50)", + "primaryKey": true, + "notNull": true + }, + "last_trail_id": { + "name": "last_trail_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.companies_house_profile_trails": { + "name": "companies_house_profile_trails", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "company_number": { + "name": "company_number", + "type": 
"varchar(20)", + "primaryKey": false, + "notNull": true + }, + "column_name": { + "name": "column_name", + "type": "varchar(100)", + "primaryKey": false, + "notNull": true + }, + "old_value": { + "name": "old_value", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "new_value": { + "name": "new_value", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_ch_trail_company_number": { + "name": "idx_ch_trail_company_number", + "columns": [ + { + "expression": "company_number", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ch_trail_created_at": { + "name": "idx_ch_trail_created_at", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.companies_house_profiles": { + "name": "companies_house_profiles", + "schema": "", + "columns": { + "company_number": { + "name": "company_number", + "type": "varchar(20)", + "primaryKey": true, + "notNull": true + }, + "company_name": { + "name": "company_name", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + "company_status": { + "name": "company_status", + "type": "varchar(50)", + "primaryKey": false, + "notNull": false + }, + "company_type": { + "name": "company_type", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "date_of_creation": { + "name": "date_of_creation", + "type": "date", + "primaryKey": false, + "notNull": false + }, + "address_line_1": { + "name": "address_line_1", 
+ "type": "varchar(255)", + "primaryKey": false, + "notNull": false + }, + "address_line_2": { + "name": "address_line_2", + "type": "varchar(255)", + "primaryKey": false, + "notNull": false + }, + "locality": { + "name": "locality", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "region": { + "name": "region", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "postal_code": { + "name": "postal_code", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "country": { + "name": "country", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "sic_codes": { + "name": "sic_codes", + "type": "text[]", + "primaryKey": false, + "notNull": false, + "default": "'{}'::text[]" + }, + "accounts_next_made_up_to": { + "name": "accounts_next_made_up_to", + "type": "date", + "primaryKey": false, + "notNull": false + }, + "accounts_last_made_up_to": { + "name": "accounts_last_made_up_to", + "type": "date", + "primaryKey": false, + "notNull": false + }, + "accounts_overdue": { + "name": "accounts_overdue", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "jurisdiction": { + "name": "jurisdiction", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "has_been_liquidated": { + "name": "has_been_liquidated", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "has_insolvency_history": { + "name": "has_insolvency_history", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "has_charges": { + "name": "has_charges", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "previous_company_names": { + "name": "previous_company_names", + "type": "text[]", + "primaryKey": false, + "notNull": false, + "default": "'{}'::text[]" + }, + "confirmation_statement_last_made_up_to": { + "name": "confirmation_statement_last_made_up_to", + "type": "date", + "primaryKey": false, + "notNull": false + }, + 
"updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_ch_company_name": { + "name": "idx_ch_company_name", + "columns": [ + { + "expression": "company_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ch_company_status": { + "name": "idx_ch_company_status", + "columns": [ + { + "expression": "company_status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ch_company_type": { + "name": "idx_ch_company_type", + "columns": [ + { + "expression": "company_type", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ch_sic_codes": { + "name": "idx_ch_sic_codes", + "columns": [ + { + "expression": "sic_codes", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + }, + "idx_ch_jurisdiction": { + "name": "idx_ch_jurisdiction", + "columns": [ + { + "expression": "jurisdiction", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ch_previous_names": { + "name": "idx_ch_previous_names", + "columns": [ + { + "expression": "previous_company_names", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.hmrc_company_mapping": { + "name": "hmrc_company_mapping", + "schema": "", + "columns": { + 
"organisation_name": { + "name": "organisation_name", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "company_number": { + "name": "company_number", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "is_public_body": { + "name": "is_public_body", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "match_method": { + "name": "match_method", + "type": "varchar(32)", + "primaryKey": false, + "notNull": false + }, + "match_score": { + "name": "match_score", + "type": "numeric(4, 3)", + "primaryKey": false, + "notNull": false + }, + "query_used": { + "name": "query_used", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "verified_at": { + "name": "verified_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_mapping_method_verified": { + "name": "idx_mapping_method_verified", + "columns": [ + { + "expression": "match_method", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "verified_at", + "isExpression": false, + "asc": true, + "nulls": "first" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.hmrc_company_mapping_audit": { + "name": "hmrc_company_mapping_audit", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "organisation_name": { + "name": "organisation_name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "old_company_number": { + "name": "old_company_number", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "new_company_number": { + "name": "new_company_number", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "old_match_method": { + 
"name": "old_match_method", + "type": "varchar(32)", + "primaryKey": false, + "notNull": false + }, + "new_match_method": { + "name": "new_match_method", + "type": "varchar(32)", + "primaryKey": false, + "notNull": false + }, + "changed_at": { + "name": "changed_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "changed_by": { + "name": "changed_by", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.hmrc_company_mapping_review_queue": { + "name": "hmrc_company_mapping_review_queue", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "organisation_name": { + "name": "organisation_name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "reason": { + "name": "reason", + "type": "varchar(40)", + "primaryKey": false, + "notNull": true + }, + "existing_company_number": { + "name": "existing_company_number", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "existing_match_method": { + "name": "existing_match_method", + "type": "varchar(32)", + "primaryKey": false, + "notNull": false + }, + "existing_match_score": { + "name": "existing_match_score", + "type": "numeric(4, 3)", + "primaryKey": false, + "notNull": false + }, + "proposed_company_number": { + "name": "proposed_company_number", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "proposed_match_method": { + "name": "proposed_match_method", + "type": "varchar(32)", + "primaryKey": false, + "notNull": false + }, + "proposed_match_score": { + "name": "proposed_match_score", + "type": "numeric(4, 3)", + "primaryKey": false, + "notNull": false + }, + "proposed_query_used": { + "name": "proposed_query_used", + "type": "text", + 
"primaryKey": false, + "notNull": false + }, + "ch_search_results_top5": { + "name": "ch_search_results_top5", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "detected_by": { + "name": "detected_by", + "type": "varchar(100)", + "primaryKey": false, + "notNull": true + }, + "detected_at": { + "name": "detected_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "resolved_at": { + "name": "resolved_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "resolved_by": { + "name": "resolved_by", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "resolution": { + "name": "resolution", + "type": "varchar(40)", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_review_queue_unresolved": { + "name": "idx_review_queue_unresolved", + "columns": [ + { + "expression": "detected_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"hmrc_company_mapping_review_queue\".\"resolved_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_review_queue_org": { + "name": "idx_review_queue_org", + "columns": [ + { + "expression": "organisation_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "ux_review_queue_unresolved_org_reason": { + "name": "ux_review_queue_unresolved_org_reason", + "columns": [ + { + "expression": "organisation_name", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "reason", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "where": "\"hmrc_company_mapping_review_queue\".\"resolved_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + 
"checkConstraints": {}, + "isRLSEnabled": false + }, + "public.hmrc_ingestion_meta": { + "name": "hmrc_ingestion_meta", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "csv_url": { + "name": "csv_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "checksum": { + "name": "checksum", + "type": "varchar(64)", + "primaryKey": false, + "notNull": true + }, + "record_count": { + "name": "record_count", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "ingested_at": { + "name": "ingested_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.hmrc_skilled_workers": { + "name": "hmrc_skilled_workers", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "hash": { + "name": "hash", + "type": "varchar(11)", + "primaryKey": false, + "notNull": true + }, + "organisation_name": { + "name": "organisation_name", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + "name_slug": { + "name": "name_slug", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + "sponsor_licence_number": { + "name": "sponsor_licence_number", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "sponsor_status": { + "name": "sponsor_status", + "type": "varchar(64)", + "primaryKey": false, + "notNull": false + }, + "type_rating": { + "name": "type_rating", + "type": "varchar(100)", + "primaryKey": false, + "notNull": true + }, + "route": { + "name": "route", + "type": "varchar(100)", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "idx_hmrc_org_name": { + "name": "idx_hmrc_org_name", + "columns": [ + { + "expression": 
"organisation_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_hmrc_name_slug": { + "name": "idx_hmrc_name_slug", + "columns": [ + { + "expression": "name_slug", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_hmrc_licence": { + "name": "idx_hmrc_licence", + "columns": [ + { + "expression": "sponsor_licence_number", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_hmrc_route": { + "name": "idx_hmrc_route", + "columns": [ + { + "expression": "route", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_hmrc_org_name_trgm": { + "name": "idx_hmrc_org_name_trgm", + "columns": [ + { + "expression": "\"organisation_name\" gin_trgm_ops", + "asc": true, + "isExpression": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "hmrc_skilled_workers_hash_unique": { + "name": "hmrc_skilled_workers_hash_unique", + "nullsNotDistinct": false, + "columns": ["hash"] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.sic_codes": { + "name": "sic_codes", + "schema": "", + "columns": { + "code": { + "name": "code", + "type": "varchar(10)", + "primaryKey": true, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + } + }, + 
"enums": {}, + "schemas": {}, + "sequences": {}, + "roles": {}, + "policies": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} diff --git a/packages/db/migrations/meta/0027_snapshot.json b/packages/db/migrations/meta/0027_snapshot.json new file mode 100644 index 0000000..94d6e03 --- /dev/null +++ b/packages/db/migrations/meta/0027_snapshot.json @@ -0,0 +1,919 @@ +{ + "id": "f1952481-19f9-4bec-b127-6153e8b8d459", + "prevId": "45b567be-162a-44fc-adf8-d57fcdc67921", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.ch_stream_state": { + "name": "ch_stream_state", + "schema": "", + "columns": { + "key": { + "name": "key", + "type": "varchar(50)", + "primaryKey": true, + "notNull": true + }, + "last_timepoint": { + "name": "last_timepoint", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.companies_house_profile_cache": { + "name": "companies_house_profile_cache", + "schema": "", + "columns": { + "key": { + "name": "key", + "type": "varchar(50)", + "primaryKey": true, + "notNull": true + }, + "last_trail_id": { + "name": "last_trail_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.companies_house_profile_trails": { + "name": "companies_house_profile_trails", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": 
true, + "notNull": true + }, + "company_number": { + "name": "company_number", + "type": "varchar(20)", + "primaryKey": false, + "notNull": true + }, + "column_name": { + "name": "column_name", + "type": "varchar(100)", + "primaryKey": false, + "notNull": true + }, + "old_value": { + "name": "old_value", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "new_value": { + "name": "new_value", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_ch_trail_company_number": { + "name": "idx_ch_trail_company_number", + "columns": [ + { + "expression": "company_number", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ch_trail_created_at": { + "name": "idx_ch_trail_created_at", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.companies_house_profiles": { + "name": "companies_house_profiles", + "schema": "", + "columns": { + "company_number": { + "name": "company_number", + "type": "varchar(20)", + "primaryKey": true, + "notNull": true + }, + "company_name": { + "name": "company_name", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + "company_status": { + "name": "company_status", + "type": "varchar(50)", + "primaryKey": false, + "notNull": false + }, + "company_type": { + "name": "company_type", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "date_of_creation": { + "name": "date_of_creation", + "type": "date", + 
"primaryKey": false, + "notNull": false + }, + "address_line_1": { + "name": "address_line_1", + "type": "varchar(255)", + "primaryKey": false, + "notNull": false + }, + "address_line_2": { + "name": "address_line_2", + "type": "varchar(255)", + "primaryKey": false, + "notNull": false + }, + "locality": { + "name": "locality", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "region": { + "name": "region", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "postal_code": { + "name": "postal_code", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "country": { + "name": "country", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "sic_codes": { + "name": "sic_codes", + "type": "text[]", + "primaryKey": false, + "notNull": false, + "default": "'{}'::text[]" + }, + "accounts_next_made_up_to": { + "name": "accounts_next_made_up_to", + "type": "date", + "primaryKey": false, + "notNull": false + }, + "accounts_last_made_up_to": { + "name": "accounts_last_made_up_to", + "type": "date", + "primaryKey": false, + "notNull": false + }, + "accounts_overdue": { + "name": "accounts_overdue", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "jurisdiction": { + "name": "jurisdiction", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "has_been_liquidated": { + "name": "has_been_liquidated", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "has_insolvency_history": { + "name": "has_insolvency_history", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "has_charges": { + "name": "has_charges", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "previous_company_names": { + "name": "previous_company_names", + "type": "text[]", + "primaryKey": false, + "notNull": false, + "default": "'{}'::text[]" + }, + "confirmation_statement_last_made_up_to": { + "name": 
"confirmation_statement_last_made_up_to", + "type": "date", + "primaryKey": false, + "notNull": false + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_ch_company_name": { + "name": "idx_ch_company_name", + "columns": [ + { + "expression": "company_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ch_company_status": { + "name": "idx_ch_company_status", + "columns": [ + { + "expression": "company_status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ch_company_type": { + "name": "idx_ch_company_type", + "columns": [ + { + "expression": "company_type", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ch_sic_codes": { + "name": "idx_ch_sic_codes", + "columns": [ + { + "expression": "sic_codes", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + }, + "idx_ch_jurisdiction": { + "name": "idx_ch_jurisdiction", + "columns": [ + { + "expression": "jurisdiction", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ch_previous_names": { + "name": "idx_ch_previous_names", + "columns": [ + { + "expression": "previous_company_names", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, 
+ "public.hmrc_company_mapping": { + "name": "hmrc_company_mapping", + "schema": "", + "columns": { + "organisation_name": { + "name": "organisation_name", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "company_number": { + "name": "company_number", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "is_public_body": { + "name": "is_public_body", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "match_method": { + "name": "match_method", + "type": "varchar(32)", + "primaryKey": false, + "notNull": false + }, + "match_score": { + "name": "match_score", + "type": "numeric(4, 3)", + "primaryKey": false, + "notNull": false + }, + "query_used": { + "name": "query_used", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "verified_at": { + "name": "verified_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_mapping_method_verified": { + "name": "idx_mapping_method_verified", + "columns": [ + { + "expression": "match_method", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "verified_at", + "isExpression": false, + "asc": true, + "nulls": "first" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.hmrc_company_mapping_audit": { + "name": "hmrc_company_mapping_audit", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "organisation_name": { + "name": "organisation_name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "old_company_number": { + "name": "old_company_number", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "new_company_number": { + "name": "new_company_number", + 
"type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "old_match_method": { + "name": "old_match_method", + "type": "varchar(32)", + "primaryKey": false, + "notNull": false + }, + "new_match_method": { + "name": "new_match_method", + "type": "varchar(32)", + "primaryKey": false, + "notNull": false + }, + "changed_at": { + "name": "changed_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "changed_by": { + "name": "changed_by", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.hmrc_company_mapping_review_queue": { + "name": "hmrc_company_mapping_review_queue", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "organisation_name": { + "name": "organisation_name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "reason": { + "name": "reason", + "type": "varchar(40)", + "primaryKey": false, + "notNull": true + }, + "existing_company_number": { + "name": "existing_company_number", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "existing_match_method": { + "name": "existing_match_method", + "type": "varchar(32)", + "primaryKey": false, + "notNull": false + }, + "existing_match_score": { + "name": "existing_match_score", + "type": "numeric(4, 3)", + "primaryKey": false, + "notNull": false + }, + "proposed_company_number": { + "name": "proposed_company_number", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "proposed_match_method": { + "name": "proposed_match_method", + "type": "varchar(32)", + "primaryKey": false, + "notNull": false + }, + "proposed_match_score": { + "name": "proposed_match_score", + "type": "numeric(4, 3)", + "primaryKey": false, + "notNull": 
false + }, + "proposed_query_used": { + "name": "proposed_query_used", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "ch_search_results_top5": { + "name": "ch_search_results_top5", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "detected_by": { + "name": "detected_by", + "type": "varchar(100)", + "primaryKey": false, + "notNull": true + }, + "detected_at": { + "name": "detected_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "resolved_at": { + "name": "resolved_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "resolved_by": { + "name": "resolved_by", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "resolution": { + "name": "resolution", + "type": "varchar(40)", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_review_queue_unresolved": { + "name": "idx_review_queue_unresolved", + "columns": [ + { + "expression": "detected_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"hmrc_company_mapping_review_queue\".\"resolved_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_review_queue_org": { + "name": "idx_review_queue_org", + "columns": [ + { + "expression": "organisation_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "ux_review_queue_unresolved_org_reason": { + "name": "ux_review_queue_unresolved_org_reason", + "columns": [ + { + "expression": "organisation_name", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "reason", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "where": "\"hmrc_company_mapping_review_queue\".\"resolved_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": 
{}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.hmrc_ingestion_meta": { + "name": "hmrc_ingestion_meta", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "csv_url": { + "name": "csv_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "checksum": { + "name": "checksum", + "type": "varchar(64)", + "primaryKey": false, + "notNull": true + }, + "record_count": { + "name": "record_count", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "ingested_at": { + "name": "ingested_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.hmrc_skilled_workers": { + "name": "hmrc_skilled_workers", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "hash": { + "name": "hash", + "type": "varchar(11)", + "primaryKey": false, + "notNull": true + }, + "organisation_name": { + "name": "organisation_name", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + "name_slug": { + "name": "name_slug", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + "sponsor_licence_number": { + "name": "sponsor_licence_number", + "type": "varchar(64)", + "primaryKey": false, + "notNull": false + }, + "sponsor_status": { + "name": "sponsor_status", + "type": "varchar(64)", + "primaryKey": false, + "notNull": false + }, + "type_rating": { + "name": "type_rating", + "type": "varchar(100)", + "primaryKey": false, + "notNull": true + }, + "route": { + "name": "route", + "type": "varchar(100)", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + 
"idx_hmrc_org_name": { + "name": "idx_hmrc_org_name", + "columns": [ + { + "expression": "organisation_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_hmrc_name_slug": { + "name": "idx_hmrc_name_slug", + "columns": [ + { + "expression": "name_slug", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_hmrc_licence": { + "name": "idx_hmrc_licence", + "columns": [ + { + "expression": "sponsor_licence_number", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_hmrc_route": { + "name": "idx_hmrc_route", + "columns": [ + { + "expression": "route", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_hmrc_org_name_trgm": { + "name": "idx_hmrc_org_name_trgm", + "columns": [ + { + "expression": "\"organisation_name\" gin_trgm_ops", + "asc": true, + "isExpression": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "hmrc_skilled_workers_hash_unique": { + "name": "hmrc_skilled_workers_hash_unique", + "nullsNotDistinct": false, + "columns": ["hash"] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.sic_codes": { + "name": "sic_codes", + "schema": "", + "columns": { + "code": { + "name": "code", + "type": "varchar(10)", + "primaryKey": true, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + 
"uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + } + }, + "enums": {}, + "schemas": {}, + "sequences": {}, + "roles": {}, + "policies": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} diff --git a/packages/db/migrations/meta/_journal.json b/packages/db/migrations/meta/_journal.json index 8920d00..47e22d4 100644 --- a/packages/db/migrations/meta/_journal.json +++ b/packages/db/migrations/meta/_journal.json @@ -176,6 +176,27 @@ "when": 1777709462971, "tag": "0024_sharp_next_avengers", "breakpoints": true + }, + { + "idx": 25, + "version": "7", + "when": 1781078983389, + "tag": "0025_add-sponsor-licence", + "breakpoints": true + }, + { + "idx": 26, + "version": "7", + "when": 1781079003844, + "tag": "0026_drop-town-county", + "breakpoints": true + }, + { + "idx": 27, + "version": "7", + "when": 1781107776369, + "tag": "0027_widen-sponsor-licence", + "breakpoints": true } ] } diff --git a/packages/db/src/schema.ts b/packages/db/src/schema.ts index f20f648..85cab7b 100644 --- a/packages/db/src/schema.ts +++ b/packages/db/src/schema.ts @@ -21,15 +21,15 @@ export const hmrcSkilledWorkers = pgTable( hash: varchar('hash', { length: 11 }).notNull().unique(), organisationName: varchar('organisation_name', { length: 255 }).notNull(), nameSlug: varchar('name_slug', { length: 255 }).notNull(), - townCity: varchar('town_city', { length: 100 }), - county: varchar('county', { length: 100 }), + sponsorLicenceNumber: varchar('sponsor_licence_number', { length: 64 }), + sponsorStatus: varchar('sponsor_status', { length: 64 }), typeRating: varchar('type_rating', { length: 100 }).notNull(), route: varchar('route', { length: 100 }).notNull(), }, (table) => [ index('idx_hmrc_org_name').on(table.organisationName), index('idx_hmrc_name_slug').on(table.nameSlug), - index('idx_hmrc_town_city').on(table.townCity), + index('idx_hmrc_licence').on(table.sponsorLicenceNumber), 
index('idx_hmrc_route').on(table.route), index('idx_hmrc_org_name_trgm').using( 'gin',