Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions tests/unit/test_dv2_business_vault_ddl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
"""Unit tests for the DV2 ClickHouse business-vault views (no Docker).

The ClickHouse ``business_vault/*.sql`` views had no parse check or test coverage
before this file. It pins that every view parses under sqlglot's ClickHouse dialect and
that the customer MDM views admit hub rows by branch via
``splitByString('__', record_source)[2]`` — NOT by a hard-coded
``record_source = '1c__<branch>'`` filter that silently dropped OLTP/X5-promoted
customers (``record_source`` ``pg_ops__`` / ``x5__``).

This is the ClickHouse half of audit_28_06_26 #12. The PostgreSQL port was fixed
in ``test_dv2_postgres_ddl.py::test_customer_mdm_views_admit_all_source_conventions``;
pinning the ClickHouse views to the same source-agnostic admission keeps the two
engines from drifting apart (the split-brain the audit flagged). A live apply +
``bv_customer_mdm`` query is a separate Mac smoke (see business_vault/README.md).
"""

from __future__ import annotations

import glob
import re
from pathlib import Path

import sqlglot

import warehouse.agentflow.dv2.dialects as dialects_mod

DV2_DIR = Path(dialects_mod.__file__).resolve().parent
BV_DIR = DV2_DIR / "business_vault"

MDM_BRANCHES = ("msk", "spb", "ekb", "dxb", "ala")


def _strip_comments(sql: str) -> str:
sql = re.sub(r"/\*.*?\*/", "", sql, flags=re.DOTALL)
sql = re.sub(r"--[^\n]*", "", sql)
return sql


def _bv_sql_files() -> list[Path]:
    """Return the ``*.sql`` files directly under ``BV_DIR``, sorted by path string."""
    pattern = str(BV_DIR / "*.sql")
    matches = glob.glob(pattern)
    matches.sort()
    return list(map(Path, matches))


def test_all_business_vault_ddl_parses():
    """Every business-vault ``*.sql`` file must parse under the ClickHouse dialect.

    Each file is read as UTF-8 and handed to ``sqlglot.parse``. The result may
    contain ``None`` entries for empty statements, so only real parsed
    statements are counted; otherwise a blank or comment-only file could
    satisfy the check with a non-empty list of ``None``.
    """
    files = _bv_sql_files()
    # 5 customer MDM views + bv_order_canonical + bv_order_canonical_mat.
    assert len(files) >= 7, f"expected >=7 business-vault SQL files, found {len(files)}"
    for path in files:
        parsed = sqlglot.parse(path.read_text(encoding="utf-8"), dialect="clickhouse")
        # Drop the placeholders sqlglot emits for empty statements.
        statements = [stmt for stmt in parsed if stmt is not None]
        assert statements, f"{path.name} produced no statements"


def test_every_mdm_branch_view_exists():
    """Each branch listed in ``MDM_BRANCHES`` must have its own MDM view file."""
    for branch in MDM_BRANCHES:
        view_name = f"bv_customer_mdm__{branch}.sql"
        view_file = BV_DIR.joinpath(view_name)
        assert view_file.exists(), f"missing ClickHouse MDM view for branch {branch}"


def test_customer_mdm_views_admit_all_source_conventions():
    """audit_28_06_26 #12 (ClickHouse half): the customer MDM views must select
    hub rows by branch via ``splitByString('__', record_source)[2]``, NOT by a
    hard-coded ``record_source = '1c__<branch>'`` filter that silently drops
    OLTP/X5-promoted customers (record_source ``pg_ops__`` / ``x5__``). Proven
    live on PostgreSQL in #99: the buggy filter returns 1 of 2 seeded customers,
    the source-agnostic filter returns both. This keeps the ClickHouse views in
    lock-step with the PostgreSQL port so the engines cannot diverge again.

    The regression guard tolerates any whitespace around ``=`` so that a
    reformatted ``record_source='1c__...'`` cannot slip past an exact-substring
    check."""
    # Whitespace-tolerant form of the regressed filter; deliberately still
    # case-sensitive and limited to the ``1c__`` prefix.
    hardcoded_1c_filter = re.compile(r"record_source\s*=\s*'1c__")
    for branch in MDM_BRANCHES:
        path = BV_DIR / f"bv_customer_mdm__{branch}.sql"
        # Strip block/line comments: the headers deliberately quote the old,
        # buggy ``record_source = '1c__<branch>'`` filter to explain the fix.
        body = _strip_comments(path.read_text(encoding="utf-8"))
        assert f"CREATE OR REPLACE VIEW rv.bv_customer_mdm__{branch}" in body, (
            f"bv_customer_mdm__{branch}.sql must define rv.bv_customer_mdm__{branch}"
        )
        assert f"splitByString('__', record_source)[2] = '{branch}'" in body, (
            f"bv_customer_mdm__{branch} must admit hubs by branch via "
            f"splitByString('__', record_source)[2], not by source convention"
        )
        # The regressed pattern must never reappear in executable SQL.
        assert hardcoded_1c_filter.search(body) is None, (
            f"hard-coded record_source = '1c__<branch>' filter in "
            f"bv_customer_mdm__{branch} reintroduces audit #12"
        )
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
Purpose: Canonical customer record for the ALA branch.
Layer: Business Vault.
Branch: ala (KZ jurisdiction; Bitrix loyalty not wired in ALA).
Hub admission: splitByString('__', record_source)[2] = 'ala' (source-agnostic:
1c__/pg_ops__/x5__ all integrated, not only 1C; audit_28_06_26 #12;
mirrors the PostgreSQL port's split_part(record_source,'__',2)).
*/
CREATE OR REPLACE VIEW rv.bv_customer_mdm__ala AS
WITH
Expand All @@ -21,7 +24,7 @@ WITH
ala_hub AS (
SELECT customer_hk, customer_bk
FROM rv.hub_customer
WHERE record_source = '1c__ala'
WHERE splitByString('__', record_source)[2] = 'ala'
)
SELECT
h.customer_hk AS customer_hk,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ Conflict policy:
- Loyalty — not yet wired in for DXB (Bitrix24 sat is MSK-only); the view
keeps the loyalty columns for schema parity with bv_customer_mdm__msk so
downstream marts can UNION ALL the two branches without renaming.
Hub admission: splitByString('__', record_source)[2] = 'dxb' (source-agnostic:
1c__/pg_ops__/x5__ all integrated, not only 1C; audit_28_06_26 #12;
mirrors the PostgreSQL port's split_part(record_source,'__',2)).
*/
CREATE OR REPLACE VIEW rv.bv_customer_mdm__dxb AS
WITH
Expand All @@ -26,7 +29,7 @@ WITH
dxb_hub AS (
SELECT customer_hk, customer_bk
FROM rv.hub_customer
WHERE record_source = '1c__dxb'
WHERE splitByString('__', record_source)[2] = 'dxb'
)
SELECT
h.customer_hk AS customer_hk,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
Purpose: Canonical customer record for the EKB branch.
Layer: Business Vault.
Branch: ekb (RU jurisdiction; same conflict policy as MSK).
Hub admission: splitByString('__', record_source)[2] = 'ekb' (source-agnostic:
1c__/pg_ops__/x5__ all integrated, not only 1C; audit_28_06_26 #12;
mirrors the PostgreSQL port's split_part(record_source,'__',2)).
*/
CREATE OR REPLACE VIEW rv.bv_customer_mdm__ekb AS
WITH
Expand Down Expand Up @@ -32,7 +35,7 @@ WITH
ekb_hub AS (
SELECT customer_hk, customer_bk
FROM rv.hub_customer
WHERE record_source = '1c__ekb'
WHERE splitByString('__', record_source)[2] = 'ekb'
)
SELECT
h.customer_hk AS customer_hk,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ Conflict policy:
- Loyalty (segment/points/last_visit) — Bitrix wins (live CRM state).
- If a customer exists only in Bitrix, PII columns are NULL but the row
is still returned so loyalty-only customers stay visible.
Hub admission: splitByString('__', record_source)[2] = 'msk', so a customer
promoted under ANY source convention (1c__msk, pg_ops__msk, x5__msk,
...) is integrated, not only 1C. The old record_source = '1c__msk'
filter silently dropped OLTP/X5-promoted customers (audit_28_06_26 #12);
this mirrors the PostgreSQL port's split_part(record_source,'__',2).
*/
CREATE OR REPLACE VIEW rv.bv_customer_mdm__msk AS
WITH
Expand Down Expand Up @@ -38,7 +43,7 @@ WITH
msk_hub AS (
SELECT customer_hk, customer_bk, record_source AS hub_record_source
FROM rv.hub_customer
WHERE record_source = '1c__msk'
WHERE splitByString('__', record_source)[2] = 'msk'
)
SELECT
h.customer_hk AS customer_hk,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
Purpose: Canonical customer record for the SPB branch.
Layer: Business Vault.
Branch: spb (RU jurisdiction; same conflict policy as MSK).
Hub admission: splitByString('__', record_source)[2] = 'spb' (source-agnostic:
1c__/pg_ops__/x5__ all integrated, not only 1C; audit_28_06_26 #12;
mirrors the PostgreSQL port's split_part(record_source,'__',2)).
*/
CREATE OR REPLACE VIEW rv.bv_customer_mdm__spb AS
WITH
Expand Down Expand Up @@ -32,7 +35,7 @@ WITH
spb_hub AS (
SELECT customer_hk, customer_bk
FROM rv.hub_customer
WHERE record_source = '1c__spb'
WHERE splitByString('__', record_source)[2] = 'spb'
)
SELECT
h.customer_hk AS customer_hk,
Expand Down