From 1c47527e6bfce099212888f00084122699941625 Mon Sep 17 00:00:00 2001 From: AGibson <4319494+annajgibson@users.noreply.github.com> Date: Fri, 7 Nov 2025 15:55:52 +0000 Subject: [PATCH] add GX DQ tests for Contacts, People, Tenancies and Property NEC data loads --- .../housing_nec_migration_gx_dq_inputs.py | 17 +++- ...c_migration_contacts_data_load_gx_suite.py | 60 +++++++++++++ ...nec_migration_people_data_load_gx_suite.py | 90 +++++++++++++++++++ ...migration_properties_data_load_gx_suite.py | 6 ++ ..._migration_tenancies_data_load_gx_suite.py | 87 +++++++++--------- 5 files changed, 214 insertions(+), 46 deletions(-) create mode 100644 scripts/jobs/housing/housing_nec_migration_contacts_data_load_gx_suite.py create mode 100644 scripts/jobs/housing/housing_nec_migration_people_data_load_gx_suite.py diff --git a/scripts/helpers/housing_nec_migration_gx_dq_inputs.py b/scripts/helpers/housing_nec_migration_gx_dq_inputs.py index e00e3d237..c68e90f7a 100644 --- a/scripts/helpers/housing_nec_migration_gx_dq_inputs.py +++ b/scripts/helpers/housing_nec_migration_gx_dq_inputs.py @@ -1,9 +1,11 @@ sql_config = { "properties": {"id_field": "LPRO_PROPREF"}, "tenancies": {"id_field": "LTCY_ALT_REF"}, + "people": {"id_field": "LPAR_PER_ALT_REF"}, + "contacts": {"id_field": "LCDE_LEGACY_REF"}, } -data_load_list = ["properties", "tenancies"] +data_load_list = ["properties", "tenancies", "people", "contacts"] table_list = { "properties": [ @@ -18,10 +20,17 @@ "properties_4b", "properties_4c", "properties_7a", - "properties_all_tranches" + "properties_all_tranches", ], - "tenancies": ["tenancies_1a", - "tenancies_1c"] + "tenancies": [ + "tenancies_1a", + "tenancies_1c", + "tenancies_2a", + "tenancies_all", + "tenancies_other", + ], + "people": ["people_1a", "people_1b", "people_1c", "people_2a"], + "contacts": ["contacts_1a", "contacts_1b", "contacts_2a"] } partition_keys = ["import_date"] diff --git a/scripts/jobs/housing/housing_nec_migration_contacts_data_load_gx_suite.py 
b/scripts/jobs/housing/housing_nec_migration_contacts_data_load_gx_suite.py
new file mode 100644
index 000000000..6f0f06c68
--- /dev/null
+++ b/scripts/jobs/housing/housing_nec_migration_contacts_data_load_gx_suite.py
@@ -0,0 +1,60 @@
+# flake8: noqa: F821
+
+import sys
+
+from awsglue.utils import getResolvedOptions
+import great_expectations as gx
+import great_expectations.expectations as gxe
+
+
+class ExpectPersonRefColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
+    column: str = "LCDE_LEGACY_REF"
+    description: str = "Expect LCDE_LEGACY_REF (person ref) values to not be Null in contacts load"
+
+
+class ExpectValueColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
+    column: str = "LCDE_CONTACT_VALUE"
+    description: str = "Expect LCDE_CONTACT_VALUE (contact value) to not be Null"
+
+
+class ExpectContactTypeCodeToBeInSet(gxe.ExpectColumnValuesToBeInSet):
+    column: str = "LCDE_FRV_CME_CODE"  # must match the entry in the ordered column list below
+    value_set: list = ["WORKTEL", "MOBILETEL", "HOMETEL", "EMAIL", "OTHER"]
+    description: str = "Expect contact type code to be one of the set"
+
+
+class ExpectContactsColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrderedList):
+    column_list = [
+        "LCDE_START_DATE",
+        "LCDE_PRIMARY_REF",
+        "LCDE_SECONDARY_REF",
+        "LCDE_PRECEDENCE",
+        "LCDE_LEGACY_TYPE",
+        "LCDE_LEGACY_REF",
+        "LCDE_FRV_COMM_PREF_CODE",
+        "LCDE_FRV_CME_CODE",
+        "LCDE_END_DATE",
+        "LCDE_CREATED_DATE",
+        "LCDE_CREATED_BY",
+        "LCDE_CONTACT_VALUE",
+        "LCDE_CONTACT_NAME",
+        "LCDE_COMMENTS",
+        "LCDE_ALLOW_TEXTS",
+    ]
+    description: str = "Expect columns to match ordered list exactly"
+
+
+arg_key = ["s3_target_location"]
+args = getResolvedOptions(sys.argv, arg_key)
+locals().update(args)  # binds s3_target_location as a module-level name (hence the F821 suppression)
+
+# Open the file-mode GX context rooted at the resolved s3_target_location
+context = gx.get_context(mode="file", project_root_dir=s3_target_location)
+# Suite is named for the contacts load so it cannot collide with another load's suite
+suite = gx.ExpectationSuite(name="contacts_data_load_suite")
+
+suite.add_expectation(ExpectContactsColumnsToMatchOrderedList())
+suite.add_expectation(ExpectContactTypeCodeToBeInSet())
+suite.add_expectation(ExpectPersonRefColumnValuesToNotBeNull())
+suite.add_expectation(ExpectValueColumnValuesToNotBeNull())
+suite = context.suites.add(suite)
diff --git a/scripts/jobs/housing/housing_nec_migration_people_data_load_gx_suite.py b/scripts/jobs/housing/housing_nec_migration_people_data_load_gx_suite.py
new file mode 100644
index 000000000..ef99dc83a
--- /dev/null
+++ b/scripts/jobs/housing/housing_nec_migration_people_data_load_gx_suite.py
@@ -0,0 +1,90 @@
+# flake8: noqa: F821
+
+import sys
+
+from awsglue.utils import getResolvedOptions
+import great_expectations as gx
+import great_expectations.expectations as gxe
+
+
+class ExpectPersonRefColumnValuesToBeUnique(gxe.ExpectColumnValuesToBeUnique):
+    column: str = "LPAR_PER_ALT_REF"
+    description: str = "Expect LPAR_PER_ALT_REF (person ref) values to be unique"
+
+
+class ExpectPersonRefColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
+    column: str = "LPAR_PER_ALT_REF"
+    description: str = "Expect LPAR_PER_ALT_REF (person ref) values to not be Null"
+
+
+class ExpectTitleToBeInSet(gxe.ExpectColumnValuesToBeInSet):
+    column: str = "LPAR_PER_TITLE"
+    value_set: list = [
+        "DAME",
+        "DR",
+        "LAD",  # NOTE(review): confirm this code is intended and not a truncated "LADY"
+        "LORD",
+        "MASTER",
+        "MISS",
+        "MR",
+        "MRS",
+        "MS",
+        "MX",
+        "PROFESSOR",
+        "RABBI",
+        "REVEREND",
+        "SIR",
+        None,  # NOTE(review): presumably permits a missing title - confirm it is needed
+    ]
+    description: str = "Expect title to be one of the set"
+
+
+class ExpectPeopleColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrderedList):
+    column_list = [
+        "LPAR_TIN_HRV_TIR_CODE",
+        "LPAR_TIN_STAT_SUCCESSOR_IND",
+        "LPAR_TIN_START_DATE",
+        "LPAR_TIN_MAIN_TENANT_IND",
+        "LPAR_TIN_END_DATE",
+        "LPAR_TCY_IND",
+        "LPAR_TCY_ALT_REF",
+        "LPAR_PHONE",
+        "LPAR_PER_TITLE",
+        "LPAR_PER_SURNAME",
+        "LPAR_PER_OTHER_NAME",
+        "LPAR_PER_NI_NO",
+        "LPAR_PER_INITIALS",
+        "LPAR_PER_HOU_OAP_IND",
+        "LPAR_PER_HOU_HRV_HMS_CODE",
+        "LPAR_PER_HOU_EMPLOYER",
+        "LPAR_PER_HOU_DISABLED_IND",
+        "LPAR_PER_FRV_HGO_CODE",
+        "LPAR_PER_FRV_FNL_CODE",
+        "LPAR_PER_FRV_FGE_CODE",
+        "LPAR_PER_FRV_FEO_CODE",
+        "LPAR_PER_FORENAME",
+        "LPAR_PER_DATE_OF_BIRTH",
+        "LPAR_PER_ALT_REF",
+        "LPAR_HOP_START_DATE",
+        "LPAR_HOP_HRV_REL_CODE",
+        "LPAR_HOP_HPSR_CODE",
+        "LPAR_HOP_HPER_CODE",
+        "LPAR_HOP_END_DATE",
+    ]
+    description: str = "Expect people load columns to match ordered list exactly"
+
+
+arg_key = ["s3_target_location"]
+args = getResolvedOptions(sys.argv, arg_key)
+locals().update(args)  # binds s3_target_location as a module-level name (hence the F821 suppression)
+
+# Open the file-mode GX context rooted at the resolved s3_target_location
+context = gx.get_context(mode="file", project_root_dir=s3_target_location)
+# Suite is named for the people load so it cannot collide with another load's suite
+suite = gx.ExpectationSuite(name="people_data_load_suite")
+
+suite.add_expectation(ExpectPersonRefColumnValuesToBeUnique())
+suite.add_expectation(ExpectTitleToBeInSet())
+suite.add_expectation(ExpectPeopleColumnsToMatchOrderedList())
+suite.add_expectation(ExpectPersonRefColumnValuesToNotBeNull())
+suite = context.suites.add(suite)
diff --git a/scripts/jobs/housing/housing_nec_migration_properties_data_load_gx_suite.py b/scripts/jobs/housing/housing_nec_migration_properties_data_load_gx_suite.py
index 7c728d8de..6d337765a 100644
--- a/scripts/jobs/housing/housing_nec_migration_properties_data_load_gx_suite.py
+++ b/scripts/jobs/housing/housing_nec_migration_properties_data_load_gx_suite.py
@@ -12,6 +12,11 @@ class ExpectPropRefColumnValuesToBeUnique(gxe.ExpectColumnValuesToBeUnique):
     description: str = "Expect UPRN (LPRO_PROPREF) values to be unique"
 
 
+class ExpectPropRefColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
+    column: str = "LPRO_PROPREF"
+    description: str = "Expect LPRO_PROPREF (prop ref) values to not be Null"
+
+
 class ExpectPropTypeCodeToBeInSet(gxe.ExpectColumnValuesToBeInSet):
     column: str = "LPRO_HOU_PTV_CODE"
     value_set: list = [
@@ -158,4 +163,5 @@ class ExpectPropColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrderedLi
 suite.add_expectation(ExpectResIndicatorToBeInSet())
 suite.add_expectation(ExpectPropTypeValuesToBeInSet())
 suite.add_expectation(ExpectPropColumnsToMatchOrderedList())
+suite.add_expectation(ExpectPropRefColumnValuesToNotBeNull())
 suite = context.suites.add(suite)
diff --git a/scripts/jobs/housing/housing_nec_migration_tenancies_data_load_gx_suite.py b/scripts/jobs/housing/housing_nec_migration_tenancies_data_load_gx_suite.py
index 6242c43f3..724fd74a5 100644
--- a/scripts/jobs/housing/housing_nec_migration_tenancies_data_load_gx_suite.py
+++ b/scripts/jobs/housing/housing_nec_migration_tenancies_data_load_gx_suite.py
@@ -12,42 +12,47 @@ class ExpectTagRefColumnValuesToBeUnique(gxe.ExpectColumnValuesToBeUnique):
     description: str = "Expect LTCY_ALT_REF (tenancy ref) values to be unique"
 
 
+class ExpectTenancyRefColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
+    column: str = "LTCY_ALT_REF"
+    description: str = "Expect LTCY_ALT_REF (tenancy ref) values to not be Null"
+
+
 class ExpectTenancyTypeCodeToBeInSet(gxe.ExpectColumnValuesToBeInSet):
     column: str = "LTCY_TTY_CODE"
     value_set: list = [
-        'ASH',
-        'ASY',
-        'DEC',
-        'Demoted',
-        'FRE',
-        'FRS',
-        'HAL',
-        'LIVINGRT',
-        'INT',
-        'LEA',
-        'LHS',
-        'LTA',
-        'MPA',
-        'NON',
-        'PVG',
-        'RTM',
-        'SEC',
-        'SHO',
-        'SLL',
-        'SPS',
-        'SSE',
-        'TACCFLAT',
-        'TBB',
-        'TBBFam',
-        'THO',
-        'TGA',
-        'THL',
-        'THGF',
-        'TLA',
-        'TPL',
-        'TRA',
-        'UNDER18',
-        'OFFICESE'
+        "ASH",
+        "ASY",
+        "DEC",
+        "Demoted",
+        "FRE",
+        "FRS",
+        "HAL",
+        "LIVINGRT",
+        "INT",
+        "LEA",
+        "LHS",
+        "LTA",
+        "MPA",
+        "NON",
+        "PVG",
+        "RTM",
+        "SEC",
+        "SHO",
+        "SLL",
+        "SPS",
+        "SSE",
+        "TACCFLAT",
+        "TBB",
+        "TBBFam",
+        "THO",
+        "TGA",
+        "THL",
+        "THGF",
+        "TLA",
+        "TPL",
+        "TRA",
+        "UNDER18",
+        "OFFICESE",
     ]
     description: str = "Expect tenancy type code to contain one of the set"
 
@@ -62,22 +67,17 @@ class ExpectTenureTypeCodeToBeInSet(gxe.ExpectColumnValuesToBeInSet):
         "TEMPORARY",
         "FREEHOLD",
         "COMMERCIAL",
-        "LIVINGRENT"
+        "LIVINGRENT",
     ]
     description: str = "Expect tenure type code to be one of the set"
 
 
 class ExpectTenancyStatusCodeToBeInSet(gxe.ExpectColumnValuesToBeInSet):
     column: str = "LTCY_HRV_TST_CODE"
-    value_set: list = [
-        "DECANT",
-        "NOTICE",
-        "UNAUTHOCC"
-    ]
+    value_set: list = ["DECANT", "NOTICE", "UNAUTHOCC"]
     description: str = "Expect tenancy status code to be one of the set"
 
 
-
 class ExpectTenancyColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrderedList):
     column_list = [
         "LTCY_ALT_REF",
@@ -125,10 +125,11 @@ class ExpectTenancyColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrdere
         "LTCY_THO_END_DATE6",
         "LTCY_THO_HRV_TTR_CODE6",
         "LTCY_PHONE",
-        "LTCY_REVIEW_DATE"
+        "LTCY_REVIEW_DATE",
     ]
     description: str = "Expect tenancy load columns to match ordered list exactly"
 
+
 arg_key = ["s3_target_location"]
 args = getResolvedOptions(sys.argv, arg_key)
 locals().update(args)
@@ -143,4 +144,6 @@ class ExpectTenancyColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrdere
 suite.add_expectation(ExpectTenureTypeCodeToBeInSet())
 suite.add_expectation(ExpectTenancyStatusCodeToBeInSet())
 suite.add_expectation(ExpectTenancyColumnsToMatchOrderedList())
-suite = context.suites.add(suite)
+suite.add_expectation(ExpectTenancyRefColumnValuesToNotBeNull())
+# Add the assembled suite to the GX context; the value returned by add() is not kept
+context.suites.add(suite)