From 447821a04e90e0a2f8d2770ff2a4e211f69ee134 Mon Sep 17 00:00:00 2001 From: AGibson <4319494+annajgibson@users.noreply.github.com> Date: Tue, 20 May 2025 15:52:34 +0100 Subject: [PATCH 1/5] Amend sql queries to only include key columns and rows --- scripts/helpers/housing_gx_dq_inputs.py | 78 ++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 3 deletions(-) diff --git a/scripts/helpers/housing_gx_dq_inputs.py b/scripts/helpers/housing_gx_dq_inputs.py index 5f5b5cbc1..8d66fa33e 100644 --- a/scripts/helpers/housing_gx_dq_inputs.py +++ b/scripts/helpers/housing_gx_dq_inputs.py @@ -6,16 +6,88 @@ 'sql': """SELECT * FROM "housing-refined-zone"."tenure_reshape" where import_date=(select max(import_date) from "housing-refined-zone"."tenure_reshape") and description in ('Secure', 'Introductory', 'Mesne Profit Ac', 'Non-Secure') and (endoftenuredate is null or substr(endoftenuredate, 1, 11) = '1900-01-01')""", 'id_field': 'tenancy_id'}, 'contacts_reshape': { - 'sql': """SELECT id, targetid, substr(createdat, 1, 10) as createdat, contacttype, subtype, value, substr(lastmodified, 1, 10) as lastmodified, targettype, isactive, person_id, import_date FROM "housing-refined-zone"."contacts_reshape" where import_date=(select max(import_date) from "housing-refined-zone"."contacts_reshape") and isactive=True""", + 'sql': """SELECT + DATE_FORMAT(from_iso8601_timestamp(createdat), '%Y-%b-%d') AS createdat, + contacttype, + subtype, + cast(value as varchar) as contact_value, + DATE_FORMAT(from_iso8601_timestamp(lastmodified), '%Y-%b-%d') AS lastmodified, + targettype, + person_id, + import_year, + import_month, + import_day, + import_date + FROM "housing-refined-zone"."contacts_reshape" + where targettype = 'person' + and import_date = (select max(import_date) from "housing-refined-zone"."contacts_reshape")""", 'id_field': 'id'}, 'housing_homeowner_record_sheet': { 'sql': """SELECT * FROM "housing-raw-zone"."housing_homeowner_record_sheet" where import_date=(select 
max(import_date) from "housing-raw-zone"."housing_homeowner_record_sheet")""", 'id_field': 'property_no'}, 'housing_dwellings_list': { - 'sql': """SELECT * FROM "housing-raw-zone"."housing_dwellings_list" where import_date=(select max(import_date) from "housing-raw-zone"."housing_homeowner_record_sheet")""", + 'sql': """SELECT + cast(property_dwelling_reference_number as varchar) as property_dwelling_reference_number, + cast(llpg as varchar) as llpg, + cast(estate_reference_number as varchar) as estate_reference_number, + estate_name, + block_name_as_per_naming_convention_procedure as block_name, + cast(block_reference_number as varchar) as block_reference_number, + sub_block_name, + cast(sub_block_reference as varchar) as sub_block_reference, + block_total_stories, + block_typology_based_on_dwelling_type_defined_list as block_typology, + building_heights_h1, + communal_heat_network, + dwelling_front_door_access_floor_level, + dwelling_no_of_bedrooms, + fra_block, + no_of_floors_within_dwelling, + registered_building, + year_built_block + FROM "housing-raw-zone"."housing_dwellings_list" where import_date = (select max(import_date) from "housing-raw-zone"."housing_dwellings_list")""", 'id_field': 'property_dwelling_reference_number'}, 'assets_reshape': { - 'sql': """SELECT * FROM "housing-refined-zone"."assets_reshape" where import_date=(select max(import_date) from "housing-refined-zone"."assets_reshape") and assettype = 'Dwelling'""", + 'sql': """select + cast(assetid as varchar) as assetid, + assettype, + cast(uprn as varchar) as uprn, + owner as property_owner, + agent, + iscouncilproperty, + istmomanaged, + managingorganisation, + cast(managingorganisationid as varchar) as managingorganisationid, + totalblockfloors, + floorno, + numberoflifts, + numberoflivingrooms, + numberofkitchens, + numberofbedrooms, + numberofbedspaces, + numberofshowers, + numberoffloors, + numberofdoublebeds, + numberofsinglebeds, + numberofbathrooms, + yearconstructed, + 
hascommunalareas, + hasprivatekitchen, + hasprivatebathroom, + hasrampaccess, + isstepfree, + hasstairs, + heating, + numberofcots, + windowtype, + estate_name, + cast(estate_id as varchar) as estate_id, + import_year, + import_month, + import_day, + import_date + FROM "housing-refined-zone"."assets_reshape" where import_date = (select max(import_date) from "housing-refined-zone"."assets_reshape") + and assettype in ('BoilerHouse','BoosterPump','CleanersFacilities','CombinedHeatAndPowerUnit','CommunityHall','Concierge','Dwelling','LettableNonDwelling','Lift','NA','NBD','OutBuilding','TravellerSite')""", 'id_field': 'asset_id'}, 'matenancyagreement': { 'sql': """SELECT *, substr(cast(eot as varchar), 1, 10) as eot_parsed, substr(cast(cot as varchar), 1, 10) as cot_parsed FROM "housing-raw-zone"."sow2b_dbo_matenancyagreement" where import_date=(select max(import_date) FROM "housing-raw-zone"."sow2b_dbo_matenancyagreement")""", From 5e1fbb95b25e6229609b8f4c058c96f0dfb01d8b Mon Sep 17 00:00:00 2001 From: AGibson <4319494+annajgibson@users.noreply.github.com> Date: Tue, 20 May 2025 16:03:16 +0100 Subject: [PATCH 2/5] Amend sql queries to only include key columns and rows --- scripts/helpers/housing_gx_dq_inputs.py | 340 ++++++++++++------------ 1 file changed, 170 insertions(+), 170 deletions(-) diff --git a/scripts/helpers/housing_gx_dq_inputs.py b/scripts/helpers/housing_gx_dq_inputs.py index 8d66fa33e..bf5be059f 100644 --- a/scripts/helpers/housing_gx_dq_inputs.py +++ b/scripts/helpers/housing_gx_dq_inputs.py @@ -1,178 +1,178 @@ sql_config = {'person_reshape': { - 'sql': """SELECT *, substr(startdate, 1, 10) as startdate_parsed, substr(enddate, 1, 10) as enddate_parsed, - substr(dateofbirth, 1, 10) as dateofbirth_parsed FROM "housing-refined-zone"."person_reshape" WHERE import_date = (SELECT max(import_date) FROM "housing-refined-zone"."person_reshape") AND enddate IS NULL AND type IN ('Secure', 'Introductory')""", - 'id_field': 'person_id'}, - 'tenure_reshape': { - 
'sql': """SELECT * FROM "housing-refined-zone"."tenure_reshape" where import_date=(select max(import_date) from "housing-refined-zone"."tenure_reshape") and description in ('Secure', 'Introductory', 'Mesne Profit Ac', 'Non-Secure') and (endoftenuredate is null or substr(endoftenuredate, 1, 11) = '1900-01-01')""", - 'id_field': 'tenancy_id'}, - 'contacts_reshape': { - 'sql': """SELECT - DATE_FORMAT(from_iso8601_timestamp(createdat), '%Y-%b-%d') AS createdat, - contacttype, - subtype, - cast(value as varchar) as contact_value, - DATE_FORMAT(from_iso8601_timestamp(lastmodified), '%Y-%b-%d') AS lastmodified, - targettype, - person_id, - import_year, - import_month, - import_day, - import_date - FROM "housing-refined-zone"."contacts_reshape" - where targettype = 'person' - and import_date = (select max(import_date) from "housing-refined-zone"."contacts_reshape")""", - 'id_field': 'id'}, - 'housing_homeowner_record_sheet': { - 'sql': """SELECT * FROM "housing-raw-zone"."housing_homeowner_record_sheet" where import_date=(select max(import_date) from "housing-raw-zone"."housing_homeowner_record_sheet")""", - 'id_field': 'property_no'}, - 'housing_dwellings_list': { - 'sql': """SELECT - cast(property_dwelling_reference_number as varchar) as property_dwelling_reference_number, - cast(llpg as varchar) as llpg, - cast(estate_reference_number as varchar) as estate_reference_number, - estate_name, - block_name_as_per_naming_convention_procedure as block_name, - cast(block_reference_number as varchar) as block_reference_number, - sub_block_name, - cast(sub_block_reference as varchar) as sub_block_reference, - block_total_stories, - block_typology_based_on_dwelling_type_defined_list as block_typology, - building_heights_h1, - communal_heat_network, - dwelling_front_door_access_floor_level, - dwelling_no_of_bedrooms, - fra_block, - no_of_floors_within_dwelling, - registered_building, - year_built_block - FROM "housing-raw-zone"."housing_dwellings_list" where import_date = (select 
max(import_date) from "housing-raw-zone"."housing_dwellings_list")""", - 'id_field': 'property_dwelling_reference_number'}, - 'assets_reshape': { - 'sql': """select - cast(assetid as varchar) as assetid, - assettype, - cast(uprn as varchar) as uprn, - owner as property_owner, - agent, - iscouncilproperty, - istmomanaged, - managingorganisation, - cast(managingorganisationid as varchar) as managingorganisationid, - totalblockfloors, - floorno, - numberoflifts, - numberoflivingrooms, - numberofkitchens, - numberofbedrooms, - numberofbedspaces, - numberofshowers, - numberoffloors, - numberofdoublebeds, - numberofsinglebeds, - numberofbathrooms, - yearconstructed, - hascommunalareas, - hasprivatekitchen, - hasprivatebathroom, - hasrampaccess, - isstepfree, - hasstairs, - heating, - numberofcots, - windowtype, - estate_name, - cast(estate_id as varchar) as estate_id, - import_year, - import_month, - import_day, - import_date - FROM "housing-refined-zone"."assets_reshape" where import_date = (select max(import_date) from "housing-refined-zone"."assets_reshape") - and assettype in ('BoilerHouse','BoosterPump','CleanersFacilities','CombinedHeatAndPowerUnit','CommunityHall','Concierge','Dwelling','LettableNonDwelling','Lift','NA','NBD','OutBuilding','TravellerSite')""", - 'id_field': 'asset_id'}, - 'matenancyagreement': { - 'sql': """SELECT *, substr(cast(eot as varchar), 1, 10) as eot_parsed, substr(cast(cot as varchar), 1, 10) as cot_parsed FROM "housing-raw-zone"."sow2b_dbo_matenancyagreement" where import_date=(select max(import_date) FROM "housing-raw-zone"."sow2b_dbo_matenancyagreement")""", - 'id_field': 'tag_ref'}, - 'maproperty': { - 'sql': """SELECT * FROM "housing-raw-zone"."sow2b_dbo_maproperty" where import_date=(select max(import_date) FROM "housing-raw-zone"."sow2b_dbo_maproperty")""", - 'id_field': 'prop_ref'} +'sql': """SELECT *, substr(startdate, 1, 10) as startdate_parsed, substr(enddate, 1, 10) as enddate_parsed, +substr(dateofbirth, 1, 10) as 
dateofbirth_parsed FROM "housing-refined-zone"."person_reshape" WHERE import_date = (SELECT max(import_date) FROM "housing-refined-zone"."person_reshape") AND enddate IS NULL AND type IN ('Secure', 'Introductory')""", +'id_field': 'person_id'}, +'tenure_reshape': { + 'sql': """SELECT * FROM "housing-refined-zone"."tenure_reshape" where import_date=(select max(import_date) from "housing-refined-zone"."tenure_reshape") and description in ('Secure', 'Introductory', 'Mesne Profit Ac', 'Non-Secure') and (endoftenuredate is null or substr(endoftenuredate, 1, 11) = '1900-01-01')""", + 'id_field': 'tenancy_id'}, +'contacts_reshape': { + 'sql': """SELECT + DATE_FORMAT(from_iso8601_timestamp(createdat), '%Y-%b-%d') AS createdat, + contacttype, + subtype, + cast(value as varchar) as contact_value, + DATE_FORMAT(from_iso8601_timestamp(lastmodified), '%Y-%b-%d') AS lastmodified, + targettype, + person_id, + import_year, + import_month, + import_day, + import_date + FROM "housing-refined-zone"."contacts_reshape" + where targettype = 'person' + and import_date = (select max(import_date) from "housing-refined-zone"."contacts_reshape")""", + 'id_field': 'id'}, +'housing_homeowner_record_sheet': { + 'sql': """SELECT * FROM "housing-raw-zone"."housing_homeowner_record_sheet" where import_date=(select max(import_date) from "housing-raw-zone"."housing_homeowner_record_sheet")""", + 'id_field': 'property_no'}, +'housing_dwellings_list': { + 'sql': """SELECT + cast(property_dwelling_reference_number as varchar) as property_dwelling_reference_number, + cast(llpg as varchar) as llpg, + cast(estate_reference_number as varchar) as estate_reference_number, + estate_name, + block_name_as_per_naming_convention_procedure as block_name, + cast(block_reference_number as varchar) as block_reference_number, + sub_block_name, + cast(sub_block_reference as varchar) as sub_block_reference, + block_total_stories, + block_typology_based_on_dwelling_type_defined_list as block_typology, + 
building_heights_h1, + communal_heat_network, + dwelling_front_door_access_floor_level, + dwelling_no_of_bedrooms, + fra_block, + no_of_floors_within_dwelling, + registered_building, + year_built_block + FROM "housing-raw-zone"."housing_dwellings_list" where import_date = (select max(import_date) from "housing-raw-zone"."housing_dwellings_list")""", + 'id_field': 'property_dwelling_reference_number'}, +'assets_reshape': { + 'sql': """select + cast(assetid as varchar) as assetid, + assettype, + cast(uprn as varchar) as uprn, + owner as property_owner, + agent, + iscouncilproperty, + istmomanaged, + managingorganisation, + cast(managingorganisationid as varchar) as managingorganisationid, + totalblockfloors, + floorno, + numberoflifts, + numberoflivingrooms, + numberofkitchens, + numberofbedrooms, + numberofbedspaces, + numberofshowers, + numberoffloors, + numberofdoublebeds, + numberofsinglebeds, + numberofbathrooms, + yearconstructed, + hascommunalareas, + hasprivatekitchen, + hasprivatebathroom, + hasrampaccess, + isstepfree, + hasstairs, + heating, + numberofcots, + windowtype, + estate_name, + cast(estate_id as varchar) as estate_id, + import_year, + import_month, + import_day, + import_date + FROM "housing-refined-zone"."assets_reshape" where import_date = (select max(import_date) from "housing-refined-zone"."assets_reshape") + and assettype in ('BoilerHouse','BoosterPump','CleanersFacilities','CombinedHeatAndPowerUnit','CommunityHall','Concierge','Dwelling','LettableNonDwelling','Lift','NA','NBD','OutBuilding','TravellerSite')""", + 'id_field': 'asset_id'}, +'matenancyagreement': { + 'sql': """SELECT *, substr(cast(eot as varchar), 1, 10) as eot_parsed, substr(cast(cot as varchar), 1, 10) as cot_parsed FROM "housing-raw-zone"."sow2b_dbo_matenancyagreement" where import_date=(select max(import_date) FROM "housing-raw-zone"."sow2b_dbo_matenancyagreement")""", + 'id_field': 'tag_ref'}, +'maproperty': { + 'sql': """SELECT * FROM 
"housing-raw-zone"."sow2b_dbo_maproperty" where import_date=(select max(import_date) FROM "housing-raw-zone"."sow2b_dbo_maproperty")""", + 'id_field': 'prop_ref'} } table_list = ['person_reshape', 'tenure_reshape', 'contacts_reshape', 'housing_homeowner_record_sheet', - 'housing_dwellings_list', 'assets_reshape', 'matenancyagreement', 'maproperty'] + 'housing_dwellings_list', 'assets_reshape', 'matenancyagreement', 'maproperty'] partition_keys = ['import_year', 'import_month', 'import_day', 'import_date'] dq_dimensions_map = { - 'expect_arr_patch_not_to_be_null': 'COMPLETENESS', - 'expect_asset_id_not_to_be_null': 'COMPLETENESS', - 'expect_asset_type_not_to_be_null': 'COMPLETENESS', - 'expect_asset_type_values_to_be_in_set': 'CONSISTENCY', - 'expect_block_ref_no_column_values_to_match_regex': 'ACCURACY', - 'expect_co_t_not_to_be_null': 'COMPLETENESS', - 'expect_co_t_to_be_between': 'VALIDITY', - 'expect_column_value_lengths_to_be_between': 'VALIDITY', - 'expect_column_values_to_be_in_set': 'CONSISTENCY', - 'expect_column_values_to_be_unique': 'UNIQUENESS', - 'expect_column_values_to_match_regex': 'ACCURACY', - 'expect_column_values_to_not_be_null': 'COMPLETENESS', - 'expect_contact_type_column_values_to_be_in_set': 'CONSISTENCY', - 'expect_contact_type_column_values_to_not_be_null': 'COMPLETENESS', - 'expect_contact_value_column_values_to_be_unique': 'UNIQUENESS', - 'expect_contact_value_column_values_to_not_be_null': 'COMPLETENESS', - 'expect_date_of_birth_column_values_to_not_be_null': 'COMPLETENESS', - 'expect_date_of_birth_to_be_between': 'VALIDITY', - 'expect_description_values_to_be_in_set': 'CONSISTENCY', - 'expect_end_of_tenure_date_column_to_be_null': 'COMPLETENESS', - 'expect_eo_t_to_be_between': 'VALIDITY', - 'expect_estate_ref_no_column_values_to_match_regex': 'ACCURACY', - 'expect_first_name_column_value_length': 'ACCURACY', - 'expect_firstname_column_value_length': 'ACCURACY', - 'expect_is_organisation_column_values_to_not_be_null': 'COMPLETENESS', - 
'expect_is_organisation_values_to_be_in_set': 'CONSISTENCY', - 'expect_llpg_and_prop_ref_column_values_to_be_unique_within_record': 'UNIQUENESS', - 'expect_llpg_column_value_lengths_between': 'VALIDITY', - 'expect_llpg_column_values_to_be_unique': 'UNIQUENESS', - 'expect_llpg_column_values_to_match_regex': 'ACCURACY', - 'expect_llpg_column_values_to_not_be_null': 'COMPLETENESS', - 'expect_member_full_name_column_value_lengths_between': 'VALIDITY', - 'expect_member_is_responsible_values_to_be_in_set': 'CONSISTENCY', - 'expect_payment_ref_no_column_values_to_match_regex': 'ACCURACY', - 'expect_payment_reference_column_not_to_be_null': 'COMPLETENESS', - 'expect_person_id_and_payment_reference_column_values_to_be_unique_within_record': 'UNIQUENESS', - 'expect_person_id_and_property_reference_column_values_to_be_unique_within_record': 'UNIQUENESS', - 'expect_person_id_column_values_to_be_unique': 'UNIQUENESS', - 'expect_person_id_column_values_to_not_be_null': 'COMPLETENESS', - 'expect_person_type_values_to_be_in_set': 'CONSISTENCY', - 'expect_preferred_title_values_to_be_in_set': 'CONSISTENCY', - 'expect_prop_no_and_payment_ref_column_values_to_be_unique_within_record': 'UNIQUENESS', - 'expect_prop_no_column_values_to_be_unique': 'UNIQUENESS', - 'expect_prop_no_column_values_to_not_be_null': 'COMPLETENESS', - 'expect_prop_no_no_column_values_to_match_regex': 'ACCURACY', - 'expect_prop_ref_column_values_to_be_unique': 'UNIQUENESS', - 'expect_prop_ref_not_to_be_null': 'COMPLETENESS', - 'expect_property_ref_column_values_to_not_be_null': 'COMPLETENESS', - 'expect_rent_group_ref_not_to_be_null': 'COMPLETENESS', - 'expect_saff_rent_acc_not_to_be_null': 'COMPLETENESS', - 'expect_select_column_values_to_be_unique_within_record': 'UNIQUENESS', - 'expect_start_of_tenure_date_column_not_to_be_null': 'COMPLETENESS', - 'expect_sub_type_column_values_to_be_in_set': 'CONSISTENCY', - 'expect_sub_type_column_values_to_not_be_null': 'COMPLETENESS', - 
'expect_surname_column_value_length': 'ACCURACY', - 'expect_tag_ref_column_not_to_be_null': 'COMPLETENESS', - 'expect_tag_ref_column_values_to_be_unique': 'UNIQUENESS', - 'expect_tag_ref_not_to_be_null': 'COMPLETENESS', - 'expect_target_id_and_value_column_values_to_be_unique_within_record': 'UNIQUENESS', - 'expect_target_id_column_values_to_not_be_null': 'COMPLETENESS', - 'expect_target_type_column_values_to_be_in_set': 'CONSISTENCY', - 'expect_tenancy_id_and_payment_reference_column_values_to_be_unique_within_record': 'UNIQUENESS', - 'expect_tenancy_id_and_property_reference_column_values_to_be_unique_within_record': 'UNIQUENESS', - 'expect_tenancy_id_column_not_to_be_null': 'COMPLETENESS', - 'expect_tenure_code_column_not_to_be_null': 'COMPLETENESS', - 'expect_tenure_code_values_to_be_in_set': 'CONSISTENCY', - 'expect_tenure_not_to_be_null': 'COMPLETENESS', - 'expect_tenure_type_column_values_to_be_in_set': 'CONSISTENCY', - 'expect_tenure_values_to_be_in_set': 'CONSISTENCY', - 'expect_uprn_column_value_lengths_between': 'VALIDITY', - 'expect_uprn_column_values_to_match_regex': 'ACCURACY', - 'expect_uprn_column_values_to_not_be_null': 'COMPLETENESS', - 'expect_uprn_not_to_be_null': 'COMPLETENESS'} +'expect_arr_patch_not_to_be_null': 'COMPLETENESS', +'expect_asset_id_not_to_be_null': 'COMPLETENESS', +'expect_asset_type_not_to_be_null': 'COMPLETENESS', +'expect_asset_type_values_to_be_in_set': 'CONSISTENCY', +'expect_block_ref_no_column_values_to_match_regex': 'ACCURACY', +'expect_co_t_not_to_be_null': 'COMPLETENESS', +'expect_co_t_to_be_between': 'VALIDITY', +'expect_column_value_lengths_to_be_between': 'VALIDITY', +'expect_column_values_to_be_in_set': 'CONSISTENCY', +'expect_column_values_to_be_unique': 'UNIQUENESS', +'expect_column_values_to_match_regex': 'ACCURACY', +'expect_column_values_to_not_be_null': 'COMPLETENESS', +'expect_contact_type_column_values_to_be_in_set': 'CONSISTENCY', +'expect_contact_type_column_values_to_not_be_null': 'COMPLETENESS', 
+'expect_contact_value_column_values_to_be_unique': 'UNIQUENESS', +'expect_contact_value_column_values_to_not_be_null': 'COMPLETENESS', +'expect_date_of_birth_column_values_to_not_be_null': 'COMPLETENESS', +'expect_date_of_birth_to_be_between': 'VALIDITY', +'expect_description_values_to_be_in_set': 'CONSISTENCY', +'expect_end_of_tenure_date_column_to_be_null': 'COMPLETENESS', +'expect_eo_t_to_be_between': 'VALIDITY', +'expect_estate_ref_no_column_values_to_match_regex': 'ACCURACY', +'expect_first_name_column_value_length': 'ACCURACY', +'expect_firstname_column_value_length': 'ACCURACY', +'expect_is_organisation_column_values_to_not_be_null': 'COMPLETENESS', +'expect_is_organisation_values_to_be_in_set': 'CONSISTENCY', +'expect_llpg_and_prop_ref_column_values_to_be_unique_within_record': 'UNIQUENESS', +'expect_llpg_column_value_lengths_between': 'VALIDITY', +'expect_llpg_column_values_to_be_unique': 'UNIQUENESS', +'expect_llpg_column_values_to_match_regex': 'ACCURACY', +'expect_llpg_column_values_to_not_be_null': 'COMPLETENESS', +'expect_member_full_name_column_value_lengths_between': 'VALIDITY', +'expect_member_is_responsible_values_to_be_in_set': 'CONSISTENCY', +'expect_payment_ref_no_column_values_to_match_regex': 'ACCURACY', +'expect_payment_reference_column_not_to_be_null': 'COMPLETENESS', +'expect_person_id_and_payment_reference_column_values_to_be_unique_within_record': 'UNIQUENESS', +'expect_person_id_and_property_reference_column_values_to_be_unique_within_record': 'UNIQUENESS', +'expect_person_id_column_values_to_be_unique': 'UNIQUENESS', +'expect_person_id_column_values_to_not_be_null': 'COMPLETENESS', +'expect_person_type_values_to_be_in_set': 'CONSISTENCY', +'expect_preferred_title_values_to_be_in_set': 'CONSISTENCY', +'expect_prop_no_and_payment_ref_column_values_to_be_unique_within_record': 'UNIQUENESS', +'expect_prop_no_column_values_to_be_unique': 'UNIQUENESS', +'expect_prop_no_column_values_to_not_be_null': 'COMPLETENESS', 
+'expect_prop_no_no_column_values_to_match_regex': 'ACCURACY', +'expect_prop_ref_column_values_to_be_unique': 'UNIQUENESS', +'expect_prop_ref_not_to_be_null': 'COMPLETENESS', +'expect_property_ref_column_values_to_not_be_null': 'COMPLETENESS', +'expect_rent_group_ref_not_to_be_null': 'COMPLETENESS', +'expect_saff_rent_acc_not_to_be_null': 'COMPLETENESS', +'expect_select_column_values_to_be_unique_within_record': 'UNIQUENESS', +'expect_start_of_tenure_date_column_not_to_be_null': 'COMPLETENESS', +'expect_sub_type_column_values_to_be_in_set': 'CONSISTENCY', +'expect_sub_type_column_values_to_not_be_null': 'COMPLETENESS', +'expect_surname_column_value_length': 'ACCURACY', +'expect_tag_ref_column_not_to_be_null': 'COMPLETENESS', +'expect_tag_ref_column_values_to_be_unique': 'UNIQUENESS', +'expect_tag_ref_not_to_be_null': 'COMPLETENESS', +'expect_target_id_and_value_column_values_to_be_unique_within_record': 'UNIQUENESS', +'expect_target_id_column_values_to_not_be_null': 'COMPLETENESS', +'expect_target_type_column_values_to_be_in_set': 'CONSISTENCY', +'expect_tenancy_id_and_payment_reference_column_values_to_be_unique_within_record': 'UNIQUENESS', +'expect_tenancy_id_and_property_reference_column_values_to_be_unique_within_record': 'UNIQUENESS', +'expect_tenancy_id_column_not_to_be_null': 'COMPLETENESS', +'expect_tenure_code_column_not_to_be_null': 'COMPLETENESS', +'expect_tenure_code_values_to_be_in_set': 'CONSISTENCY', +'expect_tenure_not_to_be_null': 'COMPLETENESS', +'expect_tenure_type_column_values_to_be_in_set': 'CONSISTENCY', +'expect_tenure_values_to_be_in_set': 'CONSISTENCY', +'expect_uprn_column_value_lengths_between': 'VALIDITY', +'expect_uprn_column_values_to_match_regex': 'ACCURACY', +'expect_uprn_column_values_to_not_be_null': 'COMPLETENESS', +'expect_uprn_not_to_be_null': 'COMPLETENESS'} \ No newline at end of file From 21d843255a073738b5703469737166aec1f87409 Mon Sep 17 00:00:00 2001 From: AGibson <4319494+annajgibson@users.noreply.github.com> Date: Tue, 
20 May 2025 16:04:56 +0100 Subject: [PATCH 3/5] Amend sql queries to only include key columns and rows --- scripts/helpers/housing_gx_dq_inputs.py | 194 ++++++++++++------------ 1 file changed, 97 insertions(+), 97 deletions(-) diff --git a/scripts/helpers/housing_gx_dq_inputs.py b/scripts/helpers/housing_gx_dq_inputs.py index bf5be059f..7ede58a0a 100644 --- a/scripts/helpers/housing_gx_dq_inputs.py +++ b/scripts/helpers/housing_gx_dq_inputs.py @@ -1,12 +1,12 @@ sql_config = {'person_reshape': { -'sql': """SELECT *, substr(startdate, 1, 10) as startdate_parsed, substr(enddate, 1, 10) as enddate_parsed, + 'sql': """SELECT *, substr(startdate, 1, 10) as startdate_parsed, substr(enddate, 1, 10) as enddate_parsed, substr(dateofbirth, 1, 10) as dateofbirth_parsed FROM "housing-refined-zone"."person_reshape" WHERE import_date = (SELECT max(import_date) FROM "housing-refined-zone"."person_reshape") AND enddate IS NULL AND type IN ('Secure', 'Introductory')""", -'id_field': 'person_id'}, -'tenure_reshape': { - 'sql': """SELECT * FROM "housing-refined-zone"."tenure_reshape" where import_date=(select max(import_date) from "housing-refined-zone"."tenure_reshape") and description in ('Secure', 'Introductory', 'Mesne Profit Ac', 'Non-Secure') and (endoftenuredate is null or substr(endoftenuredate, 1, 11) = '1900-01-01')""", - 'id_field': 'tenancy_id'}, -'contacts_reshape': { - 'sql': """SELECT + 'id_field': 'person_id'}, + 'tenure_reshape': { + 'sql': """SELECT * FROM "housing-refined-zone"."tenure_reshape" where import_date=(select max(import_date) from "housing-refined-zone"."tenure_reshape") and description in ('Secure', 'Introductory', 'Mesne Profit Ac', 'Non-Secure') and (endoftenuredate is null or substr(endoftenuredate, 1, 11) = '1900-01-01')""", + 'id_field': 'tenancy_id'}, + 'contacts_reshape': { + 'sql': """SELECT DATE_FORMAT(from_iso8601_timestamp(createdat), '%Y-%b-%d') AS createdat, contacttype, subtype, @@ -21,12 +21,12 @@ FROM 
"housing-refined-zone"."contacts_reshape" where targettype = 'person' and import_date = (select max(import_date) from "housing-refined-zone"."contacts_reshape")""", - 'id_field': 'id'}, -'housing_homeowner_record_sheet': { - 'sql': """SELECT * FROM "housing-raw-zone"."housing_homeowner_record_sheet" where import_date=(select max(import_date) from "housing-raw-zone"."housing_homeowner_record_sheet")""", - 'id_field': 'property_no'}, -'housing_dwellings_list': { - 'sql': """SELECT + 'id_field': 'id'}, + 'housing_homeowner_record_sheet': { + 'sql': """SELECT * FROM "housing-raw-zone"."housing_homeowner_record_sheet" where import_date=(select max(import_date) from "housing-raw-zone"."housing_homeowner_record_sheet")""", + 'id_field': 'property_no'}, + 'housing_dwellings_list': { + 'sql': """SELECT cast(property_dwelling_reference_number as varchar) as property_dwelling_reference_number, cast(llpg as varchar) as llpg, cast(estate_reference_number as varchar) as estate_reference_number, @@ -46,9 +46,9 @@ registered_building, year_built_block FROM "housing-raw-zone"."housing_dwellings_list" where import_date = (select max(import_date) from "housing-raw-zone"."housing_dwellings_list")""", - 'id_field': 'property_dwelling_reference_number'}, -'assets_reshape': { - 'sql': """select + 'id_field': 'property_dwelling_reference_number'}, + 'assets_reshape': { + 'sql': """select cast(assetid as varchar) as assetid, assettype, cast(uprn as varchar) as uprn, @@ -88,91 +88,91 @@ import_date FROM "housing-refined-zone"."assets_reshape" where import_date = (select max(import_date) from "housing-refined-zone"."assets_reshape") and assettype in ('BoilerHouse','BoosterPump','CleanersFacilities','CombinedHeatAndPowerUnit','CommunityHall','Concierge','Dwelling','LettableNonDwelling','Lift','NA','NBD','OutBuilding','TravellerSite')""", - 'id_field': 'asset_id'}, -'matenancyagreement': { - 'sql': """SELECT *, substr(cast(eot as varchar), 1, 10) as eot_parsed, substr(cast(cot as varchar), 1, 
10) as cot_parsed FROM "housing-raw-zone"."sow2b_dbo_matenancyagreement" where import_date=(select max(import_date) FROM "housing-raw-zone"."sow2b_dbo_matenancyagreement")""", - 'id_field': 'tag_ref'}, -'maproperty': { - 'sql': """SELECT * FROM "housing-raw-zone"."sow2b_dbo_maproperty" where import_date=(select max(import_date) FROM "housing-raw-zone"."sow2b_dbo_maproperty")""", - 'id_field': 'prop_ref'} + 'id_field': 'asset_id'}, + 'matenancyagreement': { + 'sql': """SELECT *, substr(cast(eot as varchar), 1, 10) as eot_parsed, substr(cast(cot as varchar), 1, 10) as cot_parsed FROM "housing-raw-zone"."sow2b_dbo_matenancyagreement" where import_date=(select max(import_date) FROM "housing-raw-zone"."sow2b_dbo_matenancyagreement")""", + 'id_field': 'tag_ref'}, + 'maproperty': { + 'sql': """SELECT * FROM "housing-raw-zone"."sow2b_dbo_maproperty" where import_date=(select max(import_date) FROM "housing-raw-zone"."sow2b_dbo_maproperty")""", + 'id_field': 'prop_ref'} } table_list = ['person_reshape', 'tenure_reshape', 'contacts_reshape', 'housing_homeowner_record_sheet', - 'housing_dwellings_list', 'assets_reshape', 'matenancyagreement', 'maproperty'] + 'housing_dwellings_list', 'assets_reshape', 'matenancyagreement', 'maproperty'] partition_keys = ['import_year', 'import_month', 'import_day', 'import_date'] dq_dimensions_map = { -'expect_arr_patch_not_to_be_null': 'COMPLETENESS', -'expect_asset_id_not_to_be_null': 'COMPLETENESS', -'expect_asset_type_not_to_be_null': 'COMPLETENESS', -'expect_asset_type_values_to_be_in_set': 'CONSISTENCY', -'expect_block_ref_no_column_values_to_match_regex': 'ACCURACY', -'expect_co_t_not_to_be_null': 'COMPLETENESS', -'expect_co_t_to_be_between': 'VALIDITY', -'expect_column_value_lengths_to_be_between': 'VALIDITY', -'expect_column_values_to_be_in_set': 'CONSISTENCY', -'expect_column_values_to_be_unique': 'UNIQUENESS', -'expect_column_values_to_match_regex': 'ACCURACY', -'expect_column_values_to_not_be_null': 'COMPLETENESS', 
-'expect_contact_type_column_values_to_be_in_set': 'CONSISTENCY', -'expect_contact_type_column_values_to_not_be_null': 'COMPLETENESS', -'expect_contact_value_column_values_to_be_unique': 'UNIQUENESS', -'expect_contact_value_column_values_to_not_be_null': 'COMPLETENESS', -'expect_date_of_birth_column_values_to_not_be_null': 'COMPLETENESS', -'expect_date_of_birth_to_be_between': 'VALIDITY', -'expect_description_values_to_be_in_set': 'CONSISTENCY', -'expect_end_of_tenure_date_column_to_be_null': 'COMPLETENESS', -'expect_eo_t_to_be_between': 'VALIDITY', -'expect_estate_ref_no_column_values_to_match_regex': 'ACCURACY', -'expect_first_name_column_value_length': 'ACCURACY', -'expect_firstname_column_value_length': 'ACCURACY', -'expect_is_organisation_column_values_to_not_be_null': 'COMPLETENESS', -'expect_is_organisation_values_to_be_in_set': 'CONSISTENCY', -'expect_llpg_and_prop_ref_column_values_to_be_unique_within_record': 'UNIQUENESS', -'expect_llpg_column_value_lengths_between': 'VALIDITY', -'expect_llpg_column_values_to_be_unique': 'UNIQUENESS', -'expect_llpg_column_values_to_match_regex': 'ACCURACY', -'expect_llpg_column_values_to_not_be_null': 'COMPLETENESS', -'expect_member_full_name_column_value_lengths_between': 'VALIDITY', -'expect_member_is_responsible_values_to_be_in_set': 'CONSISTENCY', -'expect_payment_ref_no_column_values_to_match_regex': 'ACCURACY', -'expect_payment_reference_column_not_to_be_null': 'COMPLETENESS', -'expect_person_id_and_payment_reference_column_values_to_be_unique_within_record': 'UNIQUENESS', -'expect_person_id_and_property_reference_column_values_to_be_unique_within_record': 'UNIQUENESS', -'expect_person_id_column_values_to_be_unique': 'UNIQUENESS', -'expect_person_id_column_values_to_not_be_null': 'COMPLETENESS', -'expect_person_type_values_to_be_in_set': 'CONSISTENCY', -'expect_preferred_title_values_to_be_in_set': 'CONSISTENCY', -'expect_prop_no_and_payment_ref_column_values_to_be_unique_within_record': 'UNIQUENESS', 
-'expect_prop_no_column_values_to_be_unique': 'UNIQUENESS', -'expect_prop_no_column_values_to_not_be_null': 'COMPLETENESS', -'expect_prop_no_no_column_values_to_match_regex': 'ACCURACY', -'expect_prop_ref_column_values_to_be_unique': 'UNIQUENESS', -'expect_prop_ref_not_to_be_null': 'COMPLETENESS', -'expect_property_ref_column_values_to_not_be_null': 'COMPLETENESS', -'expect_rent_group_ref_not_to_be_null': 'COMPLETENESS', -'expect_saff_rent_acc_not_to_be_null': 'COMPLETENESS', -'expect_select_column_values_to_be_unique_within_record': 'UNIQUENESS', -'expect_start_of_tenure_date_column_not_to_be_null': 'COMPLETENESS', -'expect_sub_type_column_values_to_be_in_set': 'CONSISTENCY', -'expect_sub_type_column_values_to_not_be_null': 'COMPLETENESS', -'expect_surname_column_value_length': 'ACCURACY', -'expect_tag_ref_column_not_to_be_null': 'COMPLETENESS', -'expect_tag_ref_column_values_to_be_unique': 'UNIQUENESS', -'expect_tag_ref_not_to_be_null': 'COMPLETENESS', -'expect_target_id_and_value_column_values_to_be_unique_within_record': 'UNIQUENESS', -'expect_target_id_column_values_to_not_be_null': 'COMPLETENESS', -'expect_target_type_column_values_to_be_in_set': 'CONSISTENCY', -'expect_tenancy_id_and_payment_reference_column_values_to_be_unique_within_record': 'UNIQUENESS', -'expect_tenancy_id_and_property_reference_column_values_to_be_unique_within_record': 'UNIQUENESS', -'expect_tenancy_id_column_not_to_be_null': 'COMPLETENESS', -'expect_tenure_code_column_not_to_be_null': 'COMPLETENESS', -'expect_tenure_code_values_to_be_in_set': 'CONSISTENCY', -'expect_tenure_not_to_be_null': 'COMPLETENESS', -'expect_tenure_type_column_values_to_be_in_set': 'CONSISTENCY', -'expect_tenure_values_to_be_in_set': 'CONSISTENCY', -'expect_uprn_column_value_lengths_between': 'VALIDITY', -'expect_uprn_column_values_to_match_regex': 'ACCURACY', -'expect_uprn_column_values_to_not_be_null': 'COMPLETENESS', -'expect_uprn_not_to_be_null': 'COMPLETENESS'} \ No newline at end of file + 
'expect_arr_patch_not_to_be_null': 'COMPLETENESS', + 'expect_asset_id_not_to_be_null': 'COMPLETENESS', + 'expect_asset_type_not_to_be_null': 'COMPLETENESS', + 'expect_asset_type_values_to_be_in_set': 'CONSISTENCY', + 'expect_block_ref_no_column_values_to_match_regex': 'ACCURACY', + 'expect_co_t_not_to_be_null': 'COMPLETENESS', + 'expect_co_t_to_be_between': 'VALIDITY', + 'expect_column_value_lengths_to_be_between': 'VALIDITY', + 'expect_column_values_to_be_in_set': 'CONSISTENCY', + 'expect_column_values_to_be_unique': 'UNIQUENESS', + 'expect_column_values_to_match_regex': 'ACCURACY', + 'expect_column_values_to_not_be_null': 'COMPLETENESS', + 'expect_contact_type_column_values_to_be_in_set': 'CONSISTENCY', + 'expect_contact_type_column_values_to_not_be_null': 'COMPLETENESS', + 'expect_contact_value_column_values_to_be_unique': 'UNIQUENESS', + 'expect_contact_value_column_values_to_not_be_null': 'COMPLETENESS', + 'expect_date_of_birth_column_values_to_not_be_null': 'COMPLETENESS', + 'expect_date_of_birth_to_be_between': 'VALIDITY', + 'expect_description_values_to_be_in_set': 'CONSISTENCY', + 'expect_end_of_tenure_date_column_to_be_null': 'COMPLETENESS', + 'expect_eo_t_to_be_between': 'VALIDITY', + 'expect_estate_ref_no_column_values_to_match_regex': 'ACCURACY', + 'expect_first_name_column_value_length': 'ACCURACY', + 'expect_firstname_column_value_length': 'ACCURACY', + 'expect_is_organisation_column_values_to_not_be_null': 'COMPLETENESS', + 'expect_is_organisation_values_to_be_in_set': 'CONSISTENCY', + 'expect_llpg_and_prop_ref_column_values_to_be_unique_within_record': 'UNIQUENESS', + 'expect_llpg_column_value_lengths_between': 'VALIDITY', + 'expect_llpg_column_values_to_be_unique': 'UNIQUENESS', + 'expect_llpg_column_values_to_match_regex': 'ACCURACY', + 'expect_llpg_column_values_to_not_be_null': 'COMPLETENESS', + 'expect_member_full_name_column_value_lengths_between': 'VALIDITY', + 'expect_member_is_responsible_values_to_be_in_set': 'CONSISTENCY', + 
'expect_payment_ref_no_column_values_to_match_regex': 'ACCURACY', + 'expect_payment_reference_column_not_to_be_null': 'COMPLETENESS', + 'expect_person_id_and_payment_reference_column_values_to_be_unique_within_record': 'UNIQUENESS', + 'expect_person_id_and_property_reference_column_values_to_be_unique_within_record': 'UNIQUENESS', + 'expect_person_id_column_values_to_be_unique': 'UNIQUENESS', + 'expect_person_id_column_values_to_not_be_null': 'COMPLETENESS', + 'expect_person_type_values_to_be_in_set': 'CONSISTENCY', + 'expect_preferred_title_values_to_be_in_set': 'CONSISTENCY', + 'expect_prop_no_and_payment_ref_column_values_to_be_unique_within_record': 'UNIQUENESS', + 'expect_prop_no_column_values_to_be_unique': 'UNIQUENESS', + 'expect_prop_no_column_values_to_not_be_null': 'COMPLETENESS', + 'expect_prop_no_no_column_values_to_match_regex': 'ACCURACY', + 'expect_prop_ref_column_values_to_be_unique': 'UNIQUENESS', + 'expect_prop_ref_not_to_be_null': 'COMPLETENESS', + 'expect_property_ref_column_values_to_not_be_null': 'COMPLETENESS', + 'expect_rent_group_ref_not_to_be_null': 'COMPLETENESS', + 'expect_saff_rent_acc_not_to_be_null': 'COMPLETENESS', + 'expect_select_column_values_to_be_unique_within_record': 'UNIQUENESS', + 'expect_start_of_tenure_date_column_not_to_be_null': 'COMPLETENESS', + 'expect_sub_type_column_values_to_be_in_set': 'CONSISTENCY', + 'expect_sub_type_column_values_to_not_be_null': 'COMPLETENESS', + 'expect_surname_column_value_length': 'ACCURACY', + 'expect_tag_ref_column_not_to_be_null': 'COMPLETENESS', + 'expect_tag_ref_column_values_to_be_unique': 'UNIQUENESS', + 'expect_tag_ref_not_to_be_null': 'COMPLETENESS', + 'expect_target_id_and_value_column_values_to_be_unique_within_record': 'UNIQUENESS', + 'expect_target_id_column_values_to_not_be_null': 'COMPLETENESS', + 'expect_target_type_column_values_to_be_in_set': 'CONSISTENCY', + 'expect_tenancy_id_and_payment_reference_column_values_to_be_unique_within_record': 'UNIQUENESS', + 
'expect_tenancy_id_and_property_reference_column_values_to_be_unique_within_record': 'UNIQUENESS', + 'expect_tenancy_id_column_not_to_be_null': 'COMPLETENESS', + 'expect_tenure_code_column_not_to_be_null': 'COMPLETENESS', + 'expect_tenure_code_values_to_be_in_set': 'CONSISTENCY', + 'expect_tenure_not_to_be_null': 'COMPLETENESS', + 'expect_tenure_type_column_values_to_be_in_set': 'CONSISTENCY', + 'expect_tenure_values_to_be_in_set': 'CONSISTENCY', + 'expect_uprn_column_value_lengths_between': 'VALIDITY', + 'expect_uprn_column_values_to_match_regex': 'ACCURACY', + 'expect_uprn_column_values_to_not_be_null': 'COMPLETENESS', + 'expect_uprn_not_to_be_null': 'COMPLETENESS'} \ No newline at end of file From 68ccc714eb177c21eac55dc757428f751f7b847c Mon Sep 17 00:00:00 2001 From: AGibson <4319494+annajgibson@users.noreply.github.com> Date: Tue, 20 May 2025 16:06:03 +0100 Subject: [PATCH 4/5] Amend sql queries to only include key columns and rows --- scripts/helpers/housing_gx_dq_inputs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/helpers/housing_gx_dq_inputs.py b/scripts/helpers/housing_gx_dq_inputs.py index 7ede58a0a..9da085f38 100644 --- a/scripts/helpers/housing_gx_dq_inputs.py +++ b/scripts/helpers/housing_gx_dq_inputs.py @@ -74,7 +74,7 @@ hascommunalareas, hasprivatekitchen, hasprivatebathroom, - hasrampaccess, + hasrampaccess, isstepfree, hasstairs, heating, From 4f0ff38c449a510e5af4c398289f3ac03105ba4e Mon Sep 17 00:00:00 2001 From: AGibson <4319494+annajgibson@users.noreply.github.com> Date: Tue, 20 May 2025 16:07:33 +0100 Subject: [PATCH 5/5] Amend sql queries to only include key columns and rows --- scripts/helpers/housing_gx_dq_inputs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/helpers/housing_gx_dq_inputs.py b/scripts/helpers/housing_gx_dq_inputs.py index 9da085f38..e0811b8e2 100644 --- a/scripts/helpers/housing_gx_dq_inputs.py +++ b/scripts/helpers/housing_gx_dq_inputs.py @@ -175,4 +175,4 @@
'expect_uprn_column_value_lengths_between': 'VALIDITY', 'expect_uprn_column_values_to_match_regex': 'ACCURACY', 'expect_uprn_column_values_to_not_be_null': 'COMPLETENESS', - 'expect_uprn_not_to_be_null': 'COMPLETENESS'} \ No newline at end of file + 'expect_uprn_not_to_be_null': 'COMPLETENESS'}