From 1d6dad2222be61c2ef03d48c051bdadd540bd615 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Mon, 2 Feb 2026 17:14:41 -0800 Subject: [PATCH 1/7] Changes to allow for the use of geographies in between the meta and seed geographies --- populationsim/steps/final_seed_balancing.py | 2 ++ populationsim/steps/integerize_final_seed_weights.py | 3 +++ populationsim/steps/meta_control_factoring.py | 4 ++++ 3 files changed, 9 insertions(+) diff --git a/populationsim/steps/final_seed_balancing.py b/populationsim/steps/final_seed_balancing.py index 6ec058d4..2cfbf601 100644 --- a/populationsim/steps/final_seed_balancing.py +++ b/populationsim/steps/final_seed_balancing.py @@ -60,6 +60,8 @@ def final_seed_balancing(settings, crosswalk, control_spec, incidence_table): # we use all control_spec rows, so no need to filter on geography as for initial_seed_balancing seed_controls_df = get_control_table(seed_geography) + intermediate_geographies = geographies[1:geographies.index(seed_geography)] + control_spec = control_spec[control_spec.geography.apply(lambda x: x not in intermediate_geographies)] assert (seed_controls_df.columns == control_spec.target).all() # determine master_control_index if specified in settings diff --git a/populationsim/steps/integerize_final_seed_weights.py b/populationsim/steps/integerize_final_seed_weights.py index 73f56a58..8b52f1d1 100644 --- a/populationsim/steps/integerize_final_seed_weights.py +++ b/populationsim/steps/integerize_final_seed_weights.py @@ -50,6 +50,9 @@ def integerize_final_seed_weights(settings, crosswalk, control_spec, incidence_t seed_weights_df = get_weight_table(seed_geography) # FIXME - I assume we want to integerize using meta controls too? 
+ geographies = settings["geographies"] + intermediate_geographies = geographies[1:geographies.index(seed_geography)] + control_spec = control_spec[control_spec.geography.apply(lambda x: x not in intermediate_geographies)] control_cols = control_spec.target assert (seed_controls_df.columns == control_cols).all() diff --git a/populationsim/steps/meta_control_factoring.py b/populationsim/steps/meta_control_factoring.py index 394aa18f..96f3254c 100644 --- a/populationsim/steps/meta_control_factoring.py +++ b/populationsim/steps/meta_control_factoring.py @@ -122,6 +122,10 @@ def meta_control_factoring(settings, control_spec, incidence_table): seed_controls_df = get_control_table(seed_geography) + # Remove controls for geographies between the seed and the meta geographies + intermediate_geographies = geographies[1:geographies.index(seed_geography)] + control_spec = control_spec[control_spec.geography.apply(lambda x: x not in intermediate_geographies)] + assert len(seed_controls_df.index) == len(seed_level_meta_controls.index) seed_controls_df = pd.concat([seed_controls_df, seed_level_meta_controls], axis=1) From 6fba34d67faf965917c86d9cc2ca676e1377d2fe Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Wed, 18 Mar 2026 10:31:43 -0700 Subject: [PATCH 2/7] Added test of allowing intermediate geographies --- .../configs_intermediate/controls.csv | 11 + .../configs_intermediate/logging.yaml | 54 ++++ .../configs_intermediate/repop_controls.csv | 3 + .../configs_intermediate/settings.yaml | 133 +++++++++ .../data_intermediate/district_controls.csv | 4 + .../data_intermediate/geo_cross_walk.csv | 39 +++ .../data_intermediate/region_controls.csv | 2 + .../data_intermediate/repop_taz_controls.csv | 5 + .../data_intermediate/seed_households.csv | 63 +++++ .../data_intermediate/seed_persons.csv | 254 ++++++++++++++++++ .../data_intermediate/taz_controls.csv | 37 +++ .../data_intermediate/tract_controls.csv | 13 + tests/test_intermediate_geography.py | 47 ++++ 13 files changed, 665 
insertions(+) create mode 100644 examples/example_test/configs_intermediate/controls.csv create mode 100644 examples/example_test/configs_intermediate/logging.yaml create mode 100644 examples/example_test/configs_intermediate/repop_controls.csv create mode 100644 examples/example_test/configs_intermediate/settings.yaml create mode 100644 examples/example_test/data_intermediate/district_controls.csv create mode 100644 examples/example_test/data_intermediate/geo_cross_walk.csv create mode 100644 examples/example_test/data_intermediate/region_controls.csv create mode 100644 examples/example_test/data_intermediate/repop_taz_controls.csv create mode 100644 examples/example_test/data_intermediate/seed_households.csv create mode 100644 examples/example_test/data_intermediate/seed_persons.csv create mode 100644 examples/example_test/data_intermediate/taz_controls.csv create mode 100644 examples/example_test/data_intermediate/tract_controls.csv create mode 100644 tests/test_intermediate_geography.py diff --git a/examples/example_test/configs_intermediate/controls.csv b/examples/example_test/configs_intermediate/controls.csv new file mode 100644 index 00000000..71e3e97c --- /dev/null +++ b/examples/example_test/configs_intermediate/controls.csv @@ -0,0 +1,11 @@ +target,geography,seed_table,importance,control_field,expression +num_hh,TAZ,households,1000000000,HHBASE,(households.WGTP > 0) & (households.WGTP < np.inf) +hh_size_1,TAZ,households,1000,HHSIZE1,households.NP == 1 +hh_size_2,TAZ,households,1000,HHSIZE2,households.NP == 2 +hh_size_3,TAZ,households,1000,HHSIZE3,households.NP == 3 +hh_size_4_plus,TAZ,households,1000,HHSIZE4,households.NP >= 4 +students_by_housing_type,TAZ,persons,1000,OSUFAM,persons.OSUTAG == 1 +hh_by_type,TRACT,households,100,SF,households.HTYPE == 1 +persons_occ_1,DISTRICT,persons,1000,OCCP1,persons.OCCP == 1 +persons_occ_2,REGION,persons,100,OCCP2,persons.OCCP == 2 +persons_occ_3,REGION,persons,100,OCCP3,persons.OCCP == 3 diff --git 
a/examples/example_test/configs_intermediate/logging.yaml b/examples/example_test/configs_intermediate/logging.yaml new file mode 100644 index 00000000..06baa76a --- /dev/null +++ b/examples/example_test/configs_intermediate/logging.yaml @@ -0,0 +1,54 @@ +# Config for logging +# ------------------ +# See http://docs.python.org/2.7/library/logging.config.html#configuration-dictionary-schema + +logging: + version: 1 + disable_existing_loggers: true + + + # Configuring the default (root) logger is highly recommended + root: + level: NOTSET + handlers: [console] + + loggers: + + populationsim: + level: DEBUG + handlers: [console, logfile] + propagate: false + + orca: + level: WARN + handlers: [console, logfile] + propagate: false + + handlers: + + logfile: + class: logging.FileHandler + filename: !!python/object/apply:populationsim.core.config.log_file_path ['populationsim.log'] + mode: w + formatter: fileFormatter + level: NOTSET + + console: + class: logging.StreamHandler + stream: ext://sys.stdout + formatter: simpleFormatter + level: NOTSET + #level: WARN + + formatters: + + simpleFormatter: + class: logging.Formatter + # format: '%(levelname)s - %(name)s - %(message)s' + format: '%(levelname)s - %(message)s' + datefmt: '%d/%m/%Y %H:%M:%S' + + fileFormatter: + class: logging.Formatter + format: '%(asctime)s - %(levelname)s - %(name)s - %(message)s' + datefmt: '%d/%m/%Y %H:%M:%S' diff --git a/examples/example_test/configs_intermediate/repop_controls.csv b/examples/example_test/configs_intermediate/repop_controls.csv new file mode 100644 index 00000000..f9c17fda --- /dev/null +++ b/examples/example_test/configs_intermediate/repop_controls.csv @@ -0,0 +1,3 @@ +target,geography,seed_table,importance,control_field,expression +num_hh,TAZ,households,1000000000,HHBASE,(households.WGTP > 0) & (households.WGTP < np.inf) +muggle,TAZ,households,1000,MUGGLE,households.wizard == 0 diff --git a/examples/example_test/configs_intermediate/settings.yaml 
b/examples/example_test/configs_intermediate/settings.yaml new file mode 100644 index 00000000..f9a7e92b --- /dev/null +++ b/examples/example_test/configs_intermediate/settings.yaml @@ -0,0 +1,133 @@ +# This flag turns off integerization entirely (default False) +NO_INTEGERIZATION_EVER: True + +# These settings are ignored if not integerizing +INTEGERIZE_WITH_BACKSTOPPED_CONTROLS: True +SUB_BALANCE_WITH_FLOAT_SEED_WEIGHTS: False +GROUP_BY_INCIDENCE_SIGNATURE: True +INTEGERIZER_TIMEOUT: 300 # seconds +USE_SIMUL_INTEGERIZER: True + +geographies: [REGION, DISTRICT, PUMA, TRACT, TAZ] +seed_geography: PUMA + +# Tracing +# ------------------------------------------------------------------ +trace_geography: + TAZ: 100 + TRACT: 1 + + +household_weight_col: WGTP +household_id_col: hh_id +total_hh_control: num_hh +max_expansion_factor: 5 + +min_expansion_factor: 0.2 + +#control_file_name: controls.csv +#data_dir: data + +# input_pre_processor input_table_list +input_table_list: + - tablename: households + filename : seed_households.csv + index_col: hh_id + rename_columns: + SERIALNO: hh_id + - tablename: persons + # expression_filename: seed_persons_expressions.csv + filename : seed_persons.csv + rename_columns: + SERIALNO: hh_id + SPORDER: per_num + - tablename: geo_cross_walk + filename : geo_cross_walk.csv + rename_columns: + TRACTCE: TRACT + - tablename: TAZ_control_data + filename : taz_controls.csv + - tablename: TRACT_control_data + filename : tract_controls.csv + - tablename: DISTRICT_control_data + filename : district_controls.csv + - tablename: REGION_control_data + filename: region_controls.csv + + +run_list: + steps: + - input_pre_processor + - setup_data_structures + - initial_seed_balancing + - meta_control_factoring + - final_seed_balancing + - integerize_final_seed_weights + - sub_balancing.geography=TRACT + - sub_balancing.geography=TAZ + - expand_households + - summarize + - write_tables + - write_synthetic_population + + #resume_after: expand_households + 
+# "-m repop" command line option allows specification of repop run_list +# expand_households options are append or replace +repop: + steps: + - input_pre_processor.table_list=repop_input_table_list + - repop_setup_data_structures + - initial_seed_balancing.final=true + - integerize_final_seed_weights.repop + - repop_balancing + - expand_households.repop;replace + - write_tables.repop + - write_synthetic_population.repop + + resume_after: summarize + +repop_control_file_name: repop_controls.csv + +repop_input_table_list: + - filename : repop_taz_controls.csv + tablename: TAZ_control_data + + + +# output_tables can specify either a list of output tables to include or to skip +# if neither is specified, then no checkpointed tables will be written + +output_tables: + action: include + tables: + - PUMA_weights + - TAZ_weights + - TRACT_weights + - trace_TAZ_weights + - trace_TRACT_weights + - summary_hh_weights + - summary_TAZ + +# - expanded_household_ids + +# +#output_tables: +# action: include +# tables: +# - expanded_household_ids + +output_synthetic_population: + household_id: household_id + households: + filename: synthetic_households.csv + columns: + - NP + - WGTP + - HTYPE + persons: + filename: synthetic_persons.csv + columns: + - per_num + - OSUTAG + - OCCP diff --git a/examples/example_test/data_intermediate/district_controls.csv b/examples/example_test/data_intermediate/district_controls.csv new file mode 100644 index 00000000..10a24314 --- /dev/null +++ b/examples/example_test/data_intermediate/district_controls.csv @@ -0,0 +1,4 @@ +DISTRICT,OCCP1,HHBASE,HHSIZE1,HHSIZE2,HHSIZE3,HHSIZE4,OSUFAM +1,720,450,90,144,144,72,318 +2,720,450,90,144,144,72,318 +3,960,600,120,192,192,96,424 diff --git a/examples/example_test/data_intermediate/geo_cross_walk.csv b/examples/example_test/data_intermediate/geo_cross_walk.csv new file mode 100644 index 00000000..6ff1f1b6 --- /dev/null +++ b/examples/example_test/data_intermediate/geo_cross_walk.csv @@ -0,0 +1,39 @@
+TAZ,TRACTCE,PUMA,DISTRICT,REGION +100,1,600,1,1 +101,1,600,1,1 +102,1,600,1,1 +103,2,600,1,1 +104,2,600,1,1 +105,2,600,1,1 +200,21,601,1,1 +201,21,601,1,1 +202,21,601,1,1 +203,22,601,1,1 +204,22,601,1,1 +205,22,601,1,1 +300,31,602,2,1 +301,31,602,2,1 +302,31,602,2,1 +303,32,602,2,1 +304,32,602,2,1 +305,32,602,2,1 +400,41,603,2,1 +401,41,603,2,1 +402,41,603,2,1 +403,42,603,2,1 +404,42,603,2,1 +405,42,603,2,1 +500,51,604,3,1 +501,51,604,3,1 +502,51,604,3,1 +503,52,604,3,1 +504,52,604,3,1 +505,52,604,3,1 +600,61,605,3,1 +601,61,605,3,1 +602,61,605,3,1 +603,62,605,3,1 +604,62,605,3,1 +605,62,605,3,1 +#,,,, +700,72,606,1,1 diff --git a/examples/example_test/data_intermediate/region_controls.csv b/examples/example_test/data_intermediate/region_controls.csv new file mode 100644 index 00000000..7771eae9 --- /dev/null +++ b/examples/example_test/data_intermediate/region_controls.csv @@ -0,0 +1,2 @@ +REGION,OCCP2,OCCP3 +1,300,1560 diff --git a/examples/example_test/data_intermediate/repop_taz_controls.csv b/examples/example_test/data_intermediate/repop_taz_controls.csv new file mode 100644 index 00000000..e5bd529a --- /dev/null +++ b/examples/example_test/data_intermediate/repop_taz_controls.csv @@ -0,0 +1,5 @@ +TAZ,TRACT,PUMA,DISTRICT,HHBASE,HHSIZE1,HHSIZE2,HHSIZE3,HHSIZE4,OSUFAM,SF,MUGGLE +100,1,600,1,26,6,8,8,4,53,26,20 +101,1,600,1,26,6,8,8,4,53,26,21 +205,22,601,1,51,11,16,16,8,0,51,40 +301,31,602,2,26,6,8,8,4,53,26,19 diff --git a/examples/example_test/data_intermediate/seed_households.csv b/examples/example_test/data_intermediate/seed_households.csv new file mode 100644 index 00000000..764d998e --- /dev/null +++ b/examples/example_test/data_intermediate/seed_households.csv @@ -0,0 +1,63 @@ +SERIALNO,NP,WGTP,HTYPE,PUMA,wizard +1,1,30,1,600,1 +2,2,48,1,600,1 +3,3,48,1,600,0 +4,4,14,1,600,0 +5,5,10,1,600,0 +6,10,5,0,600,0 +101,1,30,1,601,1 +102,2,48,1,601,0 +103,3,48,1,601,0 +104,4,14,1,601,0 +105,5,10,1,601,0 +106,10,5,0,601,0 +111,1,30,1,601,0 +112,2,48,1,601,0 
+113,3,48,1,601,0 +114,4,14,1,601,0 +115,5,10,1,601,0 +116,10,5,0,601,0 +201,1,30,1,602,1 +202,2,48,1,602,0 +203,3,48,1,602,0 +204,4,14,1,602,0 +205,5,10,1,602,0 +206,10,5,0,602,0 +301,1,30,1,603,1 +302,2,48,1,603,0 +303,3,48,1,603,0 +304,4,14,1,603,0 +305,5,10,1,603,0 +306,10,5,0,603,0 +311,1,30,1,603,0 +312,2,48,1,603,0 +313,3,48,1,603,0 +314,4,14,1,603,0 +315,5,10,1,603,0 +316,10,5,0,603,0 +401,1,30,1,604,1 +402,2,48,1,604,0 +403,3,48,1,604,0 +404,4,14,1,604,0 +405,5,10,1,604,0 +406,10,5,0,604,0 +411,1,30,1,604,0 +412,2,48,1,604,0 +413,3,48,1,604,0 +414,4,14,1,604,0 +415,5,10,1,604,0 +416,10,5,0,604,0 +501,1,30,1,605,1 +502,2,48,1,605,0 +503,3,48,1,605,0 +504,4,14,1,605,0 +505,5,10,1,605,0 +506,10,5,0,605,0 +511,1,30,1,605,0 +512,2,48,1,605,0 +513,3,48,1,605,0 +514,4,14,1,605,0 +515,5,10,1,605,0 +516,10,5,0,605,0 +# this hh outside of seed zones should get dropped +600,10,5,0,606,0 diff --git a/examples/example_test/data_intermediate/seed_persons.csv b/examples/example_test/data_intermediate/seed_persons.csv new file mode 100644 index 00000000..dc362688 --- /dev/null +++ b/examples/example_test/data_intermediate/seed_persons.csv @@ -0,0 +1,254 @@ +SERIALNO,SPORDER,PUMA,OSUTAG,OCCP,WGTP +1,1,600,0,2,30 +2,1,600,0,1,48 +2,2,600,0,1,48 +3,1,600,0,1,48 +3,2,600,0,1,48 +3,3,600,1,3,48 +4,1,600,0,1,14 +4,2,600,0,1,14 +4,3,600,1,3,14 +4,4,600,1,3,14 +5,1,600,0,1,10 +5,2,600,0,1,10 +5,3,600,1,3,10 +5,4,600,1,3,10 +5,5,600,1,3,10 +6,1,600,2,3,5 +6,2,600,2,3,5 +6,3,600,2,3,5 +6,4,600,2,3,5 +6,5,600,2,3,5 +6,6,600,2,3,5 +6,7,600,2,3,5 +6,8,600,2,3,5 +6,9,600,2,3,5 +6,10,600,2,3,5 +101,1,601,0,2,30 +102,1,601,0,1,48 +102,2,601,0,1,48 +103,1,601,0,1,48 +103,2,601,0,1,48 +103,3,601,1,3,48 +104,1,601,0,1,14 +104,2,601,0,1,14 +104,3,601,1,3,14 +104,4,601,1,3,14 +105,1,601,0,1,10 +105,2,601,0,1,10 +105,3,601,1,3,10 +105,4,601,1,3,10 +105,5,601,1,3,10 +106,1,601,2,3,5 +106,2,601,2,3,5 +106,3,601,2,3,5 +106,4,601,2,3,5 +106,5,601,2,3,5 +106,6,601,2,3,5 +106,7,601,2,3,5 
+106,8,601,2,3,5 +106,9,601,2,3,5 +106,10,601,2,3,5 +111,1,601,0,2,30 +112,1,601,0,1,48 +112,2,601,0,1,48 +113,1,601,0,1,48 +113,2,601,0,1,48 +113,3,601,1,3,48 +114,1,601,0,1,14 +114,2,601,0,1,14 +114,3,601,1,3,14 +114,4,601,1,3,14 +115,1,601,0,1,10 +115,2,601,0,1,10 +115,3,601,1,3,10 +115,4,601,1,3,10 +115,5,601,1,3,10 +116,1,601,2,3,5 +116,2,601,2,3,5 +116,3,601,2,3,5 +116,4,601,2,3,5 +116,5,601,2,3,5 +116,6,601,2,3,5 +116,7,601,2,3,5 +116,8,601,2,3,5 +116,9,601,2,3,5 +116,10,601,2,3,5 +201,1,602,0,2,30 +202,1,602,0,1,48 +202,2,602,0,1,48 +203,1,602,0,1,48 +203,2,602,0,1,48 +203,3,602,1,3,48 +204,1,602,0,1,14 +204,2,602,0,1,14 +204,3,602,1,3,14 +204,4,602,1,3,14 +205,1,602,0,1,10 +205,2,602,0,1,10 +205,3,602,1,3,10 +205,4,602,1,3,10 +205,5,602,1,3,10 +206,1,602,2,3,5 +206,2,602,2,3,5 +206,3,602,2,3,5 +206,4,602,2,3,5 +206,5,602,2,3,5 +206,6,602,2,3,5 +206,7,602,2,3,5 +206,8,602,2,3,5 +206,9,602,2,3,5 +206,10,602,2,3,5 +301,1,603,0,2,30 +302,1,603,0,1,48 +302,2,603,0,1,48 +303,1,603,0,1,48 +303,2,603,0,1,48 +303,3,603,1,3,48 +304,1,603,0,1,14 +304,2,603,0,1,14 +304,3,603,1,3,14 +304,4,603,1,3,14 +305,1,603,0,1,10 +305,2,603,0,1,10 +305,3,603,1,3,10 +305,4,603,1,3,10 +305,5,603,1,3,10 +306,1,603,2,3,5 +306,2,603,2,3,5 +306,3,603,2,3,5 +306,4,603,2,3,5 +306,5,603,2,3,5 +306,6,603,2,3,5 +306,7,603,2,3,5 +306,8,603,2,3,5 +306,9,603,2,3,5 +306,10,603,2,3,5 +311,1,603,0,2,30 +312,1,603,0,1,48 +312,2,603,0,1,48 +313,1,603,0,1,48 +313,2,603,0,1,48 +313,3,603,1,3,48 +314,1,603,0,1,14 +314,2,603,0,1,14 +314,3,603,1,3,14 +314,4,603,1,3,14 +315,1,603,0,1,10 +315,2,603,0,1,10 +315,3,603,1,3,10 +315,4,603,1,3,10 +315,5,603,1,3,10 +316,1,603,2,3,5 +316,2,603,2,3,5 +316,3,603,2,3,5 +316,4,603,2,3,5 +316,5,603,2,3,5 +316,6,603,2,3,5 +316,7,603,2,3,5 +316,8,603,2,3,5 +316,9,603,2,3,5 +316,10,603,2,3,5 +401,1,604,0,2,30 +402,1,604,0,1,48 +402,2,604,0,1,48 +403,1,604,0,1,48 +403,2,604,0,1,48 +403,3,604,1,3,48 +404,1,604,0,1,14 +404,2,604,0,1,14 +404,3,604,1,3,14 +404,4,604,1,3,14 
+405,1,604,0,1,10 +405,2,604,0,1,10 +405,3,604,1,3,10 +405,4,604,1,3,10 +405,5,604,1,3,10 +406,1,604,2,3,5 +406,2,604,2,3,5 +406,3,604,2,3,5 +406,4,604,2,3,5 +406,5,604,2,3,5 +406,6,604,2,3,5 +406,7,604,2,3,5 +406,8,604,2,3,5 +406,9,604,2,3,5 +406,10,604,2,3,5 +411,1,604,0,2,30 +412,1,604,0,1,48 +412,2,604,0,1,48 +413,1,604,0,1,48 +413,2,604,0,1,48 +413,3,604,1,3,48 +414,1,604,0,1,14 +414,2,604,0,1,14 +414,3,604,1,3,14 +414,4,604,1,3,14 +415,1,604,0,1,10 +415,2,604,0,1,10 +415,3,604,1,3,10 +415,4,604,1,3,10 +415,5,604,1,3,10 +416,1,604,2,3,5 +416,2,604,2,3,5 +416,3,604,2,3,5 +416,4,604,2,3,5 +416,5,604,2,3,5 +416,6,604,2,3,5 +416,7,604,2,3,5 +416,8,604,2,3,5 +416,9,604,2,3,5 +416,10,604,2,3,5 +501,1,605,0,2,30 +502,1,605,0,1,48 +502,2,605,0,1,48 +503,1,605,0,1,48 +503,2,605,0,1,48 +503,3,605,1,3,48 +504,1,605,0,1,14 +504,2,605,0,1,14 +504,3,605,1,3,14 +504,4,605,1,3,14 +505,1,605,0,1,10 +505,2,605,0,1,10 +505,3,605,1,3,10 +505,4,605,1,3,10 +505,5,605,1,3,10 +506,1,605,2,3,5 +506,2,605,2,3,5 +506,3,605,2,3,5 +506,4,605,2,3,5 +506,5,605,2,3,5 +506,6,605,2,3,5 +506,7,605,2,3,5 +506,8,605,2,3,5 +506,9,605,2,3,5 +506,10,605,2,3,5 +511,1,605,0,2,30 +512,1,605,0,1,48 +512,2,605,0,1,48 +513,1,605,0,1,48 +513,2,605,0,1,48 +513,3,605,1,3,48 +514,1,605,0,1,14 +514,2,605,0,1,14 +514,3,605,1,3,14 +514,4,605,1,3,14 +515,1,605,0,1,10 +515,2,605,0,1,10 +515,3,605,1,3,10 +515,4,605,1,3,10 +515,5,605,1,3,10 +516,1,605,2,3,5 +516,2,605,2,3,5 +516,3,605,2,3,5 +516,4,605,2,3,5 +516,5,605,2,3,5 +516,6,605,2,3,5 +516,7,605,2,3,5 +516,8,605,2,3,5 +516,9,605,2,3,5 +516,10,605,2,3,5 +# this 2 person hh outside of seed zones should get dropped +600,1,606,2,3,5 +600,2,606,1,3,10 diff --git a/examples/example_test/data_intermediate/taz_controls.csv b/examples/example_test/data_intermediate/taz_controls.csv new file mode 100644 index 00000000..002742cb --- /dev/null +++ b/examples/example_test/data_intermediate/taz_controls.csv @@ -0,0 +1,37 @@ 
+TAZ,TRACT,PUMA,DISTRICT,HHBASE,HHSIZE1,HHSIZE2,HHSIZE3,HHSIZE4,OSUFAM,SF +100,1,600,1,25,5,8,8,4,53,25 +101,1,600,1,25,5,8,8,4,53,25 +102,1,600,1,25,5,8,8,4,0,25 +103,2,600,1,25,5,8,8,4,0,25 +104,2,600,1,25,5,8,8,4,0,25 +105,2,600,1,25,5,8,8,4,0,25 +200,21,601,1,50,10,16,16,8,106,50 +201,21,601,1,50,10,16,16,8,106,50 +202,21,601,1,50,10,16,16,8,0,50 +203,22,601,1,50,10,16,16,8,0,50 +204,22,601,1,50,10,16,16,8,0,50 +205,22,601,1,50,10,16,16,8,0,50 +300,31,602,2,25,5,8,8,4,53,25 +301,31,602,2,25,5,8,8,4,53,25 +302,31,602,2,25,5,8,8,4,0,25 +303,32,602,2,25,5,8,8,4,0,25 +304,32,602,2,25,5,8,8,4,0,25 +305,32,602,2,25,5,8,8,4,0,25 +400,41,603,2,50,10,16,16,8,106,50 +401,41,603,2,50,10,16,16,8,106,50 +402,41,603,2,50,10,16,16,8,0,50 +403,42,603,2,50,10,16,16,8,0,50 +404,42,603,2,50,10,16,16,8,0,50 +405,42,603,2,50,10,16,16,8,0,50 +500,51,604,3,50,10,16,16,8,106,50 +501,51,604,3,50,10,16,16,8,106,50 +502,51,604,3,50,10,16,16,8,0,50 +503,52,604,3,50,10,16,16,8,0,50 +504,52,604,3,50,10,16,16,8,0,50 +505,52,604,3,50,10,16,16,8,0,50 +600,61,605,3,50,10,16,16,8,106,50 +601,61,605,3,50,10,16,16,8,106,50 +602,61,605,3,50,10,16,16,8,0,50 +603,62,605,3,50,10,16,16,8,0,50 +604,62,605,3,50,10,16,16,8,0,50 +605,62,605,3,50,10,16,16,8,0,50 diff --git a/examples/example_test/data_intermediate/tract_controls.csv b/examples/example_test/data_intermediate/tract_controls.csv new file mode 100644 index 00000000..4308af17 --- /dev/null +++ b/examples/example_test/data_intermediate/tract_controls.csv @@ -0,0 +1,13 @@ +TRACT,PUMA,HHBASE,HHSIZE1,HHSIZE2,HHSIZE3,HHSIZE4,OSUFAM,SF +1,600,75,15,24,24,12,106,75 +2,600,75,15,24,24,12,0,75 +21,601,150,30,48,48,24,212,150 +22,601,150,30,48,48,24,0,150 +31,602,75,15,24,24,12,106,75 +32,602,75,15,24,24,12,0,75 +41,603,150,30,48,48,24,212,150 +42,603,150,30,48,48,24,0,150 +51,604,150,30,48,48,24,212,150 +52,604,150,30,48,48,24,0,150 +61,605,150,30,48,48,24,212,150 +62,605,150,30,48,48,24,0,150 diff --git a/tests/test_intermediate_geography.py 
b/tests/test_intermediate_geography.py new file mode 100644 index 00000000..757ae2fe --- /dev/null +++ b/tests/test_intermediate_geography.py @@ -0,0 +1,47 @@ +import numpy as np +import pandas as pd +from pathlib import Path + +from populationsim.core import tracing, inject, pipeline + + +def teardown_function(func): + inject.clear_cache() + inject.reinject_decorated_tables() + + +def test_intermediate_geography(): + + example_dir = Path(__file__).parent.parent / "examples" / "example_test" + configs_dir = example_dir / "configs_intermediate" + data_dir = example_dir / "data_intermediate" + output_dir = Path(__file__).parent / "output" + + inject.add_injectable("data_dir", data_dir) + inject.add_injectable("configs_dir", configs_dir) + inject.add_injectable("output_dir", output_dir) + + inject.clear_cache() + + tracing.config_logger() + + _MODELS = [ + "input_pre_processor", + "setup_data_structures", + "initial_seed_balancing", + "meta_control_factoring", + "final_seed_balancing", + "integerize_final_seed_weights", + "sub_balancing.geography=TRACT", + "sub_balancing.geography=TAZ", + "expand_households", + "summarize", + "write_tables", + "write_synthetic_population", + ] + + pipeline.run(models=_MODELS, resume_after=None) + + pipeline.close_pipeline() + + inject.clear_cache() \ No newline at end of file From 7f849cb81ebfeb57783632bb3acc733960861052 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Wed, 18 Mar 2026 10:54:50 -0700 Subject: [PATCH 3/7] Changed python versions in .travis.yml --- .travis.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index c8e60a46..8c1eac3c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,8 +3,10 @@ language: python sudo: false python: -- '3.7' -- '3.8' +- '3.9' +- '3.10' +- '3.11' +- '3.12' install: - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh From 21b642a0a0b0debf3c72aafb57e85f6197569d70 Mon Sep 17 00:00:00 2001 From: JoeJimFlood 
Date: Wed, 18 Mar 2026 11:09:41 -0700 Subject: [PATCH 4/7] Added list of geographies to error message for debugging test --- populationsim/steps/setup_data_structures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/populationsim/steps/setup_data_structures.py b/populationsim/steps/setup_data_structures.py index c749d720..94d86121 100644 --- a/populationsim/steps/setup_data_structures.py +++ b/populationsim/steps/setup_data_structures.py @@ -34,7 +34,7 @@ def read_control_spec(data_filename): for g in control_spec.geography.unique(): if g not in geographies: - raise RuntimeError("unknown geography column '%s' in control file" % g) + raise RuntimeError("unknown geography column '%s' in control file" % g + " " + str(geographies)) return control_spec From fb03864d3bbe10d9cf94ca2b25274d3236cf600f Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Wed, 18 Mar 2026 11:12:31 -0700 Subject: [PATCH 5/7] Undid previous commit as it didn't work --- populationsim/steps/setup_data_structures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/populationsim/steps/setup_data_structures.py b/populationsim/steps/setup_data_structures.py index 94d86121..c749d720 100644 --- a/populationsim/steps/setup_data_structures.py +++ b/populationsim/steps/setup_data_structures.py @@ -34,7 +34,7 @@ def read_control_spec(data_filename): for g in control_spec.geography.unique(): if g not in geographies: - raise RuntimeError("unknown geography column '%s' in control file" % g + " " + str(geographies)) + raise RuntimeError("unknown geography column '%s' in control file" % g) return control_spec From 1a8f644229cdc17c2fbdd75bd0159e56757fee95 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Thu, 19 Mar 2026 10:36:48 -0700 Subject: [PATCH 6/7] Updated test\configs\settings.yaml to see if that's what the CI test is reading --- tests/configs/settings.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/configs/settings.yaml 
b/tests/configs/settings.yaml index 8ae4fd30..a357b41b 100644 --- a/tests/configs/settings.yaml +++ b/tests/configs/settings.yaml @@ -31,10 +31,12 @@ input_table_list: - tablename: TRACT_control_data filename : tract_controls.csv - tablename: DISTRICT_control_data + filename : district_controls.csv + - tablename: REGION_control_data filename : region_controls.csv -geographies: [DISTRICT, PUMA, TRACT, TAZ] +geographies: [REGION, DISTRICT, PUMA, TRACT, TAZ] seed_geography: PUMA output_tables: From 884217e3adb37ccd28ebe755883b6c40daa231a6 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Thu, 19 Mar 2026 10:43:17 -0700 Subject: [PATCH 7/7] Reverting change to test settings file --- tests/configs/settings.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/configs/settings.yaml b/tests/configs/settings.yaml index a357b41b..8ae4fd30 100644 --- a/tests/configs/settings.yaml +++ b/tests/configs/settings.yaml @@ -31,12 +31,10 @@ input_table_list: - tablename: TRACT_control_data filename : tract_controls.csv - tablename: DISTRICT_control_data - filename : district_controls.csv - - tablename: REGION_control_data filename : region_controls.csv -geographies: [REGION, DISTRICT, PUMA, TRACT, TAZ] +geographies: [DISTRICT, PUMA, TRACT, TAZ] seed_geography: PUMA output_tables: