diff --git a/python/housing_data/build_metros.py b/python/housing_data/build_metros.py index 7e11137..733aa50 100644 --- a/python/housing_data/build_metros.py +++ b/python/housing_data/build_metros.py @@ -5,7 +5,7 @@ def load_crosswalk_df(data_repo_path: Path) -> pd.DataFrame: - crosswalk_df = pd.read_csv(data_repo_path / "data/crosswalk/cbsa2fipsxw.csv") + crosswalk_df = pd.read_csv(data_repo_path / "data/crosswalk/cbsa2fipsxw_2023.csv") # Drop the μSAs, no one cares about them. # Most of them are just one county anyway, so showing the combined metro stats doesn't diff --git a/python/housing_data/california_hcd_data.py b/python/housing_data/california_hcd_data.py index 8601ca7..8e1c429 100644 --- a/python/housing_data/california_hcd_data.py +++ b/python/housing_data/california_hcd_data.py @@ -76,7 +76,9 @@ def load_california_hcd_data( ], None, ) - assert df["building_type"].isnull().sum() == 0 + + assert df["building_type"].isnull().sum() < 50 + df = df[df["building_type"].notnull()] df = df.rename(columns={"YEAR": "year"}).astype({"year": str})