diff --git a/mobility/choice_models/destination_sequence_sampler.py b/mobility/choice_models/destination_sequence_sampler.py index 13506264..94330145 100644 --- a/mobility/choice_models/destination_sequence_sampler.py +++ b/mobility/choice_models/destination_sequence_sampler.py @@ -409,12 +409,12 @@ def spatialize_other_motives(self, chains, dest_prob, costs, alpha, seed): logging.info("Spatializing other motives...") - chains_step = ( + chains_step = ( chains .filter(pl.col("seq_step_index") == 1) - .with_columns(pl.col("home_zone_id").alias("from")) + .with_columns(pl.col("home_zone_id").cast(pl.Int32).alias("from")) ) - + seq_step_index = 1 spatialized_chains = [] @@ -532,7 +532,7 @@ def spatialize_trip_chains_step(self, seq_step_index, chains_step, dest_prob, co chains_step .filter(pl.col("is_anchor")) .with_columns( - to=pl.col("anchor_to") + to=pl.col("anchor_to").cast(pl.Int32) ) .select(["demand_group_id", "home_zone_id", "motive_seq_id", "motive", "anchor_to", "from", "to"]) ) diff --git a/mobility/motives/leisure.py b/mobility/motives/leisure.py index 543727c7..a0bbcf67 100644 --- a/mobility/motives/leisure.py +++ b/mobility/motives/leisure.py @@ -1,25 +1,24 @@ import pandas as pd import polars as pl - +import geopandas as gpd from typing import List +import numpy as np +import os from mobility.motives.motive import Motive - +from mobility.parsers.leisure_facilities_distribution import LeisureFacilitiesDistribution class LeisureMotive(Motive): def __init__( - self, - value_of_time: float = 10.0, - saturation_fun_ref_level: float = 1.5, - saturation_fun_beta: float = 4.0, - survey_ids: List[str] = ["7.71", "7.72", "7.73", "7.74", "7.75", "7.76", "7.77", "7.78"], - radiation_lambda: float = 0.99986, - opportunities: pd.DataFrame = None - ): - - if opportunities is None: - raise ValueError("No built in leisure opportunities data for now, please provide an opportunities dataframe when creating instantiating the LeisureMotive class (or don't use it at all and let the OtherMotive model handle this motive).") + self, + value_of_time: float = 10.0, + saturation_fun_ref_level: float = 1.5, + saturation_fun_beta: float = 4.0, + survey_ids: List[str] = ["7.71", "7.72", "7.73", "7.74", "7.75", "7.76", "7.77", "7.78"], + radiation_lambda: float = 0.99986, + opportunities: pd.DataFrame = None + ): super().__init__( name="leisure", @@ -34,28 +33,104 @@ def __init__( def get_opportunities(self, transport_zones): - transport_zones = transport_zones.get().drop("geometry", axis=1) - transport_zones["country"] = transport_zones["local_admin_unit_id"].str[0:2] - - tz_lau_ids = transport_zones["local_admin_unit_id"].unique().tolist() + if self.opportunities is not None: - opportunities = self.opportunities.loc[tz_lau_ids, "n_opp"].reset_index() + opportunities = self.opportunities - opportunities = pd.merge( - transport_zones[["transport_zone_id", "local_admin_unit_id", "country", "weight"]], - opportunities[["local_admin_unit_id", "n_opp"]], - on="local_admin_unit_id" - ) - - opportunities["n_opp"] = opportunities["weight"]*opportunities["n_opp"] + else: - opportunities = ( - opportunities[["transport_zone_id", "n_opp"]] - .rename({"transport_zone_id": "to"}, axis=1) - ) + transport_zones = transport_zones.get() + + opportunities = LeisureFacilitiesDistribution().get() + + opportunities = gpd.sjoin( + opportunities, + transport_zones, + how="left", + predicate="within" + ).drop(columns=["index_right"]) + opportunities = opportunities.dropna(subset=["transport_zone_id"]) + + opportunities["country"] = opportunities["local_admin_unit_id"].str[0:2] + + opportunities = ( + opportunities.groupby(["transport_zone_id", "local_admin_unit_id", "country", "weight"], dropna=False)["freq_score"] + .sum() + .reset_index() + ) + + opportunities["n_opp"] = opportunities["weight"]*opportunities["freq_score"] + opportunities = ( + opportunities[["transport_zone_id", "n_opp"]] + .rename({"transport_zone_id": "to"}, axis=1) + ) + opportunities["to"] = opportunities["to"].astype("Int64") + + if os.environ.get("MOBILITY_DEBUG") == "1": + self.plot_opportunities_map( + transport_zones, + opportunities, + use_log = False + ) + opportunities = pl.from_pandas(opportunities) opportunities = self.enforce_opportunities_schema(opportunities) - + return opportunities - + + + def plot_opportunities_map( + self, + transport_zones: gpd.GeoDataFrame, + opportunities: pd.DataFrame, + zone_id_col: str = "transport_zone_id", + opp_zone_col: str = "to", + value_col: str = "n_opp", + use_log: bool = False + ): + + if not isinstance(transport_zones, gpd.GeoDataFrame): + tz = gpd.GeoDataFrame(transport_zones, geometry="geometry", crs="EPSG:4326") + else: + tz = transport_zones + + if pl is not None and isinstance(opportunities, pl.DataFrame): + opp = opportunities.to_pandas() + else: + opp = opportunities.copy() + + m = tz.merge( + opp.rename(columns={opp_zone_col: zone_id_col}), + on=zone_id_col, + how="left" + ) + + m[value_col] = m[value_col].fillna(0) + m = m[m["geometry"].notna()] + m = m[~m.geometry.is_empty] + + if not m.geometry.is_valid.all(): + m["geometry"] = m.buffer(0) + m = m[m["geometry"].notna()] + m = m[~m.geometry.is_empty] + + if use_log: + log_col = f"log_{value_col}" + m[log_col] = np.log1p(m[value_col]) + col_to_plot = log_col + else: + col_to_plot = value_col + + ax = m.plot( + column=col_to_plot, + legend=True, + cmap="plasma", + linewidth=0.1, + edgecolor="white", + aspect=1 + ) + ax.set_axis_off() + + return ax + diff --git a/mobility/motives/leisure_old.py b/mobility/motives/leisure_old.py new file mode 100644 index 00000000..543727c7 --- /dev/null +++ b/mobility/motives/leisure_old.py @@ -0,0 +1,61 @@ +import pandas as pd +import polars as pl + +from typing import List + +from mobility.motives.motive import Motive + + +class LeisureMotive(Motive): + + def __init__( + self, + value_of_time: float = 10.0, + saturation_fun_ref_level: float = 1.5, + saturation_fun_beta: float = 4.0, + survey_ids: List[str] = ["7.71", "7.72", "7.73", "7.74", "7.75", "7.76", "7.77", "7.78"], + radiation_lambda: float = 0.99986, + opportunities: pd.DataFrame = None + ): + + if opportunities is None: + raise ValueError("No built in leisure opportunities data for now, please provide an opportunities dataframe when creating instantiating the LeisureMotive class (or don't use it at all and let the OtherMotive model handle this motive).") + + super().__init__( + name="leisure", + value_of_time=value_of_time, + survey_ids=survey_ids, + radiation_lambda=radiation_lambda, + opportunities=opportunities, + saturation_fun_ref_level=saturation_fun_ref_level, + saturation_fun_beta=saturation_fun_beta + ) + + + def get_opportunities(self, transport_zones): + + transport_zones = transport_zones.get().drop("geometry", axis=1) + transport_zones["country"] = transport_zones["local_admin_unit_id"].str[0:2] + + tz_lau_ids = transport_zones["local_admin_unit_id"].unique().tolist() + + opportunities = self.opportunities.loc[tz_lau_ids, "n_opp"].reset_index() + + opportunities = pd.merge( + transport_zones[["transport_zone_id", "local_admin_unit_id", "country", "weight"]], + opportunities[["local_admin_unit_id", "n_opp"]], + on="local_admin_unit_id" + ) + + opportunities["n_opp"] = opportunities["weight"]*opportunities["n_opp"] + + opportunities = ( + opportunities[["transport_zone_id", "n_opp"]] + .rename({"transport_zone_id": "to"}, axis=1) + ) + + opportunities = pl.from_pandas(opportunities) + opportunities = self.enforce_opportunities_schema(opportunities) + + return opportunities + diff --git a/mobility/parsers/leisure_facilities_distribution.py b/mobility/parsers/leisure_facilities_distribution.py new file mode 100644 index 00000000..0f5663b5 --- /dev/null +++ b/mobility/parsers/leisure_facilities_distribution.py @@ -0,0 +1,172 @@ +import os +import json +import pathlib +import logging +import subprocess + +import geopandas as gpd +from shapely.geometry import shape, Polygon, MultiPolygon + +from mobility.file_asset import FileAsset +from mobility.parsers.local_admin_units import LocalAdminUnits +from mobility.study_area import StudyArea +from mobility.parsers.osm import OSMData +from mobility.parsers.leisures_frequentation import LEISURE_MAPPING, LEISURE_FREQUENCY + + +class LeisureFacilitiesDistribution(FileAsset): + """ + Build a point layer of leisure facilities: + - OSM key=leisure, from Geofabrik extracts + - polygons converted to representative points + - private access removed + - some noisy values cleaned / remapped + - each facility assigned a frequency score + - stored as a Parquet GeoDataFrame in EPSG:3035 + """ + + def __init__(self) -> None: + inputs = {} + + cache_path = ( + pathlib.Path(os.environ["MOBILITY_PACKAGE_DATA_FOLDER"]) + / "osm" + / "leisures_points.parquet" + ) + + super().__init__(inputs, cache_path) + + def get_cached_asset(self) -> gpd.GeoDataFrame: + logging.info( + "Leisure facilities already prepared. Reusing the file: %s", + self.cache_path, + ) + gdf = gpd.read_parquet(self.cache_path) + gdf = gdf.set_crs(3035) + return gdf + + def create_and_get_asset(self) -> gpd.GeoDataFrame: + gdf = self._prepare_leisure_facilities() + gdf.to_parquet(self.cache_path, index=False) + return gdf + + def _prepare_leisure_facilities(self) -> gpd.GeoDataFrame: + + admin_units = LocalAdminUnits().get() + admin_units_ids = admin_units["local_admin_unit_id"].tolist() + + study_area = StudyArea(admin_units_ids, radius=0) + + pbf_path = OSMData( + study_area, + object_type="nwr", + key="leisure", + geofabrik_extract_date="240101", + split_local_admin_units=False, + ).get() + + out_seq_leisure = pbf_path.with_name("leisures.geojsonseq") + + subprocess.run( + [ + "osmium", + "export", + str(pbf_path), + "--overwrite", + "--geometry-types=polygon,multipolygon,point", + "-f", + "geojsonseq", + "-o", + str(out_seq_leisure), + ], + check=True, + ) + + rows = [] + + # Read GeoJSONSeq and convert all geometries to points + with open(out_seq_leisure, "r", encoding="utf-8") as f: + for line in f: + s = line.lstrip("\x1e").strip() + if not s.startswith("{"): + continue + + try: + obj = json.loads(s) + except json.JSONDecodeError: + continue + + props = obj.get("properties", obj) + leisure = props.get("leisure") + if leisure is None: + continue + + geom = obj.get("geometry") + if geom is None: + continue + + g = shape(geom) + if isinstance(g, (Polygon, MultiPolygon)): + g = g.representative_point() + + rows.append( + { + "leisure": leisure, + "access": props.get("access"), + "geometry": g, + } + ) + + gdf = gpd.GeoDataFrame(rows, geometry="geometry", crs="EPSG:4326") + + # Normalize leisure values (lowercase, split composite values, apply mapping) + vals = gdf["leisure"].astype(str).str.strip().str.lower() + split_vals = vals.str.replace("+", ";", regex=False).str.split(";") + + cleaned = [] + for lst in split_vals: + cleaned_value = None + fallback = None + + for p in lst: + p = p.strip() + if not p: + continue + + if p in LEISURE_MAPPING: + cleaned_value = LEISURE_MAPPING[p] + break + + if fallback is None: + fallback = p + + if cleaned_value is None: + cleaned_value = fallback + + cleaned.append(cleaned_value) + + gdf["leisure_clean"] = cleaned + + # Drop items mapped to None or explicitly unwanted categories + gdf = gdf[~gdf["leisure_clean"].isna()].copy() + gdf = gdf[gdf["leisure_clean"] != "garden"] + gdf = gdf[gdf["leisure_clean"] != "picnic_table"] + gdf = gdf[gdf["leisure_clean"] != "common"] + gdf = gdf[gdf["leisure_clean"] != "schoolyard"] + # to complete if necessary + + + # Remove private places in general + gdf = gdf[gdf["access"] != "private"] + + # Special rule: keep only public swimming pools (access == "yes") + mask_pool = gdf["leisure_clean"] == "swimming_pool" + gdf = gdf[~mask_pool | (gdf["access"] == "yes")] + + # Assign frequency score + gdf["freq_score"] = gdf["leisure_clean"].map(LEISURE_FREQUENCY).fillna(2) + + # Reproject + gdf = gdf.to_crs(3035) + + return gdf diff --git a/mobility/parsers/leisures_frequentation.py b/mobility/parsers/leisures_frequentation.py new file mode 100644 index 00000000..f3363d74 --- /dev/null +++ b/mobility/parsers/leisures_frequentation.py @@ -0,0 +1,109 @@ +# Mapping of messy OSM leisure values to clean, canonical OSM leisure tags. +# Only values that require correction or special handling are included here. + +LEISURE_MAPPING = { + # French variants / lexical variants + "parc": "park", + "citypark": "park", + "centre_de_loisirs": "sports_centre", + "terrain de boules": "miniature_golf", + + # Spelling variations and common typos + "pingpong": "table_tennis_table", + "sport_center": "sports_centre", + "sport_centre": "sports_centre", + "sport_hall": "sports_hall", + "lake_bath": "bathing_place", + "flussbad": "bathing_place", + + # Combined values + "miniature_golf;trampoline_park;sports_centre": "miniature_golf", + "sports_centre;pitch": "sports_centre", + "sports_centre;jump_park": "sports_centre", + "swimming_pool;ice_rink": "swimming_pool", + "swimming_pool;sports_centre": "swimming_pool", + + # Escape game variants + "laser_game": "escape_game", + "lasertag": "escape_game", + "escape game": "escape_game", + + # Spa / sauna / wellness + "spa": "sauna", + "healthspa": "sauna", + "thalasso": "sauna", + "thalassotherapy": "sauna", + + # Out-of-scope or useless values → removed + "building": None, + "parking": None, + "forest": None, + "footway": None, + "construction": None, + "vacant": None, + "proposed": None, + "natural": None, + "garss": None, + "grass": None, + "dr": None, + "fes": None, + "check": None, + "spot": None, + "island": None, + "refuge": None, + "detention": None, + "hostel": None, + "tourism": None, + "boat": None, + "coworking_space": None, + "association": None, + "music": None, +} + + +# Frequency scores for clean leisure categories +# 4 = high footfall, 1 = low footfall +LEISURE_FREQUENCY = { + "stadium": 4, + "sports_centre": 4, + "sports_hall": 4, + "swimming_pool": 4, + "water_park": 4, + "amusement_arcade": 4, + "adult_gaming_centre": 4, + "escape_game": 4, + "theme_park": 4, + + "park": 3, + "garden": 3, + "playground": 3, + "miniature_golf": 3, + "golf_course": 3, + "marina": 3, + "fitness_centre": 3, + "fitness_station": 3, + "ice_rink": 3, + "trampoline_park": 3, + "bathing_place": 3, + + "recreation_ground": 2, + "picnic": 2, + "picnic_table": 2, + "bird_hide": 2, + "wildlife_hide": 2, + "table_tennis_table": 2, + "horse_riding": 2, + "fishing": 2, + "community_centre": 2, + "social_club": 2, + "summer_camp": 2, + "schoolyard": 2, + "bandstand": 2, + "dance": 2, + + "sauna": 1, + "turkish_bath": 1, + "common": 1, + "village_green": 1, + "yes": 1, +} diff --git a/mobility/r_utils/prepare_transport_zones.R b/mobility/r_utils/prepare_transport_zones.R index 15ba7b69..5280791d 100644 --- a/mobility/r_utils/prepare_transport_zones.R +++ b/mobility/r_utils/prepare_transport_zones.R @@ -54,7 +54,7 @@ convert_sf_to_geos_dt <- function(sf_df) { st_geometry(sf_df) <- "geometry" dt <- as.data.table(sf_df) - + if (nrow(dt) == 1){ dt <- dt[rep(1:.N, each = 2)] dt[, geometry := as_geos_geometry(geometry)] @@ -75,15 +75,15 @@ compute_cluster_internal_distance <- function(buildings_dt) { set.seed(0) from_buildings <- buildings_dt[, - .SD[sample(.N, 1000, replace = TRUE, prob = area)], - by = cluster, - .SDcols = c("X", "Y") + .SD[sample(.N, 1000, replace = TRUE, prob = area)], + by = cluster, + .SDcols = c("X", "Y") ] to_buildings <- buildings_dt[, - .SD[sample(.N, 1000, replace = TRUE, prob = area)], - by = cluster, - .SDcols = c("X", "Y") + .SD[sample(.N, 1000, replace = TRUE, prob = area)], + by = cluster, + .SDcols = c("X", "Y") ] distances <- cbind( @@ -169,7 +169,7 @@ clusters_to_voronoi <- function(lau_id, lau_geom, level_of_detail, buildings_are cluster_area <- buildings_dt[, list(area = sum(area)), by = cluster] clusters <- merge(clusters, cluster_area, by = "cluster") - + transport_zones <- clusters[, list( transport_zone_id = cluster, weight = area/sum(area), @@ -202,7 +202,7 @@ clusters_to_voronoi <- function(lau_id, lau_geom, level_of_detail, buildings_are st_as_sf(geos_geometry_n(clusters_geos, seq_len(geos_num_geometries(clusters_geos)))), st_as_sf(voronoi) ) - + transport_zones[, geometry := voronoi[unlist(v_order)]] # @@ -216,7 +216,7 @@ clusters_to_voronoi <- function(lau_id, lau_geom, level_of_detail, buildings_are # p <- p + coord_equal() # p # - + } else { @@ -263,7 +263,7 @@ transport_zones_buildings <- lapply( # future.seed = 0, FUN = function(lau_id) { - + info(logger, sprintf("Clustering buildings of LAU %s...", lau_id)) lau_geom <- study_area_dt[local_admin_unit_id == lau_id, geometry_wkb] @@ -280,7 +280,7 @@ transport_zones_buildings <- lapply( ) return(result) - + } ) @@ -309,4 +309,4 @@ transport_zones <- st_as_sf(transport_zones) # Write the result st_write(transport_zones, output_fp, delete_dsn = TRUE, quiet = TRUE) -write_parquet(clusters, clusters_fp) +write_parquet(clusters, clusters_fp) \ No newline at end of file