From 3d7627dc1562cf85d48e299567955e6140f37155 Mon Sep 17 00:00:00 2001
From: Asdominet34 <davide1.selva@mail.polimi.it>
Date: Thu, 9 Apr 2026 13:02:12 +0200
Subject: [PATCH 1/2] updates on build_hydro_profile and add_electricity

---
 config/config.default.yaml     |  38 ++-
 scripts/add_electricity.py     | 515 ++++++++++++++++++++++++++++++++-
 scripts/build_hydro_profile.py | 261 ++++++++++++++++-
 3 files changed, 775 insertions(+), 39 deletions(-)

diff --git a/config/config.default.yaml b/config/config.default.yaml
index 146429bbc1..0286a2813e 100644
--- a/config/config.default.yaml
+++ b/config/config.default.yaml
@@ -404,20 +404,28 @@ renewable:
     natura: true
     excluder_resolution: 100
     clip_p_max_pu: 0.01
-  hydro:
-    cutout: default
-    carriers:
-    - ror
-    - PHS
-    - hydro
-    PHS_max_hours: 6
-    hydro_max_hours: energy_capacity_totals_by_country
-    flatten_dispatch: false
-    flatten_dispatch_buffer: 0.2
-    clip_min_inflow: 1.0
-    eia_norm_year: false
-    eia_correct_by_capacity: false
-    eia_approximate_missing: false
+ hydro:
+  cutout: default
+  GloFAS_ERA5:
+   methods: GloFAS  #'GloFAS' for GloFAS-ERA5 methodology instead of ERA5 Runoff
+   q_min: 0.2 # qmin fraction with respect to qmax for RoR conversion
+   eff: 0.85 # mean efficiency for inflow conversion to MW
+   reservoir_subcluster_distance_threshold: 1.0
+   reservoir_subcluster_seasonal_weight: 0.5
+   reservoir_subcluster_inflow_level_weight: 1.75
+   reservoir_subcluster_max_hours_weight: 0.75
+  carriers:
+   - ror
+   - PHS
+   - hydro
+  PHS_max_hours: 6
+  hydro_max_hours: energy_capacity_totals_by_country
+  flatten_dispatch: false
+  flatten_dispatch_buffer: 0.2
+  clip_min_inflow: 1.0
+  eia_norm_year: false
+  eia_correct_by_capacity: false
+  eia_approximate_missing: false
 
 # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#conventional
 conventional:
@@ -1108,7 +1116,7 @@ clustering:
     hac_features:
     - wnd100m
     - influx_direct
-  exclude_carriers: []
+  exclude_carriers: ["hydro"] #necessary for GloFAS-ERA5 methodology
   consider_efficiency_classes: false
   aggregation_strategies:
     generators:
diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py
index 84fd226dfd..ecc09b5e55 100755
--- a/scripts/add_electricity.py
+++ b/scripts/add_electricity.py
@@ -59,6 +59,8 @@
 import pypsa
 import xarray as xr
 from pypsa.clustering.spatial import DEFAULT_ONE_PORT_STRATEGIES, normed_or_uniform
+from sklearn.preprocessing import StandardScaler
+from sklearn.cluster import AgglomerativeClustering
 
 from scripts._helpers import (
     PYPSA_V1,
@@ -730,6 +732,482 @@ def attach_existing_batteries(
     )
     logger.info(f"Added {len(batt)} existing battery storage units\n({stats} MW)")
 
+def _fill_reservoir_missing_values(df: pd.DataFrame) -> pd.DataFrame:
+    
+    #safety check deletable
+    
+    df = df.copy()
+
+    df["damheight_m"] = pd.to_numeric(df["damheight_m"], errors="coerce")
+    df["volume_mm3"] = pd.to_numeric(df["volume_mm3"], errors="coerce")
+    df["p_nom"] = pd.to_numeric(df["p_nom"], errors="coerce")
+
+    median_head_country = df.groupby("country")["damheight_m"].median()
+    median_vol_country = df.groupby("country")["volume_mm3"].median()
+
+    median_head_global = df["damheight_m"].median(skipna=True)
+    median_vol_global = df["volume_mm3"].median(skipna=True)
+
+    for country in df["country"].dropna().unique():
+        mask_country = df["country"] == country
+
+        mask_head_nan = mask_country & df["damheight_m"].isna()
+        head_value = median_head_country.get(country, median_head_global)
+        df.loc[mask_head_nan, "damheight_m"] = head_value
+
+        mask_vol_nan = mask_country & df["volume_mm3"].isna()
+        vol_value = median_vol_country.get(country, median_vol_global)
+        df.loc[mask_vol_nan, "volume_mm3"] = vol_value
+
+    df.loc[df["damheight_m"] <= 0, "damheight_m"] = 1.0
+    df = df.loc[df["p_nom"].notna() & (df["p_nom"] > 0)].copy()
+
+    return df          
+
+def _safe_corr(x: pd.Series, y: pd.Series) -> float:
+    x = pd.to_numeric(x, errors="coerce")
+    y = pd.to_numeric(y, errors="coerce")
+
+    valid = x.notna() & y.notna()
+    x_valid = x.loc[valid]
+    y_valid = y.loc[valid]
+
+    if len(x_valid) < 2:
+        return 0.0
+
+    x_std = x_valid.std()
+    y_std = y_valid.std()
+
+    if pd.isna(x_std) or pd.isna(y_std) or x_std == 0 or y_std == 0:
+        return 0.0
+
+    return float(x_valid.corr(y_valid))
+
+
+def _build_hydro_clustering_features(
+    inflow_plants: pd.DataFrame,
+    plants: pd.DataFrame,
+    seasonal_weight: float = 1.0,
+    inflow_level_weight: float = 1.0,
+    max_hours_weight: float = 1.0,
+) -> pd.DataFrame:
+    plants = plants.copy()
+    inflow_plants = inflow_plants.copy()
+
+    inflow_plants = inflow_plants.loc[:, plants.index]
+
+    #Metric A: seasonal shape similarity to mean bus shape
+    #Each plant inflow is divided by its own mean inflow, then correlation is computed against the average normalized inflow shape of the bus
+    plant_mean_inflow = inflow_plants.mean(axis=0).replace(0.0, np.nan)
+    inflow_norm = inflow_plants.divide(plant_mean_inflow, axis=1)
+
+    bus_mean_shape = inflow_norm.mean(axis=1)
+
+    A = pd.DataFrame(index=plants.index)
+    A["seasonal_corr"] = [
+        _safe_corr(inflow_norm[col], bus_mean_shape) for col in plants.index
+    ]
+
+    #Metric B: inflow level relative to nominal capacity
+    # mean(inflow) / p_nom [equivalent to sum(inflow) / (len(inflow) * p_nom)]
+    # ---------------------------------------------------------
+    n_steps = len(inflow_plants.index)
+
+    inflow_level = (
+        inflow_plants.sum(axis=0) / (n_steps * plants["p_nom"])
+    ).replace([np.inf, -np.inf], np.nan)
+
+    B = pd.DataFrame(index=plants.index)
+    B["inflow_level"] = inflow_level
+
+    #Metric C: max_hours aka the volume
+    C = pd.DataFrame(index=plants.index)
+    C["max_hours"] = pd.to_numeric(
+        plants["max_hours_plant"], errors="coerce"
+    ).replace([np.inf, -np.inf], np.nan)
+
+    #Fill NaNs metric-wise
+    for df_block in [A, B, C]:
+        for col in df_block.columns:
+            if df_block[col].isna().all():
+                df_block[col] = 0.0
+            elif df_block[col].isna().any():
+                df_block[col] = df_block[col].fillna(df_block[col].median(skipna=True))
+
+    # Standardize each metric separately
+    scaler_A = StandardScaler()
+    A_std = pd.DataFrame(
+        scaler_A.fit_transform(A),
+        index=A.index,
+        columns=A.columns,
+    )
+
+    scaler_B = StandardScaler()
+    B_std = pd.DataFrame(
+        scaler_B.fit_transform(B),
+        index=B.index,
+        columns=B.columns,
+    )
+
+    scaler_C = StandardScaler()
+    C_std = pd.DataFrame(
+        scaler_C.fit_transform(C),
+        index=C.index,
+        columns=C.columns,
+    )
+
+    # Apply weights
+    A_std *= seasonal_weight
+    B_std *= inflow_level_weight
+    C_std *= max_hours_weight
+
+    X = pd.concat([A_std, B_std, C_std], axis=1)
+
+    return X
+
+
+def _cluster_reservoirs_within_bus(
+    bus_plants: pd.DataFrame,
+    inflow_bus: pd.DataFrame,
+    distance_threshold: float = 2.0,
+    seasonal_weight: float = 1.0,
+    inflow_level_weight: float = 1.0,
+    max_hours_weight: float = 1.0,
+) -> pd.Series:
+    
+    #function that creates reservoir clusters to manage their aggregation within the bus. 
+    #when `distance_threshold = 0`, each reservoir remains separate; 
+    #when `distance_threshold = inf`, all reservoirs within the same bus are aggregated together (no clusters).
+    bus_plants = bus_plants.copy()
+
+    if len(bus_plants) == 1:
+        return pd.Series(0, index=bus_plants.index, name="subcluster")
+
+    X = _build_hydro_clustering_features(
+        inflow_plants=inflow_bus,
+        plants=bus_plants,
+        seasonal_weight=seasonal_weight,
+        inflow_level_weight=inflow_level_weight,
+        max_hours_weight=max_hours_weight,
+    )
+
+    if len(bus_plants) == 2:
+        dist = np.linalg.norm(X.iloc[0].values - X.iloc[1].values)
+        if dist <= distance_threshold:
+            labels = np.array([0, 0], dtype=int)
+        else:
+            labels = np.array([0, 1], dtype=int)
+    else:
+        model = AgglomerativeClustering(
+            n_clusters=None,
+            distance_threshold=distance_threshold,
+            linkage="ward",
+        )
+        labels = model.fit_predict(X.values)
+
+    return pd.Series(labels, index=bus_plants.index, name="subcluster")
+
+def _compute_reservoir_max_hours(
+    df: pd.DataFrame,
+    reservoir_energy_efficiency: float = 0.70,
+) -> pd.DataFrame:
+    #function that converts volume into the max_hours parameter
+    df = df.copy()
+
+    df["reservoir_mwh"] = (
+        df["volume_mm3"] * 1e6
+        * df["damheight_m"]
+        * 9.81
+        * reservoir_energy_efficiency
+        * 1e-3
+        / 3600
+    )
+
+    df["max_hours_plant"] = (
+        df["reservoir_mwh"] / df["p_nom"]
+    ).replace([np.inf, -np.inf], np.nan)
+
+    return df
+
+def attach_hydro_GloFAS(
+    n: pypsa.Network,
+    costs: pd.DataFrame,
+    ppl: pd.DataFrame,
+    profile_hydro: str,
+    carriers: list,
+    **params,
+):
+    """
+    Attach hydro generators and storage units to the network using
+    plant-level GloFAS/ERA5-based hydro profiles.
+
+    Parameters
+    ----------
+    n : pypsa.Network
+        The PyPSA network to attach the hydro units to.
+    costs : pd.DataFrame
+        DataFrame containing the cost data.
+    ppl : pd.DataFrame
+        DataFrame containing the power plant data.
+    profile_hydro : str
+        Path to the plant-level hydro profile dataset.
+    carriers : list
+        List of hydro energy carriers.
+    **params :
+        Additional parameters for the GloFAS hydro workflow.
+    """
+    add_missing_carriers(n, carriers)
+    add_co2_emissions(n, costs, carriers)
+
+    ror_agg = ppl.query('carrier == "ror"').copy()
+    phs_agg = ppl.query('carrier == "PHS"').copy()
+    res_plants_all = ppl.query('carrier == "hydro"').copy()
+
+    for df in [ror_agg, phs_agg, res_plants_all]:
+        df.columns = df.columns.str.lower()
+
+    ds = xr.open_dataset(profile_hydro)
+
+    #Load raw hydro mapping for ROR aggregation
+    ppls_raw = pd.read_csv(
+        r"/home/dselva/projects/pypsa-eur/resources/Europe_2019_GloFAS_SABER/powerplants_s_100.csv",  #####PATHHHH
+        index_col=0
+    )
+
+    ppls_raw.index = ppls_raw.index.astype(str)
+    ppls_raw.columns = ppls_raw.columns.str.strip()
+    ppls_raw = ppls_raw.rename(columns={"Capacity": "p_nom"})
+
+    ppls_raw["carrier"] = ppls_raw["Technology"].replace({
+        "Run-Of-River": "ror",
+        "Reservoir": "hydro",
+        "Pumped Storage": "PHS",
+    })
+
+    ppls_raw = ppls_raw[ppls_raw["carrier"].isin(["ror", "hydro", "PHS"])].copy()
+
+    ppls_raw["group_key_agg"] = (
+        ppls_raw["bus"].astype(str) + " " + ppls_raw["carrier"].astype(str)
+    )
+
+    ppls_raw["group_key_disagg"] = (
+        ppls_raw["bus"].astype(str)
+        + " "
+        + ppls_raw["carrier"].astype(str)
+        + " "
+        + ppls_raw.index.astype(str)
+    )
+
+    #ROR aggregation
+    p_max_pu_ror_agg = None
+
+    if not ror_agg.empty and "p_max_pu_ror" in ds:
+
+        p_max_pu_ror_plants = ds["p_max_pu_ror"].to_pandas()
+        p_max_pu_ror_plants.columns = p_max_pu_ror_plants.columns.astype(str)
+
+        common = ppls_raw.index.intersection(p_max_pu_ror_plants.columns)
+
+        p_max_pu_ror_plants = p_max_pu_ror_plants[common]
+
+        ror_plants = ppls_raw.loc[common].copy()
+        ror_plants = ror_plants[ror_plants["carrier"] == "ror"].copy()
+
+        ror_target_index = pd.Index(ror_agg.index.astype(str))
+
+        ror_plants["group_key"] = np.where(
+            ror_plants["group_key_disagg"].isin(ror_target_index),
+            ror_plants["group_key_disagg"],
+            ror_plants["group_key_agg"],
+        )
+
+        p_max_pu_ror_plants = p_max_pu_ror_plants[ror_plants.index]
+
+        inflow_equiv = p_max_pu_ror_plants.multiply(
+            ror_plants["p_nom"], axis=1
+        )
+
+        inflow_equiv_agg = (
+            inflow_equiv.T
+            .groupby(ror_plants["group_key"])
+            .sum()
+            .T
+        )
+
+        p_nom_ror_agg = ror_plants.groupby("group_key")["p_nom"].sum()
+
+        p_max_pu_ror_agg = (
+            inflow_equiv_agg.divide(p_nom_ror_agg, axis=1)
+            .clip(upper=1.0)
+        )
+
+        p_max_pu_ror_agg = p_max_pu_ror_agg.reindex(columns=ror_agg.index)
+
+    #Reservoir custom aggregation inside each final bus
+    res_agg = None
+    inflow_res_agg = None
+
+    if not res_plants_all.empty and "inflow_reservoir" in ds:
+
+        if "source_id" not in res_plants_all.columns:
+            raise ValueError(
+                "source_id column missing in ppl for disaggregated hydro workflow."
+            )
+
+        inflow_res_plants = ds["inflow_reservoir"].to_pandas()
+        inflow_res_plants.columns = inflow_res_plants.columns.astype(str)
+
+        res_plants_all["source_id"] = res_plants_all["source_id"].astype(str)
+
+        #fill missing data and compute plant-level max_hours
+        res_plants_all = _fill_reservoir_missing_values(res_plants_all)
+        res_plants_all = _compute_reservoir_max_hours(res_plants_all)
+
+        #Keep only plants for which inflow exists
+        common_mask = res_plants_all["source_id"].isin(inflow_res_plants.columns)
+        res_plants_use = res_plants_all.loc[common_mask].copy()
+
+        if res_plants_use.empty:
+            raise ValueError("No hydro reservoir plants matched inflow_reservoir columns.")
+
+        inflow_res_use = inflow_res_plants[res_plants_use["source_id"]].copy()
+        inflow_res_use.columns = res_plants_use.index
+
+        subgroup_records = []
+        
+        glofas_cfg = params.get("GloFAS_ERA5", {})
+        
+        distance_threshold = glofas_cfg.get("reservoir_subcluster_distance_threshold", 1.0)
+        seasonal_weight = glofas_cfg.get("reservoir_subcluster_seasonal_weight", 0.5)
+        inflow_level_weight = glofas_cfg.get("reservoir_subcluster_inflow_level_weight", 1.75)
+        max_hours_weight = glofas_cfg.get("reservoir_subcluster_max_hours_weight", 0.75)
+
+        #Cluster separately inside each final bus
+        for bus, bus_plants in res_plants_use.groupby("bus"):
+            inflow_bus = inflow_res_use[bus_plants.index]
+
+            labels = _cluster_reservoirs_within_bus(
+                bus_plants=bus_plants,
+                inflow_bus=inflow_bus,
+                distance_threshold=distance_threshold,
+                seasonal_weight=seasonal_weight,
+                inflow_level_weight=inflow_level_weight,
+                max_hours_weight=max_hours_weight,
+            )
+
+            bus_plants = bus_plants.copy()
+            bus_plants["subcluster"] = labels
+            bus_plants["group_key_final"] = [
+                f"{bus} hydro sg{int(lbl)}" for lbl in bus_plants["subcluster"]
+            ]
+
+            subgroup_records.append(bus_plants)
+
+        res_plants_grouped = pd.concat(subgroup_records, axis=0)
+
+        #Aggregate inflow by subgroup
+        inflow_res_agg = (
+            inflow_res_use.T
+            .groupby(res_plants_grouped["group_key_final"])
+            .sum()
+            .T
+        )
+
+        #Aggregate static parameters by subgroup
+        p_nom_agg = res_plants_grouped.groupby("group_key_final")["p_nom"].sum()
+        reservoir_energy_agg = res_plants_grouped.groupby("group_key_final")["reservoir_mwh"].sum()
+
+        hydro_max_hours = (
+            reservoir_energy_agg / p_nom_agg
+        ).replace([np.inf, -np.inf], np.nan).fillna(6.0)
+
+        bus_agg = res_plants_grouped.groupby("group_key_final")["bus"].first()
+        country_agg = res_plants_grouped.groupby("group_key_final")["country"].first()
+
+        damheight_agg = (
+            (res_plants_grouped["damheight_m"] * res_plants_grouped["p_nom"])
+            .groupby(res_plants_grouped["group_key_final"])
+            .sum()
+            .divide(p_nom_agg)
+        )
+
+        volume_agg = res_plants_grouped.groupby("group_key_final")["volume_mm3"].sum()
+        n_plants_agg = res_plants_grouped.groupby("group_key_final").size()
+        inflow_ann_agg = inflow_res_agg.sum(axis=0)
+
+        res_agg = pd.DataFrame({
+            "bus": bus_agg,
+            "country": country_agg,
+            "p_nom": p_nom_agg,
+            "damheight_m": damheight_agg,
+            "volume_mm3": volume_agg,
+            "max_hours": hydro_max_hours,
+            "n_plants_subgroup": n_plants_agg,
+            "annual_inflow_mw_sum": inflow_ann_agg,
+        })
+
+        inflow_res_agg = inflow_res_agg.reindex(columns=res_agg.index)
+
+    #Attach ROR
+    if "ror" in carriers and not ror_agg.empty and p_max_pu_ror_agg is not None:
+
+        n.add(
+            "Generator",
+            ror_agg.index,
+            carrier="ror",
+            bus=ror_agg["bus"],
+            p_nom=ror_agg["p_nom"],
+            efficiency=costs.at["ror", "efficiency"],
+            capital_cost=costs.at["ror", "capital_cost"],
+            weight=ror_agg["p_nom"],
+            p_max_pu=p_max_pu_ror_agg,
+        )
+
+    # 7) Attach Reservoir
+    if "hydro" in carriers and res_agg is not None and not res_agg.empty:
+
+        n.add(
+            "StorageUnit",
+            res_agg.index,
+            carrier="hydro",
+            bus=res_agg["bus"],
+            p_nom=res_agg["p_nom"],
+            max_hours=res_agg["max_hours"],
+            capital_cost=costs.at["hydro", "capital_cost"],
+            marginal_cost=costs.at["hydro", "marginal_cost"],
+            p_max_pu=1.0,
+            p_min_pu=0.0,
+            efficiency_dispatch=costs.at["hydro", "efficiency"],
+            efficiency_store=0.0,
+            cyclic_state_of_charge=True,
+            inflow=inflow_res_agg.loc[:, res_agg.index]
+            if inflow_res_agg is not None else None,
+        )
+
+    #Attach PHS
+    if "PHS" in carriers and not phs_agg.empty:
+
+        max_hours_default = params.get("PHS_max_hours", 6)
+
+        phs_agg["max_hours"] = (
+            phs_agg["max_hours"]
+            .replace([0, np.nan], max_hours_default)
+            if "max_hours" in phs_agg
+            else max_hours_default
+        )
+
+        n.add(
+            "StorageUnit",
+            phs_agg.index,
+            carrier="PHS",
+            bus=phs_agg["bus"],
+            p_nom=phs_agg["p_nom"],
+            max_hours=phs_agg["max_hours"],
+            capital_cost=costs.at["PHS", "capital_cost"],
+            efficiency_store=np.sqrt(costs.at["PHS", "efficiency"]),
+            efficiency_dispatch=np.sqrt(costs.at["PHS", "efficiency"]),
+            cyclic_state_of_charge=True,
+        )
 
 def attach_hydro(
     n: pypsa.Network,
@@ -1272,17 +1750,32 @@ def attach_stores(
     )
 
     if "hydro" in renewable_carriers:
-        p = params.renewable["hydro"]
-        carriers = p.pop("carriers", [])
-        attach_hydro(
-            n,
-            costs,
-            ppl,
-            snakemake.input.profile_hydro,
-            snakemake.input.hydro_capacities,
-            carriers,
-            **p,
-        )
+        p = params.renewable["hydro"].copy()
+
+        carriers = p.get("carriers", ["ror", "PHS", "hydro"])
+
+        glofas_cfg = p.get("GloFAS_ERA5", {})
+        use_glofas = glofas_cfg.get("methods", False)
+
+        if use_glofas:
+            attach_hydro_GloFAS(
+                n,
+                costs,
+                ppl,
+                snakemake.input.profile_hydro,
+                carriers,
+                **p,
+            )
+        else:
+            attach_hydro(
+                n,
+                costs,
+                ppl,
+                snakemake.input.profile_hydro,
+                snakemake.input.hydro_capacities,
+                carriers,
+                **p,
+            )
 
     estimate_renewable_caps = params.electricity["estimate_renewable_capacities"]
     if estimate_renewable_caps["enable"]:
diff --git a/scripts/build_hydro_profile.py b/scripts/build_hydro_profile.py
index 7a4e2d9461..bcf3a0b3ee 100644
--- a/scripts/build_hydro_profile.py
+++ b/scripts/build_hydro_profile.py
@@ -28,6 +28,7 @@
 import country_converter as coco
 import geopandas as gpd
 import pandas as pd
+import xarray as xr
 from numpy.polynomial import Polynomial
 
 from scripts._helpers import (
@@ -140,6 +141,126 @@ def approximate_missing_eia_stats(
     eia_stats_approximated = pd.DataFrame(eia_stats_approximated)
     return pd.concat([eia_stats, eia_stats_approximated]).sort_index()
 
+def add_qmax_turb(hydro_ppls, efficiency, q_min_factor=0.2):
+    
+    #function that adds the maximum and minimum turbinable flow rates for each hydro plant
+
+    hydro_ppls["technology"] = hydro_ppls["technology"].fillna("Run-Of-River")
+
+    #Compute country-level median heads
+    ror_mask = hydro_ppls["technology"] == "Run-Of-River"
+    res_mask = hydro_ppls["technology"].isin(["Reservoir", "Pumped Storage"])
+
+    median_ror_country = (
+        hydro_ppls[ror_mask]
+        .groupby("country")["damheight_m"]
+        .median()
+    )
+
+    median_res_country = (
+        hydro_ppls[res_mask]
+        .groupby("country")["damheight_m"]
+        .median()
+    )
+
+    # Global fallback medians
+    median_ror_global = hydro_ppls.loc[ror_mask, "damheight_m"].median(skipna=True)
+    median_res_global = hydro_ppls.loc[res_mask, "damheight_m"].median(skipna=True)
+
+    #Replace NaN heads using country medians for safety
+    mask_nan = hydro_ppls["damheight_m"].isna()
+
+    for country in hydro_ppls["country"].unique():
+
+        # ROR replacement
+        mask_country_ror = (
+            mask_nan &
+            (hydro_ppls["country"] == country) &
+            ror_mask
+        )
+
+        country_median = median_ror_country.get(country, median_ror_global)
+
+        hydro_ppls.loc[mask_country_ror, "damheight_m"] = country_median
+
+        # Reservoir + PHS replacement
+        mask_country_res = (
+            mask_nan &
+            (hydro_ppls["country"] == country) &
+            res_mask
+        )
+
+        country_median = median_res_country.get(country, median_res_global)
+
+        hydro_ppls.loc[mask_country_res, "damheight_m"] = country_median
+
+
+    #Replace zero or negative heads with non zero value
+    mask_zero_or_neg = hydro_ppls["damheight_m"] <= 0
+    hydro_ppls.loc[mask_zero_or_neg, "damheight_m"] = 1.0
+
+    #Compute qmax and qmin
+    hydro_ppls["qmax_turb"] = (
+        hydro_ppls["p_nom"] /
+        (9.81 * hydro_ppls["damheight_m"] * 1e-3 * efficiency)
+    )
+
+    hydro_ppls["qmin_turb"] = q_min_factor * hydro_ppls["qmax_turb"]
+
+    return hydro_ppls
+
+def ror_conversion(ror_plants, inflow_m3s):
+    
+    #function that converts the inflows from ROR plants into relative output (p_max_pu)
+    
+    plant_ids = ror_plants.index.astype(str).values
+    inflow_sel = inflow_m3s.sel(plant=plant_ids).load()
+
+    out = xr.zeros_like(inflow_sel)
+
+    for pid in plant_ids:
+
+        qmax = float(ror_plants.loc[pid, "qmax_turb"])
+        qmin = float(ror_plants.loc[pid, "qmin_turb"])
+
+        s = inflow_sel.sel(plant=pid)
+
+        denom = (qmax - qmin) if (qmax - qmin) != 0 else 1.0
+
+        p = xr.where(
+            s < qmin,
+            0.0,
+            xr.where(
+                s > qmax,
+                1.0,
+                (s - qmin) / denom
+            )
+        )
+
+        out.loc[dict(plant=pid)] = p
+
+    df = out.to_dataframe(name="p_max_pu").reset_index()
+    return df.pivot(index="time", columns="plant", values="p_max_pu")
+
+def reservoir_conversion(res_plants, inflow_m3s, eff):
+    
+    #function that converts the inflows of reservoir plants from flow rate to MW
+
+    plant_ids = res_plants.index.astype(str).values
+    inflow_sel = inflow_m3s.sel(plant=plant_ids).load()
+
+    converted = {}
+
+    for pid in plant_ids:
+
+        head = float(res_plants.loc[pid, "damheight_m"])
+
+        q = inflow_sel.sel(plant=pid).values
+
+        converted[pid] = q * 9.81 * head * eff * 1e-3
+
+    return pd.DataFrame(converted, index=inflow_sel.coords["time"].values)
+
 
 logger = logging.getLogger(__name__)
 
@@ -198,20 +319,134 @@ def approximate_missing_eia_stats(
     if norm_year:
         eia_stats.loc[years_in_time] = eia_stats.loc[norm_year]
     elif missing_years.any():
-        for year in missing_years:
-            eia_stats.loc[year] = eia_stats.median()
-
-    inflow = cutout.runoff(
-        shapes=country_shapes,
-        smooth=True,
-        lower_threshold_quantile=True,
-        normalize_using_yearly=eia_stats,
-    )
+        eia_stats.loc[missing_years] = eia_stats.median()
 
-    if full_years_available:
-        inflow = inflow.sel(time=time)
+    # Hydro inflow calculation method
+    config_hydro = snakemake.config["renewable"]["hydro"]
+    GloFAS_ERA5 = config_hydro.get("GloFAS_ERA5", {})
+    method = GloFAS_ERA5.get("methods", False)
+
+    if method == "GloFAS":
+        eff = GloFAS_ERA5.get("eff", 0.85)
+        q_min = GloFAS_ERA5.get("q_min", 0.2)
+
+        # 1️⃣ Load powerplants
+        ppls = pd.read_csv(
+            r"/home/dselva/projects/pypsa-eur/resources/Europe_2019_GloFAS_SABER/powerplants_s_100.csv",  #PATHHHHHHH
+            index_col=0
+        )
+
+        ppls.columns = ppls.columns.str.strip().str.lower()
+        ppls.index = ppls.index.astype(str)
+
+        hydro_ppls = ppls[ppls["fueltype"].str.lower() == "hydro"].copy()
+        hydro_ppls["technology"] = hydro_ppls["technology"].fillna("Run-Of-River")
+        hydro_ppls = hydro_ppls.rename(columns={"capacity": "p_nom"})
+        hydro_ppls = add_qmax_turb(hydro_ppls, eff, q_min)
+
+        #Load inflow
+        inflow_saber = xr.open_dataarray(
+            r"/home/dselva/projects/pypsa-eur/data/Europe_inflow_2019_SABER_regional.nc" #PATHHHHHH
+        )
+
+        inflow_saber = inflow_saber.assign_coords(
+            plant=inflow_saber.coords["plant"].astype(str)
+        )
+
+        if "clip_min_inflow" in params_hydro:
+            inflow_saber = inflow_saber.where(
+                inflow_saber > params_hydro["clip_min_inflow"], 0
+            )
+
+        inflow_index = pd.Index(inflow_saber.coords["plant"].values.astype(str))
+        hydro_index = hydro_ppls.index.astype(str)
+
+        # Intersection only
+        common_index = inflow_index.intersection(hydro_index)
+
+        print("Common plants:", len(common_index))  #logger
+
+        # Check that all hydro plants have inflow
+        missing_inflow = hydro_index.difference(inflow_index)
+        if len(missing_inflow) > 0:
+            raise ValueError(
+                f"{len(missing_inflow)} hydro plants have no inflow data."
+            )
+
+        # Now restrict inflow to matched plants only
+        inflow_matched = inflow_saber.sel(plant=common_index)
+        hydro_matched = hydro_ppls.loc[common_index]
+
+        #Split technologies
+        ror_plants = hydro_matched[
+            hydro_matched["technology"] == "Run-Of-River"
+        ]
+
+        reservoir_plants = hydro_matched[
+            hydro_matched["technology"].isin(["Reservoir", "Pumped Storage"])
+        ]
+
+        inflow_ror = inflow_matched.sel(plant=ror_plants.index)
+        inflow_res = inflow_matched.sel(plant=reservoir_plants.index)
+
+        #Convert inflow
+        if not ror_plants.empty:
+            p_max_pu_ror = ror_conversion(
+                ror_plants,
+                inflow_ror,
+            )
+        else:
+            p_max_pu_ror = pd.DataFrame()
+        
+        if not reservoir_plants.empty:
+            inflow_res_mw = reservoir_conversion(
+                reservoir_plants,
+                inflow_res,
+                eff=eff
+            )
+        else:
+            inflow_res_mw = pd.DataFrame()
+
+        outputs = {}
+
+        if not p_max_pu_ror.empty:
+            outputs["p_max_pu_ror"] = xr.DataArray(
+                p_max_pu_ror.values,
+                dims=("time", "plant"),
+                coords={
+                    "time": p_max_pu_ror.index,
+                    "plant": p_max_pu_ror.columns.values
+                }
+            )
+        
+        if not inflow_res_mw.empty:
+            outputs["inflow_reservoir"] = xr.DataArray(
+                inflow_res_mw.values,
+                dims=("time", "plant"),
+                coords={
+                    "time": inflow_res_mw.index,
+                    "plant": inflow_res_mw.columns.values,
+                }
+            )
+
+        inflow = xr.Dataset(outputs)
+
+        print("Final dataset dims:", inflow.dims)
+        
+    else:
+
+        #Default PyPSA-Eur behaviour
+        inflow = cutout.runoff(
+            shapes=country_shapes,
+            smooth=True,
+            lower_threshold_quantile=True,
+            normalize_using_yearly=eia_stats,
+        )
+
+        if full_years_available:
+            inflow = inflow.sel(time=time)
 
-    if "clip_min_inflow" in params_hydro:
-        inflow = inflow.where(inflow > params_hydro["clip_min_inflow"], 0)
+        if "clip_min_inflow" in params_hydro:
+            inflow = inflow.where(inflow > params_hydro["clip_min_inflow"], 0)
 
     inflow.to_netcdf(snakemake.output.profile)

From 1daef2fc359b249acd2e2988973b4cf5826fd991 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 9 Apr 2026 11:04:46 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 scripts/add_electricity.py     | 168 +++++++++++++++++----------------
 scripts/build_hydro_profile.py | 108 +++++++--------------
 2 files changed, 120 insertions(+), 156 deletions(-)

diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py
index ecc09b5e55..48f7dfe86f 100755
--- a/scripts/add_electricity.py
+++ b/scripts/add_electricity.py
@@ -59,8 +59,8 @@
 import pypsa
 import xarray as xr
 from pypsa.clustering.spatial import DEFAULT_ONE_PORT_STRATEGIES, normed_or_uniform
-from sklearn.preprocessing import StandardScaler
 from sklearn.cluster import AgglomerativeClustering
+from sklearn.preprocessing import StandardScaler
 
 from scripts._helpers import (
     PYPSA_V1,
@@ -732,10 +732,10 @@ def attach_existing_batteries(
     )
     logger.info(f"Added {len(batt)} existing battery storage units\n({stats} MW)")
 
+
 def _fill_reservoir_missing_values(df: pd.DataFrame) -> pd.DataFrame:
-    
-    #safety check deletable
-    
+    # safety check deletable
+
     df = df.copy()
 
     df["damheight_m"] = pd.to_numeric(df["damheight_m"], errors="coerce")
@@ -762,7 +762,8 @@ def _fill_reservoir_missing_values(df: pd.DataFrame) -> pd.DataFrame:
     df.loc[df["damheight_m"] <= 0, "damheight_m"] = 1.0
     df = df.loc[df["p_nom"].notna() & (df["p_nom"] > 0)].copy()
 
-    return df          
+    return df
+
 
 def _safe_corr(x: pd.Series, y: pd.Series) -> float:
     x = pd.to_numeric(x, errors="coerce")
@@ -796,8 +797,8 @@ def _build_hydro_clustering_features(
 
     inflow_plants = inflow_plants.loc[:, plants.index]
 
-    #Metric A: seasonal shape similarity to mean bus shape
-    #Each plant inflow is divided by its own mean inflow, then correlation is computed against the average normalized inflow shape of the bus
+    # Metric A: seasonal shape similarity to mean bus shape
+    # Each plant inflow is divided by its own mean inflow, then correlation is computed against the average normalized inflow shape of the bus
     plant_mean_inflow = inflow_plants.mean(axis=0).replace(0.0, np.nan)
     inflow_norm = inflow_plants.divide(plant_mean_inflow, axis=1)
 
@@ -808,25 +809,25 @@ def _build_hydro_clustering_features(
         _safe_corr(inflow_norm[col], bus_mean_shape) for col in plants.index
     ]
 
-    #Metric B: inflow level relative to nominal capacity
+    # Metric B: inflow level relative to nominal capacity
     # mean(inflow) / p_nom [equivalent to sum(inflow) / (len(inflow) * p_nom)]
     # ---------------------------------------------------------
     n_steps = len(inflow_plants.index)
 
-    inflow_level = (
-        inflow_plants.sum(axis=0) / (n_steps * plants["p_nom"])
-    ).replace([np.inf, -np.inf], np.nan)
+    inflow_level = (inflow_plants.sum(axis=0) / (n_steps * plants["p_nom"])).replace(
+        [np.inf, -np.inf], np.nan
+    )
 
     B = pd.DataFrame(index=plants.index)
     B["inflow_level"] = inflow_level
 
-    #Metric C: max_hours aka the volume
+    # Metric C: max_hours aka the volume
     C = pd.DataFrame(index=plants.index)
-    C["max_hours"] = pd.to_numeric(
-        plants["max_hours_plant"], errors="coerce"
-    ).replace([np.inf, -np.inf], np.nan)
+    C["max_hours"] = pd.to_numeric(plants["max_hours_plant"], errors="coerce").replace(
+        [np.inf, -np.inf], np.nan
+    )
 
-    #Fill NaNs metric-wise
+    # Fill NaNs metric-wise
     for df_block in [A, B, C]:
         for col in df_block.columns:
             if df_block[col].isna().all():
@@ -874,10 +875,9 @@ def _cluster_reservoirs_within_bus(
     inflow_level_weight: float = 1.0,
     max_hours_weight: float = 1.0,
 ) -> pd.Series:
-    
-    #function that creates reservoir clusters to manage their aggregation within the bus. 
-    #when `distance_threshold = 0`, each reservoir remains separate; 
-    #when `distance_threshold = inf`, all reservoirs within the same bus are aggregated together (no clusters).
+    # function that creates reservoir clusters to manage their aggregation within the bus.
+    # when `distance_threshold = 0`, each reservoir remains separate;
+    # when `distance_threshold = inf`, all reservoirs within the same bus are aggregated together (no clusters).
     bus_plants = bus_plants.copy()
 
     if len(bus_plants) == 1:
@@ -907,15 +907,17 @@ def _cluster_reservoirs_within_bus(
 
     return pd.Series(labels, index=bus_plants.index, name="subcluster")
 
+
 def _compute_reservoir_max_hours(
     df: pd.DataFrame,
     reservoir_energy_efficiency: float = 0.70,
 ) -> pd.DataFrame:
-    #function that converts volume into the max_hours parameter
+    # function that converts volume into the max_hours parameter
     df = df.copy()
 
     df["reservoir_mwh"] = (
-        df["volume_mm3"] * 1e6
+        df["volume_mm3"]
+        * 1e6
         * df["damheight_m"]
         * 9.81
         * reservoir_energy_efficiency
@@ -923,12 +925,13 @@ def _compute_reservoir_max_hours(
         / 3600
     )
 
-    df["max_hours_plant"] = (
-        df["reservoir_mwh"] / df["p_nom"]
-    ).replace([np.inf, -np.inf], np.nan)
+    df["max_hours_plant"] = (df["reservoir_mwh"] / df["p_nom"]).replace(
+        [np.inf, -np.inf], np.nan
+    )
 
     return df
 
+
 def attach_hydro_GloFAS(
     n: pypsa.Network,
     costs: pd.DataFrame,
@@ -968,21 +971,23 @@ def attach_hydro_GloFAS(
 
     ds = xr.open_dataset(profile_hydro)
 
-    #Load raw hydro mapping for ROR aggregation
+    # Load raw hydro mapping for ROR aggregation
     ppls_raw = pd.read_csv(
         r"/home/dselva/projects/pypsa-eur/resources/Europe_2019_GloFAS_SABER/powerplants_s_100.csv",  #####PATHHHH
-        index_col=0
+        index_col=0,
     )
 
     ppls_raw.index = ppls_raw.index.astype(str)
     ppls_raw.columns = ppls_raw.columns.str.strip()
     ppls_raw = ppls_raw.rename(columns={"Capacity": "p_nom"})
 
-    ppls_raw["carrier"] = ppls_raw["Technology"].replace({
-        "Run-Of-River": "ror",
-        "Reservoir": "hydro",
-        "Pumped Storage": "PHS",
-    })
+    ppls_raw["carrier"] = ppls_raw["Technology"].replace(
+        {
+            "Run-Of-River": "ror",
+            "Reservoir": "hydro",
+            "Pumped Storage": "PHS",
+        }
+    )
 
     ppls_raw = ppls_raw[ppls_raw["carrier"].isin(["ror", "hydro", "PHS"])].copy()
 
@@ -998,11 +1003,10 @@ def attach_hydro_GloFAS(
         + ppls_raw.index.astype(str)
     )
 
-    #ROR aggregation
+    # ROR aggregation
     p_max_pu_ror_agg = None
 
     if not ror_agg.empty and "p_max_pu_ror" in ds:
-
         p_max_pu_ror_plants = ds["p_max_pu_ror"].to_pandas()
         p_max_pu_ror_plants.columns = p_max_pu_ror_plants.columns.astype(str)
 
@@ -1023,32 +1027,23 @@ def attach_hydro_GloFAS(
 
         p_max_pu_ror_plants = p_max_pu_ror_plants[ror_plants.index]
 
-        inflow_equiv = p_max_pu_ror_plants.multiply(
-            ror_plants["p_nom"], axis=1
-        )
+        inflow_equiv = p_max_pu_ror_plants.multiply(ror_plants["p_nom"], axis=1)
 
-        inflow_equiv_agg = (
-            inflow_equiv.T
-            .groupby(ror_plants["group_key"])
-            .sum()
-            .T
-        )
+        inflow_equiv_agg = inflow_equiv.T.groupby(ror_plants["group_key"]).sum().T
 
         p_nom_ror_agg = ror_plants.groupby("group_key")["p_nom"].sum()
 
-        p_max_pu_ror_agg = (
-            inflow_equiv_agg.divide(p_nom_ror_agg, axis=1)
-            .clip(upper=1.0)
+        p_max_pu_ror_agg = inflow_equiv_agg.divide(p_nom_ror_agg, axis=1).clip(
+            upper=1.0
         )
 
         p_max_pu_ror_agg = p_max_pu_ror_agg.reindex(columns=ror_agg.index)
 
-    #Reservoir custom aggregation inside each final bus
+    # Reservoir custom aggregation inside each final bus
     res_agg = None
     inflow_res_agg = None
 
     if not res_plants_all.empty and "inflow_reservoir" in ds:
-
         if "source_id" not in res_plants_all.columns:
             raise ValueError(
                 "source_id column missing in ppl for disaggregated hydro workflow."
@@ -1059,30 +1054,36 @@ def attach_hydro_GloFAS(
 
         res_plants_all["source_id"] = res_plants_all["source_id"].astype(str)
 
-        #fill missing data and compute plant-level max_hours
+        # fill missing data and compute plant-level max_hours
         res_plants_all = _fill_reservoir_missing_values(res_plants_all)
         res_plants_all = _compute_reservoir_max_hours(res_plants_all)
 
-        #Keep only plants for which inflow exists
+        # Keep only plants for which inflow exists
         common_mask = res_plants_all["source_id"].isin(inflow_res_plants.columns)
         res_plants_use = res_plants_all.loc[common_mask].copy()
 
         if res_plants_use.empty:
-            raise ValueError("No hydro reservoir plants matched inflow_reservoir columns.")
+            raise ValueError(
+                "No hydro reservoir plants matched inflow_reservoir columns."
+            )
 
         inflow_res_use = inflow_res_plants[res_plants_use["source_id"]].copy()
         inflow_res_use.columns = res_plants_use.index
 
         subgroup_records = []
-        
+
         glofas_cfg = params.get("GloFAS_ERA5", {})
-        
-        distance_threshold = glofas_cfg.get("reservoir_subcluster_distance_threshold", 1.0)
+
+        distance_threshold = glofas_cfg.get(
+            "reservoir_subcluster_distance_threshold", 1.0
+        )
         seasonal_weight = glofas_cfg.get("reservoir_subcluster_seasonal_weight", 0.5)
-        inflow_level_weight = glofas_cfg.get("reservoir_subcluster_inflow_level_weight", 1.75)
+        inflow_level_weight = glofas_cfg.get(
+            "reservoir_subcluster_inflow_level_weight", 1.75
+        )
         max_hours_weight = glofas_cfg.get("reservoir_subcluster_max_hours_weight", 0.75)
 
-        #Cluster separately inside each final bus
+        # Cluster separately inside each final bus
         for bus, bus_plants in res_plants_use.groupby("bus"):
             inflow_bus = inflow_res_use[bus_plants.index]
 
@@ -1105,21 +1106,22 @@ def attach_hydro_GloFAS(
 
         res_plants_grouped = pd.concat(subgroup_records, axis=0)
 
-        #Aggregate inflow by subgroup
+        # Aggregate inflow by subgroup
         inflow_res_agg = (
-            inflow_res_use.T
-            .groupby(res_plants_grouped["group_key_final"])
-            .sum()
-            .T
+            inflow_res_use.T.groupby(res_plants_grouped["group_key_final"]).sum().T
         )
 
-        #Aggregate static parameters by subgroup
+        # Aggregate static parameters by subgroup
         p_nom_agg = res_plants_grouped.groupby("group_key_final")["p_nom"].sum()
-        reservoir_energy_agg = res_plants_grouped.groupby("group_key_final")["reservoir_mwh"].sum()
+        reservoir_energy_agg = res_plants_grouped.groupby("group_key_final")[
+            "reservoir_mwh"
+        ].sum()
 
         hydro_max_hours = (
-            reservoir_energy_agg / p_nom_agg
-        ).replace([np.inf, -np.inf], np.nan).fillna(6.0)
+            (reservoir_energy_agg / p_nom_agg)
+            .replace([np.inf, -np.inf], np.nan)
+            .fillna(6.0)
+        )
 
         bus_agg = res_plants_grouped.groupby("group_key_final")["bus"].first()
         country_agg = res_plants_grouped.groupby("group_key_final")["country"].first()
@@ -1135,22 +1137,23 @@ def attach_hydro_GloFAS(
         n_plants_agg = res_plants_grouped.groupby("group_key_final").size()
         inflow_ann_agg = inflow_res_agg.sum(axis=0)
 
-        res_agg = pd.DataFrame({
-            "bus": bus_agg,
-            "country": country_agg,
-            "p_nom": p_nom_agg,
-            "damheight_m": damheight_agg,
-            "volume_mm3": volume_agg,
-            "max_hours": hydro_max_hours,
-            "n_plants_subgroup": n_plants_agg,
-            "annual_inflow_mw_sum": inflow_ann_agg,
-        })
+        res_agg = pd.DataFrame(
+            {
+                "bus": bus_agg,
+                "country": country_agg,
+                "p_nom": p_nom_agg,
+                "damheight_m": damheight_agg,
+                "volume_mm3": volume_agg,
+                "max_hours": hydro_max_hours,
+                "n_plants_subgroup": n_plants_agg,
+                "annual_inflow_mw_sum": inflow_ann_agg,
+            }
+        )
 
         inflow_res_agg = inflow_res_agg.reindex(columns=res_agg.index)
 
-    #Attach ROR
+    # Attach ROR
     if "ror" in carriers and not ror_agg.empty and p_max_pu_ror_agg is not None:
-
         n.add(
             "Generator",
             ror_agg.index,
@@ -1165,7 +1168,6 @@ def attach_hydro_GloFAS(
 
     # 7) Attach Reservoir
     if "hydro" in carriers and res_agg is not None and not res_agg.empty:
-
         n.add(
             "StorageUnit",
             res_agg.index,
@@ -1181,17 +1183,16 @@ def attach_hydro_GloFAS(
             efficiency_store=0.0,
             cyclic_state_of_charge=True,
             inflow=inflow_res_agg.loc[:, res_agg.index]
-            if inflow_res_agg is not None else None,
+            if inflow_res_agg is not None
+            else None,
         )
 
-    #Attach PHS
+    # Attach PHS
     if "PHS" in carriers and not phs_agg.empty:
-
         max_hours_default = params.get("PHS_max_hours", 6)
 
         phs_agg["max_hours"] = (
-            phs_agg["max_hours"]
-            .replace([0, np.nan], max_hours_default)
+            phs_agg["max_hours"].replace([0, np.nan], max_hours_default)
             if "max_hours" in phs_agg
             else max_hours_default
         )
@@ -1209,6 +1210,7 @@ def attach_hydro_GloFAS(
             cyclic_state_of_charge=True,
         )
 
+
 def attach_hydro(
     n: pypsa.Network,
     costs: pd.DataFrame,
diff --git a/scripts/build_hydro_profile.py b/scripts/build_hydro_profile.py
index bcf3a0b3ee..850b0f588f 100644
--- a/scripts/build_hydro_profile.py
+++ b/scripts/build_hydro_profile.py
@@ -141,85 +141,65 @@ def approximate_missing_eia_stats(
     eia_stats_approximated = pd.DataFrame(eia_stats_approximated)
     return pd.concat([eia_stats, eia_stats_approximated]).sort_index()
 
+
 def add_qmax_turb(hydro_ppls, efficiency, q_min_factor=0.2):
-    
-    #function that adds the maximum and minimum turbinable flow rates for each hydro plant
+    # function that adds the maximum and minimum turbinable flow rates for each hydro plant
 
     hydro_ppls["technology"] = hydro_ppls["technology"].fillna("Run-Of-River")
 
-    #Compute country-level median heads
+    # Compute country-level median heads
     ror_mask = hydro_ppls["technology"] == "Run-Of-River"
     res_mask = hydro_ppls["technology"].isin(["Reservoir", "Pumped Storage"])
 
-    median_ror_country = (
-        hydro_ppls[ror_mask]
-        .groupby("country")["damheight_m"]
-        .median()
-    )
+    median_ror_country = hydro_ppls[ror_mask].groupby("country")["damheight_m"].median()
 
-    median_res_country = (
-        hydro_ppls[res_mask]
-        .groupby("country")["damheight_m"]
-        .median()
-    )
+    median_res_country = hydro_ppls[res_mask].groupby("country")["damheight_m"].median()
 
     # Global fallback medians
     median_ror_global = hydro_ppls.loc[ror_mask, "damheight_m"].median(skipna=True)
     median_res_global = hydro_ppls.loc[res_mask, "damheight_m"].median(skipna=True)
 
-    #Replace NaN heads using country medians for safety
+    # Replace NaN heads using country medians for safety
     mask_nan = hydro_ppls["damheight_m"].isna()
 
     for country in hydro_ppls["country"].unique():
-
         # ROR replacement
-        mask_country_ror = (
-            mask_nan &
-            (hydro_ppls["country"] == country) &
-            ror_mask
-        )
+        mask_country_ror = mask_nan & (hydro_ppls["country"] == country) & ror_mask
 
         country_median = median_ror_country.get(country, median_ror_global)
 
         hydro_ppls.loc[mask_country_ror, "damheight_m"] = country_median
 
         # Reservoir + PHS replacement
-        mask_country_res = (
-            mask_nan &
-            (hydro_ppls["country"] == country) &
-            res_mask
-        )
+        mask_country_res = mask_nan & (hydro_ppls["country"] == country) & res_mask
 
         country_median = median_res_country.get(country, median_res_global)
 
         hydro_ppls.loc[mask_country_res, "damheight_m"] = country_median
 
-
-    #Replace zero or negative heads with non zero value
+    # Replace zero or negative heads with non zero value
     mask_zero_or_neg = hydro_ppls["damheight_m"] <= 0
     hydro_ppls.loc[mask_zero_or_neg, "damheight_m"] = 1.0
 
-    #Compute qmax and qmin
-    hydro_ppls["qmax_turb"] = (
-        hydro_ppls["p_nom"] /
-        (9.81 * hydro_ppls["damheight_m"] * 1e-3 * efficiency)
+    # Compute qmax and qmin
+    hydro_ppls["qmax_turb"] = hydro_ppls["p_nom"] / (
+        9.81 * hydro_ppls["damheight_m"] * 1e-3 * efficiency
     )
 
     hydro_ppls["qmin_turb"] = q_min_factor * hydro_ppls["qmax_turb"]
 
     return hydro_ppls
 
+
 def ror_conversion(ror_plants, inflow_m3s):
-    
-    #function that converts the inflows from ROR plants into relative output (p_max_pu)
-    
+    # function that converts the inflows from ROR plants into relative output (p_max_pu)
+
     plant_ids = ror_plants.index.astype(str).values
     inflow_sel = inflow_m3s.sel(plant=plant_ids).load()
 
     out = xr.zeros_like(inflow_sel)
 
     for pid in plant_ids:
-
         qmax = float(ror_plants.loc[pid, "qmax_turb"])
         qmin = float(ror_plants.loc[pid, "qmin_turb"])
 
@@ -227,24 +207,16 @@ def ror_conversion(ror_plants, inflow_m3s):
 
         denom = (qmax - qmin) if (qmax - qmin) != 0 else 1.0
 
-        p = xr.where(
-            s < qmin,
-            0.0,
-            xr.where(
-                s > qmax,
-                1.0,
-                (s - qmin) / denom
-            )
-        )
+        p = xr.where(s < qmin, 0.0, xr.where(s > qmax, 1.0, (s - qmin) / denom))
 
         out.loc[dict(plant=pid)] = p
 
     df = out.to_dataframe(name="p_max_pu").reset_index()
     return df.pivot(index="time", columns="plant", values="p_max_pu")
 
+
 def reservoir_conversion(res_plants, inflow_m3s, eff):
-    
-    #function that converts the inflows of reservoir plants from flow rate to MW
+    # function that converts the inflows of reservoir plants from flow rate to MW
 
     plant_ids = res_plants.index.astype(str).values
     inflow_sel = inflow_m3s.sel(plant=plant_ids).load()
@@ -252,7 +224,6 @@ def reservoir_conversion(res_plants, inflow_m3s, eff):
     converted = {}
 
     for pid in plant_ids:
-
         head = float(res_plants.loc[pid, "damheight_m"])
 
         q = inflow_sel.sel(plant=pid).values
@@ -332,8 +303,8 @@ def reservoir_conversion(res_plants, inflow_m3s, eff):
 
         # 1️⃣ Load powerplants
         ppls = pd.read_csv(
-            r"/home/dselva/projects/pypsa-eur/resources/Europe_2019_GloFAS_SABER/powerplants_s_100.csv",  #PATHHHHHHH
-            index_col=0
+            r"/home/dselva/projects/pypsa-eur/resources/Europe_2019_GloFAS_SABER/powerplants_s_100.csv",  # PATHHHHHHH
+            index_col=0,
         )
 
         ppls.columns = ppls.columns.str.strip().str.lower()
@@ -344,9 +315,9 @@ def reservoir_conversion(res_plants, inflow_m3s, eff):
         hydro_ppls = hydro_ppls.rename(columns={"capacity": "p_nom"})
         hydro_ppls = add_qmax_turb(hydro_ppls, eff, q_min)
 
-        #Load inflow
+        # Load inflow
         inflow_saber = xr.open_dataarray(
-            r"/home/dselva/projects/pypsa-eur/data/Europe_inflow_2019_SABER_regional.nc" #PATHHHHHH
+            r"/home/dselva/projects/pypsa-eur/data/Europe_inflow_2019_SABER_regional.nc"  # PATHHHHHH
         )
 
         inflow_saber = inflow_saber.assign_coords(
@@ -364,23 +335,19 @@ def reservoir_conversion(res_plants, inflow_m3s, eff):
         # Intersection only
         common_index = inflow_index.intersection(hydro_index)
 
-        print("Common plants:", len(common_index))  #logger
+        print("Common plants:", len(common_index))  # logger
 
         # Check that all hydro plants have inflow
         missing_inflow = hydro_index.difference(inflow_index)
         if len(missing_inflow) > 0:
-            raise ValueError(
-                f"{len(missing_inflow)} hydro plants have no inflow data."
-            )
+            raise ValueError(f"{len(missing_inflow)} hydro plants have no inflow data.")
 
         # Now restrict inflow to matched plants only
         inflow_matched = inflow_saber.sel(plant=common_index)
         hydro_matched = hydro_ppls.loc[common_index]
 
-        #Split technologies
-        ror_plants = hydro_matched[
-            hydro_matched["technology"] == "Run-Of-River"
-        ]
+        # Split technologies
+        ror_plants = hydro_matched[hydro_matched["technology"] == "Run-Of-River"]
 
         reservoir_plants = hydro_matched[
             hydro_matched["technology"].isin(["Reservoir", "Pumped Storage"])
@@ -389,7 +356,7 @@ def reservoir_conversion(res_plants, inflow_m3s, eff):
         inflow_ror = inflow_matched.sel(plant=ror_plants.index)
         inflow_res = inflow_matched.sel(plant=reservoir_plants.index)
 
-        #Convert inflow
+        # Convert inflow
         if not ror_plants.empty:
             p_max_pu_ror = ror_conversion(
                 ror_plants,
@@ -397,13 +364,9 @@ def reservoir_conversion(res_plants, inflow_m3s, eff):
             )
         else:
             p_max_pu_ror = pd.DataFrame()
-        
+
         if not reservoir_plants.empty:
-            inflow_res_mw = reservoir_conversion(
-                reservoir_plants,
-                inflow_res,
-                eff=eff
-            )
+            inflow_res_mw = reservoir_conversion(reservoir_plants, inflow_res, eff=eff)
         else:
             inflow_res_mw = pd.DataFrame()
 
@@ -415,10 +378,10 @@ def reservoir_conversion(res_plants, inflow_m3s, eff):
                 dims=("time", "plant"),
                 coords={
                     "time": p_max_pu_ror.index,
-                    "plant": p_max_pu_ror.columns.values
-                }
+                    "plant": p_max_pu_ror.columns.values,
+                },
             )
-        
+
         if not inflow_res_mw.empty:
             outputs["inflow_reservoir"] = xr.DataArray(
                 inflow_res_mw.values,
@@ -426,16 +389,15 @@ def reservoir_conversion(res_plants, inflow_m3s, eff):
                 coords={
                     "time": inflow_res_mw.index,
                     "plant": inflow_res_mw.columns.values,
-                }
+                },
             )
 
         inflow = xr.Dataset(outputs)
 
         print("Final dataset dims:", inflow.dims)
-        
-    else:
 
-        #Default PyPSA-Eur behaviour
+    else:
+        # Default PyPSA-Eur behaviour
         inflow = cutout.runoff(
             shapes=country_shapes,
             smooth=True,