From a76005cdd05c5d92139028b0cf559ff5f6479fec Mon Sep 17 00:00:00 2001
From: jpalm3r
Date: Wed, 18 Feb 2026 09:36:56 +0100
Subject: [PATCH 01/26] Add NetworkModelResult and supporting classes for
 network data handling

---
 src/modelskill/model/network.py       | 146 ++++++++++++++++++++++++++
 src/modelskill/timeseries/__init__.py |   5 +-
 src/modelskill/timeseries/_coords.py  |  14 +++
 src/modelskill/timeseries/_network.py |  89 ++++++++++++++++
 src/modelskill/timeseries/_point.py   |  19 +++-
 src/modelskill/types.py               |   4 +
 6 files changed, 273 insertions(+), 4 deletions(-)
 create mode 100644 src/modelskill/model/network.py
 create mode 100644 src/modelskill/timeseries/_network.py

diff --git a/src/modelskill/model/network.py b/src/modelskill/model/network.py
new file mode 100644
index 000000000..8c32884ee
--- /dev/null
+++ b/src/modelskill/model/network.py
@@ -0,0 +1,146 @@
+from __future__ import annotations
+from typing import Optional, Sequence, Any
+import numpy as np
+
+import xarray as xr
+import pandas as pd
+
+from ..obs import Observation
+from ..types import PointType
+from ..quantity import Quantity
+from ..timeseries import TimeSeries, _parse_network_input
+
+
+class NetworkModelResult(TimeSeries):
+    """Model result for a single network location.
+
+    Construct a NetworkModelResult from a 0d data source:
+    dfs0 file, mikeio.Dataset/DataArray, pandas.DataFrame/Series
+    or xarray.Dataset/DataArray
+
+    Parameters
+    ----------
+    data : str, Path, mikeio.Dataset, mikeio.DataArray, pd.DataFrame, pd.Series, xr.Dataset or xr.DataArray
+        filename (.dfs0 or .nc) or object with the data
+    name : Optional[str], optional
+        The name of the model result,
+        by default None (will be set to file name or item name)
+    x : float, optional
+        first coordinate of point position, inferred from data if not given, else None
+    y : float, optional
+        second coordinate of point position, inferred from data if not given, else None
+    z : float, optional
+        third coordinate of point position, inferred from data if not given, else None
+    item : str | int | None, optional
+        If multiple items/arrays are present in the input an item
+        must be given (as either an index or a string), by default None
+    quantity : Quantity, optional
+        Model quantity, for MIKE files this is inferred from the EUM information
+    aux_items : Optional[list[int | str]], optional
+        Auxiliary items, by default None
+    """
+
+    def __init__(
+        self,
+        data: PointType,
+        *,
+        name: Optional[str] = None,
+        item: str | int | None = None,
+        quantity: Optional[Quantity] = None,
+        aux_items: Optional[Sequence[int | str]] = None,
+    ) -> None:
+        if not self._is_input_validated(data):
+            data = _parse_network_input(
+                data,
+                name=name,
+                item=item,
+                quantity=quantity,
+                aux_items=aux_items,
+            )
+
+        assert isinstance(data, xr.Dataset)
+
+        data_var = str(list(data.data_vars)[0])
+        data[data_var].attrs["kind"] = "model"
+        super().__init__(data=data)
+
+    def interp_time(
+        self, observation: Observation, **kwargs: Any
+    ) -> NetworkModelResult:
+        """
+        Interpolate model result to the time of the observation
+
+        wrapper around xarray.Dataset.interp()
+
+        Parameters
+        ----------
+        observation : Observation
+            The observation to interpolate to
+        **kwargs
+
+            Additional keyword arguments passed to xarray.interp
+
+        Returns
+        -------
+        PointModelResult
+            Interpolated model result
+        """
+        ds = self.align(observation, **kwargs)
+        return NetworkModelResult(ds)
+
+    def align(
+        self,
+        observation: Observation,
+        *,
+        max_gap: float | None = None,
+        **kwargs: Any,
+    ) -> xr.Dataset:
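+        """Align this model result to the observation's time axis.
+
+        Drops NaN times, interpolates to the observation times and, if
+        ``max_gap`` is given (in seconds), removes values that fall inside
+        model gaps longer than ``max_gap``. Returns an xarray.Dataset.
+        """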
new_time = observation.time + + dati = self.data.dropna("time").interp( + time=new_time, assume_sorted=True, **kwargs + ) + + pmr = NetworkModelResult(dati) + if max_gap is not None: + pmr = pmr._remove_model_gaps(mod_index=self.time, max_gap=max_gap) + return pmr.data + + def _remove_model_gaps( + self, + mod_index: pd.DatetimeIndex, + max_gap: float | None = None, + ) -> NetworkModelResult: + """Remove model gaps longer than max_gap from TimeSeries""" + max_gap_delta = pd.Timedelta(max_gap, "s") + valid_times = self._get_valid_times(mod_index, max_gap_delta) + ds = self.data.sel(time=valid_times) + return NetworkModelResult(ds) + + def _get_valid_times( + self, mod_index: pd.DatetimeIndex, max_gap: pd.Timedelta + ) -> pd.DatetimeIndex: + """Used only by _remove_model_gaps""" + obs_index = self.time + # init dataframe of available timesteps and their index + df = pd.DataFrame(index=mod_index) + df["idx"] = range(len(df)) + + # for query times get available left and right index of source times + df = ( + df.reindex(df.index.union(obs_index)) + .interpolate(method="time", limit_area="inside") + .reindex(obs_index) + .dropna() + ) + df["idxa"] = np.floor(df.idx).astype(int) + df["idxb"] = np.ceil(df.idx).astype(int) + + # time of left and right source times and time delta + df["ta"] = mod_index[df.idxa] + df["tb"] = mod_index[df.idxb] + df["dt"] = df.tb - df.ta + + # valid query times where time delta is less than max_gap + valid_idx = df.dt <= max_gap + return df[valid_idx].index diff --git a/src/modelskill/timeseries/__init__.py b/src/modelskill/timeseries/__init__.py index cb1ca3ffc..df74a0846 100644 --- a/src/modelskill/timeseries/__init__.py +++ b/src/modelskill/timeseries/__init__.py @@ -1,9 +1,12 @@ from ._timeseries import TimeSeries -from ._point import _parse_xyz_point_input +from ._point import _parse_xyz_point_input, _parse_network_node_input from ._track import _parse_track_input +from ._network import _parse_network_input __all__ = [ "TimeSeries", "_parse_xyz_point_input", "_parse_track_input", + "_parse_network_input", + "_parse_network_node_input", ] diff --git a/src/modelskill/timeseries/_coords.py b/src/modelskill/timeseries/_coords.py index 899dd5e63..48076dd84 100644 --- a/src/modelskill/timeseries/_coords.py +++ b/src/modelskill/timeseries/_coords.py @@ -17,3 +17,17 @@ def __init__( @property def as_dict(self) -> dict: return {"x": self.x, "y": self.y, "z": self.z} + + +class NetworkCoords: + def __init__( + self, + node: Optional[int] = None, + boundary: Optional[str] = None, + ): + self.node = node if node is not None else np.nan + self.boundary = boundary if boundary is not None else np.nan + + @property + def as_dict(self) -> dict: + return {"node": self.node, "boundary": self.boundary} diff --git a/src/modelskill/timeseries/_network.py b/src/modelskill/timeseries/_network.py new file mode 100644 index 000000000..4000fd04b --- /dev/null +++ b/src/modelskill/timeseries/_network.py @@ -0,0 +1,89 @@ +from __future__ import annotations +from collections.abc import Hashable +from dataclasses import dataclass +from typing import get_args, Optional, List, Sequence +import xarray as xr + +from ..types import GeometryType, NetworkType +from ..quantity import Quantity +from ..utils import _get_name +from ._timeseries import _validate_data_var_name + + +@dataclass +class NetworkItem: + node: int + values: str + aux: list[str] + + def __post_init__(self): + # Check for duplicates + if len(set(self.all)) != len(self.all): + raise ValueError(f"Duplicate items! 
{self.all}") + + @property + def all(self) -> List[str]: + return [self.node, self.values] + self.aux + + +def _parse_network_items( + items: Sequence[Hashable], + node_item: int + | str + | None, # the item in the data that signals to the node -> is it necessary in our case? + item: int | str | None, + aux_items: Optional[Sequence[int | str]] = None, +) -> NetworkItem: + item = _get_name(item, valid_names=items) + node_item = _get_name(node_item, valid_names=items) + if isinstance(aux_items, (str, int)): + aux_items = [aux_items] + aux_items_str = [_get_name(i, valid_names=items) for i in aux_items or []] + + return NetworkItem(x=node_item, values=item, aux=aux_items_str) + + +def _parse_network_input( + data: NetworkType, # Which types should this accept? Res1D would make mikeio1d a dependency... + name: Optional[str], + item: str | int | None, + node_item: str | int | None, + quantity: Optional[Quantity], + aux_items: Optional[Sequence[int | str]] = None, +) -> xr.Dataset: + assert isinstance( + data, get_args(NetworkType) + ), "Could not construct NetworkModelResult from provided data." + + data.index.name = "time" + data = data.reorder_levels(["quantity", "node"], axis=1).stack() + data = data.reset_index().set_index("time") + + # parse items + valid_items = list(data.columns) + sel_items = _parse_network_items( + valid_items, node_item=node_item, item=item, aux_items=aux_items + ) + name = name or sel_items.values + name = _validate_data_var_name(name) + + # parse quantity + model_quantity = Quantity.undefined() if quantity is None else quantity + + # select relevant items and convert to xr.Dataset + data = data[sel_items.all] + + ds = data.to_xarray() + ds = ds.rename({sel_items.node: "node"}) + ds = ds.dropna(dim="time", subset=["node"]) + ds[name].attrs["long_name"] = model_quantity.name + ds[name].attrs["units"] = model_quantity.unit + + if "node" in ds: + ds = ds.set_coords(node_item) + for aux_item in sel_items.aux: + ds[aux_item].attrs["kind"] = "aux" + + ds.attrs["gtype"] = str(GeometryType.NETWORK) + assert isinstance(ds, xr.Dataset) + return ds diff --git a/src/modelskill/timeseries/_point.py b/src/modelskill/timeseries/_point.py index 5e2a0c86a..5ce18e5d0 100644 --- a/src/modelskill/timeseries/_point.py +++ b/src/modelskill/timeseries/_point.py @@ -13,7 +13,7 @@ from ..quantity import Quantity from ..utils import _get_name from ._timeseries import _validate_data_var_name -from ._coords import XYZCoords +from ._coords import XYZCoords, NetworkCoords @dataclass @@ -144,7 +144,7 @@ def _convert_to_dataset( def _include_coords( ds: xr.Dataset, *, - coords: Optional[XYZCoords] = None, + coords: Optional[XYZCoords | NetworkCoords] = None, ) -> xr.Dataset: ds = ds.copy() if coords is not None: @@ -237,7 +237,7 @@ def _parse_point_input( quantity: Optional[Quantity], aux_items: Optional[Sequence[int | str]], *, - coords: XYZCoords, + coords: XYZCoords | NetworkCoords, ) -> xr.Dataset: """Convert accepted input data to an xr.Dataset""" @@ -265,3 +265,16 @@ def _parse_xyz_point_input( coords = XYZCoords(x, y, z) ds = _parse_point_input(data, name, item, quantity, aux_items, coords=coords) return ds + + +def _parse_network_node_input( + data: PointType, + name: Optional[str], + item: str | int | None, + quantity: Optional[Quantity], + node: Optional[int], + aux_items: Optional[Sequence[int | str]], +) -> xr.Dataset: + coords = NetworkCoords(node=node) + ds = _parse_point_input(data, name, item, quantity, aux_items, coords=coords) + return ds diff --git a/src/modelskill/types.py 
b/src/modelskill/types.py
index 0ba3dcd63..205786abf 100644
--- a/src/modelskill/types.py
+++ b/src/modelskill/types.py
@@ -14,6 +14,7 @@ class GeometryType(Enum):
     TRACK = "track"
     UNSTRUCTURED = "unstructured"
     GRID = "grid"
+    NETWORK = "network"
 
     def __str__(self) -> str:
        return self.name.lower()
@@ -37,6 +38,8 @@ def from_string(s: str) -> "GeometryType":
 
        >>> GeometryType.from_string("grid")
        <GeometryType.GRID: 'grid'>
+        >>> GeometryType.from_string("network")
+        <GeometryType.NETWORK: 'network'>
 
        """
 
        try:
@@ -87,6 +90,7 @@ def from_string(s: str) -> "GeometryType":
     xr.DataArray,
 ]
 TrackType = Union[str, Path, pd.DataFrame, mikeio.Dfs0, mikeio.Dataset, xr.Dataset]
+NetworkType = Union[pd.DataFrame]
 
 
 @dataclass(frozen=True)

From e4d9362d9b36efb9bfdf23a7936d6f86b3098a2c Mon Sep 17 00:00:00 2001
From: jpalm3r
Date: Wed, 18 Feb 2026 09:47:04 +0100
Subject: [PATCH 02/26] Simplify parsing logic

---
 src/modelskill/timeseries/_network.py | 54 +++-------------------------
 1 file changed, 5 insertions(+), 49 deletions(-)

diff --git a/src/modelskill/timeseries/_network.py b/src/modelskill/timeseries/_network.py
index 4000fd04b..c7d415725 100644
--- a/src/modelskill/timeseries/_network.py
+++ b/src/modelskill/timeseries/_network.py
@@ -1,53 +1,15 @@
 from __future__ import annotations
-from collections.abc import Hashable
-from dataclasses import dataclass
-from typing import get_args, Optional, List, Sequence
+from typing import get_args, Optional, Sequence
 import xarray as xr
 
 from ..types import GeometryType, NetworkType
 from ..quantity import Quantity
-from ..utils import _get_name
-from ._timeseries import _validate_data_var_name
-
-
-@dataclass
-class NetworkItem:
-    node: int
-    values: str
-    aux: list[str]
-
-    def __post_init__(self):
-        # Check for duplicates
-        if len(set(self.all)) != len(self.all):
-            raise ValueError(f"Duplicate items! {self.all}")
-
-    @property
-    def all(self) -> List[str]:
-        return [self.node, self.values] + self.aux
-
-
-def _parse_network_items(
-    items: Sequence[Hashable],
-    node_item: int
-    | str
-    | None,  # the item in the data that signals to the node -> is it necessary in our case?
-    item: int | str | None,
-    aux_items: Optional[Sequence[int | str]] = None,
-) -> NetworkItem:
-    item = _get_name(item, valid_names=items)
-    node_item = _get_name(node_item, valid_names=items)
-    if isinstance(aux_items, (str, int)):
-        aux_items = [aux_items]
-    aux_items_str = [_get_name(i, valid_names=items) for i in aux_items or []]
-
-    return NetworkItem(node=node_item, values=item, aux=aux_items_str)
 
 
 def _parse_network_input(
     data: NetworkType,  # Which types should this accept? Res1D would make mikeio1d a dependency...
name: Optional[str], item: str | int | None, - node_item: str | int | None, quantity: Optional[Quantity], aux_items: Optional[Sequence[int | str]] = None, ) -> xr.Dataset: @@ -61,27 +23,21 @@ def _parse_network_input( # parse items valid_items = list(data.columns) - sel_items = _parse_network_items( - valid_items, node_item=node_item, item=item, aux_items=aux_items - ) - name = name or sel_items.values - name = _validate_data_var_name(name) # parse quantity model_quantity = Quantity.undefined() if quantity is None else quantity # select relevant items and convert to xr.Dataset - data = data[sel_items.all] + relevant_items = valid_items + aux_items + data = data[relevant_items] ds = data.to_xarray() - ds = ds.rename({sel_items.node: "node"}) ds = ds.dropna(dim="time", subset=["node"]) ds[name].attrs["long_name"] = model_quantity.name ds[name].attrs["units"] = model_quantity.unit - if "node" in ds: - ds = ds.set_coords(node_item) - for aux_item in sel_items.aux: + ds = ds.set_coords("node") + for aux_item in aux_items: ds[aux_item].attrs["kind"] = "aux" ds.attrs["gtype"] = str(GeometryType.NETWORK) From d0c0f923e3ba492821b3af1843d2127ffa7fd728 Mon Sep 17 00:00:00 2001 From: jpalm3r Date: Thu, 19 Feb 2026 09:36:58 +0100 Subject: [PATCH 03/26] feat: Add NetworkModelResult for handling network data in model skill --- notebooks/Collection_systems_res1d.ipynb | 644 +++++++++++++++++++++++ src/modelskill/comparison/_comparison.py | 8 +- src/modelskill/model/__init__.py | 3 + src/modelskill/model/network.py | 232 ++++---- src/modelskill/timeseries/_network.py | 8 +- src/modelskill/types.py | 2 +- 6 files changed, 786 insertions(+), 111 deletions(-) create mode 100644 notebooks/Collection_systems_res1d.ipynb diff --git a/notebooks/Collection_systems_res1d.ipynb b/notebooks/Collection_systems_res1d.ipynb new file mode 100644 index 000000000..a0e1b9dd1 --- /dev/null +++ b/notebooks/Collection_systems_res1d.ipynb @@ -0,0 +1,644 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "fdb0d0b9", + "metadata": {}, + "outputs": [], + "source": [ + "from mikeio1d.experimental import NetworkMapper\n", + "\n", + "from modelskill.model.network import NetworkModelResult" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7b198071", + "metadata": {}, + "outputs": [], + "source": [ + "res1d_path = \"../tests/testdata/network.res1d\"\n", + "mapper = NetworkMapper(res1d_path)\n", + "network = mapper.map_network()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "006d7b16", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 231kB\n",
+       "Dimensions:     (time: 110, node: 259)\n",
+       "Coordinates:\n",
+       "  * time        (time) datetime64[ns] 880B 1994-08-07T16:35:00 ... 1994-08-07...\n",
+       "  * node        (node) int64 2kB 0 1 2 3 4 5 6 7 ... 252 253 254 255 256 257 258\n",
+       "Data variables:\n",
+       "    Discharge   (time, node) float32 114kB nan nan 5.72e-06 ... nan 0.01692 0.0\n",
+       "    WaterLevel  (time, node) float32 114kB 195.4 194.7 nan ... 188.5 nan nan
" + ], + "text/plain": [ + " Size: 231kB\n", + "Dimensions: (time: 110, node: 259)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 880B 1994-08-07T16:35:00 ... 1994-08-07...\n", + " * node (node) int64 2kB 0 1 2 3 4 5 6 7 ... 252 253 254 255 256 257 258\n", + "Data variables:\n", + " Discharge (time, node) float32 114kB nan nan 5.72e-06 ... nan 0.01692 0.0\n", + " WaterLevel (time, node) float32 114kB 195.4 194.7 nan ... 188.5 nan nan" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "network.to_dataset()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "modelskill", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/modelskill/comparison/_comparison.py b/src/modelskill/comparison/_comparison.py index f8fedef75..1c3cb5bf4 100644 --- a/src/modelskill/comparison/_comparison.py +++ b/src/modelskill/comparison/_comparison.py @@ -25,7 +25,7 @@ from .. import Quantity from ..types import GeometryType from ..obs import PointObservation, TrackObservation -from ..model import PointModelResult, TrackModelResult +from ..model import PointModelResult, TrackModelResult, NetworkModelResult from ..timeseries._timeseries import _validate_data_var_name from ._comparer_plotter import ComparerPlotter from ..metrics import _parse_metric @@ -444,7 +444,7 @@ class Comparer: def __init__( self, matched_data: xr.Dataset, - raw_mod_data: dict[str, PointModelResult | TrackModelResult] | None = None, + raw_mod_data: dict[str, PointModelResult | TrackModelResult | NetworkModelResult] | None = None, ) -> None: self.data = _parse_dataset(matched_data) self.raw_mod_data = ( @@ -464,7 +464,7 @@ def __init__( @staticmethod def from_matched_data( data: xr.Dataset | pd.DataFrame, - raw_mod_data: Optional[Dict[str, PointModelResult | TrackModelResult]] = None, + raw_mod_data: Optional[Dict[str, PointModelResult | TrackModelResult | NetworkModelResult]] = None, obs_item: str | int | None = None, mod_items: Optional[Iterable[str | int]] = None, aux_items: Optional[Iterable[str | int]] = None, @@ -1258,7 +1258,7 @@ def load(filename: Union[str, Path]) -> "Comparer": return Comparer(matched_data=data) if data.gtype == "point": - raw_mod_data: Dict[str, PointModelResult | TrackModelResult] = {} + raw_mod_data: Dict[str, PointModelResult | TrackModelResult | NetworkModelResult] = {} for var in data.data_vars: var_name = str(var) diff --git a/src/modelskill/model/__init__.py b/src/modelskill/model/__init__.py index b0ba468ae..a7a09e34d 100644 --- a/src/modelskill/model/__init__.py +++ b/src/modelskill/model/__init__.py @@ -9,6 +9,7 @@ * SpatialField (extractable) - [`GridModelResult`](`modelskill.GridModelResult`) - a spatial field from a dfs2/nc file or a Xarray Dataset - [`DfsuModelResult`](`modelskill.DfsuModelResult`) - a spatial field from a dfsu file + - [`NetworkModelResult`](`modelskill.NetworkModelResult`) - a network field from xarray Dataset with time and node coordinates A model result can be created by explicitly invoking one of the above classes or using the [`model_result()`](`modelskill.model_result`) function which will return the appropriate type based on the input data (if possible). 
""" @@ -20,6 +21,7 @@ from .track import TrackModelResult from .dfsu import DfsuModelResult from .grid import GridModelResult +from .network import NetworkModelResult from .dummy import DummyModelResult __all__ = [ @@ -27,6 +29,7 @@ "TrackModelResult", "DfsuModelResult", "GridModelResult", + "NetworkModelResult", "model_result", "DummyModelResult", ] diff --git a/src/modelskill/model/network.py b/src/modelskill/model/network.py index 8c32884ee..674d8e530 100644 --- a/src/modelskill/model/network.py +++ b/src/modelskill/model/network.py @@ -1,146 +1,178 @@ from __future__ import annotations -from typing import Optional, Sequence, Any +from typing import Optional, Sequence import numpy as np import xarray as xr import pandas as pd -from ..obs import Observation -from ..types import PointType +from ._base import SpatialField, _validate_overlap_in_time, SelectedItems +from ..obs import PointObservation from ..quantity import Quantity -from ..timeseries import TimeSeries, _parse_network_input +from .point import PointModelResult -class NetworkModelResult(TimeSeries): - """Model result for a single point location. +class NetworkModelResult(SpatialField): + """Model result for network data with time and node dimensions. - Construct a PointModelResult from a 0d data source: - dfs0 file, mikeio.Dataset/DataArray, pandas.DataFrame/Series - or xarray.Dataset/DataArray + Construct a NetworkModelResult from an xarray.Dataset with time and node coordinates + and arbitrary number of data variables. Users must provide exact node IDs (integers) + when creating observations - no spatial interpolation is performed. Parameters ---------- - data : str, Path, mikeio.Dataset, mikeio.DataArray, pd.DataFrame, pd.Series, xr.Dataset or xr.DataArray - filename (.dfs0 or .nc) or object with the data + data : xr.Dataset + xarray.Dataset with time and node coordinates name : Optional[str], optional The name of the model result, - by default None (will be set to file name or item name) - x : float, optional - first coordinate of point position, inferred from data if not given, else None - y : float, optional - second coordinate of point position, inferred from data if not given, else None - z : float, optional - third coordinate of point position, inferred from data if not given, else None + by default None (will be set to first data variable name) item : str | int | None, optional If multiple items/arrays are present in the input an item must be given (as either an index or a string), by default None quantity : Quantity, optional - Model quantity, for MIKE files this is inferred from the EUM information + Model quantity aux_items : Optional[list[int | str]], optional Auxiliary items, by default None + + Notes + ----- + For point observations, specify the node ID as the x-coordinate (integer). + No spatial interpolation between nodes is performed. 
""" def __init__( self, - data: PointType, + data: xr.Dataset, *, name: Optional[str] = None, item: str | int | None = None, quantity: Optional[Quantity] = None, aux_items: Optional[Sequence[int | str]] = None, ) -> None: - if not self._is_input_validated(data): - data = _parse_network_input( - data, - name=name, - item=item, - quantity=quantity, - aux_items=aux_items, - ) - - assert isinstance(data, xr.Dataset) - - data_var = str(list(data.data_vars)[0]) - data[data_var].attrs["kind"] = "model" - super().__init__(data=data) - - def interp_time( - self, observation: Observation, **kwargs: Any - ) -> NetworkModelResult: - """ - Interpolate model result to the time of the observation + assert isinstance( + data, xr.Dataset + ), "NetworkModelResult requires xarray.Dataset" + assert "time" in data.dims, "Dataset must have time dimension" + assert "node" in data.dims, "Dataset must have node dimension" + assert len(data.data_vars) > 0, "Dataset must have at least one data variable" + + sel_items = SelectedItems.parse( + list(data.data_vars), item=item, aux_items=aux_items + ) + name = name or sel_items.values + + self.data: xr.Dataset = data[sel_items.all] + self.name = name + self.sel_items = sel_items + + # use long_name and units from data if not provided + if quantity is None: + da = self.data[sel_items.values] + quantity = Quantity.from_cf_attrs(da.attrs) + + self.quantity = quantity + + # Mark data variables as model data + data_var = sel_items.values + self.data[data_var].attrs["kind"] = "model" + + def __repr__(self) -> str: + res = [] + res.append(f"<{self.__class__.__name__}>: {self.name}") + res.append(f"Time: {self.time[0]} - {self.time[-1]}") + res.append(f"Nodes: {len(self.data.node)} nodes") + res.append(f"Quantity: {self.quantity}") + if len(self.sel_items.aux) > 0: + res.append(f"Auxiliary variables: {', '.join(self.sel_items.aux)}") + return "\n".join(res) + + @property + def time(self) -> pd.DatetimeIndex: + return pd.DatetimeIndex(self.data.time) + + def extract( + self, + observation: PointObservation, + spatial_method: Optional[str] = None, + ) -> PointModelResult: + """Extract ModelResult at exact node locations - wrapper around xarray.Dataset.interp() + Note: this method is typically not called directly, but through the match() method. + The observation's x-coordinate must specify the exact node ID (integer). Parameters ---------- - observation : Observation - The observation to interpolate to - **kwargs - - Additional keyword arguments passed to xarray.interp + observation : + observation where x-coordinate specifies the node ID (as integer) + spatial_method : Optional[str], optional + Not used for network extraction (exact node selection) Returns ------- PointModelResult - Interpolated model result + extracted modelresult """ - ds = self.align(observation, **kwargs) - return NetworkModelResult(ds) + _validate_overlap_in_time(self.time, observation) + if isinstance(observation, PointObservation): + return self._extract_point(observation, spatial_method) + else: + raise NotImplementedError( + f"NetworkModelResult only supports PointObservation extraction, not {type(observation).__name__}." + ) - def align( - self, - observation: Observation, - *, - max_gap: float | None = None, - **kwargs: Any, - ) -> xr.Dataset: - new_time = observation.time + def _extract_point( + self, observation: PointObservation, spatial_method: Optional[str] = None + ) -> PointModelResult: + """Extract point from network data using exact node ID. 
- dati = self.data.dropna("time").interp( - time=new_time, assume_sorted=True, **kwargs - ) + The observation's x-coordinate should specify the node ID (as integer). + No spatial interpolation is performed - the exact node is selected. + """ + node_id = observation.x + if node_id is None: + raise ValueError( + f"PointObservation '{observation.name}' must specify node ID in x-coordinate." + ) - pmr = NetworkModelResult(dati) - if max_gap is not None: - pmr = pmr._remove_model_gaps(mod_index=self.time, max_gap=max_gap) - return pmr.data + # Convert to integer node ID + try: + node_id = int(node_id) + except (ValueError, TypeError): + raise ValueError( + f"Node ID (x-coordinate) must be an integer, got {node_id} for observation '{observation.name}'" + ) - def _remove_model_gaps( - self, - mod_index: pd.DatetimeIndex, - max_gap: float | None = None, - ) -> NetworkModelResult: - """Remove model gaps longer than max_gap from TimeSeries""" - max_gap_delta = pd.Timedelta(max_gap, "s") - valid_times = self._get_valid_times(mod_index, max_gap_delta) - ds = self.data.sel(time=valid_times) - return NetworkModelResult(ds) - - def _get_valid_times( - self, mod_index: pd.DatetimeIndex, max_gap: pd.Timedelta - ) -> pd.DatetimeIndex: - """Used only by _remove_model_gaps""" - obs_index = self.time - # init dataframe of available timesteps and their index - df = pd.DataFrame(index=mod_index) - df["idx"] = range(len(df)) - - # for query times get available left and right index of source times - df = ( - df.reindex(df.index.union(obs_index)) - .interpolate(method="time", limit_area="inside") - .reindex(obs_index) - .dropna() - ) - df["idxa"] = np.floor(df.idx).astype(int) - df["idxb"] = np.ceil(df.idx).astype(int) + # Check if node exists in the network + if node_id not in self.data.node.values: + available_nodes = list(self.data.node.values) + raise ValueError( + f"Node ID {node_id} not found in network. Available nodes: {available_nodes[:10]}{'...' 
if len(available_nodes) > 10 else ''}" + ) - # time of left and right source times and time delta - df["ta"] = mod_index[df.idxa] - df["tb"] = mod_index[df.idxb] - df["dt"] = df.tb - df.ta + # Extract data at the specified node + ds = self.data.sel(node=node_id) - # valid query times where time delta is less than max_gap - valid_idx = df.dt <= max_gap - return df[valid_idx].index + # Convert to dataframe and create PointModelResult + df = ds.to_dataframe().dropna() + if len(df) == 0: + raise ValueError( + f"No data available for node {node_id} in NetworkModelResult '{self.name}'" + ) + df = df.rename(columns={self.sel_items.values: self.name}) + + # Use observation coordinates or extract from data if available + x_coord = observation.x if observation.x is not None else node_id + y_coord = ( + observation.y + if observation.y is not None + else (float(ds.y.item()) if hasattr(ds, "y") and ds.y.size == 1 else np.nan) + ) + + return PointModelResult( + data=df, + x=x_coord, + y=y_coord, + item=self.name, + name=self.name, + quantity=self.quantity, + aux_items=self.sel_items.aux, + ) diff --git a/src/modelskill/timeseries/_network.py b/src/modelskill/timeseries/_network.py index c7d415725..3c91802bc 100644 --- a/src/modelskill/timeseries/_network.py +++ b/src/modelskill/timeseries/_network.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import get_args, Optional, Sequence +from typing import Optional, Sequence import xarray as xr from ..types import GeometryType, NetworkType @@ -14,13 +14,9 @@ def _parse_network_input( aux_items: Optional[Sequence[int | str]] = None, ) -> xr.Dataset: assert isinstance( - data, get_args(NetworkType) + data, NetworkType ), "Could not construct NetworkModelResult from provided data." - data.index.name = "time" - data = data.reorder_levels(["quantity", "node"], axis=1).stack() - data = data.reset_index().set_index("time") - # parse items valid_items = list(data.columns) diff --git a/src/modelskill/types.py b/src/modelskill/types.py index 205786abf..ed70d53c3 100644 --- a/src/modelskill/types.py +++ b/src/modelskill/types.py @@ -90,7 +90,7 @@ def from_string(s: str) -> "GeometryType": xr.DataArray, ] TrackType = Union[str, Path, pd.DataFrame, mikeio.Dfs0, mikeio.Dataset, xr.Dataset] -NetworkType = Union[pd.DataFrame] +NetworkType = pd.DataFrame @dataclass(frozen=True) From 91d45131bd55999da7ea2173ee1e54e0666a907f Mon Sep 17 00:00:00 2001 From: jpalm3r Date: Thu, 19 Feb 2026 09:43:20 +0100 Subject: [PATCH 04/26] small fixes --- src/modelskill/model/network.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/modelskill/model/network.py b/src/modelskill/model/network.py index 674d8e530..3c5822c95 100644 --- a/src/modelskill/model/network.py +++ b/src/modelskill/model/network.py @@ -87,12 +87,11 @@ def __repr__(self) -> str: @property def time(self) -> pd.DatetimeIndex: - return pd.DatetimeIndex(self.data.time) + return self.data.time def extract( self, observation: PointObservation, - spatial_method: Optional[str] = None, ) -> PointModelResult: """Extract ModelResult at exact node locations @@ -103,8 +102,6 @@ def extract( ---------- observation : observation where x-coordinate specifies the node ID (as integer) - spatial_method : Optional[str], optional - Not used for network extraction (exact node selection) Returns ------- @@ -113,15 +110,13 @@ def extract( """ _validate_overlap_in_time(self.time, observation) if isinstance(observation, PointObservation): - return self._extract_point(observation, spatial_method) + 
return self._extract_point(observation) else: raise NotImplementedError( f"NetworkModelResult only supports PointObservation extraction, not {type(observation).__name__}." ) - def _extract_point( - self, observation: PointObservation, spatial_method: Optional[str] = None - ) -> PointModelResult: + def _extract_point(self, observation: PointObservation) -> PointModelResult: """Extract point from network data using exact node ID. The observation's x-coordinate should specify the node ID (as integer). From 3d7df6c04f0a2e4afdc1b510235cd81e76d1dc25 Mon Sep 17 00:00:00 2001 From: jpalm3r Date: Thu, 19 Feb 2026 09:58:58 +0100 Subject: [PATCH 05/26] feat: Add NodeModelResult and NodeObservation for network node handling --- src/modelskill/__init__.py | 4 + src/modelskill/comparison/_comparison.py | 22 ++- src/modelskill/model/__init__.py | 3 + src/modelskill/model/network.py | 203 ++++++++++++++++++----- src/modelskill/obs.py | 86 +++++++++- 5 files changed, 272 insertions(+), 46 deletions(-) diff --git a/src/modelskill/__init__.py b/src/modelskill/__init__.py index 0b3ce52f2..23ae1892a 100644 --- a/src/modelskill/__init__.py +++ b/src/modelskill/__init__.py @@ -38,12 +38,14 @@ TrackModelResult, GridModelResult, DfsuModelResult, + NetworkModelResult, DummyModelResult, ) from .obs import ( observation, PointObservation, TrackObservation, + NodeObservation, ) from .matching import from_matched, match from .configuration import from_config @@ -94,9 +96,11 @@ def load(filename: Union[str, Path]) -> Comparer | ComparerCollection: "GridModelResult", "DfsuModelResult", "DummyModelResult", + "NetworkModelResult", "observation", "PointObservation", "TrackObservation", + "NodeObservation", "TimeSeries", "match", "from_matched", diff --git a/src/modelskill/comparison/_comparison.py b/src/modelskill/comparison/_comparison.py index 1c3cb5bf4..a15222beb 100644 --- a/src/modelskill/comparison/_comparison.py +++ b/src/modelskill/comparison/_comparison.py @@ -20,6 +20,8 @@ import xarray as xr from copy import deepcopy +from ..model.network import NodeModelResult + from .. import metrics as mtr from .. 
import Quantity @@ -444,7 +446,11 @@ class Comparer: def __init__( self, matched_data: xr.Dataset, - raw_mod_data: dict[str, PointModelResult | TrackModelResult | NetworkModelResult] | None = None, + raw_mod_data: dict[ + str, + PointModelResult | TrackModelResult | NetworkModelResult | NodeModelResult, + ] + | None = None, ) -> None: self.data = _parse_dataset(matched_data) self.raw_mod_data = ( @@ -464,7 +470,15 @@ def __init__( @staticmethod def from_matched_data( data: xr.Dataset | pd.DataFrame, - raw_mod_data: Optional[Dict[str, PointModelResult | TrackModelResult | NetworkModelResult]] = None, + raw_mod_data: Optional[ + Dict[ + str, + PointModelResult + | TrackModelResult + | NetworkModelResult + | NodeModelResult, + ] + ] = None, obs_item: str | int | None = None, mod_items: Optional[Iterable[str | int]] = None, aux_items: Optional[Iterable[str | int]] = None, @@ -1258,7 +1272,9 @@ def load(filename: Union[str, Path]) -> "Comparer": return Comparer(matched_data=data) if data.gtype == "point": - raw_mod_data: Dict[str, PointModelResult | TrackModelResult | NetworkModelResult] = {} + raw_mod_data: Dict[ + str, PointModelResult | TrackModelResult | NetworkModelResult + ] = {} for var in data.data_vars: var_name = str(var) diff --git a/src/modelskill/model/__init__.py b/src/modelskill/model/__init__.py index a7a09e34d..9840e64fc 100644 --- a/src/modelskill/model/__init__.py +++ b/src/modelskill/model/__init__.py @@ -6,6 +6,7 @@ * Timeseries - [`PointModelResult`](`modelskill.PointModelResult`) - a point result from a dfs0/nc file or a DataFrame - [`TrackModelResult`](`modelskill.TrackModelResult`) - a track (moving point) result from a dfs0/nc file or a DataFrame + - [`NodeModelResult`](`modelskill.NodeModelResult`) - a network node result for specific node IDs * SpatialField (extractable) - [`GridModelResult`](`modelskill.GridModelResult`) - a spatial field from a dfs2/nc file or a Xarray Dataset - [`DfsuModelResult`](`modelskill.DfsuModelResult`) - a spatial field from a dfsu file @@ -22,6 +23,7 @@ from .dfsu import DfsuModelResult from .grid import GridModelResult from .network import NetworkModelResult +from .network import NodeModelResult from .dummy import DummyModelResult __all__ = [ @@ -30,6 +32,7 @@ "DfsuModelResult", "GridModelResult", "NetworkModelResult", + "NodeModelResult", "model_result", "DummyModelResult", ] diff --git a/src/modelskill/model/network.py b/src/modelskill/model/network.py index 3c5822c95..6a48c9b05 100644 --- a/src/modelskill/model/network.py +++ b/src/modelskill/model/network.py @@ -1,14 +1,160 @@ from __future__ import annotations -from typing import Optional, Sequence +from typing import Any, Optional, Sequence import numpy as np import xarray as xr import pandas as pd +from modelskill.obs import Observation +from modelskill.timeseries import TimeSeries, _parse_network_node_input + from ._base import SpatialField, _validate_overlap_in_time, SelectedItems -from ..obs import PointObservation +from ..obs import NodeObservation from ..quantity import Quantity -from .point import PointModelResult +from ..types import PointType + + +class NodeModelResult(TimeSeries): + """Model result for a single network node. + + Construct a NodeModelResult from timeseries data for a specific node ID. + This is a simple timeseries class designed for network node data. 
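+    Instances are typically produced by ``NetworkModelResult.extract()``
+    rather than constructed directly.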
+ + Parameters + ---------- + data : str, Path, mikeio.Dataset, mikeio.DataArray, pd.DataFrame, pd.Series, xr.Dataset or xr.DataArray + filename (.dfs0 or .nc) or object with the data + name : Optional[str], optional + The name of the model result, + by default None (will be set to file name or item name) + node : int, optional + node ID (integer), by default None + item : str | int | None, optional + If multiple items/arrays are present in the input an item + must be given (as either an index or a string), by default None + quantity : Quantity, optional + Model quantity, for MIKE files this is inferred from the EUM information + aux_items : Optional[list[int | str]], optional + Auxiliary items, by default None + + Examples + -------- + >>> import modelskill as ms + >>> mr = ms.NodeModelResult(data, node=123, name="Node_123") + >>> mr2 = ms.NodeModelResult(df, item="Water Level", node=456) + """ + + def __init__( + self, + data: PointType, + *, + name: Optional[str] = None, + node: Optional[int] = None, + item: str | int | None = None, + quantity: Optional[Quantity] = None, + aux_items: Optional[Sequence[int | str]] = None, + ) -> None: + if not self._is_input_validated(data): + data = _parse_network_node_input( + data, + name=name, + item=item, + quantity=quantity, + node=node, + aux_items=aux_items, + ) + + assert isinstance(data, xr.Dataset) + + data_var = str(list(data.data_vars)[0]) + data[data_var].attrs["kind"] = "model" + super().__init__(data=data) + + @property + def node(self) -> int: + """Node ID of model result""" + node_val = self.data.coords.get("node") + if node_val is not None: + return int(node_val.item()) + return None + + def interp_time(self, observation: Observation, **kwargs: Any) -> NodeModelResult: + """ + Interpolate model result to the time of the observation + + wrapper around xarray.Dataset.interp() + + Parameters + ---------- + observation : Observation + The observation to interpolate to + **kwargs + Additional keyword arguments passed to xarray.interp + + Returns + ------- + NodeModelResult + Interpolated model result + """ + ds = self.align(observation, **kwargs) + return NodeModelResult(ds) + + def align( + self, + observation: Observation, + *, + max_gap: float | None = None, + **kwargs: Any, + ) -> xr.Dataset: + new_time = observation.time + + dati = self.data.dropna("time").interp( + time=new_time, assume_sorted=True, **kwargs + ) + + nmr = NodeModelResult(dati) + if max_gap is not None: + nmr = nmr._remove_model_gaps(mod_index=self.time, max_gap=max_gap) + return nmr.data + + def _remove_model_gaps( + self, + mod_index: pd.DatetimeIndex, + max_gap: float | None = None, + ) -> NodeModelResult: + """Remove model gaps longer than max_gap from TimeSeries""" + max_gap_delta = pd.Timedelta(max_gap, "s") + valid_times = self._get_valid_times(mod_index, max_gap_delta) + ds = self.data.sel(time=valid_times) + return NodeModelResult(ds) + + def _get_valid_times( + self, mod_index: pd.DatetimeIndex, max_gap: pd.Timedelta + ) -> pd.DatetimeIndex: + """Used only by _remove_model_gaps""" + obs_index = self.time + # init dataframe of available timesteps and their index + df = pd.DataFrame(index=mod_index) + df["idx"] = range(len(df)) + + # for query times get available left and right index of source times + df = ( + df.reindex(df.index.union(obs_index)) + .interpolate(method="time", limit_area="inside") + .reindex(obs_index) + .dropna() + ) + df["idxa"] = np.floor(df.idx).astype(int) + df["idxb"] = np.ceil(df.idx).astype(int) + + # time of left and right source times 
and time delta + df["ta"] = mod_index[df.idxa] + df["tb"] = mod_index[df.idxb] + df["dt"] = df.tb - df.ta + + # valid query times where time delta is less than max_gap + valid_idx = df.dt <= max_gap + return df[valid_idx].index class NetworkModelResult(SpatialField): @@ -91,49 +237,41 @@ def time(self) -> pd.DatetimeIndex: def extract( self, - observation: PointObservation, - ) -> PointModelResult: + observation: NodeObservation, + ) -> NodeModelResult: """Extract ModelResult at exact node locations Note: this method is typically not called directly, but through the match() method. - The observation's x-coordinate must specify the exact node ID (integer). + The observation must specify the exact node ID. Parameters ---------- - observation : - observation where x-coordinate specifies the node ID (as integer) + observation : + observation with node ID Returns ------- - PointModelResult + NodeModelResult extracted modelresult """ _validate_overlap_in_time(self.time, observation) - if isinstance(observation, PointObservation): - return self._extract_point(observation) + if isinstance(observation, NodeObservation): + return self._extract_node(observation) else: raise NotImplementedError( - f"NetworkModelResult only supports PointObservation extraction, not {type(observation).__name__}." + f"NetworkModelResult only supports NodeObservation extraction, not {type(observation).__name__}." ) - def _extract_point(self, observation: PointObservation) -> PointModelResult: - """Extract point from network data using exact node ID. + def _extract_node(self, observation: NodeObservation) -> NodeModelResult: + """Extract node data from network. - The observation's x-coordinate should specify the node ID (as integer). + The observation specifies the exact node ID. No spatial interpolation is performed - the exact node is selected. """ - node_id = observation.x + node_id = observation.node if node_id is None: raise ValueError( - f"PointObservation '{observation.name}' must specify node ID in x-coordinate." - ) - - # Convert to integer node ID - try: - node_id = int(node_id) - except (ValueError, TypeError): - raise ValueError( - f"Node ID (x-coordinate) must be an integer, got {node_id} for observation '{observation.name}'" + f"NodeObservation '{observation.name}' must specify a valid node ID." 
) # Check if node exists in the network @@ -146,7 +284,7 @@ def _extract_point(self, observation: PointObservation) -> PointModelResult: # Extract data at the specified node ds = self.data.sel(node=node_id) - # Convert to dataframe and create PointModelResult + # Convert to dataframe and create NodeModelResult df = ds.to_dataframe().dropna() if len(df) == 0: raise ValueError( @@ -154,18 +292,9 @@ def _extract_point(self, observation: PointObservation) -> PointModelResult: ) df = df.rename(columns={self.sel_items.values: self.name}) - # Use observation coordinates or extract from data if available - x_coord = observation.x if observation.x is not None else node_id - y_coord = ( - observation.y - if observation.y is not None - else (float(ds.y.item()) if hasattr(ds, "y") and ds.y.size == 1 else np.nan) - ) - - return PointModelResult( + return NodeModelResult( data=df, - x=x_coord, - y=y_coord, + node=node_id, item=self.name, name=self.name, quantity=self.quantity, diff --git a/src/modelskill/obs.py b/src/modelskill/obs.py index 5828b2b03..e0007b6e6 100644 --- a/src/modelskill/obs.py +++ b/src/modelskill/obs.py @@ -1,10 +1,11 @@ """ # Observations -ModelSkill supports two types of observations: +ModelSkill supports three types of observations: * [`PointObservation`](`modelskill.PointObservation`) - a point timeseries from a dfs0/nc file or a DataFrame * [`TrackObservation`](`modelskill.TrackObservation`) - a track (moving point) timeseries from a dfs0/nc file or a DataFrame +* [`NodeObservation`](`modelskill.NodeObservation`) - a network node timeseries for specific node IDs An observation can be created by explicitly invoking one of the above classes or using the [`observation()`](`modelskill.observation`) function which will return the appropriate type based on the input data (if possible). """ @@ -22,6 +23,7 @@ TimeSeries, _parse_xyz_point_input, _parse_track_input, + _parse_network_node_input, ) # NetCDF attributes can only be str, int, float https://unidata.github.io/netcdf4-python/#attributes-in-a-netcdf-file @@ -31,9 +33,9 @@ def observation( data: DataInputType, *, - gtype: Optional[Literal["point", "track"]] = None, + gtype: Optional[Literal["point", "track", "node"]] = None, **kwargs, -) -> PointObservation | TrackObservation: +) -> PointObservation | TrackObservation | NodeObservation: """Create an appropriate observation object. A factory function for creating an appropriate observation object @@ -41,19 +43,20 @@ def observation( If 'x' or 'y' is given, a PointObservation is created. If 'x_item' or 'y_item' is given, a TrackObservation is created. + If 'node' is given, a NodeObservation is created. Parameters ---------- data : DataInputType The data to be used for creating the Observation object. - gtype : Optional[Literal["point", "track"]] + gtype : Optional[Literal["point", "track", "node"]] The geometry type of the data. If not specified, it will be guessed from the data. **kwargs Additional keyword arguments to be passed to the Observation constructor. 
Returns ------- - PointObservation or TrackObservation + PointObservation or TrackObservation or NodeObservation An observation object of the appropriate type Examples @@ -61,6 +64,7 @@ def observation( >>> import modelskill as ms >>> o_pt = ms.observation(df, item=0, x=366844, y=6154291, name="Klagshamn") >>> o_tr = ms.observation("lon_after_lat.dfs0", item="wl", x_item=1, y_item=0) + >>> o_node = ms.observation(df, item="Water Level", node=123, name="Node_123") """ if gtype is None: geometry = _guess_gtype(**kwargs) @@ -80,9 +84,11 @@ def _guess_gtype(**kwargs) -> GeometryType: return GeometryType.POINT elif "x_item" in kwargs or "y_item" in kwargs: return GeometryType.TRACK + elif "node" in kwargs: + return GeometryType.NETWORK else: warnings.warn( - "Could not guess geometry type from data or args, assuming POINT geometry. Use PointObservation or TrackObservation to be explicit." + "Could not guess geometry type from data or args, assuming POINT geometry. Use PointObservation, TrackObservation, or NodeObservation to be explicit." ) return GeometryType.POINT @@ -340,6 +346,73 @@ def __init__( super().__init__(data=data, weight=weight, attrs=attrs) +class NodeObservation(Observation): + """Class for observations at network nodes + + Create a NodeObservation from a DataFrame or other data source. + The node ID is specified as an integer. + + Parameters + ---------- + data : str, Path, mikeio.Dataset, mikeio.DataArray, pd.DataFrame, pd.Series, xr.Dataset or xr.DataArray + data source with time series for the node + item : (int, str), optional + index or name of the wanted item/column, by default None + if data contains more than one item, item must be given + node : int, optional + node ID (integer), by default None + name : str, optional + user-defined name for easy identification in plots etc, by default derived from data + weight : float, optional + weighting factor for skill scores, by default 1.0 + quantity : Quantity, optional + The quantity of the observation, for validation with model results + aux_items : list, optional + list of names or indices of auxiliary items, by default None + attrs : dict, optional + additional attributes to be added to the data, by default None + + Examples + -------- + >>> import modelskill as ms + >>> o1 = ms.NodeObservation(data, node=123, name="Node_123") + >>> o2 = ms.NodeObservation(df, item="Water Level", node=456) + """ + + def __init__( + self, + data: PointType, + *, + item: Optional[int | str] = None, + node: Optional[int] = None, + name: Optional[str] = None, + weight: float = 1.0, + quantity: Optional[Quantity] = None, + aux_items: Optional[list[int | str]] = None, + attrs: Optional[dict] = None, + ) -> None: + if not self._is_input_validated(data): + data = _parse_network_node_input( + data, + name=name, + item=item, + quantity=quantity, + node=node, + aux_items=aux_items, + ) + + assert isinstance(data, xr.Dataset) + super().__init__(data=data, weight=weight, attrs=attrs) + + @property + def node(self) -> int: + """Node ID of observation""" + node_val = self.data.coords.get("node") + if node_val is not None: + return int(node_val.item()) + return None + + def unit_display_name(name: str) -> str: """Display name @@ -364,4 +437,5 @@ def unit_display_name(name: str) -> str: _obs_class_lookup = { GeometryType.POINT: PointObservation, GeometryType.TRACK: TrackObservation, + GeometryType.NETWORK: NodeObservation, } From 1fc1ecf78c84af48b3ef7eb6152aa8e57df46b2f Mon Sep 17 00:00:00 2001 From: jpalm3r Date: Thu, 19 Feb 2026 10:55:34 +0100 Subject: 
[PATCH 06/26] feat: Add extract_multiple method for batch extraction of NodeModelResults --- src/modelskill/model/network.py | 46 ++++++++++++++++++++++++ src/modelskill/timeseries/_timeseries.py | 44 ++++++++++++++++------- 2 files changed, 78 insertions(+), 12 deletions(-) diff --git a/src/modelskill/model/network.py b/src/modelskill/model/network.py index 6a48c9b05..537a43082 100644 --- a/src/modelskill/model/network.py +++ b/src/modelskill/model/network.py @@ -300,3 +300,49 @@ def _extract_node(self, observation: NodeObservation) -> NodeModelResult: quantity=self.quantity, aux_items=self.sel_items.aux, ) + + def extract_multiple( + self, + observations: Sequence[NodeObservation], + ) -> list[NodeModelResult]: + """Extract ModelResult for multiple node observations + + This method allows efficient extraction of data for multiple nodes at once, + which is useful when comparing multiple observation points against the same network model. + + Parameters + ---------- + observations : Sequence[NodeObservation] + Sequence of node observations with node IDs + + Returns + ------- + list[NodeModelResult] + List of extracted model results, one per observation + + Examples + -------- + >>> import modelskill as ms + >>> obs1 = ms.NodeObservation(data1, node=123) + >>> obs2 = ms.NodeObservation(data2, node=456) + >>> network_model = ms.NetworkModelResult(network_data) + >>> results = network_model.extract_multiple([obs1, obs2]) + """ + if not observations: + raise ValueError("observations sequence cannot be empty") + + results = [] + for obs in observations: + if not isinstance(obs, NodeObservation): + raise TypeError( + f"All observations must be NodeObservation instances, got {type(obs).__name__}" + ) + try: + result = self._extract_node(obs) + results.append(result) + except Exception as e: + raise ValueError( + f"Failed to extract data for observation '{obs.name}' at node {obs.node}: {str(e)}" + ) + + return results diff --git a/src/modelskill/timeseries/_timeseries.py b/src/modelskill/timeseries/_timeseries.py index 52ff26ca4..fe02ed5d8 100644 --- a/src/modelskill/timeseries/_timeseries.py +++ b/src/modelskill/timeseries/_timeseries.py @@ -63,13 +63,17 @@ def _validate_dataset(ds: xr.Dataset) -> xr.Dataset: ), "time must be increasing (please check for duplicate times))" # Validate coordinates - if "x" not in ds.coords: - raise ValueError("data must have an x-coordinate") - if "y" not in ds.coords: - raise ValueError("data must have a y-coordinate") - if "z" not in ds.coords: - ds.coords["z"] = np.nan - # assert "z" in ds.coords, "data must have a z-coordinate" + # Check for either traditional x,y coordinates OR node-based coordinates + has_spatial_coords = "x" in ds.coords and "y" in ds.coords + has_node_coord = "node" in ds.coords + + if not has_spatial_coords and not has_node_coord: + raise ValueError("data must have either x,y coordinates or a node coordinate") + + if has_spatial_coords: + # Traditional spatial data - ensure z coordinate exists + if "z" not in ds.coords: + ds.coords["z"] = np.nan # Validate data vars = [v for v in ds.data_vars] @@ -207,6 +211,8 @@ def time(self) -> pd.DatetimeIndex: @property def x(self) -> Any: # TODO should this be a float? 
"""x-coordinate""" + if "x" not in self.data.coords: + return None # Node-based data doesn't have x coordinate return self._coordinate_values("x") @x.setter @@ -216,6 +222,8 @@ def x(self, value: Any) -> None: @property def y(self) -> Any: """y-coordinate""" + if "y" not in self.data.coords: + return None # Node-based data doesn't have y coordinate return self._coordinate_values("y") @y.setter @@ -248,7 +256,12 @@ def __repr__(self) -> str: res = [] res.append(f"<{self.__class__.__name__}>: {self.name}") if self.gtype == str(GeometryType.POINT): - res.append(f"Location: {self.x}, {self.y}") + # Show location based on available coordinates + if "node" in self.data.coords: + node_id = self.data.coords["node"].item() + res.append(f"Node: {node_id}") + elif self.x is not None and self.y is not None: + res.append(f"Location: {self.x}, {self.y}") res.append(f"Time: {self.time[0]} - {self.time[-1]}") res.append(f"Quantity: {self.quantity}") if len(self._aux_vars) > 0: @@ -298,11 +311,18 @@ def to_dataframe(self) -> pd.DataFrame: if self.gtype == str(GeometryType.POINT): # we remove the scalar coordinate variables as they # will otherwise be columns in the dataframe - return self.data.drop_vars(["x", "y", "z"]).to_dataframe() + # Only drop coordinates that exist + coords_to_drop = [] + for coord in ["x", "y", "z"]: + if coord in self.data.coords: + coords_to_drop.append(coord) + return self.data.drop_vars(coords_to_drop).to_dataframe() elif self.gtype == str(GeometryType.TRACK): - df = self.data.drop_vars(["z"]).to_dataframe() - # make sure that x, y cols are first - cols = ["x", "y"] + [c for c in df.columns if c not in ["x", "y"]] + coords_to_drop = ["z"] if "z" in self.data.coords else [] + df = self.data.drop_vars(coords_to_drop).to_dataframe() + # makes sure that x comes first, then y, then other columns alphabetically + priority = {"x": 0, "y": 1} + cols = sorted(df.columns, key=lambda col: (priority.get(col, 999), col)) return df[cols] else: raise NotImplementedError(f"Unknown gtype: {self.gtype}") From 35882ab9ecdac15626e2a6faf61dc3a278a02d30 Mon Sep 17 00:00:00 2001 From: jpalm3r Date: Thu, 19 Feb 2026 15:35:07 +0100 Subject: [PATCH 07/26] feat: Enhance matching functionality to support NodeModelResult and NodeObservation --- src/modelskill/matching.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/src/modelskill/matching.py b/src/modelskill/matching.py index b182f0ad7..b92dec6d3 100644 --- a/src/modelskill/matching.py +++ b/src/modelskill/matching.py @@ -27,8 +27,9 @@ from .model.dfsu import DfsuModelResult from .model.dummy import DummyModelResult from .model.grid import GridModelResult +from .model.network import NetworkModelResult, NodeModelResult from .model.track import TrackModelResult -from .obs import Observation, PointObservation, TrackObservation +from .obs import Observation, PointObservation, TrackObservation, NodeObservation from .timeseries import TimeSeries from .types import Period @@ -40,6 +41,7 @@ GridModelResult, DfsuModelResult, TrackModelResult, + NetworkModelResult, DummyModelResult, ] MRInputType = Union[ @@ -56,7 +58,7 @@ TimeSeries, MRTypes, ] -ObsTypes = Union[PointObservation, TrackObservation] +ObsTypes = Union[PointObservation, TrackObservation, NodeObservation] ObsInputType = Union[ str, Path, @@ -267,7 +269,15 @@ def match( if len(obs) > 1 and isinstance(mod, Collection) and len(mod) > 1: if not all( - isinstance(m, (DfsuModelResult, GridModelResult, DummyModelResult)) + isinstance( + m, + ( + 
DfsuModelResult, + GridModelResult, + NetworkModelResult, + DummyModelResult, + ), + ) for m in mod ): raise ValueError( @@ -320,7 +330,15 @@ def _match_single_obs( raw_mod_data = { m.name: ( m.extract(obs, spatial_method=spatial_method) - if isinstance(m, (DfsuModelResult, GridModelResult, DummyModelResult)) + if isinstance( + m, + ( + DfsuModelResult, + GridModelResult, + NetworkModelResult, + DummyModelResult, + ), + ) else m ) for m in models @@ -351,7 +369,7 @@ def _get_global_start_end(idxs: Iterable[pd.DatetimeIndex]) -> Period: def _match_space_time( observation: Observation, - raw_mod_data: Mapping[str, PointModelResult | TrackModelResult], + raw_mod_data: Mapping[str, PointModelResult | TrackModelResult | NodeModelResult], max_model_gap: float | None, spatial_tolerance: float, obs_no_overlap: Literal["ignore", "error", "warn"], @@ -375,6 +393,9 @@ def _match_space_time( ) case PointModelResult() as pmr, PointObservation(): aligned = pmr.align(observation, max_gap=max_model_gap) + case NodeModelResult() as nmr, NodeObservation(): + # mr is the extracted NodeModelResult + aligned = nmr.align(observation, max_gap=max_model_gap) case _: raise TypeError( f"Matching not implemented for model type {type(mr)} and observation type {type(observation)}" From 5a692ef3515981245622401664c4c80313cc8f4a Mon Sep 17 00:00:00 2001 From: jpalm3r Date: Thu, 19 Feb 2026 15:47:06 +0100 Subject: [PATCH 08/26] feat: Update matching logic to extract observations from NetworkModelResult --- src/modelskill/matching.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/modelskill/matching.py b/src/modelskill/matching.py index b92dec6d3..b0d756945 100644 --- a/src/modelskill/matching.py +++ b/src/modelskill/matching.py @@ -335,11 +335,10 @@ def _match_single_obs( ( DfsuModelResult, GridModelResult, - NetworkModelResult, DummyModelResult, ), ) - else m + else m.extract(obs) if isinstance(m, NetworkModelResult) else m ) for m in models } From e15ae639670444c8791b17537a0febd1bb241c10 Mon Sep 17 00:00:00 2001 From: jpalm3r Date: Thu, 19 Feb 2026 16:29:52 +0100 Subject: [PATCH 09/26] feat: Add NetworkObservation class for handling collections of node observations --- notebooks/Collection_systems_res1d.ipynb | 324 +++++++++++++++++++++-- src/modelskill/__init__.py | 2 + src/modelskill/matching.py | 57 ++-- src/modelskill/obs.py | 101 +++++++ 4 files changed, 446 insertions(+), 38 deletions(-) diff --git a/notebooks/Collection_systems_res1d.ipynb b/notebooks/Collection_systems_res1d.ipynb index a0e1b9dd1..791b9bb57 100644 --- a/notebooks/Collection_systems_res1d.ipynb +++ b/notebooks/Collection_systems_res1d.ipynb @@ -2,33 +2,23 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "id": "fdb0d0b9", "metadata": {}, "outputs": [], "source": [ - "from mikeio1d.experimental import NetworkMapper\n", + "import modelskill as ms\n", + "import pandas as pd\n", + "import numpy as np\n", "\n", - "from modelskill.model.network import NetworkModelResult" + "from mikeio1d.experimental import NetworkMapper" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "id": "7b198071", "metadata": {}, - "outputs": [], - "source": [ - "res1d_path = \"../tests/testdata/network.res1d\"\n", - "mapper = NetworkMapper(res1d_path)\n", - "network = mapper.map_network()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "006d7b16", - "metadata": {}, "outputs": [ { "data": { @@ -518,7 +508,7 @@ " * node (node) int64 2kB 0 1 2 3 4 5 6 7 ... 
252 253 254 255 256 257 258\n", "Data variables:\n", " Discharge (time, node) float32 114kB nan nan 5.72e-06 ... nan 0.01692 0.0\n", - " WaterLevel (time, node) float32 114kB 195.4 194.7 nan ... 188.5 nan nan