From aa33783d2674fd42fe7f289390da5b0ec63d2458 Mon Sep 17 00:00:00 2001 From: Henrik Andersson Date: Mon, 18 May 2026 18:02:10 +0200 Subject: [PATCH] docs: improve VerticalModelResult and VerticalObservation docstrings Bring both vertical-profile docstrings to parity: state the long-format (time, z) input contract explicitly, mention the dfs0 repeated-timestamp convention, and switch to runnable Quarto {python} examples covering both a long-format DataFrame and a dfs0 from docs/data/vertical/. Also fixes several errors in the previous VerticalModelResult docstring: wrong class name, duplicate mikeio.Dfs0 in the type list, missing mikeio.Dataset, swapped lateral/zonal wording for x/y, opaque z_item description, and a bogus "offset" option for keep_duplicates. --- src/modelskill/model/vertical.py | 82 +++++++++++++++++------- src/modelskill/obs.py | 106 ++++++++++++++++--------------- 2 files changed, 113 insertions(+), 75 deletions(-) diff --git a/src/modelskill/model/vertical.py b/src/modelskill/model/vertical.py index b1e529ccb..406e86432 100644 --- a/src/modelskill/model/vertical.py +++ b/src/modelskill/model/vertical.py @@ -12,36 +12,72 @@ class VerticalModelResult(TimeSeries): - """Model result for a vertical column. + """Model result for a vertical profile at a fixed (x, y) location. - Construct a VerticalColumnModelResult from a dfs0 file, - mikeio.Dataset, pandas.DataFrame or a xarray.Datasets + The input must be in long format: one row per (time, z) pair, with a + column/item for the vertical coordinate and a column/item for the + modelled value. At least two items are required (z + value); if more are + present, ``item`` must be given. Parameters ---------- - data : str, Path, pd.DataFrame, mikeio.Dfs0, mikeio.Dfs0, xr.Dataset - The input data or file path - name : str | None, optional - The name of the model result, - by default None (will be set to file name or item name) - item : str | int | None, optional - If multiple items/arrays are present in the input an item - must be given (as either an index or a string), by default None - z_item : str | int | None, optional - Item of the first coordinate of positions, by default None + data : str, Path, pd.DataFrame, mikeio.Dfs0, mikeio.Dataset, xr.Dataset + Input data or path to a dfs0 file. + name : str, optional + Name of the model result, by default the file or item name. + item : str or int, optional + Index or name of the value item. Required if the input has more than + two items. + z_item : str or int, optional + Index or name of the item holding the vertical coordinate, by default 0. x : float, optional - lateral coordinate of point position, inferred from data if not given, else None + x-coordinate of the profile location, inferred from data when possible. y : float, optional - zonal coordinate of point position, inferred from data if not given, else None + y-coordinate of the profile location, inferred from data when possible. quantity : Quantity, optional - Model quantity, for MIKE files this is inferred from the EUM information - keep_duplicates : (str, bool), optional - Strategy for handling duplicate timestamps (wraps xarray.Dataset.drop_duplicates) - "first" to keep first occurrence, "last" to keep last occurrence, - False to drop all duplicates, "offset" to add milliseconds to - consecutive duplicates, by default "first" - aux_items : list[int | str] | None, optional - Auxiliary items, by default None + Model quantity. For MIKE files this is inferred from EUM information. + keep_duplicates : {"first", "last", False}, optional + Strategy for handling duplicate (time, z) pairs, by default "first". + aux_items : list[int | str], optional + Auxiliary items to keep alongside the value item. + + Notes + ----- + The input must be in long format: one row per (time, z) pair, with one + item/column holding the vertical coordinate and another holding the + modelled value. A dfs0 with N depth levels has its profile timestamps + repeated N times on a non-equidistant time axis. + + Examples + -------- + From a `pandas.DataFrame` in long format: + + ```{python} + import modelskill as ms + import pandas as pd + + times = pd.to_datetime( + ["2010-01-01 01:00"] * 3 + ["2010-01-01 02:00"] * 3 + ) + df = pd.DataFrame( + {"z": [0.0, -5.0, -10.0, 0.0, -5.0, -10.0], + "Salinity": [30.1, 30.3, 30.4, 30.5, 30.3, 30.3]}, + index=times, + ) + ms.VerticalModelResult(df, item="Salinity", z_item="z", x=12.0, y=55.0) + ``` + + From a dfs0 file (with z, Salinity and Temperature items): + + ```{python} + ms.VerticalModelResult( + "../data/vertical/VerticalModel_at_obs.dfs0", + item="Salinity", + z_item="z", + x=12.0, + y=55.0, + ) + ``` """ def __init__( diff --git a/src/modelskill/obs.py b/src/modelskill/obs.py index e4f653bbe..d4bc5d301 100644 --- a/src/modelskill/obs.py +++ b/src/modelskill/obs.py @@ -367,72 +367,74 @@ def __init__( class VerticalObservation(Observation): - """Class for observations of vertical profiles. + """Observation of a vertical profile at a fixed (x, y) location. - Create a VerticalObservation from a dfs0/nc file or tabular data - containing time, vertical coordinate, and observed values. + The input must be in long format: one row per (time, z) pair, with one + item/column holding the vertical coordinate and another holding the + observed value. At least two items are required (z + value); if more are + present, ``item`` must be given. Parameters ---------- - data : (str, Path, pd.DataFrame, mikeio.Dfs0, mikeio.Dataset, xr.Dataset) - Input data with vertical profile observations. - item : int or str, optional - Index or name of the primary observation item. - If the input contains more than one candidate value item, - this argument must be provided. + data : str, Path, pd.DataFrame, mikeio.Dfs0, mikeio.Dataset, xr.Dataset + Input data or path to a dfs0 file. + item : str or int, optional + Index or name of the value item. Required if the input has more than + two items. + z_item : str or int, optional + Index or name of the item holding the vertical coordinate, by default 0. x : float, optional - x-coordinate of the observation location. If not provided, - it is inferred from data when possible. + x-coordinate of the profile location, inferred from data when possible. y : float, optional - y-coordinate of the observation location. If not provided, - it is inferred from data when possible. - z_item : int or str, optional - Index or name of the vertical coordinate item, by default 0. + y-coordinate of the profile location, inferred from data when possible. name : str, optional - User-defined name for identification in plots and summaries. + Name of the observation, by default the file or item name. weight : float, optional - Weighting factor for skill scores, by default 1.0. - keep_duplicates : {"first", "last", False}, optional - Strategy for handling duplicate timestamps/z pairs. + Weighting factor for skill scores in `ComparerCollection`, by default 1.0. quantity : Quantity, optional - Physical quantity metadata used for validation against model results. + Observed quantity. For MIKE files this is inferred from EUM information. + keep_duplicates : {"first", "last", False}, optional + Strategy for handling duplicate (time, z) pairs, by default "first". aux_items : list[int | str], optional - List of auxiliary item names or indices to keep in the dataset. + Auxiliary items to keep alongside the value item. attrs : dict, optional - Additional attributes to be added to the underlying dataset. + Additional attributes to attach to the underlying dataset. + + Notes + ----- + A dfs0 with N depth levels has its profile timestamps repeated N times on + a non-equidistant time axis. Examples -------- - >>> import modelskill as ms - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "z": [0.0, -5.0, -10.0, 0.0, -5.0, -10.0], - ... "value": [0.1, 0.3, 0.4, 0.5, 0.3, 0.3], - ... }, - ... index=pd.to_datetime( - ... [ - ... "2010-01-01 01:00:00", - ... "2010-01-01 01:00:00", - ... "2010-01-01 01:00:00", - ... "2010-01-01 02:00:00", - ... "2010-01-01 02:00:00", - ... "2010-01-01 02:00:00", - ... ] - ... ), - ... ) - >>> df.index.name = "t" - >>> print(df.to_string()) - z value - t - 2010-01-01 01:00:00 0.0 0.1 - 2010-01-01 01:00:00 -5.0 0.3 - 2010-01-01 01:00:00 -10.0 0.4 - 2010-01-01 02:00:00 0.0 0.5 - 2010-01-01 02:00:00 -5.0 0.3 - 2010-01-01 02:00:00 -10.0 0.3 - - >>> o = ms.VerticalObservation(df, item="value", z_item="z", x=12.0, y=55.0) + From a `pandas.DataFrame` in long format: + + ```{python} + import modelskill as ms + import pandas as pd + + times = pd.to_datetime( + ["2010-01-01 01:00"] * 3 + ["2010-01-01 02:00"] * 3 + ) + df = pd.DataFrame( + {"z": [0.0, -5.0, -10.0, 0.0, -5.0, -10.0], + "Salinity": [30.0, 30.2, 30.3, 30.4, 30.2, 30.2]}, + index=times, + ) + ms.VerticalObservation(df, item="Salinity", z_item="z", x=12.0, y=55.0) + ``` + + From a dfs0 file (with z and Salinity items): + + ```{python} + ms.VerticalObservation( + "../data/vertical/VerticalProfile_obs1.dfs0", + item="Salinity", + z_item="z", + x=12.0, + y=55.0, + ) + ``` """ def __init__(