Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions .github/workflows/full_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,28 @@ on:
branches: [main]

jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: astral-sh/ruff-action@v2
with:
version: 0.6.2
src: src

build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10", "3.14"]
pandas-version: ["pandas2", "pandas3"]
exclude:
- python-version: "3.10"
pandas-version: "pandas3"

steps:
- uses: actions/checkout@v4

- uses: astral-sh/ruff-action@v2
with:
version: 0.6.2
src: src

- name: Set up uv
uses: astral-sh/setup-uv@v6
with:
Expand All @@ -30,6 +38,17 @@ jobs:
- name: Install dependencies
run: uv sync --group test --no-dev

- name: Install pandas 2.x
if: matrix.pandas-version == 'pandas2'
run: uv run pip install "pandas>=2.0,<3.0"

- name: Install pandas 3.x
if: matrix.pandas-version == 'pandas3'
run: uv run pip install "pandas>=3.0,<4.0"

- name: Show pandas version
run: uv run python -c "import pandas; print(f'pandas {pandas.__version__}')"

- name: Type check
run: make typecheck

Expand Down
16 changes: 14 additions & 2 deletions src/modelskill/comparison/_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,18 @@ def _parse_dataset(data: xr.Dataset) -> xr.Dataset:
# matched_data = self._matched_data_to_xarray(matched_data)
assert "Observation" in data.data_vars

# Normalize datetime precision to avoid xarray interp issues with pandas 3.0
# Different data sources may have different precisions (datetime64[s], datetime64[us], etc.)
# Use nanoseconds (ns) for backward compatibility with pandas 2.x
# Note: The dtype.kind == "M" check is required because some datasets use
# non-datetime indexes (e.g., RangeIndex in tests). Only DatetimeIndex has
# the .as_unit() method, so we must skip normalization for other index types.
if data.time.dtype.kind == "M": # M = datetime64
time_pd = data.time.to_index() # Preserves freq attribute
if time_pd.dtype != "datetime64[ns]":
time_index = time_pd.as_unit("ns")
data = data.assign_coords(time=time_index)

# no missing values allowed in Observation
if data["Observation"].isnull().any():
raise ValueError("Observation data must not contain missing values.")
Expand Down Expand Up @@ -331,12 +343,12 @@ def _matched_data_to_xarray(
)

# check that items.obs and items.model are numeric
if not np.issubdtype(df[items.obs].dtype, np.number):
if not pd.api.types.is_numeric_dtype(df[items.obs].dtype):
raise ValueError(
"Observation data is of type {df[items.obs].dtype}, it must be numeric"
)
for m in items.model:
if not np.issubdtype(df[m].dtype, np.number):
if not pd.api.types.is_numeric_dtype(df[m].dtype):
raise ValueError(
f"Model data: {m} is of type {df[m].dtype}, it must be numeric"
)
Expand Down
4 changes: 2 additions & 2 deletions src/modelskill/comparison/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ def _add_spatial_grid_to_df(
bins_y = np.arange(y_start, y_end + binsize / 2, binsize)
# cut and get bin centre
df["xBin"] = pd.cut(df.x, bins=bins_x)
df["xBin"] = df["xBin"].apply(lambda x: x.mid)
df["xBin"] = df["xBin"].apply(lambda x: x.mid if pd.notna(x) else x)
df["yBin"] = pd.cut(df.y, bins=bins_y)
df["yBin"] = df["yBin"].apply(lambda x: x.mid)
df["yBin"] = df["yBin"].apply(lambda x: x.mid if pd.notna(x) else x)

return df

Expand Down
8 changes: 5 additions & 3 deletions src/modelskill/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,9 +588,11 @@ def peak_ratio(
time = obs.index

# Calculate number of years
dt_int = (time[1:].values - time[0:-1].values).view("int64")
dt_int_mode = float(stats.mode(dt_int, keepdims=False)[0]) / 1e9 # in seconds
N_years = dt_int_mode / 24 / 3600 / 365.25 * len(time)
# Use total_seconds() to handle any datetime precision (ns, us, ms, s)
dt = time[1:] - time[:-1]
dt_seconds = dt.total_seconds().values
dt_mode_seconds = float(stats.mode(dt_seconds, keepdims=False)[0])
N_years = dt_mode_seconds / 24 / 3600 / 365.25 * len(time)
peak_index, AAP_ = _partial_duration_series(
time,
obs,
Expand Down
2 changes: 1 addition & 1 deletion src/modelskill/model/dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class DummyModelResult:
--------
>>> import pandas as pd
>>> import modelskill as ms
>>> df = pd.DataFrame([0.0, 1.0], index=pd.date_range("2000", freq="H", periods=2))
>>> df = pd.DataFrame([0.0, 1.0], index=pd.date_range("2000", freq="h", periods=2))
>>> obs = ms.PointObservation(df, name="foo")
>>> mr = ms.DummyModelResult(strategy='mean')
>>> pmr = mr.extract(obs)
Expand Down
11 changes: 11 additions & 0 deletions src/modelskill/timeseries/_point.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,17 @@ def _convert_to_dataset(
data = data.rename({time_dim_name: "time"})
ds = data

# Normalize datetime precision to avoid xarray interp issues with pandas 3.0
# Different data sources (dfs0 files, DataFrames) may have different precisions
# (datetime64[s], datetime64[us], datetime64[ns], etc.), and xarray.interp()
# fails when interpolating between datasets with mismatched precisions.
# Use nanoseconds (ns) for backward compatibility with pandas 2.x
if ds.time.dtype.kind == "M": # M = datetime
time_pd = ds.time.to_index() # Preserves freq attribute
if time_pd.dtype != "datetime64[ns]":
time_index = time_pd.as_unit("ns")
ds = ds.assign_coords(time=time_index)

name = _validate_data_var_name(varname)

n_unique_times = len(ds.time.to_index().unique())
Expand Down
31 changes: 25 additions & 6 deletions tests/test_comparercollection.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,12 +570,31 @@ def test_plot_accepts_figsize(cc_plot_function):
assert a, b == figsize


def test_peak_ratio(cc):
    """Non existent peak ratio: regression value for the shared `cc` fixture."""
    # Restrict the collection to a single model before computing skill.
    single_model = cc.sel(model="m1")
    skill_table = single_model.skill(metrics=["peak_ratio"])

    result = skill_table.loc["fake point obs", "peak_ratio"]
    assert result == pytest.approx(1.119999999)
def test_peak_ratio():
    """Test peak_ratio with synthetic data containing clear, verifiable peaks"""
    # Two triangular peaks where the model overshoots the observation by
    # exactly 10 percent:
    #   peak 1: obs 5.0 vs model 5.5 -> ratio 1.1
    #   peak 2: obs 6.0 vs model 6.6 -> ratio 1.1
    # Expected peak_ratio is therefore mean([1.1, 1.1]) = 1.1.
    n = 100
    idx = pd.date_range("2020-01-01", periods=n, freq="h")
    observed = np.zeros(n)
    modelled = np.zeros(n)

    # First peak, centred on index 10
    observed[8:13] = [0, 1, 5, 1, 0]
    modelled[8:13] = [0, 1.1, 5.5, 1.1, 0]

    # Second peak, centred on index 50
    observed[48:53] = [0, 1, 6, 1, 0]
    modelled[48:53] = [0, 1.1, 6.6, 1.1, 0]

    frame = pd.DataFrame(
        {"Observation": observed, "model": modelled}, index=idx
    )

    cmp = ms.from_matched(frame, obs_item=0, name="synthetic_peaks")
    sk = cmp.skill(metrics=["peak_ratio"])

    # Every model peak is 1.1x the corresponding observed peak.
    assert sk.to_dataframe()["peak_ratio"].values == pytest.approx(1.1, abs=0.01)


def test_peak_ratio_2(cc_pr):
Expand Down
4 changes: 2 additions & 2 deletions tests/test_simple_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,12 @@ def test_matching_pointobservation_with_trackmodelresult_is_not_possible():
# ignore the data
tdf = pd.DataFrame(
{"x": [1, 2], "y": [1, 2], "m1": [0, 0]},
index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4S"),
index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4s"),
)
mr = ms.TrackModelResult(tdf, item="m1", x_item="x", y_item="y")
pdf = pd.DataFrame(
data={"level": [0.0, 0.0]},
index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4S"),
index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4s"),
)
obs = ms.PointObservation(pdf, item="level")
with pytest.raises(TypeError, match="TrackModelResult"):
Expand Down