From 4b2d1c732b69cc98e300cac2cfa7e075528bf48f Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Wed, 15 Oct 2025 14:04:24 +0100 Subject: [PATCH 01/34] Update pyproject.toml --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 104433a..f095c9f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "res-wind-up" -version = "0.4.3" +version = "0.4.4" authors = [ { name = "Alex Clerc", email = "alex.clerc@res-group.com" } ] @@ -69,7 +69,6 @@ examples = [ 'ephem', 'flaml[automl]', ] -all = ["res-wind-up[dev,examples]"] [tool.setuptools.packages.find] where = ["."] From 20ea2ab50283d0f7f0686f20b01f57833abb8a48 Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Wed, 15 Oct 2025 15:31:17 +0100 Subject: [PATCH 02/34] update lock --- pyproject.toml | 2 +- uv.lock | 24 +----------------------- 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f095c9f..c4450f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -126,7 +126,7 @@ module = [ "geographiclib.geodesic", "pandas", "pandas.testing", - "ruptures", + "ruptures.*", "scipy.stats", "seaborn", "utm", diff --git a/uv.lock b/uv.lock index ce036c9..229fe7b 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,4 @@ version = 1 -revision = 1 requires-python = ">=3.9, <4.0" resolution-markers = [ "python_full_version >= '3.14'", @@ -3117,7 +3116,7 @@ wheels = [ [[package]] name = "res-wind-up" -version = "0.4.3" +version = "0.4.4" source = { editable = "." 
} dependencies = [ { name = "eval-type-backport" }, @@ -3141,25 +3140,6 @@ dependencies = [ ] [package.optional-dependencies] -all = [ - { name = "coverage" }, - { name = "ephem" }, - { name = "flaml", extra = ["automl"] }, - { name = "ipywidgets" }, - { name = "jupyterlab" }, - { name = "mypy" }, - { name = "notebook" }, - { name = "poethepoet" }, - { name = "pytest" }, - { name = "pytest-env" }, - { name = "requests" }, - { name = "ruff" }, - { name = "types-pyyaml" }, - { name = "types-requests" }, - { name = "types-tabulate" }, - { name = "types-toml" }, - { name = "types-tqdm" }, -] dev = [ { name = "coverage" }, { name = "mypy" }, @@ -3205,7 +3185,6 @@ requires-dist = [ { name = "pyyaml" }, { name = "requests", marker = "extra == 'dev'" }, { name = "requests", marker = "extra == 'examples'" }, - { name = "res-wind-up", extras = ["dev", "examples"], marker = "extra == 'all'" }, { name = "ruff", marker = "extra == 'dev'" }, { name = "ruptures" }, { name = "scipy" }, @@ -3220,7 +3199,6 @@ requires-dist = [ { name = "types-tqdm", marker = "extra == 'dev'" }, { name = "utm" }, ] -provides-extras = ["dev", "examples", "all"] [[package]] name = "rfc3339-validator" From f6fe5b8c958e9fb1e6b1c5652e6af6dba80ee39e Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Wed, 15 Oct 2025 15:32:26 +0100 Subject: [PATCH 03/34] add CostCircularL1 also improve scoring logic --- tests/test_optimize_northing.py | 4 +- wind_up/optimize_northing.py | 146 ++++++++++++++++++++++++++++---- 2 files changed, 132 insertions(+), 18 deletions(-) diff --git a/tests/test_optimize_northing.py b/tests/test_optimize_northing.py index 991ca74..b40700c 100644 --- a/tests/test_optimize_northing.py +++ b/tests/test_optimize_northing.py @@ -154,5 +154,5 @@ def test_auto_northing_corrections(test_homer_config: WindUpConfig) -> None: assert median_yaw_before_northing["HMR_T01"] == pytest.approx(178.0) assert median_yaw_before_northing["HMR_T02"] == pytest.approx(206.0) - assert 
median_yaw_after_northing["HMR_T01"] == pytest.approx(269.2) - assert median_yaw_after_northing["HMR_T02"] == pytest.approx(269.2) + assert median_yaw_after_northing["HMR_T01"] == pytest.approx(268.3, abs=0.5) + assert median_yaw_after_northing["HMR_T02"] == pytest.approx(268.0, abs=0.5) diff --git a/wind_up/optimize_northing.py b/wind_up/optimize_northing.py index 2bf2fd7..d37b709 100644 --- a/wind_up/optimize_northing.py +++ b/wind_up/optimize_northing.py @@ -6,8 +6,12 @@ import math from typing import TYPE_CHECKING +import numpy as np +import numpy.typing as npt import pandas as pd import ruptures as rpt +from ruptures.base import BaseCost +from scipy.stats import circmean from wind_up.backporting import strict_zip from wind_up.circular_math import circ_diff @@ -34,8 +38,6 @@ if TYPE_CHECKING: from pathlib import Path - import numpy as np - from wind_up.models import PlotConfig, WindUpConfig logger = logging.getLogger(__name__) @@ -43,12 +45,94 @@ DECAY_FRACTION = 0.4 -def _northing_score_changepoint_component(changepoint_count: int) -> float: - return float(changepoint_count) +class CostCircularL1(BaseCost): + """Custom cost function for detecting changes in circular data (e.g., wind directions). + + Uses L1 norm with circular distance for robustness to outliers. + """ + + model = "circular_l1" + min_size = 2 + + def __init__(self, period: float = 360.0): + """Initialize the circular cost function. + + Parameters + ---------- + period : float, default=360.0 + The period of the circular data (360 for degrees, 2*pi for radians) + + """ + self.period = period + self.signal = None + + def fit(self, signal: npt.NDArray) -> CostCircularL1: + """Set the internal signal parameter. 
+ + Parameters + ---------- + signal : array-like, shape (n_samples,) or (n_samples, 1) + The signal to analyze + + Returns + ------- + self + + """ + if signal.ndim == 1: + self.signal = signal + else: + # Handle 2D array with single column + self.signal = signal.ravel() + return self + + def error(self, start: int, end: int) -> float: + """Return the approximation cost on the segment [start:end]. + + Parameters + ---------- + start : int + Start index of the segment + end : int + End index of the segment + + Returns + ------- + float + Segment cost (sum of circular L1 distances from median) + + """ + if end - start < self.min_size: + raise rpt.exceptions.NotEnoughPoints + + if self.signal is None: + msg = "CostCircularL1 not fitted with signal data" + raise RuntimeError(msg) + + sub_signal = self.signal[start:end] + + # cheap and cheerful circ median + sub_signal_circmean = circmean(sub_signal, high=self.period / 2, low=-self.period / 2, nan_policy="omit") + sub_signal_zero_centred = sub_signal - sub_signal_circmean + circmedian = np.nanmedian(sub_signal_zero_centred) + sub_signal_circmean + + circdiffs = circ_diff(sub_signal, circmedian) + + return np.sum(circdiffs) + + +def _northing_score_changepoint_component(changepoint_count: int, *, years_of_data: float) -> float: + return 10 * max(float(changepoint_count - 1), 0) / max(years_of_data, 1) def _northing_score( - wtg_df: pd.DataFrame, *, north_ref_wd_col: str, changepoint_count: int, rated_power: float, timebase_s: int + wtg_df: pd.DataFrame, + *, + north_ref_wd_col: str, + changepoint_count: int, + rated_power: float, + timebase_s: int, + north_offset_df: pd.DataFrame | None, ) -> float: # this component penalizes long-ish, large north errors max_component = max(0, wtg_df[f"long_rolling_diff_to_{north_ref_wd_col}"].abs().max() - 4) ** 2 @@ -64,10 +148,29 @@ def _northing_score( * wtg_df[RAW_POWER_COL].clip(lower=min_weight, upper=max_weight) ).abs().mean() / max_weight - # this component encourages the 
correction list to be as short as possible - changepoint_component = _northing_score_changepoint_component(changepoint_count) + # this component encourages the correction list to be short + changepoint_component = _northing_score_changepoint_component( + changepoint_count, + years_of_data=(wtg_df.index.max() - wtg_df.index.min()).total_seconds() / (3600 * 24 * 365.25), + ) + + # this component adds a penalty if any adjacent changepoints in north_offset_df have a small abs circ_diff + small_adjacent_diffs_component = 0 + if north_offset_df is not None and len(north_offset_df) > 1: + abs_adjacent_diffs = ( + circ_diff(north_offset_df["north_offset"], north_offset_df["north_offset"].shift()).dropna().abs() # type:ignore[union-attr] + ) + smallest_acceptable_diff = 3 + small_adjacent_diffs_component = ( + 10 + * ( + 1 / abs_adjacent_diffs.clip(lower=1e-6, upper=smallest_acceptable_diff) - 1 / smallest_acceptable_diff + ).sum() + ) - return max_component + median_component + raw_wmean_component + changepoint_component + return ( + max_component + median_component + raw_wmean_component + changepoint_component + small_adjacent_diffs_component + ) def _add_northing_ok_and_diff_cols(wtg_df: pd.DataFrame, *, north_ref_wd_col: str, northed_col: str) -> pd.DataFrame: @@ -213,17 +316,20 @@ def _score_wtg_north_table( changepoint_count=len(output_north_table), rated_power=rated_power, timebase_s=timebase_s, + north_offset_df=output_north_table, ) return output_north_table, score, output_wtg_df -def _calc_max_changepoints_to_add(changepoint_count: int, *, score: float) -> int: +def _calc_max_changepoints_to_add(changepoint_count: int, *, score: float, years_of_data: float) -> int: max_changepoints_to_add = 0 - headroom = score - _northing_score_changepoint_component(changepoint_count + 1) + headroom = score - _northing_score_changepoint_component(changepoint_count + 1, years_of_data=years_of_data) while headroom > 0: max_changepoints_to_add += 1 - headroom = score - 
_northing_score_changepoint_component(max_changepoints_to_add + 1) + headroom = score - _northing_score_changepoint_component( + max_changepoints_to_add + 1, years_of_data=years_of_data + ) return max_changepoints_to_add @@ -248,11 +354,11 @@ def _get_changepoint_objects( north_ref_wd_col: str, ) -> tuple[rpt.base.BaseEstimator, np.ndarray]: col = f"filt_diff_to_{north_ref_wd_col}" - model = "l1" dropna_df = prev_best_wtg_df.dropna(subset=[col]) signal = dropna_df[col].to_numpy() timestamps = dropna_df.index.to_numpy() - algo = rpt.BottomUp(model=model).fit(signal) + custom_cost = CostCircularL1(period=360) + algo = rpt.BottomUp(custom_cost=custom_cost).fit(signal) return algo, timestamps @@ -386,6 +492,7 @@ def _prep_for_optimize_wtg_north_table( changepoint_count=0, rated_power=rated_power, timebase_s=timebase_s, + north_offset_df=None, ) logger.info(f"\n\nwtg_name={wtg_name}, north_ref_wd_col={north_ref_wd_col}, initial_score={initial_score:.2f}") @@ -423,6 +530,7 @@ def _optimize_wtg_north_table( north_ref_wd_col: str, timebase_s: int, plot_cfg: PlotConfig | None, + years_of_data: float, initial_wtg_north_table: pd.DataFrame | None = None, best_score_margin: float = 0, ) -> tuple[pd.DataFrame, pd.DataFrame, float, float]: @@ -460,7 +568,9 @@ def _optimize_wtg_north_table( ) done_optimizing = False - max_changepoints_to_add = min(5, _calc_max_changepoints_to_add(len(best_north_table), score=best_score)) + max_changepoints_to_add = min( + 5, _calc_max_changepoints_to_add(len(best_north_table), score=best_score, years_of_data=years_of_data) + ) initial_step_size: int = 100 shift_step_size: int = initial_step_size tries_left: int = 1 @@ -511,13 +621,15 @@ def _optimize_wtg_north_table( else: tries_left += 1 logger.info(f"tries_left increased to {tries_left}") - max_changepoints_to_add = min(2, _calc_max_changepoints_to_add(len(best_north_table), score=best_score)) + max_changepoints_to_add = min( + 2, _calc_max_changepoints_to_add(len(best_north_table), 
score=best_score, years_of_data=years_of_data) + ) if not best_move_found_this_loop: shift_step_size = min(shift_step_size - 1, round(shift_step_size * DECAY_FRACTION)) if shift_step_size < 1: max_changepoints_to_add = min( 2, - _calc_max_changepoints_to_add(len(best_north_table), score=best_score), + _calc_max_changepoints_to_add(len(best_north_table), score=best_score, years_of_data=years_of_data), ) shift_step_size = initial_step_size + 1 + (1 / (DECAY_FRACTION ** (math.pi * (tries_left + 1))) % 10) shift_step_size = round(shift_step_size) @@ -582,6 +694,7 @@ def _optimize_wf_north_table( plot_cfg=plot_cfg, initial_wtg_north_table=initial_wtg_north_table, best_score_margin=best_score_margin, + years_of_data=(wtg_df.index.max() - wtg_df.index.min()).total_seconds() / (3600 * 24 * 365.25), ) northed_col = calc_northed_col_name(north_ref_wd_col) @@ -606,6 +719,7 @@ def _optimize_wf_north_table( f"max_northing_error changed from {max_northing_error_before:.1f} to {max_northing_error_after:.1f} " f"[{max_northing_error_after - max_northing_error_before:.1f}]", ) + logger.info(f"\n{wtg_north_table=}\n") wtg_north_table["TurbineName"] = wtg_name optimized_wf_north_table = ( From 446005f3a298d5e73ebdf320acaaa6d7552698d0 Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Wed, 15 Oct 2025 15:40:48 +0100 Subject: [PATCH 04/34] try to fix mypy 3.9 issue --- wind_up/optimize_northing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wind_up/optimize_northing.py b/wind_up/optimize_northing.py index d37b709..9012a25 100644 --- a/wind_up/optimize_northing.py +++ b/wind_up/optimize_northing.py @@ -64,7 +64,7 @@ def __init__(self, period: float = 360.0): """ self.period = period - self.signal = None + self.signal: npt.NDArray | None = None def fit(self, signal: npt.NDArray) -> CostCircularL1: """Set the internal signal parameter. 
From e6e2aaf58b747d3dc831ab51c1c65b28f4780868 Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Wed, 15 Oct 2025 18:22:33 +0100 Subject: [PATCH 05/34] improve _calc_good_north_offset was just using median before --- tests/test_optimize_northing.py | 10 ++++++---- wind_up/optimize_northing.py | 21 +++++++++++++++------ 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/tests/test_optimize_northing.py b/tests/test_optimize_northing.py index b40700c..6f7d685 100644 --- a/tests/test_optimize_northing.py +++ b/tests/test_optimize_northing.py @@ -126,10 +126,12 @@ def test_auto_northing_corrections(test_homer_config: WindUpConfig) -> None: median_yaw_before_northing = wf_df.groupby("TurbineName", observed=True)["YawAngleMean"].median() median_yaw_after_northing = northed_wf_df.groupby("TurbineName", observed=True)["YawAngleMean"].median() + expected_t1_yaw_after_northing = 269.6 + expected_t2_yaw_after_northing = 267.6 assert median_yaw_before_northing["HMR_T01"] == pytest.approx(191.0) assert median_yaw_before_northing["HMR_T02"] == pytest.approx(173.0) - assert median_yaw_after_northing["HMR_T01"] == pytest.approx(269.6) - assert median_yaw_after_northing["HMR_T02"] == pytest.approx(267.6) + assert median_yaw_after_northing["HMR_T01"] == pytest.approx(expected_t1_yaw_after_northing) + assert median_yaw_after_northing["HMR_T02"] == pytest.approx(expected_t2_yaw_after_northing) # try to mess up the yaw angles further and run again wf_df[RAW_YAWDIR_COL] = (wf_df[RAW_YAWDIR_COL] + 180) % 360 @@ -154,5 +156,5 @@ def test_auto_northing_corrections(test_homer_config: WindUpConfig) -> None: assert median_yaw_before_northing["HMR_T01"] == pytest.approx(178.0) assert median_yaw_before_northing["HMR_T02"] == pytest.approx(206.0) - assert median_yaw_after_northing["HMR_T01"] == pytest.approx(268.3, abs=0.5) - assert median_yaw_after_northing["HMR_T02"] == pytest.approx(268.0, abs=0.5) + assert median_yaw_after_northing["HMR_T01"] == 
pytest.approx(expected_t1_yaw_after_northing, abs=1.5) + assert median_yaw_after_northing["HMR_T02"] == pytest.approx(expected_t2_yaw_after_northing, abs=1.0) diff --git a/wind_up/optimize_northing.py b/wind_up/optimize_northing.py index 9012a25..2eaa63d 100644 --- a/wind_up/optimize_northing.py +++ b/wind_up/optimize_northing.py @@ -112,12 +112,13 @@ def error(self, start: int, end: int) -> float: sub_signal = self.signal[start:end] # cheap and cheerful circ median + ## calc circmean, noting that sub_signal ranges from -180 to +180 sub_signal_circmean = circmean(sub_signal, high=self.period / 2, low=-self.period / 2, nan_policy="omit") - sub_signal_zero_centred = sub_signal - sub_signal_circmean - circmedian = np.nanmedian(sub_signal_zero_centred) + sub_signal_circmean - + ## rotate the data so 0 represents the circmean, calculate median, then rotate back + sub_signal_zero_centred = (sub_signal - sub_signal_circmean + 180) % 360 - 180 + circmedian = (np.nanmedian(sub_signal_zero_centred) + sub_signal_circmean + 180) % 360 - 180 + # return sum of circ diffs from circ median circdiffs = circ_diff(sub_signal, circmedian) - return np.sum(circdiffs) @@ -162,7 +163,7 @@ def _northing_score( ) smallest_acceptable_diff = 3 small_adjacent_diffs_component = ( - 10 + 20 # arbitrary gain * ( 1 / abs_adjacent_diffs.clip(lower=1e-6, upper=smallest_acceptable_diff) - 1 / smallest_acceptable_diff ).sum() @@ -206,6 +207,7 @@ def _add_northed_ok_diff_and_rolling_cols( wtg_df = _add_northing_ok_and_diff_cols(wtg_df, north_ref_wd_col=north_ref_wd_col, northed_col=northed_col) rolling_hours = 6 rows_per_hour = 3600 / timebase_s + # circular medians would be better but are harder to implement with good performance wtg_df[f"short_rolling_diff_to_{north_ref_wd_col}"] = ( wtg_df[f"filt_diff_to_{north_ref_wd_col}"] .rolling( @@ -229,7 +231,14 @@ def _add_northed_ok_diff_and_rolling_cols( def _calc_good_north_offset(section_df: pd.DataFrame, north_ref_wd_col: str) -> float: - return 
-section_df[f"filt_diff_to_{north_ref_wd_col}"].median() + # cheap and cheerful circ median + north_errors = section_df[f"filt_diff_to_{north_ref_wd_col}"] + north_errors_circmean = circmean(north_errors, high=180, low=-180, nan_policy="omit") + ## rotate the data so 0 represents the circmean, calculate median, then rotate back + north_errors_zero_centred = (north_errors - north_errors_circmean + 180) % 360 - 180 + circmedian = (np.nanmedian(north_errors_zero_centred) + north_errors_circmean + 180) % 360 - 180 + # return negative circmedian (the offset to add to make median north error 0) + return -circmedian def _calc_north_offset_col( From d7aa7069f0c69572db0df7b31ba0fbdaedadca5f Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Thu, 16 Oct 2025 06:14:08 +0100 Subject: [PATCH 06/34] Update test_math_funcs.py --- tests/test_math_funcs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_math_funcs.py b/tests/test_math_funcs.py index b78950a..e3d515c 100644 --- a/tests/test_math_funcs.py +++ b/tests/test_math_funcs.py @@ -24,6 +24,7 @@ def test_circ_diff(angle1: float | np.generic, angle2: float | np.generic, expected: float | np.generic) -> None: if isinstance(expected, pd.Series): assert_series_equal(circ_diff(angle1, angle2), (expected)) + assert_series_equal(circ_diff(angle1 - 360, angle2 + 360), (expected)) else: assert circ_diff(angle1, angle2) == pytest.approx(expected) From 9e20952c7c63cd52af845866576fc6494173485d Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Thu, 16 Oct 2025 06:14:44 +0100 Subject: [PATCH 07/34] improve CostCircularL1 --- wind_up/optimize_northing.py | 59 ++++++++++++++---------------------- 1 file changed, 22 insertions(+), 37 deletions(-) diff --git a/wind_up/optimize_northing.py b/wind_up/optimize_northing.py index 2eaa63d..63a83bd 100644 --- a/wind_up/optimize_northing.py +++ b/wind_up/optimize_northing.py @@ -46,7 +46,7 @@ class CostCircularL1(BaseCost): - """Custom cost function for detecting changes in circular data 
(e.g., wind directions). + """Custom cost function for detecting changes in circular data which ranges from -180 to 180. Uses L1 norm with circular distance for robustness to outliers. """ @@ -54,52 +54,37 @@ class CostCircularL1(BaseCost): model = "circular_l1" min_size = 2 - def __init__(self, period: float = 360.0): - """Initialize the circular cost function. - - Parameters - ---------- - period : float, default=360.0 - The period of the circular data (360 for degrees, 2*pi for radians) - - """ - self.period = period + def __init__(self) -> None: + """Initialize the circular cost function.""" self.signal: npt.NDArray | None = None + self.min_size = 2 def fit(self, signal: npt.NDArray) -> CostCircularL1: - """Set the internal signal parameter. + """Set parameters of the instance. - Parameters - ---------- - signal : array-like, shape (n_samples,) or (n_samples, 1) - The signal to analyze + Args: + signal (array): signal. Shape (n_samples,) or (n_samples, n_features) - Returns - ------- - self + Returns: + self """ if signal.ndim == 1: - self.signal = signal + self.signal = signal.reshape(-1, 1) else: - # Handle 2D array with single column - self.signal = signal.ravel() + self.signal = signal + return self def error(self, start: int, end: int) -> float: """Return the approximation cost on the segment [start:end]. 
- Parameters - ---------- - start : int - Start index of the segment - end : int - End index of the segment + Args: + start (int): start of the segment + end (int): end of the segment - Returns - ------- - float - Segment cost (sum of circular L1 distances from median) + Returns: + segment cost """ if end - start < self.min_size: @@ -113,13 +98,13 @@ def error(self, start: int, end: int) -> float: # cheap and cheerful circ median ## calc circmean, noting that sub_signal ranges from -180 to +180 - sub_signal_circmean = circmean(sub_signal, high=self.period / 2, low=-self.period / 2, nan_policy="omit") + sub_signal_circmean = circmean(sub_signal, low=-180, high=180, nan_policy="omit") ## rotate the data so 0 represents the circmean, calculate median, then rotate back sub_signal_zero_centred = (sub_signal - sub_signal_circmean + 180) % 360 - 180 circmedian = (np.nanmedian(sub_signal_zero_centred) + sub_signal_circmean + 180) % 360 - 180 # return sum of circ diffs from circ median - circdiffs = circ_diff(sub_signal, circmedian) - return np.sum(circdiffs) + abs_circdiffs = abs(circ_diff(sub_signal, circmedian)) + return np.sum(abs_circdiffs) def _northing_score_changepoint_component(changepoint_count: int, *, years_of_data: float) -> float: @@ -366,7 +351,7 @@ def _get_changepoint_objects( dropna_df = prev_best_wtg_df.dropna(subset=[col]) signal = dropna_df[col].to_numpy() timestamps = dropna_df.index.to_numpy() - custom_cost = CostCircularL1(period=360) + custom_cost = CostCircularL1() algo = rpt.BottomUp(custom_cost=custom_cost).fit(signal) return algo, timestamps @@ -728,7 +713,7 @@ def _optimize_wf_north_table( f"max_northing_error changed from {max_northing_error_before:.1f} to {max_northing_error_after:.1f} " f"[{max_northing_error_after - max_northing_error_before:.1f}]", ) - logger.info(f"\n{wtg_north_table=}\n") + logger.info(f"\n{wtg_north_table=}\n\n") wtg_north_table["TurbineName"] = wtg_name optimized_wf_north_table = ( From 
5a1d5e6782eaa958312da15947c4ec495fb271a5 Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Thu, 16 Oct 2025 06:14:55 +0100 Subject: [PATCH 08/34] Update optimize_northing_plots.py --- wind_up/plots/optimize_northing_plots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wind_up/plots/optimize_northing_plots.py b/wind_up/plots/optimize_northing_plots.py index 953501b..8004053 100644 --- a/wind_up/plots/optimize_northing_plots.py +++ b/wind_up/plots/optimize_northing_plots.py @@ -45,7 +45,7 @@ def plot_yaw_diff_vs_power(wtg_df: pd.DataFrame, *, wtg_name: str, north_ref_wd_ plt.ylabel(f"yaw_diff_to_{north_ref_wd_col}") title = f"{wtg_name} yaw_diff_to_{north_ref_wd_col} vs {RAW_POWER_COL}" plt.tight_layout() - (plot_cfg.plots_dir / wtg_name).mkdir(exist_ok=True) + (plot_cfg.plots_dir / wtg_name).mkdir(exist_ok=True, parents=True) plt.savefig(plot_cfg.plots_dir / wtg_name / f"{title}.png") plt.close() From bcb5e3d0b7634dfa4249cf75b604ff75c5dbc0e7 Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Thu, 16 Oct 2025 06:16:13 +0100 Subject: [PATCH 09/34] Update test_optimize_northing.py --- tests/test_optimize_northing.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/test_optimize_northing.py b/tests/test_optimize_northing.py index 6f7d685..60053e8 100644 --- a/tests/test_optimize_northing.py +++ b/tests/test_optimize_northing.py @@ -126,12 +126,11 @@ def test_auto_northing_corrections(test_homer_config: WindUpConfig) -> None: median_yaw_before_northing = wf_df.groupby("TurbineName", observed=True)["YawAngleMean"].median() median_yaw_after_northing = northed_wf_df.groupby("TurbineName", observed=True)["YawAngleMean"].median() - expected_t1_yaw_after_northing = 269.6 - expected_t2_yaw_after_northing = 267.6 + expected_yaw_after_northing = 268.6 assert median_yaw_before_northing["HMR_T01"] == pytest.approx(191.0) assert median_yaw_before_northing["HMR_T02"] == pytest.approx(173.0) - assert 
median_yaw_after_northing["HMR_T01"] == pytest.approx(expected_t1_yaw_after_northing) - assert median_yaw_after_northing["HMR_T02"] == pytest.approx(expected_t2_yaw_after_northing) + assert median_yaw_after_northing["HMR_T01"] == pytest.approx(expected_yaw_after_northing, abs=1.0) + assert median_yaw_after_northing["HMR_T02"] == pytest.approx(expected_yaw_after_northing, abs=1.0) # try to mess up the yaw angles further and run again wf_df[RAW_YAWDIR_COL] = (wf_df[RAW_YAWDIR_COL] + 180) % 360 @@ -156,5 +155,5 @@ def test_auto_northing_corrections(test_homer_config: WindUpConfig) -> None: assert median_yaw_before_northing["HMR_T01"] == pytest.approx(178.0) assert median_yaw_before_northing["HMR_T02"] == pytest.approx(206.0) - assert median_yaw_after_northing["HMR_T01"] == pytest.approx(expected_t1_yaw_after_northing, abs=1.5) - assert median_yaw_after_northing["HMR_T02"] == pytest.approx(expected_t2_yaw_after_northing, abs=1.0) + assert median_yaw_after_northing["HMR_T01"] == pytest.approx(expected_yaw_after_northing, abs=1.0) + assert median_yaw_after_northing["HMR_T02"] == pytest.approx(expected_yaw_after_northing, abs=1.0) From 16ef0053881ae10de126bc97ef6c28ebf1257f57 Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Thu, 16 Oct 2025 08:02:46 +0100 Subject: [PATCH 10/34] Update optimize_northing.py remove small_adjacent_diffs_component --- wind_up/optimize_northing.py | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/wind_up/optimize_northing.py b/wind_up/optimize_northing.py index 63a83bd..d44a020 100644 --- a/wind_up/optimize_northing.py +++ b/wind_up/optimize_northing.py @@ -118,7 +118,6 @@ def _northing_score( changepoint_count: int, rated_power: float, timebase_s: int, - north_offset_df: pd.DataFrame | None, ) -> float: # this component penalizes long-ish, large north errors max_component = max(0, wtg_df[f"long_rolling_diff_to_{north_ref_wd_col}"].abs().max() - 4) ** 2 @@ -140,23 +139,7 @@ def _northing_score( 
years_of_data=(wtg_df.index.max() - wtg_df.index.min()).total_seconds() / (3600 * 24 * 365.25), ) - # this component adds a penalty if any adjacent changepoints in north_offset_df have a small abs circ_diff - small_adjacent_diffs_component = 0 - if north_offset_df is not None and len(north_offset_df) > 1: - abs_adjacent_diffs = ( - circ_diff(north_offset_df["north_offset"], north_offset_df["north_offset"].shift()).dropna().abs() # type:ignore[union-attr] - ) - smallest_acceptable_diff = 3 - small_adjacent_diffs_component = ( - 20 # arbitrary gain - * ( - 1 / abs_adjacent_diffs.clip(lower=1e-6, upper=smallest_acceptable_diff) - 1 / smallest_acceptable_diff - ).sum() - ) - - return ( - max_component + median_component + raw_wmean_component + changepoint_component + small_adjacent_diffs_component - ) + return max_component + median_component + raw_wmean_component + changepoint_component def _add_northing_ok_and_diff_cols(wtg_df: pd.DataFrame, *, north_ref_wd_col: str, northed_col: str) -> pd.DataFrame: @@ -310,7 +293,6 @@ def _score_wtg_north_table( changepoint_count=len(output_north_table), rated_power=rated_power, timebase_s=timebase_s, - north_offset_df=output_north_table, ) return output_north_table, score, output_wtg_df @@ -486,7 +468,6 @@ def _prep_for_optimize_wtg_north_table( changepoint_count=0, rated_power=rated_power, timebase_s=timebase_s, - north_offset_df=None, ) logger.info(f"\n\nwtg_name={wtg_name}, north_ref_wd_col={north_ref_wd_col}, initial_score={initial_score:.2f}") From 1d73fbc9c44cab2442b1ede0379cb6d4cb8f96ba Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Thu, 16 Oct 2025 08:05:21 +0100 Subject: [PATCH 11/34] ignore mypy issue for 3.9 --- wind_up/optimize_northing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wind_up/optimize_northing.py b/wind_up/optimize_northing.py index d44a020..d6c100f 100644 --- a/wind_up/optimize_northing.py +++ b/wind_up/optimize_northing.py @@ -104,7 +104,7 @@ def error(self, start: int, end: 
int) -> float: circmedian = (np.nanmedian(sub_signal_zero_centred) + sub_signal_circmean + 180) % 360 - 180 # return sum of circ diffs from circ median abs_circdiffs = abs(circ_diff(sub_signal, circmedian)) - return np.sum(abs_circdiffs) + return np.sum(abs_circdiffs) # type: ignore[call-overload] def _northing_score_changepoint_component(changepoint_count: int, *, years_of_data: float) -> float: From 102fa6f774307e42e38cb1916158a0614d829cde Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Thu, 16 Oct 2025 10:21:57 +0100 Subject: [PATCH 12/34] add circ_median --- tests/test_math_funcs.py | 152 ++++++++++++++++++++++++++++++++++++++- wind_up/circular_math.py | 51 +++++++++++++ 2 files changed, 202 insertions(+), 1 deletion(-) diff --git a/tests/test_math_funcs.py b/tests/test_math_funcs.py index e3d515c..f79a445 100644 --- a/tests/test_math_funcs.py +++ b/tests/test_math_funcs.py @@ -4,8 +4,9 @@ import pandas as pd import pytest from pandas.testing import assert_series_equal +from scipy.stats import circmean -from wind_up.circular_math import circ_diff +from wind_up.circular_math import circ_diff, circ_median test_circ_diff_data = [ (0, 0, 0), @@ -34,3 +35,152 @@ def test_within_bin() -> None: d = 242 dir_bin_width = 10.0 assert not np.abs(circ_diff(d - 5, d)) < dir_bin_width / 2 + + +def circ_median_exact(angles: np.ndarray | list) -> float: + """Exact circular median using O(n²) approach for testing. + + In case of a tie, returns the circular mean of the tied angles. 
+ """ + + angles = np.asarray(angles).flatten() + angles = angles[~np.isnan(angles)] + + if len(angles) == 0: + return np.nan + + angles = np.mod(angles, 360) + + def _sum_circ_dist(candidate: float) -> float: + diffs = circ_diff(angles, candidate) + return np.sum(np.abs(diffs)) + + distances = np.array([_sum_circ_dist(angle) for angle in angles]) + min_distance = np.min(distances) + + # Find all angles that have the minimum distance (handle ties) + tied_indices = np.where(distances == min_distance)[0] + + if len(tied_indices) == 1: + return angles[tied_indices[0]] + # Return circular mean of tied angles + tied_angles = angles[tied_indices] + return circmean(tied_angles, high=360, low=0) + + +test_circ_median_data = [ + # Simple cases + ([0], 0, True), + ([0, 20], 10, True), + ([0, 15, 20], 15, True), + ([350, 0, 15], 0, True), + ([170, 181, 190], 181, True), + # Edge cases around 0/360 boundary + ([355, 0, 15], 0, True), + ([350, 351, 9, 10], 0, True), + # Symmetric cases + ([0, 90, 180, 270], None, True), # Any answer is valid due to symmetry + ([0, 120, 240], None, True), + # Single value + ([42], 42, True), + # Two values + ([10, 20], None, True), # Either 10 or 20 is valid + ([350, 10], None, True), # Could be either + # Larger datasets + (list(range(10, 351, 1)), 180, True), # Should be near middle + ([i % 360 for i in range(-178, 181, 1)], 1, True), + # Test range_360=False + ([170, 180, 190], 180, False), + ([350, 0, 10], 0, False), + ([-10, 0, 10], 0, False), +] + + +@pytest.mark.parametrize(("angles", "expected", "range_360"), test_circ_median_data) +def test_circ_median(angles: list, *, expected: float | None, range_360: bool) -> None: + result = circ_median(angles, range_360=range_360) + if expected is None: + assert not np.isnan(result) + return + + exact_result = circ_median_exact(angles) + exact_result = np.mod(exact_result, 360) if range_360 else np.mod(exact_result + 180, 360) - 180 + + # Check that fast and exact methods give similar results + # 
Allow for small differences due to approximation (within 10 degrees) + abs_circ_distance = abs(circ_diff(result, exact_result)) + assert abs_circ_distance < 1e-3, ( + f"Fast method result {result} differs from exact {exact_result} by {abs_circ_distance} degrees" + ) + + expected = np.mod(expected, 360) if range_360 else np.mod(expected + 180, 360) - 180 + abs_circ_distance_expected = abs(circ_diff(result, expected)) + assert abs_circ_distance_expected < 1e-3, ( + f"Result {result} differs from expected {expected} by {abs_circ_distance_expected} degrees" + ) + + +def test_circ_median_with_series() -> None: + """Test that pandas Series work correctly.""" + angles = pd.Series([350, 0, 10, 5]) + result = circ_median(angles) + assert isinstance(result, (float, np.floating)) + assert result == pytest.approx(2.5) + + +def test_circ_median_with_nan() -> None: + """Test that NaN values are handled correctly.""" + angles = [0, 10, np.nan, 20] + result = circ_median(angles) + assert not np.isnan(result) + assert result == pytest.approx(10) + + +def test_circ_median_all_nan() -> None: + """Test that all NaN returns NaN.""" + angles = [np.nan, np.nan, np.nan] + result = circ_median(angles) + assert np.isnan(result) + + +@pytest.mark.parametrize("range_360", [True, False]) +def test_circ_median_groupby(*, range_360: bool) -> None: + """Test usage with pandas groupby.""" + df = pd.DataFrame({"group": ["A", "A", "A", "B", "B", "B"], "angle": [350, 0, 15, 170, 181, 195]}) + result = df.groupby("group")["angle"].apply(lambda x: circ_median(x, range_360=range_360)) + + assert len(result) == 2 + if range_360: + assert result["A"] == pytest.approx(0, abs=1e-3) + assert result["B"] == pytest.approx(181, abs=1e-3) + else: + assert result["A"] == pytest.approx(0, abs=1e-3) + assert result["B"] == pytest.approx(-179, abs=1e-3) + + +def test_circ_median_performance_comparison() -> None: + """Verify that results are consistent between fast and exact methods on larger dataset.""" + rng = 
np.random.default_rng(0) + # Generate lots of angles near 0 degrees + angles = np.concatenate([rng.normal(4, 20, 500) % 360, rng.normal(354, 20, 500) % 360]) + + result_fast = circ_median(angles) + result_exact = circ_median_exact(angles) + + abs_circ_distance = abs(circ_diff(result_fast, result_exact)) + assert abs_circ_distance < 1e-1, f"Fast and exact methods differ by {abs_circ_distance} degrees on large dataset" + + +def test_circ_median_range_conversion() -> None: + """Test that range_360 parameter works correctly.""" + angles = [350, 0, 10] + + result_360 = circ_median(angles, range_360=True) + assert 0 <= result_360 < 360 + + result_180 = circ_median(angles, range_360=False) + assert -180 <= result_180 < 180 + + # They should represent the same angle + abs_circ_distance = abs(circ_diff(result_360, result_180)) + assert abs_circ_distance < 1e-3 diff --git a/wind_up/circular_math.py b/wind_up/circular_math.py index bddff74..1568ec4 100644 --- a/wind_up/circular_math.py +++ b/wind_up/circular_math.py @@ -4,6 +4,7 @@ import numpy as np import numpy.typing as npt +from scipy.stats import circmean def circ_diff(angle1: float | npt.NDArray | list, angle2: float | npt.NDArray | list) -> float | npt.NDArray: @@ -20,3 +21,53 @@ def circ_diff(angle1: float | npt.NDArray | list, angle2: float | npt.NDArray | angle2 = np.array(angle2) return np.mod(angle1 - angle2 + 180, 360) - 180 + + +def circ_median(angles: npt.NDArray, axis: int | None = None, *, range_360: bool = True) -> float | npt.NDArray: + """Calculate the circular median of angles. + + Uses an efficient approximation: centers data around the circular mean, + computes ordinary median, then rotates back. + + :param angles: Array of angles in degrees. Can be a numpy array, list, or pandas Series. + Input can be in any range; it will be normalized internally. + :param axis: Axis along which to compute the median. If None, compute over flattened array. + :param range_360: If True, return result in [0, 360). 
If False, return result in [-180, 180). + :return: Circular median in degrees + """ + # Convert to numpy array (handles lists, Series, etc.) + angles = np.asarray(angles) + + # Handle axis parameter + if axis is not None: + return np.apply_along_axis(lambda x: circ_median(x, axis=None, range_360=range_360), axis, angles) + + # Flatten if needed + angles = angles.flatten() + + # Remove NaN values + angles = angles[~np.isnan(angles)] + + if len(angles) == 0: + return np.nan + + # Normalize angles to [0, 360) for computation + angles_normalized = np.mod(angles, 360) + + # Calculate circular mean (in radians for scipy, convert back to degrees) + mean_angle = circmean(angles_normalized, high=360, low=0) + + # Center the data around 180 (subtract mean, add 180) + centered_angles = np.mod(angles_normalized - mean_angle + 180, 360) + + # Compute ordinary median on centered data + median_centered = np.median(centered_angles) + + # Rotate back (subtract 180, add mean back) + median_angle = np.mod(median_centered - 180 + mean_angle, 360) + + # Convert to requested range + if range_360: + return median_angle + # Convert to [-180, 180) + return np.mod(median_angle + 180, 360) - 180 From 9e6690dfed9962cb05efbf695e9af2c7211a8fe3 Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Thu, 16 Oct 2025 10:59:32 +0100 Subject: [PATCH 13/34] add rolling_circ_mean --- tests/test_rolling_circ_mean.py | 105 ++++++++++++++++++++++++++++++++ wind_up/circular_math.py | 26 ++++++++ 2 files changed, 131 insertions(+) create mode 100644 tests/test_rolling_circ_mean.py diff --git a/tests/test_rolling_circ_mean.py b/tests/test_rolling_circ_mean.py new file mode 100644 index 0000000..6b7395a --- /dev/null +++ b/tests/test_rolling_circ_mean.py @@ -0,0 +1,105 @@ +import timeit + +import numpy as np +import pandas as pd +import pytest +from pandas.testing import assert_series_equal +from scipy.stats import circmean + +from wind_up.circular_math import rolling_circ_mean + + 
+@pytest.mark.parametrize("range_360", [True, False]) +def test_rolling_circ_mean(*, range_360: bool) -> None: + timestamps = pd.date_range(start="2024-01-01", periods=8, freq="600s") + input_df = pd.DataFrame( + { + "wind_direction_1": 2 * [359, 2.1, np.nan, 1], + "wind_direction_2": 2 * [345, 30, np.nan, np.nan], + "nacelle_direction": 2 * [359, 3, 358, 1], + }, + index=timestamps, + ) + + for col in input_df.columns: + result = rolling_circ_mean(input_df[col], window=4, min_periods=1, range_360=range_360) + expected = ( + ( + input_df[col] + .rolling(window=4, min_periods=1) + .apply(lambda x: circmean(x, low=0, high=360, nan_policy="omit")) + ) + if range_360 + else ( + input_df[col] + .rolling(window=4, min_periods=1) + .apply(lambda x: (circmean(x, low=-180, high=180, nan_policy="omit"))) + ) + ) + assert_series_equal(result, expected) + + +def test_circ_mean_resample_degrees_all_nans() -> None: + """Test circ_mean_resample_degrees with data which is all NaN.""" + timestamps = pd.date_range(start="2024-01-01", periods=8, freq="1s") + input_df = pd.DataFrame( + {"wind_direction": 8 * [np.nan], "nacelle_direction": 8 * [np.nan]}, + index=timestamps, + ) + + for col in input_df.columns: + result = rolling_circ_mean(input_df[col], window=4, min_periods=1) + expected = ( + input_df[col] + .rolling(window=4, min_periods=1) + .apply(lambda x: circmean(x, low=0, high=360, nan_policy="omit")) + ) + assert_series_equal(result, expected) + + +def test_performance_comparison() -> None: + """Test that circ_mean_resample_degrees is faster than using circmean directly.""" + # Generate a large dataset + n_rows = 10_000 + n_cols = 1 + timestamps = pd.date_range(start="2024-01-01", periods=n_rows, freq="1s") + + # Generate random angles between 0 and 360 + rng = np.random.default_rng(0) + data = rng.uniform(0, 359.9, size=(n_rows, n_cols)) + # Add some NaN values (5% of data) + nan_rate = 0.05 + nan_mask = rng.random(size=data.shape) < nan_rate + data[nan_mask] = np.nan + + 
input_df = pd.DataFrame(data, columns=[f"direction_{i}" for i in range(n_cols)], index=timestamps) + + window_size = 40 + min_periods = 10 + + col = "direction_0" + + def new_method() -> float: + return rolling_circ_mean(input_df[col], window=window_size, min_periods=min_periods) + + def scipy_method() -> float: + return ( + input_df[col] + .rolling(window=window_size, min_periods=min_periods) + .apply(lambda x: circmean(x, low=0, high=360, nan_policy="omit")) + ) + + assert_series_equal(new_method(), scipy_method()) + + # compare speed of new vs scipy_method + number_of_runs = 5 + rolling_circ_mean_time = timeit.timeit(new_method, number=number_of_runs) + apply_scipy_circmean_time = timeit.timeit( + scipy_method, + number=number_of_runs, + ) + minimum_speed_up = 10 + assert rolling_circ_mean_time < apply_scipy_circmean_time / minimum_speed_up, ( + f"New method ({rolling_circ_mean_time:.2f}s) should be at least {minimum_speed_up}x faster than " + f"old method ({apply_scipy_circmean_time:.2f}s)" + ) diff --git a/wind_up/circular_math.py b/wind_up/circular_math.py index 1568ec4..6050619 100644 --- a/wind_up/circular_math.py +++ b/wind_up/circular_math.py @@ -4,6 +4,7 @@ import numpy as np import numpy.typing as npt +import pandas as pd from scipy.stats import circmean @@ -71,3 +72,28 @@ def circ_median(angles: npt.NDArray, axis: int | None = None, *, range_360: bool return median_angle # Convert to [-180, 180) return np.mod(median_angle + 180, 360) - 180 + + +def rolling_circ_mean(series: pd.Series, window: int, min_periods: int, *, range_360: bool = True) -> pd.Series: + """Efficient rolling circular mean for angles in degrees. + + :param series: Series of angles in degrees. + :param window: Size of the rolling window. + :param min_periods: Minimum number of observations required to have a value. + :param range_360: If True, return result in [0, 360). If False, return result in [-180, 180). + :return: Series with rolling circular mean. 
+ """ + rad_values = np.deg2rad(series) + sin_series = pd.Series(np.sin(rad_values), index=series.index) + cos_series = pd.Series(np.cos(rad_values), index=series.index) + + sin_rolling = sin_series.rolling(window=window, min_periods=min_periods).mean() + cos_rolling = cos_series.rolling(window=window, min_periods=min_periods).mean() + + result = (np.rad2deg(np.arctan2(sin_rolling, cos_rolling)) + 360) % 360 + + if not range_360: + # Convert to [-180, 180) + result = np.mod(result + 180, 360) - 180 + + return result From f8c84bd3460f1ed8681737446d2f2939496b6c76 Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Thu, 16 Oct 2025 11:33:07 +0100 Subject: [PATCH 14/34] add center to rolling_circ_mean --- wind_up/circular_math.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/wind_up/circular_math.py b/wind_up/circular_math.py index 6050619..9cb56c1 100644 --- a/wind_up/circular_math.py +++ b/wind_up/circular_math.py @@ -74,7 +74,9 @@ def circ_median(angles: npt.NDArray, axis: int | None = None, *, range_360: bool return np.mod(median_angle + 180, 360) - 180 -def rolling_circ_mean(series: pd.Series, window: int, min_periods: int, *, range_360: bool = True) -> pd.Series: +def rolling_circ_mean( + series: pd.Series, window: int, min_periods: int, *, center: bool = False, range_360: bool = True +) -> pd.Series: """Efficient rolling circular mean for angles in degrees. :param series: Series of angles in degrees. 
@@ -87,8 +89,8 @@ def rolling_circ_mean(series: pd.Series, window: int, min_periods: int, *, range sin_series = pd.Series(np.sin(rad_values), index=series.index) cos_series = pd.Series(np.cos(rad_values), index=series.index) - sin_rolling = sin_series.rolling(window=window, min_periods=min_periods).mean() - cos_rolling = cos_series.rolling(window=window, min_periods=min_periods).mean() + sin_rolling = sin_series.rolling(window=window, min_periods=min_periods, center=center).mean() + cos_rolling = cos_series.rolling(window=window, min_periods=min_periods, center=center).mean() result = (np.rad2deg(np.arctan2(sin_rolling, cos_rolling)) + 360) % 360 From bd39b14de397f7da573bb2bfd2b0316a62055377 Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Thu, 16 Oct 2025 11:33:51 +0100 Subject: [PATCH 15/34] use circ_median and rolling_circ_mean --- wind_up/northing.py | 20 ++++++++------------ wind_up/optimize_northing.py | 33 ++++++++++++++------------------- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/wind_up/northing.py b/wind_up/northing.py index 9489e2d..ea47842 100644 --- a/wind_up/northing.py +++ b/wind_up/northing.py @@ -7,9 +7,8 @@ import numpy as np import pandas as pd -from scipy.stats import circmean -from wind_up.circular_math import circ_diff +from wind_up.circular_math import circ_diff, circ_median, rolling_circ_mean from wind_up.constants import ( RAW_YAWDIR_COL, REANALYSIS_WD_COL, @@ -31,10 +30,12 @@ def _add_rolling_northing_error(wf_df: pd.DataFrame, *, north_ref_wd_col: str, t wf_df.loc[~(wf_df["WindSpeedMean"] >= ws_ll), "apparent_northing_error"] = np.nan rolling_days = 20 rows_per_day = 24 * 3600 / timebase_s - wf_df["rolling_northing_error"] = ( - wf_df["apparent_northing_error"] - .rolling(window=round(rolling_days * rows_per_day), min_periods=round(rolling_days * rows_per_day // 3)) - .median() + wf_df["rolling_northing_error"] = rolling_circ_mean( + wf_df["apparent_northing_error"], + window=round(rolling_days * rows_per_day), + 
min_periods=round(rolling_days * rows_per_day // 3), + center=True, + range_360=False, ) return wf_df @@ -187,12 +188,7 @@ def _calc_wf_yawdir_df( wf_yawdir_df = wf_df.groupby(TIMESTAMP_COL).agg( wf_yawdir=pd.NamedAgg( column=best_yaw_dir_col, - aggfunc=lambda x: ( - ((x - circmean(x, low=0, high=360, nan_policy="omit") + 180) % 360).median(skipna=True) - + circmean(x, low=0, high=360, nan_policy="omit") - - 180 - ) - % 360, + aggfunc=lambda x: circ_median(x), ), num_turbines=pd.NamedAgg(column=best_yaw_dir_col, aggfunc=lambda x: x.count()), ) diff --git a/wind_up/optimize_northing.py b/wind_up/optimize_northing.py index d6c100f..0d57363 100644 --- a/wind_up/optimize_northing.py +++ b/wind_up/optimize_northing.py @@ -14,7 +14,7 @@ from scipy.stats import circmean from wind_up.backporting import strict_zip -from wind_up.circular_math import circ_diff +from wind_up.circular_math import circ_diff, circ_median, rolling_circ_mean from wind_up.constants import ( RAW_POWER_COL, RAW_YAWDIR_COL, @@ -123,7 +123,7 @@ def _northing_score( max_component = max(0, wtg_df[f"long_rolling_diff_to_{north_ref_wd_col}"].abs().max() - 4) ** 2 # this component penalizes the median north error of filtered data being far from 0 - median_component = max(0, abs(wtg_df[f"filt_diff_to_{north_ref_wd_col}"].median()) - 0.1) ** 2 + median_component = max(0, abs(circ_median(wtg_df[f"filt_diff_to_{north_ref_wd_col}"], range_360=False)) - 0.1) ** 2 # this component penalizes raw data having any north errors max_weight = rated_power * YAW_OK_PW_FRACTION @@ -175,25 +175,20 @@ def _add_northed_ok_diff_and_rolling_cols( wtg_df = _add_northing_ok_and_diff_cols(wtg_df, north_ref_wd_col=north_ref_wd_col, northed_col=northed_col) rolling_hours = 6 rows_per_hour = 3600 / timebase_s - # circular medians would be better but are harder to implement with good performance - wtg_df[f"short_rolling_diff_to_{north_ref_wd_col}"] = ( - wtg_df[f"filt_diff_to_{north_ref_wd_col}"] - .rolling( - center=True, - 
window=round(rolling_hours * rows_per_hour), - min_periods=round(rolling_hours * rows_per_hour // 3), - ) - .median() + wtg_df[f"short_rolling_diff_to_{north_ref_wd_col}"] = rolling_circ_mean( + wtg_df[f"filt_diff_to_{north_ref_wd_col}"], + center=True, + window=round(rolling_hours * rows_per_hour), + min_periods=round(rolling_hours * rows_per_hour // 3), + range_360=False, ) rolling_hours = 15 * 24 - wtg_df[f"long_rolling_diff_to_{north_ref_wd_col}"] = ( - wtg_df[f"filt_diff_to_{north_ref_wd_col}"] - .rolling( - center=True, - window=round(rolling_hours * rows_per_hour), - min_periods=round(rolling_hours * rows_per_hour // 3), - ) - .median() + wtg_df[f"long_rolling_diff_to_{north_ref_wd_col}"] = rolling_circ_mean( + wtg_df[f"filt_diff_to_{north_ref_wd_col}"], + center=True, + window=round(rolling_hours * rows_per_hour), + min_periods=round(rolling_hours * rows_per_hour // 3), + range_360=False, ) return wtg_df From 9ebdf2833cb0db34abfbb7f8949d505499756bf2 Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Thu, 16 Oct 2025 11:34:10 +0100 Subject: [PATCH 16/34] update tests --- tests/test_northing.py | 33 +++++++++++++------------ tests/test_optimize_northing.py | 44 ++++++++++++++++++++++----------- tests/test_rolling_circ_mean.py | 14 +++++------ 3 files changed, 53 insertions(+), 38 deletions(-) diff --git a/tests/test_northing.py b/tests/test_northing.py index a3cfd48..a2a433f 100644 --- a/tests/test_northing.py +++ b/tests/test_northing.py @@ -3,6 +3,7 @@ import pandas as pd import pytest +from wind_up.circular_math import circ_median from wind_up.constants import REANALYSIS_WD_COL from wind_up.models import WindUpConfig from wind_up.northing import _calc_max_abs_north_errs, apply_northing_corrections @@ -22,15 +23,15 @@ def test_apply_northing_corrections(test_lsa_t13_config: WindUpConfig) -> None: wf_df = _scada_multi_index(wf_df) wf_df_after_northing = apply_northing_corrections(wf_df, cfg=cfg, north_ref_wd_col=REANALYSIS_WD_COL, plot_cfg=None) - 
median_yaw_before_northing = wf_df.groupby("TurbineName")["YawAngleMean"].median() - median_yaw_after_northing = wf_df_after_northing.groupby("TurbineName")["YawAngleMean"].median() - assert median_yaw_before_northing.min() == pytest.approx(232.67355346679688) - assert median_yaw_before_northing.max() == pytest.approx(232.67355346679688) - assert median_yaw_after_northing["LSA_T07"] == pytest.approx(219.69126892089844) - assert median_yaw_after_northing["LSA_T09"] == pytest.approx(245.49956512451172) - assert median_yaw_after_northing["LSA_T12"] == pytest.approx(177.40547943115234) - assert median_yaw_after_northing["LSA_T13"] == pytest.approx(235.22855377197266) - assert median_yaw_after_northing["LSA_T14"] == pytest.approx(224.92881774902344) + median_yaw_before_northing = wf_df.groupby("TurbineName")["YawAngleMean"].apply(circ_median) + median_yaw_after_northing = wf_df_after_northing.groupby("TurbineName")["YawAngleMean"].apply(circ_median) + assert median_yaw_before_northing.min() == pytest.approx(237.45018005371094) + assert median_yaw_before_northing.max() == pytest.approx(237.45018005371094) + assert median_yaw_after_northing["LSA_T07"] == pytest.approx(222.450180) + assert median_yaw_after_northing["LSA_T09"] == pytest.approx(253.450180) + assert median_yaw_after_northing["LSA_T12"] == pytest.approx(77.712486) + assert median_yaw_after_northing["LSA_T13"] == pytest.approx(240.450180) + assert median_yaw_after_northing["LSA_T14"] == pytest.approx(228.450180) abs_north_errs_before_northing = _calc_max_abs_north_errs( wf_df, north_ref_wd_col=REANALYSIS_WD_COL, timebase_s=cfg.timebase_s @@ -38,10 +39,10 @@ def test_apply_northing_corrections(test_lsa_t13_config: WindUpConfig) -> None: abs_north_errs_after_northing = _calc_max_abs_north_errs( wf_df_after_northing, north_ref_wd_col=REANALYSIS_WD_COL, timebase_s=cfg.timebase_s ) - assert abs_north_errs_before_northing.min() == pytest.approx(7.88920288085938) - assert abs_north_errs_before_northing.max() == 
pytest.approx(7.88920288085938) - assert abs_north_errs_after_northing["LSA_T07"] == pytest.approx(18.400006103515604) - assert abs_north_errs_after_northing["LSA_T09"] == pytest.approx(23.889203) - assert abs_north_errs_after_northing["LSA_T12"] == pytest.approx(162.918431) - assert abs_north_errs_after_northing["LSA_T13"] == pytest.approx(10.889203) - assert abs_north_errs_after_northing["LSA_T14"] == pytest.approx(12.400006) + assert abs_north_errs_before_northing.min() == pytest.approx(7.978039620049401) + assert abs_north_errs_before_northing.max() == pytest.approx(7.978039620049401) + assert abs_north_errs_after_northing["LSA_T07"] == pytest.approx(18.130420466185228) + assert abs_north_errs_after_northing["LSA_T09"] == pytest.approx(126.70998424500277) + assert abs_north_errs_after_northing["LSA_T12"] == pytest.approx(164.815273171483) + assert abs_north_errs_after_northing["LSA_T13"] == pytest.approx(10.978039620049401) + assert abs_north_errs_after_northing["LSA_T14"] == pytest.approx(12.130420466185228) diff --git a/tests/test_optimize_northing.py b/tests/test_optimize_northing.py index 60053e8..a04343d 100644 --- a/tests/test_optimize_northing.py +++ b/tests/test_optimize_northing.py @@ -6,6 +6,7 @@ from pandas.testing import assert_frame_equal from tests.conftest import TEST_DATA_FLD +from wind_up.circular_math import circ_median from wind_up.constants import RAW_DOWNTIME_S_COL, RAW_POWER_COL, RAW_YAWDIR_COL, TIMESTAMP_COL from wind_up.models import WindUpConfig from wind_up.optimize_northing import _clip_wtg_north_table, auto_northing_corrections @@ -105,7 +106,15 @@ def test_clip_wtg_north_table_entry_after_start() -> None: assert_frame_equal(actual_wtg_north_table, expected_wtg_north_table) -def test_auto_northing_corrections(test_homer_config: WindUpConfig) -> None: +wind_direction_offsets = [ + 0, + 343, # chosen to create lots of 0-360 wraps in the original data + 290, # chosen to create lots of 0-360 wraps in the northed data +] + + 
+@pytest.mark.parametrize(("wind_direction_offset"), wind_direction_offsets) +def test_auto_northing_corrections(test_homer_config: WindUpConfig, wind_direction_offset: float) -> None: cfg = test_homer_config cfg.lt_first_dt_utc_start = pd.Timestamp("2023-07-01 00:00:00", tz="UTC") cfg.analysis_last_dt_utc_start = pd.Timestamp("2023-07-31 23:50:00", tz="UTC") @@ -121,16 +130,25 @@ def test_auto_northing_corrections(test_homer_config: WindUpConfig) -> None: wf_df[RAW_YAWDIR_COL] = wf_df["YawAngleMean"] wf_df[RAW_DOWNTIME_S_COL] = wf_df["ShutdownDuration"] + # add wind_direction_offset to direction columns + for col in {RAW_YAWDIR_COL, "YawAngleMean", "reanalysis_wd"}: + wf_df[col] = (wf_df[col] + wind_direction_offset) % 360 + if wind_direction_offset != 0: + # in this case YawAngleMin and YawAngleMax will be incorrect, so nan them out + wf_df["YawAngleMin"] = np.nan + wf_df["YawAngleMax"] = np.nan + northed_wf_df = auto_northing_corrections(wf_df, cfg=cfg, plot_cfg=None) - median_yaw_before_northing = wf_df.groupby("TurbineName", observed=True)["YawAngleMean"].median() - median_yaw_after_northing = northed_wf_df.groupby("TurbineName", observed=True)["YawAngleMean"].median() + median_yaw_before_northing = wf_df.groupby("TurbineName", observed=True)["YawAngleMean"].apply(circ_median) + median_yaw_after_northing = northed_wf_df.groupby("TurbineName", observed=True)["YawAngleMean"].apply(circ_median) - expected_yaw_after_northing = 268.6 - assert median_yaw_before_northing["HMR_T01"] == pytest.approx(191.0) - assert median_yaw_before_northing["HMR_T02"] == pytest.approx(173.0) - assert median_yaw_after_northing["HMR_T01"] == pytest.approx(expected_yaw_after_northing, abs=1.0) - assert median_yaw_after_northing["HMR_T02"] == pytest.approx(expected_yaw_after_northing, abs=1.0) + expected_t1_yaw_after_northing = (290 + wind_direction_offset) % 360 + expected_t2_yaw_after_northing = (295 + wind_direction_offset) % 360 + assert median_yaw_before_northing["HMR_T01"] == 
pytest.approx((343 + wind_direction_offset) % 360) + assert median_yaw_before_northing["HMR_T02"] == pytest.approx((173 + wind_direction_offset) % 360) + assert median_yaw_after_northing["HMR_T01"] == pytest.approx(expected_t1_yaw_after_northing, abs=1.0) + assert median_yaw_after_northing["HMR_T02"] == pytest.approx(expected_t2_yaw_after_northing, abs=1.0) # try to mess up the yaw angles further and run again wf_df[RAW_YAWDIR_COL] = (wf_df[RAW_YAWDIR_COL] + 180) % 360 @@ -150,10 +168,6 @@ def test_auto_northing_corrections(test_homer_config: WindUpConfig) -> None: northed_wf_df = auto_northing_corrections(wf_df, cfg=cfg, plot_cfg=None) - median_yaw_before_northing = wf_df.groupby("TurbineName", observed=True)["YawAngleMean"].median() - median_yaw_after_northing = northed_wf_df.groupby("TurbineName", observed=True)["YawAngleMean"].median() - - assert median_yaw_before_northing["HMR_T01"] == pytest.approx(178.0) - assert median_yaw_before_northing["HMR_T02"] == pytest.approx(206.0) - assert median_yaw_after_northing["HMR_T01"] == pytest.approx(expected_yaw_after_northing, abs=1.0) - assert median_yaw_after_northing["HMR_T02"] == pytest.approx(expected_yaw_after_northing, abs=1.0) + median_yaw_after_northing = northed_wf_df.groupby("TurbineName", observed=True)["YawAngleMean"].apply(circ_median) + assert median_yaw_after_northing["HMR_T01"] == pytest.approx(expected_t1_yaw_after_northing, abs=1.5) + assert median_yaw_after_northing["HMR_T02"] == pytest.approx(expected_t2_yaw_after_northing, abs=1.5) diff --git a/tests/test_rolling_circ_mean.py b/tests/test_rolling_circ_mean.py index 6b7395a..18efedb 100644 --- a/tests/test_rolling_circ_mean.py +++ b/tests/test_rolling_circ_mean.py @@ -22,17 +22,17 @@ def test_rolling_circ_mean(*, range_360: bool) -> None: ) for col in input_df.columns: - result = rolling_circ_mean(input_df[col], window=4, min_periods=1, range_360=range_360) + result = rolling_circ_mean(input_df[col], window=4, min_periods=1, center=True, 
range_360=range_360) expected = ( ( input_df[col] - .rolling(window=4, min_periods=1) + .rolling(window=4, min_periods=1, center=True) .apply(lambda x: circmean(x, low=0, high=360, nan_policy="omit")) ) if range_360 else ( input_df[col] - .rolling(window=4, min_periods=1) + .rolling(window=4, min_periods=1, center=True) .apply(lambda x: (circmean(x, low=-180, high=180, nan_policy="omit"))) ) ) @@ -48,10 +48,10 @@ def test_circ_mean_resample_degrees_all_nans() -> None: ) for col in input_df.columns: - result = rolling_circ_mean(input_df[col], window=4, min_periods=1) + result = rolling_circ_mean(input_df[col], window=4, min_periods=1, center=True) expected = ( input_df[col] - .rolling(window=4, min_periods=1) + .rolling(window=4, min_periods=1, center=True) .apply(lambda x: circmean(x, low=0, high=360, nan_policy="omit")) ) assert_series_equal(result, expected) @@ -80,12 +80,12 @@ def test_performance_comparison() -> None: col = "direction_0" def new_method() -> float: - return rolling_circ_mean(input_df[col], window=window_size, min_periods=min_periods) + return rolling_circ_mean(input_df[col], window=window_size, min_periods=min_periods, center=True) def scipy_method() -> float: return ( input_df[col] - .rolling(window=window_size, min_periods=min_periods) + .rolling(window=window_size, min_periods=min_periods, center=True) .apply(lambda x: circmean(x, low=0, high=360, nan_policy="omit")) ) From 7111dfa79277bedeab93a3fde15fc3385e3c6680 Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Thu, 16 Oct 2025 11:36:56 +0100 Subject: [PATCH 17/34] ignore 3.9 mypy issue --- wind_up/optimize_northing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wind_up/optimize_northing.py b/wind_up/optimize_northing.py index 0d57363..d6b14dd 100644 --- a/wind_up/optimize_northing.py +++ b/wind_up/optimize_northing.py @@ -123,7 +123,7 @@ def _northing_score( max_component = max(0, wtg_df[f"long_rolling_diff_to_{north_ref_wd_col}"].abs().max() - 4) ** 2 # this component 
penalizes the median north error of filtered data being far from 0 - median_component = max(0, abs(circ_median(wtg_df[f"filt_diff_to_{north_ref_wd_col}"], range_360=False)) - 0.1) ** 2 + median_component = max(0, abs(circ_median(wtg_df[f"filt_diff_to_{north_ref_wd_col}"], range_360=False)) - 0.1) ** 2 # type:ignore[operator] # this component penalizes raw data having any north errors max_weight = rated_power * YAW_OK_PW_FRACTION From 754ef725d684b6285901cc4f0c204270f24b6e42 Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Thu, 16 Oct 2025 20:42:04 +0100 Subject: [PATCH 18/34] rolling_circ_median WIP --- tests/test_rolling_circ_mean.py | 4 +- tests/test_rolling_circ_median.py | 97 +++++++++++++++++++++++++++++++ wind_up/circular_math.py | 34 ++++++++++- wind_up/northing.py | 4 +- 4 files changed, 134 insertions(+), 5 deletions(-) create mode 100644 tests/test_rolling_circ_median.py diff --git a/tests/test_rolling_circ_mean.py b/tests/test_rolling_circ_mean.py index 18efedb..ab2ecd9 100644 --- a/tests/test_rolling_circ_mean.py +++ b/tests/test_rolling_circ_mean.py @@ -39,7 +39,7 @@ def test_rolling_circ_mean(*, range_360: bool) -> None: assert_series_equal(result, expected) -def test_circ_mean_resample_degrees_all_nans() -> None: +def test_rolling_circ_mean_all_nans() -> None: """Test circ_mean_resample_degrees with data which is all NaN.""" timestamps = pd.date_range(start="2024-01-01", periods=8, freq="1s") input_df = pd.DataFrame( @@ -57,7 +57,7 @@ def test_circ_mean_resample_degrees_all_nans() -> None: assert_series_equal(result, expected) -def test_performance_comparison() -> None: +def test_rolling_circ_mean_performance() -> None: """Test that circ_mean_resample_degrees is faster than using circmean directly.""" # Generate a large dataset n_rows = 10_000 diff --git a/tests/test_rolling_circ_median.py b/tests/test_rolling_circ_median.py new file mode 100644 index 0000000..25035ad --- /dev/null +++ b/tests/test_rolling_circ_median.py @@ -0,0 +1,97 @@ +import 
timeit + +import numpy as np +import pandas as pd +import pytest +from pandas.testing import assert_series_equal +from scipy.stats import circmean + +from wind_up.circular_math import rolling_circ_median, circ_median + + +@pytest.mark.parametrize("range_360", [True, False]) +def test_rolling_circ_median(*, range_360: bool) -> None: + timestamps = pd.date_range(start="2024-01-01", periods=8, freq="600s") + input_df = pd.DataFrame( + { + "wind_direction_1": 2 * [359, 2.1, np.nan, 1], + "wind_direction_2": 2 * [345, 30, np.nan, np.nan], + "nacelle_direction": 2 * [359, 3, 358, 1], + }, + index=timestamps, + ) + window = 8 + min_periods=4 + for col in input_df.columns: + result = rolling_circ_median(input_df[col], window=window, min_periods=min_periods, center=True, range_360=range_360) + expected = (input_df[col] + .rolling(window=window, min_periods=min_periods, center=True) + .apply(lambda x: circ_median(x, range_360=range_360)) + ) + assert_series_equal(result, expected) + + +def test_rolling_circ_median_all_nans() -> None: + """Test circ_mean_resample_degrees with data which is all NaN.""" + timestamps = pd.date_range(start="2024-01-01", periods=8, freq="1s") + input_df = pd.DataFrame( + {"wind_direction": 8 * [np.nan], "nacelle_direction": 8 * [np.nan]}, + index=timestamps, + ) + + for col in input_df.columns: + result = rolling_circ_median(input_df[col], window=4, min_periods=3, center=True, range_360=True) + expected = ( + input_df[col] + .rolling(window=4, min_periods=3, center=True) + .apply(lambda x: circ_median(x, range_360=True)) + ) + assert_series_equal(result, expected) + + +def test_rolling_circ_median_performance() -> None: + """Test that circ_mean_resample_degrees is faster than using circmean directly.""" + # Generate a large dataset + n_rows = 10_000 + n_cols = 1 + timestamps = pd.date_range(start="2024-01-01", periods=n_rows, freq="1s") + + # Generate random angles between 0 and 360 + rng = np.random.default_rng(0) + data = rng.uniform(0, 359.9, 
size=(n_rows, n_cols)) + # Add some NaN values (5% of data) + nan_rate = 0.05 + nan_mask = rng.random(size=data.shape) < nan_rate + data[nan_mask] = np.nan + + input_df = pd.DataFrame(data, columns=[f"direction_{i}" for i in range(n_cols)], index=timestamps) + + window_size = 40 + min_periods = 30 + + col = "direction_0" + + def new_method() -> float: + return rolling_circ_median(input_df[col], window=window_size, min_periods=min_periods, center=True) + + def scipy_method() -> float: + return ( + input_df[col] + .rolling(window=window_size, min_periods=min_periods, center=True) + .apply(lambda x: circ_median(x, range_360=True)) + ) + + assert_series_equal(new_method(), scipy_method(),atol=1) + + # compare speed of new vs scipy_method + number_of_runs = 5 + rolling_circ_mean_time = timeit.timeit(new_method, number=number_of_runs) + apply_scipy_circmean_time = timeit.timeit( + scipy_method, + number=number_of_runs, + ) + minimum_speed_up = 10 + assert rolling_circ_mean_time < apply_scipy_circmean_time / minimum_speed_up, ( + f"New method ({rolling_circ_mean_time:.2f}s) should be at least {minimum_speed_up}x faster than " + f"old method ({apply_scipy_circmean_time:.2f}s)" + ) diff --git a/wind_up/circular_math.py b/wind_up/circular_math.py index 9cb56c1..574066c 100644 --- a/wind_up/circular_math.py +++ b/wind_up/circular_math.py @@ -75,13 +75,14 @@ def circ_median(angles: npt.NDArray, axis: int | None = None, *, range_360: bool def rolling_circ_mean( - series: pd.Series, window: int, min_periods: int, *, center: bool = False, range_360: bool = True + series: pd.Series, *,window: int, min_periods: int, center: bool = False, range_360: bool = True ) -> pd.Series: """Efficient rolling circular mean for angles in degrees. :param series: Series of angles in degrees. :param window: Size of the rolling window. :param min_periods: Minimum number of observations required to have a value. + :param center: If True, set the labels at the center of the window. 
:param range_360: If True, return result in [0, 360). If False, return result in [-180, 180).
     :return: Series with rolling circular mean.
     """
@@ -99,3 +100,34 @@ def rolling_circ_mean(
         result = np.mod(result + 180, 360) - 180
 
     return result
+
+
+def rolling_circ_median(
+    series: pd.Series, *, window: int, min_periods: int, center: bool = False, range_360: bool = True
+) -> pd.Series:
+    """Efficient rolling circular (approximate) median for angles in degrees.
+
+    :param series: Series of angles in degrees.
+    :param window: Size of the rolling window.
+    :param min_periods: Minimum number of observations required to have a value.
+    :param center: If True, set the labels at the center of the window.
+    :param range_360: If True, return result in [0, 360). If False, return result in [-180, 180).
+    :return: Series with rolling circular (approximate) median.
+    """
+    # Calculate circular mean
+    circular_mean = rolling_circ_mean(series, window=window, min_periods=1, center=center, range_360=range_360)
+
+    # Center the data around 180 (subtract mean, add 180)
+    centered_series = (series - circular_mean + 180).mod(360)
+
+    # Compute ordinary median on centered data
+    median_centered = centered_series.rolling(window=window, min_periods=min_periods, center=center).median()
+
+    # Rotate back (subtract 180, add mean back)
+    result = (median_centered - 180 + circular_mean).mod(360)
+
+    # Convert to requested range
+    if range_360:
+        return result
+    # Convert to [-180, 180)
+    return np.mod(result + 180, 360) - 180
\ No newline at end of file
diff --git a/wind_up/northing.py b/wind_up/northing.py
index ea47842..632673b 100644
--- a/wind_up/northing.py
+++ b/wind_up/northing.py
@@ -8,7 +8,7 @@
 import numpy as np
 import pandas as pd
 
-from wind_up.circular_math import circ_diff, circ_median, rolling_circ_mean
+from wind_up.circular_math import circ_diff, circ_median, rolling_circ_mean, rolling_circ_median
 from wind_up.constants import (
     RAW_YAWDIR_COL,
     REANALYSIS_WD_COL,
@@ -30,7 +30,7 @@ def 
_add_rolling_northing_error(wf_df: pd.DataFrame, *, north_ref_wd_col: str, t wf_df.loc[~(wf_df["WindSpeedMean"] >= ws_ll), "apparent_northing_error"] = np.nan rolling_days = 20 rows_per_day = 24 * 3600 / timebase_s - wf_df["rolling_northing_error"] = rolling_circ_mean( + wf_df["rolling_northing_error"] = rolling_circ_median( wf_df["apparent_northing_error"], window=round(rolling_days * rows_per_day), min_periods=round(rolling_days * rows_per_day // 3), From acfcac5d565fa0695c32e410cb57ce2d3f0909e3 Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Fri, 17 Oct 2025 08:47:15 +0100 Subject: [PATCH 19/34] Update test_math_funcs.py --- tests/test_math_funcs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_math_funcs.py b/tests/test_math_funcs.py index f79a445..a9cfd8f 100644 --- a/tests/test_math_funcs.py +++ b/tests/test_math_funcs.py @@ -65,7 +65,7 @@ def _sum_circ_dist(candidate: float) -> float: return angles[tied_indices[0]] # Return circular mean of tied angles tied_angles = angles[tied_indices] - return circmean(tied_angles, high=360, low=0) + return circmean(tied_angles, high=360, low=0) % 360 test_circ_median_data = [ From 7ec31f9ca16e768d5cd2e4089c949ac42d37eae2 Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Fri, 17 Oct 2025 08:48:10 +0100 Subject: [PATCH 20/34] Update test_rolling_circ_mean.py --- tests/test_rolling_circ_mean.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/test_rolling_circ_mean.py b/tests/test_rolling_circ_mean.py index ab2ecd9..ef295cb 100644 --- a/tests/test_rolling_circ_mean.py +++ b/tests/test_rolling_circ_mean.py @@ -40,7 +40,6 @@ def test_rolling_circ_mean(*, range_360: bool) -> None: def test_rolling_circ_mean_all_nans() -> None: - """Test circ_mean_resample_degrees with data which is all NaN.""" timestamps = pd.date_range(start="2024-01-01", periods=8, freq="1s") input_df = pd.DataFrame( {"wind_direction": 8 * [np.nan], "nacelle_direction": 8 * [np.nan]}, @@ -56,17 
+55,15 @@ def test_rolling_circ_mean_all_nans() -> None: ) assert_series_equal(result, expected) - +@pytest.mark.slow def test_rolling_circ_mean_performance() -> None: - """Test that circ_mean_resample_degrees is faster than using circmean directly.""" # Generate a large dataset n_rows = 10_000 n_cols = 1 timestamps = pd.date_range(start="2024-01-01", periods=n_rows, freq="1s") - # Generate random angles between 0 and 360 rng = np.random.default_rng(0) - data = rng.uniform(0, 359.9, size=(n_rows, n_cols)) + data = np.concatenate([rng.normal(4, 20, n_rows // 2) % 360, rng.normal(354, 20, n_rows // 2) % 360]) # Add some NaN values (5% of data) nan_rate = 0.05 nan_mask = rng.random(size=data.shape) < nan_rate @@ -98,7 +95,7 @@ def scipy_method() -> float: scipy_method, number=number_of_runs, ) - minimum_speed_up = 10 + minimum_speed_up = 100 assert rolling_circ_mean_time < apply_scipy_circmean_time / minimum_speed_up, ( f"New method ({rolling_circ_mean_time:.2f}s) should be at least {minimum_speed_up}x faster than " f"old method ({apply_scipy_circmean_time:.2f}s)" From 89f259f817ea62912446c27d82b6e18f657b25dd Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Fri, 17 Oct 2025 08:49:28 +0100 Subject: [PATCH 21/34] Update test_rolling_circ_mean.py --- tests/test_rolling_circ_mean.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_rolling_circ_mean.py b/tests/test_rolling_circ_mean.py index ef295cb..fd15dd2 100644 --- a/tests/test_rolling_circ_mean.py +++ b/tests/test_rolling_circ_mean.py @@ -55,6 +55,7 @@ def test_rolling_circ_mean_all_nans() -> None: ) assert_series_equal(result, expected) + @pytest.mark.slow def test_rolling_circ_mean_performance() -> None: # Generate a large dataset From 3e57d0707a9bcd3cdbe21aafa4eae2ec1ed16e1a Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Fri, 17 Oct 2025 08:52:24 +0100 Subject: [PATCH 22/34] Update test_rolling_circ_median.py --- tests/test_rolling_circ_median.py | 65 ++++++++++++++++--------------- 1 file changed, 33 
insertions(+), 32 deletions(-) diff --git a/tests/test_rolling_circ_median.py b/tests/test_rolling_circ_median.py index 25035ad..242d932 100644 --- a/tests/test_rolling_circ_median.py +++ b/tests/test_rolling_circ_median.py @@ -4,9 +4,9 @@ import pandas as pd import pytest from pandas.testing import assert_series_equal -from scipy.stats import circmean -from wind_up.circular_math import rolling_circ_median, circ_median +from tests.test_math_funcs import circ_median_exact +from wind_up.circular_math import circ_diff, rolling_circ_median_approx @pytest.mark.parametrize("range_360", [True, False]) @@ -20,19 +20,19 @@ def test_rolling_circ_median(*, range_360: bool) -> None: }, index=timestamps, ) - window = 8 - min_periods=4 + window = 4 + min_periods = 3 for col in input_df.columns: - result = rolling_circ_median(input_df[col], window=window, min_periods=min_periods, center=True, range_360=range_360) - expected = (input_df[col] - .rolling(window=window, min_periods=min_periods, center=True) - .apply(lambda x: circ_median(x, range_360=range_360)) - ) - assert_series_equal(result, expected) + result = rolling_circ_median_approx( + input_df[col], window=window, min_periods=min_periods, center=True, range_360=range_360 + ) + expected = input_df[col].rolling(window=window, min_periods=min_periods, center=True).apply(circ_median_exact) + if not range_360: + expected = ((expected + 180) % 360) - 180 + assert_series_equal(result, expected, atol=0.1) def test_rolling_circ_median_all_nans() -> None: - """Test circ_mean_resample_degrees with data which is all NaN.""" timestamps = pd.date_range(start="2024-01-01", periods=8, freq="1s") input_df = pd.DataFrame( {"wind_direction": 8 * [np.nan], "nacelle_direction": 8 * [np.nan]}, @@ -40,25 +40,19 @@ def test_rolling_circ_median_all_nans() -> None: ) for col in input_df.columns: - result = rolling_circ_median(input_df[col], window=4, min_periods=3, center=True, range_360=True) - expected = ( - input_df[col] - .rolling(window=4, 
min_periods=3, center=True) - .apply(lambda x: circ_median(x, range_360=True)) - ) + result = rolling_circ_median_approx(input_df[col], window=4, min_periods=3, center=True, range_360=True) + expected = input_df[col].rolling(window=4, min_periods=3, center=True).apply(lambda x: circ_median_exact(x)) assert_series_equal(result, expected) def test_rolling_circ_median_performance() -> None: - """Test that circ_mean_resample_degrees is faster than using circmean directly.""" # Generate a large dataset n_rows = 10_000 n_cols = 1 timestamps = pd.date_range(start="2024-01-01", periods=n_rows, freq="1s") - # Generate random angles between 0 and 360 rng = np.random.default_rng(0) - data = rng.uniform(0, 359.9, size=(n_rows, n_cols)) + data = np.concatenate([rng.normal(4, 20, n_rows // 2) % 360, rng.normal(354, 20, n_rows // 2) % 360]) # Add some NaN values (5% of data) nan_rate = 0.05 nan_mask = rng.random(size=data.shape) < nan_rate @@ -72,26 +66,33 @@ def test_rolling_circ_median_performance() -> None: col = "direction_0" def new_method() -> float: - return rolling_circ_median(input_df[col], window=window_size, min_periods=min_periods, center=True) + return rolling_circ_median_approx(input_df[col], window=window_size, min_periods=min_periods, center=True) - def scipy_method() -> float: + def exact_method() -> float: return ( input_df[col] .rolling(window=window_size, min_periods=min_periods, center=True) - .apply(lambda x: circ_median(x, range_360=True)) + .apply(lambda x: circ_median_exact(x)) ) - assert_series_equal(new_method(), scipy_method(),atol=1) + new_method_results = new_method() + exact_method_results = exact_method() + residuals = circ_diff(new_method_results, exact_method_results) + # results are not expected to exactly match + assert abs(residuals.mean()) < 2e-2 + assert residuals.abs().mean() < 3e-1 + assert residuals.abs().max() < 6 + assert_series_equal(new_method_results, exact_method_results, atol=360) # big atol to avoid wrap error - # compare speed of 
new vs scipy_method + # check speed number_of_runs = 5 - rolling_circ_mean_time = timeit.timeit(new_method, number=number_of_runs) - apply_scipy_circmean_time = timeit.timeit( - scipy_method, + rolling_circ_median_time = timeit.timeit(new_method, number=number_of_runs) + apply_exact_time = timeit.timeit( + exact_method, number=number_of_runs, ) - minimum_speed_up = 10 - assert rolling_circ_mean_time < apply_scipy_circmean_time / minimum_speed_up, ( - f"New method ({rolling_circ_mean_time:.2f}s) should be at least {minimum_speed_up}x faster than " - f"old method ({apply_scipy_circmean_time:.2f}s)" + minimum_speed_up = 100 + assert rolling_circ_median_time < apply_exact_time / minimum_speed_up, ( + f"New method ({rolling_circ_median_time:.2f}s) should be at least {minimum_speed_up}x faster than " + f"old method ({apply_exact_time:.2f}s)" ) From 2d5be8088bdafc7d8f71689c9535755e796a4c0d Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Fri, 17 Oct 2025 08:52:51 +0100 Subject: [PATCH 23/34] update rolling_circ_median_approx --- wind_up/circular_math.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/wind_up/circular_math.py b/wind_up/circular_math.py index 574066c..c96dc35 100644 --- a/wind_up/circular_math.py +++ b/wind_up/circular_math.py @@ -75,7 +75,7 @@ def circ_median(angles: npt.NDArray, axis: int | None = None, *, range_360: bool def rolling_circ_mean( - series: pd.Series, *,window: int, min_periods: int, center: bool = False, range_360: bool = True + series: pd.Series, *, window: int, min_periods: int, center: bool = False, range_360: bool = True ) -> pd.Series: """Efficient rolling circular mean for angles in degrees. 
@@ -102,7 +102,7 @@ def rolling_circ_mean( return result -def rolling_circ_median( +def rolling_circ_median_approx( series: pd.Series, *, window: int, min_periods: int, center: bool = False, range_360: bool = True ) -> pd.Series: """Efficient rolling circular (approximate) median for angles in degrees. @@ -112,22 +112,19 @@ def rolling_circ_median( :param min_periods: Minimum number of observations required to have a value. :param center: If True, set the labels at the center of the window. :param range_360: If True, return result in [0, 360). If False, return result in [-180, 180). - :return: Series with rolling circular mean. + :return: Series with rolling circular median. """ - # Calculate circular mean - circular_mean = rolling_circ_mean(series, window=window, min_periods=1, center=center, range_360=range_360) + rad_values = np.deg2rad(series) + sin_series = pd.Series(np.sin(rad_values), index=series.index) + cos_series = pd.Series(np.cos(rad_values), index=series.index) - # Center the data around 180 (subtract mean, add 180) - centered_series = (series - circular_mean + 180).mod(360) + sin_rolling = sin_series.rolling(window=window, min_periods=min_periods, center=center).median() + cos_rolling = cos_series.rolling(window=window, min_periods=min_periods, center=center).median() - # Compute ordinary median on centered data - median_centered = centered_series.rolling(window=window, min_periods=min_periods, center=center).median() + result = (np.rad2deg(np.arctan2(sin_rolling, cos_rolling)) + 360) % 360 - # Rotate back (subtract 180, add mean back) - result = (median_centered - 180 + circular_mean).mod(360) + if not range_360: + # Convert to [-180, 180) + result = np.mod(result + 180, 360) - 180 - # Convert to requested range - if range_360: - return result - # Convert to [-180, 180) - return np.mod(result + 180, 360) - 180 \ No newline at end of file + return result From a0c04636dd82b74005036feefbb431e9e90887c7 Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: 
Fri, 17 Oct 2025 08:53:05 +0100 Subject: [PATCH 24/34] Update northing.py --- wind_up/northing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wind_up/northing.py b/wind_up/northing.py index 632673b..17ccb4d 100644 --- a/wind_up/northing.py +++ b/wind_up/northing.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from wind_up.circular_math import circ_diff, circ_median, rolling_circ_mean, rolling_circ_median +from wind_up.circular_math import circ_diff, circ_median, rolling_circ_median_approx from wind_up.constants import ( RAW_YAWDIR_COL, REANALYSIS_WD_COL, @@ -30,7 +30,7 @@ def _add_rolling_northing_error(wf_df: pd.DataFrame, *, north_ref_wd_col: str, t wf_df.loc[~(wf_df["WindSpeedMean"] >= ws_ll), "apparent_northing_error"] = np.nan rolling_days = 20 rows_per_day = 24 * 3600 / timebase_s - wf_df["rolling_northing_error"] = rolling_circ_median( + wf_df["rolling_northing_error"] = rolling_circ_median_approx( wf_df["apparent_northing_error"], window=round(rolling_days * rows_per_day), min_periods=round(rolling_days * rows_per_day // 3), From 5f602a18b003a80dcebdf3a71e3a9584b5619b41 Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Fri, 17 Oct 2025 08:53:34 +0100 Subject: [PATCH 25/34] Update optimize_northing.py --- wind_up/optimize_northing.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/wind_up/optimize_northing.py b/wind_up/optimize_northing.py index d6b14dd..1da2977 100644 --- a/wind_up/optimize_northing.py +++ b/wind_up/optimize_northing.py @@ -14,7 +14,7 @@ from scipy.stats import circmean from wind_up.backporting import strict_zip -from wind_up.circular_math import circ_diff, circ_median, rolling_circ_mean +from wind_up.circular_math import circ_diff, circ_median, rolling_circ_median_approx from wind_up.constants import ( RAW_POWER_COL, RAW_YAWDIR_COL, @@ -175,7 +175,7 @@ def _add_northed_ok_diff_and_rolling_cols( wtg_df = _add_northing_ok_and_diff_cols(wtg_df, 
north_ref_wd_col=north_ref_wd_col, northed_col=northed_col) rolling_hours = 6 rows_per_hour = 3600 / timebase_s - wtg_df[f"short_rolling_diff_to_{north_ref_wd_col}"] = rolling_circ_mean( + wtg_df[f"short_rolling_diff_to_{north_ref_wd_col}"] = rolling_circ_median_approx( wtg_df[f"filt_diff_to_{north_ref_wd_col}"], center=True, window=round(rolling_hours * rows_per_hour), @@ -183,7 +183,7 @@ def _add_northed_ok_diff_and_rolling_cols( range_360=False, ) rolling_hours = 15 * 24 - wtg_df[f"long_rolling_diff_to_{north_ref_wd_col}"] = rolling_circ_mean( + wtg_df[f"long_rolling_diff_to_{north_ref_wd_col}"] = rolling_circ_median_approx( wtg_df[f"filt_diff_to_{north_ref_wd_col}"], center=True, window=round(rolling_hours * rows_per_hour), @@ -194,14 +194,8 @@ def _add_northed_ok_diff_and_rolling_cols( def _calc_good_north_offset(section_df: pd.DataFrame, north_ref_wd_col: str) -> float: - # cheap and cheerful circ median north_errors = section_df[f"filt_diff_to_{north_ref_wd_col}"] - north_errors_circmean = circmean(north_errors, high=180, low=-180, nan_policy="omit") - ## rotate the data so 0 represents the circmean, calculate median, then rotate back - north_errors_zero_centred = (north_errors - north_errors_circmean + 180) % 360 - 180 - circmedian = (np.nanmedian(north_errors_zero_centred) + north_errors_circmean + 180) % 360 - 180 - # return negative circmedian (the offset to add to make median north error 0) - return -circmedian + return -circ_median(north_errors) def _calc_north_offset_col( From cbdf5cb1a76fb6f54464cb8e2bcf4f1b9c32cffe Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Fri, 17 Oct 2025 08:56:57 +0100 Subject: [PATCH 26/34] Update test_northing.py --- tests/test_northing.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_northing.py b/tests/test_northing.py index a2a433f..2a1c85f 100644 --- a/tests/test_northing.py +++ b/tests/test_northing.py @@ -39,10 +39,10 @@ def 
test_apply_northing_corrections(test_lsa_t13_config: WindUpConfig) -> None: abs_north_errs_after_northing = _calc_max_abs_north_errs( wf_df_after_northing, north_ref_wd_col=REANALYSIS_WD_COL, timebase_s=cfg.timebase_s ) - assert abs_north_errs_before_northing.min() == pytest.approx(7.978039620049401) - assert abs_north_errs_before_northing.max() == pytest.approx(7.978039620049401) - assert abs_north_errs_after_northing["LSA_T07"] == pytest.approx(18.130420466185228) - assert abs_north_errs_after_northing["LSA_T09"] == pytest.approx(126.70998424500277) - assert abs_north_errs_after_northing["LSA_T12"] == pytest.approx(164.815273171483) - assert abs_north_errs_after_northing["LSA_T13"] == pytest.approx(10.978039620049401) - assert abs_north_errs_after_northing["LSA_T14"] == pytest.approx(12.130420466185228) + assert abs_north_errs_before_northing.min() == pytest.approx(7.911393667045218) + assert abs_north_errs_before_northing.max() == pytest.approx(7.911393667045218) + assert abs_north_errs_after_northing["LSA_T07"] == pytest.approx(18.402401473162058) + assert abs_north_errs_after_northing["LSA_T09"] == pytest.approx(174.10443861846687) + assert abs_north_errs_after_northing["LSA_T12"] == pytest.approx(172.59341754958666) + assert abs_north_errs_after_northing["LSA_T13"] == pytest.approx(10.894109266368332) + assert abs_north_errs_after_northing["LSA_T14"] == pytest.approx(12.41110384502656) From 72d005ee2820f6dc347523bb4837876c8ecfb461 Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Fri, 17 Oct 2025 09:08:10 +0100 Subject: [PATCH 27/34] try to fix lint issues --- tests/test_optimize_northing.py | 1 + tests/test_rolling_circ_mean.py | 2 +- tests/test_rolling_circ_median.py | 3 ++- wind_up/optimize_northing.py | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_optimize_northing.py b/tests/test_optimize_northing.py index a04343d..1c02e54 100644 --- a/tests/test_optimize_northing.py +++ b/tests/test_optimize_northing.py @@ -113,6 +113,7 @@ 
def test_clip_wtg_north_table_entry_after_start() -> None: ] +@pytest.mark.slow @pytest.mark.parametrize(("wind_direction_offset"), wind_direction_offsets) def test_auto_northing_corrections(test_homer_config: WindUpConfig, wind_direction_offset: float) -> None: cfg = test_homer_config diff --git a/tests/test_rolling_circ_mean.py b/tests/test_rolling_circ_mean.py index fd15dd2..4d14b44 100644 --- a/tests/test_rolling_circ_mean.py +++ b/tests/test_rolling_circ_mean.py @@ -90,7 +90,7 @@ def scipy_method() -> float: assert_series_equal(new_method(), scipy_method()) # compare speed of new vs scipy_method - number_of_runs = 5 + number_of_runs = 10 rolling_circ_mean_time = timeit.timeit(new_method, number=number_of_runs) apply_scipy_circmean_time = timeit.timeit( scipy_method, diff --git a/tests/test_rolling_circ_median.py b/tests/test_rolling_circ_median.py index 242d932..73b263a 100644 --- a/tests/test_rolling_circ_median.py +++ b/tests/test_rolling_circ_median.py @@ -45,6 +45,7 @@ def test_rolling_circ_median_all_nans() -> None: assert_series_equal(result, expected) +@pytest.mark.slow def test_rolling_circ_median_performance() -> None: # Generate a large dataset n_rows = 10_000 @@ -85,7 +86,7 @@ def exact_method() -> float: assert_series_equal(new_method_results, exact_method_results, atol=360) # big atol to avoid wrap error # check speed - number_of_runs = 5 + number_of_runs = 10 rolling_circ_median_time = timeit.timeit(new_method, number=number_of_runs) apply_exact_time = timeit.timeit( exact_method, diff --git a/wind_up/optimize_northing.py b/wind_up/optimize_northing.py index 1da2977..fde9535 100644 --- a/wind_up/optimize_northing.py +++ b/wind_up/optimize_northing.py @@ -195,7 +195,7 @@ def _add_northed_ok_diff_and_rolling_cols( def _calc_good_north_offset(section_df: pd.DataFrame, north_ref_wd_col: str) -> float: north_errors = section_df[f"filt_diff_to_{north_ref_wd_col}"] - return -circ_median(north_errors) + return -float(circ_median(north_errors)) def 
_calc_north_offset_col( From 9f13f81cf18f1465633469c6a32f97f2ae90c4cd Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Fri, 17 Oct 2025 10:04:57 +0100 Subject: [PATCH 28/34] Update optimize_northing.py --- wind_up/optimize_northing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wind_up/optimize_northing.py b/wind_up/optimize_northing.py index fde9535..fc635fc 100644 --- a/wind_up/optimize_northing.py +++ b/wind_up/optimize_northing.py @@ -195,7 +195,7 @@ def _add_northed_ok_diff_and_rolling_cols( def _calc_good_north_offset(section_df: pd.DataFrame, north_ref_wd_col: str) -> float: north_errors = section_df[f"filt_diff_to_{north_ref_wd_col}"] - return -float(circ_median(north_errors)) + return -float(circ_median(north_errors, range_360=False)) def _calc_north_offset_col( From a32f659946ebdf58f2482c92adcc922ab37811da Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Fri, 17 Oct 2025 10:08:55 +0100 Subject: [PATCH 29/34] Update test_rolling_circ_median.py --- tests/test_rolling_circ_median.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_rolling_circ_median.py b/tests/test_rolling_circ_median.py index 73b263a..63fcca6 100644 --- a/tests/test_rolling_circ_median.py +++ b/tests/test_rolling_circ_median.py @@ -30,6 +30,10 @@ def test_rolling_circ_median(*, range_360: bool) -> None: if not range_360: expected = ((expected + 180) % 360) - 180 assert_series_equal(result, expected, atol=0.1) + residuals = circ_diff(result, expected) + if any(~residuals.isna()): + assert residuals.abs().max() < 1e-1 + assert_series_equal(result, expected, atol=360) # big atol to avoid wrap error def test_rolling_circ_median_all_nans() -> None: From c1857bb2cefaeb2ea782734a5b99f4f1f26053d2 Mon Sep 17 00:00:00 2001 From: Alex Clerc Date: Fri, 17 Oct 2025 10:30:07 +0100 Subject: [PATCH 30/34] Update smarteole_example.ipynb --- examples/smarteole_example.ipynb | 1040 +++++++++++++++--------------- 1 file changed, 533 insertions(+), 507 deletions(-) diff 
--git a/examples/smarteole_example.ipynb b/examples/smarteole_example.ipynb index 268d025..6dffe0b 100644 --- a/examples/smarteole_example.ipynb +++ b/examples/smarteole_example.ipynb @@ -2,14 +2,19 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, "id": "23b38e8c-78c6-4d1c-8ae7-53dc12328bd8", - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-17T09:12:24.153070Z", + "start_time": "2025-10-17T09:12:23.914101Z" + } + }, "source": [ "%load_ext autoreload\n", "%autoreload 2" - ] + ], + "outputs": [], + "execution_count": 1 }, { "cell_type": "markdown", @@ -26,20 +31,32 @@ }, { "cell_type": "code", - "execution_count": 2, "id": "6bc89c1a-0b7d-4b2e-beb7-b6cdcfa627af", "metadata": { "ExecuteTime": { - "end_time": "2025-04-03T15:41:04.780969Z", - "start_time": "2025-04-03T15:41:03.265486Z" + "end_time": "2025-10-17T09:12:25.342269Z", + "start_time": "2025-10-17T09:12:24.871220Z" } }, - "outputs": [], "source": [ "from helpers import setup_logger\n", "\n", + "import wind_up\n", + "\n", + "msg = f\"wind-up version: {wind_up.__version__}\"\n", + "print(msg)\n", "setup_logger()" - ] + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "wind-up version: 0.4.4\n" + ] + } + ], + "execution_count": 2 }, { "cell_type": "markdown", @@ -51,23 +68,13 @@ }, { "cell_type": "code", - "execution_count": 3, "id": "de9489f8-f258-44fc-ad7d-63f23714299a", "metadata": { "ExecuteTime": { - "end_time": "2025-04-03T15:41:04.830323Z", - "start_time": "2025-04-03T15:41:04.793728Z" + "end_time": "2025-10-17T09:12:25.551209Z", + "start_time": "2025-10-17T09:12:25.397174Z" } }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "All filenames are locally cached, no download necessary.\n" - ] - } - ], "source": [ "from helpers import download_zenodo_data\n", "\n", @@ -82,7 +89,17 @@ " filenames=[filename],\n", " cache_overwrite=False, # change to True if you want to force the 
re-download\n", ")" - ] + ], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "All filenames are locally cached, no download necessary.\n" + ] + } + ], + "execution_count": 3 }, { "cell_type": "markdown", @@ -94,15 +111,13 @@ }, { "cell_type": "code", - "execution_count": 4, "id": "713a7c25-b340-4851-b8fe-d2ca96571eb6", "metadata": { "ExecuteTime": { - "end_time": "2025-04-03T15:41:15.978251Z", - "start_time": "2025-04-03T15:41:06.047099Z" + "end_time": "2025-10-17T09:12:30.427538Z", + "start_time": "2025-10-17T09:12:25.593925Z" } }, - "outputs": [], "source": [ "from smarteole_utils import SmartEoleExtractor\n", "\n", @@ -112,7 +127,9 @@ " zipped_fpath=outpur_dir / filename,\n", " analysis_timebase_s=analysis_timebase_s,\n", ")" - ] + ], + "outputs": [], + "execution_count": 4 }, { "cell_type": "markdown", @@ -124,17 +141,45 @@ }, { "cell_type": "code", - "execution_count": 5, "id": "fa97d149-d877-42d0-9bbd-465f5d54a04c", "metadata": { "ExecuteTime": { - "end_time": "2025-04-03T15:41:16.312261Z", - "start_time": "2025-04-03T15:41:15.997230Z" + "end_time": "2025-10-17T09:12:30.798752Z", + "start_time": "2025-10-17T09:12:30.447644Z" } }, + "source": [ + "scada_df = extractor.unpack_smarteole_scada()\n", + "scada_df.head(3)" + ], "outputs": [ { "data": { + "text/plain": [ + " TurbineName ActivePowerMean ActivePowerSD \\\n", + "TimeStamp_StartFormat \n", + "2020-02-17 16:30:00 SMV1 2017.8566 57.0837 \n", + "2020-02-17 16:40:00 SMV1 1946.2472 91.7614 \n", + "2020-02-17 16:50:00 SMV1 1946.5069 98.3934 \n", + "\n", + " WindSpeedMean WindSpeedSD YawAngleMean YawAngleMin \\\n", + "TimeStamp_StartFormat \n", + "2020-02-17 16:30:00 13.6164 1.2411 247.884662 239.930 \n", + "2020-02-17 16:40:00 13.0139 1.0178 259.686000 259.686 \n", + "2020-02-17 16:50:00 12.5818 0.9482 259.686000 259.686 \n", + "\n", + " YawAngleMax PitchAngleMean GenRpmMean AmbientTemp \\\n", + "TimeStamp_StartFormat \n", + "2020-02-17 16:30:00 259.686 6.0648 1800.5394 11.6191 
\n", + "2020-02-17 16:40:00 259.686 3.5153 1798.7652 11.5493 \n", + "2020-02-17 16:50:00 259.686 2.1367 1799.5171 11.4929 \n", + "\n", + " ShutdownDuration \n", + "TimeStamp_StartFormat \n", + "2020-02-17 16:30:00 0 \n", + "2020-02-17 16:40:00 0 \n", + "2020-02-17 16:50:00 0 " + ], "text/html": [ "
\n", "