diff --git a/README.md b/README.md index be03043..cb91a3d 100644 --- a/README.md +++ b/README.md @@ -281,47 +281,57 @@ Benchmarked on Apple M-series (ARM), window = 100, n = 1 000 000. ### Python vs pandas -Best robustrolling configuration vs pandas (¹ `assume_finite=True`, ² `method="fast"`). - -| Function | robustrolling | pandas | speedup | -| ------------------------ | ------------- | --------- | --------- | -| `rolling_mean` ¹ | 0.78 ms | 4.58 ms | **5.9x** | -| `rolling_max` | 11.5 ms | 12.3 ms | 1.1x | -| `rolling_min` | 11.5 ms | 12.7 ms | 1.1x | -| `rolling_median` | 111 ms | 233 ms | **2.1x** | -| `rolling_variance` ² | 4.4 ms | 10.6 ms | **2.4x** | -| `rolling_skewness` ² | 10.9 ms | 10.1 ms | ~1.0x | -| `rolling_kurtosis` ² | 8.4 ms | 10.0 ms | 1.2x | -| `rolling_cov` | 16.8 ms | 19.3 ms | 1.2x | -| `rolling_cor` | 16.8 ms | 39.6 ms | **2.4x** | +| Function | robustrolling | pandas | speedup | +| -------------------- | ------------- | -------- | -------- | +| `rolling_mean` | 3.1 ms | 4.4 ms | **1.4x** | +| `rolling_max` | 11.1 ms | 11.7 ms | 1.1x | +| `rolling_min` | 11.2 ms | 12.2 ms | 1.1x | +| `rolling_median` | 106 ms | 233 ms | **2.2x** | +| `rolling_variance` | 15.2 ms | 9.6 ms | 0.6x | +| `rolling_skewness` | 14.0 ms | 9.1 ms | 0.6x | +| `rolling_kurtosis` | 14.3 ms | 9.2 ms | 0.6x | +| `rolling_cov` | 14.8 ms | 18.2 ms | **1.2x** | +| `rolling_cor` | 14.6 ms | 36.7 ms | **2.5x** | + +### Python vs Polars + +| Function | robustrolling | Polars | speedup | +| -------------------- | ------------- | -------- | -------- | +| `rolling_mean` | 3.1 ms | 8.0 ms | **2.6x** | +| `rolling_max` | 11.1 ms | 11.4 ms | 1.0x | +| `rolling_min` | 11.0 ms | 11.6 ms | 1.1x | +| `rolling_median` | 106 ms | 40.8 ms | 0.4x | +| `rolling_variance` | 15.7 ms | 16.2 ms | 1.0x | +| `rolling_skewness` | 13.9 ms | 16.0 ms | **1.2x** | +| `rolling_kurtosis` | 14.3 ms | 15.6 ms | 1.1x | ### Python — stable vs fast | Function | stable | fast | speedup | | ---------------------- | -------- | -------- | -------- | -| `mean` (assume_finite) | 3.5 ms | 0.78 ms | **4.4x** | -| `variance` | 16.1 ms | 4.4 ms | **3.7x** | -| `skewness` | 23.9 ms | 10.9 ms | **2.2x** | -| `kurtosis` | 21.7 ms | 8.4 ms | **2.6x** | +| `mean` (assume_finite) | 3.2 ms | 0.73 ms | **4.4x** | +| `variance` | 15.2 ms | 3.9 ms | **3.9x** | +| `skewness` | 13.9 ms | 10.0 ms | **1.4x** | +| `kurtosis` | 14.4 ms | 7.6 ms | **1.9x** | ### R vs slider vs RcppRoll | Function | robustrolling | slider | RcppRoll | vs slider | vs RcppRoll | | -------------------- | ------------- | ---------- | --------- | ---------- | ----------- | -| `rolling_max` | 15.9 ms | 349 ms | 181 ms | **22x** | **11x** | -| `rolling_min` | 15.2 ms | 353 ms | 181 ms | **23x** | **12x** | -| `rolling_mean` | 3.2 ms | 1 558 ms | 39.0 ms | **495x** | **12x** | -| `rolling_variance` | 16.9 ms | 2 578 ms | 320 ms | **152x** | **19x** | -| `rolling_median` | 114 ms | 10 254 ms | 2 014 ms | **90x** | **18x** | +| `rolling_max` | 15.1 ms | 338 ms | 175 ms | **22x** | **12x** | +| `rolling_min` | 14.9 ms | 350 ms | 175 ms | **24x** | **12x** | +| `rolling_mean` | 3.1 ms | 1 523 ms | 37.4 ms | **487x** | **12x** | +| `rolling_variance` | 16.0 ms | 2 477 ms | 304 ms | **154x** | **19x** | +| `rolling_median` | 112 ms | 10 084 ms | 1 938 ms | **90x** | **17x** | ### R — stable vs fast | Function | stable | fast | speedup | | ---------------------- | -------- | -------- | -------- | -| `mean` (assume_finite) | 3.3 ms | 0.80 ms | **4.2x** | -| `variance` | 16.8 ms | 4.4 ms | **3.9x** | -| `skewness` | 21.9 ms | 10.6 ms | **2.1x** | -| `kurtosis` | 21.6 ms | 8.3 ms | **2.6x** | +| `mean` (assume_finite) | 3.2 ms | 0.78 ms | **4.0x** | +| `variance` | 16.2 ms | 4.1 ms | **4.0x** | +| `skewness` | 14.5 ms | 10.3 ms | **1.4x** | +| `kurtosis` | 14.4 ms | 7.8 ms | **1.8x** | --- diff --git a/benchmarks/bench_core.cxx b/benchmarks/bench_core.cxx index 513351b..ea10cdd 100644 --- a/benchmarks/bench_core.cxx +++ b/benchmarks/bench_core.cxx @@ -1,37 +1,151 @@ +#include "MonotonicMax.hpp" +#include "MonotonicMin.hpp" #include "MultisetMedian.hpp" +#include "SlidingCovariance.hpp" +#include "SlidingMean.hpp" +#include "SlidingMoments.hpp" +#include "SlidingWelfordRing.hpp" #include #include #include #include -std::vector generate_market_data(std::size_t size) { - std::vector data(size); +static std::vector make_data(std::size_t n, double nan_frac = 0.0) { std::mt19937 gen(42); std::normal_distribution dist(100.0, 5.0); + std::uniform_real_distribution coin(0.0, 1.0); + std::vector v(n); + for (auto &x : v) + x = (coin(gen) < nan_frac) ? std::numeric_limits::quiet_NaN() + : dist(gen); + return v; +} + +const auto DATA_CLEAN = make_data(100'000); +const auto DATA_NAN = make_data(100'000, 0.15); // 15% NaN + +static void BM_MonotonicMax(benchmark::State &state) { + std::size_t w = static_cast(state.range(0)); + for (auto _ : state) { + MonotonicMax engine(w); + for (double v : DATA_CLEAN) { + engine.update(v); + benchmark::DoNotOptimize(engine.get_max()); + } + } +} +BENCHMARK(BM_MonotonicMax)->Arg(10)->Arg(100)->Arg(1000); + +static void BM_MonotonicMin(benchmark::State &state) { + std::size_t w = static_cast(state.range(0)); + for (auto _ : state) { + MonotonicMin engine(w); + for (double v : DATA_CLEAN) { + engine.update(v); + benchmark::DoNotOptimize(engine.get_min()); + } + } +} +BENCHMARK(BM_MonotonicMin)->Arg(10)->Arg(100)->Arg(1000); + +static void BM_MultisetMedian(benchmark::State &state) { + std::size_t w = static_cast(state.range(0)); + for (auto _ : state) { + MultisetMedian engine(w); + for (double v : DATA_CLEAN) { + engine.update(v); + benchmark::DoNotOptimize(engine.get_median()); + } + } +} +BENCHMARK(BM_MultisetMedian)->Arg(10)->Arg(100)->Arg(1000); + +static void BM_SlidingMean(benchmark::State &state) { + std::size_t w = static_cast(state.range(0)); + for (auto _ : state) { + SlidingMean engine(w); + for (double v : DATA_CLEAN) { + engine.update(v); + benchmark::DoNotOptimize(engine.get_mean()); + } + } +} +BENCHMARK(BM_SlidingMean)->Arg(10)->Arg(100)->Arg(1000); + +static void BM_SlidingWelfordRing(benchmark::State &state) { + std::size_t w = static_cast(state.range(0)); + for (auto _ : state) { + SlidingWelfordRing engine(w); + for (double v : DATA_CLEAN) { + engine.update(v); + benchmark::DoNotOptimize(engine.get_variance()); + } + } +} +BENCHMARK(BM_SlidingWelfordRing)->Arg(10)->Arg(100)->Arg(1000); - for (std::size_t i = 0; i < size; ++i) { - data[i] = dist(gen); +static void BM_SlidingMoments(benchmark::State &state) { + std::size_t w = static_cast(state.range(0)); + for (auto _ : state) { + SlidingMoments engine(w); + for (double v : DATA_CLEAN) { + engine.update(v); + benchmark::DoNotOptimize(engine.get_skewness()); + benchmark::DoNotOptimize(engine.get_kurtosis()); + } } - return data; } +BENCHMARK(BM_SlidingMoments)->Arg(10)->Arg(100)->Arg(1000); -const auto MARKET_DATA = generate_market_data(100000); +static std::pair, std::vector> +make_pair_data(std::size_t n) { + std::mt19937 gen(99); + std::normal_distribution dist(0.0, 1.0); + std::vector x(n), y(n); + for (std::size_t i = 0; i < n; ++i) { + x[i] = dist(gen); + y[i] = dist(gen); + } + return {x, y}; +} -template -static void BM_RollingMedian(benchmark::State &state) { - std::size_t window_size = static_cast(state.range(0)); +const auto [COV_X, COV_Y] = make_pair_data(100'000); +static void BM_SlidingCovariance(benchmark::State &state) { + std::size_t w = static_cast(state.range(0)); for (auto _ : state) { - MedianAlgo engine(window_size); + SlidingCovariance engine(w); + for (std::size_t i = 0; i < COV_X.size(); ++i) { + engine.update(COV_X[i], COV_Y[i]); + benchmark::DoNotOptimize(engine.get_covariance()); + } + } +} +BENCHMARK(BM_SlidingCovariance)->Arg(10)->Arg(100)->Arg(1000); - for (double price : MARKET_DATA) { - engine.update(price); +static void BM_MultisetMedian_NaN(benchmark::State &state) { + std::size_t w = static_cast(state.range(0)); + for (auto _ : state) { + MultisetMedian engine(w); + for (double v : DATA_NAN) { + if (std::isnan(v)) + engine.skip(); + else + engine.update(v); benchmark::DoNotOptimize(engine.get_median()); } } } +BENCHMARK(BM_MultisetMedian_NaN)->Arg(100); -BENCHMARK_TEMPLATE(BM_RollingMedian, MultisetMedian) - ->Arg(10) - ->Arg(100) - ->Arg(1000); \ No newline at end of file +static void BM_SlidingMoments_NaN(benchmark::State &state) { + std::size_t w = static_cast(state.range(0)); + for (auto _ : state) { + SlidingMoments engine(w); + for (double v : DATA_NAN) { + engine.update(v); + benchmark::DoNotOptimize(engine.get_skewness()); + } + } +} +BENCHMARK(BM_SlidingMoments_NaN)->Arg(100); diff --git a/benchmarks/bench_polars.py b/benchmarks/bench_polars.py index 73fc55f..e149adf 100644 --- a/benchmarks/bench_polars.py +++ b/benchmarks/bench_polars.py @@ -1,5 +1,5 @@ """ -Benchmark: robustrolling vs Polars rolling functions + stable vs fast. +Benchmark: robustrolling vs Polars rolling functions (stable methods only). Usage: pip install polars @@ -20,6 +20,7 @@ def bench(fn, reps: int = REPS) -> float: """Return median wall time in milliseconds over `reps` runs.""" + fn() # warmup: prime caches before timing times = [] for _ in range(reps): t0 = time.perf_counter() @@ -30,14 +31,11 @@ def bench(fn, reps: int = REPS) -> float: def make_data(n: int): x = RNG.standard_normal(n) - y = RNG.standard_normal(n) - sx = pl.Series(x) - sy = pl.Series(y) - return x, y, sx, sy + return x, pl.Series(x) def run_vs_polars(n: int) -> list[dict]: - x, y, sx, sy = make_data(n) + x, sx = make_data(n) w = WINDOW cases = [ @@ -59,31 +57,11 @@ def run_vs_polars(n: int) -> list[dict]: return results -def run_fast_vs_polars(n: int) -> list[dict]: - x, _y, sx, _sy = make_data(n) - w = WINDOW - - cases = [ - ("rolling_mean (SIMD)", lambda: rr.rolling_mean(x, w, assume_finite=True), lambda: sx.rolling_mean(w)), - ("rolling_variance (fast)", lambda: rr.rolling_variance(x, w, method="fast"), lambda: sx.rolling_var(w)), - ("rolling_skewness (fast)", lambda: rr.rolling_skewness(x, w, method="fast"), lambda: sx.rolling_skew(w)), - ("rolling_kurtosis (fast)", lambda: rr.rolling_kurtosis(x, w, method="fast"), lambda: sx.rolling_kurtosis(w)), - ] - - results = [] - for name, our_fn, pl_fn in cases: - our_ms = bench(our_fn) - pl_ms = bench(pl_fn) - results.append({"name": name, "our_ms": our_ms, "pl_ms": pl_ms, - "speedup": pl_ms / our_ms}) - return results - - def flag(v: float) -> str: return "x" if v >= 1.0 else " " -def print_table(n: int, rows: list[dict], label: str) -> None: +def print_table(n: int, rows: list[dict]) -> None: print(f"\n n = {n:,} window = {WINDOW} (median of {REPS} runs)") print(f" {'Function':<28} {'robustrolling':>14} {'polars':>10} {'speedup':>9}") print(" " + "-" * 65) @@ -98,15 +76,8 @@ def print_table(n: int, rows: list[dict], label: str) -> None: if __name__ == "__main__": print(f"robustrolling vs Polars {pl.__version__} — rolling window benchmark") print("=" * 65) - - print("\n--- stable (default) methods vs Polars ---") for n in SIZES: rows = run_vs_polars(n) - print_table(n, rows, "stable") - - print("\n\n--- fast methods vs Polars ---") - for n in SIZES: - rows = run_fast_vs_polars(n) - print_table(n, rows, "fast") + print_table(n, rows) print() diff --git a/benchmarks/bench_python.py b/benchmarks/bench_python.py index 7c36b9f..b5c8a62 100644 --- a/benchmarks/bench_python.py +++ b/benchmarks/bench_python.py @@ -20,6 +20,7 @@ def bench(fn, reps: int = REPS) -> float: """Return median wall time in milliseconds over `reps` runs.""" + fn() # warmup: prime caches before timing times = [] for _ in range(reps): t0 = time.perf_counter() diff --git a/benchmarks/bench_r.R b/benchmarks/bench_r.R index 85641bf..0eeb9db 100644 --- a/benchmarks/bench_r.R +++ b/benchmarks/bench_r.R @@ -70,6 +70,26 @@ run_vs_libs <- function(n) { do.call(rbind, rows) } +run_unique_metrics <- function(n) { + x <- as.double(rnorm(n)) + y <- as.double(rnorm(n)) + w <- WINDOW + + cases <- list( + rolling_skewness = list(robustrolling = function() rolling_skewness(x, w)), + rolling_kurtosis = list(robustrolling = function() rolling_kurtosis(x, w)), + rolling_cov = list(robustrolling = function() rolling_cov(x, y, w)), + rolling_cor = list(robustrolling = function() rolling_cor(x, y, w)) + ) + + rows <- lapply(names(cases), function(nm) { + meds <- med_ms(cases[[nm]]) + data.frame(name = nm, our_ms = meds[["robustrolling"]], + stringsAsFactors = FALSE) + }) + do.call(rbind, rows) +} + run_stable_vs_fast <- function(n) { x <- as.double(rnorm(n)) w <- WINDOW @@ -125,6 +145,17 @@ print_vs_libs <- function(n, df) { } } +print_unique_metrics <- function(n, df) { + cat(sprintf("\n n = %s window = %d (median of %d runs)\n", + fmt_n(n), WINDOW, REPS)) + cat(sprintf(" %-20s %14s\n", "Function", "robustrolling")) + cat(" ", strrep("-", 36), "\n", sep = "") + for (i in seq_len(nrow(df))) { + r <- df[i, ] + cat(sprintf(" %-20s %10.2f ms\n", r$name, r$our_ms)) + } +} + print_stable_vs_fast <- function(n, df) { cat(sprintf("\n n = %s window = %d (median of %d runs)\n", fmt_n(n), WINDOW, REPS)) @@ -145,6 +176,13 @@ for (n in SIZES) { print_vs_libs(n, df) } +cat("\n\nunique metrics (no slider/RcppRoll equivalent)\n") +cat(strrep("=", 80), "\n") +for (n in SIZES) { + df <- run_unique_metrics(n) + print_unique_metrics(n, df) +} + cat("\n\nstable vs fast — prefix-sum acceleration\n") cat(strrep("=", 59), "\n") for (n in SIZES) { diff --git a/docs/python/conf.py b/docs/python/conf.py index e68ce02..ecbcb32 100644 --- a/docs/python/conf.py +++ b/docs/python/conf.py @@ -5,7 +5,7 @@ project = "robustrolling" author = "Igor Ptak" -release = "0.1.0" +release = "0.2.0" copyright = "2026, Igor Ptak" extensions = [ diff --git a/docs/python/index.rst b/docs/python/index.rst index f980c66..bdd51af 100644 --- a/docs/python/index.rst +++ b/docs/python/index.rst @@ -59,6 +59,196 @@ Algorithm overview - ``rolling_cov``, ``rolling_cor`` - ``rolling_cov``, ``rolling_cor``, ``SlidingCovariance`` +Performance +----------- + +Benchmarked on Apple M-series (ARM), window = 100, n = 1 000 000 +(median of 10 runs). Best robustrolling configuration shown +(¹ ``assume_finite=True``, ² ``method="fast"``). + +**Python vs pandas** + +.. list-table:: + :header-rows: 1 + :widths: 28 18 14 12 + + * - Function + - robustrolling + - pandas + - speedup + * - ``rolling_mean`` + - 3.1 ms + - 4.4 ms + - **1.4x** + * - ``rolling_max`` + - 11.1 ms + - 11.7 ms + - 1.1x + * - ``rolling_min`` + - 11.2 ms + - 12.2 ms + - 1.1x + * - ``rolling_median`` + - 106 ms + - 233 ms + - **2.2x** + * - ``rolling_variance`` + - 15.2 ms + - 9.6 ms + - 0.6x + * - ``rolling_skewness`` + - 14.0 ms + - 9.1 ms + - 0.6x + * - ``rolling_kurtosis`` + - 14.3 ms + - 9.2 ms + - 0.6x + * - ``rolling_cov`` + - 14.8 ms + - 18.2 ms + - **1.2x** + * - ``rolling_cor`` + - 14.6 ms + - 36.7 ms + - **2.5x** + +**Python vs Polars** + +.. list-table:: + :header-rows: 1 + :widths: 28 18 14 12 + + * - Function + - robustrolling + - Polars + - speedup + * - ``rolling_mean`` + - 3.1 ms + - 8.0 ms + - **2.6x** + * - ``rolling_max`` + - 11.1 ms + - 11.4 ms + - 1.0x + * - ``rolling_min`` + - 11.0 ms + - 11.6 ms + - 1.1x + * - ``rolling_median`` + - 106 ms + - 40.8 ms + - 0.4x + * - ``rolling_variance`` + - 15.7 ms + - 16.2 ms + - 1.0x + * - ``rolling_skewness`` + - 13.9 ms + - 16.0 ms + - **1.2x** + * - ``rolling_kurtosis`` + - 14.3 ms + - 15.6 ms + - 1.1x + +**Python — stable vs fast (prefix-sum acceleration)** + +.. list-table:: + :header-rows: 1 + :widths: 28 14 12 12 + + * - Function + - stable + - fast + - speedup + * - ``mean`` (assume_finite) + - 3.2 ms + - 0.73 ms + - **4.4x** + * - ``variance`` + - 15.2 ms + - 3.9 ms + - **3.9x** + * - ``skewness`` + - 13.9 ms + - 10.0 ms + - **1.4x** + * - ``kurtosis`` + - 14.4 ms + - 7.6 ms + - **1.9x** + +**R vs slider vs RcppRoll** + +.. list-table:: + :header-rows: 1 + :widths: 22 16 14 14 12 14 + + * - Function + - robustrolling + - slider + - RcppRoll + - vs slider + - vs RcppRoll + * - ``rolling_max`` + - 15.1 ms + - 338 ms + - 175 ms + - **22x** + - **12x** + * - ``rolling_min`` + - 14.9 ms + - 350 ms + - 175 ms + - **24x** + - **12x** + * - ``rolling_mean`` + - 3.1 ms + - 1 523 ms + - 37.4 ms + - **487x** + - **12x** + * - ``rolling_variance`` + - 16.0 ms + - 2 477 ms + - 304 ms + - **154x** + - **19x** + * - ``rolling_median`` + - 112 ms + - 10 084 ms + - 1 938 ms + - **90x** + - **17x** + +**R — stable vs fast** + +.. list-table:: + :header-rows: 1 + :widths: 28 14 12 12 + + * - Function + - stable + - fast + - speedup + * - ``mean`` (assume_finite) + - 3.2 ms + - 0.78 ms + - **4.0x** + * - ``variance`` + - 16.2 ms + - 4.1 ms + - **4.0x** + * - ``skewness`` + - 14.5 ms + - 10.3 ms + - **1.4x** + * - ``kurtosis`` + - 14.4 ms + - 7.8 ms + - **1.8x** + Install for R ------------- diff --git a/docs/python/r_api/index.rst b/docs/python/r_api/index.rst index 0788208..d03e34e 100644 --- a/docs/python/r_api/index.rst +++ b/docs/python/r_api/index.rst @@ -5,16 +5,182 @@ All functions accept a numeric vector ``x`` (and ``y`` for bivariate functions), a ``window_size`` integer, and an optional ``min_periods`` parameter compatible with *pandas* semantics. -.. toctree:: - :maxdepth: 1 - :caption: Functions - - rolling_cor - rolling_cov - rolling_kurtosis - rolling_max - rolling_mean - rolling_median - rolling_min - rolling_skewness - rolling_variance +.. function:: rolling_cor(x, y, window_size, min_periods = window_size) + + *Rolling Correlation* — Computes the rolling Pearson correlation between two numeric vectors. + + :param x: A numeric vector of type double. + :param y: A numeric vector of type double, same length as ``x``. + :param window_size: Positive integer window length. + :param min_periods: Minimum number of valid (non-``NA``) pairs required. Defaults to ``window_size``. + :returns: A numeric vector with rolling correlation values. + + .. rubric:: Example + + .. code-block:: r + + x <- as.double(c(1, 2, 3, 4, 5)) + y <- as.double(c(2, 4, 6, 8, 10)) + rolling_cor(x, y, 3L) + + +---- + +.. function:: rolling_cov(x, y, window_size, min_periods = window_size) + + *Rolling Covariance* — Computes the rolling sample covariance (ddof=1) between two numeric vectors. + + :param x: A numeric vector of type double. + :param y: A numeric vector of type double, same length as ``x``. + :param window_size: Positive integer window length. + :param min_periods: Minimum number of valid (non-``NA``) pairs required. Defaults to ``window_size``. + :returns: A numeric vector with rolling covariance values. + + .. rubric:: Example + + .. code-block:: r + + x <- as.double(c(1, 2, 3, 4, 5)) + y <- as.double(c(2, 4, 6, 8, 10)) + rolling_cov(x, y, 3L) + + +---- + +.. function:: rolling_kurtosis(x, window_size, min_periods = window_size, method = "stable") + + *Rolling Kurtosis* — Computes the rolling excess kurtosis (Fisher) over a numeric vector. + Requires at least 4 non-``NA`` observations per window. + + :param x: A numeric vector of type double. + :param window_size: Positive integer window length. + :param min_periods: Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size``. + :param method: ``"stable"`` (default) uses Terriberry's online algorithm. ``"fast"`` uses a prefix-sum approach (faster, but susceptible to catastrophic cancellation when values are large and variance is small). + :returns: A numeric vector with rolling excess kurtosis values. + + .. rubric:: Example + + .. code-block:: r + + x <- as.double(c(1, 2, 3, 4, 5)) + rolling_kurtosis(x, 4L) + + +---- + +.. function:: rolling_max(x, window_size, min_periods = window_size) + + *Rolling Maximum* — Computes the rolling maximum over a numeric vector using a monotonic deque. + + :param x: A numeric vector of type double. + :param window_size: Positive integer window length. + :param min_periods: Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size``. + :returns: A numeric vector with rolling maximum values. + + .. rubric:: Example + + .. code-block:: r + + x <- as.double(c(1, 3, 2, 5, 4)) + rolling_max(x, 3L) + + +---- + +.. function:: rolling_mean(x, window_size, min_periods = window_size, assume_finite = FALSE) + + *Rolling Mean* — Computes the rolling mean over a numeric vector. + + :param x: A numeric vector of type double. + :param window_size: Positive integer window length. + :param min_periods: Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size``. + :param assume_finite: If ``TRUE``, assumes the input contains no ``NA`` values and uses a faster SIMD prefix-sum path. Passing ``TRUE`` when ``NA``s are present produces incorrect results. Defaults to ``FALSE``. + :returns: A numeric vector with rolling mean values. + + .. rubric:: Example + + .. code-block:: r + + x <- as.double(c(1, 2, 3, 4)) + rolling_mean(x, 3L) + + +---- + +.. function:: rolling_median(x, window_size, min_periods = window_size) + + *Rolling Median* — Computes the rolling median over a numeric vector using an ordered multiset + with a tracked median iterator. Time complexity: O(log n) per element. + + :param x: A numeric vector of type double. + :param window_size: Positive integer window length. + :param min_periods: Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size``. + :returns: A numeric vector with rolling median values. + + .. rubric:: Example + + .. code-block:: r + + x <- as.double(c(1, 3, 2, 5, 4)) + rolling_median(x, 3L) + + +---- + +.. function:: rolling_min(x, window_size, min_periods = window_size) + + *Rolling Minimum* — Computes the rolling minimum over a numeric vector using a monotonic deque. + + :param x: A numeric vector of type double. + :param window_size: Positive integer window length. + :param min_periods: Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size``. + :returns: A numeric vector with rolling minimum values. + + .. rubric:: Example + + .. code-block:: r + + x <- as.double(c(1, 3, 2, 5, 4)) + rolling_min(x, 3L) + + +---- + +.. function:: rolling_skewness(x, window_size, min_periods = window_size, method = "stable") + + *Rolling Skewness* — Computes the rolling adjusted Fisher-Pearson skewness over a numeric vector. + Requires at least 3 non-``NA`` observations per window. + + :param x: A numeric vector of type double. + :param window_size: Positive integer window length. + :param min_periods: Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size``. + :param method: ``"stable"`` (default) uses Terriberry's online algorithm. ``"fast"`` uses a prefix-sum approach (faster, but susceptible to catastrophic cancellation when values are large and variance is small). + :returns: A numeric vector with rolling skewness values. + + .. rubric:: Example + + .. code-block:: r + + x <- as.double(c(1, 2, 3, 4, 5)) + rolling_skewness(x, 3L) + + +---- + +.. function:: rolling_variance(x, window_size, min_periods = window_size, method = "stable") + + *Rolling Sample Variance* — Computes the rolling sample variance over a numeric vector. + + :param x: A numeric vector of type double. + :param window_size: Positive integer window length. + :param min_periods: Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size`` (pandas semantics). Positions with fewer non-``NA`` values yield ``NA``. + :param method: ``"stable"`` (default) uses the Welford online algorithm. ``"fast"`` uses a prefix-sum approach (faster, but susceptible to catastrophic cancellation when values are large and variance is small). + :returns: A numeric vector with rolling sample variance values. Entries are ``NA`` when fewer than ``min_periods`` non-``NA`` observations are present in the window, and ``NaN`` when variance is undefined (fewer than two values). + + .. rubric:: Example + + .. code-block:: r + + x <- as.double(c(1, 2, 3, 4)) + rolling_variance(x, 3L) + diff --git a/docs/python/r_api/rolling_cor.rst b/docs/python/r_api/rolling_cor.rst deleted file mode 100644 index 4171c2a..0000000 --- a/docs/python/r_api/rolling_cor.rst +++ /dev/null @@ -1,44 +0,0 @@ -Rolling Correlation -=================== - -Computes the rolling Pearson correlation between two numeric vectors. - -Usage ------ - -.. code-block:: r - - rolling_cor(x, y, window_size, min_periods = window_size) - -Parameters ----------- - -.. list-table:: - :header-rows: 1 - :widths: 20 80 - - * - Parameter - - Description - * - ``x`` - - A numeric vector of type double. - * - ``y`` - - A numeric vector of type double, same length as ``x``. - * - ``window_size`` - - Positive integer window length. - * - ``min_periods`` - - Minimum number of valid (non-``NA``) pairs required. Defaults to ``window_size``. - -Returns -------- - -A numeric vector with rolling correlation values. - -Examples --------- - -.. code-block:: r - - x <- as.double(c(1, 2, 3, 4, 5)) - y <- as.double(c(2, 4, 6, 8, 10)) - rolling_cor(x, y, 3L) - diff --git a/docs/python/r_api/rolling_cov.rst b/docs/python/r_api/rolling_cov.rst deleted file mode 100644 index 183ac87..0000000 --- a/docs/python/r_api/rolling_cov.rst +++ /dev/null @@ -1,44 +0,0 @@ -Rolling Covariance -================== - -Computes the rolling sample covariance (ddof=1) between two numeric vectors. - -Usage ------ - -.. code-block:: r - - rolling_cov(x, y, window_size, min_periods = window_size) - -Parameters ----------- - -.. list-table:: - :header-rows: 1 - :widths: 20 80 - - * - Parameter - - Description - * - ``x`` - - A numeric vector of type double. - * - ``y`` - - A numeric vector of type double, same length as ``x``. - * - ``window_size`` - - Positive integer window length. - * - ``min_periods`` - - Minimum number of valid (non-``NA``) pairs required. Defaults to ``window_size``. - -Returns -------- - -A numeric vector with rolling covariance values. - -Examples --------- - -.. code-block:: r - - x <- as.double(c(1, 2, 3, 4, 5)) - y <- as.double(c(2, 4, 6, 8, 10)) - rolling_cov(x, y, 3L) - diff --git a/docs/python/r_api/rolling_kurtosis.rst b/docs/python/r_api/rolling_kurtosis.rst deleted file mode 100644 index 47adddb..0000000 --- a/docs/python/r_api/rolling_kurtosis.rst +++ /dev/null @@ -1,42 +0,0 @@ -Rolling Kurtosis -================ - -Computes the rolling excess kurtosis (Fisher) over a numeric vector. -Requires at least 4 non-``NA`` observations per window. - -Usage ------ - -.. code-block:: r - - rolling_kurtosis(x, window_size, min_periods = window_size) - -Parameters ----------- - -.. list-table:: - :header-rows: 1 - :widths: 20 80 - - * - Parameter - - Description - * - ``x`` - - A numeric vector of type double. - * - ``window_size`` - - Positive integer window length. - * - ``min_periods`` - - Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size``. - -Returns -------- - -A numeric vector with rolling excess kurtosis values. - -Examples --------- - -.. code-block:: r - - x <- as.double(c(1, 2, 3, 4, 5)) - rolling_kurtosis(x, 4L) - diff --git a/docs/python/r_api/rolling_max.rst b/docs/python/r_api/rolling_max.rst deleted file mode 100644 index 29d9024..0000000 --- a/docs/python/r_api/rolling_max.rst +++ /dev/null @@ -1,41 +0,0 @@ -Rolling Maximum -=============== - -Computes the rolling maximum over a numeric vector using a monotonic deque. - -Usage ------ - -.. code-block:: r - - rolling_max(x, window_size, min_periods = window_size) - -Parameters ----------- - -.. list-table:: - :header-rows: 1 - :widths: 20 80 - - * - Parameter - - Description - * - ``x`` - - A numeric vector of type double. - * - ``window_size`` - - Positive integer window length. - * - ``min_periods`` - - Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size``. - -Returns -------- - -A numeric vector with rolling maximum values. - -Examples --------- - -.. code-block:: r - - x <- as.double(c(1, 3, 2, 5, 4)) - rolling_max(x, 3L) - diff --git a/docs/python/r_api/rolling_mean.rst b/docs/python/r_api/rolling_mean.rst deleted file mode 100644 index eae9838..0000000 --- a/docs/python/r_api/rolling_mean.rst +++ /dev/null @@ -1,41 +0,0 @@ -Rolling Mean -============ - -Computes the rolling mean over a numeric vector. - -Usage ------ - -.. code-block:: r - - rolling_mean(x, window_size, min_periods = window_size) - -Parameters ----------- - -.. list-table:: - :header-rows: 1 - :widths: 20 80 - - * - Parameter - - Description - * - ``x`` - - A numeric vector of type double. - * - ``window_size`` - - Positive integer window length. - * - ``min_periods`` - - Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size``. - -Returns -------- - -A numeric vector with rolling mean values. - -Examples --------- - -.. code-block:: r - - x <- as.double(c(1, 2, 3, 4)) - rolling_mean(x, 3L) - diff --git a/docs/python/r_api/rolling_median.rst b/docs/python/r_api/rolling_median.rst deleted file mode 100644 index e83bc42..0000000 --- a/docs/python/r_api/rolling_median.rst +++ /dev/null @@ -1,42 +0,0 @@ -Rolling Median -============== - -Computes the rolling median over a numeric vector using an ordered multiset -with a tracked median iterator. Time complexity: O(log n) per element. - -Usage ------ - -.. code-block:: r - - rolling_median(x, window_size, min_periods = window_size) - -Parameters ----------- - -.. list-table:: - :header-rows: 1 - :widths: 20 80 - - * - Parameter - - Description - * - ``x`` - - A numeric vector of type double. - * - ``window_size`` - - Positive integer window length. - * - ``min_periods`` - - Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size``. - -Returns -------- - -A numeric vector with rolling median values. - -Examples --------- - -.. code-block:: r - - x <- as.double(c(1, 3, 2, 5, 4)) - rolling_median(x, 3L) - diff --git a/docs/python/r_api/rolling_min.rst b/docs/python/r_api/rolling_min.rst deleted file mode 100644 index 7a48358..0000000 --- a/docs/python/r_api/rolling_min.rst +++ /dev/null @@ -1,41 +0,0 @@ -Rolling Minimum -=============== - -Computes the rolling minimum over a numeric vector using a monotonic deque. - -Usage ------ - -.. code-block:: r - - rolling_min(x, window_size, min_periods = window_size) - -Parameters ----------- - -.. list-table:: - :header-rows: 1 - :widths: 20 80 - - * - Parameter - - Description - * - ``x`` - - A numeric vector of type double. - * - ``window_size`` - - Positive integer window length. - * - ``min_periods`` - - Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size``. - -Returns -------- - -A numeric vector with rolling minimum values. - -Examples --------- - -.. code-block:: r - - x <- as.double(c(1, 3, 2, 5, 4)) - rolling_min(x, 3L) - diff --git a/docs/python/r_api/rolling_skewness.rst b/docs/python/r_api/rolling_skewness.rst deleted file mode 100644 index a2e5e3c..0000000 --- a/docs/python/r_api/rolling_skewness.rst +++ /dev/null @@ -1,42 +0,0 @@ -Rolling Skewness -================ - -Computes the rolling adjusted Fisher-Pearson skewness over a numeric vector. -Requires at least 3 non-``NA`` observations per window. - -Usage ------ - -.. code-block:: r - - rolling_skewness(x, window_size, min_periods = window_size) - -Parameters ----------- - -.. list-table:: - :header-rows: 1 - :widths: 20 80 - - * - Parameter - - Description - * - ``x`` - - A numeric vector of type double. - * - ``window_size`` - - Positive integer window length. - * - ``min_periods`` - - Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size``. - -Returns -------- - -A numeric vector with rolling skewness values. - -Examples --------- - -.. code-block:: r - - x <- as.double(c(1, 2, 3, 4, 5)) - rolling_skewness(x, 3L) - diff --git a/docs/python/r_api/rolling_variance.rst b/docs/python/r_api/rolling_variance.rst deleted file mode 100644 index 71fefdc..0000000 --- a/docs/python/r_api/rolling_variance.rst +++ /dev/null @@ -1,44 +0,0 @@ -Rolling Sample Variance -======================= - -Computes the rolling sample variance over a numeric vector. - -Usage ------ - -.. code-block:: r - - rolling_variance(x, window_size, min_periods = window_size) - -Parameters ----------- - -.. list-table:: - :header-rows: 1 - :widths: 20 80 - - * - Parameter - - Description - * - ``x`` - - A numeric vector of type double. - * - ``window_size`` - - Positive integer window length. - * - ``min_periods`` - - Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size`` (pandas semantics). Positions with fewer non-``NA`` values yield ``NA``. - -Returns -------- - -A numeric vector with rolling sample variance values. Entries are -``NA`` when fewer than ``min_periods`` non-``NA`` observations are -present in the window, and ``NaN`` when variance is undefined (fewer -than two values). - -Examples --------- - -.. code-block:: r - - x <- as.double(c(1, 2, 3, 4)) - rolling_variance(x, 3L) - diff --git a/docs/rd_to_rst.py b/docs/rd_to_rst.py index c4a0f86..7982629 100644 --- a/docs/rd_to_rst.py +++ b/docs/rd_to_rst.py @@ -96,52 +96,45 @@ def _extract_items(content: str, tag: str) -> list[tuple[str, str]]: return items +def _indent(text: str, prefix: str) -> str: + """Indent every line of text with prefix.""" + lines = text.splitlines() + return "\n".join(prefix + l if l.strip() else "" for l in lines) + + def rd_to_rst(rd_path: Path) -> str: content = rd_path.read_text(encoding="utf-8") title = _strip_rd(_extract(content, "title")) description = _strip_rd(_extract(content, "description")) - usage = _extract(content, "usage").strip() - value = _strip_rd(_extract(content, "value")) + usage = " ".join(_extract(content, "usage").split()) # collapse to one line + value = " ".join(_strip_rd(_extract(content, "value")).split()) # collapse to one line examples_raw = _extract(content, "examples").strip() args = _extract_items(content, "arguments") - rst = f"{title}\n{'=' * len(title)}\n\n" - - rst += f"{description}\n\n" + rst = "" if usage: - rst += "Usage\n-----\n\n" - rst += f".. code-block:: r\n\n" - for line in usage.splitlines(): - rst += f" {line}\n" - rst += "\n" - - if args: - rst += "Parameters\n----------\n\n" - rst += ".. list-table::\n" - rst += " :header-rows: 1\n" - rst += " :widths: 20 80\n\n" - rst += " * - Parameter\n" - rst += " - Description\n" - for name, desc in args: - cleaned = _strip_rd(desc).replace("\n", " ") - rst += f" * - ``{name}``\n" - rst += f" - {cleaned}\n" - rst += "\n" + rst += f".. function:: {usage}\n\n" + + rst += _indent(f"*{title}* — {description}" if title else description, " ") + "\n\n" + + for name, desc in args: + cleaned = " ".join(_strip_rd(desc).split()) # collapse to one line + rst += f" :param {name}: {cleaned}\n" if value: - rst += "Returns\n-------\n\n" - rst += f"{value}\n\n" + rst += f" :returns: {value}\n" if examples_raw: lines = [l for l in examples_raw.splitlines() if not l.strip().startswith("%")] example_code = "\n".join(lines).strip() if example_code: - rst += "Examples\n--------\n\n" - rst += ".. code-block:: r\n\n" + rst += "\n" + rst += " .. rubric:: Example\n\n" + rst += " .. code-block:: r\n\n" for line in example_code.splitlines(): - rst += f" {line}\n" + rst += f" {line}\n" rst += "\n" return rst @@ -150,28 +143,26 @@ def rd_to_rst(rd_path: Path) -> str: def main() -> None: OUT_DIR.mkdir(parents=True, exist_ok=True) rd_files = sorted(MAN_DIR.glob("*.Rd")) - # skip package-level Rd rd_files = [f for f in rd_files if not f.stem.endswith("-package")] - names: list[str] = [] - for rd in rd_files: - rst_content = rd_to_rst(rd) - out_path = OUT_DIR / f"{rd.stem}.rst" - out_path.write_text(rst_content, encoding="utf-8") - names.append(rd.stem) - print(f" {rd.name} → {out_path.relative_to(ROOT)}") + # Remove stale per-function .rst files from a previous run + for old in OUT_DIR.glob("*.rst"): + if old.name != "index.rst": + old.unlink() index = "R API Reference\n===============\n\n" index += "All functions accept a numeric vector ``x`` (and ``y`` for bivariate\n" index += "functions), a ``window_size`` integer, and an optional ``min_periods``\n" index += "parameter compatible with *pandas* semantics.\n\n" - index += ".. toctree::\n" - index += " :maxdepth: 1\n" - index += " :caption: Functions\n\n" - for name in sorted(names): - index += f" {name}\n" + + for i, rd in enumerate(rd_files): + index += rd_to_rst(rd) + if i < len(rd_files) - 1: + index += "\n----\n\n" + print(f" {rd.name} → r_api/index.rst") + (OUT_DIR / "index.rst").write_text(index, encoding="utf-8") - print(f" → r_api/index.rst ({len(names)} entries)") + print(f" -> r_api/index.rst ({len(rd_files)} functions)") if __name__ == "__main__": diff --git a/py_package/tests/conftest.py b/py_package/tests/conftest.py new file mode 100644 index 0000000..35676b2 --- /dev/null +++ b/py_package/tests/conftest.py @@ -0,0 +1,62 @@ +import numpy as np + + +def nan_allclose(result, expected, rtol=1e-12, atol=1e-12): + """Assert arrays are equal, treating NaN positions as matching.""" + result = np.asarray(result, dtype=np.float64) + expected = np.asarray(expected, dtype=np.float64) + nan_exp = np.isnan(expected) + assert np.array_equal(np.isnan(result), nan_exp), ( + f"NaN mask mismatch:\n result = {result}\n expected= {expected}" + ) + if np.any(~nan_exp): + np.testing.assert_allclose(result[~nan_exp], expected[~nan_exp], rtol=rtol, atol=atol) + + +def var_ref(x, k): + out = np.full(len(x), np.nan) + for i in range(len(x)): + w = x[max(0, i - k + 1):i + 1] + if len(w) >= 2: + out[i] = np.var(w, ddof=1) + return out + + +def mean_ref(x, k): + out = np.full(len(x), np.nan) + for i in range(len(x)): + w = x[max(0, i - k + 1):i + 1] + valid = w[~np.isnan(w)] + if len(valid) >= 1: + out[i] = valid.mean() + return out + + +def median_ref(x, k): + return np.array([np.median(x[max(0, i - k + 1):i + 1]) for i in range(len(x))]) + + +def cov_ref(x, y, k): + """Sample covariance (ddof=1) over valid pairs in each rolling window.""" + out = np.full(len(x), np.nan) + for i in range(len(x)): + xi = x[max(0, i - k + 1):i + 1] + yi = y[max(0, i - k + 1):i + 1] + mask = ~np.isnan(xi) & ~np.isnan(yi) + xi, yi = xi[mask], yi[mask] + if len(xi) >= 2: + out[i] = np.cov(xi, yi, ddof=1)[0, 1] + return out + + +def cor_ref(x, y, k): + """Pearson correlation over valid pairs in each rolling window.""" + out = np.full(len(x), np.nan) + for i in range(len(x)): + xi = x[max(0, i - k + 1):i + 1] + yi = y[max(0, i - k + 1):i + 1] + mask = ~np.isnan(xi) & ~np.isnan(yi) + xi, yi = xi[mask], yi[mask] + if len(xi) >= 2 and np.std(xi) > 0 and np.std(yi) > 0: + out[i] = np.corrcoef(xi, yi)[0, 1] + return out \ No newline at end of file diff --git a/py_package/tests/test_bindings.py b/py_package/tests/test_bindings.py deleted file mode 100644 index 79302df..0000000 --- a/py_package/tests/test_bindings.py +++ /dev/null @@ -1,1058 +0,0 @@ -import math - -import numpy as np -import pandas as pd -import pytest - -import robust_rolling_core as rrc -import robustrolling as rr - - -# ── helpers ──────────────────────────────────────────────────────────────────── - -def _nan_allclose(result, expected, rtol=1e-12, atol=1e-12): - """Assert arrays are equal, treating NaN positions as matching.""" - result = np.asarray(result, dtype=np.float64) - expected = np.asarray(expected, dtype=np.float64) - nan_exp = np.isnan(expected) - assert np.array_equal(np.isnan(result), nan_exp), ( - f"NaN mask mismatch:\n result = {result}\n expected= {expected}" - ) - if np.any(~nan_exp): - np.testing.assert_allclose(result[~nan_exp], expected[~nan_exp], rtol=rtol, atol=atol) - - -def _var_ref(x, k): - out = np.full(len(x), np.nan) - for i in range(len(x)): - w = x[max(0, i - k + 1):i + 1] - if len(w) >= 2: - out[i] = np.var(w, ddof=1) - return out - - -def _median_ref(x, k): - return np.array([np.median(x[max(0, i - k + 1):i + 1]) for i in range(len(x))]) - - -# ── C++ engine — SlidingWelford ──────────────────────────────────────────────── - -class TestSlidingWelford: - - def test_known_values(self): - x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) - out = rrc.SlidingWelford(3).process_batch(x) - _nan_allclose(out, [np.nan, 0.5, 1.0, 1.0, 1.0]) - - def test_window_size_1_all_nan(self): - out = rrc.SlidingWelford(1).process_batch(np.array([10.0, 11.0, 12.0])) - assert np.all(np.isnan(out)) - - def test_window_larger_than_array(self): - out = rrc.SlidingWelford(10).process_batch(np.array([2.0, 4.0, 6.0])) - assert np.isnan(out[0]) - assert np.isclose(out[1], 2.0, rtol=1e-12) # var([2,4]) - assert np.isclose(out[2], 4.0, rtol=1e-12) # var([2,4,6]) - - def test_constant_zero_variance(self): - out = rrc.SlidingWelford(4).process_batch(np.full(8, 5.0)) - assert np.isnan(out[0]) - np.testing.assert_allclose(out[1:], 0.0, atol=1e-12) - - @pytest.mark.parametrize("k", [2, 3, 5]) - def test_against_naive_reference(self, k): - x = np.array([-3.0, -1.0, 0.0, 2.0, 10.0, 7.0, 7.0, 8.0]) - out = rrc.SlidingWelford(k).process_batch(x) - np.testing.assert_allclose(out, _var_ref(x, k), rtol=1e-11, atol=1e-11, - equal_nan=True) - - def test_nan_does_not_contribute_to_welford(self): - # NaN at pos 2: window still holds [1,2], so var=0.5 - x = np.array([1.0, 2.0, np.nan, 4.0, 5.0]) - out = rrc.SlidingWelford(3).process_batch(x) - assert np.isclose(out[2], 0.5, atol=1e-12) - - def test_empty_input(self): - out = rrc.SlidingWelford(3).process_batch(np.array([])) - assert len(out) == 0 and out.dtype == np.float64 - - def test_rejects_zero_window(self): - with pytest.raises(ValueError, match="Window length must be greater than 0"): - rrc.SlidingWelford(0) - - @pytest.mark.parametrize("k", [-1, -5]) - def test_rejects_negative_window(self, k): - with pytest.raises(TypeError): - rrc.SlidingWelford(k) - - def test_rejects_none_window(self): - with pytest.raises(TypeError): - rrc.SlidingWelford(None) - - def test_rejects_inf_window(self): - with pytest.raises((ValueError, OverflowError, TypeError)): - rrc.SlidingWelford(float("inf")) - - def test_rejects_2d_input(self): - with pytest.raises(RuntimeError, match="Input must be 1D array"): - rrc.SlidingWelford(2).process_batch(np.ones((2, 3))) - - def test_integer_input_converted_to_float64(self): - out = rrc.SlidingWelford(2).process_batch(np.array([1, 2, 3, 4], dtype=np.int32)) - assert out.dtype == np.float64 - - -# ── C++ engine — MonotonicMax ────────────────────────────────────────────────── - -class TestMonotonicMax: - - def test_known_values(self): - x = np.array([1.0, 2.0, 3.0, 2.0, 5.0]) - out = rrc.MonotonicMax(3).process_batch(x) - np.testing.assert_allclose(out, [1.0, 2.0, 3.0, 3.0, 5.0], atol=1e-12) - - def test_window_size_1_identity(self): - x = np.array([-1.0, 0.0, 10.0, 2.0]) - np.testing.assert_allclose(rrc.MonotonicMax(1).process_batch(x), x) - - def test_window_larger_than_array_cumulative(self): - x = np.array([2.0, -3.0, 7.0, 1.0]) - np.testing.assert_allclose(rrc.MonotonicMax(20).process_batch(x), - np.maximum.accumulate(x)) - - def test_decreasing_sequence(self): - x = np.array([9.0, 7.0, 5.0, 3.0, 1.0]) - np.testing.assert_allclose(rrc.MonotonicMax(3).process_batch(x), - [9.0, 9.0, 9.0, 7.0, 5.0]) - - @pytest.mark.parametrize("k", [2, 3, 5]) - def test_against_naive_reference(self, k): - x = np.array([-2.0, 6.0, 1.0, 8.0, 0.0, 8.0, -1.0]) - out = rrc.MonotonicMax(k).process_batch(x) - expected = np.array([np.max(x[max(0, i - k + 1):i + 1]) for i in range(len(x))]) - np.testing.assert_allclose(out, expected, atol=1e-12) - - def test_nan_advances_window_returns_current_max(self): - # window=2: [1,2,NaN,1]. At NaN: window=[2,NaN], max=2 (not NaN passthrough) - x = np.array([1.0, 2.0, np.nan, 1.0]) - out = rrc.MonotonicMax(2).process_batch(x) - assert out[2] == 2.0 - - def test_nan_at_start_empty_window_returns_nan(self): - out = rrc.MonotonicMax(2).process_batch(np.array([np.nan, 2.0, 3.0])) - assert np.isnan(out[0]) - - def test_empty_input(self): - out = rrc.MonotonicMax(3).process_batch(np.array([])) - assert len(out) == 0 and out.dtype == np.float64 - - def test_rejects_zero_window(self): - with pytest.raises(ValueError, match="Window length must be greater than 0"): - rrc.MonotonicMax(0) - - @pytest.mark.parametrize("k", [-1, -5]) - def test_rejects_negative_window(self, k): - with pytest.raises(TypeError): - rrc.MonotonicMax(k) - - def test_rejects_none_window(self): - with pytest.raises(TypeError): - rrc.MonotonicMax(None) - - def test_rejects_inf_window(self): - with pytest.raises((ValueError, OverflowError, TypeError)): - rrc.MonotonicMax(float("inf")) - - def test_rejects_2d_input(self): - with pytest.raises(RuntimeError, match="Input must be 1D array"): - rrc.MonotonicMax(2).process_batch(np.ones((2, 3))) - - def test_integer_input_converted_to_float64(self): - out = rrc.MonotonicMax(2).process_batch(np.array([1, 2, 3], dtype=np.int32)) - assert out.dtype == np.float64 - - -# ── C++ engine — MonotonicMin ────────────────────────────────────────────────── - -class TestMonotonicMin: - - def test_known_values(self): - x = np.array([1.0, 3.0, 2.0, 5.0, 4.0]) - out = rrc.MonotonicMin(3).process_batch(x) - np.testing.assert_allclose(out, [1.0, 1.0, 1.0, 2.0, 2.0], atol=1e-12) - - def test_window_size_1_identity(self): - x = np.array([-1.0, 0.0, 10.0, 2.0]) - np.testing.assert_allclose(rrc.MonotonicMin(1).process_batch(x), x) - - def test_window_larger_than_array_cumulative(self): - x = np.array([2.0, -3.0, 7.0, 1.0]) - np.testing.assert_allclose(rrc.MonotonicMin(20).process_batch(x), - np.minimum.accumulate(x)) - - def test_increasing_sequence(self): - x = np.array([1.0, 3.0, 5.0, 7.0, 9.0]) - np.testing.assert_allclose(rrc.MonotonicMin(3).process_batch(x), - [1.0, 1.0, 1.0, 3.0, 5.0]) - - @pytest.mark.parametrize("k", [2, 3, 5]) - def test_against_naive_reference(self, k): - x = np.array([-2.0, 6.0, 1.0, 8.0, 0.0, 8.0, -1.0]) - out = rrc.MonotonicMin(k).process_batch(x) - expected = np.array([np.min(x[max(0, i - k + 1):i + 1]) for i in range(len(x))]) - np.testing.assert_allclose(out, expected, atol=1e-12) - - def test_nan_advances_window_returns_current_min(self): - # window=2: [5,1,NaN,9]. At NaN: window=[1,NaN], min=1 - x = np.array([5.0, 1.0, np.nan, 9.0]) - out = rrc.MonotonicMin(2).process_batch(x) - assert out[2] == 1.0 - - def test_nan_at_start_empty_window_returns_nan(self): - out = rrc.MonotonicMin(2).process_batch(np.array([np.nan, 2.0, 3.0])) - assert np.isnan(out[0]) - - def test_empty_input(self): - out = rrc.MonotonicMin(3).process_batch(np.array([])) - assert len(out) == 0 - - def test_rejects_zero_window(self): - with pytest.raises(ValueError, match="Window length must be greater than 0"): - rrc.MonotonicMin(0) - - def test_rejects_2d_input(self): - with pytest.raises(RuntimeError, match="Input must be 1D array"): - rrc.MonotonicMin(2).process_batch(np.ones((2, 3))) - - -# ── C++ engine — MultisetMedian ─────────────────────────────────────────────── - -class TestMultisetMedian: - - def test_known_values_odd_window(self): - x = np.array([1.0, 3.0, 2.0, 5.0, 4.0]) - np.testing.assert_allclose(rrc.MultisetMedian(3).process_batch(x), - _median_ref(x, 3), rtol=1e-12) - - def test_known_values_even_window(self): - x = np.array([1.0, 3.0, 2.0, 4.0]) - np.testing.assert_allclose(rrc.MultisetMedian(4).process_batch(x), - _median_ref(x, 4), rtol=1e-12) - - def test_window_size_1_identity(self): - x = np.array([-1.0, 0.0, 10.0, 2.0]) - np.testing.assert_allclose(rrc.MultisetMedian(1).process_batch(x), x) - - def test_window_size_2_regression(self): - # Regression: window_size=2 caused segfault before fix - x = np.array([3.0, 1.0, 2.0, 5.0, 4.0]) - np.testing.assert_allclose(rrc.MultisetMedian(2).process_batch(x), - _median_ref(x, 2), rtol=1e-12) - - def test_even_window_descending_fill_regression(self): - # Regression: even window filled descending caused wrong mid_ position - x = np.array([4.0, 3.0, 2.0, 1.0]) - np.testing.assert_allclose(rrc.MultisetMedian(4).process_batch(x), - _median_ref(x, 4), rtol=1e-12) - - @pytest.mark.parametrize("k", [2, 3, 5]) - def test_against_naive_reference(self, k): - x = np.array([-2.0, 6.0, 1.0, -8.0, 0.0, 8.0, -1.0]) - np.testing.assert_allclose(rrc.MultisetMedian(k).process_batch(x), - _median_ref(x, k), rtol=1e-12) - - def test_large_array_against_reference(self): - np.random.seed(42) - x = np.random.randn(1000) - k = 15 - np.testing.assert_allclose(rrc.MultisetMedian(k).process_batch(x), - _median_ref(x, k), rtol=1e-10) - - def test_element_entering_equals_leaving(self): - x = np.array([1.0, 2.0, 3.0, 1.0, 2.0, 3.0]) - np.testing.assert_allclose(rrc.MultisetMedian(3).process_batch(x), - _median_ref(x, 3), rtol=1e-12) - - def test_window_larger_than_array(self): - x = np.array([3.0, 1.0, 4.0, 1.0]) - np.testing.assert_allclose(rrc.MultisetMedian(20).process_batch(x), - _median_ref(x, 20), rtol=1e-12) - - def test_empty_input(self): - out = rrc.MultisetMedian(3).process_batch(np.array([])) - assert len(out) == 0 and out.dtype == np.float64 - - def test_rejects_zero_window(self): - with pytest.raises(ValueError, match="Window length must be greater than 0"): - rrc.MultisetMedian(0) - - def test_rejects_none_window(self): - with pytest.raises(TypeError): - rrc.MultisetMedian(None) - - def test_rejects_2d_input(self): - with pytest.raises(RuntimeError, match="Input must be 1D array"): - rrc.MultisetMedian(2).process_batch(np.ones((2, 3))) - - def test_integer_input_converted_to_float64(self): - out = rrc.MultisetMedian(2).process_batch(np.array([1, 2, 3], dtype=np.int32)) - assert out.dtype == np.float64 - -# ── C++ engine — SlidingMean ───────────────────────────────────────────────────── - -def _mean_ref(x, k): - out = np.full(len(x), np.nan) - for i in range(len(x)): - w = x[max(0, i - k + 1):i + 1] - valid = w[~np.isnan(w)] - if len(valid) >= 1: - out[i] = valid.mean() - return out - - -class TestSlidingMean: - - def test_known_values(self): - x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) - out = rrc.SlidingMean(3).process_batch(x) - _nan_allclose(out, [1.0, 1.5, 2.0, 3.0, 4.0]) - - def test_window_size_1_identity(self): - x = np.array([3.0, 7.0, -1.0]) - out = rrc.SlidingMean(1).process_batch(x) - _nan_allclose(out, x) - - def test_window_larger_than_array(self): - out = rrc.SlidingMean(10).process_batch(np.array([2.0, 4.0, 6.0])) - _nan_allclose(out, [2.0, 3.0, 4.0]) - - def test_constant_sequence(self): - out = rrc.SlidingMean(3).process_batch(np.full(6, 5.0)) - np.testing.assert_allclose(out, 5.0, atol=1e-12) - - @pytest.mark.parametrize("k", [2, 3, 5]) - def test_against_naive_reference(self, k): - x = np.array([-3.0, -1.0, 0.0, 2.0, 10.0, 7.0, 7.0, 8.0]) - out = rrc.SlidingMean(k).process_batch(x) - np.testing.assert_allclose(out, _mean_ref(x, k), rtol=1e-12, atol=1e-12, - equal_nan=True) - - def test_nan_does_not_contribute(self): - x = np.array([1.0, np.nan, 3.0, 4.0, 5.0]) - out = rrc.SlidingMean(3).process_batch(x) - # window [1, nan, 3] → mean of [1, 3] = 2.0 - assert np.isclose(out[2], 2.0, atol=1e-12) - - def test_nan_advances_window(self): - x = np.array([1.0, 2.0, np.nan, 4.0, 5.0]) - out = rrc.SlidingMean(3).process_batch(x) - # window [2, nan, 4] → mean of [2, 4] = 3.0 - assert np.isclose(out[3], 3.0, atol=1e-12) - - def test_all_nan_returns_nan(self): - x = np.array([np.nan, np.nan, np.nan]) - out = rrc.SlidingMean(2).process_batch(x) - assert np.all(np.isnan(out)) - - def test_empty_input(self): - out = rrc.SlidingMean(3).process_batch(np.array([])) - assert len(out) == 0 and out.dtype == np.float64 - - def test_rejects_zero_window(self): - with pytest.raises(ValueError, match="Window length must be greater than 0"): - rrc.SlidingMean(0) - - def test_rejects_2d_input(self): - with pytest.raises(RuntimeError, match="Input must be 1D array"): - rrc.SlidingMean(2).process_batch(np.ones((2, 3))) - - def test_integer_input_converted_to_float64(self): - out = rrc.SlidingMean(2).process_batch(np.array([1, 2, 3, 4], dtype=np.int32)) - assert out.dtype == np.float64 - - def test_min_periods_masks_warmup(self): - x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) - out = rrc.SlidingMean(3).process_batch(x, min_periods=3) - assert np.isnan(out[0]) and np.isnan(out[1]) - assert np.isclose(out[2], 2.0, atol=1e-12) - - def test_min_periods_with_nan_input(self): - x = np.array([1.0, np.nan, 3.0, 4.0, 5.0]) - out = rrc.SlidingMean(3).process_batch(x, min_periods=2) - # pos 1: window [1, nan] → 1 non-NaN < 2 → NaN - assert np.isnan(out[1]) - # pos 2: window [1, nan, 3] → 2 non-NaN >= 2 → 2.0 - assert np.isclose(out[2], 2.0, atol=1e-12) - - -# ── C++ engine — SlidingMoments ────────────────────────────────────────────────── - -class TestSlidingMoments: - - def test_initial_state_is_nan(self): - sm = rrc.SlidingMoments(4) - assert sm.current_size() == 0 - assert math.isnan(sm.get_skewness()) - assert math.isnan(sm.get_kurtosis()) - - def test_symmetric_window_skewness_is_zero(self): - # [1, 2, 3] is symmetric -> skewness = 0 - sm = rrc.SlidingMoments(3) - for v in [1.0, 2.0, 3.0]: - sm.update(v) - assert pytest.approx(sm.get_skewness(), abs=1e-12) == 0.0 - assert math.isnan(sm.get_kurtosis()) # n=3 < 4 - - def test_known_kurtosis_uniform_window(self): - # [1, 2, 3, 4]: excess kurtosis = -1.2 (uniform distribution) - sm = rrc.SlidingMoments(4) - for v in [1.0, 2.0, 3.0, 4.0]: - sm.update(v) - assert pytest.approx(sm.get_kurtosis(), abs=1e-10) == -1.2 - - def test_nan_advances_window_and_reduces_size(self): - sm = rrc.SlidingMoments(4) - for v in [1.0, 2.0, 3.0, 4.0]: - sm.update(v) - assert sm.current_size() == 4 - assert not math.isnan(sm.get_kurtosis()) - - sm.update(float('nan')) - assert sm.current_size() == 3 - assert not math.isnan(sm.get_skewness()) - assert math.isnan(sm.get_kurtosis()) # n=3 < 4 - - def test_nan_does_not_corrupt_state(self): - sm = rrc.SlidingMoments(3) - for v in [10.0, 20.0, 30.0]: - sm.update(v) - skew_before = sm.get_skewness() - - for _ in range(3): - sm.update(float('nan')) - assert sm.current_size() == 0 - - for v in [10.0, 20.0, 30.0]: - sm.update(v) - assert pytest.approx(sm.get_skewness(), abs=1e-10) == skew_before - - def test_nan_behavior_matches_pandas(self): - data = [1.0, 2.0, 5.0, 7.0, np.nan, np.nan, 8.0, 1.0, 3.0] - window_size = 4 - - s = pd.Series(data) - expected_skew = s.rolling(window_size, min_periods=3).skew() - expected_kurt = s.rolling(window_size, min_periods=4).kurt() - expected_count = s.rolling(window_size, min_periods=0).count() - - sm = rrc.SlidingMoments(window_size) - - for i, val in enumerate(data): - sm.update(val) - assert sm.current_size() == expected_count[i], f"count mismatch at {i}" - - cpp_skew, pd_skew = sm.get_skewness(), expected_skew[i] - if pd.isna(pd_skew): - assert math.isnan(cpp_skew), f"skew should be NaN at {i}" - else: - assert pytest.approx(cpp_skew, abs=1e-5) == pd_skew, f"skew mismatch at {i}" - - cpp_kurt, pd_kurt = sm.get_kurtosis(), expected_kurt[i] - if pd.isna(pd_kurt): - assert math.isnan(cpp_kurt), f"kurtosis should be NaN at {i}" - else: - assert pytest.approx(cpp_kurt, abs=1e-5) == pd_kurt, f"kurtosis mismatch at {i}" - - def test_randomized_fuzzing_against_pandas(self): - np.random.seed(42) - window_size = int(np.random.randint(4, 20)) - data = np.random.randn(1000) * 50.0 - data[np.random.rand(1000) < 0.15] = np.nan - - s = pd.Series(data) - expected_skew = s.rolling(window=window_size, min_periods=3).skew() - expected_kurt = s.rolling(window=window_size, min_periods=4).kurt() - expected_count = s.rolling(window=window_size, min_periods=0).count() - - sm = rrc.SlidingMoments(window_size) - - for i, val in enumerate(data): - sm.update(val) - assert sm.current_size() == expected_count[i], f"count mismatch at {i}" - - cpp_skew, pd_skew = sm.get_skewness(), expected_skew[i] - if pd.isna(pd_skew): - assert math.isnan(cpp_skew), f"skewness should be NaN at {i}" - else: - assert pytest.approx(cpp_skew, rel=1e-4, abs=1e-6) == pd_skew, \ - f"skewness mismatch at {i}: expected {pd_skew}, got {cpp_skew}" - - cpp_kurt, pd_kurt = sm.get_kurtosis(), expected_kurt[i] - if pd.isna(pd_kurt): - assert math.isnan(cpp_kurt), f"kurtosis should be NaN at {i}" - else: - assert pytest.approx(cpp_kurt, rel=1e-4, abs=1e-6) == pd_kurt, \ - f"kurtosis mismatch at {i}: expected {pd_kurt}, got {cpp_kurt}" - -# ── High-level API — output type preservation ───────────────────────────────── - -_ALL_FNS = [rr.rolling_max, rr.rolling_min, rr.rolling_median, rr.rolling_variance] - - -class TestOutputType: - - @pytest.mark.parametrize("fn", _ALL_FNS) - def test_series_input_returns_series(self, fn): - s = pd.Series([1.0, 3.0, 2.0, 5.0, 4.0], name="x") - out = fn(s, 3) - assert isinstance(out, pd.Series) - assert out.name == "x" - - @pytest.mark.parametrize("fn", _ALL_FNS) - def test_series_preserves_datetime_index(self, fn): - idx = pd.date_range("2024-01-01", periods=5) - s = pd.Series([1.0, 3.0, 2.0, 5.0, 4.0], index=idx) - out = fn(s, 3) - assert isinstance(out, pd.Series) - assert out.index.equals(idx) - - @pytest.mark.parametrize("fn", _ALL_FNS) - def test_series_preserves_range_index(self, fn): - idx = pd.RangeIndex(start=10, stop=15) - s = pd.Series([1.0, 3.0, 2.0, 5.0, 4.0], index=idx) - assert fn(s, 3).index.equals(idx) - - @pytest.mark.parametrize("fn", _ALL_FNS) - def test_ndarray_input_returns_ndarray(self, fn): - arr = np.array([1.0, 3.0, 2.0, 5.0, 4.0]) - assert isinstance(fn(arr, 3), np.ndarray) - - @pytest.mark.parametrize("fn", _ALL_FNS) - def test_output_length_equals_input_length(self, fn): - arr = np.array([1.0, 3.0, 2.0, 5.0, 4.0]) - assert len(fn(arr, 3)) == len(arr) - - -# ── High-level API — pandas value comparison ────────────────────────────────── -# -# Each case is (data, k, min_periods). None -> default (= window_size). -# Tested against the equivalent pandas rolling call. - -_PANDAS_CASES = [ - # label, data, k mp - ("clean_default", [1.0, 3.0, 2.0, 5.0, 4.0], 3, None), - ("clean_mp1", [1.0, 3.0, 2.0, 5.0, 4.0], 3, 1), - ("clean_mp2", [1.0, 3.0, 2.0, 5.0, 4.0], 3, 2), - ("longer_default", [-2.0, 6.0, 1.0, 8.0, 0.0, 8.0, -1.0], 4, None), - ("longer_mp2", [-2.0, 6.0, 1.0, 8.0, 0.0, 8.0, -1.0], 4, 2), - ("nan_mp1", [1.0, np.nan, 3.0, 4.0, 5.0], 3, 1), - ("nan_mp2", [1.0, np.nan, 3.0, 4.0, 5.0], 3, 2), - ("nan_default", [1.0, np.nan, 3.0, 4.0, 5.0], 3, None), - ("leading_nans", [np.nan, np.nan, 3.0, 4.0], 2, 1), - ("k_gt_n", [1.0, 2.0, 3.0], 10, 1), - ("constant", [5.0] * 6, 3, None), - ("negatives_mp1", [-5.0, -1.0, -3.0, -2.0, -4.0], 3, 1), - ("mixed_nan_mp1", [np.nan, 1.0, np.nan, 3.0, np.nan], 2, 1), - ("window2_nan_gap", [5.0, 1.0, np.nan, 0.0], 2, 1), -] - -_CASE_IDS = [c[0] for c in _PANDAS_CASES] -_CASE_PARAMS = [(c[1], c[2], c[3]) for c in _PANDAS_CASES] # (data, k, mp) - - -def _pd_mp(mp, k): - """Resolve None -> k for pandas min_periods argument.""" - return k if mp is None else mp - - -class TestPandasComparison: - """rolling_max/min/median/variance must produce values identical to pandas.""" - - @pytest.mark.parametrize("data,k,mp", _CASE_PARAMS, ids=_CASE_IDS) - def test_rolling_max_matches_pandas(self, data, k, mp): - arr = np.array(data, dtype=np.float64) - expected = pd.Series(arr).rolling(k, min_periods=_pd_mp(mp, k)).max().to_numpy() - kw = {} if mp is None else {"min_periods": mp} - _nan_allclose(rr.rolling_max(arr, k, **kw), expected) - - @pytest.mark.parametrize("data,k,mp", _CASE_PARAMS, ids=_CASE_IDS) - def test_rolling_min_matches_pandas(self, data, k, mp): - arr = np.array(data, dtype=np.float64) - expected = pd.Series(arr).rolling(k, min_periods=_pd_mp(mp, k)).min().to_numpy() - kw = {} if mp is None else {"min_periods": mp} - _nan_allclose(rr.rolling_min(arr, k, **kw), expected) - - @pytest.mark.parametrize("data,k,mp", _CASE_PARAMS, ids=_CASE_IDS) - def test_rolling_median_matches_pandas(self, data, k, mp): - arr = np.array(data, dtype=np.float64) - expected = pd.Series(arr).rolling(k, min_periods=_pd_mp(mp, k)).median().to_numpy() - kw = {} if mp is None else {"min_periods": mp} - _nan_allclose(rr.rolling_median(arr, k, **kw), expected) - - @pytest.mark.parametrize("data,k,mp", _CASE_PARAMS, ids=_CASE_IDS) - def test_rolling_variance_matches_pandas(self, data, k, mp): - arr = np.array(data, dtype=np.float64) - expected = pd.Series(arr).rolling(k, min_periods=_pd_mp(mp, k)).var().to_numpy() - kw = {} if mp is None else {"min_periods": mp} - _nan_allclose(rr.rolling_variance(arr, k, **kw), expected) - - @pytest.mark.parametrize("data,k,mp", _CASE_PARAMS, ids=_CASE_IDS) - def test_rolling_mean_matches_pandas(self, data, k, mp): - arr = np.array(data, dtype=np.float64) - expected = pd.Series(arr).rolling(k, min_periods=_pd_mp(mp, k)).mean().to_numpy() - kw = {} if mp is None else {"min_periods": mp} - _nan_allclose(rr.rolling_mean(arr, k, **kw), expected) - - -# ── High-level API — min_periods edge cases ─────────────────────────────────── - -class TestMinPeriods: - - def test_default_equals_window_size_max_min_median(self): - """First k-1 positions are NaN with default min_periods.""" - x = np.array([1.0, 3.0, 2.0, 5.0, 4.0]) - for fn in (rr.rolling_max, rr.rolling_min, rr.rolling_median): - out = fn(x, 3) - assert np.isnan(out[0]) and np.isnan(out[1]) and not np.isnan(out[2]) - - def test_default_equals_window_size_variance(self): - """Variance: first k-1 positions NaN; pos k-1 has valid value.""" - out = rr.rolling_variance(np.array([1.0, 2.0, 3.0, 4.0]), 3) - assert np.isnan(out[0]) and np.isnan(out[1]) - assert pytest.approx(out[2], abs=1e-10) == 1.0 - - def test_min_periods_0_no_masking_on_clean_input(self): - """min_periods=0 never adds extra NaN for clean (non-NaN) input.""" - x = np.array([1.0, 2.0, 3.0, 4.0]) - for fn in (rr.rolling_max, rr.rolling_min, rr.rolling_median): - assert not np.any(np.isnan(fn(x, 3, min_periods=0))) - - def test_nan_series_default_all_masked(self): - """[1,nan,3,4] window=3: non_na_count=[1,1,2,2], all < 3 -> all NaN.""" - x = np.array([1.0, np.nan, 3.0, 4.0]) - for fn in (rr.rolling_max, rr.rolling_min, rr.rolling_median): - assert np.all(np.isnan(fn(x, 3))) - - def test_window_expiry_max_nan_gap(self): - """Value before NaN gap must expire: [5,1,NaN,0] window=2.""" - x = np.array([5.0, 1.0, np.nan, 0.0]) - out = rr.rolling_max(x, 2, min_periods=1) - assert pytest.approx(out[2], abs=1e-12) == 1.0 # window=[1,nan], max=1 - assert pytest.approx(out[3], abs=1e-12) == 0.0 # window=[nan,0], max=0 - - def test_window_expiry_min_nan_gap(self): - """Value before NaN gap must expire: [1,5,NaN,9] window=2.""" - x = np.array([1.0, 5.0, np.nan, 9.0]) - out = rr.rolling_min(x, 2, min_periods=1) - assert pytest.approx(out[2], abs=1e-12) == 5.0 # window=[5,nan], min=5 - assert pytest.approx(out[3], abs=1e-12) == 9.0 # window=[nan,9], min=9 - - @pytest.mark.parametrize("fn", _ALL_FNS) - def test_validation_negative_min_periods(self, fn): - with pytest.raises(Exception): - fn(np.array([1.0, 2.0, 3.0]), 3, min_periods=-1) - - @pytest.mark.parametrize("fn", _ALL_FNS) - def test_validation_min_periods_exceeds_window(self, fn): - with pytest.raises(Exception): - fn(np.array([1.0, 2.0, 3.0]), 3, min_periods=4) - - @pytest.mark.parametrize("fn", _ALL_FNS) - def test_empty_input_returns_empty(self, fn): - out = fn(np.array([]), 3) - assert len(out) == 0 - - def test_variance_min_periods_1_single_value_is_nan(self): - """With min_periods=1, pos 0 is still NaN: var needs ≥2 values (ddof=1). - This NaN comes from the algorithm, not min_periods masking.""" - out = rr.rolling_variance(np.array([1.0, 5.0]), 2, min_periods=1) - assert np.isnan(out[0]) - assert pytest.approx(out[1], abs=1e-10) == 8.0 # var([1,5]) = 8 - - -# ── Memory layout and dtype robustness ──────────────────────────────────────── - -_ENGINE_CLASSES = [rrc.MonotonicMax, rrc.MonotonicMin, rrc.MultisetMedian, rrc.SlidingWelford] -_ENGINE_IDS = ["MonotonicMax", "MonotonicMin", "MultisetMedian", "SlidingWelford"] - - -class TestMemoryLayoutAndDtypes: - """Verify pybind11 c_style|forcecast handles non-standard memory layouts and dtypes.""" - - @pytest.mark.parametrize("cls", _ENGINE_CLASSES, ids=_ENGINE_IDS) - def test_strided_every_other_element(self, cls): - """Non-contiguous array (stride=2) must give same result as contiguous copy.""" - x = np.arange(10, dtype=np.float64) - strided = x[::2] # [0,2,4,6,8] — NOT C_CONTIGUOUS - assert not strided.flags["C_CONTIGUOUS"] - out_strided = cls(2).process_batch(strided) - out_contig = cls(2).process_batch(np.ascontiguousarray(strided)) - np.testing.assert_array_equal(out_strided, out_contig) - - @pytest.mark.parametrize("cls", _ENGINE_CLASSES, ids=_ENGINE_IDS) - def test_strided_column_slice(self, cls): - """Column of a 2D array is non-contiguous — must still work correctly.""" - m = np.arange(16.0).reshape(8, 2) - col = m[:, 0] # [0,2,4,6,8,10,12,14] - assert not col.flags["C_CONTIGUOUS"] - out_col = cls(3).process_batch(col) - out_contig = cls(3).process_batch(np.ascontiguousarray(col)) - np.testing.assert_array_equal(out_col, out_contig) - - @pytest.mark.parametrize("cls", _ENGINE_CLASSES, ids=_ENGINE_IDS) - def test_strided_reversed(self, cls): - """Reverse-strided array (negative step) must work correctly.""" - x = np.arange(8, dtype=np.float64) - rev = x[::-1] # [7,6,5,4,3,2,1,0] - assert not rev.flags["C_CONTIGUOUS"] - out_rev = cls(2).process_batch(rev) - out_contig = cls(2).process_batch(np.ascontiguousarray(rev)) - np.testing.assert_array_equal(out_rev, out_contig) - - @pytest.mark.parametrize("dtype", [np.float32, np.int32, np.int64], - ids=["float32", "int32", "int64"]) - def test_numeric_dtype_cast_in_engine(self, dtype): - """C++ engine accepts numeric dtypes via forcecast -> output is always float64.""" - x = np.array([1, 3, 2, 5, 4], dtype=dtype) - out = rrc.MonotonicMax(3).process_batch(x) - assert out.dtype == np.float64 - np.testing.assert_allclose(out, rrc.MonotonicMax(3).process_batch(x.astype(np.float64))) - - @pytest.mark.parametrize("fn", _ALL_FNS, ids=["max", "min", "median", "variance"]) - @pytest.mark.parametrize("dtype", [np.float32, np.int64, np.bool_], - ids=["float32", "int64", "bool"]) - def test_numeric_dtype_in_high_level_api(self, fn, dtype): - """High-level API must handle any numeric dtype, returning float64.""" - x = np.array([1, 3, 2, 5, 4], dtype=dtype) - out = fn(x, 3) - assert out.dtype == np.float64 - _nan_allclose(out, fn(x.astype(np.float64), 3)) - - @pytest.mark.parametrize("fn", _ALL_FNS, ids=["max", "min", "median", "variance"]) - def test_strided_high_level_api(self, fn): - """High-level API must give same result for strided and contiguous input.""" - base = np.array([1.0, 3.0, 2.0, 7.0, 4.0, 6.0, 5.0, 8.0]) - strided = base[::2] # [1,2,4,5] — non-contiguous - assert not strided.flags["C_CONTIGUOUS"] - out_s = fn(strided, 2) - out_c = fn(np.ascontiguousarray(strided), 2) - _nan_allclose(np.asarray(out_s), np.asarray(out_c)) - - -# ── Fuzz / stress tests ──────────────────────────────────────────────────────── - -class TestFuzz: - """Large random-NaN arrays compared against pandas — catches edge cases - in MultisetMedian mid_ tracking and SlidingWelfordRing under churn.""" - - @pytest.mark.parametrize("fn,pd_fn", [ - (rr.rolling_max, lambda s, k, mp: s.rolling(k, min_periods=mp).max().to_numpy()), - (rr.rolling_min, lambda s, k, mp: s.rolling(k, min_periods=mp).min().to_numpy()), - (rr.rolling_median, lambda s, k, mp: s.rolling(k, min_periods=mp).median().to_numpy()), - (rr.rolling_variance, lambda s, k, mp: s.rolling(k, min_periods=mp).var().to_numpy()), - ], ids=["max", "min", "median", "variance"]) - def test_random_nan_20pct_matches_pandas(self, fn, pd_fn): - np.random.seed(42) - x = np.random.randn(5000) - x[np.random.rand(5000) < 0.2] = np.nan - k, mp = 15, 5 - _nan_allclose(fn(x, k, min_periods=mp), - pd_fn(pd.Series(x), k, mp), - rtol=1e-10, atol=1e-10) - - @pytest.mark.parametrize("fn,pd_fn", [ - (rr.rolling_max, lambda s, k, mp: s.rolling(k, min_periods=mp).max().to_numpy()), - (rr.rolling_min, lambda s, k, mp: s.rolling(k, min_periods=mp).min().to_numpy()), - (rr.rolling_median, lambda s, k, mp: s.rolling(k, min_periods=mp).median().to_numpy()), - (rr.rolling_variance, lambda s, k, mp: s.rolling(k, min_periods=mp).var().to_numpy()), - ], ids=["max", "min", "median", "variance"]) - def test_consecutive_nan_blocks_matches_pandas(self, fn, pd_fn): - """Long runs of NaN stress-test window expiry logic.""" - np.random.seed(7) - x = np.random.randn(2000) - # Insert 3 blocks of 20 consecutive NaN - for start in [100, 500, 1400]: - x[start:start + 20] = np.nan - k, mp = 10, 3 - _nan_allclose(fn(x, k, min_periods=mp), - pd_fn(pd.Series(x), k, mp), - rtol=1e-10, atol=1e-10) - - def test_stress_many_instances_no_crash(self): - """Create and discard 2000 instances — verifies no resource leak crashes.""" - x = np.random.randn(100).astype(np.float64) - for _ in range(2000): - rrc.MultisetMedian(10).process_batch(x) - - -# ── C++ engine — SlidingCovariance ──────────────────────────────────────────── - -def _cov_ref(x, y, k): - """Sample covariance (ddof=1) over valid pairs in each rolling window.""" - out = np.full(len(x), np.nan) - for i in range(len(x)): - xi = x[max(0, i - k + 1):i + 1] - yi = y[max(0, i - k + 1):i + 1] - mask = ~np.isnan(xi) & ~np.isnan(yi) - xi, yi = xi[mask], yi[mask] - if len(xi) >= 2: - out[i] = np.cov(xi, yi, ddof=1)[0, 1] - return out - - -def _cor_ref(x, y, k): - """Pearson correlation over valid pairs in each rolling window.""" - out = np.full(len(x), np.nan) - for i in range(len(x)): - xi = x[max(0, i - k + 1):i + 1] - yi = y[max(0, i - k + 1):i + 1] - mask = ~np.isnan(xi) & ~np.isnan(yi) - xi, yi = xi[mask], yi[mask] - if len(xi) >= 2 and np.std(xi) > 0 and np.std(yi) > 0: - out[i] = np.corrcoef(xi, yi)[0, 1] - return out - - -class TestSlidingCovariance: - - def test_initial_state_is_nan(self): - sc = rrc.SlidingCovariance(3) - assert math.isnan(sc.get_covariance()) - assert math.isnan(sc.get_correlation()) - assert math.isnan(sc.get_mean_x()) - assert math.isnan(sc.get_mean_y()) - - def test_perfect_positive_correlation(self): - sc = rrc.SlidingCovariance(3) - for xi, yi in [(1.0, 2.0), (2.0, 4.0), (3.0, 6.0)]: - sc.update(xi, yi) - assert pytest.approx(sc.get_correlation(), abs=1e-12) == 1.0 - assert pytest.approx(sc.get_covariance(), abs=1e-12) == 2.0 # cov([1,2,3],[2,4,6]) - - def test_perfect_negative_correlation(self): - sc = rrc.SlidingCovariance(3) - for xi, yi in [(1.0, 3.0), (2.0, 2.0), (3.0, 1.0)]: - sc.update(xi, yi) - assert pytest.approx(sc.get_correlation(), abs=1e-12) == -1.0 - - def test_uncorrelated_constant_x(self): - sc = rrc.SlidingCovariance(4) - for xi, yi in [(5.0, 1.0), (5.0, 2.0), (5.0, 3.0), (5.0, 4.0)]: - sc.update(xi, yi) - # M2_x == 0 -> correlation is NaN - assert math.isnan(sc.get_correlation()) - # covariance should be ~0 - assert pytest.approx(sc.get_covariance(), abs=1e-12) == 0.0 - - def test_window_expiry_removes_old_pairs(self): - sc = rrc.SlidingCovariance(2) - sc.update(1.0, 1.0) - sc.update(2.0, 2.0) - sc.update(10.0, 10.0) # window: [(2,2),(10,10)] - assert pytest.approx(sc.get_mean_x(), abs=1e-12) == 6.0 - assert pytest.approx(sc.get_correlation(), abs=1e-12) == 1.0 - - def test_nan_pair_skipped_not_added(self): - # Window=3: add (1,2),(2,4),(3,6) then NaN pair. - # NaN removes oldest (1,2) but adds nothing -> valid: [(2,4),(3,6)] - sc = rrc.SlidingCovariance(3) - for xi, yi in [(1.0, 2.0), (2.0, 4.0), (3.0, 6.0)]: - sc.update(xi, yi) - sc.update(math.nan, 5.0) - assert pytest.approx(sc.get_covariance(), abs=1e-12) == 1.0 # cov([2,3],[4,6]) - assert pytest.approx(sc.get_correlation(), abs=1e-12) == 1.0 - - def test_single_pair_covariance_is_nan(self): - sc = rrc.SlidingCovariance(3) - sc.update(1.0, 2.0) - assert math.isnan(sc.get_covariance()) # needs >=2 pairs - - @pytest.mark.parametrize("k", [2, 3, 5]) - def test_covariance_against_numpy_reference(self, k): - x = np.array([-3.0, -1.0, 0.0, 2.0, 10.0, 7.0, 7.0, 8.0]) - y = np.array([1.0, 3.0, -1.0, 4.0, 2.0, 6.0, 5.0, 0.0]) - sc = rrc.SlidingCovariance(k) - out = sc.process_covariance_batch(x, y) - _nan_allclose(out, _cov_ref(x, y, k), rtol=1e-11, atol=1e-11) - - @pytest.mark.parametrize("k", [2, 3, 5]) - def test_correlation_against_numpy_reference(self, k): - x = np.array([-3.0, -1.0, 0.0, 2.0, 10.0, 7.0, 7.0, 8.0]) - y = np.array([1.0, 3.0, -1.0, 4.0, 2.0, 6.0, 5.0, 0.0]) - sc = rrc.SlidingCovariance(k) - out = sc.process_correlation_batch(x, y) - _nan_allclose(out, _cor_ref(x, y, k), rtol=1e-11, atol=1e-11) - - def test_random_nan_fuzzing_against_reference(self): - np.random.seed(123) - x = np.random.randn(500) - y = np.random.randn(500) - x[np.random.rand(500) < 0.15] = np.nan - y[np.random.rand(500) < 0.15] = np.nan - k = 10 - sc = rrc.SlidingCovariance(k) - cov_out = sc.process_covariance_batch(x, y) - sc2 = rrc.SlidingCovariance(k) - cor_out = sc2.process_correlation_batch(x, y) - _nan_allclose(cov_out, _cov_ref(x, y, k), rtol=1e-9, atol=1e-9) - _nan_allclose(cor_out, _cor_ref(x, y, k), rtol=1e-9, atol=1e-9) - - def test_process_batch_rejects_length_mismatch(self): - sc = rrc.SlidingCovariance(3) - with pytest.raises(RuntimeError, match="same length"): - sc.process_covariance_batch(np.array([1.0, 2.0]), np.array([1.0])) - - def test_process_batch_rejects_2d_input(self): - sc = rrc.SlidingCovariance(3) - with pytest.raises(RuntimeError): - sc.process_covariance_batch(np.ones((2, 3)), np.ones(3)) - - def test_empty_input(self): - sc = rrc.SlidingCovariance(3) - out = sc.process_covariance_batch(np.array([]), np.array([])) - assert len(out) == 0 and out.dtype == np.float64 - - -# ── High-level API — rolling_cov / rolling_cor ──────────────────────────────── - -class TestRollingCovCor: - - def test_rolling_cov_known_values(self): - x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) - y = np.array([2.0, 4.0, 6.0, 8.0, 10.0]) - # cov(x*2, x) = 2 * var(x) = 2 * 1.0 = 2.0 - out = rr.rolling_cov(x, y, 3) - expected = np.array([np.nan, np.nan, 2.0, 2.0, 2.0]) - _nan_allclose(out, expected) - - def test_rolling_cov_default_min_periods_masks_warmup(self): - x = np.array([1.0, 2.0, 3.0, 4.0]) - y = np.array([4.0, 3.0, 2.0, 1.0]) - out = rr.rolling_cov(x, y, 3) - assert np.isnan(out[0]) and np.isnan(out[1]) - assert not np.isnan(out[2]) - - def test_rolling_cor_known_perfect_positive(self): - x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) - y = x * 3.0 - out = rr.rolling_cor(x, y, 3) - np.testing.assert_allclose(out[2:], 1.0, atol=1e-12) - - def test_rolling_cor_known_perfect_negative(self): - x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) - y = -x + 6.0 - out = rr.rolling_cor(x, y, 3) - np.testing.assert_allclose(out[2:], -1.0, atol=1e-12) - - def test_rolling_cov_matches_pandas(self): - np.random.seed(7) - x = np.random.randn(200) - y = np.random.randn(200) - k = 8 - expected = pd.Series(x).rolling(k, min_periods=k).cov(pd.Series(y)).to_numpy() - _nan_allclose(rr.rolling_cov(x, y, k), expected, rtol=1e-10, atol=1e-10) - - def test_rolling_cor_matches_pandas(self): - np.random.seed(7) - x = np.random.randn(200) - y = np.random.randn(200) - k = 8 - expected = pd.Series(x).rolling(k, min_periods=k).corr(pd.Series(y)).to_numpy() - _nan_allclose(rr.rolling_cor(x, y, k), expected, rtol=1e-10, atol=1e-10) - - def test_rolling_cov_with_nan_matches_pandas(self): - np.random.seed(42) - x = np.random.randn(300) - y = np.random.randn(300) - x[np.random.rand(300) < 0.15] = np.nan - y[np.random.rand(300) < 0.15] = np.nan - k, mp = 7, 3 - expected = pd.Series(x).rolling(k, min_periods=mp).cov(pd.Series(y)).to_numpy() - _nan_allclose(rr.rolling_cov(x, y, k, min_periods=mp), expected, rtol=1e-9, atol=1e-9) - - def test_rolling_cor_with_nan_matches_pandas(self): - np.random.seed(42) - x = np.random.randn(300) - y = np.random.randn(300) - x[np.random.rand(300) < 0.15] = np.nan - y[np.random.rand(300) < 0.15] = np.nan - k, mp = 7, 3 - expected = pd.Series(x).rolling(k, min_periods=mp).corr(pd.Series(y)).to_numpy() - _nan_allclose(rr.rolling_cor(x, y, k, min_periods=mp), expected, rtol=1e-9, atol=1e-9) - - def test_series_input_returns_series(self): - s = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0], name="a") - t = pd.Series([5.0, 4.0, 3.0, 2.0, 1.0], name="b") - out = rr.rolling_cov(s, t, 3) - assert isinstance(out, pd.Series) - assert out.name == "a" - - def test_empty_input(self): - out = rr.rolling_cov(np.array([]), np.array([]), 3) - assert len(out) == 0 - - -# ── method="fast" (SlidingMomentsPrefix) ───────────────────────────────────── - -class TestFastMethod: - - @pytest.mark.parametrize("fn,pd_fn", [ - (lambda x, k: rr.rolling_variance(x, k, method="fast"), - lambda s, k: s.rolling(k).var().to_numpy()), - (lambda x, k: rr.rolling_skewness(x, k, method="fast"), - lambda s, k: s.rolling(k).skew().to_numpy()), - (lambda x, k: rr.rolling_kurtosis(x, k, method="fast"), - lambda s, k: s.rolling(k).kurt().to_numpy()), - ], ids=["variance", "skewness", "kurtosis"]) - def test_fast_matches_pandas_no_nan(self, fn, pd_fn): - rng = np.random.default_rng(0) - x = rng.standard_normal(300) - k = 10 - _nan_allclose(fn(x, k), pd_fn(pd.Series(x), k), rtol=1e-9, atol=1e-9) - - @pytest.mark.parametrize("fn_stable,fn_fast", [ - (lambda x, k, mp: rr.rolling_variance(x, k, min_periods=mp), - lambda x, k, mp: rr.rolling_variance(x, k, min_periods=mp, method="fast")), - (lambda x, k, mp: rr.rolling_skewness(x, k, min_periods=mp), - lambda x, k, mp: rr.rolling_skewness(x, k, min_periods=mp, method="fast")), - (lambda x, k, mp: rr.rolling_kurtosis(x, k, min_periods=mp), - lambda x, k, mp: rr.rolling_kurtosis(x, k, min_periods=mp, method="fast")), - ], ids=["variance", "skewness", "kurtosis"]) - def test_fast_agrees_with_stable_with_nan(self, fn_stable, fn_fast): - rng = np.random.default_rng(1) - x = rng.standard_normal(300) - x[rng.random(300) < 0.1] = np.nan - k, mp = 10, 5 - _nan_allclose(fn_fast(x, k, mp), fn_stable(x, k, mp), rtol=1e-9, atol=1e-9) - - def test_fast_variance_warmup_nan(self): - x = np.arange(1.0, 11.0) - out = rr.rolling_variance(x, 5, method="fast") - assert all(np.isnan(out[:4])) - assert not np.isnan(out[4]) - - def test_fast_skewness_needs_3_obs(self): - x = np.arange(1.0, 11.0) - out = rr.rolling_skewness(x, 5, method="fast") - assert all(np.isnan(out[:4])) - - def test_fast_kurtosis_needs_4_obs(self): - x = np.arange(1.0, 11.0) - out = rr.rolling_kurtosis(x, 5, method="fast") - assert all(np.isnan(out[:4])) - - def test_fast_min_periods(self): - x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) - out = rr.rolling_variance(x, 3, min_periods=2, method="fast") - assert np.isnan(out[0]) - assert not np.isnan(out[1]) - - def test_fast_all_nan_window_returns_nan(self): - x = np.array([np.nan, np.nan, np.nan, 1.0, 2.0, 3.0]) - out = rr.rolling_variance(x, 3, min_periods=1, method="fast") - assert np.isnan(out[2]) - assert not np.isnan(out[5]) - - def test_fast_empty_input(self): - for fn in [rr.rolling_variance, rr.rolling_skewness, rr.rolling_kurtosis]: - assert len(fn(np.array([]), 3, method="fast")) == 0 \ No newline at end of file diff --git a/py_package/tests/test_covcor.py b/py_package/tests/test_covcor.py new file mode 100644 index 0000000..36bfa30 --- /dev/null +++ b/py_package/tests/test_covcor.py @@ -0,0 +1,115 @@ +"""Tests for rolling_cov and rolling_cor high-level API.""" +import numpy as np +import pandas as pd +import pytest + +import robustrolling as rr + +from conftest import nan_allclose + + +# ── Output type ──────────────────────────────────────────────────────────────── + +class TestCovCorOutputType: + + def test_series_input_returns_series_with_name_of_x(self): + s = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0], name="a") + t = pd.Series([5.0, 4.0, 3.0, 2.0, 1.0], name="b") + out = rr.rolling_cov(s, t, 3) + assert isinstance(out, pd.Series) + assert out.name == "a" + + def test_ndarray_input_returns_ndarray(self): + x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + y = np.array([5.0, 4.0, 3.0, 2.0, 1.0]) + assert isinstance(rr.rolling_cov(x, y, 3), np.ndarray) + assert isinstance(rr.rolling_cor(x, y, 3), np.ndarray) + + def test_output_length_equals_input_length(self): + x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + y = np.array([5.0, 4.0, 3.0, 2.0, 1.0]) + assert len(rr.rolling_cov(x, y, 3)) == 5 + assert len(rr.rolling_cor(x, y, 3)) == 5 + + def test_empty_input_returns_empty(self): + out = rr.rolling_cov(np.array([]), np.array([]), 3) + assert len(out) == 0 + + +# ── Known values ─────────────────────────────────────────────────────────────── + +class TestCovCorKnownValues: + + def test_rolling_cov_perfect_positive(self): + x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + y = x * 2.0 # cov(2x, x) = 2 * var(x) = 2 * 1.0 = 2.0 + out = rr.rolling_cov(x, y, 3) + nan_allclose(out, [np.nan, np.nan, 2.0, 2.0, 2.0]) + + def test_rolling_cor_perfect_positive(self): + x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + out = rr.rolling_cor(x, x * 3.0, 3) + np.testing.assert_allclose(out[2:], 1.0, atol=1e-12) + + def test_rolling_cor_perfect_negative(self): + x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + out = rr.rolling_cor(x, -x + 6.0, 3) + np.testing.assert_allclose(out[2:], -1.0, atol=1e-12) + + def test_rolling_cov_default_min_periods_masks_warmup(self): + x = np.array([1.0, 2.0, 3.0, 4.0]) + y = np.array([4.0, 3.0, 2.0, 1.0]) + out = rr.rolling_cov(x, y, 3) + assert np.isnan(out[0]) and np.isnan(out[1]) + assert not np.isnan(out[2]) + + +# ── Pandas comparison ────────────────────────────────────────────────────────── + +class TestCovCorPandasComparison: + + def test_rolling_cov_matches_pandas_clean(self): + np.random.seed(7) + x = np.random.randn(200) + y = np.random.randn(200) + k = 8 + expected = pd.Series(x).rolling(k, min_periods=k).cov(pd.Series(y)).to_numpy() + nan_allclose(rr.rolling_cov(x, y, k), expected, rtol=1e-10, atol=1e-10) + + def test_rolling_cor_matches_pandas_clean(self): + np.random.seed(7) + x = np.random.randn(200) + y = np.random.randn(200) + k = 8 + expected = pd.Series(x).rolling(k, min_periods=k).corr(pd.Series(y)).to_numpy() + nan_allclose(rr.rolling_cor(x, y, k), expected, rtol=1e-10, atol=1e-10) + + def test_rolling_cov_matches_pandas_with_nan(self): + np.random.seed(42) + x = np.random.randn(300) + y = np.random.randn(300) + x[np.random.rand(300) < 0.15] = np.nan + y[np.random.rand(300) < 0.15] = np.nan + k, mp = 7, 3 + expected = pd.Series(x).rolling(k, min_periods=mp).cov(pd.Series(y)).to_numpy() + nan_allclose(rr.rolling_cov(x, y, k, min_periods=mp), expected, rtol=1e-9, atol=1e-9) + + def test_rolling_cor_matches_pandas_with_nan(self): + np.random.seed(42) + x = np.random.randn(300) + y = np.random.randn(300) + x[np.random.rand(300) < 0.15] = np.nan + y[np.random.rand(300) < 0.15] = np.nan + k, mp = 7, 3 + expected = pd.Series(x).rolling(k, min_periods=mp).corr(pd.Series(y)).to_numpy() + nan_allclose(rr.rolling_cor(x, y, k, min_periods=mp), expected, rtol=1e-9, atol=1e-9) + + @pytest.mark.parametrize("mp", [1, 2, 3]) + def test_rolling_cov_min_periods_matches_pandas(self, mp): + np.random.seed(10 + mp) + x = np.random.randn(100) + y = np.random.randn(100) + x[np.random.rand(100) < 0.2] = np.nan + k = 6 + expected = pd.Series(x).rolling(k, min_periods=mp).cov(pd.Series(y)).to_numpy() + nan_allclose(rr.rolling_cov(x, y, k, min_periods=mp), expected, rtol=1e-9, atol=1e-9) diff --git a/py_package/tests/test_engine.py b/py_package/tests/test_engine.py new file mode 100644 index 0000000..51d9202 --- /dev/null +++ b/py_package/tests/test_engine.py @@ -0,0 +1,513 @@ +"""Tests for the C++ engine layer (rrc.*) — stateful rolling objects.""" +import math + +import numpy as np +import pandas as pd +import pytest + +import robust_rolling_core as rrc + +from conftest import nan_allclose, var_ref, mean_ref, median_ref, cov_ref, cor_ref + + +# ── SlidingWelford ───────────────────────────────────────────────────────────── + +class TestSlidingWelford: + + def test_known_values(self): + x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + out = rrc.SlidingWelford(3).process_batch(x) + nan_allclose(out, [np.nan, 0.5, 1.0, 1.0, 1.0]) + + def test_window_size_1_all_nan(self): + out = rrc.SlidingWelford(1).process_batch(np.array([10.0, 11.0, 12.0])) + assert np.all(np.isnan(out)) + + def test_window_larger_than_array(self): + out = rrc.SlidingWelford(10).process_batch(np.array([2.0, 4.0, 6.0])) + assert np.isnan(out[0]) + assert np.isclose(out[1], 2.0, rtol=1e-12) # var([2,4]) + assert np.isclose(out[2], 4.0, rtol=1e-12) # var([2,4,6]) + + def test_constant_zero_variance(self): + out = rrc.SlidingWelford(4).process_batch(np.full(8, 5.0)) + assert np.isnan(out[0]) + np.testing.assert_allclose(out[1:], 0.0, atol=1e-12) + + @pytest.mark.parametrize("k", [2, 3, 5]) + def test_against_naive_reference(self, k): + x = np.array([-3.0, -1.0, 0.0, 2.0, 10.0, 7.0, 7.0, 8.0]) + out = rrc.SlidingWelford(k).process_batch(x) + np.testing.assert_allclose(out, var_ref(x, k), rtol=1e-11, atol=1e-11, + equal_nan=True) + + def test_nan_does_not_contribute(self): + x = np.array([1.0, 2.0, np.nan, 4.0, 5.0]) + out = rrc.SlidingWelford(3).process_batch(x) + assert np.isclose(out[2], 0.5, atol=1e-12) # var([1,2]) = 0.5 + + def test_empty_input(self): + out = rrc.SlidingWelford(3).process_batch(np.array([])) + assert len(out) == 0 and out.dtype == np.float64 + + def test_rejects_zero_window(self): + with pytest.raises(ValueError, match="Window length must be greater than 0"): + rrc.SlidingWelford(0) + + def test_rejects_2d_input(self): + with pytest.raises(RuntimeError, match="Input must be 1D array"): + rrc.SlidingWelford(2).process_batch(np.ones((2, 3))) + + def test_integer_input_converted_to_float64(self): + out = rrc.SlidingWelford(2).process_batch(np.array([1, 2, 3, 4], dtype=np.int32)) + assert out.dtype == np.float64 + + +# ── MonotonicMax ─────────────────────────────────────────────────────────────── + +class TestMonotonicMax: + + def test_known_values(self): + x = np.array([1.0, 2.0, 3.0, 2.0, 5.0]) + out = rrc.MonotonicMax(3).process_batch(x) + np.testing.assert_allclose(out, [1.0, 2.0, 3.0, 3.0, 5.0], atol=1e-12) + + def test_window_size_1_identity(self): + x = np.array([-1.0, 0.0, 10.0, 2.0]) + np.testing.assert_allclose(rrc.MonotonicMax(1).process_batch(x), x) + + def test_window_larger_than_array_cumulative(self): + x = np.array([2.0, -3.0, 7.0, 1.0]) + np.testing.assert_allclose(rrc.MonotonicMax(20).process_batch(x), + np.maximum.accumulate(x)) + + def test_decreasing_sequence(self): + x = np.array([9.0, 7.0, 5.0, 3.0, 1.0]) + np.testing.assert_allclose(rrc.MonotonicMax(3).process_batch(x), + [9.0, 9.0, 9.0, 7.0, 5.0]) + + @pytest.mark.parametrize("k", [2, 3, 5]) + def test_against_naive_reference(self, k): + x = np.array([-2.0, 6.0, 1.0, 8.0, 0.0, 8.0, -1.0]) + out = rrc.MonotonicMax(k).process_batch(x) + expected = np.array([np.max(x[max(0, i - k + 1):i + 1]) for i in range(len(x))]) + np.testing.assert_allclose(out, expected, atol=1e-12) + + def test_nan_advances_window_returns_current_max(self): + x = np.array([1.0, 2.0, np.nan, 1.0]) + out = rrc.MonotonicMax(2).process_batch(x) + assert out[2] == 2.0 # window=[2,NaN], max=2 + assert out[3] == 1.0 # window=[NaN,1], max=1 + + def test_nan_at_start_empty_window_returns_nan(self): + out = rrc.MonotonicMax(2).process_batch(np.array([np.nan, 2.0, 3.0])) + assert np.isnan(out[0]) + + def test_empty_input(self): + out = rrc.MonotonicMax(3).process_batch(np.array([])) + assert len(out) == 0 and out.dtype == np.float64 + + def test_rejects_zero_window(self): + with pytest.raises(ValueError, match="Window length must be greater than 0"): + rrc.MonotonicMax(0) + + def test_rejects_2d_input(self): + with pytest.raises(RuntimeError, match="Input must be 1D array"): + rrc.MonotonicMax(2).process_batch(np.ones((2, 3))) + + def test_integer_input_converted_to_float64(self): + out = rrc.MonotonicMax(2).process_batch(np.array([1, 2, 3], dtype=np.int32)) + assert out.dtype == np.float64 + + +# ── MonotonicMin ─────────────────────────────────────────────────────────────── + +class TestMonotonicMin: + + def test_known_values(self): + x = np.array([1.0, 3.0, 2.0, 5.0, 4.0]) + out = rrc.MonotonicMin(3).process_batch(x) + np.testing.assert_allclose(out, [1.0, 1.0, 1.0, 2.0, 2.0], atol=1e-12) + + def test_window_size_1_identity(self): + x = np.array([-1.0, 0.0, 10.0, 2.0]) + np.testing.assert_allclose(rrc.MonotonicMin(1).process_batch(x), x) + + def test_window_larger_than_array_cumulative(self): + x = np.array([2.0, -3.0, 7.0, 1.0]) + np.testing.assert_allclose(rrc.MonotonicMin(20).process_batch(x), + np.minimum.accumulate(x)) + + def test_increasing_sequence(self): + x = np.array([1.0, 3.0, 5.0, 7.0, 9.0]) + np.testing.assert_allclose(rrc.MonotonicMin(3).process_batch(x), + [1.0, 1.0, 1.0, 3.0, 5.0]) + + @pytest.mark.parametrize("k", [2, 3, 5]) + def test_against_naive_reference(self, k): + x = np.array([-2.0, 6.0, 1.0, 8.0, 0.0, 8.0, -1.0]) + out = rrc.MonotonicMin(k).process_batch(x) + expected = np.array([np.min(x[max(0, i - k + 1):i + 1]) for i in range(len(x))]) + np.testing.assert_allclose(out, expected, atol=1e-12) + + def test_nan_advances_window_returns_current_min(self): + x = np.array([5.0, 1.0, np.nan, 9.0]) + out = rrc.MonotonicMin(2).process_batch(x) + assert out[2] == 1.0 # window=[1,NaN], min=1 + assert out[3] == 9.0 # window=[NaN,9], min=9 + + def test_nan_at_start_empty_window_returns_nan(self): + out = rrc.MonotonicMin(2).process_batch(np.array([np.nan, 2.0, 3.0])) + assert np.isnan(out[0]) + + def test_empty_input(self): + out = rrc.MonotonicMin(3).process_batch(np.array([])) + assert len(out) == 0 + + def test_rejects_zero_window(self): + with pytest.raises(ValueError, match="Window length must be greater than 0"): + rrc.MonotonicMin(0) + + def test_rejects_2d_input(self): + with pytest.raises(RuntimeError, match="Input must be 1D array"): + rrc.MonotonicMin(2).process_batch(np.ones((2, 3))) + + +# ── MultisetMedian ───────────────────────────────────────────────────────────── + +class TestMultisetMedian: + + def test_known_values_odd_window(self): + x = np.array([1.0, 3.0, 2.0, 5.0, 4.0]) + np.testing.assert_allclose(rrc.MultisetMedian(3).process_batch(x), + median_ref(x, 3), rtol=1e-12) + + def test_known_values_even_window(self): + x = np.array([1.0, 3.0, 2.0, 4.0]) + np.testing.assert_allclose(rrc.MultisetMedian(4).process_batch(x), + median_ref(x, 4), rtol=1e-12) + + def test_window_size_1_identity(self): + x = np.array([-1.0, 0.0, 10.0, 2.0]) + np.testing.assert_allclose(rrc.MultisetMedian(1).process_batch(x), x) + + def test_window_size_2_regression(self): + # Regression: window_size=2 caused segfault before fix + x = np.array([3.0, 1.0, 2.0, 5.0, 4.0]) + np.testing.assert_allclose(rrc.MultisetMedian(2).process_batch(x), + median_ref(x, 2), rtol=1e-12) + + def test_even_window_descending_fill_regression(self): + # Regression: even window filled descending caused wrong mid_ position + x = np.array([4.0, 3.0, 2.0, 1.0]) + np.testing.assert_allclose(rrc.MultisetMedian(4).process_batch(x), + median_ref(x, 4), rtol=1e-12) + + @pytest.mark.parametrize("k", [2, 3, 5]) + def test_against_naive_reference(self, k): + x = np.array([-2.0, 6.0, 1.0, -8.0, 0.0, 8.0, -1.0]) + np.testing.assert_allclose(rrc.MultisetMedian(k).process_batch(x), + median_ref(x, k), rtol=1e-12) + + def test_large_array_against_reference(self): + np.random.seed(42) + x = np.random.randn(1000) + k = 15 + np.testing.assert_allclose(rrc.MultisetMedian(k).process_batch(x), + median_ref(x, k), rtol=1e-10) + + def test_element_entering_equals_leaving(self): + x = np.array([1.0, 2.0, 3.0, 1.0, 2.0, 3.0]) + np.testing.assert_allclose(rrc.MultisetMedian(3).process_batch(x), + median_ref(x, 3), rtol=1e-12) + + def test_window_larger_than_array(self): + x = np.array([3.0, 1.0, 4.0, 1.0]) + np.testing.assert_allclose(rrc.MultisetMedian(20).process_batch(x), + median_ref(x, 20), rtol=1e-12) + + def test_nan_does_not_contribute(self): + # window=3, [1, 2, NaN, 4]: at NaN window=[1,2,NaN] -> median=1.5 + x = np.array([1.0, 2.0, np.nan, 4.0]) + out = rrc.MultisetMedian(3).process_batch(x) + assert np.isclose(out[2], 1.5, atol=1e-12) + assert np.isclose(out[3], 3.0, atol=1e-12) # window=[2,NaN,4] -> median([2,4])=3 + + def test_empty_input(self): + out = rrc.MultisetMedian(3).process_batch(np.array([])) + assert len(out) == 0 and out.dtype == np.float64 + + def test_rejects_zero_window(self): + with pytest.raises(ValueError, match="Window length must be greater than 0"): + rrc.MultisetMedian(0) + + def test_rejects_none_window(self): + with pytest.raises(TypeError): + rrc.MultisetMedian(None) + + def test_rejects_2d_input(self): + with pytest.raises(RuntimeError, match="Input must be 1D array"): + rrc.MultisetMedian(2).process_batch(np.ones((2, 3))) + + def test_integer_input_converted_to_float64(self): + out = rrc.MultisetMedian(2).process_batch(np.array([1, 2, 3], dtype=np.int32)) + assert out.dtype == np.float64 + + +# ── SlidingMean ──────────────────────────────────────────────────────────────── + +class TestSlidingMean: + + def test_known_values(self): + x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + out = rrc.SlidingMean(3).process_batch(x) + nan_allclose(out, [1.0, 1.5, 2.0, 3.0, 4.0]) + + def test_window_size_1_identity(self): + x = np.array([3.0, 7.0, -1.0]) + nan_allclose(rrc.SlidingMean(1).process_batch(x), x) + + def test_window_larger_than_array(self): + nan_allclose(rrc.SlidingMean(10).process_batch(np.array([2.0, 4.0, 6.0])), + [2.0, 3.0, 4.0]) + + def test_constant_sequence(self): + out = rrc.SlidingMean(3).process_batch(np.full(6, 5.0)) + np.testing.assert_allclose(out, 5.0, atol=1e-12) + + @pytest.mark.parametrize("k", [2, 3, 5]) + def test_against_naive_reference(self, k): + x = np.array([-3.0, -1.0, 0.0, 2.0, 10.0, 7.0, 7.0, 8.0]) + out = rrc.SlidingMean(k).process_batch(x) + np.testing.assert_allclose(out, mean_ref(x, k), rtol=1e-12, atol=1e-12, + equal_nan=True) + + def test_nan_does_not_contribute(self): + x = np.array([1.0, np.nan, 3.0, 4.0, 5.0]) + out = rrc.SlidingMean(3).process_batch(x) + assert np.isclose(out[2], 2.0, atol=1e-12) # mean([1,3]) = 2.0 + + def test_nan_advances_window(self): + x = np.array([1.0, 2.0, np.nan, 4.0, 5.0]) + out = rrc.SlidingMean(3).process_batch(x) + assert np.isclose(out[3], 3.0, atol=1e-12) # mean([2,4]) = 3.0 + + def test_all_nan_returns_nan(self): + out = rrc.SlidingMean(2).process_batch(np.array([np.nan, np.nan, np.nan])) + assert np.all(np.isnan(out)) + + def test_empty_input(self): + out = rrc.SlidingMean(3).process_batch(np.array([])) + assert len(out) == 0 and out.dtype == np.float64 + + def test_rejects_zero_window(self): + with pytest.raises(ValueError, match="Window length must be greater than 0"): + rrc.SlidingMean(0) + + def test_rejects_2d_input(self): + with pytest.raises(RuntimeError, match="Input must be 1D array"): + rrc.SlidingMean(2).process_batch(np.ones((2, 3))) + + def test_integer_input_converted_to_float64(self): + out = rrc.SlidingMean(2).process_batch(np.array([1, 2, 3, 4], dtype=np.int32)) + assert out.dtype == np.float64 + + def test_min_periods_masks_warmup(self): + x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + out = rrc.SlidingMean(3).process_batch(x, min_periods=3) + assert np.isnan(out[0]) and np.isnan(out[1]) + assert np.isclose(out[2], 2.0, atol=1e-12) + + def test_min_periods_with_nan_input(self): + x = np.array([1.0, np.nan, 3.0, 4.0, 5.0]) + out = rrc.SlidingMean(3).process_batch(x, min_periods=2) + assert np.isnan(out[1]) # 1 non-NaN < 2 + assert np.isclose(out[2], 2.0, atol=1e-12) # 2 non-NaN >= 2 + + +# ── SlidingMoments ───────────────────────────────────────────────────────────── + +class TestSlidingMoments: + + def test_initial_state_is_nan(self): + sm = rrc.SlidingMoments(4) + assert sm.current_size() == 0 + assert math.isnan(sm.get_skewness()) + assert math.isnan(sm.get_kurtosis()) + + def test_symmetric_window_skewness_is_zero(self): + sm = rrc.SlidingMoments(3) + for v in [1.0, 2.0, 3.0]: + sm.update(v) + assert pytest.approx(sm.get_skewness(), abs=1e-12) == 0.0 + assert math.isnan(sm.get_kurtosis()) # n=3 < 4 + + def test_known_kurtosis_uniform_window(self): + # [1, 2, 3, 4]: excess kurtosis = -1.2 + sm = rrc.SlidingMoments(4) + for v in [1.0, 2.0, 3.0, 4.0]: + sm.update(v) + assert pytest.approx(sm.get_kurtosis(), abs=1e-10) == -1.2 + + def test_small_window_skewness_always_nan(self): + # window=2 -> at most 2 values -> skewness (needs >=3) always NaN + sm = rrc.SlidingMoments(2) + for v in [1.0, 2.0, 3.0]: + sm.update(v) + assert math.isnan(sm.get_skewness()) + assert math.isnan(sm.get_kurtosis()) + + def test_window_size_1_both_moments_always_nan(self): + sm = rrc.SlidingMoments(1) + sm.update(5.0) + assert sm.current_size() == 1 + assert math.isnan(sm.get_skewness()) + assert math.isnan(sm.get_kurtosis()) + + def test_nan_advances_window_and_reduces_size(self): + sm = rrc.SlidingMoments(4) + for v in [1.0, 2.0, 3.0, 4.0]: + sm.update(v) + assert sm.current_size() == 4 + assert not math.isnan(sm.get_kurtosis()) + + sm.update(float('nan')) + assert sm.current_size() == 3 + assert not math.isnan(sm.get_skewness()) + assert math.isnan(sm.get_kurtosis()) # n=3 < 4 + + def test_nan_does_not_corrupt_state(self): + sm = rrc.SlidingMoments(3) + for v in [10.0, 20.0, 30.0]: + sm.update(v) + skew_before = sm.get_skewness() + + for _ in range(3): + sm.update(float('nan')) + assert sm.current_size() == 0 + + for v in [10.0, 20.0, 30.0]: + sm.update(v) + assert pytest.approx(sm.get_skewness(), abs=1e-10) == skew_before + + def test_randomized_fuzzing_against_pandas(self): + np.random.seed(42) + window_size = int(np.random.randint(4, 20)) + data = np.random.randn(1000) * 50.0 + data[np.random.rand(1000) < 0.15] = np.nan + s = pd.Series(data) + expected_skew = s.rolling(window=window_size, min_periods=3).skew() + expected_kurt = s.rolling(window=window_size, min_periods=4).kurt() + expected_count = s.rolling(window=window_size, min_periods=0).count() + + sm = rrc.SlidingMoments(window_size) + for i, val in enumerate(data): + sm.update(val) + assert sm.current_size() == expected_count[i], f"count mismatch at {i}" + cpp_skew, pd_skew = sm.get_skewness(), expected_skew[i] + if pd.isna(pd_skew): + assert math.isnan(cpp_skew), f"skewness should be NaN at {i}" + else: + assert pytest.approx(cpp_skew, rel=1e-4, abs=1e-6) == pd_skew, \ + f"skewness mismatch at {i}" + cpp_kurt, pd_kurt = sm.get_kurtosis(), expected_kurt[i] + if pd.isna(pd_kurt): + assert math.isnan(cpp_kurt), f"kurtosis should be NaN at {i}" + else: + assert pytest.approx(cpp_kurt, rel=1e-4, abs=1e-6) == pd_kurt, \ + f"kurtosis mismatch at {i}" + + +# ── SlidingCovariance ────────────────────────────────────────────────────────── + +class TestSlidingCovariance: + + def test_initial_state_is_nan(self): + sc = rrc.SlidingCovariance(3) + assert math.isnan(sc.get_covariance()) + assert math.isnan(sc.get_correlation()) + assert math.isnan(sc.get_mean_x()) + assert math.isnan(sc.get_mean_y()) + + def test_perfect_positive_correlation(self): + sc = rrc.SlidingCovariance(3) + for xi, yi in [(1.0, 2.0), (2.0, 4.0), (3.0, 6.0)]: + sc.update(xi, yi) + assert pytest.approx(sc.get_correlation(), abs=1e-12) == 1.0 + assert pytest.approx(sc.get_covariance(), abs=1e-12) == 2.0 + + def test_perfect_negative_correlation(self): + sc = rrc.SlidingCovariance(3) + for xi, yi in [(1.0, 3.0), (2.0, 2.0), (3.0, 1.0)]: + sc.update(xi, yi) + assert pytest.approx(sc.get_correlation(), abs=1e-12) == -1.0 + + def test_constant_x_correlation_is_nan(self): + # std_dev(x) = 0 -> correlation = NaN, covariance = 0 + sc = rrc.SlidingCovariance(4) + for yi in [1.0, 2.0, 3.0, 4.0]: + sc.update(5.0, yi) + assert math.isnan(sc.get_correlation()) + assert pytest.approx(sc.get_covariance(), abs=1e-12) == 0.0 + assert pytest.approx(sc.get_mean_x(), abs=1e-12) == 5.0 + + def test_window_expiry_removes_old_pairs(self): + sc = rrc.SlidingCovariance(2) + sc.update(1.0, 1.0) + sc.update(2.0, 2.0) + sc.update(10.0, 10.0) # window: [(2,2),(10,10)] + assert pytest.approx(sc.get_mean_x(), abs=1e-12) == 6.0 + assert pytest.approx(sc.get_correlation(), abs=1e-12) == 1.0 + + def test_nan_pair_skipped_not_added(self): + sc = rrc.SlidingCovariance(3) + for xi, yi in [(1.0, 2.0), (2.0, 4.0), (3.0, 6.0)]: + sc.update(xi, yi) + sc.update(math.nan, 5.0) + assert pytest.approx(sc.get_covariance(), abs=1e-12) == 1.0 + assert pytest.approx(sc.get_correlation(), abs=1e-12) == 1.0 + + def test_single_pair_covariance_is_nan(self): + sc = rrc.SlidingCovariance(3) + sc.update(1.0, 2.0) + assert math.isnan(sc.get_covariance()) + + @pytest.mark.parametrize("k", [2, 3, 5]) + def test_covariance_against_numpy_reference(self, k): + x = np.array([-3.0, -1.0, 0.0, 2.0, 10.0, 7.0, 7.0, 8.0]) + y = np.array([1.0, 3.0, -1.0, 4.0, 2.0, 6.0, 5.0, 0.0]) + out = rrc.SlidingCovariance(k).process_covariance_batch(x, y) + nan_allclose(out, cov_ref(x, y, k), rtol=1e-11, atol=1e-11) + + @pytest.mark.parametrize("k", [2, 3, 5]) + def test_correlation_against_numpy_reference(self, k): + x = np.array([-3.0, -1.0, 0.0, 2.0, 10.0, 7.0, 7.0, 8.0]) + y = np.array([1.0, 3.0, -1.0, 4.0, 2.0, 6.0, 5.0, 0.0]) + out = rrc.SlidingCovariance(k).process_correlation_batch(x, y) + nan_allclose(out, cor_ref(x, y, k), rtol=1e-11, atol=1e-11) + + def test_random_nan_fuzzing_against_reference(self): + np.random.seed(123) + x = np.random.randn(500) + y = np.random.randn(500) + x[np.random.rand(500) < 0.15] = np.nan + y[np.random.rand(500) < 0.15] = np.nan + k = 10 + cov_out = rrc.SlidingCovariance(k).process_covariance_batch(x, y) + cor_out = rrc.SlidingCovariance(k).process_correlation_batch(x, y) + nan_allclose(cov_out, cov_ref(x, y, k), rtol=1e-9, atol=1e-9) + nan_allclose(cor_out, cor_ref(x, y, k), rtol=1e-9, atol=1e-9) + + def test_process_batch_rejects_length_mismatch(self): + sc = rrc.SlidingCovariance(3) + with pytest.raises(RuntimeError, match="same length"): + sc.process_covariance_batch(np.array([1.0, 2.0]), np.array([1.0])) + + def test_process_batch_rejects_2d_input(self): + sc = rrc.SlidingCovariance(3) + with pytest.raises(RuntimeError): + sc.process_covariance_batch(np.ones((2, 3)), np.ones(3)) + + def test_empty_input(self): + out = rrc.SlidingCovariance(3).process_covariance_batch(np.array([]), np.array([])) + assert len(out) == 0 and out.dtype == np.float64 diff --git a/py_package/tests/test_moments.py b/py_package/tests/test_moments.py new file mode 100644 index 0000000..55dd64d --- /dev/null +++ b/py_package/tests/test_moments.py @@ -0,0 +1,166 @@ +"""Tests for rolling_skewness, rolling_kurtosis and method='fast'.""" +import numpy as np +import pandas as pd +import pytest + +import robustrolling as rr + +from conftest import nan_allclose + +_FNS = [rr.rolling_skewness, rr.rolling_kurtosis] +_FN_IDS = ["skewness", "kurtosis"] + +# Minimum valid observations per metric (algorithm threshold, not min_periods) +_MIN_OBS = {rr.rolling_skewness: 3, rr.rolling_kurtosis: 4} + + +# ── Output type preservation ─────────────────────────────────────────────────── + +class TestMomentsOutputType: + + @pytest.mark.parametrize("fn", _FNS, ids=_FN_IDS) + def test_series_input_returns_series(self, fn): + s = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0], name="x") + out = fn(s, 5) + assert isinstance(out, pd.Series) + assert out.name == "x" + + @pytest.mark.parametrize("fn", _FNS, ids=_FN_IDS) + def test_series_preserves_index(self, fn): + idx = pd.date_range("2024-01-01", periods=5) + s = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0], index=idx) + assert fn(s, 5).index.equals(idx) + + @pytest.mark.parametrize("fn", _FNS, ids=_FN_IDS) + def test_ndarray_input_returns_ndarray(self, fn): + assert isinstance(fn(np.arange(1.0, 8.0), 5), np.ndarray) + + @pytest.mark.parametrize("fn", _FNS, ids=_FN_IDS) + def test_output_length_equals_input_length(self, fn): + arr = np.arange(1.0, 8.0) + assert len(fn(arr, 5)) == len(arr) + + @pytest.mark.parametrize("fn", _FNS, ids=_FN_IDS) + def test_output_dtype_is_float64(self, fn): + arr = np.array([1, 2, 3, 4, 5, 6, 7], dtype=np.int32) + assert fn(arr, 5).dtype == np.float64 + + @pytest.mark.parametrize("fn", _FNS, ids=_FN_IDS) + def test_empty_input_returns_empty(self, fn): + assert len(fn(np.array([]), 5)) == 0 + + +# ── Pandas comparison (stable method) ───────────────────────────────────────── + +_MOMENT_CASES = [ + # label, data, k + ("basic", [1.0, 3.0, -1.0, 5.0, 2.0, 8.0, 0.0], 4), + ("longer", [-2.0, 6.0, 1.0, 8.0, 0.0, 8.0, -1.0, + 3.0, 5.0, 2.0], 5), + ("with_nan", [1.0, np.nan, 3.0, 4.0, 5.0, 6.0, 7.0], 4), + ("leading_nan", [np.nan, np.nan, 1.0, 2.0, 3.0, 4.0], 4), + ("negatives", [-5.0, -1.0, -3.0, -2.0, -4.0, -6.0], 4), +] + +_MOMENT_IDS = [c[0] for c in _MOMENT_CASES] +_MOMENT_PARAMS = [(c[1], c[2]) for c in _MOMENT_CASES] + + +class TestMomentsPandasComparison: + + @pytest.mark.parametrize("data,k", _MOMENT_PARAMS, ids=_MOMENT_IDS) + def test_rolling_skewness_matches_pandas(self, data, k): + arr = np.array(data, dtype=np.float64) + expected = pd.Series(arr).rolling(k, min_periods=3).skew().to_numpy() + nan_allclose(rr.rolling_skewness(arr, k, min_periods=3), expected, rtol=1e-9, atol=1e-9) + + @pytest.mark.parametrize("data,k", _MOMENT_PARAMS, ids=_MOMENT_IDS) + def test_rolling_kurtosis_matches_pandas(self, data, k): + arr = np.array(data, dtype=np.float64) + expected = pd.Series(arr).rolling(k, min_periods=4).kurt().to_numpy() + nan_allclose(rr.rolling_kurtosis(arr, k, min_periods=4), expected, rtol=1e-9, atol=1e-9) + + def test_skewness_symmetric_window_is_zero(self): + # [1,2,3] is symmetric -> skewness = 0 + out = rr.rolling_skewness(np.array([1.0, 2.0, 3.0]), 3) + assert pytest.approx(out[2], abs=1e-12) == 0.0 + + def test_kurtosis_uniform_window_known_value(self): + # [1,2,3,4]: excess kurtosis = -1.2 + out = rr.rolling_kurtosis(np.array([1.0, 2.0, 3.0, 4.0]), 4) + assert pytest.approx(out[3], abs=1e-10) == -1.2 + + def test_skewness_window_smaller_than_min_obs_always_nan(self): + # window=2 -> never >=3 observations -> always NaN + out = rr.rolling_skewness(np.arange(1.0, 8.0), 2) + assert np.all(np.isnan(out)) + + def test_kurtosis_window_smaller_than_min_obs_always_nan(self): + # window=3 -> never >=4 observations -> always NaN + out = rr.rolling_kurtosis(np.arange(1.0, 8.0), 3) + assert np.all(np.isnan(out)) + + +# ── method="fast" ────────────────────────────────────────────────────────────── + +class TestFastMethod: + + @pytest.mark.parametrize("fn,pd_fn", [ + (lambda x, k: rr.rolling_variance(x, k, method="fast"), + lambda s, k: s.rolling(k).var().to_numpy()), + (lambda x, k: rr.rolling_skewness(x, k, method="fast"), + lambda s, k: s.rolling(k).skew().to_numpy()), + (lambda x, k: rr.rolling_kurtosis(x, k, method="fast"), + lambda s, k: s.rolling(k).kurt().to_numpy()), + ], ids=["variance", "skewness", "kurtosis"]) + def test_fast_matches_pandas_no_nan(self, fn, pd_fn): + rng = np.random.default_rng(0) + x = rng.standard_normal(300) + k = 10 + nan_allclose(fn(x, k), pd_fn(pd.Series(x), k), rtol=1e-9, atol=1e-9) + + @pytest.mark.parametrize("fn_stable,fn_fast", [ + (lambda x, k, mp: rr.rolling_variance(x, k, min_periods=mp), + lambda x, k, mp: rr.rolling_variance(x, k, min_periods=mp, method="fast")), + (lambda x, k, mp: rr.rolling_skewness(x, k, min_periods=mp), + lambda x, k, mp: rr.rolling_skewness(x, k, min_periods=mp, method="fast")), + (lambda x, k, mp: rr.rolling_kurtosis(x, k, min_periods=mp), + lambda x, k, mp: rr.rolling_kurtosis(x, k, min_periods=mp, method="fast")), + ], ids=["variance", "skewness", "kurtosis"]) + def test_fast_agrees_with_stable_with_nan(self, fn_stable, fn_fast): + rng = np.random.default_rng(1) + x = rng.standard_normal(300) + x[rng.random(300) < 0.1] = np.nan + k, mp = 10, 5 + nan_allclose(fn_fast(x, k, mp), fn_stable(x, k, mp), rtol=1e-9, atol=1e-9) + + def test_fast_variance_warmup_nan(self): + x = np.arange(1.0, 11.0) + out = rr.rolling_variance(x, 5, method="fast") + assert all(np.isnan(out[:4])) + assert not np.isnan(out[4]) + + def test_fast_skewness_needs_3_obs(self): + out = rr.rolling_skewness(np.arange(1.0, 11.0), 5, method="fast") + assert all(np.isnan(out[:4])) + + def test_fast_kurtosis_needs_4_obs(self): + out = rr.rolling_kurtosis(np.arange(1.0, 11.0), 5, method="fast") + assert all(np.isnan(out[:4])) + + def test_fast_min_periods(self): + x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + out = rr.rolling_variance(x, 3, min_periods=2, method="fast") + assert np.isnan(out[0]) + assert not np.isnan(out[1]) + + def test_fast_all_nan_window_returns_nan(self): + x = np.array([np.nan, np.nan, np.nan, 1.0, 2.0, 3.0]) + out = rr.rolling_variance(x, 3, min_periods=1, method="fast") + assert np.isnan(out[2]) + assert not np.isnan(out[5]) + + @pytest.mark.parametrize("fn", [rr.rolling_variance, rr.rolling_skewness, rr.rolling_kurtosis], + ids=["variance", "skewness", "kurtosis"]) + def test_fast_empty_input(self, fn): + assert len(fn(np.array([]), 3, method="fast")) == 0 diff --git a/py_package/tests/test_robustness.py b/py_package/tests/test_robustness.py new file mode 100644 index 0000000..22aed20 --- /dev/null +++ b/py_package/tests/test_robustness.py @@ -0,0 +1,111 @@ +"""Memory layout, dtype, and fuzz tests — cross-cutting robustness.""" +import numpy as np +import pandas as pd +import pytest + +import robust_rolling_core as rrc +import robustrolling as rr + +from conftest import nan_allclose + +_ENGINE_CLASSES = [rrc.MonotonicMax, rrc.MonotonicMin, rrc.MultisetMedian, rrc.SlidingWelford] +_ENGINE_IDS = ["MonotonicMax", "MonotonicMin", "MultisetMedian", "SlidingWelford"] + +_SIMPLE_FNS = [rr.rolling_max, rr.rolling_min, rr.rolling_median, rr.rolling_variance, rr.rolling_mean] +_SIMPLE_IDS = ["max", "min", "median", "variance", "mean"] + + +# ── Memory layout and dtype ──────────────────────────────────────────────────── + +class TestMemoryLayout: + + @pytest.mark.parametrize("cls", _ENGINE_CLASSES, ids=_ENGINE_IDS) + def test_strided_every_other_element(self, cls): + x = np.arange(10, dtype=np.float64) + strided = x[::2] + assert not strided.flags["C_CONTIGUOUS"] + np.testing.assert_array_equal( + cls(2).process_batch(strided), + cls(2).process_batch(np.ascontiguousarray(strided)) + ) + + @pytest.mark.parametrize("cls", _ENGINE_CLASSES, ids=_ENGINE_IDS) + def test_strided_column_slice(self, cls): + col = np.arange(16.0).reshape(8, 2)[:, 0] + assert not col.flags["C_CONTIGUOUS"] + np.testing.assert_array_equal( + cls(3).process_batch(col), + cls(3).process_batch(np.ascontiguousarray(col)) + ) + + @pytest.mark.parametrize("cls", _ENGINE_CLASSES, ids=_ENGINE_IDS) + def test_strided_reversed(self, cls): + rev = np.arange(8, dtype=np.float64)[::-1] + assert not rev.flags["C_CONTIGUOUS"] + np.testing.assert_array_equal( + cls(2).process_batch(rev), + cls(2).process_batch(np.ascontiguousarray(rev)) + ) + + @pytest.mark.parametrize("dtype", [np.float32, np.int32, np.int64], + ids=["float32", "int32", "int64"]) + def test_numeric_dtype_cast_in_engine(self, dtype): + x = np.array([1, 3, 2, 5, 4], dtype=dtype) + out = rrc.MonotonicMax(3).process_batch(x) + assert out.dtype == np.float64 + np.testing.assert_allclose(out, rrc.MonotonicMax(3).process_batch(x.astype(np.float64))) + + @pytest.mark.parametrize("fn", _SIMPLE_FNS, ids=_SIMPLE_IDS) + @pytest.mark.parametrize("dtype", [np.float32, np.int64, np.bool_], + ids=["float32", "int64", "bool"]) + def test_numeric_dtype_in_high_level_api(self, fn, dtype): + x = np.array([1, 3, 2, 5, 4], dtype=dtype) + out = fn(x, 3) + assert out.dtype == np.float64 + nan_allclose(out, fn(x.astype(np.float64), 3)) + + @pytest.mark.parametrize("fn", _SIMPLE_FNS, ids=_SIMPLE_IDS) + def test_strided_high_level_api(self, fn): + base = np.array([1.0, 3.0, 2.0, 7.0, 4.0, 6.0, 5.0, 8.0]) + strided = base[::2] + assert not strided.flags["C_CONTIGUOUS"] + nan_allclose(np.asarray(fn(strided, 2)), + np.asarray(fn(np.ascontiguousarray(strided), 2))) + + +# ── Fuzz / stress ────────────────────────────────────────────────────────────── + +class TestFuzz: + + @pytest.mark.parametrize("fn,pd_fn", [ + (rr.rolling_max, lambda s, k, mp: s.rolling(k, min_periods=mp).max().to_numpy()), + (rr.rolling_min, lambda s, k, mp: s.rolling(k, min_periods=mp).min().to_numpy()), + (rr.rolling_median, lambda s, k, mp: s.rolling(k, min_periods=mp).median().to_numpy()), + (rr.rolling_variance, lambda s, k, mp: s.rolling(k, min_periods=mp).var().to_numpy()), + (rr.rolling_mean, lambda s, k, mp: s.rolling(k, min_periods=mp).mean().to_numpy()), + ], ids=["max", "min", "median", "variance", "mean"]) + def test_random_nan_20pct_matches_pandas(self, fn, pd_fn): + np.random.seed(42) + x = np.random.randn(5000) + x[np.random.rand(5000) < 0.2] = np.nan + k, mp = 15, 5 + nan_allclose(fn(x, k, min_periods=mp), pd_fn(pd.Series(x), k, mp), + rtol=1e-10, atol=1e-10) + + @pytest.mark.parametrize("fn,pd_fn", [ + (lambda x, k, mp: rr.rolling_skewness(x, k, min_periods=mp), + lambda s, k, mp: s.rolling(k, min_periods=mp).skew().to_numpy()), + (lambda x, k, mp: rr.rolling_kurtosis(x, k, min_periods=mp), + lambda s, k, mp: s.rolling(k, min_periods=mp).kurt().to_numpy()), + ], ids=["skewness", "kurtosis"]) + def test_moments_random_nan_matches_pandas(self, fn, pd_fn): + np.random.seed(99) + x = np.random.randn(2000) + x[np.random.rand(2000) < 0.15] = np.nan + k, mp = 10, 4 + nan_allclose(fn(x, k, mp), pd_fn(pd.Series(x), k, mp), rtol=1e-8, atol=1e-8) + + def test_stress_many_instances_no_crash(self): + x = np.random.randn(100).astype(np.float64) + for _ in range(2000): + rrc.MultisetMedian(10).process_batch(x) diff --git a/py_package/tests/test_simple.py b/py_package/tests/test_simple.py new file mode 100644 index 0000000..72842ac --- /dev/null +++ b/py_package/tests/test_simple.py @@ -0,0 +1,174 @@ +"""Tests for the high-level API: rolling_max, min, median, variance, mean.""" +import numpy as np +import pandas as pd +import pytest + +import robustrolling as rr + +from conftest import nan_allclose + +_FNS = [rr.rolling_max, rr.rolling_min, rr.rolling_median, rr.rolling_variance, rr.rolling_mean] +_FN_IDS = ["max", "min", "median", "variance", "mean"] + + +# ── Output type preservation ─────────────────────────────────────────────────── + +class TestOutputType: + + @pytest.mark.parametrize("fn", _FNS, ids=_FN_IDS) + def test_series_input_returns_series(self, fn): + s = pd.Series([1.0, 3.0, 2.0, 5.0, 4.0], name="x") + out = fn(s, 3) + assert isinstance(out, pd.Series) + assert out.name == "x" + + @pytest.mark.parametrize("fn", _FNS, ids=_FN_IDS) + def test_series_preserves_datetime_index(self, fn): + idx = pd.date_range("2024-01-01", periods=5) + s = pd.Series([1.0, 3.0, 2.0, 5.0, 4.0], index=idx) + out = fn(s, 3) + assert isinstance(out, pd.Series) + assert out.index.equals(idx) + + @pytest.mark.parametrize("fn", _FNS, ids=_FN_IDS) + def test_series_preserves_range_index(self, fn): + idx = pd.RangeIndex(start=10, stop=15) + s = pd.Series([1.0, 3.0, 2.0, 5.0, 4.0], index=idx) + assert fn(s, 3).index.equals(idx) + + @pytest.mark.parametrize("fn", _FNS, ids=_FN_IDS) + def test_ndarray_input_returns_ndarray(self, fn): + arr = np.array([1.0, 3.0, 2.0, 5.0, 4.0]) + assert isinstance(fn(arr, 3), np.ndarray) + + @pytest.mark.parametrize("fn", _FNS, ids=_FN_IDS) + def test_output_length_equals_input_length(self, fn): + arr = np.array([1.0, 3.0, 2.0, 5.0, 4.0]) + assert len(fn(arr, 3)) == len(arr) + + @pytest.mark.parametrize("fn", _FNS, ids=_FN_IDS) + def test_output_dtype_is_float64(self, fn): + arr = np.array([1, 3, 2, 5, 4], dtype=np.int32) + assert fn(arr, 3).dtype == np.float64 + + +# ── Pandas comparison ────────────────────────────────────────────────────────── + +_PANDAS_CASES = [ + # label, data, k mp + ("clean_default", [1.0, 3.0, 2.0, 5.0, 4.0], 3, None), + ("clean_mp1", [1.0, 3.0, 2.0, 5.0, 4.0], 3, 1), + ("clean_mp2", [1.0, 3.0, 2.0, 5.0, 4.0], 3, 2), + ("longer_default", [-2.0, 6.0, 1.0, 8.0, 0.0, 8.0, -1.0], 4, None), + ("longer_mp2", [-2.0, 6.0, 1.0, 8.0, 0.0, 8.0, -1.0], 4, 2), + ("nan_mp1", [1.0, np.nan, 3.0, 4.0, 5.0], 3, 1), + ("nan_mp2", [1.0, np.nan, 3.0, 4.0, 5.0], 3, 2), + ("nan_default", [1.0, np.nan, 3.0, 4.0, 5.0], 3, None), + ("leading_nans", [np.nan, np.nan, 3.0, 4.0], 2, 1), + ("k_gt_n", [1.0, 2.0, 3.0], 10, 1), + ("constant", [5.0] * 6, 3, None), + ("negatives_mp1", [-5.0, -1.0, -3.0, -2.0, -4.0], 3, 1), + ("mixed_nan_mp1", [np.nan, 1.0, np.nan, 3.0, np.nan], 2, 1), + ("window2_nan_gap", [5.0, 1.0, np.nan, 0.0], 2, 1), +] + +_CASE_IDS = [c[0] for c in _PANDAS_CASES] +_CASE_PARAMS = [(c[1], c[2], c[3]) for c in _PANDAS_CASES] + + +def _pd_mp(mp, k): + return k if mp is None else mp + + +def _kw(mp): + return {} if mp is None else {"min_periods": mp} + + +class TestPandasComparison: + + @pytest.mark.parametrize("data,k,mp", _CASE_PARAMS, ids=_CASE_IDS) + def test_rolling_max_matches_pandas(self, data, k, mp): + arr = np.array(data, dtype=np.float64) + expected = pd.Series(arr).rolling(k, min_periods=_pd_mp(mp, k)).max().to_numpy() + nan_allclose(rr.rolling_max(arr, k, **_kw(mp)), expected) + + @pytest.mark.parametrize("data,k,mp", _CASE_PARAMS, ids=_CASE_IDS) + def test_rolling_min_matches_pandas(self, data, k, mp): + arr = np.array(data, dtype=np.float64) + expected = pd.Series(arr).rolling(k, min_periods=_pd_mp(mp, k)).min().to_numpy() + nan_allclose(rr.rolling_min(arr, k, **_kw(mp)), expected) + + @pytest.mark.parametrize("data,k,mp", _CASE_PARAMS, ids=_CASE_IDS) + def test_rolling_median_matches_pandas(self, data, k, mp): + arr = np.array(data, dtype=np.float64) + expected = pd.Series(arr).rolling(k, min_periods=_pd_mp(mp, k)).median().to_numpy() + nan_allclose(rr.rolling_median(arr, k, **_kw(mp)), expected) + + @pytest.mark.parametrize("data,k,mp", _CASE_PARAMS, ids=_CASE_IDS) + def test_rolling_variance_matches_pandas(self, data, k, mp): + arr = np.array(data, dtype=np.float64) + expected = pd.Series(arr).rolling(k, min_periods=_pd_mp(mp, k)).var().to_numpy() + nan_allclose(rr.rolling_variance(arr, k, **_kw(mp)), expected) + + @pytest.mark.parametrize("data,k,mp", _CASE_PARAMS, ids=_CASE_IDS) + def test_rolling_mean_matches_pandas(self, data, k, mp): + arr = np.array(data, dtype=np.float64) + expected = pd.Series(arr).rolling(k, min_periods=_pd_mp(mp, k)).mean().to_numpy() + nan_allclose(rr.rolling_mean(arr, k, **_kw(mp)), expected) + + +# ── min_periods edge cases ───────────────────────────────────────────────────── + +class TestMinPeriods: + + def test_default_equals_window_size_max_min_median(self): + x = np.array([1.0, 3.0, 2.0, 5.0, 4.0]) + for fn in (rr.rolling_max, rr.rolling_min, rr.rolling_median): + out = fn(x, 3) + assert np.isnan(out[0]) and np.isnan(out[1]) and not np.isnan(out[2]) + + def test_default_equals_window_size_variance(self): + out = rr.rolling_variance(np.array([1.0, 2.0, 3.0, 4.0]), 3) + assert np.isnan(out[0]) and np.isnan(out[1]) + assert pytest.approx(out[2], abs=1e-10) == 1.0 + + def test_min_periods_0_no_masking_on_clean_input(self): + x = np.array([1.0, 2.0, 3.0, 4.0]) + for fn in (rr.rolling_max, rr.rolling_min, rr.rolling_median): + assert not np.any(np.isnan(fn(x, 3, min_periods=0))) + + def test_nan_series_default_all_masked(self): + x = np.array([1.0, np.nan, 3.0, 4.0]) + for fn in (rr.rolling_max, rr.rolling_min, rr.rolling_median): + assert np.all(np.isnan(fn(x, 3))) + + def test_window_expiry_max_nan_gap(self): + x = np.array([5.0, 1.0, np.nan, 0.0]) + out = rr.rolling_max(x, 2, min_periods=1) + assert pytest.approx(out[2], abs=1e-12) == 1.0 + assert pytest.approx(out[3], abs=1e-12) == 0.0 + + def test_window_expiry_min_nan_gap(self): + x = np.array([1.0, 5.0, np.nan, 9.0]) + out = rr.rolling_min(x, 2, min_periods=1) + assert pytest.approx(out[2], abs=1e-12) == 5.0 + assert pytest.approx(out[3], abs=1e-12) == 9.0 + + def test_variance_min_periods_1_single_value_is_nan(self): + out = rr.rolling_variance(np.array([1.0, 5.0]), 2, min_periods=1) + assert np.isnan(out[0]) + assert pytest.approx(out[1], abs=1e-10) == 8.0 + + @pytest.mark.parametrize("fn", _FNS, ids=_FN_IDS) + def test_validation_negative_min_periods(self, fn): + with pytest.raises(Exception): + fn(np.array([1.0, 2.0, 3.0]), 3, min_periods=-1) + + @pytest.mark.parametrize("fn", _FNS, ids=_FN_IDS) + def test_validation_min_periods_exceeds_window(self, fn): + with pytest.raises(Exception): + fn(np.array([1.0, 2.0, 3.0]), 3, min_periods=4) + + @pytest.mark.parametrize("fn", _FNS, ids=_FN_IDS) + def test_empty_input_returns_empty(self, fn): + assert len(fn(np.array([]), 3)) == 0 diff --git a/tests/test_monotonic_max.cxx b/tests/test_monotonic_max.cxx index f7cc498..1404389 100644 --- a/tests/test_monotonic_max.cxx +++ b/tests/test_monotonic_max.cxx @@ -61,3 +61,40 @@ TEST(MonotonicMaxTest, CrtpInterfaceMatchesMax) { EXPECT_EQ(base.current_size(), 3U); EXPECT_DOUBLE_EQ(base.get_value(), mm.get_max()); } + +TEST(MonotonicMaxTest, InitialStateIsNaN) { + MonotonicMax mm(3); + EXPECT_TRUE(std::isnan(mm.get_max())); + EXPECT_EQ(mm.current_size(), 0); +} + +TEST(MonotonicMaxTest, WindowSize1Identity) { + MonotonicMax mm(1); + mm.update(5.0); + EXPECT_DOUBLE_EQ(mm.get_max(), 5.0); + mm.update(3.0); + EXPECT_DOUBLE_EQ(mm.get_max(), 3.0); + mm.update(8.0); + EXPECT_DOUBLE_EQ(mm.get_max(), 8.0); + mm.update(1.0); + EXPECT_DOUBLE_EQ(mm.get_max(), 1.0); +} + +TEST(MonotonicMaxTest, NanDoesNotContributeToWindow) { + MonotonicMax mm(2); + mm.update(1.0); + EXPECT_DOUBLE_EQ(mm.get_max(), 1.0); + mm.update(2.0); + EXPECT_DOUBLE_EQ(mm.get_max(), 2.0); + mm.skip(); + EXPECT_DOUBLE_EQ(mm.get_max(), 2.0); + mm.update(1.0); + EXPECT_DOUBLE_EQ(mm.get_max(), 1.0); +} + +TEST(MonotonicMaxTest, NanAtStartReturnsNan) { + MonotonicMax mm(3); + mm.skip(); + EXPECT_TRUE(std::isnan(mm.get_max())); + EXPECT_EQ(mm.current_size(), 0); +} \ No newline at end of file diff --git a/tests/test_monotonic_min.cxx b/tests/test_monotonic_min.cxx index 929213d..a514546 100644 --- a/tests/test_monotonic_min.cxx +++ b/tests/test_monotonic_min.cxx @@ -16,7 +16,7 @@ TEST(MonotonicMinTest, BasicFunctionality) { EXPECT_DOUBLE_EQ(mm.get_min(), 5.0); // [10, 20, 5] } -TEST(MonotonicMinTest, MaxExitsWindow) { +TEST(MonotonicMinTest, MinExitsWindow) { MonotonicMin mm(3); mm.update(50.0); mm.update(10.0); @@ -48,7 +48,7 @@ TEST(MonotonicMinTest, Duplicates) { EXPECT_DOUBLE_EQ(mm.get_min(), 10.0); } -TEST(MonotonicMinTest, CrtpInterfaceMatchesMax) { +TEST(MonotonicMinTest, CrtpInterfaceMatchesMin) { MonotonicMin mm(3); RollingMetric &base = mm; @@ -61,3 +61,42 @@ TEST(MonotonicMinTest, CrtpInterfaceMatchesMax) { EXPECT_EQ(base.current_size(), 3U); EXPECT_DOUBLE_EQ(base.get_value(), mm.get_min()); } + +TEST(MonotonicMinTest, InitialStateIsNaN) { + MonotonicMin mm(3); + EXPECT_TRUE(std::isnan(mm.get_min())); + EXPECT_EQ(mm.current_size(), 0U); +} + +TEST(MonotonicMinTest, WindowSize1Identity) { + MonotonicMin mm(1); + mm.update(5.0); + EXPECT_DOUBLE_EQ(mm.get_min(), 5.0); + mm.update(3.0); + EXPECT_DOUBLE_EQ(mm.get_min(), 3.0); + mm.update(8.0); + EXPECT_DOUBLE_EQ(mm.get_min(), 8.0); + mm.update(1.0); + EXPECT_DOUBLE_EQ(mm.get_min(), 1.0); +} + +TEST(MonotonicMinTest, NanDoesNotContributeToWindow) { + MonotonicMin mm(2); + mm.update(5.0); + EXPECT_DOUBLE_EQ(mm.get_min(), 5.0); + mm.update(1.0); + EXPECT_DOUBLE_EQ(mm.get_min(), 1.0); + mm.skip(); + EXPECT_DOUBLE_EQ(mm.get_min(), 1.0); + EXPECT_EQ(mm.current_size(), 1U); + mm.update(9.0); + EXPECT_DOUBLE_EQ(mm.get_min(), 9.0); + EXPECT_EQ(mm.current_size(), 1U); +} + +TEST(MonotonicMinTest, NanAtStartReturnsNan) { + MonotonicMin mm(3); + mm.skip(); + EXPECT_TRUE(std::isnan(mm.get_min())); + EXPECT_EQ(mm.current_size(), 0U); +} diff --git a/tests/test_multiset_median.cxx b/tests/test_multiset_median.cxx index ad01785..6fee4fb 100644 --- a/tests/test_multiset_median.cxx +++ b/tests/test_multiset_median.cxx @@ -1,5 +1,6 @@ #include "MultisetMedian.hpp" +#include #include TEST(MultisetMedianTest, HandlesInitialization) { @@ -77,4 +78,36 @@ TEST(MultisetMedianTest, HandlesWindowSize2Sliding) { EXPECT_DOUBLE_EQ(sm.get_median(), 1.5); sm.update(3.0); EXPECT_DOUBLE_EQ(sm.get_median(), 2.5); +} + +TEST(MultisetMedianTest, WindowSize1Identity) { + MultisetMedian sm(1); + sm.update(5.0); + EXPECT_DOUBLE_EQ(sm.get_median(), 5.0); + sm.update(3.0); + EXPECT_DOUBLE_EQ(sm.get_median(), 3.0); + sm.update(8.0); + EXPECT_DOUBLE_EQ(sm.get_median(), 8.0); +} + +TEST(MultisetMedianTest, NanDoesNotContributeToWindow) { + // window=3, sequence [1, 2, skip, 4]: + // after skip: window=[1,2,NaN] -> valid=[1,2], median=1.5 + // after update(4): window=[2,NaN,4] -> valid=[2,4], median=3.0 + MultisetMedian sm(3); + sm.update(1.0); + EXPECT_DOUBLE_EQ(sm.get_median(), 1.0); + EXPECT_EQ(sm.current_size(), 1U); + + sm.update(2.0); + EXPECT_DOUBLE_EQ(sm.get_median(), 1.5); + EXPECT_EQ(sm.current_size(), 2U); + + sm.skip(); + EXPECT_DOUBLE_EQ(sm.get_median(), 1.5); + EXPECT_EQ(sm.current_size(), 2U); + + sm.update(4.0); + EXPECT_DOUBLE_EQ(sm.get_median(), 3.0); + EXPECT_EQ(sm.current_size(), 2U); } \ No newline at end of file diff --git a/tests/test_sliding_covariance.cxx b/tests/test_sliding_covariance.cxx index f4821f5..fef3ff3 100644 --- a/tests/test_sliding_covariance.cxx +++ b/tests/test_sliding_covariance.cxx @@ -51,3 +51,24 @@ TEST(SlidingCovarianceTest, NanPairSkipped) { EXPECT_NEAR(sc.get_covariance(), 1.0, 1e-12); EXPECT_NEAR(sc.get_correlation(), 1.0, 1e-12); } + +TEST(SlidingCovarianceTest, SinglePairCovarianceIsNaN) { + SlidingCovariance sc(3); + sc.update(1.0, 2.0); + EXPECT_TRUE(std::isnan(sc.get_covariance())); + EXPECT_TRUE(std::isnan(sc.get_correlation())); + EXPECT_EQ(sc.current_size(), 1); +} + +TEST(SlidingCovarianceTest, ConstantXCorrelationIsNaN) { + // x is constant -> std_dev(x) = 0 -> correlation = cov / (0 * sd_y) = NaN + // covariance itself is 0 (x never deviates from its mean) + SlidingCovariance sc(4); + sc.update(5.0, 1.0); + sc.update(5.0, 2.0); + sc.update(5.0, 3.0); + sc.update(5.0, 4.0); + EXPECT_TRUE(std::isnan(sc.get_correlation())); + EXPECT_NEAR(sc.get_covariance(), 0.0, 1e-12); + EXPECT_NEAR(sc.get_mean_x(), 5.0, 1e-12); +} \ No newline at end of file diff --git a/tests/test_sliding_moments.cxx b/tests/test_sliding_moments.cxx index abdec0a..8bdcd12 100644 --- a/tests/test_sliding_moments.cxx +++ b/tests/test_sliding_moments.cxx @@ -64,4 +64,28 @@ TEST(SlidingMomentsTest, NanDoesNotCorruptMathematicalState) { sm.update(20.0); sm.update(30.0); EXPECT_NEAR(sm.get_skewness(), skew_before, 1e-10); +} + +TEST(SlidingMomentsTest, SmallWindowSkewnessAlwaysNaN) { + // window=2 -> at most 2 values -> skewness (needs >=3) always NaN + SlidingMoments sm(2); + sm.update(1.0); + sm.update(2.0); + EXPECT_EQ(sm.current_size(), 2U); + EXPECT_TRUE(std::isnan(sm.get_skewness())); + EXPECT_TRUE(std::isnan(sm.get_kurtosis())); + + sm.update(3.0); // window slides to [2, 3], size still 2 + EXPECT_EQ(sm.current_size(), 2U); + EXPECT_TRUE(std::isnan(sm.get_skewness())); + EXPECT_TRUE(std::isnan(sm.get_kurtosis())); +} + +TEST(SlidingMomentsTest, WindowSize1BothMomentsAlwaysNaN) { + // window=1 -> size always <=1 -> both moments always NaN + SlidingMoments sm(1); + sm.update(5.0); + EXPECT_EQ(sm.current_size(), 1U); + EXPECT_TRUE(std::isnan(sm.get_skewness())); + EXPECT_TRUE(std::isnan(sm.get_kurtosis())); } \ No newline at end of file diff --git a/tests/test_sliding_welford_ring.cxx b/tests/test_sliding_welford_ring.cxx index c16fc23..f02c552 100644 --- a/tests/test_sliding_welford_ring.cxx +++ b/tests/test_sliding_welford_ring.cxx @@ -61,3 +61,29 @@ TEST(SlidingWelfordRingTest, CrtpInterfaceMatchesVariance) { EXPECT_EQ(base.current_size(), 2U); EXPECT_DOUBLE_EQ(base.get_value(), sw.get_variance()); } + +TEST(SlidingWelfordRingTest, NanDoesNotContributeToWindow) { + // window=3, sequence [10, 20, skip, 30]: + // after skip: valid=[10,20], mean=15, var=50, size=2 + // after update(30): 10 expires -> valid=[20,30], mean=25, var=50, size=2 + SlidingWelfordRing sw(3); + + sw.update(10.0); + EXPECT_DOUBLE_EQ(sw.get_mean(), 10.0); + EXPECT_EQ(sw.current_size(), 1U); + + sw.update(20.0); + EXPECT_DOUBLE_EQ(sw.get_mean(), 15.0); + EXPECT_DOUBLE_EQ(sw.get_variance(), 50.0); + EXPECT_EQ(sw.current_size(), 2U); + + sw.skip(); + EXPECT_DOUBLE_EQ(sw.get_mean(), 15.0); + EXPECT_DOUBLE_EQ(sw.get_variance(), 50.0); + EXPECT_EQ(sw.current_size(), 2U); + + sw.update(30.0); + EXPECT_DOUBLE_EQ(sw.get_mean(), 25.0); + EXPECT_DOUBLE_EQ(sw.get_variance(), 50.0); + EXPECT_EQ(sw.current_size(), 2U); +}