Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 36 additions & 26 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -281,47 +281,57 @@ Benchmarked on Apple M-series (ARM), window = 100, n = 1 000 000.

### Python vs pandas

Best robustrolling configuration vs pandas (¹ `assume_finite=True`, ² `method="fast"`).

| Function | robustrolling | pandas | speedup |
| ------------------------ | ------------- | --------- | --------- |
| `rolling_mean` ¹ | 0.78 ms | 4.58 ms | **5.9x** |
| `rolling_max` | 11.5 ms | 12.3 ms | 1.1x |
| `rolling_min` | 11.5 ms | 12.7 ms | 1.1x |
| `rolling_median` | 111 ms | 233 ms | **2.1x** |
| `rolling_variance` ² | 4.4 ms | 10.6 ms | **2.4x** |
| `rolling_skewness` ² | 10.9 ms | 10.1 ms | ~1.0x |
| `rolling_kurtosis` ² | 8.4 ms | 10.0 ms | 1.2x |
| `rolling_cov` | 16.8 ms | 19.3 ms | 1.2x |
| `rolling_cor` | 16.8 ms | 39.6 ms | **2.4x** |
| Function | robustrolling | pandas | speedup |
| -------------------- | ------------- | -------- | -------- |
| `rolling_mean` | 3.1 ms | 4.4 ms | **1.4x** |
| `rolling_max` | 11.1 ms | 11.7 ms | 1.1x |
| `rolling_min` | 11.2 ms | 12.2 ms | 1.1x |
| `rolling_median` | 106 ms | 233 ms | **2.2x** |
| `rolling_variance` | 15.2 ms | 9.6 ms | 0.6x |
| `rolling_skewness` | 14.0 ms | 9.1 ms | 0.6x |
| `rolling_kurtosis` | 14.3 ms | 9.2 ms | 0.6x |
| `rolling_cov` | 14.8 ms | 18.2 ms | **1.2x** |
| `rolling_cor` | 14.6 ms | 36.7 ms | **2.5x** |

### Python vs Polars

| Function | robustrolling | Polars | speedup |
| -------------------- | ------------- | -------- | -------- |
| `rolling_mean` | 3.1 ms | 8.0 ms | **2.6x** |
| `rolling_max` | 11.1 ms | 11.4 ms | 1.0x |
| `rolling_min` | 11.0 ms | 11.6 ms | 1.1x |
| `rolling_median` | 106 ms | 40.8 ms | 0.4x |
| `rolling_variance` | 15.7 ms | 16.2 ms | 1.0x |
| `rolling_skewness` | 13.9 ms | 16.0 ms | **1.2x** |
| `rolling_kurtosis` | 14.3 ms | 15.6 ms | 1.1x |

### Python — stable vs fast

| Function | stable | fast | speedup |
| ---------------------- | -------- | -------- | -------- |
| `mean` (assume_finite) | 3.5 ms | 0.78 ms | **4.4x** |
| `variance` | 16.1 ms | 4.4 ms | **3.7x** |
| `skewness` | 23.9 ms | 10.9 ms | **2.2x** |
| `kurtosis` | 21.7 ms | 8.4 ms | **2.6x** |
| `mean` (assume_finite) | 3.2 ms | 0.73 ms | **4.4x** |
| `variance` | 15.2 ms | 3.9 ms | **3.9x** |
| `skewness` | 13.9 ms | 10.0 ms | **1.4x** |
| `kurtosis` | 14.4 ms | 7.6 ms | **1.9x** |

### R vs slider vs RcppRoll

| Function | robustrolling | slider | RcppRoll | vs slider | vs RcppRoll |
| -------------------- | ------------- | ---------- | --------- | ---------- | ----------- |
| `rolling_max` | 15.9 ms | 349 ms | 181 ms | **22x** | **11x** |
| `rolling_min` | 15.2 ms | 353 ms | 181 ms | **23x** | **12x** |
| `rolling_mean` | 3.2 ms | 1 558 ms | 39.0 ms | **495x** | **12x** |
| `rolling_variance` | 16.9 ms | 2 578 ms | 320 ms | **152x** | **19x** |
| `rolling_median` | 114 ms | 10 254 ms | 2 014 ms | **90x** | **18x** |
| `rolling_max` | 15.1 ms | 338 ms | 175 ms | **22x** | **12x** |
| `rolling_min` | 14.9 ms | 350 ms | 175 ms | **24x** | **12x** |
| `rolling_mean` | 3.1 ms | 1 523 ms | 37.4 ms | **487x** | **12x** |
| `rolling_variance` | 16.0 ms | 2 477 ms | 304 ms | **154x** | **19x** |
| `rolling_median` | 112 ms | 10 084 ms | 1 938 ms | **90x** | **17x** |

### R — stable vs fast

| Function | stable | fast | speedup |
| ---------------------- | -------- | -------- | -------- |
| `mean` (assume_finite) | 3.3 ms | 0.80 ms | **4.2x** |
| `variance` | 16.8 ms | 4.4 ms | **3.9x** |
| `skewness` | 21.9 ms | 10.6 ms | **2.1x** |
| `kurtosis` | 21.6 ms | 8.3 ms | **2.6x** |
| `mean` (assume_finite) | 3.2 ms | 0.78 ms | **4.0x** |
| `variance` | 16.2 ms | 4.1 ms | **4.0x** |
| `skewness` | 14.5 ms | 10.3 ms | **1.4x** |
| `kurtosis` | 14.4 ms | 7.8 ms | **1.8x** |

---

Expand Down
146 changes: 130 additions & 16 deletions benchmarks/bench_core.cxx
Original file line number Diff line number Diff line change
@@ -1,37 +1,151 @@
#include "MonotonicMax.hpp"
#include "MonotonicMin.hpp"
#include "MultisetMedian.hpp"
#include "SlidingCovariance.hpp"
#include "SlidingMean.hpp"
#include "SlidingMoments.hpp"
#include "SlidingWelfordRing.hpp"
#include <benchmark/benchmark.h>
#include <cmath>
#include <cstddef>
#include <limits>
#include <random>
#include <utility>
#include <vector>

std::vector<double> generate_market_data(std::size_t size) {
std::vector<double> data(size);
// Builds a reproducible benchmark input of `n` doubles.
//
// The generator is seeded with a fixed value (42), so every call with the
// same arguments yields the same sequence. For each element a uniform draw in
// [0, 1) is compared against `nan_frac`: below it the element becomes a quiet
// NaN, otherwise it is sampled from a normal distribution (mean 100, sd 5).
// With the default `nan_frac` of 0.0 no element is ever NaN.
static std::vector<double> make_data(std::size_t n, double nan_frac = 0.0) {
  std::mt19937 rng(42);
  std::uniform_real_distribution<double> unit(0.0, 1.0);
  std::normal_distribution<double> price(100.0, 5.0);

  std::vector<double> samples(n);
  for (std::size_t i = 0; i < n; ++i) {
    // The uniform draw always happens first so the random stream is consumed
    // in the same order regardless of which branch is taken.
    if (unit(rng) < nan_frac) {
      samples[i] = std::numeric_limits<double>::quiet_NaN();
    } else {
      samples[i] = price(rng);
    }
  }
  return samples;
}

// Shared benchmark inputs, built once during static initialization so that
// data generation is not part of any timed loop.
// DATA_CLEAN uses the default nan_frac of 0.0; DATA_NAN marks each element as
// NaN independently with probability 0.15.
const auto DATA_CLEAN = make_data(100'000);
const auto DATA_NAN = make_data(100'000, 0.15); // 15% NaN

// Benchmarks MonotonicMax: each timed iteration constructs a fresh engine from
// state.range(0) (presumably the window length), streams every DATA_CLEAN
// value through update(), and reads get_max() after each one.
static void BM_MonotonicMax(benchmark::State &state) {
  auto window = static_cast<std::size_t>(state.range(0));
  for (auto _ : state) {
    MonotonicMax tracker(window);
    for (double sample : DATA_CLEAN) {
      tracker.update(sample);
      // Keep the query from being optimized away.
      benchmark::DoNotOptimize(tracker.get_max());
    }
  }
}
BENCHMARK(BM_MonotonicMax)->Arg(10)->Arg(100)->Arg(1000);

// Benchmarks MonotonicMin: each timed iteration constructs a fresh engine from
// state.range(0) (presumably the window length), streams every DATA_CLEAN
// value through update(), and reads get_min() after each one.
static void BM_MonotonicMin(benchmark::State &state) {
  auto window = static_cast<std::size_t>(state.range(0));
  for (auto _ : state) {
    MonotonicMin tracker(window);
    for (double sample : DATA_CLEAN) {
      tracker.update(sample);
      // Keep the query from being optimized away.
      benchmark::DoNotOptimize(tracker.get_min());
    }
  }
}
BENCHMARK(BM_MonotonicMin)->Arg(10)->Arg(100)->Arg(1000);

// Benchmarks MultisetMedian on NaN-free input: each timed iteration constructs
// a fresh engine from state.range(0) (presumably the window length), streams
// every DATA_CLEAN value through update(), and reads get_median() each time.
static void BM_MultisetMedian(benchmark::State &state) {
  auto window = static_cast<std::size_t>(state.range(0));
  for (auto _ : state) {
    MultisetMedian tracker(window);
    for (double sample : DATA_CLEAN) {
      tracker.update(sample);
      // Keep the query from being optimized away.
      benchmark::DoNotOptimize(tracker.get_median());
    }
  }
}
BENCHMARK(BM_MultisetMedian)->Arg(10)->Arg(100)->Arg(1000);

// Benchmarks SlidingMean: each timed iteration constructs a fresh engine from
// state.range(0) (presumably the window length), streams every DATA_CLEAN
// value through update(), and reads get_mean() after each one.
static void BM_SlidingMean(benchmark::State &state) {
  auto window = static_cast<std::size_t>(state.range(0));
  for (auto _ : state) {
    SlidingMean tracker(window);
    for (double sample : DATA_CLEAN) {
      tracker.update(sample);
      // Keep the query from being optimized away.
      benchmark::DoNotOptimize(tracker.get_mean());
    }
  }
}
BENCHMARK(BM_SlidingMean)->Arg(10)->Arg(100)->Arg(1000);

// Benchmarks SlidingWelfordRing: each timed iteration constructs a fresh engine
// from state.range(0) (presumably the window length), streams every DATA_CLEAN
// value through update(), and reads get_variance() after each one.
static void BM_SlidingWelfordRing(benchmark::State &state) {
  auto window = static_cast<std::size_t>(state.range(0));
  for (auto _ : state) {
    SlidingWelfordRing tracker(window);
    for (double sample : DATA_CLEAN) {
      tracker.update(sample);
      // Keep the query from being optimized away.
      benchmark::DoNotOptimize(tracker.get_variance());
    }
  }
}
BENCHMARK(BM_SlidingWelfordRing)->Arg(10)->Arg(100)->Arg(1000);

for (std::size_t i = 0; i < size; ++i) {
data[i] = dist(gen);
static void BM_SlidingMoments(benchmark::State &state) {
std::size_t w = static_cast<std::size_t>(state.range(0));
for (auto _ : state) {
SlidingMoments engine(w);
for (double v : DATA_CLEAN) {
engine.update(v);
benchmark::DoNotOptimize(engine.get_skewness());
benchmark::DoNotOptimize(engine.get_kurtosis());
}
}
return data;
}
BENCHMARK(BM_SlidingMoments)->Arg(10)->Arg(100)->Arg(1000);

const auto MARKET_DATA = generate_market_data(100000);
// Builds two reproducible series of `n` standard-normal samples each, for
// benchmarks that consume paired inputs.
//
// The generator is seeded with a fixed value (99) and the draws are
// interleaved (x[0], y[0], x[1], y[1], ...), so the result is identical on
// every call with the same `n`.
//
// Returns {x, y}. The vectors are moved into the pair: a plain `{x, y}` would
// copy-construct both n-element buffers because the locals are lvalues.
static std::pair<std::vector<double>, std::vector<double>>
make_pair_data(std::size_t n) {
  std::mt19937 gen(99);
  std::normal_distribution<double> dist(0.0, 1.0);
  std::vector<double> x(n), y(n);
  for (std::size_t i = 0; i < n; ++i) {
    x[i] = dist(gen);
    y[i] = dist(gen);
  }
  return {std::move(x), std::move(y)};
}

template <class MedianAlgo>
static void BM_RollingMedian(benchmark::State &state) {
std::size_t window_size = static_cast<std::size_t>(state.range(0));
// Paired input series for the covariance benchmark, generated once during
// static initialization so the generation cost stays outside the timed loop.
const auto [COV_X, COV_Y] = make_pair_data(100'000);

static void BM_SlidingCovariance(benchmark::State &state) {
std::size_t w = static_cast<std::size_t>(state.range(0));
for (auto _ : state) {
MedianAlgo engine(window_size);
SlidingCovariance engine(w);
for (std::size_t i = 0; i < COV_X.size(); ++i) {
engine.update(COV_X[i], COV_Y[i]);
benchmark::DoNotOptimize(engine.get_covariance());
}
}
}
BENCHMARK(BM_SlidingCovariance)->Arg(10)->Arg(100)->Arg(1000);

for (double price : MARKET_DATA) {
engine.update(price);
// Benchmarks MultisetMedian on input containing NaNs (DATA_NAN). NaN samples
// are routed to skip() instead of update(); get_median() is read after every
// sample either way. state.range(0) is forwarded to the engine constructor
// (presumably the window length).
static void BM_MultisetMedian_NaN(benchmark::State &state) {
  auto window = static_cast<std::size_t>(state.range(0));
  for (auto _ : state) {
    MultisetMedian tracker(window);
    for (double sample : DATA_NAN) {
      if (!std::isnan(sample)) {
        tracker.update(sample);
      } else {
        tracker.skip();
      }
      // Keep the query from being optimized away.
      benchmark::DoNotOptimize(tracker.get_median());
    }
  }
}
BENCHMARK(BM_MultisetMedian_NaN)->Arg(100);

BENCHMARK_TEMPLATE(BM_RollingMedian, MultisetMedian)
->Arg(10)
->Arg(100)
->Arg(1000);
// Benchmarks SlidingMoments on input containing NaNs (DATA_NAN). Every sample,
// NaN or not, is passed straight to update(), and get_skewness() is read after
// each one. state.range(0) is forwarded to the engine constructor (presumably
// the window length).
// NOTE(review): unlike BM_MultisetMedian_NaN there is no isnan()/skip() branch
// here — presumably SlidingMoments::update handles NaN itself; confirm.
static void BM_SlidingMoments_NaN(benchmark::State &state) {
  auto window = static_cast<std::size_t>(state.range(0));
  for (auto _ : state) {
    SlidingMoments tracker(window);
    for (double sample : DATA_NAN) {
      tracker.update(sample);
      // Keep the query from being optimized away.
      benchmark::DoNotOptimize(tracker.get_skewness());
    }
  }
}
BENCHMARK(BM_SlidingMoments_NaN)->Arg(100);
41 changes: 6 additions & 35 deletions benchmarks/bench_polars.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Benchmark: robustrolling vs Polars rolling functions + stable vs fast.
Benchmark: robustrolling vs Polars rolling functions (stable methods only).

Usage:
pip install polars
Expand All @@ -20,6 +20,7 @@

def bench(fn, reps: int = REPS) -> float:
"""Return median wall time in milliseconds over `reps` runs."""
fn() # warmup: prime caches before timing
times = []
for _ in range(reps):
t0 = time.perf_counter()
Expand All @@ -30,14 +31,11 @@ def bench(fn, reps: int = REPS) -> float:

def make_data(n: int):
x = RNG.standard_normal(n)
y = RNG.standard_normal(n)
sx = pl.Series(x)
sy = pl.Series(y)
return x, y, sx, sy
return x, pl.Series(x)


def run_vs_polars(n: int) -> list[dict]:
x, y, sx, sy = make_data(n)
x, sx = make_data(n)
w = WINDOW

cases = [
Expand All @@ -59,31 +57,11 @@ def run_vs_polars(n: int) -> list[dict]:
return results


def run_fast_vs_polars(n: int) -> list[dict]:
    """Time the fast robustrolling variants against the matching Polars calls.

    Covers `rolling_mean` with `assume_finite=True` and the variance, skewness
    and kurtosis functions with `method="fast"`, all at window `WINDOW` on
    `n` generated samples.

    Returns one dict per case with keys `name`, `our_ms`, `pl_ms` and
    `speedup` (Polars time divided by robustrolling time).
    """
    values, _y, series, _sy = make_data(n)
    window = WINDOW

    cases = [
        ("rolling_mean (SIMD)", lambda: rr.rolling_mean(values, window, assume_finite=True), lambda: series.rolling_mean(window)),
        ("rolling_variance (fast)", lambda: rr.rolling_variance(values, window, method="fast"), lambda: series.rolling_var(window)),
        ("rolling_skewness (fast)", lambda: rr.rolling_skewness(values, window, method="fast"), lambda: series.rolling_skew(window)),
        ("rolling_kurtosis (fast)", lambda: rr.rolling_kurtosis(values, window, method="fast"), lambda: series.rolling_kurtosis(window)),
    ]

    def _time_case(name, our_fn, pl_fn):
        # robustrolling is timed first, then Polars, for every case.
        our_ms = bench(our_fn)
        pl_ms = bench(pl_fn)
        return {"name": name, "our_ms": our_ms, "pl_ms": pl_ms,
                "speedup": pl_ms / our_ms}

    return [_time_case(*case) for case in cases]


def flag(v: float) -> str:
    """Return the marker "x" when `v` is at least 1.0, otherwise a single space."""
    if v >= 1.0:
        return "x"
    return " "


def print_table(n: int, rows: list[dict], label: str) -> None:
def print_table(n: int, rows: list[dict]) -> None:
print(f"\n n = {n:,} window = {WINDOW} (median of {REPS} runs)")
print(f" {'Function':<28} {'robustrolling':>14} {'polars':>10} {'speedup':>9}")
print(" " + "-" * 65)
Expand All @@ -98,15 +76,8 @@ def print_table(n: int, rows: list[dict], label: str) -> None:
if __name__ == "__main__":
print(f"robustrolling vs Polars {pl.__version__} — rolling window benchmark")
print("=" * 65)

print("\n--- stable (default) methods vs Polars ---")
for n in SIZES:
rows = run_vs_polars(n)
print_table(n, rows, "stable")

print("\n\n--- fast methods vs Polars ---")
for n in SIZES:
rows = run_fast_vs_polars(n)
print_table(n, rows, "fast")
print_table(n, rows)

print()
1 change: 1 addition & 0 deletions benchmarks/bench_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

def bench(fn, reps: int = REPS) -> float:
"""Return median wall time in milliseconds over `reps` runs."""
fn() # warmup: prime caches before timing
times = []
for _ in range(reps):
t0 = time.perf_counter()
Expand Down
Loading
Loading