diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..c6d06b4 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,50 @@ +name: Build and deploy docs + +on: + push: + branches: [main] + paths: + - "docs/**" + - "py_package/**" + - "R/**" + - "man/**" + +permissions: + pages: write + id-token: write + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install Sphinx dependencies + run: pip install -r docs/requirements.txt + + - name: Generate R API RST files + run: python docs/rd_to_rst.py + + - name: Build HTML docs + run: sphinx-build docs/python docs/_build -W --keep-going + + - name: Upload Pages artifact + uses: actions/upload-pages-artifact@v3 + with: + path: docs/_build + + deploy: + needs: build + runs-on: ubuntu-latest + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore index fbf4c73..2b72f82 100644 --- a/.gitignore +++ b/.gitignore @@ -35,4 +35,7 @@ inst/include/ py_package/dist/ py_package/venv/ __pycache__/ -*.pyc \ No newline at end of file +*.pyc + +# Sphinx build output +docs/_build/ \ No newline at end of file diff --git a/README.md b/README.md index 36f43aa..91f8e37 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # robustrolling -[![R package check](https://github.com/Ptak07/rolling_window/actions/workflows/r_check.yml/badge.svg)](https://github.com/Ptak07/rolling_window/actions/workflows/r_check.yml) -[![C++ tests](https://github.com/Ptak07/rolling_window/actions/workflows/cpp_test.yml/badge.svg)](https://github.com/Ptak07/rolling_window/actions/workflows/cpp_test.yml) -[![Python 
package](https://github.com/Ptak07/rolling_window/actions/workflows/python.yml/badge.svg)](https://github.com/Ptak07/rolling_window/actions/workflows/python.yml) +[![R package check](https://github.com/IgorPtak/rolling_window/actions/workflows/r_check.yml/badge.svg)](https://github.com/IgorPtak/rolling_window/actions/workflows/r_check.yml) +[![C++ tests](https://github.com/IgorPtak/rolling_window/actions/workflows/cpp_test.yml/badge.svg)](https://github.com/IgorPtak/rolling_window/actions/workflows/cpp_test.yml) +[![Python package](https://github.com/IgorPtak/rolling_window/actions/workflows/python.yml/badge.svg)](https://github.com/IgorPtak/rolling_window/actions/workflows/python.yml) High-performance rolling window metrics for R and Python, implemented in C++17. @@ -41,13 +41,13 @@ Six production-grade algorithms covering the most common rolling statistics: ### R ```r -remotes::install_github("Ptak07/rolling_window") +remotes::install_github("IgorPtak/rolling_window") ``` Or build from source: ```bash -git clone https://github.com/Ptak07/rolling_window.git +git clone https://github.com/IgorPtak/rolling_window.git cd rolling_window make r-build ``` @@ -57,7 +57,7 @@ Requires: R ≥ 4.0, a C++17 compiler. 
### Python ```bash -git clone https://github.com/Ptak07/rolling_window.git +git clone https://github.com/IgorPtak/rolling_window.git cd rolling_window pip install py_package/ ``` diff --git a/docs/_static/custom.css b/docs/_static/custom.css new file mode 100644 index 0000000..0cef456 --- /dev/null +++ b/docs/_static/custom.css @@ -0,0 +1,27 @@ +/* robustrolling — custom overrides on top of furo */ + +:root { + --color-brand-primary: #2E86AB; + --color-brand-content: #2E86AB; + --color-highlight-on-target: #e8f4f8; +} + +/* Tighten the algorithm table on landing page */ +.algorithm-table td, .algorithm-table th { + padding: 0.4rem 0.6rem; + font-size: 0.9rem; +} + +/* Monospace for inline code in parameter tables */ +dl.field-list > dd code { + font-size: 0.85em; +} + +/* Hero paragraph */ +.hero { + font-size: 1.1rem; + color: var(--color-foreground-secondary); + margin-bottom: 1.5rem; + border-left: 4px solid var(--color-brand-primary); + padding-left: 1rem; +} diff --git a/docs/_static/logo.svg b/docs/_static/logo.svg new file mode 100644 index 0000000..9eb0c20 --- /dev/null +++ b/docs/_static/logo.svg @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + diff --git a/docs/python/api.rst b/docs/python/api.rst new file mode 100644 index 0000000..4388496 --- /dev/null +++ b/docs/python/api.rst @@ -0,0 +1,53 @@ +Python API Reference +==================== + +All functions accept ``np.ndarray`` and ``pd.Series`` and return the same +type. The ``min_periods`` parameter follows pandas semantics: positions with +fewer valid observations than ``min_periods`` are set to ``nan``. + +High-level functions +-------------------- + +.. 
automodule:: robustrolling + :members: rolling_max, rolling_min, rolling_median, rolling_variance, + rolling_mean, rolling_skewness, rolling_kurtosis, + rolling_cov, rolling_cor + :no-undoc-members: + +Low-level classes +----------------- + +Six C++ classes are exposed directly for streaming (one observation at a time) +or for computing multiple statistics in a single pass without calling several +high-level functions. + +.. list-table:: + :header-rows: 1 + :widths: 25 35 40 + + * - Class + - Algorithm + - Key methods + * - :py:class:`~robustrolling.MonotonicMax` + - Monotonic deque + - ``update``, ``get_max``, ``process_batch`` + * - :py:class:`~robustrolling.MonotonicMin` + - Monotonic deque + - ``update``, ``get_min``, ``process_batch`` + * - :py:class:`~robustrolling.MultisetMedian` + - ``std::multiset`` + tracked iterator + - ``update``, ``get_median``, ``process_batch`` + * - :py:class:`~robustrolling.SlidingWelford` + - Welford + ring buffer + - ``update``, ``get_variance``, ``process_batch`` + * - :py:class:`~robustrolling.SlidingMoments` + - Terriberry 4th-moment + - ``update``, ``get_mean``, ``get_skewness``, ``get_kurtosis`` + * - :py:class:`~robustrolling.SlidingCovariance` + - 2-D Welford + - ``update``, ``get_covariance``, ``get_correlation`` + +.. 
toctree:: + :hidden: + + low_level diff --git a/docs/python/conf.py b/docs/python/conf.py new file mode 100644 index 0000000..e68ce02 --- /dev/null +++ b/docs/python/conf.py @@ -0,0 +1,56 @@ +import sys +import os + +sys.path.insert(0, os.path.abspath("../../py_package")) + +project = "robustrolling" +author = "Igor Ptak" +release = "0.1.0" +copyright = "2026, Igor Ptak" + +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "sphinx.ext.intersphinx", +] + +intersphinx_mapping = { + "python": ("https://docs.python.org/3/", None), + "numpy": ("https://numpy.org/doc/stable/", None), +} + +autodoc_mock_imports = ["robust_rolling_core"] + +napoleon_numpy_docstring = True +napoleon_google_docstring = False +napoleon_use_param = False +napoleon_use_rtype = False + +autodoc_member_order = "bysource" +autodoc_default_options = { + "members": True, + "undoc-members": False, + "show-inheritance": False, +} + +html_theme = "furo" +html_title = "robustrolling" +html_logo = "../_static/logo.svg" +html_favicon = "../_static/logo.svg" +html_static_path = ["../_static"] +html_css_files = ["custom.css"] + +html_theme_options = { + "light_css_variables": { + "color-brand-primary": "#2E86AB", + "color-brand-content": "#2E86AB", + "color-highlight-on-target": "#e8f4f8", + }, + "dark_css_variables": { + "color-brand-primary": "#5aafe0", + "color-brand-content": "#5aafe0", + }, + "sidebar_hide_name": False, + "navigation_with_keys": True, +} diff --git a/docs/python/index.rst b/docs/python/index.rst new file mode 100644 index 0000000..f980c66 --- /dev/null +++ b/docs/python/index.rst @@ -0,0 +1,81 @@ +robustrolling +============= + +.. raw:: html + +

+ High-performance rolling-window statistics for R and Python — six + algorithms implemented in C++17, exposed through idiomatic bindings in + both languages, with O(1) or O(log n) updates per element. +

+ +.. toctree:: + :maxdepth: 2 + :caption: API Reference + + api + r_api/index + +Algorithm overview +------------------ + +.. list-table:: + :header-rows: 1 + :widths: 26 22 12 20 20 + + * - C++ class + - Algorithm + - Complexity + - R function(s) + - Python function(s) + * - ``SlidingWelfordRing`` + - Welford online variance (ring buffer) + - O(1) + - ``rolling_variance`` + - ``rolling_variance``, ``SlidingWelford`` + * - ``MonotonicMax`` + - Monotonic deque maximum + - O(1) amortised + - ``rolling_max`` + - ``rolling_max``, ``MonotonicMax`` + * - ``MonotonicMin`` + - Monotonic deque minimum + - O(1) amortised + - ``rolling_min`` + - ``rolling_min``, ``MonotonicMin`` + * - ``MultisetMedian`` + - ``std::multiset`` tracked-iterator median + - O(log n) + - ``rolling_median`` + - ``rolling_median``, ``MultisetMedian`` + * - ``SlidingMoments`` + - Terriberry 4th-moment online algorithm + - O(1) + - ``rolling_mean``, ``rolling_skewness``, ``rolling_kurtosis`` + - ``rolling_mean``, ``rolling_skewness``, ``rolling_kurtosis``, ``SlidingMoments`` + * - ``SlidingCovariance`` + - 2-D Welford online covariance + - O(1) + - ``rolling_cov``, ``rolling_cor`` + - ``rolling_cov``, ``rolling_cor``, ``SlidingCovariance`` + +Install for R +------------- + +.. code-block:: r + + # Requires a C++17 compiler (GCC ≥ 7, Clang ≥ 5, MSVC ≥ 2017) + install.packages("remotes") + remotes::install_github("IgorPtak/rolling_window") + +Install for Python +------------------ + +.. code-block:: bash + + # Requires Python ≥ 3.8 and a C++17 compiler + pip install git+https://github.com/IgorPtak/rolling_window.git#subdirectory=py_package + + # Or clone and install locally: + git clone https://github.com/IgorPtak/rolling_window.git + pip install rolling_window/py_package/ diff --git a/docs/python/low_level.rst b/docs/python/low_level.rst new file mode 100644 index 0000000..0cffd97 --- /dev/null +++ b/docs/python/low_level.rst @@ -0,0 +1,123 @@ +Low-level Classes +================= + +.. 
py:currentmodule:: robustrolling + +C++17 classes exposed via pybind11. Use them for streaming (one value at a +time) or to read multiple statistics from a single pass. + +---- + +.. py:class:: MonotonicMax(window_size) + + Rolling maximum — monotonic deque, O(1) amortised. + + :param window_size: int + + .. py:method:: update(value: float) + .. py:method:: get_max() -> float + .. py:method:: process_batch(x: numpy.ndarray) -> numpy.ndarray + + .. code-block:: python + + mm = MonotonicMax(3) + mm.update(1.0); mm.update(3.0); mm.update(2.0) + mm.get_max() # 3.0 + +---- + +.. py:class:: MonotonicMin(window_size) + + Rolling minimum — monotonic deque, O(1) amortised. + + :param window_size: int + + .. py:method:: update(value: float) + .. py:method:: get_min() -> float + .. py:method:: process_batch(x: numpy.ndarray) -> numpy.ndarray + +---- + +.. py:class:: MultisetMedian(window_size) + + Rolling median — ``std::multiset`` with tracked iterator, O(log n). + Even-size windows return the average of the two middle elements. + + :param window_size: int + + .. py:method:: update(value: float) + .. py:method:: get_median() -> float + .. py:method:: process_batch(x: numpy.ndarray) -> numpy.ndarray + +---- + +.. py:class:: SlidingWelford(window_size) + + Rolling sample variance (ddof=1) — Welford algorithm with ring buffer, + O(1). + + :param window_size: int + + .. py:method:: update(value: float) + .. py:method:: get_variance() -> float + .. py:method:: process_batch(x: numpy.ndarray) -> numpy.ndarray + + .. code-block:: python + + sw = SlidingWelford(3) + for v in [1., 2., 3., 4.]: + sw.update(v) + sw.get_variance() # 1.0 + +---- + +.. py:class:: SlidingMoments(window_size) + + Rolling mean, skewness, and excess kurtosis — Terriberry's 4th-moment + algorithm, O(1). Requires ≥ 3 observations for skewness, ≥ 4 for + kurtosis. + + :param window_size: int + + .. py:method:: update(x: float) + .. py:method:: reset() + .. py:method:: current_size() -> int + .. 
py:method:: get_mean() -> float + .. py:method:: get_skewness() -> float + .. py:method:: get_kurtosis() -> float + .. py:method:: process_mean_batch(x: numpy.ndarray) -> numpy.ndarray + .. py:method:: process_skewness_batch(x: numpy.ndarray) -> numpy.ndarray + .. py:method:: process_kurtosis_batch(x: numpy.ndarray) -> numpy.ndarray + + .. code-block:: python + + sm = SlidingMoments(4) + for v in [1., 2., 3., 4.]: + sm.update(v) + sm.get_mean(), sm.get_skewness(), sm.get_kurtosis() + # (2.5, 0.0, -1.2) + +---- + +.. py:class:: SlidingCovariance(window_size) + + Rolling sample covariance and Pearson correlation — 2-D Welford + algorithm, O(1). + + :param window_size: int + + .. py:method:: update(x: float, y: float) + .. py:method:: get_covariance() -> float + .. py:method:: get_correlation() -> float + .. py:method:: get_mean_x() -> float + .. py:method:: get_mean_y() -> float + .. py:method:: process_covariance_batch(x: numpy.ndarray, y: numpy.ndarray) -> numpy.ndarray + .. py:method:: process_correlation_batch(x: numpy.ndarray, y: numpy.ndarray) -> numpy.ndarray + + .. code-block:: python + + sc = SlidingCovariance(3) + for x, y in [(1,2),(2,4),(3,6)]: + sc.update(x, y) + sc.get_covariance(), sc.get_correlation() + # (2.0, 1.0) diff --git a/docs/python/r_api/index.rst b/docs/python/r_api/index.rst new file mode 100644 index 0000000..0788208 --- /dev/null +++ b/docs/python/r_api/index.rst @@ -0,0 +1,20 @@ +R API Reference +=============== + +All functions accept a numeric vector ``x`` (and ``y`` for bivariate +functions), a ``window_size`` integer, and an optional ``min_periods`` +parameter compatible with *pandas* semantics. + +.. 
toctree:: + :maxdepth: 1 + :caption: Functions + + rolling_cor + rolling_cov + rolling_kurtosis + rolling_max + rolling_mean + rolling_median + rolling_min + rolling_skewness + rolling_variance diff --git a/docs/python/r_api/rolling_cor.rst b/docs/python/r_api/rolling_cor.rst new file mode 100644 index 0000000..4171c2a --- /dev/null +++ b/docs/python/r_api/rolling_cor.rst @@ -0,0 +1,44 @@ +Rolling Correlation +=================== + +Computes the rolling Pearson correlation between two numeric vectors. + +Usage +----- + +.. code-block:: r + + rolling_cor(x, y, window_size, min_periods = window_size) + +Parameters +---------- + +.. list-table:: + :header-rows: 1 + :widths: 20 80 + + * - Parameter + - Description + * - ``x`` + - A numeric vector of type double. + * - ``y`` + - A numeric vector of type double, same length as ``x``. + * - ``window_size`` + - Positive integer window length. + * - ``min_periods`` + - Minimum number of valid (non-``NA``) pairs required. Defaults to ``window_size``. + +Returns +------- + +A numeric vector with rolling correlation values. + +Examples +-------- + +.. code-block:: r + + x <- as.double(c(1, 2, 3, 4, 5)) + y <- as.double(c(2, 4, 6, 8, 10)) + rolling_cor(x, y, 3L) + diff --git a/docs/python/r_api/rolling_cov.rst b/docs/python/r_api/rolling_cov.rst new file mode 100644 index 0000000..183ac87 --- /dev/null +++ b/docs/python/r_api/rolling_cov.rst @@ -0,0 +1,44 @@ +Rolling Covariance +================== + +Computes the rolling sample covariance (ddof=1) between two numeric vectors. + +Usage +----- + +.. code-block:: r + + rolling_cov(x, y, window_size, min_periods = window_size) + +Parameters +---------- + +.. list-table:: + :header-rows: 1 + :widths: 20 80 + + * - Parameter + - Description + * - ``x`` + - A numeric vector of type double. + * - ``y`` + - A numeric vector of type double, same length as ``x``. + * - ``window_size`` + - Positive integer window length. 
+ * - ``min_periods`` + - Minimum number of valid (non-``NA``) pairs required. Defaults to ``window_size``. + +Returns +------- + +A numeric vector with rolling covariance values. + +Examples +-------- + +.. code-block:: r + + x <- as.double(c(1, 2, 3, 4, 5)) + y <- as.double(c(2, 4, 6, 8, 10)) + rolling_cov(x, y, 3L) + diff --git a/docs/python/r_api/rolling_kurtosis.rst b/docs/python/r_api/rolling_kurtosis.rst new file mode 100644 index 0000000..47adddb --- /dev/null +++ b/docs/python/r_api/rolling_kurtosis.rst @@ -0,0 +1,42 @@ +Rolling Kurtosis +================ + +Computes the rolling excess kurtosis (Fisher) over a numeric vector. +Requires at least 4 non-``NA`` observations per window. + +Usage +----- + +.. code-block:: r + + rolling_kurtosis(x, window_size, min_periods = window_size) + +Parameters +---------- + +.. list-table:: + :header-rows: 1 + :widths: 20 80 + + * - Parameter + - Description + * - ``x`` + - A numeric vector of type double. + * - ``window_size`` + - Positive integer window length. + * - ``min_periods`` + - Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size``. + +Returns +------- + +A numeric vector with rolling excess kurtosis values. + +Examples +-------- + +.. code-block:: r + + x <- as.double(c(1, 2, 3, 4, 5)) + rolling_kurtosis(x, 4L) + diff --git a/docs/python/r_api/rolling_max.rst b/docs/python/r_api/rolling_max.rst new file mode 100644 index 0000000..29d9024 --- /dev/null +++ b/docs/python/r_api/rolling_max.rst @@ -0,0 +1,41 @@ +Rolling Maximum +=============== + +Computes the rolling maximum over a numeric vector using a monotonic deque. + +Usage +----- + +.. code-block:: r + + rolling_max(x, window_size, min_periods = window_size) + +Parameters +---------- + +.. list-table:: + :header-rows: 1 + :widths: 20 80 + + * - Parameter + - Description + * - ``x`` + - A numeric vector of type double. + * - ``window_size`` + - Positive integer window length. 
+ * - ``min_periods`` + - Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size``. + +Returns +------- + +A numeric vector with rolling maximum values. + +Examples +-------- + +.. code-block:: r + + x <- as.double(c(1, 3, 2, 5, 4)) + rolling_max(x, 3L) + diff --git a/docs/python/r_api/rolling_mean.rst b/docs/python/r_api/rolling_mean.rst new file mode 100644 index 0000000..eae9838 --- /dev/null +++ b/docs/python/r_api/rolling_mean.rst @@ -0,0 +1,41 @@ +Rolling Mean +============ + +Computes the rolling mean over a numeric vector. + +Usage +----- + +.. code-block:: r + + rolling_mean(x, window_size, min_periods = window_size) + +Parameters +---------- + +.. list-table:: + :header-rows: 1 + :widths: 20 80 + + * - Parameter + - Description + * - ``x`` + - A numeric vector of type double. + * - ``window_size`` + - Positive integer window length. + * - ``min_periods`` + - Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size``. + +Returns +------- + +A numeric vector with rolling mean values. + +Examples +-------- + +.. code-block:: r + + x <- as.double(c(1, 2, 3, 4)) + rolling_mean(x, 3L) + diff --git a/docs/python/r_api/rolling_median.rst b/docs/python/r_api/rolling_median.rst new file mode 100644 index 0000000..e83bc42 --- /dev/null +++ b/docs/python/r_api/rolling_median.rst @@ -0,0 +1,42 @@ +Rolling Median +============== + +Computes the rolling median over a numeric vector using an ordered multiset +with a tracked median iterator. Time complexity: O(log n) per element. + +Usage +----- + +.. code-block:: r + + rolling_median(x, window_size, min_periods = window_size) + +Parameters +---------- + +.. list-table:: + :header-rows: 1 + :widths: 20 80 + + * - Parameter + - Description + * - ``x`` + - A numeric vector of type double. + * - ``window_size`` + - Positive integer window length. 
+ * - ``min_periods`` + - Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size``. + +Returns +------- + +A numeric vector with rolling median values. + +Examples +-------- + +.. code-block:: r + + x <- as.double(c(1, 3, 2, 5, 4)) + rolling_median(x, 3L) + diff --git a/docs/python/r_api/rolling_min.rst b/docs/python/r_api/rolling_min.rst new file mode 100644 index 0000000..7a48358 --- /dev/null +++ b/docs/python/r_api/rolling_min.rst @@ -0,0 +1,41 @@ +Rolling Minimum +=============== + +Computes the rolling minimum over a numeric vector using a monotonic deque. + +Usage +----- + +.. code-block:: r + + rolling_min(x, window_size, min_periods = window_size) + +Parameters +---------- + +.. list-table:: + :header-rows: 1 + :widths: 20 80 + + * - Parameter + - Description + * - ``x`` + - A numeric vector of type double. + * - ``window_size`` + - Positive integer window length. + * - ``min_periods`` + - Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size``. + +Returns +------- + +A numeric vector with rolling minimum values. + +Examples +-------- + +.. code-block:: r + + x <- as.double(c(1, 3, 2, 5, 4)) + rolling_min(x, 3L) + diff --git a/docs/python/r_api/rolling_skewness.rst b/docs/python/r_api/rolling_skewness.rst new file mode 100644 index 0000000..a2e5e3c --- /dev/null +++ b/docs/python/r_api/rolling_skewness.rst @@ -0,0 +1,42 @@ +Rolling Skewness +================ + +Computes the rolling adjusted Fisher-Pearson skewness over a numeric vector. +Requires at least 3 non-``NA`` observations per window. + +Usage +----- + +.. code-block:: r + + rolling_skewness(x, window_size, min_periods = window_size) + +Parameters +---------- + +.. list-table:: + :header-rows: 1 + :widths: 20 80 + + * - Parameter + - Description + * - ``x`` + - A numeric vector of type double. + * - ``window_size`` + - Positive integer window length. 
+ * - ``min_periods`` + - Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size``. + +Returns +------- + +A numeric vector with rolling skewness values. + +Examples +-------- + +.. code-block:: r + + x <- as.double(c(1, 2, 3, 4, 5)) + rolling_skewness(x, 3L) + diff --git a/docs/python/r_api/rolling_variance.rst b/docs/python/r_api/rolling_variance.rst new file mode 100644 index 0000000..71fefdc --- /dev/null +++ b/docs/python/r_api/rolling_variance.rst @@ -0,0 +1,44 @@ +Rolling Sample Variance +======================= + +Computes the rolling sample variance over a numeric vector. + +Usage +----- + +.. code-block:: r + + rolling_variance(x, window_size, min_periods = window_size) + +Parameters +---------- + +.. list-table:: + :header-rows: 1 + :widths: 20 80 + + * - Parameter + - Description + * - ``x`` + - A numeric vector of type double. + * - ``window_size`` + - Positive integer window length. + * - ``min_periods`` + - Minimum number of non-``NA`` observations required in a window to return a result. Defaults to ``window_size`` (pandas semantics). Positions with fewer non-``NA`` values yield ``NA``. + +Returns +------- + +A numeric vector with rolling sample variance values. Entries are +``NA`` when fewer than ``min_periods`` non-``NA`` observations are +present in the window, and ``NaN`` when variance is undefined (fewer +than two values). + +Examples +-------- + +.. 
"""Convert man/*.Rd files to docs/python/r_api/*.rst for Sphinx.

Each ``man/<name>.Rd`` file (except ``*-package.Rd``) is rendered to
``docs/python/r_api/<name>.rst`` and an ``index.rst`` with a toctree listing
every generated page is written next to them.

Only the small subset of Rd markup that roxygen2 emits for simple function
pages is understood: ``\\title``, ``\\description``, ``\\usage``,
``\\arguments``/``\\item``, ``\\value`` and ``\\examples``, plus a handful of
inline macros (see ``_INLINE_MACROS``).
"""

from __future__ import annotations

import re
import sys
from pathlib import Path

ROOT = Path(__file__).parent.parent
MAN_DIR = ROOT / "man"
OUT_DIR = Path(__file__).parent / "python" / "r_api"

# Inline Rd macros and the reST they are rewritten to. Arguments are matched
# with ``[^{}]*`` so that, for nested markup such as ``\code{\link{foo}}``,
# only the innermost macro matches on a given pass; ``_strip_rd`` repeats the
# passes until nothing changes.
_INLINE_MACROS: tuple[tuple[re.Pattern[str], str], ...] = (
    (re.compile(r"\\code\{([^{}]*)\}"), r"``\1``"),
    (re.compile(r"\\pkg\{([^{}]*)\}"), r"**\1**"),
    (re.compile(r"\\emph\{([^{}]*)\}"), r"*\1*"),
    (re.compile(r"\\strong\{([^{}]*)\}"), r"**\1**"),
    # \link accepts an optional [pkg] / [=target] argument before the braces.
    (re.compile(r"\\link(?:\[[^\]]*\])?\{([^{}]*)\}"), r"\1"),
    (re.compile(r"\\href\{[^{}]*\}\{([^{}]*)\}"), r"\1"),
)

# Opening of an ``\item{name}{description}`` entry inside ``\arguments``.
_ITEM_OPEN = re.compile(r"\\item\s*\{")


def _unescape_percent(text: str) -> str:
    """Turn the Rd escape ``\\%`` back into a literal percent sign."""
    return text.replace("\\%", "%")


def _strip_rd(text: str) -> str:
    """Rewrite simple inline Rd markup as reST and strip outer whitespace.

    ``\\code{x}`` becomes ````x````, ``\\pkg``/``\\strong`` become bold,
    ``\\emph`` becomes italic, and ``\\link``/``\\href`` are reduced to their
    visible text. Nested macros are resolved from the inside out.
    """
    previous = None
    # Every successful substitution removes one brace pair, so this converges.
    while previous != text:
        previous = text
        for pattern, replacement in _INLINE_MACROS:
            text = pattern.sub(replacement, text)
    return _unescape_percent(text).strip()


def _read_braced(text: str, start: int) -> tuple[str, int]:
    """Read a brace group whose opening ``{`` sits just before *start*.

    Returns ``(body, end)``: *body* is the raw text between the braces and
    *end* is the index of the matching ``}``. If the group is never closed,
    *body* runs to the end of *text* and *end* is ``len(text)``.
    """
    depth = 1
    idx = start
    while idx < len(text):
        ch = text[idx]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                break
        idx += 1
    return text[start:idx], idx


def _extract(content: str, tag: str) -> str:
    """Return the stripped body of the first ``\\tag{...}`` block.

    Nested braces are balanced. Returns ``""`` when the tag is absent.
    """
    marker = f"\\{tag}{{"
    start = content.find(marker)
    if start == -1:
        return ""
    body, _ = _read_braced(content, start + len(marker))
    return body.strip()


def _extract_items(content: str, tag: str) -> list[tuple[str, str]]:
    """Return every ``\\item{name}{desc}`` pair inside the ``\\tag{}`` block.

    An item whose name is not followed by a second brace group gets an empty
    description.
    """
    block = _extract(content, tag)
    items: list[tuple[str, str]] = []
    pos = 0
    while True:
        # Search from an offset instead of re-slicing the block each time.
        match = _ITEM_OPEN.search(block, pos)
        if match is None:
            break
        name, idx = _read_braced(block, match.end())
        idx += 1  # step over the brace that closes the name group
        while idx < len(block) and block[idx] in " \n\r\t":
            idx += 1
        desc = ""
        if idx < len(block) and block[idx] == "{":
            desc, idx = _read_braced(block, idx + 1)
        items.append((name.strip(), desc.strip()))
        pos = idx + 1
    return items


def _code_block(code: str) -> list[str]:
    """Return the reST fragments of an R ``code-block`` containing *code*."""
    parts = [".. code-block:: r\n\n"]
    parts.extend(f"    {line}\n" for line in code.splitlines())
    parts.append("\n")
    return parts


def rd_text_to_rst(content: str, fallback_title: str = "") -> str:
    """Render the text of one Rd file as a reST page.

    Parameters
    ----------
    content
        Full text of the ``.Rd`` file.
    fallback_title
        Heading to use when the file has no ``\\title{}``. If both are empty
        no heading is emitted.

    Returns
    -------
    str
        reST source with a title, the description, and optional ``Usage``,
        ``Parameters``, ``Returns`` and ``Examples`` sections.
    """
    # A title may be wrapped across lines in the Rd source; a reST heading
    # must be a single line whose underline has the same length.
    title = " ".join(_strip_rd(_extract(content, "title")).split())
    title = title or fallback_title
    description = _strip_rd(_extract(content, "description"))
    usage = _unescape_percent(_extract(content, "usage")).strip()
    value = _strip_rd(_extract(content, "value"))
    examples_raw = _extract(content, "examples").strip()
    args = _extract_items(content, "arguments")

    parts: list[str] = []
    if title:
        parts.append(f"{title}\n{'=' * len(title)}\n\n")
    if description:
        parts.append(f"{description}\n\n")

    if usage:
        parts.append("Usage\n-----\n\n")
        parts.extend(_code_block(usage))

    if args:
        parts.append("Parameters\n----------\n\n")
        parts.append(".. list-table::\n")
        parts.append("   :header-rows: 1\n")
        parts.append("   :widths: 20 80\n\n")
        parts.append("   * - Parameter\n")
        parts.append("     - Description\n")
        for name, desc in args:
            # A table cell is a single reST line here, so fold line breaks.
            cleaned = _strip_rd(desc).replace("\n", " ")
            parts.append(f"   * - ``{name}``\n")
            parts.append(f"     - {cleaned}\n")
        parts.append("\n")

    if value:
        parts.append("Returns\n-------\n\n")
        parts.append(f"{value}\n\n")

    if examples_raw:
        # Lines starting with an unescaped % are Rd comments, not R code.
        kept = [
            line
            for line in examples_raw.splitlines()
            if not line.strip().startswith("%")
        ]
        example_code = _unescape_percent("\n".join(kept)).strip()
        if example_code:
            parts.append("Examples\n--------\n\n")
            parts.extend(_code_block(example_code))

    return "".join(parts)


def rd_to_rst(rd_path: Path) -> str:
    """Read *rd_path* (UTF-8) and return its reST rendering.

    The file stem is used as the page title when the Rd file has no
    ``\\title{}``.
    """
    content = rd_path.read_text(encoding="utf-8")
    return rd_text_to_rst(content, fallback_title=rd_path.stem)


def main() -> None:
    """Regenerate ``docs/python/r_api`` from every function page in ``man/``.

    Exits with an error message (non-zero status) when no ``.Rd`` files are
    found, so a misconfigured checkout fails instead of publishing an empty
    R API reference.
    """
    # Skip the package-level Rd page; only function pages are rendered.
    rd_files = sorted(
        path
        for path in MAN_DIR.glob("*.Rd")
        if not path.stem.endswith("-package")
    )
    if not rd_files:
        sys.exit(f"error: no function .Rd files found in {MAN_DIR}")

    OUT_DIR.mkdir(parents=True, exist_ok=True)

    names: list[str] = []
    for rd in rd_files:
        out_path = OUT_DIR / f"{rd.stem}.rst"
        out_path.write_text(rd_to_rst(rd), encoding="utf-8")
        names.append(rd.stem)
        # ASCII arrow: U+2192 cannot be encoded on cp1252 consoles.
        print(f"  {rd.name} -> {out_path.relative_to(ROOT)}")

    index_parts = [
        "R API Reference\n===============\n\n",
        "All functions accept a numeric vector ``x`` (and ``y`` for bivariate\n",
        "functions), a ``window_size`` integer, and an optional ``min_periods``\n",
        "parameter compatible with *pandas* semantics.\n\n",
        ".. toctree::\n",
        "   :maxdepth: 1\n",
        "   :caption: Functions\n\n",
    ]
    index_parts.extend(f"   {name}\n" for name in sorted(names))
    (OUT_DIR / "index.rst").write_text("".join(index_parts), encoding="utf-8")
    print(f"  -> r_api/index.rst ({len(names)} entries)")


if __name__ == "__main__":
    sys.exit(main())
+ min_periods : int, optional + Minimum number of non-NaN observations required to return a result. + Defaults to ``window_size`` (pandas-compatible semantics). + + Returns + ------- + numpy.ndarray or pandas.Series + Rolling maximum values. Positions with fewer than ``min_periods`` + valid observations are ``nan``. + + Examples + -------- + >>> import numpy as np + >>> import robustrolling as rr + >>> x = np.array([1.0, 3.0, 2.0, 5.0, 4.0]) + >>> rr.rolling_max(x, 3) + array([nan, nan, 3., 5., 5.]) + """ arr = _to_float64(x) mp = _resolve_min_periods(min_periods, window_size) result = MonotonicMax(window_size).process_batch(arr) @@ -86,6 +113,33 @@ def rolling_max(x, window_size: int, min_periods: int | None = None): def rolling_min(x, window_size: int, min_periods: int | None = None): + """ + Compute the rolling minimum over a sliding window. + + Parameters + ---------- + x : array-like + Input sequence. Accepts ``np.ndarray`` and ``pd.Series``. + window_size : int + Number of observations in the sliding window. + min_periods : int, optional + Minimum number of non-NaN observations required to return a result. + Defaults to ``window_size`` (pandas-compatible semantics). + + Returns + ------- + numpy.ndarray or pandas.Series + Rolling minimum values. Positions with fewer than ``min_periods`` + valid observations are ``nan``. + + Examples + -------- + >>> import numpy as np + >>> import robustrolling as rr + >>> x = np.array([1.0, 3.0, 2.0, 5.0, 4.0]) + >>> rr.rolling_min(x, 3) + array([nan, nan, 1., 2., 2.]) + """ arr = _to_float64(x) mp = _resolve_min_periods(min_periods, window_size) result = MonotonicMin(window_size).process_batch(arr) @@ -94,6 +148,36 @@ def rolling_min(x, window_size: int, min_periods: int | None = None): def rolling_variance(x, window_size: int, min_periods: int | None = None): + """ + Compute the rolling sample variance (ddof=1) over a sliding window. + + Uses the Welford online algorithm with a ring buffer for O(1) updates. 
+ + Parameters + ---------- + x : array-like + Input sequence. Accepts ``np.ndarray`` and ``pd.Series``. + window_size : int + Number of observations in the sliding window. + min_periods : int, optional + Minimum number of non-NaN observations required to return a result. + Defaults to ``window_size`` (pandas-compatible semantics). + + Returns + ------- + numpy.ndarray or pandas.Series + Rolling sample variance. Returns ``nan`` when fewer than + ``min_periods`` valid observations are present, or when fewer than + two observations are available (variance undefined). + + Examples + -------- + >>> import numpy as np + >>> import robustrolling as rr + >>> x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + >>> rr.rolling_variance(x, 3) + array([nan, nan, 1., 1., 1.]) + """ arr = _to_float64(x) mp = _resolve_min_periods(min_periods, window_size) result = SlidingWelford(window_size).process_batch(arr) @@ -102,6 +186,36 @@ def rolling_variance(x, window_size: int, min_periods: int | None = None): def rolling_median(x, window_size: int, min_periods: int | None = None): + """ + Compute the rolling median over a sliding window. + + Uses a ``std::multiset`` with a tracked median iterator. + Time complexity: O(log n) per element. + + Parameters + ---------- + x : array-like + Input sequence. Accepts ``np.ndarray`` and ``pd.Series``. + window_size : int + Number of observations in the sliding window. + min_periods : int, optional + Minimum number of non-NaN observations required to return a result. + Defaults to ``window_size`` (pandas-compatible semantics). + + Returns + ------- + numpy.ndarray or pandas.Series + Rolling median values. Positions with fewer than ``min_periods`` + valid observations are ``nan``. 
+ + Examples + -------- + >>> import numpy as np + >>> import robustrolling as rr + >>> x = np.array([1.0, 3.0, 2.0, 5.0, 4.0]) + >>> rr.rolling_median(x, 3) + array([nan, nan, 2., 3., 4.]) + """ arr = _to_float64(x) mp = _resolve_min_periods(min_periods, window_size) result = MultisetMedian(window_size).process_batch(arr) @@ -110,6 +224,33 @@ def rolling_median(x, window_size: int, min_periods: int | None = None): def rolling_mean(x, window_size: int, min_periods: int | None = None): + """ + Compute the rolling arithmetic mean over a sliding window. + + Parameters + ---------- + x : array-like + Input sequence. Accepts ``np.ndarray`` and ``pd.Series``. + window_size : int + Number of observations in the sliding window. + min_periods : int, optional + Minimum number of non-NaN observations required to return a result. + Defaults to ``window_size`` (pandas-compatible semantics). + + Returns + ------- + numpy.ndarray or pandas.Series + Rolling mean values. Positions with fewer than ``min_periods`` + valid observations are ``nan``. + + Examples + -------- + >>> import numpy as np + >>> import robustrolling as rr + >>> x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + >>> rr.rolling_mean(x, 3) + array([nan, nan, 2., 3., 4.]) + """ arr = _to_float64(x) mp = _resolve_min_periods(min_periods, window_size) result = SlidingMoments(window_size).process_mean_batch(arr) @@ -118,6 +259,37 @@ def rolling_mean(x, window_size: int, min_periods: int | None = None): def rolling_skewness(x, window_size: int, min_periods: int | None = None): + """ + Compute the rolling adjusted Fisher-Pearson skewness over a sliding window. + + Uses Terriberry's 4th-moment online algorithm for O(1) updates. + Requires at least 3 valid observations per window. + + Parameters + ---------- + x : array-like + Input sequence. Accepts ``np.ndarray`` and ``pd.Series``. + window_size : int + Number of observations in the sliding window. 
+ min_periods : int, optional + Minimum number of non-NaN observations required to return a result. + Defaults to ``window_size`` (pandas-compatible semantics). + + Returns + ------- + numpy.ndarray or pandas.Series + Rolling skewness values. Returns ``nan`` when fewer than + ``min_periods`` valid observations are present, or when fewer than + three observations are available. + + Examples + -------- + >>> import numpy as np + >>> import robustrolling as rr + >>> x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + >>> rr.rolling_skewness(x, 3) + array([nan, nan, 0., 0., 0.]) + """ arr = _to_float64(x) mp = _resolve_min_periods(min_periods, window_size) result = SlidingMoments(window_size).process_skewness_batch(arr) @@ -126,6 +298,38 @@ def rolling_skewness(x, window_size: int, min_periods: int | None = None): def rolling_kurtosis(x, window_size: int, min_periods: int | None = None): + """ + Compute the rolling excess kurtosis (Fisher definition) over a sliding window. + + Uses Terriberry's 4th-moment online algorithm for O(1) updates. + Returns excess kurtosis (normal distribution = 0). + Requires at least 4 valid observations per window. + + Parameters + ---------- + x : array-like + Input sequence. Accepts ``np.ndarray`` and ``pd.Series``. + window_size : int + Number of observations in the sliding window. + min_periods : int, optional + Minimum number of non-NaN observations required to return a result. + Defaults to ``window_size`` (pandas-compatible semantics). + + Returns + ------- + numpy.ndarray or pandas.Series + Rolling excess kurtosis values. Returns ``nan`` when fewer than + ``min_periods`` valid observations are present, or when fewer than + four observations are available. 
+ + Examples + -------- + >>> import numpy as np + >>> import robustrolling as rr + >>> x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + >>> rr.rolling_kurtosis(x, 4) + array([nan, nan, nan, -1.2, -1.2]) + """ arr = _to_float64(x) mp = _resolve_min_periods(min_periods, window_size) result = SlidingMoments(window_size).process_kurtosis_batch(arr) @@ -155,6 +359,38 @@ def _apply_min_periods_pair(result: np.ndarray, x: np.ndarray, y: np.ndarray, def rolling_cov(x, y, window_size: int, min_periods: int | None = None): + """ + Compute the rolling sample covariance (ddof=1) over a sliding window. + + Uses the 2D Welford online algorithm for O(1) updates. + + Parameters + ---------- + x : array-like + First input sequence. Accepts ``np.ndarray`` and ``pd.Series``. + y : array-like + Second input sequence, same length as ``x``. + window_size : int + Number of observations in the sliding window. + min_periods : int, optional + Minimum number of valid (non-NaN) pairs required to return a result. + Defaults to ``window_size`` (pandas-compatible semantics). + + Returns + ------- + numpy.ndarray or pandas.Series + Rolling sample covariance values. Positions with fewer than + ``min_periods`` valid pairs are ``nan``. + + Examples + -------- + >>> import numpy as np + >>> import robustrolling as rr + >>> x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + >>> y = np.array([2.0, 4.0, 6.0, 8.0, 10.0]) + >>> rr.rolling_cov(x, y, 3) + array([nan, nan, 2., 2., 2.]) + """ ax = _to_float64(x) ay = _to_float64(y) mp = _resolve_min_periods(min_periods, window_size) @@ -164,6 +400,38 @@ def rolling_cov(x, y, window_size: int, min_periods: int | None = None): def rolling_cor(x, y, window_size: int, min_periods: int | None = None): + """ + Compute the rolling Pearson correlation coefficient over a sliding window. + + Uses the 2D Welford online algorithm for O(1) updates. + + Parameters + ---------- + x : array-like + First input sequence. Accepts ``np.ndarray`` and ``pd.Series``. 
+ y : array-like + Second input sequence, same length as ``x``. + window_size : int + Number of observations in the sliding window. + min_periods : int, optional + Minimum number of valid (non-NaN) pairs required to return a result. + Defaults to ``window_size`` (pandas-compatible semantics). + + Returns + ------- + numpy.ndarray or pandas.Series + Rolling Pearson correlation values in [-1, 1]. Positions with fewer + than ``min_periods`` valid pairs are ``nan``. + + Examples + -------- + >>> import numpy as np + >>> import robustrolling as rr + >>> x = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + >>> y = np.array([2.0, 4.0, 6.0, 8.0, 10.0]) + >>> rr.rolling_cor(x, y, 3) + array([nan, nan, 1., 1., 1.]) + """ ax = _to_float64(x) ay = _to_float64(y) mp = _resolve_min_periods(min_periods, window_size)