From ae7e9b0cc7d6eeef9ea25e109417a95b64f3333a Mon Sep 17 00:00:00 2001 From: Nitin Tony Paul <108007300+nitintonypaul@users.noreply.github.com> Date: Thu, 22 Jan 2026 21:41:23 +0530 Subject: [PATCH 1/4] [ENH] Added `HierarchicalRiskParity` - Added HRP optimizer - fixed minor bug which returns portfolio weights reference instead of copying - Improved documentation with minimal import syntax - Updated README/Documentation --- README.md | 4 +- docs/docs/backtesting.md | 4 +- docs/docs/objectives/dro.md | 10 +- docs/docs/objectives/heuristics.md | 193 ++++++++++++++++- docs/docs/objectives/markowitz.md | 16 +- docs/docs/objectives/online_learning.md | 8 +- docs/docs/objectives/risk_measures.md | 26 +-- docs/docs/objectives/utility_theory.md | 20 +- docs/docs/regularization.md | 7 +- docs/docs/resources/references.md | 1 + opes/__init__.py | 2 +- opes/objectives/__init__.py | 1 + opes/objectives/base_optimizer.py | 2 +- opes/objectives/distributionally_robust.py | 20 +- opes/objectives/heuristics.py | 231 +++++++++++++++++++-- opes/objectives/markowitz.py | 24 +-- opes/objectives/online.py | 14 +- opes/objectives/risk_measures.py | 40 ++-- opes/objectives/utility_theory.py | 30 +-- opes/regularizer.py | 1 + pyproject.toml | 2 +- 21 files changed, 516 insertions(+), 140 deletions(-) diff --git a/README.md b/README.md index 5db42d7..c84b200 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,7 @@ The information provided by OPES is for educational, research and informational | | Maximum Sharpe Ratio | | **Principled Heuristics** | Uniform (1/N) | | | Risk Parity | +| | Hierarchical Risk Parity | | | Inverse Volatility | | | Softmax Mean | | | Maximum Diversification | @@ -201,7 +202,6 @@ Also it eats up RAM like pac-man. 
These features are still in the works and may or may not appear in later updates: | **Objective Name (Category)** | -| ------------------------------------------------ | -| Hierarchical Risk Parity (Principled Heuristics) | +| ------------------------------------------------ | | Online Newton Step (Online Learning) | | ADA-BARRONS (Online Learning) | diff --git a/docs/docs/backtesting.md b/docs/docs/backtesting.md index 36a34ce..34aa631 100644 --- a/docs/docs/backtesting.md +++ b/docs/docs/backtesting.md @@ -252,8 +252,8 @@ def plot_wealth( OPES ships with a basic plotting utility for visualizing portfolio wealth over time. This method exists for quick inspection and debugging, not for deep performance analysis. -It visualizes cumulative wealth for one or multiple strategies using their periodic -returns. It also provides a breakeven reference line and optional saving of the plot to +It visualizes cumulative wealth for one or multiple strategies using their periodic +returns. It also provides a breakeven reference line and optional saving of the plot to a file. !!! tip "Recommendation:" diff --git a/docs/docs/objectives/dro.md b/docs/docs/objectives/dro.md index 38ac480..3490ade 100644 --- a/docs/docs/objectives/dro.md +++ b/docs/docs/objectives/dro.md @@ -132,7 +132,7 @@ $$ !!! example "Example:" ```python # Importing the dro Kelly module - from opes.objectives.distributionally_robust import KLRobustKelly + from opes.objectives import KLRobustKelly # Let this be your ticker data training_data = some_data() @@ -289,7 +289,7 @@ Uses the log-sum-exp technique to solve for numerical stability. !!! example "Example:" ```python # Importing the dro maximum mean module - from opes.objectives.distributionally_robust import KLRobustMaxMean + from opes.objectives import KLRobustMaxMean # Let this be your ticker data training_data = some_data() @@ -449,7 +449,7 @@ $$ !!! 
example "Example:" ```python # Importing the dro maximum mean module - from opes.objectives.distributionally_robust import WassRobustMaxMean + from opes.objectives import WassRobustMaxMean # Let this be your ticker data training_data = some_data() @@ -623,7 +623,7 @@ $$ !!! example "Example:" ```python # Importing the dro mean-variance module - from opes.objectives.distributionally_robust import WassRobustMeanVariance + from opes.objectives import WassRobustMeanVariance # Let this be your ticker data training_data = some_data() @@ -792,7 +792,7 @@ $$ !!! example "Example:" ```python # Importing the dro minimum variance module - from opes.objectives.distributionally_robust import WassRobustMinVariance + from opes.objectives import WassRobustMinVariance # Let this be your ticker data training_data = some_data() diff --git a/docs/docs/objectives/heuristics.md b/docs/docs/objectives/heuristics.md index edcf287..be77498 100644 --- a/docs/docs/objectives/heuristics.md +++ b/docs/docs/objectives/heuristics.md @@ -15,6 +15,179 @@ to outperform their theoretically optimal cousins outside the textbook. +## `HierarchicalRiskParity` + +```python +class HierarchicalRiskParity(cluster_method='average') +``` + +Hierarchical Risk Parity (HRP) optimization. + +Hierarchical Risk Parity (HRP), introduced by López de Prado, +is a portfolio construction methodology that allocates capital +through hierarchical clustering and recursive risk balancing +rather than direct optimization of a scalar objective. HRP +addresses several structural weaknesses of traditional +mean-variance and risk parity approaches, including sensitivity +to covariance matrix estimation error, numerical instability +arising from matrix inversion and over-concentration in highly +correlated assets. By organizing assets into a hierarchy based +on correlation structure and allocating weights inversely to +cluster-level variance, HRP achieves diversification across +both individual assets and correlated groups. 
This procedure +yields stable, fully invested portfolios that are robust +out-of-sample and well-suited for high-dimensional or noisy +return environments, particularly when covariance estimates are +ill-conditioned. + +**Args:** + +- `cluster_method` (*str, optional*): Clustering method to be used for hierarchical clustering. Defaults to `'average'`. Available methods are: + - `'average'`: Merges clusters using the average pairwise distance between all points in each cluster, providing a balanced compromise between single and complete linkage. + - `'single'`: Merges clusters based on the minimum pairwise distance, often producing elongated, chain-like clusters. + - `'complete'`: Merges clusters based on the maximum pairwise distance, favoring compact and tightly bound clusters. + - `'ward'`: Merges clusters to minimize the increase in within-cluster variance, producing balanced clusters aligned with variance-based portfolio construction. + +### Methods + +#### `clean_weights` + +```python +def clean_weights(threshold=1e-08) +``` + +Cleans the portfolio weights by setting very small positions to zero. + +Any weight whose absolute value is below the specified `threshold` is replaced with zero. +This helps remove negligible allocations while keeping the array structure intact. This method +requires portfolio optimization (`optimize()` method) to take place for `self.weights` to be +defined other than `None`. + +!!! warning "Warning:" + This method modifies the existing portfolio weights in place. After cleaning, re-optimization + is required to recover the original weights. + +**Args** + +- `threshold` (*float, optional*): Float specifying the minimum absolute weight to retain. Defaults to `1e-8`. + + +**Returns:** + +- `numpy.ndarray`: Cleaned and re-normalized portfolio weight vector. + +**Raises** + +- `PortfolioError`: If weights have not been calculated via optimization. + + +!!! 
note "Notes:" + - Weights are cleaned using absolute values, making this method compatible with long-short portfolios. + - Re-normalization ensures the portfolio remains properly scaled after cleaning. + - Increasing threshold promotes sparsity but may materially alter the portfolio composition. + +#### `optimize` + +```python +def optimize(data, custom_cov=None) +``` + +Computes the Hierarchical Risk Parity portfolio: + +$$ +\mathbf{w}_i \propto \frac{1}{\sigma^2_{\text{cluster}}} +$$ + +!!! note "Note" + Asset weight bounds are defaulted to (0,1). + +**Args** + +- `data` (*pd.DataFrame*): Ticker price data in either multi-index or single-index formats. Examples are given below: + ``` + # Single-Index Example + Ticker TSLA NVDA GME PFE AAPL ... + Date + 2015-01-02 14.620667 0.483011 6.288958 18.688917 24.237551 ... + 2015-01-05 14.006000 0.474853 6.460137 18.587513 23.554741 ... + 2015-01-06 14.085333 0.460456 6.268492 18.742599 23.556952 ... + 2015-01-07 14.063333 0.459257 6.195926 18.999102 23.887287 ... + 2015-01-08 14.041333 0.476533 6.268492 19.386841 24.805082 ... + ... + + # Multi-Index Example Structure (OHLCV) + Columns: + + Ticker (e.g. GME, PFE, AAPL, ...) + - Open + - High + - Low + - Close + - Volume + ``` +- `custom_cov` (*None or array-like of shape (n_assets, n_assets), optional*): Custom covariance matrix. Can be used to inject externally generated covariance matrices (eg. Ledoit-Wolf). Defaults to `None`. + + +**Returns:** + +- `np.ndarray`: Vector of optimized portfolio weights. + +**Raises** + +- `DataError`: For any data mismatch during integrity check. +- `PortfolioError`: For any invalid portfolio variable inputs during integrity check. + + +!!! 
example "Example:" + ```python + # Importing the HRP module + from opes.objectives import HierarchicalRiskParity as HRP + + # Let this be your ticker data + training_data = some_data() + + # Let this be your custom covariance matrix + cov_m = covMatrix() + + # Initialize with custom clustering method + hrp_portfolio = HRP(cluster_method='ward') + + # Optimize portfolio with custom covariance matrix + weights = hrp_portfolio.optimize(data=training_data, custom_cov=cov_m) + ``` + +#### `stats` + +```python +def stats() +``` + +Calculates and returns portfolio concentration and diversification statistics. + +These statistics help users to inspect portfolio's overall concentration in +allocation. For the method to work, the optimizer must have been initialized, i.e. +the `optimize()` method should have been called at least once for `self.weights` +to be defined other than `None`. + +**Returns:** + +- A `dict` containing the following keys: + - `'tickers'` (*list*): A list of tickers used for optimization. + - `'weights'` (*np.ndarry*): Portfolio weights, output from optimization. + - `'portfolio_entropy'` (*float*): Shannon entropy computed on portfolio weights. + - `'herfindahl_index'` (*float*): Herfindahl Index value, computed on portfolio weights. + - `'gini_coefficient'` (*float*): Gini Coefficient value, computed on portfolio weights. + - `'absolute_max_weight'` (*float*): Absolute maximum allocation for an asset. + +**Raises** + +- `PortfolioError`: If weights have not been calculated via optimization. + + +!!! note "Notes:" + - All statistics are computed on absolute normalized weights (within the simplex), ensuring compatibility with long-short portfolios. + - This method is diagnostic only and does not modify portfolio weights. + - For meaningful interpretation, use these metrics in conjunction with risk and performance measures. + ## `InverseVolatility` ```python @@ -125,7 +298,7 @@ $$ !!! 
example "Example:" ```python # Importing the Inverse Volatility Portfolio (IVP) module - from opes.objectives.heuristics import InverseVolatility as IVP + from opes.objectives import InverseVolatility as IVP # Let this be your ticker data training_data = some_data() @@ -297,7 +470,7 @@ $$ !!! example "Example:" ```python # Importing the maximum diversification module - from opes.objectives.heuristics import MaxDiversification + from opes.objectives import MaxDiversification # Let this be your ticker data training_data = some_data() @@ -333,7 +506,7 @@ initiating a new one. !!! example "Example:" ```python # Import the MaxDiversification class - from opes.objectives.heuristics import MaxDiversification + from opes.objectives import MaxDiversification # Set with 'entropy' regularization optimizer = MaxDiversification(reg='entropy', strength=0.01) @@ -511,12 +684,12 @@ $$ !!! example "Example:" ```python # Importing the REPO module - from opes.objectives.heuristics import REPO + from opes.objectives import REPO # Let this be your ticker data training_data = some_data() - # Let these be your custom mean vector + # Let this be your custom mean vector mean_v = customMean() # Initialize with custom regularization @@ -547,7 +720,7 @@ initiating a new one. !!! example "Example:" ```python # Import the REPO class - from opes.objectives.heuristics import REPO + from opes.objectives import REPO # Set with 'entropy' regularization optimizer = REPO(reg='entropy', strength=0.01) @@ -714,7 +887,7 @@ $$ !!! example "Example:" ```python # Importing the risk parity module - from opes.objectives.heuristics import RiskParity + from opes.objectives import RiskParity # Let this be your ticker data training_data = some_data() @@ -750,7 +923,7 @@ initiating a new one. !!! 
example "Example:" ```python # Import the RiskParity class - from opes.objectives.heuristics import RiskParity + from opes.objectives import RiskParity # Set with 'entropy' regularization optimizer = RiskParity(reg='entropy', strength=0.01) @@ -911,7 +1084,7 @@ $$ !!! example "Example:" ```python # Importing the softmax mean module - from opes.objectives.heuristics import SoftmaxMean + from opes.objectives import SoftmaxMean # Let this be your ticker data training_data = some_data() @@ -1070,7 +1243,7 @@ $$ !!! example "Example:" ```python # Importing the equal-weight module - from opes.objectives.heuristics import Uniform + from opes.objectives import Uniform # Let this be your ticker data training_data = some_data() diff --git a/docs/docs/objectives/markowitz.md b/docs/docs/objectives/markowitz.md index 405e015..1367729 100644 --- a/docs/docs/objectives/markowitz.md +++ b/docs/docs/objectives/markowitz.md @@ -133,7 +133,7 @@ $$ !!! example "Example:" ```python # Importing the maximum mean module - from opes.objectives.markowitz import MaxMean + from opes.objectives import MaxMean # Let this be your ticker data training_data = some_data() @@ -170,7 +170,7 @@ initiating a new one. !!! example "Example:" ```python # Import the MaxMean class - from opes.objectives.markowitz import MaxMean + from opes.objectives import MaxMean # Set with 'entropy' regularization optimizer = MaxMean(reg='entropy', strength=0.01) @@ -346,7 +346,7 @@ $$ !!! example "Example:" ```python # Importing the maximum sharpe module - from opes.objectives.markowitz import MaxSharpe + from opes.objectives import MaxSharpe # Let this be your ticker data training_data = some_data() @@ -384,7 +384,7 @@ initiating a new one. !!! example "Example:" ```python # Import the MaxSharpe class - from opes.objectives.markowitz import MaxSharpe + from opes.objectives import MaxSharpe # Set with 'entropy' regularization optimizer = MaxSharpe(reg='entropy', strength=0.01) @@ -552,7 +552,7 @@ $$ !!! 
example "Example:" ```python # Importing the mean variance module - from opes.objectives.markowitz import MeanVariance + from opes.objectives import MeanVariance # Let this be your ticker data training_data = some_data() @@ -589,7 +589,7 @@ initiating a new one. !!! example "Example:" ```python # Import the MeanVariance class - from opes.objectives.markowitz import MeanVariance + from opes.objectives import MeanVariance # Set with 'entropy' regularization optimizer = MeanVariance(reg='entropy', strength=0.01) @@ -759,7 +759,7 @@ $$ !!! example "Example:" ```python # Importing the Global Minimum Variance (GMV) module - from opes.objectives.markowitz import MinVariance + from opes.objectives import MinVariance # Let this be your ticker data training_data = some_data() @@ -796,7 +796,7 @@ initiating a new one. !!! example "Example:" ```python # Import the MinVariance class - from opes.objectives.markowitz import MinVariance + from opes.objectives import MinVariance # Set with 'entropy' regularization optimizer = MinVariance(reg='entropy', strength=0.01) diff --git a/docs/docs/objectives/online_learning.md b/docs/docs/objectives/online_learning.md index e833502..e37bce7 100644 --- a/docs/docs/objectives/online_learning.md +++ b/docs/docs/objectives/online_learning.md @@ -134,7 +134,7 @@ $$ !!! example "Example:" ```python # Importing the BCRP module - from opes.objectives.online import BCRP + from opes.objectives import BCRP # Let this be your ticker data training_data = some_data() @@ -170,7 +170,7 @@ Leader (FTRL) or other adaptive optimization procedures. !!! example "Example:" ```python # Import the BCRP class - from opes.objectives.online import BCRP + from opes.objectives import BCRP # Set with 'entropy' regularization ftrl = BCRP(reg='entropy', strength=0.01) @@ -330,7 +330,7 @@ For this implementation, we have taken the reward function $f_{t} = \log \left(1 !!! 
example "Example:" ```python # Importing the exponential gradient module - from opes.objectives.online import ExponentialGradient as EG + from opes.objectives import ExponentialGradient as EG # Let this be your ticker data training_data = some_data() @@ -506,7 +506,7 @@ With $\mathcal P \sim \text{Grid}(k)$ or $\mathcal P \sim \text{Dirichlet}(\alph !!! example "Example:" ```python # Importing the universal portfolios module - from opes.objectives.online import UniversalPortfolios as UP + from opes.objectives import UniversalPortfolios as UP # Let this be your ticker data training_data = some_data() diff --git a/docs/docs/objectives/risk_measures.md b/docs/docs/objectives/risk_measures.md index f3d7ecb..1af09e5 100644 --- a/docs/docs/objectives/risk_measures.md +++ b/docs/docs/objectives/risk_measures.md @@ -142,7 +142,7 @@ $$ !!! example "Example:" ```python # Importing the CVaR module - from opes.objectives.risk_measures import CVaR + from opes.objectives import CVaR # Let this be your ticker data training_data = some_data() @@ -175,7 +175,7 @@ initiating a new one. !!! example "Example:" ```python # Import the CVaR class - from opes.objectives.risk_measures import CVaR + from opes.objectives import CVaR # Set with 'entropy' regularization optimizer = CVaR(reg='entropy', strength=0.01) @@ -343,7 +343,7 @@ $$ !!! example "Example:" ```python # Importing the EVaR module - from opes.objectives.risk_measures import EVaR + from opes.objectives import EVaR # Let this be your ticker data training_data = some_data() @@ -376,7 +376,7 @@ initiating a new one. !!! example "Example:" ```python # Import the EVaR class - from opes.objectives.risk_measures import EVaR + from opes.objectives import EVaR # Set with 'entropy' regularization optimizer = EVaR(reg='entropy', strength=0.01) @@ -546,7 +546,7 @@ $$ !!! 
example "Example:" ```python # Importing the ERM module - from opes.objectives.risk_measures import EntropicRisk + from opes.objectives import EntropicRisk # Let this be your ticker data training_data = some_data() @@ -579,7 +579,7 @@ initiating a new one. !!! example "Example:" ```python # Import the EntropicRisk class - from opes.objectives.risk_measures import EntropicRisk + from opes.objectives import EntropicRisk # Set with 'entropy' regularization optimizer = EntropicRisk(reg='entropy', strength=0.01) @@ -753,7 +753,7 @@ $$ !!! example "Example:" ```python # Importing the Mean-CVaR module - from opes.objectives.risk_measures import MeanCVaR + from opes.objectives import MeanCVaR # Let this be your ticker data training_data = some_data() @@ -790,7 +790,7 @@ initiating a new one. !!! example "Example:" ```python # Import the MeanCVaR class - from opes.objectives.risk_measures import MeanCVaR + from opes.objectives import MeanCVaR # Set with 'entropy' regularization optimizer = MeanCVaR(reg='entropy', strength=0.01) @@ -966,7 +966,7 @@ $$ !!! example "Example:" ```python # Importing the Mean-EVaR module - from opes.objectives.risk_measures import MeanEVaR + from opes.objectives import MeanEVaR # Let this be your ticker data training_data = some_data() @@ -1003,7 +1003,7 @@ initiating a new one. !!! example "Example:" ```python # Import the MeanEVaR class - from opes.objectives.risk_measures import MeanEVaR + from opes.objectives import MeanEVaR # Set with 'entropy' regularization optimizer = MeanEVaR(reg='entropy', strength=0.01) @@ -1171,7 +1171,7 @@ $$ !!! example "Example:" ```python # Importing the VaR module - from opes.objectives.risk_measures import VaR + from opes.objectives import VaR # Let this be your ticker data training_data = some_data() @@ -1374,7 +1374,7 @@ $$ !!! 
example "Example:" ```python # Importing the worst-case loss module - from opes.objectives.risk_measures import WorstCaseLoss + from opes.objectives import WorstCaseLoss # Let this be your ticker data training_data = some_data() @@ -1407,7 +1407,7 @@ initiating a new one. !!! example "Example:" ```python # Import the WorstCaseLoss class - from opes.objectives.risk_measures import WorstCaseLoss + from opes.objectives import WorstCaseLoss # Set with 'entropy' regularization optimizer = WorstCaseLoss(reg='entropy', strength=0.01) diff --git a/docs/docs/objectives/utility_theory.md b/docs/docs/objectives/utility_theory.md index 674aa0e..8d885f3 100644 --- a/docs/docs/objectives/utility_theory.md +++ b/docs/docs/objectives/utility_theory.md @@ -126,7 +126,7 @@ $$ !!! example "Example:" ```python # Importing the CARA class - from opes.objectives.utility_theory import CARA + from opes.objectives import CARA # Let this be your ticker data training_data = some_data() @@ -159,7 +159,7 @@ initiating a new one. !!! example "Example:" ```python # Import the CARA class - from opes.objectives.utility_theory import CARA + from opes.objectives import CARA # Set with 'entropy' regularization optimizer = CARA(reg='entropy', strength=0.01) @@ -319,7 +319,7 @@ $$ !!! example "Example:" ```python # Importing the CRRA class - from opes.objectives.utility_theory import CRRA + from opes.objectives import CRRA # Let this be your ticker data training_data = some_data() @@ -352,7 +352,7 @@ initiating a new one. !!! example "Example:" ```python # Import the CRRA class - from opes.objectives.utility_theory import CRRA + from opes.objectives import CRRA # Set with 'entropy' regularization optimizer = CRRA(reg='entropy', strength=0.01) @@ -526,7 +526,7 @@ $$ !!! 
example "Example:" ```python # Importing the HARA class - from opes.objectives.utility_theory import HARA + from opes.objectives import HARA # Let this be your ticker data training_data = some_data() @@ -559,7 +559,7 @@ initiating a new one. !!! example "Example:" ```python # Import the HARA class - from opes.objectives.utility_theory import HARA + from opes.objectives import HARA # Set with 'entropy' regularization optimizer = HARA(reg='entropy', strength=0.01) @@ -720,7 +720,7 @@ $$ !!! example "Example:" ```python # Importing the kelly criterion module - from opes.objectives.utility_theory import Kelly + from opes.objectives import Kelly # Let this be your ticker data training_data = some_data() @@ -753,7 +753,7 @@ initiating a new one. !!! example "Example:" ```python # Import the Kelly Criterion class - from opes.objectives.utility_theory import Kelly + from opes.objectives import Kelly # Set with 'entropy' regularization optimizer = Kelly(reg='entropy', strength=0.01) @@ -912,7 +912,7 @@ $$ !!! example "Example:" ```python # Importing the Quadratic Utility class - from opes.objectives.utility_theory import QuadraticUtility as QU + from opes.objectives import QuadraticUtility as QU # Let this be your ticker data training_data = some_data() @@ -945,7 +945,7 @@ initiating a new one. !!! example "Example:" ```python # Import the Quadratic Utility class - from opes.objectives.utility_theory import QuadraticUtility + from opes.objectives import QuadraticUtility # Set with 'entropy' regularization optimizer = QuadraticUtility(reg='entropy', strength=0.01) diff --git a/docs/docs/regularization.md b/docs/docs/regularization.md index 81531a4..97ed296 100644 --- a/docs/docs/regularization.md +++ b/docs/docs/regularization.md @@ -29,10 +29,10 @@ where $R(\mathbf{w})$ encodes structural preferences over the weights $\mathbf{w | Wasserstein-1 Distance from Uniform Weights | `wass-1` | $\text{W}_{1}(\mathbf{w}, \mathbf{u})$ | !!! 
note "Notes" - - `l1` regularization is mainly used for long-short portfolios to encourage less extreme + - `l1` regularization is mainly used for long-short portfolios to encourage less extreme allocations to meet the net exposure of 1. Using it on long-only portfolios is redundant. - - For long-short portfolios, mathematically grounded regularizers such as `entropy`, `jsd` - and `wass-1` first normalize the weights and constrain them to the simplex before applying + - For long-short portfolios, mathematically grounded regularizers such as `entropy`, `jsd` + and `wass-1` first normalize the weights and constrain them to the simplex before applying the regularization, ensuring mathematical coherence is not violated. --- @@ -46,6 +46,7 @@ The following objectives do not support regularization: - `Uniform` - `InverseVolatility` - `SoftmaxMean` +- `HierarchicalRiskParity` - `UniversalPortfolios` - `ExponentialGradient` - `KLRobustMaxMean` diff --git a/docs/docs/resources/references.md b/docs/docs/resources/references.md index cf62ecc..6615b08 100644 --- a/docs/docs/resources/references.md +++ b/docs/docs/resources/references.md @@ -27,6 +27,7 @@ * **Choueifaty, Y., & Coignard, Y. (2008).** Toward Maximum Diversification. *Journal of Portfolio Management*, 35(1), 40–51. * **Mercurio, P. J., Wu, Y., & Xie, H. (2020).** An Entropy-Based Approach to Portfolio Optimization. *Entropy*, 22(3), 332. https://doi.org/10.3390/e22030332 * **Mercurio, P. J., Wu, Y., & Xie, H. (2020).** Portfolio Optimization for Binary Options Based on Relative Entropy. *Entropy*, 22(7), 752. https://doi.org/10.3390/e22070752 +* **López de Prado, Marcos**, Building Diversified Portfolios that Outperform Out-of-Sample (May 23, 2016). *The Journal of Portfolio Management Summer 2016*, 42 (4) 59 - 69. 
https://doi.org/10.3905/jpm.2016.42.4.059 --- diff --git a/opes/__init__.py b/opes/__init__.py index 1cfce80..14a6b0b 100644 --- a/opes/__init__.py +++ b/opes/__init__.py @@ -1,5 +1,5 @@ # Version Log -__version__ = "0.9.1" +__version__ = "0.10.0" # Backtester easy import from .backtester import Backtester diff --git a/opes/objectives/__init__.py b/opes/objectives/__init__.py index 12dbf18..e64837d 100644 --- a/opes/objectives/__init__.py +++ b/opes/objectives/__init__.py @@ -23,6 +23,7 @@ MaxDiversification, RiskParity, REPO, + HierarchicalRiskParity, ) # Online Portfolios diff --git a/opes/objectives/base_optimizer.py b/opes/objectives/base_optimizer.py index 9dad131..9a0a780 100644 --- a/opes/objectives/base_optimizer.py +++ b/opes/objectives/base_optimizer.py @@ -144,4 +144,4 @@ def clean_weights(self, threshold=1e-8): else: self.weights[np.abs(self.weights) < threshold] = 0 self.weights /= np.abs(self.weights).sum() - return self.weights + return self.weights.copy() diff --git a/opes/objectives/distributionally_robust.py b/opes/objectives/distributionally_robust.py index 1a62326..043a299 100644 --- a/opes/objectives/distributionally_robust.py +++ b/opes/objectives/distributionally_robust.py @@ -111,7 +111,7 @@ def optimize(self, data=None, weight_bounds=(0, 1), w=None): !!! example "Example:" ```python # Importing the dro maximum mean module - from opes.objectives.distributionally_robust import KLRobustMaxMean + from opes.objectives import KLRobustMaxMean # Let this be your ticker data training_data = some_data() @@ -147,7 +147,7 @@ def f(x): ) if result.success: self.weights = result.x[:-1] - return self.weights + return self.weights.copy() else: raise OptimizationError( f"KL robust maximum mean optimization failed: {result.message}" @@ -242,7 +242,7 @@ def optimize(self, data=None, weight_bounds=(0, 1), w=None): !!! 
example "Example:" ```python # Importing the dro Kelly module - from opes.objectives.distributionally_robust import KLRobustKelly + from opes.objectives import KLRobustKelly # Let this be your ticker data training_data = some_data() @@ -277,7 +277,7 @@ def f(x): ) if result.success: self.weights = result.x[:-1] - return self.weights + return self.weights.copy() else: raise OptimizationError( f"KL robust kelly criterion optimization failed: {result.message}" @@ -396,7 +396,7 @@ def optimize(self, data=None, weight_bounds=(0, 1), w=None, custom_mean=None): !!! example "Example:" ```python # Importing the dro maximum mean module - from opes.objectives.distributionally_robust import WassRobustMaxMean + from opes.objectives import WassRobustMaxMean # Let this be your ticker data training_data = some_data() @@ -431,7 +431,7 @@ def f(w): ) if result.success: self.weights = result.x - return self.weights + return self.weights.copy() else: raise OptimizationError( f"Wasserstein Robust Maximum Mean optimization failed: {result.message}" @@ -564,7 +564,7 @@ def optimize(self, data=None, weight_bounds=(0, 1), w=None, custom_cov=None): !!! example "Example:" ```python # Importing the dro minimum variance module - from opes.objectives.distributionally_robust import WassRobustMinVariance + from opes.objectives import WassRobustMinVariance # Let this be your ticker data training_data = some_data() @@ -600,7 +600,7 @@ def f(w): ) if result.success: self.weights = result.x - return self.weights + return self.weights.copy() else: raise OptimizationError( f"Wasserstein Robust Minimum Variance optimization failed: {result.message}" @@ -746,7 +746,7 @@ def optimize( !!! 
example "Example:" ```python # Importing the dro mean-variance module - from opes.objectives.distributionally_robust import WassRobustMeanVariance + from opes.objectives import WassRobustMeanVariance # Let this be your ticker data training_data = some_data() @@ -791,7 +791,7 @@ def f(w): ) if result.success: self.weights = result.x - return self.weights + return self.weights.copy() else: raise OptimizationError( f"Wasserstein Robust Mean-Variance Optimization failed: {result.message}" diff --git a/opes/objectives/heuristics.py b/opes/objectives/heuristics.py index 421c540..bf85897 100644 --- a/opes/objectives/heuristics.py +++ b/opes/objectives/heuristics.py @@ -16,6 +16,8 @@ import numpy as np import pandas as pd from scipy.optimize import minimize, differential_evolution +from scipy.cluster.hierarchy import linkage, dendrogram +from scipy.spatial.distance import squareform from opes.objectives.base_optimizer import Optimizer from ..utils import extract_trim, test_integrity, find_constraint @@ -104,7 +106,7 @@ def optimize(self, data=None, n_assets=None, **kwargs): !!! example "Example:" ```python # Importing the equal-weight module - from opes.objectives.heuristics import Uniform + from opes.objectives import Uniform # Let this be your ticker data training_data = some_data() @@ -123,7 +125,7 @@ def optimize(self, data=None, n_assets=None, **kwargs): # Assigning weights and returning the same self.weights = np.array(np.ones(len(self.tickers)) / len(self.tickers)) - return self.weights + return self.weights.copy() class InverseVolatility(Optimizer): @@ -203,7 +205,7 @@ def optimize(self, data=None, **kwargs): !!! 
example "Example:" ```python # Importing the Inverse Volatility Portfolio (IVP) module - from opes.objectives.heuristics import InverseVolatility as IVP + from opes.objectives import InverseVolatility as IVP # Let this be your ticker data training_data = some_data() @@ -218,7 +220,7 @@ def optimize(self, data=None, **kwargs): # Preparing inputs for finding weights self._prepare_inputs(data) self.weights = (1 / self.volarray) / (1 / self.volarray).sum() - return self.weights + return self.weights.copy() class SoftmaxMean(Optimizer): @@ -306,7 +308,7 @@ def optimize(self, data=None, custom_mean=None, **kwargs): !!! example "Example:" ```python # Importing the softmax mean module - from opes.objectives.heuristics import SoftmaxMean + from opes.objectives import SoftmaxMean # Let this be your ticker data training_data = some_data() @@ -330,7 +332,7 @@ def optimize(self, data=None, custom_mean=None, **kwargs): self.mean / self.temperature - np.max(self.mean / self.temperature) ) self.weights /= self.weights.sum() - return self.weights + return self.weights.copy() class MaxDiversification(Optimizer): @@ -443,7 +445,7 @@ def optimize(self, data=None, custom_cov=None, seed=100, **kwargs): !!! example "Example:" ```python # Importing the maximum diversification module - from opes.objectives.heuristics import MaxDiversification + from opes.objectives import MaxDiversification # Let this be your ticker data training_data = some_data() @@ -476,7 +478,7 @@ def f(w): ) if result.success: self.weights = result.x / (result.x.sum() + 1e-12) - return self.weights + return self.weights.copy() else: raise OptimizationError( f"Maximum diversification optimization failed: {result.message}" @@ -497,7 +499,7 @@ def set_regularizer(self, reg=None, strength=1): !!! 
example "Example:" ```python # Import the MaxDiversification class - from opes.objectives.heuristics import MaxDiversification + from opes.objectives import MaxDiversification # Set with 'entropy' regularization optimizer = MaxDiversification(reg='entropy', strength=0.01) @@ -617,7 +619,7 @@ def optimize(self, data=None, weight_bounds=(0, 1), w=None, custom_cov=None): !!! example "Example:" ```python # Importing the risk parity module - from opes.objectives.heuristics import RiskParity + from opes.objectives import RiskParity # Let this be your ticker data training_data = some_data() @@ -655,7 +657,7 @@ def f(w): ) if result.success: self.weights = result.x - return self.weights + return self.weights.copy() else: raise OptimizationError( f"Risk parity optimization failed: {result.message}" @@ -676,7 +678,7 @@ def set_regularizer(self, reg=None, strength=1): !!! example "Example:" ```python # Import the RiskParity class - from opes.objectives.heuristics import RiskParity + from opes.objectives import RiskParity # Set with 'entropy' regularization optimizer = RiskParity(reg='entropy', strength=0.01) @@ -796,12 +798,12 @@ def optimize(self, data=None, bin=20, custom_mean=None, seed=100, **kwargs): !!! example "Example:" ```python # Importing the REPO module - from opes.objectives.heuristics import REPO + from opes.objectives import REPO # Let this be your ticker data training_data = some_data() - # Let these be your custom mean vector + # Let this be your custom mean vector mean_v = customMean() # Initialize with custom regularization @@ -841,7 +843,7 @@ def f(w): ) if result.success: self.weights = result.x / (result.x.sum() + 1e-12) - return self.weights + return self.weights.copy() else: raise OptimizationError(f"REPO optimization failed: {result.message}") @@ -860,7 +862,7 @@ def set_regularizer(self, reg=None, strength=1): !!! 
example "Example:" ```python # Import the REPO class - from opes.objectives.heuristics import REPO + from opes.objectives import REPO # Set with 'entropy' regularization optimizer = REPO(reg='entropy', strength=0.01) @@ -877,3 +879,200 @@ def set_regularizer(self, reg=None, strength=1): """ self.reg = _find_regularizer(reg) self.strength = strength + + +class HierarchicalRiskParity(Optimizer): + """ + Hierarchical Risk Parity (HRP) optimization. + + Hierarchical Risk Parity (HRP), introduced by López de Prado, + is a portfolio construction methodology that allocates capital + through hierarchical clustering and recursive risk balancing + rather than direct optimization of a scalar objective. HRP + addresses several structural weaknesses of traditional + mean-variance and risk parity approaches, including sensitivity + to covariance matrix estimation error, numerical instability + arising from matrix inversion and over-concentration in highly + correlated assets. By organizing assets into a hierarchy based + on correlation structure and allocating weights inversely to + cluster-level variance, HRP achieves diversification across + both individual assets and correlated groups. This procedure + yields stable, fully invested portfolios that are robust + out-of-sample and well-suited for high-dimensional or noisy + return environments, particularly when covariance estimates are + ill-conditioned. + """ + + def __init__(self, cluster_method="average"): + """ + **Args:** + + - `cluster_method` (*str, optional*): Clustering method to be used for hierarchical clustering. Defaults to `'average'`. Available methods are: + - '`average`': Merges clusters based on the minimum pairwise distance, often producing elongated, chain-like clusters. + - '`single'`: Merges clusters based on the maximum pairwise distance, favoring compact and tightly bound clusters. 
+ - `'complete'`: Merges clusters using the average pairwise distance between all points in each cluster, providing a balanced compromise between single and complete linkage. + - '`ward`': Merges clusters to minimize the increase in within-cluster variance, producing balanced clusters aligned with variance-based portfolio construction. + """ + self.cluster_method = cluster_method.lower() + self.covariance = None + self.correlation = None + + self.weights = None + self.tickers = None + + # Helper method to prepare inputs for optimization + # Also validates parameters and checks for data inconsistency + def _prepare_optimization_inputs(self, data, custom_cov): + + # Checking for cluster method validity + if self.cluster_method not in ["single", "average", "complete", "ward"]: + raise PortfolioError(f"Unknown cluster_method: {self.cluster_method}") + + # Obtaining return and ticker data + self.tickers, data = extract_trim(data) + + # Computing correlation matrix + self.correlation = np.corrcoef(data, rowvar=False) + + # Checking for covariance and weights and assigning optimization data accordingly + if custom_cov is None: + # Invertibility need not be handled as usual since HRP does not invert covariance + self.covariance = np.cov(data, rowvar=False) + else: + self.covariance = custom_cov + # We start with all assets set to 1 + # This is different from the usual equal weight since HRP allocates step by step + # But is initialized as 1 before allocation + self.weights = np.ones(len(self.tickers)) + + # Functions to test data integrity and find optimization constraint + test_integrity(tickers=self.tickers, weights=self.weights, cov=self.covariance) + + # Function to get leaf order of dendogram + def _get_leaf_order(self, condensed_distance_matrix): + clustered_linkage = linkage( + condensed_distance_matrix, method=self.cluster_method + ) + cluster_dendogram = dendrogram(clustered_linkage, no_plot=True) + return np.array(cluster_dendogram["leaves"]) + + # Function to 
get cluster variance + def _get_cluster_variance(self, cluster): + # Slicing covariance which is required for the cluster + cov_slice = self.covariance[np.ix_(cluster, cluster)] + + # Finding variance (diagonal elements) + diag = np.diag(cov_slice) + + # Computing cluster variance + # NOTE: 1e-12 added for numerical stability + inv_diag = 1.0 / (diag + 1e-12) + return 1.0 / inv_diag.sum() + + # 'Recursive' bisection function + # NOTE: while loop + queue is used for stability + def _recursive_bisection(self, leaf_order): + clusters = [leaf_order] + + # Instead of recursion, which might hit a stack limit in Python, we use a while loop and a queue + # Not a queue exactly, but we simulate one using pop(0) and append() + while clusters: + cluster = clusters.pop(0) + + # Checking if cluster is a single element or empty + if len(cluster) <= 1: + continue + + # Splitting clusters into left and right subsections + split_var = len(cluster) // 2 + left_cluster = cluster[:split_var] + right_cluster = cluster[split_var:] + + # Obtaining cluster variance for left and right + left_variance = self._get_cluster_variance(left_cluster) + right_variance = self._get_cluster_variance(right_cluster) + + # Computing left weights (alpha) + alpha = 1 - left_variance / (left_variance + right_variance) + + # Assigning weights to left and right clusters respectively + self.weights[left_cluster] *= alpha + self.weights[right_cluster] *= 1 - alpha + + # Appending for recursion + clusters.append(left_cluster) + clusters.append(right_cluster) + + self.weights = self.weights / self.weights.sum() + + def optimize(self, data, custom_cov=None): + """ + Computes the Hierarchical Risk Parity portfolio: + + $$ + \\mathbf{w}_i \\propto \\frac{1}{\\sigma^2_{\\text{cluster}}} + $$ + + !!! note "Note" + Asset weight bounds are defaulted to (0,1). + + Args: + data (*pd.DataFrame*): Ticker price data in either multi-index or single-index formats. 
Examples are given below: + ``` + # Single-Index Example + Ticker TSLA NVDA GME PFE AAPL ... + Date + 2015-01-02 14.620667 0.483011 6.288958 18.688917 24.237551 ... + 2015-01-05 14.006000 0.474853 6.460137 18.587513 23.554741 ... + 2015-01-06 14.085333 0.460456 6.268492 18.742599 23.556952 ... + 2015-01-07 14.063333 0.459257 6.195926 18.999102 23.887287 ... + 2015-01-08 14.041333 0.476533 6.268492 19.386841 24.805082 ... + ... + + # Multi-Index Example Structure (OHLCV) + Columns: + + Ticker (e.g. GME, PFE, AAPL, ...) + - Open + - High + - Low + - Close + - Volume + ``` + `custom_cov` (*None or array-like of shape (n_assets, n_assets), optional*): Custom covariance matrix. Can be used to inject externally generated covariance matrices (eg. Ledoit-Wolf). Defaults to `None`. + + **Returns:** + + - `np.ndarray`: Vector of optimized portfolio weights. + + Raises: + DataError: For any data mismatch during integrity check. + PortfolioError: For any invalid portfolio variable inputs during integrity check. + + !!! 
example "Example:" + ```python + # Importing the HRP module + from opes.objectives import HierarchicalRiskParity as HRP + + # Let this be your ticker data + training_data = some_data() + + # Let this be your custom covariance matrix + cov_m = covMatrix() + + # Initialize with custom clustering method + hrp_portfolio = HRP(cluster_method='ward') + + # Optimize portfolio with custom covariance matrix + weights = hrp_portfolio.optimize(data=training_data, custom_cov=cov_m) + ``` + """ + # Preparing inputs for HRP optimization + self._prepare_optimization_inputs(data, custom_cov=custom_cov) + + # Computing distance matrix and condensing it for leaf order + distance_matrix = np.sqrt(0.5 * (1 - self.correlation)) + condensed_distance = squareform(distance_matrix, checks=False) + leaf_order = self._get_leaf_order(condensed_distance) + + self._recursive_bisection(leaf_order) + return self.weights.copy() diff --git a/opes/objectives/markowitz.py b/opes/objectives/markowitz.py index 56785c5..49575a0 100644 --- a/opes/objectives/markowitz.py +++ b/opes/objectives/markowitz.py @@ -112,7 +112,7 @@ def optimize(self, data=None, weight_bounds=(0, 1), w=None, custom_mean=None): !!! example "Example:" ```python # Importing the maximum mean module - from opes.objectives.markowitz import MaxMean + from opes.objectives import MaxMean # Let this be your ticker data training_data = some_data() @@ -148,7 +148,7 @@ def f(w): ) if result.success: self.weights = result.x - return self.weights + return self.weights.copy() else: raise OptimizationError( f"Maximum mean optimization failed: {result.message}" @@ -169,7 +169,7 @@ def set_regularizer(self, reg=None, strength=1): !!! 
example "Example:" ```python # Import the MaxMean class - from opes.objectives.markowitz import MaxMean + from opes.objectives import MaxMean # Set with 'entropy' regularization optimizer = MaxMean(reg='entropy', strength=0.01) @@ -292,7 +292,7 @@ def optimize(self, data=None, weight_bounds=(0, 1), w=None, custom_cov=None): !!! example "Example:" ```python # Importing the Global Minimum Variance (GMV) module - from opes.objectives.markowitz import MinVariance + from opes.objectives import MinVariance # Let this be your ticker data training_data = some_data() @@ -326,7 +326,7 @@ def f(w): ) if result.success: self.weights = result.x - return self.weights + return self.weights.copy() else: raise OptimizationError( f"Global minimum optimization failed: {result.message}" @@ -347,7 +347,7 @@ def set_regularizer(self, reg=None, strength=1): !!! example "Example:" ```python # Import the MinVariance class - from opes.objectives.markowitz import MinVariance + from opes.objectives import MinVariance # Set with 'entropy' regularization optimizer = MinVariance(reg='entropy', strength=0.01) @@ -474,7 +474,7 @@ def optimize( !!! example "Example:" ```python # Importing the mean variance module - from opes.objectives.markowitz import MeanVariance + from opes.objectives import MeanVariance # Let this be your ticker data training_data = some_data() @@ -514,7 +514,7 @@ def f(w): ) if result.success: self.weights = result.x - return self.weights + return self.weights.copy() else: raise OptimizationError( f"Mean variance optimization failed: {result.message}" @@ -535,7 +535,7 @@ def set_regularizer(self, reg=None, strength=1): !!! example "Example:" ```python # Import the MeanVariance class - from opes.objectives.markowitz import MeanVariance + from opes.objectives import MeanVariance # Set with 'entropy' regularization optimizer = MeanVariance(reg='entropy', strength=0.01) @@ -662,7 +662,7 @@ def optimize( !!! 
example "Example:" ```python # Importing the maximum sharpe module - from opes.objectives.markowitz import MaxSharpe + from opes.objectives import MaxSharpe # Let this be your ticker data training_data = some_data() @@ -701,7 +701,7 @@ def f(w): ) if result.success: self.weights = result.x / (result.x.sum() + 1e-12) - return self.weights + return self.weights.copy() else: raise OptimizationError( f"Maximum sharpe optimization failed: {result.message}" @@ -722,7 +722,7 @@ def set_regularizer(self, reg=None, strength=1): !!! example "Example:" ```python # Import the MaxSharpe class - from opes.objectives.markowitz import MaxSharpe + from opes.objectives import MaxSharpe # Set with 'entropy' regularization optimizer = MaxSharpe(reg='entropy', strength=0.01) diff --git a/opes/objectives/online.py b/opes/objectives/online.py index 3d2c330..1eb237b 100644 --- a/opes/objectives/online.py +++ b/opes/objectives/online.py @@ -159,7 +159,7 @@ def optimize(self, data=None, seed=100, **kwargs): !!! example "Example:" ```python # Importing the universal portfolios module - from opes.objectives.online import UniversalPortfolios as UP + from opes.objectives import UniversalPortfolios as UP # Let this be your ticker data training_data = some_data() @@ -192,7 +192,7 @@ def optimize(self, data=None, seed=100, **kwargs): # Normalization and finding optimal weights self.weights = wealth_weighted_portfolio / portfolio_wealths.sum() - return self.weights + return self.weights.copy() class BCRP(Optimizer): @@ -280,7 +280,7 @@ def optimize(self, data=None, w=None): !!! 
example "Example:" ```python # Importing the BCRP module - from opes.objectives.online import BCRP + from opes.objectives import BCRP # Let this be your ticker data training_data = some_data() @@ -310,7 +310,7 @@ def f(w): ) if result.success: self.weights = result.x - return self.weights + return self.weights.copy() else: raise OptimizationError(f"BCRP optimization failed: {result.message}") @@ -330,7 +330,7 @@ def set_regularizer(self, reg=None, strength=1): !!! example "Example:" ```python # Import the BCRP class - from opes.objectives.online import BCRP + from opes.objectives import BCRP # Set with 'entropy' regularization ftrl = BCRP(reg='entropy', strength=0.01) @@ -431,7 +431,7 @@ def optimize(self, data=None, w=None): !!! example "Example:" ```python # Importing the exponential gradient module - from opes.objectives.online import ExponentialGradient as EG + from opes.objectives import ExponentialGradient as EG # Let this be your ticker data training_data = some_data() @@ -464,4 +464,4 @@ def optimize(self, data=None, w=None): new_weights = np.exp(log_w) self.weights = new_weights / new_weights.sum() - return self.weights + return self.weights.copy() diff --git a/opes/objectives/risk_measures.py b/opes/objectives/risk_measures.py index 060c291..dabe8eb 100644 --- a/opes/objectives/risk_measures.py +++ b/opes/objectives/risk_measures.py @@ -123,7 +123,7 @@ def optimize(self, data=None, seed=100, **kwargs): !!! example "Example:" ```python # Importing the VaR module - from opes.objectives.risk_measures import VaR + from opes.objectives import VaR # Let this be your ticker data training_data = some_data() @@ -153,7 +153,7 @@ def f(w): ) if result.success: self.weights = result.x / (result.x.sum() + 1e-12) - return self.weights + return self.weights.copy() else: raise OptimizationError(f"VaR optimization failed: {result.message}") @@ -285,7 +285,7 @@ def optimize(self, data=None, weight_bounds=(0, 1), w=None): !!! 
example "Example:" ```python # Importing the CVaR module - from opes.objectives.risk_measures import CVaR + from opes.objectives import CVaR # Let this be your ticker data training_data = some_data() @@ -321,7 +321,7 @@ def f(x): ) if result.success: self.weights = result.x[:-1] - return self.weights + return self.weights.copy() else: raise OptimizationError(f"CVaR optimization failed: {result.message}") @@ -340,7 +340,7 @@ def set_regularizer(self, reg=None, strength=1): !!! example "Example:" ```python # Import the CVaR class - from opes.objectives.risk_measures import CVaR + from opes.objectives import CVaR # Set with 'entropy' regularization optimizer = CVaR(reg='entropy', strength=0.01) @@ -455,7 +455,7 @@ def optimize(self, data=None, weight_bounds=(0, 1), w=None, custom_mean=None): !!! example "Example:" ```python # Importing the Mean-CVaR module - from opes.objectives.risk_measures import MeanCVaR + from opes.objectives import MeanCVaR # Let this be your ticker data training_data = some_data() @@ -502,7 +502,7 @@ def f(x): ) if result.success: self.weights = result.x[:-1] - return self.weights + return self.weights.copy() else: raise OptimizationError(f"Mean CVaR optimization failed: {result.message}") @@ -521,7 +521,7 @@ def set_regularizer(self, reg=None, strength=1): !!! example "Example:" ```python # Import the MeanCVaR class - from opes.objectives.risk_measures import MeanCVaR + from opes.objectives import MeanCVaR # Set with 'entropy' regularization optimizer = MeanCVaR(reg='entropy', strength=0.01) @@ -636,7 +636,7 @@ def optimize(self, data=None, weight_bounds=(0, 1), w=None): !!! 
example "Example:" ```python # Importing the EVaR module - from opes.objectives.risk_measures import EVaR + from opes.objectives import EVaR # Let this be your ticker data training_data = some_data() @@ -673,7 +673,7 @@ def f(x): ) if result.success: self.weights = result.x[:-1] - return self.weights + return self.weights.copy() else: raise OptimizationError(f"EVaR optimization failed: {result.message}") @@ -692,7 +692,7 @@ def set_regularizer(self, reg=None, strength=1): !!! example "Example:" ```python # Import the EVaR class - from opes.objectives.risk_measures import EVaR + from opes.objectives import EVaR # Set with 'entropy' regularization optimizer = EVaR(reg='entropy', strength=0.01) @@ -808,7 +808,7 @@ def optimize(self, data=None, weight_bounds=(0, 1), w=None, custom_mean=None): !!! example "Example:" ```python # Importing the Mean-EVaR module - from opes.objectives.risk_measures import MeanEVaR + from opes.objectives import MeanEVaR # Let this be your ticker data training_data = some_data() @@ -855,7 +855,7 @@ def f(x): ) if result.success: self.weights = result.x[:-1] - return self.weights + return self.weights.copy() else: raise OptimizationError(f"Mean EVaR optimization failed: {result.message}") @@ -874,7 +874,7 @@ def set_regularizer(self, reg=None, strength=1): !!! example "Example:" ```python # Import the MeanEVaR class - from opes.objectives.risk_measures import MeanEVaR + from opes.objectives import MeanEVaR # Set with 'entropy' regularization optimizer = MeanEVaR(reg='entropy', strength=0.01) @@ -993,7 +993,7 @@ def optimize(self, data=None, weight_bounds=(0, 1), w=None): !!! 
example "Example:" ```python # Importing the ERM module - from opes.objectives.risk_measures import EntropicRisk + from opes.objectives import EntropicRisk # Let this be your ticker data training_data = some_data() @@ -1026,7 +1026,7 @@ def f(w): ) if result.success: self.weights = result.x - return self.weights + return self.weights.copy() else: raise OptimizationError( f"Entropic risk metric optimization failed: {result.message}" @@ -1047,7 +1047,7 @@ def set_regularizer(self, reg=None, strength=1): !!! example "Example:" ```python # Import the EntropicRisk class - from opes.objectives.risk_measures import EntropicRisk + from opes.objectives import EntropicRisk # Set with 'entropy' regularization optimizer = EntropicRisk(reg='entropy', strength=0.01) @@ -1160,7 +1160,7 @@ def optimize(self, data=None, seed=100, **kwargs): !!! example "Example:" ```python # Importing the worst-case loss module - from opes.objectives.risk_measures import WorstCaseLoss + from opes.objectives import WorstCaseLoss # Let this be your ticker data training_data = some_data() @@ -1190,7 +1190,7 @@ def f(w): ) if result.success: self.weights = result.x / (result.x.sum() + 1e-12) - return self.weights + return self.weights.copy() else: raise OptimizationError( f"Worst-Case Loss optimization failed: {result.message}" @@ -1211,7 +1211,7 @@ def set_regularizer(self, reg=None, strength=1): !!! example "Example:" ```python # Import the WorstCaseLoss class - from opes.objectives.risk_measures import WorstCaseLoss + from opes.objectives import WorstCaseLoss # Set with 'entropy' regularization optimizer = WorstCaseLoss(reg='entropy', strength=0.01) diff --git a/opes/objectives/utility_theory.py b/opes/objectives/utility_theory.py index 51d7eec..0ee7c51 100644 --- a/opes/objectives/utility_theory.py +++ b/opes/objectives/utility_theory.py @@ -109,7 +109,7 @@ def optimize(self, data=None, weight_bounds=(0, 1), w=None): !!! 
example "Example:" ```python # Importing the kelly criterion module - from opes.objectives.utility_theory import Kelly + from opes.objectives import Kelly # Let this be your ticker data training_data = some_data() @@ -140,7 +140,7 @@ def f(w): ) if result.success: self.weights = result.x - return self.weights + return self.weights.copy() else: raise OptimizationError( f"Kelly criterion optimization failed: {result.message}" @@ -161,7 +161,7 @@ def set_regularizer(self, reg=None, strength=1): !!! example "Example:" ```python # Import the Kelly Criterion class - from opes.objectives.utility_theory import Kelly + from opes.objectives import Kelly # Set with 'entropy' regularization optimizer = Kelly(reg='entropy', strength=0.01) @@ -262,7 +262,7 @@ def optimize(self, data=None, weight_bounds=(0, 1), w=None): !!! example "Example:" ```python # Importing the Quadratic Utility class - from opes.objectives.utility_theory import QuadraticUtility as QU + from opes.objectives import QuadraticUtility as QU # Let this be your ticker data training_data = some_data() @@ -295,7 +295,7 @@ def f(w): ) if result.success: self.weights = result.x - return self.weights + return self.weights.copy() else: raise OptimizationError( f"Quadratic utility optimization failed: {result.message}" @@ -316,7 +316,7 @@ def set_regularizer(self, reg=None, strength=1): !!! example "Example:" ```python # Import the Quadratic Utility class - from opes.objectives.utility_theory import QuadraticUtility + from opes.objectives import QuadraticUtility # Set with 'entropy' regularization optimizer = QuadraticUtility(reg='entropy', strength=0.01) @@ -425,7 +425,7 @@ def optimize(self, data=None, weight_bounds=(0, 1), w=None): !!! 
example "Example:" ```python # Importing the CARA class - from opes.objectives.utility_theory import CARA + from opes.objectives import CARA # Let this be your ticker data training_data = some_data() @@ -458,7 +458,7 @@ def f(w): ) if result.success: self.weights = result.x - return self.weights + return self.weights.copy() else: raise OptimizationError(f"CARA optimization failed: {result.message}") @@ -477,7 +477,7 @@ def set_regularizer(self, reg=None, strength=1): !!! example "Example:" ```python # Import the CARA class - from opes.objectives.utility_theory import CARA + from opes.objectives import CARA # Set with 'entropy' regularization optimizer = CARA(reg='entropy', strength=0.01) @@ -585,7 +585,7 @@ def optimize(self, data=None, weight_bounds=(0, 1), w=None): !!! example "Example:" ```python # Importing the CRRA class - from opes.objectives.utility_theory import CRRA + from opes.objectives import CRRA # Let this be your ticker data training_data = some_data() @@ -618,7 +618,7 @@ def f(w): ) if result.success: self.weights = result.x - return self.weights + return self.weights.copy() else: raise OptimizationError(f"CRRA optimization failed: {result.message}") @@ -637,7 +637,7 @@ def set_regularizer(self, reg=None, strength=1): !!! example "Example:" ```python # Import the CRRA class - from opes.objectives.utility_theory import CRRA + from opes.objectives import CRRA # Set with 'entropy' regularization optimizer = CRRA(reg='entropy', strength=0.01) @@ -759,7 +759,7 @@ def optimize(self, data=None, weight_bounds=(0, 1), w=None): !!! 
example "Example:" ```python # Importing the HARA class - from opes.objectives.utility_theory import HARA + from opes.objectives import HARA # Let this be your ticker data training_data = some_data() @@ -793,7 +793,7 @@ def f(w): ) if result.success: self.weights = result.x - return self.weights + return self.weights.copy() else: raise OptimizationError(f"HARA optimization failed: {result.message}") @@ -812,7 +812,7 @@ def set_regularizer(self, reg=None, strength=1): !!! example "Example:" ```python # Import the HARA class - from opes.objectives.utility_theory import HARA + from opes.objectives import HARA # Set with 'entropy' regularization optimizer = HARA(reg='entropy', strength=0.01) diff --git a/opes/regularizer.py b/opes/regularizer.py index 8560da9..0245a44 100644 --- a/opes/regularizer.py +++ b/opes/regularizer.py @@ -45,6 +45,7 @@ - `Uniform` - `InverseVolatility` - `SoftmaxMean` +- `HierarchicalRiskParity` - `UniversalPortfolios` - `ExponentialGradient` - `KLRobustMaxMean` diff --git a/pyproject.toml b/pyproject.toml index bfed9d6..106cdef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ dev = ["pytest", "other-dev-packages"] [project] name = "opes" -version = "0.9.1" +version = "0.10.0" description = "A research-focused portfolio optimization and backtesting engine." readme = "README.md" requires-python = ">=3.10" From 6293a4f2ef1aaebc6f80c8670f6ec183e8445c1c Mon Sep 17 00:00:00 2001 From: Nitin Tony Paul <108007300+nitintonypaul@users.noreply.github.com> Date: Thu, 22 Jan 2026 23:44:41 +0530 Subject: [PATCH 2/4] [MNT] Improving documentation Improving docstrings and markdown documentation website. 
--- README.md | 6 +-- docs/docs/index.md | 4 +- docs/docs/installation.md | 85 +++++++++++++++++++++++++++------------ 3 files changed, 65 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index c84b200..a5c68d9 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,7 @@ Alternatively, you are also welcome to install directly from the GitHub reposito ```bash git clone https://github.com/opes-core/opes.git -cd opes +cd opes pip install . ``` @@ -163,7 +163,7 @@ asset_data = yf.download( kelly_portfolio = Kelly(fraction=0.8, reg="l2", strength=0.001) # Compute portfolio weights with custom weight bounds -kelly_portfolio.optimize(data, weight_bounds=(0.05, 0.8)) +kelly_portfolio.optimize(asset_data, weight_bounds=(0.05, 0.8)) # Clean negligible allocations cleaned_weights = kelly_portfolio.clean_weights(threshold=1e-6) @@ -172,7 +172,7 @@ cleaned_weights = kelly_portfolio.clean_weights(threshold=1e-6) print(cleaned_weights) ``` -This showcases the simplicty of the module. However there are far more diverse features you can still explore. If you're looking for more examples, preferably some of them with *"context"*, I recommend you check out the [examples](https://opes.pages.dev/examples/good_strategy/) page within the documentation. +This showcases the simplicity of the module. However there are far more diverse features you can still explore. If you're looking for more examples, preferably some of them with *"context"*, I recommend you check out the [examples](https://opes.pages.dev/examples/good_strategy/) page within the documentation. --- diff --git a/docs/docs/index.md b/docs/docs/index.md index 5e07ebc..ae1ceb8 100644 --- a/docs/docs/index.md +++ b/docs/docs/index.md @@ -23,7 +23,7 @@ OPES is a research-oriented and experimentation-focused Python module for portfo !!! 
example "Demo" ```python # Demonstration of portfolio optimization using the Kelly Criterion - # 'data' represents OHLCV market data grouped by ticker symbols + # `return_data` represents OHLCV market data grouped by ticker symbols from opes.objectives import Kelly @@ -31,7 +31,7 @@ OPES is a research-oriented and experimentation-focused Python module for portfo kelly_portfolio = Kelly(fraction=0.8, reg="l2", strength=0.01) # Compute portfolio weights with custom bounds and clean negligible allocations - kelly_portfolio.optimize(data, weight_bounds=(0.05, 0.8)) + kelly_portfolio.optimize(return_data, weight_bounds=(0.05, 0.8)) cleaned_weights = kelly_portfolio.clean_weights(threshold=1e-6) # Output the final portfolio weights diff --git a/docs/docs/installation.md b/docs/docs/installation.md index c981f2c..bc69ecd 100644 --- a/docs/docs/installation.md +++ b/docs/docs/installation.md @@ -7,59 +7,94 @@ This page guides you through installing OPES for experimentation & research. !!! warning "Warning:" OPES is currently under development. While it is relatively stable for experimentation, some features may change or break. Use at your own discretion and always verify results when testing. -## Prerequisites - -- Python 3.10+ (tested up to 3.12) -- `pip` package manager - --- ## Procedure -### 1. Install OPES +Python 3.10+ is required for `opes` to run (although it *may* work on some lower versions). `opes` is tested upto Python 3.14. To install a stable release of `opes`, `pip` is recommended for convenience. -You can install OPES easily via PyPI: +### Installation + +You can install OPES easily via PyPI using `pip`. ```bash pip install opes ``` -This will fetch the latest stable release and all required dependencies. - -You are also welcome to install the module directly from GitHub: +This will fetch the latest stable release and all required dependencies. Alternatively, you are also welcome to install the module directly from GitHub. 
```bash git clone https://github.com/opes-core/opes.git cd opes-main -pip install -e . +pip install . ``` -!!! note "Note:" - The `-e` flag installs OPES in editable mode, so any changes you make to the source code are reflected immediately without reinstalling. This is great for developers or those tinkering with advanced features. +You can also install in editable mode if you plan on making any changes to the source code. + +```bash +# After cloning and in the root of the project +pip install -e . +``` --- -### 2. Verify the Installation +### Verification -After installation, make sure everything works by opening Python and importing OPES: +After installation, make sure everything works by opening Python and importing `opes`. ```python >>> import opes >>> opes.__version__ -'1.0.0' +'0.10.0' # May not be the current version but you get the idea ``` -If no errors appear, OPES is ready to use. +You can also verify your installation by using `pip`. + +```bash +pip show opes +``` + +If no errors appear, `opes` is ready to use. --- -## Dependencies +## Getting Started -OPES requires the following Python modules: +`opes` is designed to be minimalistic and easy to use and learn for any user. Here is an example script which implements my favorite portfolio, the Kelly Criterion. 
+import yfinance as yf + +# Importing our Kelly class +from opes.objectives import Kelly + +# ---------- FETCHING DATA ---------- +# Obtaining ticker data +# Basic yfinance stuff +TICKERS = ["AAPL", "NVDA", "PFE", "TSLA", "BRK-B", "SHV", "TLT"] +asset_data = yf.download( + tickers=TICKERS, + start="2010-01-01", + end="2020-01-01", + group_by="ticker", + auto_adjust=True +) + +# ---------- OPES USAGE ---------- +# Initialize a Kelly portfolio with fractional exposure and L2 regularization +# Fractional exposure produces less risky weights and L2 regularization contributes in penalizing concentration +kelly_portfolio = Kelly(fraction=0.8, reg="l2", strength=0.001) + +# Compute portfolio weights with custom weight bounds +kelly_portfolio.optimize(asset_data, weight_bounds=(0.05, 0.8)) + +# Clean negligible allocations +cleaned_weights = kelly_portfolio.clean_weights(threshold=1e-6) + +# Output the final portfolio weights +print(cleaned_weights) +``` -| Module name | Minimum version | Maximum version | -| --------------- | --------------- | --------------- | -| **NumPy** | 2.2.6 | < 3.0 | -| **pandas** | 2.3.3 | < 3.0 | -| **SciPy** | 1.15.2 | < 2.0 | -| **matplotlib** | 3.10.1 | < 4.0 | \ No newline at end of file +This showcases the simplicity of the module. However there are far more diverse features you can still explore. If you're looking for more examples, preferably some of them with *"context"*, I recommend you check out the [examples](./examples/good_strategy.md) page within the documentation. \ No newline at end of file From 3d5eef9a6402a8a15eb1490de6aa7dc1ccc546f4 Mon Sep 17 00:00:00 2001 From: Nitin Tony Paul <108007300+nitintonypaul@users.noreply.github.com> Date: Fri, 23 Jan 2026 23:43:45 +0530 Subject: [PATCH 3/4] [ENH] Improved `Backtester` Decoupled `rebalance_freq` and `reopt_freq` enabling users to customize the portfolio style. Also refactored `Backtester` class for better readability. Updated documentation and Readme with changes. 
--- README.md | 21 +- docs/docs/backtesting.md | 63 ++-- docs/docs/examples/good_strategy.md | 10 +- docs/docs/examples/if_you_knew_the_future.md | 10 +- docs/docs/examples/the_alpha_engine.md | 4 +- docs/docs/examples/which_kelly_is_best.md | 16 +- docs/docs/objectives/heuristics.md | 2 +- opes/__init__.py | 2 +- opes/backtester.py | 359 +++++++++++-------- opes/objectives/heuristics.py | 2 +- opes/utils.py | 62 ---- pyproject.toml | 2 +- tests/backtester_test.py | 6 +- 13 files changed, 296 insertions(+), 263 deletions(-) diff --git a/README.md b/README.md index a5c68d9..33cdf6c 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,12 @@ Visit the [documentation](https://opes.pages.dev) for detailed insights on OPES. --- +## Project Methodology + +This project follows an Agile development approach. Every feature is designed to be extensible, exploratory and open to modification as the system evolves. Each GitHub commit represents a usable and coherent version of OPES. While not every commit is feature-complete or fully refined, each serves as a stable minimum viable product and a reliable snapshot of progress. Features marked as *experimental* are subject to active evaluation and will be either validated and promoted or removed entirely based on feasibility and empirical performance. + +--- + ## Disclaimer The information provided by OPES is for educational, research and informational purposes only. It is not intended as financial, investment or legal advice. Users should conduct their own due diligence and consult with licensed financial professionals before making any investment decisions. OPES and its contributors are not liable for any financial losses or decisions made based on this content. Past performance is not indicative of future results. @@ -191,17 +197,4 @@ This will run three scripts, each dedicated to testing the optimizer, regularize GOOG, AAPL, AMZN, MSFT ``` -The price data is stored in the `prices.csv` file within the `tests/` directory. 
The number of tickers are limited to 4 since there are computationally heavy portfolio objectives (like `UniversalPortfolios`) included which may take an eternity to test well using multiple tickers. - -Also it eats up RAM like pac-man. - ---- - -## Upcoming Features (Unconfirmed) - -These features are still in the works and may or may not appear in later updates: - -| **Objective Name (Category)** | -| ------------------------------------------------ | -| Online Newton Step (Online Learning) | -| ADA-BARRONS (Online Learning) | +The price data is stored in the `prices.csv` file within the `tests/` directory. The number of tickers is limited to 4 since there are computationally heavy portfolio objectives (like `UniversalPortfolios`) included which may take an eternity to test well using multiple tickers. \ No newline at end of file diff --git a/docs/docs/backtesting.md b/docs/docs/backtesting.md index 34aa631..bc714b4 100644 --- a/docs/docs/backtesting.md +++ b/docs/docs/backtesting.md @@ -71,8 +71,9 @@ It also stores transaction cost parameters for portfolio simulations. ```python def backtest( optimizer, - rebalance_freq=None, - seed=None, + rebalance_freq=1, + reopt_freq=1, + seed=100, weight_bounds=None, clean_weights=False ) @@ -80,24 +81,23 @@ def backtest( Execute a portfolio backtest over the test dataset using a given optimizer. -This method performs either a static-weight backtest or a rolling-weight -backtest depending on whether `rebalance_freq` is specified. It also -applies transaction costs and ensures no lookahead bias during rebalancing. +This method performs a walk-forward backtest using the user defined `rebalance_freq` +and `reopt_freq`. It also applies transaction costs and ensures no lookahead bias. For a rolling backtest, any common date values are dropped, the first occurrence is considered to be original and kept. !!! 
warning "Warning:" Some online learning methods such as `ExponentialGradient` update weights based - on the most recent observations. Setting `rebalance_freq` to any value other - than `1` (or possibly `None`) may result in suboptimal performance, as - intermediate data points will be ignored and not used for weight updates. - Proceed with caution when using other rebalancing frequencies with online learning algorithms. + on the most recent observations. Setting `reopt_freq` to any value other + than `1` may result in suboptimal performance, as intermediate data points will + be ignored and not used for weight updates. **Args:** - `optimizer`: An optimizer object containing the optimization strategy. Accepts both OPES built-in objectives and externally constructed optimizer objects. -- `rebalance_freq` (*int or None, optional*): Frequency of rebalancing (re-optimization) in time steps. If `None`, a static weight backtest is performed. Defaults to `None`. -- `seed` (*int or None, optional*): Random seed for reproducible cost simulations. Defaults to `None`. +- `rebalance_freq` (*int, optional*): Frequency of rebalancing in time steps. Must be `>= 1`. Defaults to `1`. +- `reopt_freq` (*int, optional*): Frequency of re-optimization in time steps. Must be `>= 1`. Defaults to `1`. +- `seed` (*int or None, optional*): Random seed for reproducible cost simulations. Defaults to `100`. - `weight_bounds` (*tuple, optional*): Bounds for portfolio weights passed to the optimizer if supported. !!! abstract "Rules for `optimizer` Object" @@ -107,24 +107,35 @@ is considered to be original and kept. - `**kwargs`: For safety against breaking changes. - `optimize` must output weights for the timestep. +!!! note "Note" + - Re-optimization does not automatically imply rebalancing. When the portfolio is re-optimized at a given timestep, weights may or may not be updated depending on the value of `rebalance_freq`. 
+ - To ensure a coherent backtest, a common practice is to choose frequencies such that `reopt_freq % rebalance_freq == 0`. This guarantees that whenever optimization occurs, a rebalance is also performed. + - Also note that within a given timestep, rebalancing, if it occurs, is performed after optimization when optimization is scheduled for that timestep. + +!!! tip "Tip" + Common portfolio styles can be constructed by appropriate choices of `rebalance_freq` and `reopt_freq`: + + - Buy-and-Hold: `rebalance_freq > horizon`, `reopt_freq > horizon` + - Constantly Rebalanced: `rebalance_freq = 1`, `reopt_freq > horizon` + - Fully Dynamic: `rebalance_freq = 1`, `reopt_freq = 1` + **Returns:** - `dict`: Backtest results containing the following keys: - `'returns'` (*np.ndarray*): Portfolio returns after accounting for costs. - `'weights'` (*np.ndarray*): Portfolio weights at each timestep. - `'costs'` (*np.ndarray*): Transaction costs applied at each timestep. - - `'dates'` (*np.ndarray*): Dates on which the backtest was conducted. + - `'timeline'` (*np.ndarray*): Timeline on which the backtest was conducted. **Raises** - `DataError`: If the optimizer does not accept weight bounds but `weight_bounds` are provided. - `PortfolioError`: If input validation fails (via `_backtest_integrity_check`). +- `OptimizationError`: If the underlying optimizer uses optimization and if it fails to optimize. !!! note "Notes:" - All returned arrays are aligned in time and have length equal to the test dataset. - - Static weight backtest: Uses a single set of optimized weights for all test data. This denotes a constant rebalanced portfolio. - - Rolling weight backtest: Re-optimizes weights at intervals defined by `rebalance_freq` using only historical data up to the current point to prevent lookahead bias. - Returns and weights are stored in arrays aligned with test data indices. !!! example "Example:" @@ -132,8 +143,8 @@ is considered to be original and kept. 
import numpy as np # Importing necessary OPES modules - from opes.objectives.utility_theory import Kelly - from opes.backtester import Backtester + from opes.objectives import Kelly + from opes import Backtester # Place holder for your price data from some_random_module import trainData, testData @@ -149,7 +160,11 @@ is considered to be original and kept. tester = Backtester(train_data=training, test_data=testing) # Obtaining backtest data for kelly optimizer - kelly_backtest = tester.backtest(optimizer=kelly_optimizer, rebalance_freq=21) + kelly_backtest = tester.backtest( + optimizer=kelly_optimizer, + rebalance_freq=1, # Rebalance daily + reopt_freq=21 # Re-optimize monthly + ) # Printing results for key in kelly_backtest: @@ -214,8 +229,8 @@ commonly used in finance, including volatility, drawdowns and tail risk metrics. !!! example "Example:" ```python # Importing portfolio method and backtester - from opes.objectives.markowitz import MaxSharpe - from opes.backtester import Backtester + from opes.objectives import MaxSharpe + from opes import Backtester # Place holder for your price data from some_random_module import trainData, testData @@ -280,8 +295,8 @@ a file. !!! example "Example:" ```python # Importing portfolio methods and backtester - from opes.objectives.markowitz import MaxMean, MeanVariance - from opes.backtester import Backtester + from opes.objectives import MaxMean, MeanVariance + from opes import Backtester # Place holder for your price data from some_random_module import trainData, testData @@ -297,9 +312,9 @@ a file. 
# Initializing Backtest with constant costs tester = Backtester(train_data=training, test_data=testing) - # Obtaining returns array from backtest for both optimizers (Monthly Rebalancing) - scenario_1 = tester.backtest(optimizer=maxmeanl2, rebalance_freq=21) - scenario_2 = tester.backtest(optimizer=mvo1_5, rebalance_freq=21)['returns'] + # Obtaining returns array from backtest for both optimizers + scenario_1 = tester.backtest(optimizer=maxmeanl2) + scenario_2 = tester.backtest(optimizer=mvo1_5)['returns'] # Plotting wealth tester.plot_wealth( diff --git a/docs/docs/examples/good_strategy.md b/docs/docs/examples/good_strategy.md index 479690b..4429f11 100644 --- a/docs/docs/examples/good_strategy.md +++ b/docs/docs/examples/good_strategy.md @@ -95,10 +95,16 @@ tester = Backtester(train_data=train, test_data=test, cost={'gamma' : (5, 1)}) # Obtaining returns # For now, weights and costs dont matter, so we discard them -return_scenario = tester.backtest(optimizer=mvo_ra08, rebalance_freq=1, clean_weights=True, seed=100)['returns'] +return_scenario = tester.backtest( + optimizer=mvo_ra08, + rebalance_freq=1, + reopt_freq=1, + clean_weights=True, + seed=100 +)['returns'] ``` -We use `rebalance_freq=1` so we can see how the portfolio adapts to changes quickly. `seed=100` gaurantees reproducibility and Gamma slippage captures asymmetric execution costs where extreme liquidity events are rare but painful. After obtaining `return_scenario` we can get the metrics and plot wealth. +We use `rebalance_freq=1` and `reopt_freq=1` so we can see how the portfolio adapts to changes quickly. `seed=100` guarantees reproducibility and Gamma slippage captures asymmetric execution costs where extreme liquidity events are rare but painful. After obtaining `return_scenario` we can get the metrics and plot wealth. 
--- diff --git a/docs/docs/examples/if_you_knew_the_future.md b/docs/docs/examples/if_you_knew_the_future.md index 27d5626..f98f297 100644 --- a/docs/docs/examples/if_you_knew_the_future.md +++ b/docs/docs/examples/if_you_knew_the_future.md @@ -90,14 +90,14 @@ The in-sample backtester can be constructed by enforcing `train_data=test` as we # In-sample backtester # zero-cost backtesting tester_in_sample = Backtester(train_data=test, test_data=test, cost={'const' : 0}) -in_sample_results = tester_in_sample.backtest(optimizer=mean_variance, clean_weights=True) +in_sample_results = tester_in_sample.backtest(optimizer=mean_variance, clean_weights=True, reopt_freq=1000) # Obtaining weights and returns from the backtest in_weights = in_sample_results["weights"][0] return_scenario_in = in_sample_results["returns"] ``` -The `rebalance_freq` parameter is defaulted to `None`, imposing a static weight backtest. +The `rebalance_freq` parameter is defaulted to `1` and `reopt_freq` is set to `1000`, imposing a constant rebalanced backtest. ### Out-of-Sample Backtester @@ -107,21 +107,21 @@ The out-of-sample backtester is normally written by feeding training and testing # Out-of-sample backtester # Zero-cost backtesting tester_out_of_sample = Backtester(train_data=train, test_data=test, cost={'const' : 0}) -out_of_sample_results = tester_out_of_sample.backtest(optimizer=mean_variance, clean_weights=True) +out_of_sample_results = tester_out_of_sample.backtest(optimizer=mean_variance, clean_weights=True, reopt_freq=1000) # Obtaining weights and returns from the backtest out_weights = out_of_sample_results["weights"][0] return_scenario_out = out_of_sample_results["returns"] ``` -This is also a static weight backtest. +This is also a constant rebalanced backtest. ### Uniform Portfolio Backtester Since uniform equal weight has constant weights, regardless of test and train data, we can use any backtester to obtain returns. Here we use `tester_in_sample`. 
```python -uniform_results = tester_in_sample.backtest(optimizer=uniform_port) +uniform_results = tester_in_sample.backtest(optimizer=uniform_port, reopt_freq=1000) uniform_weights = uniform_results["weights"][0] uniform_scenario = uniform_results["returns"] ``` diff --git a/docs/docs/examples/the_alpha_engine.md b/docs/docs/examples/the_alpha_engine.md index 6dd1c05..1916e02 100644 --- a/docs/docs/examples/the_alpha_engine.md +++ b/docs/docs/examples/the_alpha_engine.md @@ -192,8 +192,8 @@ alpha_strategy = SuperDuperAlphaEngine() # Initialize our backtester tester = Backtester(train_data=train, test_data=test, cost={'const': 40}) -# Backtest with `rebalance_freq` set to 1 for daily momentum -alpha_returns = tester.backtest(optimizer=alpha_strategy, rebalance_freq=1) +# Backtest with `rebalance_freq` and `reopt_freq` set to 1 for daily momentum +alpha_returns = tester.backtest(optimizer=alpha_strategy, rebalance_freq=1, reopt_freq=1) ``` Upon having `alpha_returns` we can use it to plot wealth and get metrics. diff --git a/docs/docs/examples/which_kelly_is_best.md b/docs/docs/examples/which_kelly_is_best.md index 0329581..9010179 100644 --- a/docs/docs/examples/which_kelly_is_best.md +++ b/docs/docs/examples/which_kelly_is_best.md @@ -6,7 +6,7 @@ The Kelly Criterion, proposed by John Larry Kelly Jr., is the mathematically opt There are numerous variants of the Kelly Criterion introduced to combat this fragile dependency, such as fractional Kelly, popularized by Ed Thorpe, and distributionally robust Kelly models. In this example, we compare several of the most well-known Kelly variants under identical out-of-sample conditions, evaluating their realized performance and wealth dynamics using `opes`. !!! warning "Warning:" - This example may be computationally heavy because of multiple optimization models running with a low `rebalance_freq=5`. If you prefer better performance, increase `rebalance_freq` to monthly (`21`) or any value much greater than `5`. 
+ This example may be computationally heavy because of multiple optimization models running with a low `reopt_freq=5`. If you prefer better performance, increase `reopt_freq` to monthly (`21`) or any value much greater than `5`. --- @@ -121,7 +121,7 @@ for distributionally robust variants, we utilize `KLradius` for the ambiguity ra ## Backtesting -Using the `Backtester` class from `opes`, we backtest these strategies under a constant, but high, cost of 20 bps and `rebalance_freq=5` (weekly). Oh, and we clean weights too. +Using the `Backtester` class from `opes`, we backtest these strategies under a constant, but high, cost of 20 bps and `reopt_freq=5` (weekly). `rebalance_freq` is defaulted to `1`. Oh, and we clean weights too. ```python # A constant slippage backtest @@ -129,12 +129,12 @@ tester = Backtester(train_data=train, test_data=test, cost={'const' : 20}) # Obtaining returns # For now, weights and costs dont matter, so we discard them -ck_scenario = tester.backtest(optimizer=classic_kelly, rebalance_freq=5, clean_weights=True)['returns'] -hk_scenario = tester.backtest(optimizer=half_kelly, rebalance_freq=5, clean_weights=True)['returns'] -qk_scenario = tester.backtest(optimizer=quarter_kelly, rebalance_freq=5, clean_weights=True)['returns'] -kldrk_scenario = tester.backtest(optimizer=kldr_kelly, rebalance_freq=5, clean_weights=True)['returns'] -kldrhk_scenario = tester.backtest(optimizer=kldr_halfkelly, rebalance_freq=5, clean_weights=True)['returns'] -kldrqk_scenario = tester.backtest(optimizer=kldr_quarterkelly, rebalance_freq=5, clean_weights=True)['returns'] +ck_scenario = tester.backtest(optimizer=classic_kelly, reopt_freq=5, clean_weights=True)['returns'] +hk_scenario = tester.backtest(optimizer=half_kelly, reopt_freq=5, clean_weights=True)['returns'] +qk_scenario = tester.backtest(optimizer=quarter_kelly, reopt_freq=5, clean_weights=True)['returns'] +kldrk_scenario = tester.backtest(optimizer=kldr_kelly, reopt_freq=5, clean_weights=True)['returns'] 
+kldrhk_scenario = tester.backtest(optimizer=kldr_halfkelly, reopt_freq=5, clean_weights=True)['returns'] +kldrqk_scenario = tester.backtest(optimizer=kldr_quarterkelly, reopt_freq=5, clean_weights=True)['returns'] ``` --- diff --git a/docs/docs/objectives/heuristics.md b/docs/docs/objectives/heuristics.md index be77498..e61311c 100644 --- a/docs/docs/objectives/heuristics.md +++ b/docs/docs/objectives/heuristics.md @@ -23,7 +23,7 @@ class HierarchicalRiskParity(cluster_method='average') Hierarchical Risk Parity (HRP) optimization. -Hierarchical Risk Parity (HRP), introduced by L≤pez de Prado, +Hierarchical Risk Parity (HRP), introduced by Lopez de Prado, is a portfolio construction methodology that allocates capital through hierarchical clustering and recursive risk balancing rather than direct optimization of a scalar objective. HRP diff --git a/opes/__init__.py b/opes/__init__.py index 14a6b0b..89bcd1e 100644 --- a/opes/__init__.py +++ b/opes/__init__.py @@ -1,5 +1,5 @@ # Version Log -__version__ = "0.10.0" +__version__ = "0.11.0" # Backtester easy import from .backtester import Backtester diff --git a/opes/backtester.py b/opes/backtester.py index a67c55a..0fa6209 100644 --- a/opes/backtester.py +++ b/opes/backtester.py @@ -10,7 +10,7 @@ --- """ -from numbers import Real +from numbers import Real, Integral as Integer import time import inspect @@ -20,7 +20,7 @@ import matplotlib.pyplot as plt from opes.errors import PortfolioError, DataError -from opes.utils import slippage, extract_trim +from opes.utils import extract_trim class Backtester: @@ -79,7 +79,7 @@ def __init__(self, train_data=None, test_data=None, cost={"const": 10.0}): self.cost = cost def _backtest_integrity_check( - self, optimizer, rebalance_freq, seed, cleanweights=False + self, optimizer, rebalance_freq, reopt_freq, seed, cleanweights=False ): # Checking train and test data validity if not isinstance(self.train, pd.DataFrame): @@ -111,11 +111,15 @@ def _backtest_integrity_check( f"Expected 
optimizer object to have 'optimize' attribute." ) # Checking rebalance frequency type and validity - if rebalance_freq is not None: - if rebalance_freq <= 0 or not isinstance(rebalance_freq, int): - raise PortfolioError( - f"Invalid rebalance frequency. Expected integer within bounds [1,T], Got {rebalance_freq}" - ) + if not isinstance(rebalance_freq, Integer) or rebalance_freq <= 0: + raise PortfolioError( + f"Invalid rebalance frequency. Expected integer within bounds [1,T], Got {rebalance_freq}" + ) + # Checking re-optimization frequency type validity + if not isinstance(reopt_freq, Integer) or reopt_freq <= 0: + raise PortfolioError( + f"Invalid re-optimization frequency. Expected integer within bounds [1,T], Got {reopt_freq}" + ) # Validiating numpy seed if seed is not None and not isinstance(seed, int): raise PortfolioError(f"Invalid seed. Expected integer or None, Got {seed}") @@ -150,35 +154,116 @@ def _backtest_integrity_check( f"Invalid jump cost model parameter length. Expected 3, got {len(self.cost[first_key])}" ) + # Helper method to combine datasets + # Combines training and testing data upto a particular timestep + def _combine_datasets(self, upto_timestep): + # NO LOOKAHEAD BIAS + # Rebalance at timestep t using only past data (up to t, exclusive) to avoid lookahead bias + # Training data is pre-cleaned (no NaNs), test data up to t is also NaN-free + # Concatenating them preserves this property; dropna() handles edge cases safely + # The optimizer therefore only sees information available until the current decision point + combined_dataset = pd.concat([self.train, self.test.iloc[:upto_timestep]]) + combined_dataset = combined_dataset[ + ~combined_dataset.index.duplicated(keep="first") + ].dropna() + + return combined_dataset + + # Helper method to compute drifted weights for timestep + # Returns the realized drifted weights for the timestep + def _compute_drifted_weights(self, w_prev, returns): + w_prev = np.asarray(w_prev) + w_realized = (w_prev * 
(1 + returns)) / (1 + np.sum(w_prev * returns)) + + return w_realized + + # Helper method to compute costs array + # Returns either a constant or an array of length=horizon of cost + def _slippage_costs(self, cost, horizon, numpy_seed=None): + # Setting numpy seed and finding cost parameters + numpy_rng = np.random.default_rng(numpy_seed) + cost_key = next(iter(cost)).lower() + cost_params = cost[cost_key] + + # ---------- COST MODELS ---------- + match cost_key: + # Constant slippage + case "const": + return cost_params / 10000 + # Gamma distributed slippage + case "gamma": + return ( + numpy_rng.gamma( + shape=cost_params[0], scale=cost_params[1], size=horizon + ) + / 10000 + ) + # Lognormally distributed slippage + case "lognormal": + return ( + numpy_rng.lognormal( + mean=cost_params[0], sigma=cost_params[1], size=horizon + ) + / 10000 + ) + # Inverse gaussian slippage + case "inversegaussian": + return ( + numpy_rng.wald( + mean=cost_params[0], scale=cost_params[1], size=horizon + ) + / 10000 + ) + # Compound-poisson lognormal slippage (jump process) + case "jump": + N = numpy_rng.poisson(cost_params[0], size=horizon) + jump_cost = np.array( + [ + ( + np.sum( + numpy_rng.lognormal( + mean=cost_params[1], sigma=cost_params[2], size=n + ) + ) + if n > 0 + else 0 + ) + for n in N + ] + ) + return jump_cost / 10000 + case _: + raise DataError(f"Unknown cost model: {cost_key}") + def backtest( self, optimizer, - rebalance_freq=None, - seed=None, + rebalance_freq=1, + reopt_freq=1, + seed=100, weight_bounds=None, clean_weights=False, ): """ Execute a portfolio backtest over the test dataset using a given optimizer. - This method performs either a static-weight backtest or a rolling-weight - backtest depending on whether `rebalance_freq` is specified. It also - applies transaction costs and ensures no lookahead bias during rebalancing. + This method performs a walk-forward backtest using the user defined `rebalance_freq` + and `reopt_freq`. 
It also applies transaction costs and ensures no lookahead bias. For a rolling backtest, any common date values are dropped, the first occurrence is considered to be original and kept. !!! warning "Warning:" Some online learning methods such as `ExponentialGradient` update weights based - on the most recent observations. Setting `rebalance_freq` to any value other - than `1` (or possibly `None`) may result in suboptimal performance, as - intermediate data points will be ignored and not used for weight updates. - Proceed with caution when using other rebalancing frequencies with online learning algorithms. + on the most recent observations. Setting `reopt_freq` to any value other + than `1` may result in suboptimal performance, as intermediate data points will + be ignored and not used for weight updates. **Args:** - `optimizer`: An optimizer object containing the optimization strategy. Accepts both OPES built-in objectives and externally constructed optimizer objects. - - `rebalance_freq` (*int or None, optional*): Frequency of rebalancing (re-optimization) in time steps. If `None`, a static weight backtest is performed. Defaults to `None`. - - `seed` (*int or None, optional*): Random seed for reproducible cost simulations. Defaults to `None`. + - `rebalance_freq` (*int, optional*): Frequency of rebalancing in time steps. Must be `>= 1`. Defaults to `1`. + - `reopt_freq` (*int, optional*): Frequency of re-optimization in time steps. Must be `>= 1`. Defaults to `1`. + - `seed` (*int or None, optional*): Random seed for reproducible cost simulations. Defaults to `100`. - `weight_bounds` (*tuple, optional*): Bounds for portfolio weights passed to the optimizer if supported. !!! abstract "Rules for `optimizer` Object" @@ -188,22 +273,33 @@ def backtest( - `**kwargs`: For safety against breaking changes. - `optimize` must output weights for the timestep. + !!! note "Note" + - Re-optimization does not automatically imply rebalancing. 
When the portfolio is re-optimized at a given timestep, weights may or may not be updated depending on the value of `rebalance_freq`. + - To ensure a coherent backtest, a common practice is to choose frequencies such that `reopt_freq % rebalance_freq == 0`. This guarantees that whenever optimization occurs, a rebalance is also performed. + - Also note that within a given timestep, rebalancing, if it occurs, is performed after optimization when optimization is scheduled for that timestep. + + !!! tip "Tip" + Common portfolio styles can be constructed by appropriate choices of `rebalance_freq` and `reopt_freq`: + + - Buy-and-Hold: `rebalance_freq > horizon`, `reopt_freq > horizon` + - Constantly Rebalanced: `rebalance_freq = 1`, `reopt_freq > horizon` + - Fully Dynamic: `rebalance_freq = 1`, `reopt_freq = 1` + **Returns:** - `dict`: Backtest results containing the following keys: - `'returns'` (*np.ndarray*): Portfolio returns after accounting for costs. - `'weights'` (*np.ndarray*): Portfolio weights at each timestep. - `'costs'` (*np.ndarray*): Transaction costs applied at each timestep. - - `'dates'` (*np.ndarray*): Dates on which the backtest was conducted. + - `'timeline'` (*np.ndarray*): Timeline on which the backtest was conducted. Raises: DataError: If the optimizer does not accept weight bounds but `weight_bounds` are provided. PortfolioError: If input validation fails (via `_backtest_integrity_check`). + OptimizationError: If the underlying optimizer uses optimization and if it fails to optimize. !!! note "Notes:" - All returned arrays are aligned in time and have length equal to the test dataset. - - Static weight backtest: Uses a single set of optimized weights for all test data. This denotes a constant rebalanced portfolio. - - Rolling weight backtest: Re-optimizes weights at intervals defined by `rebalance_freq` using only historical data up to the current point to prevent lookahead bias. 
- Returns and weights are stored in arrays aligned with test data indices. !!! example "Example:" @@ -211,8 +307,8 @@ def backtest( import numpy as np # Importing necessary OPES modules - from opes.objectives.utility_theory import Kelly - from opes.backtester import Backtester + from opes.objectives import Kelly + from opes import Backtester # Place holder for your price data from some_random_module import trainData, testData @@ -228,137 +324,120 @@ def backtest( tester = Backtester(train_data=training, test_data=testing) # Obtaining backtest data for kelly optimizer - kelly_backtest = tester.backtest(optimizer=kelly_optimizer, rebalance_freq=21) + kelly_backtest = tester.backtest( + optimizer=kelly_optimizer, + rebalance_freq=1, # Rebalance daily + reopt_freq=21 # Re-optimize monthly + ) # Printing results for key in kelly_backtest: print(f"{key}: {kelly_backtest[key]}") ``` """ - # Running backtester integrity checks, extracting test return data and caching optimizer parameters + # Running backtester integrity checks, extracting test return data and caching values self._backtest_integrity_check( - optimizer, rebalance_freq, seed, cleanweights=clean_weights + optimizer, rebalance_freq, reopt_freq, seed, cleanweights=clean_weights ) test_data = extract_trim(self.test)[1] optimizer_parameters = inspect.signature(optimizer.optimize).parameters - - # ---------- BACKTEST LOOPS ---------- - - # Static weight backtest - if rebalance_freq is None: - - # ---------- STATIC OPTIMIZATION BLOCK ---------- - - # Using weight bounds if it is given AND if it is present as a parameter within optimize method - # Otherwise weights are optimized without weight bounds argument - kwargs = {} - # Checking for weight_bounds - if weight_bounds is not None and "weight_bounds" in optimizer_parameters: - kwargs["weight_bounds"] = weight_bounds - # Optimizing for the timestep - weights = optimizer.optimize(self.train, **kwargs) - - # ------------------------------------------------ - - # 
Cleaning weights if true and if optimizer has method - if clean_weights and hasattr(optimizer, "clean_weights"): - weights = optimizer.clean_weights() - # Repeating same weights for remaining timeline for static backtest - weights_array = np.tile(weights, (len(test_data), 1)) - - # Rolling weight (Walk-forward) backtest - if rebalance_freq is not None: - # Initializing weights list - # NOTE: More readable than initializing a 2D numpy array - weights = [None] * len(test_data) - - # ---------- INITIAL OPTIMIZATION BLOCK ---------- - - # First optimization is done manually using training data - # Using weight bounds if it is given AND if it is present as a parameter within optimize method - # Otherwise weights are optimized without weight bounds argument - kwargs = {} - # Checking for weight_bounds - if weight_bounds is not None and "weight_bounds" in optimizer_parameters: - kwargs["weight_bounds"] = weight_bounds - # Optimizing for the timestep - temp_weights = optimizer.optimize(self.train, **kwargs) - - # -------------------------------------------------- - - # Cleaning weights if true and if optimizer has method - if clean_weights and hasattr(optimizer, "clean_weights"): - temp_weights = optimizer.clean_weights() + horizon = len(test_data) + + # ---------- BACKTEST LOOP ---------- + + # Initializing weights list and turnover array + # NOTE: More readable than initializing a 2D numpy array + weights = [None] * horizon + turnover_array = np.zeros(horizon) + + # First optimization is done manually using training data + # Using weight bounds if it is given AND if it is present as a parameter within optimize method + # Otherwise weights are optimized without weight bounds argument + kwargs = {} + # Checking for weight_bounds + if weight_bounds is not None and "weight_bounds" in optimizer_parameters: + kwargs["weight_bounds"] = weight_bounds + # Optimizing for the timestep + optimized_weights = optimizer.optimize(self.train, **kwargs) + + # Cleaning weights if true and 
if optimizer has method + if clean_weights and hasattr(optimizer, "clean_weights"): + optimized_weights = optimizer.clean_weights() + + # Assigning computed weights to weight array + weights[0] = optimized_weights + optimizer_parameters = optimizer_parameters + + # For loop through timesteps to automate remaining walk-forward test + for t in range(1, horizon): + + # ---------- RE-OPTIMIZATION BLOCK ---------- + # Re-optimization check during appropriate frequency + # If the check is satisfied optimization is taken place and the new weights are computed + # NOTE: Rebalancing is handled separately using `rebalance_freq` + if t % reopt_freq == 0: + + combined_dataset = self._combine_datasets(upto_timestep=t) + + # We find if 'w' and 'weight_bounds' parameters are present within the optimizer + # The parameters which are present are leveraged (Eg. warm start, weight updates for 'w') + # Otherwise it is optimized without any extra arguments + kwargs = {} + if "w" in optimizer_parameters: + kwargs["w"] = optimized_weights + if ( + weight_bounds is not None + and "weight_bounds" in optimizer_parameters + ): + kwargs["weight_bounds"] = weight_bounds + + # Optimizing for the timestep + optimized_weights = optimizer.optimize(combined_dataset, **kwargs) + + # Cleaning weights if true and if optimizer has method + if clean_weights and hasattr(optimizer, "clean_weights"): + optimized_weights = optimizer.clean_weights() + + # ---------- REBALANCING BLOCK ---------- + # Computing drifted weights + # This is necessary for turnover and slippage modelling + drifted_weights = self._compute_drifted_weights( + weights[t - 1], test_data[t] + ) # Assigning computed weights to weight array - weights[0] = temp_weights - optimizer_parameters = optimizer_parameters - - # For loop through timesteps to automate remaining walk-forward test - for t in range(1, len(test_data)): - - # Rebalancing (Re-optimizing) during appropriate frequency - if t % rebalance_freq == 0: - - # ---------- WALK 
FORWARD OPTIMIZATION BLOCK ---------- - - # NO LOOKAHEAD BIAS - # Rebalance at timestep t using only past data (up to t, exclusive) to avoid lookahead bias - # Training data is pre-cleaned (no NaNs), test data up to t is also NaN-free - # Concatenating them preserves this property; dropna() handles edge cases safely - # The optimizer therefore only sees information available until the current decision point - combined_dataset = pd.concat([self.train, self.test.iloc[:t]]) - combined_dataset = combined_dataset[ - ~combined_dataset.index.duplicated(keep="first") - ].dropna() - # We find if 'w' and 'weight_bounds' parameters are present within the optimizer - # The parameters which are present are leveraged (Eg. warm start, weight updates for 'w') - # Otherwise it is optimized without any extra arguments - kwargs = {} - # Checking for w - if "w" in optimizer_parameters: - kwargs["w"] = temp_weights - # Checking for weight_bounds - if ( - weight_bounds is not None - and "weight_bounds" in optimizer_parameters - ): - kwargs["weight_bounds"] = weight_bounds - # Optimizing for the timestep - temp_weights = optimizer.optimize(combined_dataset, **kwargs) - - # ------------------------------------------------------------ - - # Cleaning weights if true and if optimizer has method - if clean_weights and hasattr(optimizer, "clean_weights"): - temp_weights = optimizer.clean_weights(temp_weights) - - # Assigning computed weights to weight array - weights[t] = temp_weights - - # Creating vertical stack for vectorization - weights_array = np.vstack(weights) + # If rebalance frequency is satisfied, then the weights for the timestep is the optimized weights + # Otherwise, the weights for the timestep is the drifted (realized) weights + if t % rebalance_freq == 0: + weights[t] = optimized_weights + else: + weights[t] = drifted_weights + + # ---------- TURNOVER BLOCK ---------- + # Computing turnover + # turnover is the L1 distance from current weights to drifted weights + # If not 
rebalanced, turnover is 0 + turnover_for_timestep = np.sum(np.abs(weights[t] - drifted_weights)) + turnover_array[t] = turnover_for_timestep # --------- POST PROCESSING BLOCK --------- - - # Vectorizing portfolio returns, finding cost array and finding final portfolio returns after costs - portfolio_returns = np.einsum("ij,ij->i", weights_array, test_data) - costs_array = slippage( - weights=weights_array, - returns=portfolio_returns, - cost=self.cost, - numpy_seed=seed, + # Creating vertical stack for vectorization + weights_array = np.vstack(weights) + # Computing slippage costs over time, vectorizing portfolio returns and finding final portfolio returns after costs + costs_array = turnover_array * self._slippage_costs( + cost=self.cost, horizon=horizon, numpy_seed=seed ) + portfolio_returns = np.einsum("ij,ij->i", weights_array, test_data) portfolio_returns -= costs_array - # Finding dates array from test data + # Finding timeline array from test data # NOTE: the first value is excluded since pct_change() drops the first date for return construction - dates = self.test.index.to_numpy()[1:] + timeline_array = self.test.index.to_numpy()[1:] return { "returns": portfolio_returns, "weights": weights_array, "costs": costs_array, - "timeline": dates, + "timeline": timeline_array, } def get_metrics(self, returns): @@ -413,8 +492,8 @@ def get_metrics(self, returns): !!! example "Example:" ```python # Importing portfolio method and backtester - from opes.objectives.markowitz import MaxSharpe - from opes.backtester import Backtester + from opes.objectives import MaxSharpe + from opes import Backtester # Place holder for your price data from some_random_module import trainData, testData @@ -513,8 +592,8 @@ def plot_wealth( !!! 
example "Example:" ```python # Importing portfolio methods and backtester - from opes.objectives.markowitz import MaxMean, MeanVariance - from opes.backtester import Backtester + from opes.objectives import MaxMean, MeanVariance + from opes import Backtester # Place holder for your price data from some_random_module import trainData, testData @@ -530,9 +609,9 @@ def plot_wealth( # Initializing Backtest with constant costs tester = Backtester(train_data=training, test_data=testing) - # Obtaining returns array from backtest for both optimizers (Monthly Rebalancing) - scenario_1 = tester.backtest(optimizer=maxmeanl2, rebalance_freq=21) - scenario_2 = tester.backtest(optimizer=mvo1_5, rebalance_freq=21)['returns'] + # Obtaining returns array from backtest for both optimizers + scenario_1 = tester.backtest(optimizer=maxmeanl2) + scenario_2 = tester.backtest(optimizer=mvo1_5)['returns'] # Plotting wealth tester.plot_wealth( diff --git a/opes/objectives/heuristics.py b/opes/objectives/heuristics.py index bf85897..ab4d4c2 100644 --- a/opes/objectives/heuristics.py +++ b/opes/objectives/heuristics.py @@ -885,7 +885,7 @@ class HierarchicalRiskParity(Optimizer): """ Hierarchical Risk Parity (HRP) optimization. - Hierarchical Risk Parity (HRP), introduced by López de Prado, + Hierarchical Risk Parity (HRP), introduced by Lopez de Prado, is a portfolio construction methodology that allocates capital through hierarchical clustering and recursive risk balancing rather than direct optimization of a scalar objective. HRP diff --git a/opes/utils.py b/opes/utils.py index 7f3acc6..315fa07 100644 --- a/opes/utils.py +++ b/opes/utils.py @@ -61,68 +61,6 @@ def find_constraint(bounds, constraint_type=1): return constraint_list -# Slippage function -def slippage(weights, returns, cost, numpy_seed=None): - numpy_rng = np.random.default_rng(numpy_seed) - turnover_array = np.zeros(len(weights)) - # Loop range is from 1 to horizon. 
Rebalancing happens from t=1 - for i in range(1, len(weights)): - w_current = weights[i] - w_prev = weights[i - 1] - w_realized = (w_prev * (1 + returns[i])) / (1 + np.sum(w_prev * returns[i])) - turnover = np.sum(np.abs(w_current - w_realized)) - turnover_array[i] = turnover - # Deciding slippage model using cost key - cost_key = next(iter(cost)).lower() - cost_params = cost[cost_key] - # Constant slippage - if cost_key == "const": - return turnover_array * cost_params / 10000 - horizon = len(turnover_array) - # Gamma distributed slippage - if cost_key == "gamma": - return ( - turnover_array - * numpy_rng.gamma(shape=cost_params[0], scale=cost_params[1], size=horizon) - / 10000 - ) - # Lognormally distributed slippage - elif cost_key == "lognormal": - return ( - turnover_array - * numpy_rng.lognormal( - mean=cost_params[0], sigma=cost_params[1], size=horizon - ) - / 10000 - ) - # Inverse gaussian slippage - elif cost_key == "inversegaussian": - return ( - turnover_array - * numpy_rng.wald(mean=cost_params[0], scale=cost_params[1], size=horizon) - / 10000 - ) - # Compound poisson slippage (jump process) - elif cost_key == "jump": - N = numpy_rng.poisson(cost_params[0], size=horizon) - jump_cost = np.array( - [ - ( - np.sum( - numpy_rng.lognormal( - mean=cost_params[1], sigma=cost_params[2], size=n - ) - ) - if n > 0 - else 0 - ) - for n in N - ] - ) - return turnover_array * jump_cost / 10000 - raise DataError(f"Unknown cost model: {cost_key}") - - # Data integrity checker def test_integrity( tickers, diff --git a/pyproject.toml b/pyproject.toml index 106cdef..deea1b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ dev = ["pytest", "other-dev-packages"] [project] name = "opes" -version = "0.10.0" +version = "0.11.0" description = "A research-focused portfolio optimization and backtesting engine." 
readme = "README.md" requires-python = ">=3.10" diff --git a/tests/backtester_test.py b/tests/backtester_test.py index abc3857..25c5698 100644 --- a/tests/backtester_test.py +++ b/tests/backtester_test.py @@ -250,8 +250,10 @@ def test_backtest_engine(prices_df): # Executing static backtest static_details = test_backtester.backtest(opt) - # Executing rolling backtest (Daily rebalancing) - rolling_details = test_backtester.backtest(opt, rebalance_freq=1) + # Executing rolling backtest (Daily rebalancing & reoptimization) + rolling_details = test_backtester.backtest( + opt, rebalance_freq=1, reopt_freq=1 + ) # Getting static and rolling results static_results = validate_backtest_results(static_details) From b127e4246501bf3972de782b7ac4c6cee883d41c Mon Sep 17 00:00:00 2001 From: Nitin Tony Paul <108007300+nitintonypaul@users.noreply.github.com> Date: Mon, 26 Jan 2026 18:14:58 +0530 Subject: [PATCH 4/4] [FIX] `backtest` bug in `Backtester` Fixed backtesting bug where the return data was misaligned for computing drifted weights. --- opes/backtester.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/opes/backtester.py b/opes/backtester.py index 0fa6209..330a305 100644 --- a/opes/backtester.py +++ b/opes/backtester.py @@ -274,6 +274,7 @@ def backtest( - `optimize` must output weights for the timestep. !!! note "Note" + - The backtest assumes portfolio weights are applied at the open of each timestep, with zero execution delay. - Re-optimization does not automatically imply rebalancing. When the portfolio is re-optimized at a given timestep, weights may or may not be updated depending on the value of `rebalance_freq`. - To ensure a coherent backtest, a common practice is to choose frequencies such that `reopt_freq % rebalance_freq == 0`. This guarantees that whenever optimization occurs, a rebalance is also performed. 
- Also note that within a given timestep, rebalancing, if it occurs, is performed after optimization when optimization is scheduled for that timestep. @@ -401,8 +402,11 @@ def backtest( # ---------- REBALANCING BLOCK ---------- # Computing drifted weights # This is necessary for turnover and slippage modelling + # NOTE: weights and returns of the previous timestep are passed in to compute drifted weights + # This is because weights[0], which is to be set on the beginning of the zeroth day is separately computed + # Therefore, as the loop starts from 1, the return from the zeroth day will cause the first drifted weights on the start of the first day (end of zeroth day) drifted_weights = self._compute_drifted_weights( - weights[t - 1], test_data[t] + weights[t - 1], test_data[t - 1] ) # Assigning computed weights to weight array