diff --git a/README.md b/README.md
index a5c68d9..9b0a556 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,12 @@ Visit the [documentation](https://opes.pages.dev) for detailed insights on OPES.
 
 ---
 
+## Project Methodology
+
+This project follows an Agile development approach. Every feature is designed to be extensible, exploratory and open to modification as the system evolves. Each GitHub commit represents a usable and coherent version of OPES. While not every commit is feature-complete or fully refined, each serves as a stable minimum viable product and a reliable snapshot of progress. Features marked as *experimental* are subject to active evaluation and will be either validated and promoted or removed entirely, based on feasibility and empirical performance.
+
+---
+
 ## Disclaimer
 
 The information provided by OPES is for educational, research and informational purposes only. It is not intended as financial, investment or legal advice. Users should conduct their own due diligence and consult with licensed financial professionals before making any investment decisions. OPES and its contributors are not liable for any financial losses or decisions made based on this content. Past performance is not indicative of future results.
@@ -192,16 +198,3 @@ GOOG, AAPL, AMZN, MSFT
 ```
 
 The price data is stored in the `prices.csv` file within the `tests/` directory. The number of tickers is limited to 4 since there are computationally heavy portfolio objectives (like `UniversalPortfolios`) included which may take an eternity to test well using multiple tickers.
-
-Also it eats up RAM like pac-man.
-
----
-
-## Upcoming Features (Unconfirmed)
-
-These features are still in the works and may or may not appear in later updates:
-
-| **Objective Name (Category)**            |
-| ---------------------------------------- |
-| Online Newton Step (Online Learning)     |
-| ADA-BARRONS (Online Learning)            |
diff --git a/docs/docs/backtesting.md b/docs/docs/backtesting.md
index 34aa631..bc714b4 100644
--- a/docs/docs/backtesting.md
+++ b/docs/docs/backtesting.md
@@ -71,8 +71,9 @@ It also stores transaction cost parameters for portfolio simulations.
 ```python
 def backtest(
     optimizer,
-    rebalance_freq=None,
-    seed=None,
+    rebalance_freq=1,
+    reopt_freq=1,
+    seed=100,
     weight_bounds=None,
     clean_weights=False
 )
@@ -80,24 +81,23 @@ def backtest(
 
 Execute a portfolio backtest over the test dataset using a given optimizer.
 
-This method performs either a static-weight backtest or a rolling-weight
-backtest depending on whether `rebalance_freq` is specified. It also
-applies transaction costs and ensures no lookahead bias during rebalancing.
+This method performs a walk-forward backtest using the user-defined `rebalance_freq`
+and `reopt_freq`. It also applies transaction costs and ensures no lookahead bias.
 
 For a rolling backtest, any common date values are dropped; the first occurrence
 is considered to be original and kept.
 
 !!! warning "Warning:"
     Some online learning methods such as `ExponentialGradient` update weights based
-    on the most recent observations. Setting `rebalance_freq` to any value other
-    than `1` (or possibly `None`) may result in suboptimal performance, as
-    intermediate data points will be ignored and not used for weight updates.
-    Proceed with caution when using other rebalancing frequencies with online learning algorithms.
+    on the most recent observations. Setting `reopt_freq` to any value other
+    than `1` may result in suboptimal performance, as intermediate data points will
+    be ignored and not used for weight updates.
 
 **Args:**
 
 - `optimizer`: An optimizer object containing the optimization strategy. Accepts both OPES built-in objectives and externally constructed optimizer objects.
-- `rebalance_freq` (*int or None, optional*): Frequency of rebalancing (re-optimization) in time steps. If `None`, a static weight backtest is performed. Defaults to `None`.
-- `seed` (*int or None, optional*): Random seed for reproducible cost simulations. Defaults to `None`.
+- `rebalance_freq` (*int, optional*): Frequency of rebalancing in time steps. Must be `>= 1`. Defaults to `1`.
+- `reopt_freq` (*int, optional*): Frequency of re-optimization in time steps. Must be `>= 1`. Defaults to `1`.
+- `seed` (*int or None, optional*): Random seed for reproducible cost simulations. Defaults to `100`.
 - `weight_bounds` (*tuple, optional*): Bounds for portfolio weights passed to the optimizer if supported.
 
 !!! abstract "Rules for `optimizer` Object"
@@ -107,24 +107,35 @@ is considered to be original and kept.
     - `**kwargs`: For safety against breaking changes.
     - `optimize` must output weights for the timestep.
 
+!!! note "Note"
+    - Re-optimization does not automatically imply rebalancing. When the portfolio is re-optimized at a given timestep, weights may or may not be updated depending on the value of `rebalance_freq`.
+    - To ensure a coherent backtest, a common practice is to choose frequencies such that `reopt_freq % rebalance_freq == 0`. This guarantees that whenever optimization occurs, a rebalance is also performed.
+    - Within a given timestep, if both are scheduled, optimization is performed first and rebalancing follows.
+
+!!! tip "Tip"
+    Common portfolio styles can be constructed by appropriate choices of `rebalance_freq` and `reopt_freq` (see the sketch after the notes below):
+
+    - Buy-and-Hold: `rebalance_freq > horizon`, `reopt_freq > horizon`
+    - Constantly Rebalanced: `rebalance_freq = 1`, `reopt_freq > horizon`
+    - Fully Dynamic: `rebalance_freq = 1`, `reopt_freq = 1`
+
 **Returns:**
 
 - `dict`: Backtest results containing the following keys:
     - `'returns'` (*np.ndarray*): Portfolio returns after accounting for costs.
    - `'weights'` (*np.ndarray*): Portfolio weights at each timestep.
     - `'costs'` (*np.ndarray*): Transaction costs applied at each timestep.
-    - `'dates'` (*np.ndarray*): Dates on which the backtest was conducted.
+    - `'timeline'` (*np.ndarray*): Timeline on which the backtest was conducted.
 
 **Raises:**
 
 - `DataError`: If the optimizer does not accept weight bounds but `weight_bounds` are provided.
 - `PortfolioError`: If input validation fails (via `_backtest_integrity_check`).
+- `OptimizationError`: If the underlying optimizer relies on numerical optimization and the optimization fails.
 
 !!! note "Notes:"
     - All returned arrays are aligned in time and have length equal to the test dataset.
-    - Static weight backtest: Uses a single set of optimized weights for all test data. This denotes a constant rebalanced portfolio.
-    - Rolling weight backtest: Re-optimizes weights at intervals defined by `rebalance_freq` using only historical data up to the current point to prevent lookahead bias.
     - Returns and weights are stored in arrays aligned with test data indices.
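+
+To make the tip above concrete, the three styles map to `backtest` calls as in the following minimal sketch (assuming an existing `Backtester` instance `tester`, an optimizer `opt` and a 252-step test horizon; all names here are placeholders):
+
+```python
+# Buy-and-hold: optimize once on the training data, then let weights drift
+buy_and_hold = tester.backtest(optimizer=opt, rebalance_freq=253, reopt_freq=253)
+
+# Constantly rebalanced: trade back to the initial weights at every step
+constant_mix = tester.backtest(optimizer=opt, rebalance_freq=1, reopt_freq=253)
+
+# Fully dynamic: re-optimize and rebalance at every step
+fully_dynamic = tester.backtest(optimizer=opt, rebalance_freq=1, reopt_freq=1)
+```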
 !!! example "Example:"
     ```python
     import numpy as np
 
     # Importing necessary OPES modules
-    from opes.objectives.utility_theory import Kelly
-    from opes.backtester import Backtester
+    from opes.objectives import Kelly
+    from opes import Backtester
 
     # Placeholder for your price data
     from some_random_module import trainData, testData
@@ -149,7 +160,11 @@ is considered to be original and kept.
     tester = Backtester(train_data=training, test_data=testing)
 
     # Obtaining backtest data for kelly optimizer
-    kelly_backtest = tester.backtest(optimizer=kelly_optimizer, rebalance_freq=21)
+    kelly_backtest = tester.backtest(
+        optimizer=kelly_optimizer,
+        rebalance_freq=1,  # Rebalance daily
+        reopt_freq=21      # Re-optimize monthly
+    )
 
     # Printing results
     for key in kelly_backtest:
@@ -214,8 +229,8 @@ commonly used in finance, including volatility, drawdowns and tail risk metrics.
 !!! example "Example:"
     ```python
     # Importing portfolio method and backtester
-    from opes.objectives.markowitz import MaxSharpe
-    from opes.backtester import Backtester
+    from opes.objectives import MaxSharpe
+    from opes import Backtester
 
     # Placeholder for your price data
     from some_random_module import trainData, testData
@@ -280,8 +295,8 @@ a file.
 !!! example "Example:"
     ```python
     # Importing portfolio methods and backtester
-    from opes.objectives.markowitz import MaxMean, MeanVariance
-    from opes.backtester import Backtester
+    from opes.objectives import MaxMean, MeanVariance
+    from opes import Backtester
 
     # Placeholder for your price data
     from some_random_module import trainData, testData
@@ -297,9 +312,9 @@ a file.
     # Initializing Backtest with constant costs
     tester = Backtester(train_data=training, test_data=testing)
 
-    # Obtaining returns array from backtest for both optimizers (Monthly Rebalancing)
-    scenario_1 = tester.backtest(optimizer=maxmeanl2, rebalance_freq=21)
-    scenario_2 = tester.backtest(optimizer=mvo1_5, rebalance_freq=21)['returns']
+    # Obtaining returns array from backtest for both optimizers
+    scenario_1 = tester.backtest(optimizer=maxmeanl2)
+    scenario_2 = tester.backtest(optimizer=mvo1_5)['returns']
 
     # Plotting wealth
     tester.plot_wealth(
diff --git a/docs/docs/examples/good_strategy.md b/docs/docs/examples/good_strategy.md
index 479690b..4429f11 100644
--- a/docs/docs/examples/good_strategy.md
+++ b/docs/docs/examples/good_strategy.md
@@ -95,10 +95,16 @@ tester = Backtester(train_data=train, test_data=test, cost={'gamma' : (5, 1)})
 
 # Obtaining returns
 # For now, weights and costs don't matter, so we discard them
-return_scenario = tester.backtest(optimizer=mvo_ra08, rebalance_freq=1, clean_weights=True, seed=100)['returns']
+return_scenario = tester.backtest(
+    optimizer=mvo_ra08,
+    rebalance_freq=1,
+    reopt_freq=1,
+    clean_weights=True,
+    seed=100
+)['returns']
 ```
 
-We use `rebalance_freq=1` so we can see how the portfolio adapts to changes quickly. `seed=100` gaurantees reproducibility and Gamma slippage captures asymmetric execution costs where extreme liquidity events are rare but painful. After obtaining `return_scenario` we can get the metrics and plot wealth.
+We use `rebalance_freq=1` and `reopt_freq=1` so we can see how the portfolio adapts to changes quickly. `seed=100` guarantees reproducibility, and Gamma slippage captures asymmetric execution costs where extreme liquidity events are rare but painful. After obtaining `return_scenario` we can get the metrics and plot wealth, as sketched below.
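+
+A minimal sketch of that next step (reusing the `tester` and `return_scenario` objects defined above):
+
+```python
+# Summary statistics for the cost-adjusted return path
+metrics = tester.get_metrics(return_scenario)
+print(metrics)
+```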
 
 ---
diff --git a/docs/docs/examples/if_you_knew_the_future.md b/docs/docs/examples/if_you_knew_the_future.md
index 27d5626..f98f297 100644
--- a/docs/docs/examples/if_you_knew_the_future.md
+++ b/docs/docs/examples/if_you_knew_the_future.md
@@ -90,14 +90,14 @@ The in-sample backtester can be constructed by enforcing `train_data=test` as we
 # In-sample backtester
 # zero-cost backtesting
 tester_in_sample = Backtester(train_data=test, test_data=test, cost={'const' : 0})
-in_sample_results = tester_in_sample.backtest(optimizer=mean_variance, clean_weights=True)
+in_sample_results = tester_in_sample.backtest(optimizer=mean_variance, clean_weights=True, reopt_freq=1000)
 
 # Obtaining weights and returns from the backtest
 in_weights = in_sample_results["weights"][0]
 return_scenario_in = in_sample_results["returns"]
 ```
 
-The `rebalance_freq` parameter is defaulted to `None`, imposing a static weight backtest.
+The `rebalance_freq` parameter defaults to `1` and `reopt_freq` is set to `1000`, imposing a constant rebalanced backtest.
 
 ### Out-of-Sample Backtester
 
@@ -107,21 +107,21 @@ The out-of-sample backtester is normally written by feeding training and testing
 # Out-of-sample backtester
 # Zero-cost backtesting
 tester_out_of_sample = Backtester(train_data=train, test_data=test, cost={'const' : 0})
-out_of_sample_results = tester_out_of_sample.backtest(optimizer=mean_variance, clean_weights=True)
+out_of_sample_results = tester_out_of_sample.backtest(optimizer=mean_variance, clean_weights=True, reopt_freq=1000)
 
 # Obtaining weights and returns from the backtest
 out_weights = out_of_sample_results["weights"][0]
 return_scenario_out = out_of_sample_results["returns"]
 ```
 
-This is also a static weight backtest.
+This is also a constant rebalanced backtest.
 
 ### Uniform Portfolio Backtester
 
 Since uniform equal weight has constant weights, regardless of test and train data, we can use any backtester to obtain returns. Here we use `tester_in_sample`.
 
 ```python
-uniform_results = tester_in_sample.backtest(optimizer=uniform_port)
+uniform_results = tester_in_sample.backtest(optimizer=uniform_port, reopt_freq=1000)
 uniform_weights = uniform_results["weights"][0]
 uniform_scenario = uniform_results["returns"]
 ```
diff --git a/docs/docs/examples/the_alpha_engine.md b/docs/docs/examples/the_alpha_engine.md
index 6dd1c05..1916e02 100644
--- a/docs/docs/examples/the_alpha_engine.md
+++ b/docs/docs/examples/the_alpha_engine.md
@@ -192,8 +192,8 @@ alpha_strategy = SuperDuperAlphaEngine()
 
 # Initialize our backtester
 tester = Backtester(train_data=train, test_data=test, cost={'const': 40})
 
-# Backtest with `rebalance_freq` set to 1 for daily momentum
-alpha_returns = tester.backtest(optimizer=alpha_strategy, rebalance_freq=1)
+# Backtest with `rebalance_freq` and `reopt_freq` set to 1 for daily momentum
+alpha_returns = tester.backtest(optimizer=alpha_strategy, rebalance_freq=1, reopt_freq=1)
 ```
 
 Upon having `alpha_returns` we can use it to plot wealth and get metrics.
diff --git a/docs/docs/examples/which_kelly_is_best.md b/docs/docs/examples/which_kelly_is_best.md
index 0329581..9010179 100644
--- a/docs/docs/examples/which_kelly_is_best.md
+++ b/docs/docs/examples/which_kelly_is_best.md
@@ -6,7 +6,7 @@ The Kelly Criterion, proposed by John Larry Kelly Jr., is the mathematically opt
 
 There are numerous variants of the Kelly Criterion introduced to combat this fragile dependency, such as fractional Kelly, popularized by Ed Thorp, and distributionally robust Kelly models.
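+
+(Under the common textbook definition, a fractional Kelly portfolio simply scales the full-Kelly weights by a fraction `f`, holding the remainder in cash: `w_f = f * w_kelly`, so half Kelly corresponds to `f = 0.5` and quarter Kelly to `f = 0.25`. This is an illustrative gloss; OPES's exact parameterization may differ.)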
 In this example, we compare several of the most well-known Kelly variants under identical out-of-sample conditions, evaluating their realized performance and wealth dynamics using `opes`.
 
 !!! warning "Warning:"
-    This example may be computationally heavy because of multiple optimization models running with a low `rebalance_freq=5`. If you prefer better performance, increase `rebalance_freq` to monthly (`21`) or any value much greater than `5`.
+    This example may be computationally heavy because of multiple optimization models running with a low `reopt_freq=5`. If you prefer better performance, increase `reopt_freq` to monthly (`21`) or any value much greater than `5`.
 
 ---
 
@@ -121,7 +121,7 @@ for distributionally robust variants, we utilize `KLradius` for the ambiguity ra
 
 ## Backtesting
 
-Using the `Backtester` class from `opes`, we backtest these strategies under a constant, but high, cost of 20 bps and `rebalance_freq=5` (weekly). Oh, and we clean weights too.
+Using the `Backtester` class from `opes`, we backtest these strategies under a constant, but high, cost of 20 bps and `reopt_freq=5` (weekly). `rebalance_freq` defaults to `1`. Oh, and we clean weights too.
 
 ```python
 # A constant slippage backtest
@@ -129,12 +129,12 @@ tester = Backtester(train_data=train, test_data=test, cost={'const' : 20})
 
 # Obtaining returns
 # For now, weights and costs don't matter, so we discard them
-ck_scenario = tester.backtest(optimizer=classic_kelly, rebalance_freq=5, clean_weights=True)['returns']
-hk_scenario = tester.backtest(optimizer=half_kelly, rebalance_freq=5, clean_weights=True)['returns']
-qk_scenario = tester.backtest(optimizer=quarter_kelly, rebalance_freq=5, clean_weights=True)['returns']
-kldrk_scenario = tester.backtest(optimizer=kldr_kelly, rebalance_freq=5, clean_weights=True)['returns']
-kldrhk_scenario = tester.backtest(optimizer=kldr_halfkelly, rebalance_freq=5, clean_weights=True)['returns']
-kldrqk_scenario = tester.backtest(optimizer=kldr_quarterkelly, rebalance_freq=5, clean_weights=True)['returns']
+ck_scenario = tester.backtest(optimizer=classic_kelly, reopt_freq=5, clean_weights=True)['returns']
+hk_scenario = tester.backtest(optimizer=half_kelly, reopt_freq=5, clean_weights=True)['returns']
+qk_scenario = tester.backtest(optimizer=quarter_kelly, reopt_freq=5, clean_weights=True)['returns']
+kldrk_scenario = tester.backtest(optimizer=kldr_kelly, reopt_freq=5, clean_weights=True)['returns']
+kldrhk_scenario = tester.backtest(optimizer=kldr_halfkelly, reopt_freq=5, clean_weights=True)['returns']
+kldrqk_scenario = tester.backtest(optimizer=kldr_quarterkelly, reopt_freq=5, clean_weights=True)['returns']
 ```
 
 ---
diff --git a/docs/docs/objectives/heuristics.md b/docs/docs/objectives/heuristics.md
index be77498..e61311c 100644
--- a/docs/docs/objectives/heuristics.md
+++ b/docs/docs/objectives/heuristics.md
@@ -23,7 +23,7 @@ class HierarchicalRiskParity(cluster_method='average')
 
 Hierarchical Risk Parity (HRP) optimization.
 
-Hierarchical Risk Parity (HRP), introduced by López de Prado,
+Hierarchical Risk Parity (HRP), introduced by Lopez de Prado,
 is a portfolio construction methodology that allocates capital
 through hierarchical clustering and recursive risk balancing
 rather than direct optimization of a scalar objective. HRP
diff --git a/opes/__init__.py b/opes/__init__.py
index 14a6b0b..89bcd1e 100644
--- a/opes/__init__.py
+++ b/opes/__init__.py
@@ -1,5 +1,5 @@
 # Version Log
-__version__ = "0.10.0"
+__version__ = "0.11.0"
 
 # Backtester easy import
 from .backtester import Backtester
diff --git a/opes/backtester.py b/opes/backtester.py
index a67c55a..0fa6209 100644
--- a/opes/backtester.py
+++ b/opes/backtester.py
@@ -10,7 +10,7 @@
 ---
 """
 
-from numbers import Real
+from numbers import Real, Integral as Integer
 import time
 import inspect
 
@@ -20,7 +20,7 @@ import matplotlib.pyplot as plt
 
 from opes.errors import PortfolioError, DataError
-from opes.utils import slippage, extract_trim
+from opes.utils import extract_trim
 
 
 class Backtester:
@@ -79,7 +79,7 @@ def __init__(self, train_data=None, test_data=None, cost={"const": 10.0}):
         self.cost = cost
 
     def _backtest_integrity_check(
-        self, optimizer, rebalance_freq, seed, cleanweights=False
+        self, optimizer, rebalance_freq, reopt_freq, seed, cleanweights=False
    ):
         # Checking train and test data validity
         if not isinstance(self.train, pd.DataFrame):
@@ -111,11 +111,15 @@ def _backtest_integrity_check(
                 f"Expected optimizer object to have 'optimize' attribute."
             )
         # Checking rebalance frequency type and validity
-        if rebalance_freq is not None:
-            if rebalance_freq <= 0 or not isinstance(rebalance_freq, int):
-                raise PortfolioError(
-                    f"Invalid rebalance frequency. Expected integer within bounds [1,T], Got {rebalance_freq}"
-                )
+        if not isinstance(rebalance_freq, Integer) or rebalance_freq <= 0:
+            raise PortfolioError(
+                f"Invalid rebalance frequency. Expected integer within bounds [1,T], Got {rebalance_freq}"
+            )
+        # Checking re-optimization frequency type validity
+        if not isinstance(reopt_freq, Integer) or reopt_freq <= 0:
+            raise PortfolioError(
+                f"Invalid re-optimization frequency. Expected integer within bounds [1,T], Got {reopt_freq}"
+            )
         # Validating numpy seed
         if seed is not None and not isinstance(seed, int):
             raise PortfolioError(f"Invalid seed. Expected integer or None, Got {seed}")
@@ -150,35 +154,116 @@ def _backtest_integrity_check(
                     f"Invalid jump cost model parameter length. Expected 3, got {len(self.cost[first_key])}"
                 )
 
+    # Helper method to combine datasets
+    # Combines training and testing data up to a particular timestep
+    def _combine_datasets(self, upto_timestep):
+        # NO LOOKAHEAD BIAS
+        # Rebalance at timestep t using only past data (up to t, exclusive) to avoid lookahead bias
+        # Training data is pre-cleaned (no NaNs), test data up to t is also NaN-free
+        # Concatenating them preserves this property; dropna() handles edge cases safely
+        # The optimizer therefore only sees information available until the current decision point
+        combined_dataset = pd.concat([self.train, self.test.iloc[:upto_timestep]])
+        combined_dataset = combined_dataset[
+            ~combined_dataset.index.duplicated(keep="first")
+        ].dropna()
+
+        return combined_dataset
+
+    # Helper method to compute drifted weights for timestep
+    # Returns the realized drifted weights for the timestep
+    def _compute_drifted_weights(self, w_prev, returns):
+        w_prev = np.asarray(w_prev)
+        w_realized = (w_prev * (1 + returns)) / (1 + np.sum(w_prev * returns))
+
+        return w_realized
+
+    # Helper method to compute costs array
+    # Returns either a constant or an array of length=horizon of cost
+    def _slippage_costs(self, cost, horizon, numpy_seed=None):
+        # Setting numpy seed and finding cost parameters
+        numpy_rng = np.random.default_rng(numpy_seed)
+        cost_key = next(iter(cost)).lower()
+        cost_params = cost[cost_key]
+
+        # ---------- COST MODELS ----------
+        match cost_key:
+            # Constant slippage
+            case "const":
+                return cost_params / 10000
+            # Gamma distributed slippage
+            case "gamma":
+                return (
+                    numpy_rng.gamma(
+                        shape=cost_params[0], scale=cost_params[1], size=horizon
+                    )
+                    / 10000
+                )
+            # Lognormally distributed slippage
+            case "lognormal":
+                return (
+                    numpy_rng.lognormal(
+                        mean=cost_params[0], sigma=cost_params[1], size=horizon
+                    )
+                    / 10000
+                )
+            # Inverse gaussian slippage
+            case "inversegaussian":
+                return (
+                    numpy_rng.wald(
+                        mean=cost_params[0], scale=cost_params[1], size=horizon
+                    )
+                    / 10000
+                )
+            # Compound-poisson lognormal slippage (jump process)
+            case "jump":
+                N = numpy_rng.poisson(cost_params[0], size=horizon)
+                jump_cost = np.array(
+                    [
+                        (
+                            np.sum(
+                                numpy_rng.lognormal(
+                                    mean=cost_params[1], sigma=cost_params[2], size=n
+                                )
+                            )
+                            if n > 0
+                            else 0
+                        )
+                        for n in N
+                    ]
+                )
+                return jump_cost / 10000
+            case _:
+                raise DataError(f"Unknown cost model: {cost_key}")
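+
+    # Illustrative usage sketch (comments only, hypothetical values): assuming a
+    # Backtester `bt` built with cost={"gamma": (5, 1)} and a 250-step test set,
+    # per-step slippage draws are generated once and later scaled by realized
+    # turnover inside backtest():
+    #     per_step = bt._slippage_costs(cost=bt.cost, horizon=250, numpy_seed=100)
+    #     costs_array = turnover_array * per_step
+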
     def backtest(
         self,
         optimizer,
-        rebalance_freq=None,
-        seed=None,
+        rebalance_freq=1,
+        reopt_freq=1,
+        seed=100,
         weight_bounds=None,
         clean_weights=False,
     ):
         """
         Execute a portfolio backtest over the test dataset using a given optimizer.
 
-        This method performs either a static-weight backtest or a rolling-weight
-        backtest depending on whether `rebalance_freq` is specified. It also
-        applies transaction costs and ensures no lookahead bias during rebalancing.
+        This method performs a walk-forward backtest using the user-defined `rebalance_freq`
+        and `reopt_freq`. It also applies transaction costs and ensures no lookahead bias.
 
         For a rolling backtest, any common date values are dropped; the first occurrence
         is considered to be original and kept.
 
         !!! warning "Warning:"
             Some online learning methods such as `ExponentialGradient` update weights based
-            on the most recent observations. Setting `rebalance_freq` to any value other
-            than `1` (or possibly `None`) may result in suboptimal performance, as
-            intermediate data points will be ignored and not used for weight updates.
-            Proceed with caution when using other rebalancing frequencies with online learning algorithms.
+            on the most recent observations. Setting `reopt_freq` to any value other
+            than `1` may result in suboptimal performance, as intermediate data points will
+            be ignored and not used for weight updates.
 
         **Args:**
 
         - `optimizer`: An optimizer object containing the optimization strategy. Accepts both OPES built-in objectives and externally constructed optimizer objects.
-        - `rebalance_freq` (*int or None, optional*): Frequency of rebalancing (re-optimization) in time steps. If `None`, a static weight backtest is performed. Defaults to `None`.
-        - `seed` (*int or None, optional*): Random seed for reproducible cost simulations. Defaults to `None`.
+        - `rebalance_freq` (*int, optional*): Frequency of rebalancing in time steps. Must be `>= 1`. Defaults to `1`.
+        - `reopt_freq` (*int, optional*): Frequency of re-optimization in time steps. Must be `>= 1`. Defaults to `1`.
+        - `seed` (*int or None, optional*): Random seed for reproducible cost simulations. Defaults to `100`.
         - `weight_bounds` (*tuple, optional*): Bounds for portfolio weights passed to the optimizer if supported.
 
         !!! abstract "Rules for `optimizer` Object"
@@ -188,22 +273,33 @@ def backtest(
             - `**kwargs`: For safety against breaking changes.
             - `optimize` must output weights for the timestep.
 
+        !!! note "Note"
+            - Re-optimization does not automatically imply rebalancing. When the portfolio is re-optimized at a given timestep, weights may or may not be updated depending on the value of `rebalance_freq`.
+            - To ensure a coherent backtest, a common practice is to choose frequencies such that `reopt_freq % rebalance_freq == 0`. This guarantees that whenever optimization occurs, a rebalance is also performed.
+            - Within a given timestep, if both are scheduled, optimization is performed first and rebalancing follows.
+
+        !!! tip "Tip"
+            Common portfolio styles can be constructed by appropriate choices of `rebalance_freq` and `reopt_freq`:
+
+            - Buy-and-Hold: `rebalance_freq > horizon`, `reopt_freq > horizon`
+            - Constantly Rebalanced: `rebalance_freq = 1`, `reopt_freq > horizon`
+            - Fully Dynamic: `rebalance_freq = 1`, `reopt_freq = 1`
+
         **Returns:**
 
         - `dict`: Backtest results containing the following keys:
             - `'returns'` (*np.ndarray*): Portfolio returns after accounting for costs.
             - `'weights'` (*np.ndarray*): Portfolio weights at each timestep.
             - `'costs'` (*np.ndarray*): Transaction costs applied at each timestep.
-            - `'dates'` (*np.ndarray*): Dates on which the backtest was conducted.
+            - `'timeline'` (*np.ndarray*): Timeline on which the backtest was conducted.
 
         Raises:
             DataError: If the optimizer does not accept weight bounds but `weight_bounds` are provided.
             PortfolioError: If input validation fails (via `_backtest_integrity_check`).
+            OptimizationError: If the underlying optimizer relies on numerical optimization and the optimization fails.
 
         !!! note "Notes:"
             - All returned arrays are aligned in time and have length equal to the test dataset.
-            - Static weight backtest: Uses a single set of optimized weights for all test data. This denotes a constant rebalanced portfolio.
-            - Rolling weight backtest: Re-optimizes weights at intervals defined by `rebalance_freq` using only historical data up to the current point to prevent lookahead bias.
             - Returns and weights are stored in arrays aligned with test data indices.
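+
+        A strategy that follows these rules can be written externally and passed
+        straight to `backtest`. A minimal sketch (illustrative only, not an OPES
+        built-in):
+
+        ```python
+        import numpy as np
+
+        class EqualWeight:
+            # Accepts the data window plus optional w/**kwargs per the rules above
+            def optimize(self, data, w=None, **kwargs):
+                n_assets = data.shape[1]
+                return np.ones(n_assets) / n_assets
+        ```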
example "Example:" @@ -211,8 +307,8 @@ def backtest( import numpy as np # Importing necessary OPES modules - from opes.objectives.utility_theory import Kelly - from opes.backtester import Backtester + from opes.objectives import Kelly + from opes import Backtester # Place holder for your price data from some_random_module import trainData, testData @@ -228,137 +324,120 @@ def backtest( tester = Backtester(train_data=training, test_data=testing) # Obtaining backtest data for kelly optimizer - kelly_backtest = tester.backtest(optimizer=kelly_optimizer, rebalance_freq=21) + kelly_backtest = tester.backtest( + optimizer=kelly_optimizer, + rebalance_freq=1, # Rebalance daily + reopt_freq=21 # Re-optimize monthly + ) # Printing results for key in kelly_backtest: print(f"{key}: {kelly_backtest[key]}") ``` """ - # Running backtester integrity checks, extracting test return data and caching optimizer parameters + # Running backtester integrity checks, extracting test return data and caching values self._backtest_integrity_check( - optimizer, rebalance_freq, seed, cleanweights=clean_weights + optimizer, rebalance_freq, reopt_freq, seed, cleanweights=clean_weights ) test_data = extract_trim(self.test)[1] optimizer_parameters = inspect.signature(optimizer.optimize).parameters - - # ---------- BACKTEST LOOPS ---------- - - # Static weight backtest - if rebalance_freq is None: - - # ---------- STATIC OPTIMIZATION BLOCK ---------- - - # Using weight bounds if it is given AND if it is present as a parameter within optimize method - # Otherwise weights are optimized without weight bounds argument - kwargs = {} - # Checking for weight_bounds - if weight_bounds is not None and "weight_bounds" in optimizer_parameters: - kwargs["weight_bounds"] = weight_bounds - # Optimizing for the timestep - weights = optimizer.optimize(self.train, **kwargs) - - # ------------------------------------------------ - - # Cleaning weights if true and if optimizer has method - if clean_weights and hasattr(optimizer, "clean_weights"): - weights = optimizer.clean_weights() - # Repeating same weights for remaining timeline for static backtest - weights_array = np.tile(weights, (len(test_data), 1)) - - # Rolling weight (Walk-forward) backtest - if rebalance_freq is not None: - # Initializing weights list - # NOTE: More readable than initializing a 2D numpy array - weights = [None] * len(test_data) - - # ---------- INITIAL OPTIMIZATION BLOCK ---------- - - # First optimization is done manually using training data - # Using weight bounds if it is given AND if it is present as a parameter within optimize method - # Otherwise weights are optimized without weight bounds argument - kwargs = {} - # Checking for weight_bounds - if weight_bounds is not None and "weight_bounds" in optimizer_parameters: - kwargs["weight_bounds"] = weight_bounds - # Optimizing for the timestep - temp_weights = optimizer.optimize(self.train, **kwargs) - - # -------------------------------------------------- - - # Cleaning weights if true and if optimizer has method - if clean_weights and hasattr(optimizer, "clean_weights"): - temp_weights = optimizer.clean_weights() + horizon = len(test_data) + + # ---------- BACKTEST LOOP ---------- + + # Initializing weights list and turnover array + # NOTE: More readable than initializing a 2D numpy array + weights = [None] * horizon + turnover_array = np.zeros(horizon) + + # First optimization is done manually using training data + # Using weight bounds if it is given AND if it is present as a parameter within optimize method 
+        horizon = len(test_data)
+
+        # ---------- BACKTEST LOOP ----------
+
+        # Initializing weights list and turnover array
+        # NOTE: More readable than initializing a 2D numpy array
+        weights = [None] * horizon
+        turnover_array = np.zeros(horizon)
+
+        # First optimization is done manually using training data
+        # Using weight bounds if it is given AND if it is present as a parameter within optimize method
+        # Otherwise weights are optimized without weight bounds argument
+        kwargs = {}
+        # Checking for weight_bounds
+        if weight_bounds is not None and "weight_bounds" in optimizer_parameters:
+            kwargs["weight_bounds"] = weight_bounds
+        # Optimizing for the timestep
+        optimized_weights = optimizer.optimize(self.train, **kwargs)
+
+        # Cleaning weights if true and if optimizer has method
+        if clean_weights and hasattr(optimizer, "clean_weights"):
+            optimized_weights = optimizer.clean_weights()
+
+        # Assigning computed weights to weight array
+        weights[0] = optimized_weights
+
+        # For loop through timesteps to automate remaining walk-forward test
+        for t in range(1, horizon):
+
+            # ---------- RE-OPTIMIZATION BLOCK ----------
+            # Re-optimization check during appropriate frequency
+            # If the check is satisfied, optimization takes place and the new weights are computed
+            # NOTE: Rebalancing is handled separately using `rebalance_freq`
+            if t % reopt_freq == 0:
+
+                combined_dataset = self._combine_datasets(upto_timestep=t)
+
+                # We find if 'w' and 'weight_bounds' parameters are present within the optimizer
+                # The parameters which are present are leveraged (e.g. warm start, weight updates for 'w')
+                # Otherwise it is optimized without any extra arguments
+                kwargs = {}
+                if "w" in optimizer_parameters:
+                    kwargs["w"] = optimized_weights
+                if (
+                    weight_bounds is not None
+                    and "weight_bounds" in optimizer_parameters
+                ):
+                    kwargs["weight_bounds"] = weight_bounds
+
+                # Optimizing for the timestep
+                optimized_weights = optimizer.optimize(combined_dataset, **kwargs)
+
+                # Cleaning weights if true and if optimizer has method
+                if clean_weights and hasattr(optimizer, "clean_weights"):
+                    optimized_weights = optimizer.clean_weights()
+
+            # ---------- REBALANCING BLOCK ----------
+            # Computing drifted weights
+            # This is necessary for turnover and slippage modelling
+            drifted_weights = self._compute_drifted_weights(
+                weights[t - 1], test_data[t]
+            )
-
-            # Assigning computed weights to weight array
-            weights[0] = temp_weights
-            optimizer_parameters = optimizer_parameters
-
-            # For loop through timesteps to automate remaining walk-forward test
-            for t in range(1, len(test_data)):
-
-                # Rebalancing (Re-optimizing) during appropriate frequency
-                if t % rebalance_freq == 0:
-
-                    # ---------- WALK FORWARD OPTIMIZATION BLOCK ----------
-
-                    # NO LOOKAHEAD BIAS
-                    # Rebalance at timestep t using only past data (up to t, exclusive) to avoid lookahead bias
-                    # Training data is pre-cleaned (no NaNs), test data up to t is also NaN-free
-                    # Concatenating them preserves this property; dropna() handles edge cases safely
-                    # The optimizer therefore only sees information available until the current decision point
-                    combined_dataset = pd.concat([self.train, self.test.iloc[:t]])
-                    combined_dataset = combined_dataset[
-                        ~combined_dataset.index.duplicated(keep="first")
-                    ].dropna()
-                    # We find if 'w' and 'weight_bounds' parameters are present within the optimizer
-                    # The parameters which are present are leveraged (Eg. warm start, weight updates for 'w')
-                    # Otherwise it is optimized without any extra arguments
-                    kwargs = {}
-                    # Checking for w
-                    if "w" in optimizer_parameters:
-                        kwargs["w"] = temp_weights
-                    # Checking for weight_bounds
-                    if (
-                        weight_bounds is not None
-                        and "weight_bounds" in optimizer_parameters
-                    ):
-                        kwargs["weight_bounds"] = weight_bounds
-                    # Optimizing for the timestep
-                    temp_weights = optimizer.optimize(combined_dataset, **kwargs)
-
-                    # ------------------------------------------------------------
-
-                    # Cleaning weights if true and if optimizer has method
-                    if clean_weights and hasattr(optimizer, "clean_weights"):
-                        temp_weights = optimizer.clean_weights(temp_weights)
-
-                    # Assigning computed weights to weight array
-                    weights[t] = temp_weights
-
-            # Creating vertical stack for vectorization
-            weights_array = np.vstack(weights)
+            # Assigning computed weights to weight array
+            # If the rebalance frequency is satisfied, the weights for the timestep are the optimized weights
+            # Otherwise, the weights for the timestep are the drifted (realized) weights
+            if t % rebalance_freq == 0:
+                weights[t] = optimized_weights
+            else:
+                weights[t] = drifted_weights
+
+            # ---------- TURNOVER BLOCK ----------
+            # Computing turnover
+            # Turnover is the L1 distance from current weights to drifted weights
+            # If not rebalanced, turnover is 0
+            turnover_for_timestep = np.sum(np.abs(weights[t] - drifted_weights))
+            turnover_array[t] = turnover_for_timestep
 
         # --------- POST PROCESSING BLOCK ---------
-
-        # Vectorizing portfolio returns, finding cost array and finding final portfolio returns after costs
-        portfolio_returns = np.einsum("ij,ij->i", weights_array, test_data)
-        costs_array = slippage(
-            weights=weights_array,
-            returns=portfolio_returns,
-            cost=self.cost,
-            numpy_seed=seed,
+        # Creating vertical stack for vectorization
+        weights_array = np.vstack(weights)
+        # Computing slippage costs over time, vectorizing portfolio returns and finding final portfolio returns after costs
+        costs_array = turnover_array * self._slippage_costs(
+            cost=self.cost, horizon=horizon, numpy_seed=seed
         )
+        portfolio_returns = np.einsum("ij,ij->i", weights_array, test_data)
         portfolio_returns -= costs_array
 
-        # Finding dates array from test data
+        # Finding timeline array from test data
         # NOTE: the first value is excluded since pct_change() drops the first date for return construction
-        dates = self.test.index.to_numpy()[1:]
+        timeline_array = self.test.index.to_numpy()[1:]
 
         return {
             "returns": portfolio_returns,
             "weights": weights_array,
            "costs": costs_array,
-            "timeline": dates,
+            "timeline": timeline_array,
         }
 
     def get_metrics(self, returns):
@@ -413,8 +492,8 @@
         !!! example "Example:"
             ```python
             # Importing portfolio method and backtester
-            from opes.objectives.markowitz import MaxSharpe
-            from opes.backtester import Backtester
+            from opes.objectives import MaxSharpe
+            from opes import Backtester
 
             # Placeholder for your price data
             from some_random_module import trainData, testData
@@ -513,8 +592,8 @@ def plot_wealth(
example "Example:" ```python # Importing portfolio methods and backtester - from opes.objectives.markowitz import MaxMean, MeanVariance - from opes.backtester import Backtester + from opes.objectives import MaxMean, MeanVariance + from opes import Backtester # Place holder for your price data from some_random_module import trainData, testData @@ -530,9 +609,9 @@ def plot_wealth( # Initializing Backtest with constant costs tester = Backtester(train_data=training, test_data=testing) - # Obtaining returns array from backtest for both optimizers (Monthly Rebalancing) - scenario_1 = tester.backtest(optimizer=maxmeanl2, rebalance_freq=21) - scenario_2 = tester.backtest(optimizer=mvo1_5, rebalance_freq=21)['returns'] + # Obtaining returns array from backtest for both optimizers + scenario_1 = tester.backtest(optimizer=maxmeanl2) + scenario_2 = tester.backtest(optimizer=mvo1_5)['returns'] # Plotting wealth tester.plot_wealth( diff --git a/opes/objectives/heuristics.py b/opes/objectives/heuristics.py index bf85897..ab4d4c2 100644 --- a/opes/objectives/heuristics.py +++ b/opes/objectives/heuristics.py @@ -885,7 +885,7 @@ class HierarchicalRiskParity(Optimizer): """ Hierarchical Risk Parity (HRP) optimization. - Hierarchical Risk Parity (HRP), introduced by López de Prado, + Hierarchical Risk Parity (HRP), introduced by Lopez de Prado, is a portfolio construction methodology that allocates capital through hierarchical clustering and recursive risk balancing rather than direct optimization of a scalar objective. HRP diff --git a/opes/utils.py b/opes/utils.py index 7f3acc6..315fa07 100644 --- a/opes/utils.py +++ b/opes/utils.py @@ -61,68 +61,6 @@ def find_constraint(bounds, constraint_type=1): return constraint_list -# Slippage function -def slippage(weights, returns, cost, numpy_seed=None): - numpy_rng = np.random.default_rng(numpy_seed) - turnover_array = np.zeros(len(weights)) - # Loop range is from 1 to horizon. 
-    # Loop range is from 1 to horizon. Rebalancing happens from t=1
-    for i in range(1, len(weights)):
-        w_current = weights[i]
-        w_prev = weights[i - 1]
-        w_realized = (w_prev * (1 + returns[i])) / (1 + np.sum(w_prev * returns[i]))
-        turnover = np.sum(np.abs(w_current - w_realized))
-        turnover_array[i] = turnover
-    # Deciding slippage model using cost key
-    cost_key = next(iter(cost)).lower()
-    cost_params = cost[cost_key]
-    # Constant slippage
-    if cost_key == "const":
-        return turnover_array * cost_params / 10000
-    horizon = len(turnover_array)
-    # Gamma distributed slippage
-    if cost_key == "gamma":
-        return (
-            turnover_array
-            * numpy_rng.gamma(shape=cost_params[0], scale=cost_params[1], size=horizon)
-            / 10000
-        )
-    # Lognormally distributed slippage
-    elif cost_key == "lognormal":
-        return (
-            turnover_array
-            * numpy_rng.lognormal(
-                mean=cost_params[0], sigma=cost_params[1], size=horizon
-            )
-            / 10000
-        )
-    # Inverse gaussian slippage
-    elif cost_key == "inversegaussian":
-        return (
-            turnover_array
-            * numpy_rng.wald(mean=cost_params[0], scale=cost_params[1], size=horizon)
-            / 10000
-        )
-    # Compound poisson slippage (jump process)
-    elif cost_key == "jump":
-        N = numpy_rng.poisson(cost_params[0], size=horizon)
-        jump_cost = np.array(
-            [
-                (
-                    np.sum(
-                        numpy_rng.lognormal(
-                            mean=cost_params[1], sigma=cost_params[2], size=n
-                        )
-                    )
-                    if n > 0
-                    else 0
-                )
-                for n in N
-            ]
-        )
-        return turnover_array * jump_cost / 10000
-    raise DataError(f"Unknown cost model: {cost_key}")
-
-
 # Data integrity checker
 def test_integrity(
     tickers,
diff --git a/pyproject.toml b/pyproject.toml
index 106cdef..deea1b1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,7 @@ dev = ["pytest", "other-dev-packages"]
 
 [project]
 name = "opes"
-version = "0.10.0"
+version = "0.11.0"
 description = "A research-focused portfolio optimization and backtesting engine."
 readme = "README.md"
 requires-python = ">=3.10"
diff --git a/tests/backtester_test.py b/tests/backtester_test.py
index abc3857..25c5698 100644
--- a/tests/backtester_test.py
+++ b/tests/backtester_test.py
@@ -250,8 +250,10 @@ def test_backtest_engine(prices_df):
 
     # Executing static backtest
     static_details = test_backtester.backtest(opt)
 
-    # Executing rolling backtest (Daily rebalancing)
-    rolling_details = test_backtester.backtest(opt, rebalance_freq=1)
+    # Executing rolling backtest (Daily rebalancing & re-optimization)
+    rolling_details = test_backtester.backtest(
+        opt, rebalance_freq=1, reopt_freq=1
+    )
 
     # Getting static and rolling results
     static_results = validate_backtest_results(static_details)
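For completeness, a minimal end-to-end sketch of the updated walk-forward API (the placeholder data module mirrors the docs examples, and `Kelly` is assumed to be constructible with default arguments):

```python
import pandas as pd

from opes import Backtester
from opes.objectives import Kelly

# Placeholder for your price data
from some_random_module import trainData, testData

training = pd.DataFrame(trainData)
testing = pd.DataFrame(testData)

# Fully dynamic backtest: re-optimize and rebalance at every timestep
tester = Backtester(train_data=training, test_data=testing, cost={'const': 10})
results = tester.backtest(optimizer=Kelly(), rebalance_freq=1, reopt_freq=1, seed=100)

print(results['returns'].shape, results['costs'].sum())
print(results['timeline'][:5])
```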