diff --git a/alephnull/algorithm.py b/alephnull/algorithm.py
index d2e5d51..0a1705c 100644
--- a/alephnull/algorithm.py
+++ b/alephnull/algorithm.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 from copy import copy
 from datetime import datetime
-from itertools import groupby, ifilter
+from itertools import groupby
 from operator import attrgetter
 
 import pytz
@@ -197,7 +197,7 @@ def __repr__(self):
                    recorded_vars=repr(self.recorded_vars))
 
     def _init_positions(self):
-        for sid, pos in self._portfolio.positions.iteritems():
+        for sid, pos in self._portfolio.positions.items():
             for perf_period in self.perf_tracker.perf_periods:
                 perf_period.update_position(
                     sid=sid,
@@ -234,7 +234,7 @@ def _create_data_generator(self, source_filter, sim_params):
         date_sorted = date_sorted_sources(*self.sources)
 
         if source_filter:
-            date_sorted = ifilter(source_filter, date_sorted)
+            date_sorted = filter(source_filter, date_sorted)
 
         with_tnfms = sequential_transforms(date_sorted,
                                            *self.transforms)
@@ -353,7 +353,7 @@ def run(self, source, sim_params=None, benchmark_return_source=None):
 
         # Create transforms by wrapping them into StatefulTransforms
         self.transforms = []
-        for namestring, trans_descr in self.registered_transforms.iteritems():
+        for namestring, trans_descr in self.registered_transforms.items():
             sf = StatefulTransform(
                 trans_descr['class'],
                 *trans_descr['args'],
@@ -421,7 +421,7 @@ def record(self, **kwargs):
         """
         Track and record local variable (i.e. attributes) each day.
         """
-        for name, value in kwargs.items():
+        for name, value in list(kwargs.items()):
             self._recorded_vars[name] = value
 
     def order(self, sid, amount, limit_price=None, stop_price=None):
@@ -443,7 +443,7 @@ def get_open_orders(self, sid=None):
         if sid is None:
             return {key: [order.to_api_obj() for order in orders]
                     for key, orders
-                    in self.blotter.open_orders.iteritems()}
+                    in self.blotter.open_orders.items()}
         if sid in self.blotter.open_orders:
             orders = self.blotter.open_orders[sid]
             return [order.to_api_obj() for order in orders]
@@ -456,8 +456,8 @@ def get_orders(self, sid=None):
        orders = {id_: {(self.blotter.orders[id_].__dict__['sid'],
                         self.blotter.orders[id_].__dict__['contract']):
                        self.blotter.orders[id_].__dict__} for id_ in
-                 self.blotter.orders.keys()}
-        orders = [{sym: {key: v} for sym, v in orders[key].iteritems()} for key in orders.keys()]
+                 list(self.blotter.orders.keys())}
+        orders = [{sym: {key: v} for sym, v in orders[key].items()} for key in list(orders.keys())]
         orders_flat = {}
         for d in orders:
             orders_flat.update(d)
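The hunks above capture the two mechanical changes at the heart of the Python 3 port: `dict.iteritems()` becomes `items()`, and `itertools.ifilter` becomes the builtin `filter()`. In Python 3 both return lazy views or iterators rather than lists, so call sites that mutate the dict mid-loop or index into the result must materialize a list first; that is why 2to3 defensively wraps several calls in `list(...)`, as in `record()` and `get_orders()` above. A minimal sketch of the new semantics, using a stand-in dict rather than alephnull's real portfolio:

    # Stand-in data; the sids and symbols are illustrative only.
    positions = {24: 'GS', 5061: 'GOOG'}

    for sid, symbol in positions.items():       # Python 2: positions.iteritems()
        print(sid, symbol)

    big = filter(lambda kv: kv[0] > 100, positions.items())   # Python 2: ifilter
    print(list(big))        # a filter object is single-use; listify it to reuse

    # Deleting keys while iterating a live view raises RuntimeError on Python 3,
    # hence the defensive list(...) around .keys()/.items() in some hunks:
    for sid in list(positions.keys()):
        if sid < 100:
            del positions[sid]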
+ """ + value = self.portfolio.portfolio_value * percent + return self.order_value(sid, value, limit_price, stop_price) + + def target(self, sid, target, limit_price=None, stop_price=None): + """ + Place an order to adjust a position to a target number of shares. If + the position doesn't already exist, this is equivalent to placing a new + order. If the position does exist, this is equivalent to placing an + order for the difference between the target number of shares and the + current number of shares. + """ + if sid in self.portfolio.positions: + current_position = self.portfolio.positions[sid].amount + req_shares = target - current_position + return self.order(sid, req_shares, limit_price, stop_price) + else: + return self.order(sid, target, limit_price, stop_price) + + def target_value(self, sid, target, limit_price=None, stop_price=None): + """ + Place an order to adjust a position to a target value. If + the position doesn't already exist, this is equivalent to placing a new + order. If the position does exist, this is equivalent to placing an + order for the difference between the target value and the + current value. + """ + if sid in self.portfolio.positions: + current_position = self.portfolio.positions[sid].amount + current_price = self.portfolio.positions[sid].last_sale_price + current_value = current_position * current_price + req_value = target - current_value + return self.order_value(sid, req_value, limit_price, stop_price) + else: + return self.order_value(sid, target, limit_price, stop_price) + + def target_percent(self, sid, target, limit_price=None, stop_price=None): + """ + Place an order to adjust a position to a target percent of the + current portfolio value. If the position doesn't already exist, this is + equivalent to placing a new order. If the position does exist, this is + equivalent to placing an order for the difference between the target + percent and the current percent. + + Note that target must expressed as a decimal (0.50 means 50\%). + """ + if sid in self.portfolio.positions: + current_position = self.portfolio.positions[sid].amount + current_price = self.portfolio.positions[sid].last_sale_price + current_value = current_position * current_price + else: + current_value = 0 + target_value = self.portfolio.portfolio_value * target + + req_value = target_value - current_value + return self.order_value(sid, req_value, limit_price, stop_price) diff --git a/alephnull/data/benchmarks.py b/alephnull/data/benchmarks.py index bc938c0..7069743 100644 --- a/alephnull/data/benchmarks.py +++ b/alephnull/data/benchmarks.py @@ -50,7 +50,7 @@ class BenchmarkDataNotFoundError(Exception): def benchmark_mappings(): return {key: Mapping(*value) for key, value - in _BENCHMARK_MAPPING.iteritems()} + in _BENCHMARK_MAPPING.items()} def get_raw_benchmark_data(start_date, end_date, symbol): diff --git a/alephnull/data/benchmarks.py.bak b/alephnull/data/benchmarks.py.bak new file mode 100644 index 0000000..bc938c0 --- /dev/null +++ b/alephnull/data/benchmarks.py.bak @@ -0,0 +1,134 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/alephnull/data/loader.py b/alephnull/data/loader.py
index f805822..768e49e 100644
--- a/alephnull/data/loader.py
+++ b/alephnull/data/loader.py
@@ -160,10 +160,10 @@ def load_market_data(bm_symbol='^GSPC'):
     try:
         fp_bm = get_datafile(get_benchmark_filename(bm_symbol), "rb")
     except IOError:
-        print("""
+        print(("""
 data files aren't distributed with source.
 Fetching data from Yahoo Finance.
-""").strip()
+""").strip())
         dump_benchmarks(bm_symbol)
         fp_bm = get_datafile(get_benchmark_filename(bm_symbol), "rb")
 
@@ -208,10 +208,10 @@ def load_market_data(bm_symbol='^GSPC'):
     try:
         fp_tr = get_datafile(filename, "rb")
     except IOError:
-        print("""
+        print(("""
 data files aren't distributed with source.
 Fetching data from {0}
-""").format(source).strip()
+""").format(source).strip())
         dump_treasury_curves(module, filename)
         fp_tr = get_datafile(filename, "rb")
 
@@ -239,7 +239,7 @@ def load_market_data(bm_symbol='^GSPC'):
     fp_tr.close()
 
     tr_curves = OrderedDict(sorted(
-        ((dt, c) for dt, c in tr_curves.iteritems()),
+        ((dt, c) for dt, c in tr_curves.items()),
         key=lambda t: t[0]))
 
     return benchmark_returns, tr_curves
@@ -290,7 +290,7 @@ def _load_raw_yahoo_data(indexes=None, stocks=None, start=None, end=None):
             data[stock] = stkd
 
     if indexes is not None:
-        for name, ticker in indexes.iteritems():
+        for name, ticker in indexes.items():
             stkd = DataReader(ticker, 'yahoo', start, end).sort_index()
             data[name] = stkd
 
@@ -329,7 +329,7 @@ def load_from_yahoo(indexes=None,
         close_key = 'Adj Close'
     else:
         close_key = 'Close'
-    df = pd.DataFrame({key: d[close_key] for key, d in data.iteritems()})
+    df = pd.DataFrame({key: d[close_key] for key, d in data.items()})
     df.index = df.index.tz_localize(pytz.utc)
     return df
 
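The two multi-line print hunks above fix the one genuinely breaking change in this file. In Python 2, `print ("...").strip()` is a print statement whose argument is the already stripped string. The identical text on Python 3 parses as a call to the `print()` function followed by `.strip()` on its return value, and since `print()` returns `None`, that raises `AttributeError`. Moving `.strip()` inside the call, as the patch does, restores the old behavior on both versions. A minimal sketch:

    # Python 2 statement:   print ("hello\n").strip()   ->  prints "hello"
    # Same text, Python 3:  AttributeError: 'NoneType' object has no attribute 'strip'
    # The patched form strips first, then prints:
    msg = """
    data files aren't distributed with source.
    """
    print(msg.strip())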
diff --git a/alephnull/data/loader_utils.py b/alephnull/data/loader_utils.py
index 7ff5840..f919274 100644
--- a/alephnull/data/loader_utils.py
+++ b/alephnull/data/loader_utils.py
@@ -126,7 +126,7 @@ def _row_cb(mapping, row):
     return {
         target: apply_mapping(mapping, row)
         for target, mapping
-        in mapping.iteritems()
+        in mapping.items()
     }
 
 
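One subtlety in `_row_cb` that the rename leaves intact: the comprehension's loop variable is also named `mapping`, shadowing the parameter. The code still works because the outermost iterable of a comprehension (`mapping.items()`) is evaluated once in the enclosing scope before the loop name is bound; only the body sees the rebound `mapping`. A self-contained demonstration of that scoping rule, with a simplified stand-in for the real `Mapping`/`apply_mapping` machinery:

    def row_cb(mapping, row):
        # mapping.items() sees the parameter; the loop then rebinds the
        # name `mapping` to each mapping value while building the dict.
        return {target: (mapping, row[mapping])
                for target, mapping in mapping.items()}

    print(row_cb({'date': 'Date'}, {'Date': '2013-01-02'}))
    # {'date': ('Date', '2013-01-02')}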
diff --git a/alephnull/data/treasuries.py b/alephnull/data/treasuries.py
index a2b325e..e44e93f 100644
--- a/alephnull/data/treasuries.py
+++ b/alephnull/data/treasuries.py
@@ -61,7 +61,7 @@ def get_treasury_rate(string_val):
 def treasury_mappings(mappings):
     return {key: Mapping(*value)
             for key, value
-            in mappings.iteritems()}
+            in mappings.items()}
 
 
 class iter_to_stream(object):
diff --git a/alephnull/data/treasuries_can.py b/alephnull/data/treasuries_can.py
index 8562034..adfa6fe 100644
--- a/alephnull/data/treasuries_can.py
+++ b/alephnull/data/treasuries_can.py
@@ -84,7 +84,7 @@ def get_treasury_source(start_date=None, end_date=None):
 
     bill_row = ""
     while ",".join(BILLS) not in bill_row:
-        bill_row = bill_iter.next()
+        bill_row = next(bill_iter)
         if 'Daily series:' in bill_row:
             bill_end_date = datetime.datetime.strptime(
                 bill_row.split(' - ')[1].strip(),
@@ -93,7 +93,7 @@ def get_treasury_source(start_date=None, end_date=None):
 
     bond_row = ""
     while ",".join(BONDS) not in bond_row:
-        bond_row = bond_iter.next()
+        bond_row = next(bond_iter)
         if 'Daily series:' in bond_row:
             bond_end_date = datetime.datetime.strptime(
                 bond_row.split(' - ')[1].strip(),
@@ -102,14 +102,14 @@ def get_treasury_source(start_date=None, end_date=None):
 
     #Line up the two dates
     if bill_end_date > bond_end_date:
-        bill_iter.next()
+        next(bill_iter)
     elif bond_end_date > bill_end_date:
-        bond_iter.next()
+        next(bond_iter)
 
     for bill_row in bill_iter:
-        bond_row = bond_iter.next()
-        bill_dict = dict(zip(bill_header, bill_row.split(",")))
-        bond_dict = dict(zip(bond_header, bond_row.split(",")))
+        bond_row = next(bond_iter)
+        bill_dict = dict(list(zip(bill_header, bill_row.split(","))))
+        bond_dict = dict(list(zip(bond_header, bond_row.split(","))))
         if ' Bank holiday' in bond_row.split(",") + bill_row.split(","):
             continue
         if ' Not available' in bond_row.split(",") + bill_row.split(","):
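treasuries_can.py is the only file here that used the Python 2 iterator method `.next()`, which Python 3 renames to `__next__()`. The portable spelling, used by the hunks above, is the `next()` builtin, available since Python 2.6, so the converted file also still runs on late Python 2. Roughly:

    rows = iter(['Date,V39063', '2013-01-02,0.93'])   # stand-in CSV lines

    header = next(rows)        # Python 2 wrote: rows.next()
    row = next(rows, None)     # the optional default suppresses StopIteration
    print(header, row)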
+ "lP=lookup_tbill_yields.php&sR={restrict}&se=" + "L_V39063-L_V39065-L_V39066-L_V39067&dF={start}&dT={end}" + .format(restrict=restriction.strftime("%Y-%m-%d"), + start=start_date.strftime("%Y-%m-%d"), + end=end_date.strftime("%Y-%m-%d"), + ) + ) + + bond_url = ( + "http://www.bankofcanada.ca/stats/results/csv?" + "lP=lookup_bond_yields.php&sR={restrict}&se=" + "L_V39051-L_V39052-L_V39053-L_V39054-L_V39055-L_V39056" + "&dF={start}&dT={end}" + .format(restrict=restriction.strftime("%Y-%m-%d"), + start=start_date.strftime("%Y-%m-%d"), + end=end_date.strftime("%Y-%m-%d") + ) + ) + + res_bill = requests.get(bill_url, stream=True) + res_bond = requests.get(bond_url, stream=True) + bill_iter = res_bill.iter_lines() + bond_iter = res_bond.iter_lines() + + bill_row = "" + while ",".join(BILLS) not in bill_row: + bill_row = bill_iter.next() + if 'Daily series:' in bill_row: + bill_end_date = datetime.datetime.strptime( + bill_row.split(' - ')[1].strip(), + "%Y-%m-%d").date() + bill_header = bill_row.split(",") + + bond_row = "" + while ",".join(BONDS) not in bond_row: + bond_row = bond_iter.next() + if 'Daily series:' in bond_row: + bond_end_date = datetime.datetime.strptime( + bond_row.split(' - ')[1].strip(), + "%Y-%m-%d").date() + bond_header = bond_row.split(",") + + #Line up the two dates + if bill_end_date > bond_end_date: + bill_iter.next() + elif bond_end_date > bill_end_date: + bond_iter.next() + + for bill_row in bill_iter: + bond_row = bond_iter.next() + bill_dict = dict(zip(bill_header, bill_row.split(","))) + bond_dict = dict(zip(bond_header, bond_row.split(","))) + if ' Bank holiday' in bond_row.split(",") + bill_row.split(","): + continue + if ' Not available' in bond_row.split(",") + bill_row.split(","): + continue + + bill_dict.update(bond_dict) + yield bill_dict + + +def get_treasury_data(): + mappings = treasury_mappings(_CURVE_MAPPINGS) + source = get_treasury_source() + return source_to_records(mappings, source) diff --git a/alephnull/examples/FuturesTradingAlgorithm.py b/alephnull/examples/FuturesTradingAlgorithm.py index 059e0e6..38d8e4b 100644 --- a/alephnull/examples/FuturesTradingAlgorithm.py +++ b/alephnull/examples/FuturesTradingAlgorithm.py @@ -43,7 +43,7 @@ def add_margin_to_bars(self, data): # Ideally we would use SPAN margining; however, based on some naive data analysis, # the max a stock changes in a several day period (up to 30 days) is about 42%. # Change this when you have a better strategy! 
- for symbol, measures in data.iteritems(): + for symbol, measures in data.items(): initial_margin = measures['price'] * 0.42 maintenance_margin = measures['price'] * 0.32 measures.__dict__.update({'initial_margin': initial_margin}) @@ -62,7 +62,7 @@ def handle_data(self, data): self.total_maintenance_margin = 0 # update margin account - for symbol, measures in data.iteritems(): + for symbol, measures in data.items(): position = self.perf_tracker.cumulative_performance.positions[symbol] last_price = self.last_prices.get(symbol) price = measures['price'] @@ -71,7 +71,7 @@ def handle_data(self, data): self.last_prices[symbol] = price self.total_maintenance_margin += measures['maintenance_margin'] - timestamp = next(data[0].iteritems() if type(data) is list else data.iteritems())[1]['datetime'] + timestamp = next(iter(data[0].items()) if type(data) is list else iter(data.items()))[1]['datetime'] self._margin_account_log[timestamp] = self.margin_account_value @@ -106,7 +106,7 @@ def _liquidate_random_positions(self): """Liquidate an entire position (the position in particular is chosen at random) until we are back above maintenance margin.""" while self.margin_account_value < self.total_maintenance_margin: - positions_as_list = self.perf_tracker.cumulative_performance.positions.items()[:] + positions_as_list = list(self.perf_tracker.cumulative_performance.positions.items())[:] chosen_symbol, chosen_position = positions_as_list[random.randint(0, len(positions_as_list) - 1)] TradingAlgorithm.order(self, chosen_symbol, chosen_position.amount) positions_as_list.remove((chosen_symbol, chosen_position)) diff --git a/alephnull/examples/FuturesTradingAlgorithm.py.bak b/alephnull/examples/FuturesTradingAlgorithm.py.bak new file mode 100644 index 0000000..059e0e6 --- /dev/null +++ b/alephnull/examples/FuturesTradingAlgorithm.py.bak @@ -0,0 +1,151 @@ +#!/usr/bin/env python +# +# Copyright 2013 Carter Bain Wealth Management +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import matplotlib.pyplot as plt +from datetime import datetime +import pytz +import random + +from alephnull.algorithm import TradingAlgorithm +from alephnull.utils.factory import load_from_yahoo + +from collections import OrderedDict +from pandas.core.series import TimeSeries + + +class FuturesTradingAlgorithm(TradingAlgorithm): + """A wrapper around TradingAlgorithm that adds calculations for futures contracts. + + In order to have everything work in subclasses, you have to do several things: + + Create a method "_handle_margin_call(self, data) that is executed if you go below maintenance margin. + Instead of handle_data(), create handle_futures_data() + Instead of initialize(), create initialize_futures() + + """ + + def add_margin_to_bars(self, data): + # Uses some strategy to get the price at some bar and calculate appropriate + # initial and maintenance margins for that bar. 
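Both example algorithms fish the bar timestamp out of the first entry of the `data` mapping. In Python 2, `next(data.iteritems())` worked because `iteritems()` returned a true iterator; Python 3's `items()` returns a view that is iterable but not itself an iterator, so calling `next()` on it raises `TypeError`. That is why the converted line wraps the view in `iter(...)` first. A stand-in illustration (the nested dict mimics a one-symbol bar, not the real data object):

    data = {'GS': {'price': 171.2, 'datetime': '2013-01-02 21:00:00+00:00'}}

    # next(data.items()) -> TypeError: 'dict_items' object is not an iterator
    symbol, bar = next(iter(data.items()))
    print(symbol, bar['datetime'])

Note that this grabs an arbitrary entry on older dicts and the insertion-ordered first entry on CPython 3.7+; the pattern is safe here only because every bar in the mapping shares one timestamp.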
+ # TODO: log once you figure out how zipline's logging works + pass + + def handle_futures_data(self): + """Up to subclasses to implement""" + pass + + def initialize_futures(self, *args, **kwargs): + """Up to subclasses to implement""" + pass + + def _handle_margin_call(self): + """Up to subclasses to implement, though this class does provide a few premade procedures + like _liquidate_random_positions""" + pass + + def _liquidate_random_positions(self): + """Liquidate an entire position (the position in particular is chosen at random) until we are back above + maintenance margin.""" + while self.margin_account_value < self.total_maintenance_margin: + positions_as_list = self.perf_tracker.cumulative_performance.positions.items()[:] + chosen_symbol, chosen_position = positions_as_list[random.randint(0, len(positions_as_list) - 1)] + TradingAlgorithm.order(self, chosen_symbol, chosen_position.amount) + positions_as_list.remove((chosen_symbol, chosen_position)) + + self.total_maintenance_margin = sum( + [position.last_sale_price * 0.32 * position.amount for symbol, position in positions_as_list]) + + @property + def margin_account_log(self): + return TimeSeries(self._margin_account_log) + + +class BuyGoogleAsFuture(FuturesTradingAlgorithm): + + def initialize_futures(self, *args, **kwargs): + pass + + def handle_futures_data(self, data): + self.order("GOOG", 1, initial_margin=data['GOOG']['initial_margin']) + + def _handle_margin_call(self): + self._liquidate_random_positions() + +if __name__ == '__main__': + start = datetime(2008, 1, 1, 0, 0, 0, 0, pytz.utc) + end = datetime(2013, 1, 1, 0, 0, 0, 0, pytz.utc) + data = load_from_yahoo(stocks=["GOOG"], indexes={}, start=start, + end=end, adjusted=True) + simple_algo = BuyGoogleAsFuture() + results = simple_algo.run(data) + + ax1 = plt.subplot(211) + futures_indexes = list(simple_algo.margin_account_log.keys()) + futures_margin_data = list(simple_algo.margin_account_log.values) + + futures_margin_series = TimeSeries(index=futures_indexes, data=futures_margin_data) + futures_margin_series.plot(ax=ax1) + + ax2 = plt.subplot(212, sharex=ax1) + data.GOOG.plot(ax=ax2) + + plt.gcf().set_size_inches(18, 8) \ No newline at end of file diff --git a/alephnull/examples/buystockasfuture.py b/alephnull/examples/buystockasfuture.py index 2bf8189..c37f7f3 100644 --- a/alephnull/examples/buystockasfuture.py +++ b/alephnull/examples/buystockasfuture.py @@ -62,7 +62,7 @@ def handle_data(self, data): # overload handle_data() method initial_quantity = 50 self.order(SYMBOL, initial_quantity) position.margin += initial_margin * initial_quantity - print(position.margin) + print((position.margin)) self._first_pass = False self.last_price = price return @@ -73,7 +73,7 @@ def handle_data(self, data): # overload handle_data() method quantity_owned = position.amount margin = position.margin # don't ask... 
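[Editor's note: the liquidation policy in _liquidate_random_positions above reduces to: drop one randomly chosen position at a time until the margin account covers total maintenance margin again. A self-contained sketch under the same 32%-of-price assumption; Position here is a hypothetical stand-in for the perf-tracker position object.]

```python
import random
from collections import namedtuple

# Hypothetical stand-in for the perf-tracker position objects used above.
Position = namedtuple('Position', ['amount', 'last_sale_price'])

def liquidate_until_covered(margin_account_value, positions):
    """Remove randomly chosen positions until maintenance margin
    (0.32 * price * amount, per the heuristic above) is covered."""
    positions = dict(positions)
    total = lambda: sum(p.last_sale_price * 0.32 * p.amount
                        for p in positions.values())
    while positions and margin_account_value < total():
        # stands in for ordering the offsetting amount and dropping the entry
        del positions[random.choice(list(positions))]
    return positions

print(liquidate_until_covered(100.0, {'GC': Position(10, 150.0),
                                      'SI': Position(5, 30.0)}))
```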
- timestamp = next(data[0].iteritems() if type(data) is list else data.iteritems())[1]['datetime'] + timestamp = next(iter(data[0].items()) if type(data) is list else iter(data.items()))[1]['datetime'] TRACK.append((margin, quantity_owned, timestamp)) if maintenance_margin * quantity_owned > margin: diff --git a/alephnull/examples/buystockasfuture.py.bak b/alephnull/examples/buystockasfuture.py.bak new file mode 100644 index 0000000..2bf8189 --- /dev/null +++ b/alephnull/examples/buystockasfuture.py.bak @@ -0,0 +1,132 @@ +#!/usr/bin/env python +# +# Copyright 2013 Carter Bain Wealth Management +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import matplotlib.pyplot as plt +from datetime import datetime +import pytz + +from alephnull.algorithm import TradingAlgorithm +from alephnull.utils.factory import load_from_yahoo + +from pandas.core.series import TimeSeries + +SYMBOL = 'GS' +TRACK = [] +DAT = [None] +DIFFS = [] +SHORTFALL_STRATEGY = "sell" + + +class BuyStock(TradingAlgorithm): + """This is the simplest possible algorithm that does nothing but + buy 1 share of SYMBOL on each event. + """ + + def add_margin(self, data): + # Uses some strategy to get the price at some bar and calculate appropriate + # initial and maintenance margins for that bar. + # Ideally we would use SPAN margining; however, based on some naive data analysis, + # the max a stock changes in a several day period (up to 30 days) is about 42%. + # Change this when you have a better strategy! + initial_margin = data[SYMBOL]['price'] * 0.42 + maintenance_margin = data[SYMBOL]['price'] * 0.32 + data[SYMBOL].__dict__.update({'initial_margin': initial_margin}) + data[SYMBOL].__dict__.update({'maintenance_margin': maintenance_margin}) + + def initialize(self, *args, **kwargs): + self._first_pass = True + self.futures_results + + def handle_data(self, data): # overload handle_data() method + DAT[0] = data + self.add_margin(data) + position = self.perf_tracker.cumulative_performance.positions[SYMBOL] + maintenance_margin = data[SYMBOL]['maintenance_margin'] + initial_margin = data[SYMBOL]['initial_margin'] + price = data[SYMBOL].price + + if self._first_pass: + initial_quantity = 50 + self.order(SYMBOL, initial_quantity) + position.margin += initial_margin * initial_quantity + print(position.margin) + self._first_pass = False + self.last_price = price + return + else: + DIFFS.append((self.last_price - price) / price) + + + quantity_owned = position.amount + margin = position.margin + # don't ask... 
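[Editor's note on the timestamp hunk above: Python 3 dict views are iterables, not iterators, so next(data.items()) raises a TypeError and the migration has to wrap the view in iter(). A toy reproduction; the one-sid bar below is a hypothetical stand-in for the data dicts used in these examples.]

```python
import datetime

# Hypothetical one-sid bar shaped like the data dicts used above.
bar = {'GS': {'price': 170.0, 'datetime': datetime.datetime(2013, 1, 2)}}

# next(bar.items())  # TypeError: 'dict_items' object is not an iterator
sid, measures = next(iter(bar.items()))
print(measures['datetime'])  # 2013-01-02 00:00:00
```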
+ timestamp = next(data[0].iteritems() if type(data) is list else data.iteritems())[1]['datetime'] + + TRACK.append((margin, quantity_owned, timestamp)) + if maintenance_margin * quantity_owned > margin: + if SHORTFALL_STRATEGY == "sell": + TRACK.append("SELL") + # sell enough so that your margin account is back above initial margin for every contract + quantity_to_sell = int(initial_margin * quantity_owned ** 2 / margin - quantity_owned) + 1 + self.order(SYMBOL, -1*quantity_to_sell) + if quantity_to_sell == 0: + TRACK.append(str(timestamp) + " had a 0-sell!") + elif SHORTFALL_STRATEGY == "buffer": + # put some more money from elsewhere into the account + pass + elif margin > 1.5*(maintenance_margin * quantity_owned): + # we've got too much in margin - we need to make our money work for us! + # buy as many contracts as we can until buying another would put us under + # 1.25 * required margin + TRACK.append("BUY") + max_funds_available = margin - 1.25*(maintenance_margin * quantity_owned) + quantity_to_buy = int(max_funds_available / initial_margin) + + + # we don't have to update the margin because the same amount of cash is still in the margin account, + # it is just distributed over a larger number of contracts + if quantity_to_buy == 0: + TRACK.append("0 to buy, what a shame") + else: + self.order(SYMBOL, quantity_to_buy) # order SID (=0) and amount (=1 shares) + + if quantity_to_buy == 0: + TRACK.append(str(timestamp) + " had a 0-sell!") + + self.last_price = price + + +if __name__ == '__main__': + start = datetime(2008, 1, 1, 0, 0, 0, 0, pytz.utc) + end = datetime(2013, 1, 1, 0, 0, 0, 0, pytz.utc) + data = load_from_yahoo(stocks=[SYMBOL], indexes={}, start=start, + end=end, adjusted=True) + simple_algo = BuyStock() + results = simple_algo.run(data) + + ax1 = plt.subplot(211) + ax2 = plt.subplot(212) + TRACK_STRIPPED = [x for x in TRACK if type(x) == tuple] + futures_indexes = [timestamp for (_, _, timestamp) in TRACK_STRIPPED] + futures_quantity_data = [quantity_owned for (_, quantity_owned, _) in TRACK_STRIPPED] + futures_margin_data = [margin for (margin, _, _) in TRACK_STRIPPED] + + futures_margin_series = TimeSeries(index=futures_indexes, data=futures_margin_data) + futures_margin_series.plot(ax=ax1) + futures_quantity_series = TimeSeries(index=futures_indexes, data=futures_quantity_data) + futures_quantity_series.plot(ax=ax2) + + plt.gcf().set_size_inches(18, 8) \ No newline at end of file diff --git a/alephnull/examples/dual_moving_average.py b/alephnull/examples/dual_moving_average.py index f641b9d..d7b09d7 100644 --- a/alephnull/examples/dual_moving_average.py +++ b/alephnull/examples/dual_moving_average.py @@ -96,5 +96,5 @@ def handle_data(self, data): plt.legend(loc=0) sharpe = [risk['sharpe'] for risk in dma.risk_report['one_month']] - print "Monthly Sharpe ratios:", sharpe + print("Monthly Sharpe ratios:", sharpe) plt.gcf().set_size_inches(18, 8) diff --git a/alephnull/examples/dual_moving_average.py.bak b/alephnull/examples/dual_moving_average.py.bak new file mode 100644 index 0000000..f641b9d --- /dev/null +++ b/alephnull/examples/dual_moving_average.py.bak @@ -0,0 +1,100 @@ +#!/usr/bin/env python +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import matplotlib.pyplot as plt + +from alephnull.algorithm import TradingAlgorithm +from alephnull.finance import trading +from alephnull.transforms import MovingAverage +from alephnull.utils.factory import load_from_yahoo + +from datetime import datetime +import pytz + + +class DualMovingAverage(TradingAlgorithm): + """Dual Moving Average Crossover algorithm. + + This algorithm buys apple once its short moving average crosses + its long moving average (indicating upwards momentum) and sells + its shares once the averages cross again (indicating downwards + momentum). + + """ + def initialize(self, short_window=20, long_window=40): + # Add 2 mavg transforms, one with a long window, one + # with a short window. + self.add_transform(MovingAverage, 'short_mavg', ['price'], + window_length=short_window) + + self.add_transform(MovingAverage, 'long_mavg', ['price'], + window_length=long_window) + + # To keep track of whether we invested in the stock or not + self.invested = False + + def handle_data(self, data): + self.short_mavg = data['AAPL'].short_mavg['price'] + self.long_mavg = data['AAPL'].long_mavg['price'] + self.buy = False + self.sell = False + + if self.short_mavg > self.long_mavg and not self.invested: + self.order('AAPL', 5000) + self.invested = True + self.buy = True + elif self.short_mavg < self.long_mavg and self.invested: + self.order('AAPL', -5000) + self.invested = False + self.sell = True + + self.record(short_mavg=self.short_mavg, + long_mavg=self.long_mavg, + buy=self.buy, + sell=self.sell) + +if __name__ == '__main__': + start = datetime(1990, 1, 1, 0, 0, 0, 0, pytz.utc) + end = datetime(1991, 1, 1, 0, 0, 0, 0, pytz.utc) + data = load_from_yahoo(stocks=['AAPL'], indexes={}, start=start, + end=end) + + dma = DualMovingAverage() + results = dma.run(data) + + br = trading.environment.benchmark_returns + bm_returns = br[(br.index >= start) & (br.index <= end)] + results['benchmark_returns'] = (1 + bm_returns).cumprod().values + results['algorithm_returns'] = (1 + results.returns).cumprod() + fig = plt.figure() + ax1 = fig.add_subplot(211, ylabel='cumulative returns') + + results[['algorithm_returns', 'benchmark_returns']].plot(ax=ax1, + sharex=True) + + ax2 = fig.add_subplot(212) + data['AAPL'].plot(ax=ax2, color='r') + results[['short_mavg', 'long_mavg']].plot(ax=ax2) + + ax2.plot(results.ix[results.buy].index, results.short_mavg[results.buy], + '^', markersize=10, color='m') + ax2.plot(results.ix[results.sell].index, results.short_mavg[results.sell], + 'v', markersize=10, color='k') + plt.legend(loc=0) + + sharpe = [risk['sharpe'] for risk in dma.risk_report['one_month']] + print "Monthly Sharpe ratios:", sharpe + plt.gcf().set_size_inches(18, 8) diff --git a/alephnull/examples/pairtrade.py b/alephnull/examples/pairtrade.py index 716e4c4..9ee863b 100644 --- a/alephnull/examples/pairtrade.py +++ b/alephnull/examples/pairtrade.py @@ -133,4 +133,4 @@ def sell_spread(self): plt.ylabel('zscored spread') plt.show() - raw_input() \ No newline at end of file + input() \ No newline at end of file diff --git a/alephnull/examples/pairtrade.py.bak 
b/alephnull/examples/pairtrade.py.bak new file mode 100644 index 0000000..716e4c4 --- /dev/null +++ b/alephnull/examples/pairtrade.py.bak @@ -0,0 +1,136 @@ +#!/usr/bin/env python +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import matplotlib.pyplot as plt +import numpy as np +import statsmodels.api as sm +from datetime import datetime +import pytz + +from alephnull.algorithm import TradingAlgorithm +from alephnull.transforms import batch_transform +from alephnull.utils.factory import load_from_yahoo + + +@batch_transform +def ols_transform(data, sid1, sid2): + """Computes regression coefficient (slope and intercept) + via Ordinary Least Squares between two SIDs. + """ + p0 = data.price[sid1] + p1 = sm.add_constant(data.price[sid2], prepend=True) + slope, intercept = sm.OLS(p0, p1).fit().params + + return slope, intercept + + +class Pairtrade(TradingAlgorithm): + """Pairtrading relies on cointegration of two stocks. + + The expectation is that once the two stocks have drifted apart + (i.e. a spread has opened up), they will eventually revert again. Thus, + if we short the upward-drifting stock and go long the downward-drifting + stock (in short, we buy the spread) once the spread has widened, we can + sell the spread at a profit once the prices converge again. A nice + property of this algorithm is that we enter the market in a neutral + position. + + This specific algorithm tries to exploit the cointegration of + Pepsi and Coca Cola by estimating the correlation between the + two. Divergence of the spread is evaluated by z-scoring. + """ + + def initialize(self, window_length=100): + self.spreads = [] + self.invested = 0 + self.window_length = window_length + self.ols_transform = ols_transform(refresh_period=self.window_length, + window_length=self.window_length) + + def handle_data(self, data): + ###################################################### + # 1. Compute regression coefficients between PEP and KO + params = self.ols_transform.handle_data(data, 'PEP', 'KO') + if params is None: + return + intercept, slope = params + + ###################################################### + # 2. Compute spread and zscore + zscore = self.compute_zscore(data, slope, intercept) + self.record(zscores=zscore) + + ###################################################### + # 3. Place orders + self.place_orders(data, zscore) + + def compute_zscore(self, data, slope, intercept): + """1. Compute the spread given slope and intercept. + 2. zscore the spread. + """ + spread = (data['PEP'].price - (slope * data['KO'].price + intercept)) + self.spreads.append(spread) + spread_wind = self.spreads[-self.window_length:] + zscore = (spread - np.mean(spread_wind)) / np.std(spread_wind) + return zscore + + def place_orders(self, data, zscore): + """Enter the spread if |zscore| >= 2, exit if |zscore| < .5.
+ """ + if zscore >= 2.0 and not self.invested: + self.order('PEP', int(100 / data['PEP'].price)) + self.order('KO', -int(100 / data['KO'].price)) + self.invested = True + elif zscore <= -2.0 and not self.invested: + self.order('PEP', -int(100 / data['PEP'].price)) + self.order('KO', int(100 / data['KO'].price)) + self.invested = True + elif abs(zscore) < .5 and self.invested: + self.sell_spread() + self.invested = False + + def sell_spread(self): + """ + decrease exposure, regardless of position long/short. + buy for a short position, sell for a long. + """ + ko_amount = self.portfolio.positions['KO'].amount + self.order('KO', -1 * ko_amount) + pep_amount = self.portfolio.positions['PEP'].amount + self.order('PEP', -1 * pep_amount) + + +if __name__ == '__main__': + start = datetime(2013, 1, 1, 0, 0, 0, 0, pytz.utc) + end = datetime(2014, 1, 1, 0, 0, 0, 0, pytz.utc) + data = load_from_yahoo(stocks=['PEP', 'KO'], indexes={}, + start=start, end=end) + + pairtrade = Pairtrade() + results = pairtrade.run(data) + data['spreads'] = np.nan + + ax1 = plt.subplot(211) + data[['PEP', 'KO']].plot(ax=ax1) + plt.ylabel('price') + plt.setp(ax1.get_xticklabels(), visible=False) + + ax2 = plt.subplot(212, sharex=ax1) + results.zscores.plot(ax=ax2, color='r') + plt.ylabel('zscored spread') + + plt.show() + raw_input() \ No newline at end of file diff --git a/alephnull/examples/test_algo.py b/alephnull/examples/test_algo.py index 0a9b838..21a68ee 100644 --- a/alephnull/examples/test_algo.py +++ b/alephnull/examples/test_algo.py @@ -37,15 +37,15 @@ contracts = np.ravel([[(''.join(month(string.letters[:26])) + str(np.random.choice([14, 15, 16])))] * len(cols) - for x in xrange(len(source.columns) / len(cols) / 2)]) + for x in range(len(source.columns) / len(cols) / 2)]) level_1 = len(source.columns) / len(contracts) * list(contracts) numsyms = len(source.columns) / (len(set(level_1)) * len(cols)) -underlyings = [''.join(sym(string.letters[:26])) for x in xrange(numsyms)] +underlyings = [''.join(sym(string.letters[:26])) for x in range(numsyms)] level_0 = np.ravel([[sym] * len(set(level_1)) * len(cols) for sym in underlyings]) -source.columns = pd.MultiIndex.from_tuples(zip(level_0, level_1, source.columns)) +source.columns = pd.MultiIndex.from_tuples(list(zip(level_0, level_1, source.columns))) source.index = pd.date_range(start=dt.datetime.utcnow() - dt.timedelta(days=len(source.index) - 1), end=dt.datetime.utcnow(), freq='D') @@ -55,7 +55,7 @@ class FrontTrader(TradingAlgorithm): @roll(lambda x: x[x['open_interest'] == x['open_interest'].max()]) def handle_data(self, data): - for sym in data.keys(): + for sym in list(data.keys()): self.order((sym, data[sym]['contract']), 2) return data diff --git a/alephnull/examples/test_algo.py.bak b/alephnull/examples/test_algo.py.bak new file mode 100644 index 0000000..0a9b838 --- /dev/null +++ b/alephnull/examples/test_algo.py.bak @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# +# Copyright 2013 Carter Bain Wealth Management +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import datetime as dt +import string + +import numpy as np +import pandas as pd +from pandas import DataFrame + +from alephnull.algorithm import TradingAlgorithm +from alephnull.sources.futures_data_frame_source import FuturesDataFrameSource +from alephnull.roll_method import roll + + +source = DataFrame(np.random.uniform(100, 200, [60, 30])) +cols = ['price', 'volume', 'open_interest'] +scale = (len(source.columns) / len(cols)) +source.columns = [scale * cols] +sym = lambda x: np.random.choice([abc for abc in x], + np.random.choice([2, 3])) +month = lambda x: np.random.choice([abc for abc in x], + np.random.choice([1])) + +contracts = np.ravel([[(''.join(month(string.letters[:26])) + + str(np.random.choice([14, 15, 16])))] * len(cols) + for x in xrange(len(source.columns) / len(cols) / 2)]) + +level_1 = len(source.columns) / len(contracts) * list(contracts) + +numsyms = len(source.columns) / (len(set(level_1)) * len(cols)) +underlyings = [''.join(sym(string.letters[:26])) for x in xrange(numsyms)] +level_0 = np.ravel([[sym] * len(set(level_1)) * len(cols) for sym in underlyings]) + +source.columns = pd.MultiIndex.from_tuples(zip(level_0, level_1, source.columns)) +source.index = pd.date_range(start=dt.datetime.utcnow() - dt.timedelta(days=len(source.index) - 1), + end=dt.datetime.utcnow(), freq='D') + +futdata = FuturesDataFrameSource(source.tz_localize('UTC')) + + +class FrontTrader(TradingAlgorithm): + @roll(lambda x: x[x['open_interest'] == x['open_interest'].max()]) + def handle_data(self, data): + for sym in data.keys(): + self.order((sym, data[sym]['contract']), 2) + return data + + +bot = FrontTrader() +stats = bot.run(futdata) diff --git a/alephnull/experiment/dummy_futures_data_generator.py b/alephnull/experiment/dummy_futures_data_generator.py index 73301ad..51d6a06 100644 --- a/alephnull/experiment/dummy_futures_data_generator.py +++ b/alephnull/experiment/dummy_futures_data_generator.py @@ -51,7 +51,7 @@ CONTRACT_OUT_LIMIT = LESS_CONTRACT_OUT_LIMIT STEP = LESS_STEP -CONTRACT_COUNT = sum([sum([1 for m in month_list]) for month_list in [x for x in ACCEPTABLE_SYMBOLS.itervalues()]]) +CONTRACT_COUNT = sum([sum([1 for m in month_list]) for month_list in [x for x in ACCEPTABLE_SYMBOLS.values()]]) @@ -68,7 +68,7 @@ def __init__(self, iterator): def __iter__(self): return self - def next(self): + def __next__(self): self.last_element = self.current_element self.current_element = next(self.iterator) return self.current_element @@ -78,7 +78,7 @@ def last(self): def lazy_contracts(): - for symbol, months in ACCEPTABLE_SYMBOLS.iteritems(): + for symbol, months in ACCEPTABLE_SYMBOLS.items(): for month in list(months): for year in range(datetime.date.today().year, CONTRACT_OUT_LIMIT + 1): short_year = year - 2000 @@ -159,12 +159,12 @@ def create_dummy_universe_dict(): def dataframe_from_universe_dict(universe_dict): timestamps = [] outer_frames = [] - for timestamp, hl_ticker_dict in universe_dict.iteritems(): + for timestamp, hl_ticker_dict in universe_dict.items(): timestamps.append(timestamp) inner_frames = [] hl_tickers = [] - for hl_ticker, low_level_ticker_dict in hl_ticker_dict.iteritems(): + for hl_ticker, low_level_ticker_dict in hl_ticker_dict.items(): hl_tickers.append(hl_ticker) inner_frames.append(DataFrame.from_dict(low_level_ticker_dict, orient='index')) hl_ticker_frame = pd.concat(inner_frames, keys=hl_tickers) diff --git a/alephnull/experiment/dummy_futures_data_generator.py.bak b/alephnull/experiment/dummy_futures_data_generator.py.bak new file mode 100644 index 
0000000..73301ad --- /dev/null +++ b/alephnull/experiment/dummy_futures_data_generator.py.bak @@ -0,0 +1,224 @@ +"""Generates fairly plausible random dummy universe data + +Naming conventions: + +symbol - a high level symbol (i.e. a "contract constructor" in functional terms) like YG +contract - a low level symbol that represents a specific contract like ZLF17 +""" + +from pandas.tslib import Timestamp +from pandas.core.frame import DataFrame +from collections import OrderedDict +import datetime +import random +import pytz +import pandas as pd + +# Presets + +MORE_ACCEPTABLE_SYMBOLS = { + "GC": "GHMQVZ", + "SI": "HKNUZ", + "HG": "HKNUZ", + "PL": "FJNV", + "PA": "HMUZ", + "CT": "HKNVZ", + "OJ": "FHKNUX", + "KC": "HKNUZ", + "ES": "HMUZ", + "LE": "GJMQVZ", + "ZL": "FHKNQUVZ", +} + +LESS_ACCEPTABLE_SYMBOLS = { + "GC": "GHV", + "HG": "HUZ", +} + +MORE_BAR_RANGE = (Timestamp('2013-05-13 13:30:00+0000', tz='UTC'), Timestamp('2013-09-11 20:30:00+0000', tz='UTC')) +LESS_BAR_RANGE = (Timestamp('2013-05-13 13:30:00+0000', tz='UTC'), Timestamp('2013-05-15 20:30:00+0000', tz='UTC')) + +MORE_CONTRACT_OUT_LIMIT = 2020 +LESS_CONTRACT_OUT_LIMIT = 2014 + +MORE_STEP = datetime.timedelta(minutes=30) +LESS_STEP = datetime.timedelta(days=1) + +# Configuration + +ACCEPTABLE_SYMBOLS = LESS_ACCEPTABLE_SYMBOLS +BAR_RANGE = LESS_BAR_RANGE +CONTRACT_OUT_LIMIT = LESS_CONTRACT_OUT_LIMIT +STEP = LESS_STEP + +CONTRACT_COUNT = sum([sum([1 for m in month_list]) for month_list in [x for x in ACCEPTABLE_SYMBOLS.itervalues()]]) + + + + +class PrevIterator(object): + """Iterator with the capability to fetch the previous element + (though history does not go back any farther). + """ + def __init__(self, iterator): + self.iterator = iterator + self.current_element = None + self.last_element = None + + def __iter__(self): + return self + + def next(self): + self.last_element = self.current_element + self.current_element = next(self.iterator) + return self.current_element + + def last(self): + return self.last_element + + +def lazy_contracts(): + for symbol, months in ACCEPTABLE_SYMBOLS.iteritems(): + for month in list(months): + for year in range(datetime.date.today().year, CONTRACT_OUT_LIMIT + 1): + short_year = year - 2000 + yield (symbol, month, str(short_year)) + + +def lazy_timestamps(): + start = BAR_RANGE[0] + end = BAR_RANGE[1] + exchange_opens = datetime.time(hour=13, minute=30) # UTC + exchange_closes = datetime.time(hour=20, minute=0) # UTC + step = STEP + + running_timestamp = start + while running_timestamp <= end: + yield running_timestamp + if exchange_opens <= running_timestamp.time() <= exchange_closes: + running_timestamp += step + elif running_timestamp.time() < exchange_opens: + d = running_timestamp.date() + z = running_timestamp.tz + running_timestamp = Timestamp(d, exchange_opens, z) + elif running_timestamp.time() > exchange_closes: + d = running_timestamp.date() + z = running_timestamp.tz + running_timestamp = datetime.datetime.combine(d + datetime.timedelta(days=1), exchange_opens) + running_timestamp = running_timestamp.replace(tzinfo=pytz.UTC) + running_timestamp = Timestamp(running_timestamp) + + +def create_dummy_universe_dict(): + """ + WARNING: Because the underlying data structure has to be highly nested, the logic in here + will be highly nested. 
+ """ + universe_dict = OrderedDict() + timestamps = PrevIterator(lazy_timestamps()) + for timestamp in timestamps: + universe_dict[timestamp] = {} + for symbol, month, short_year in lazy_contracts(): + if symbol not in universe_dict[timestamp]: + universe_dict[timestamp][symbol] = {} + expiry = month + str(short_year) + universe_dict[timestamp][symbol][expiry] = {} + + if timestamps.last() in universe_dict: + old_price = universe_dict[timestamps.last()][symbol][expiry]["Price"] + price_percent_change = 0.1 + new_price = random.gauss(mu=old_price, sigma=old_price * price_percent_change) + + old_open_interest = universe_dict[timestamps.last()][symbol][expiry]["Open Interest"] + open_interest_percent_change = 0.1 + new_open_interest = random.gauss(mu=old_open_interest, + sigma=old_open_interest * open_interest_percent_change) + + # For now, assume all margin requirements stay static. + # In the future: read the SPAN Margining handout for an algorithm + old_margin_requirements = universe_dict[timestamps.last()][symbol][expiry]["Margin Requirements"] + new_margin_requirements = old_margin_requirements + else: + # First price + new_price = random.random() * 100 + new_open_interest = random.random() * 2000 + new_margin_requirements = 100.00 + + new_price = round(new_price, 2) + universe_dict[timestamp][symbol][expiry]["Price"] = new_price + + new_open_interest = int(round(new_open_interest, 0)) + universe_dict[timestamp][symbol][expiry]["Open Interest"] = new_open_interest + + new_margin_requirements = round(new_margin_requirements, 2) + universe_dict[timestamp][symbol][expiry]["Margin Requirements"] = new_margin_requirements + + return universe_dict + + +def dataframe_from_universe_dict(universe_dict): + timestamps = [] + outer_frames = [] + for timestamp, hl_ticker_dict in universe_dict.iteritems(): + timestamps.append(timestamp) + + inner_frames = [] + hl_tickers = [] + for hl_ticker, low_level_ticker_dict in hl_ticker_dict.iteritems(): + hl_tickers.append(hl_ticker) + inner_frames.append(DataFrame.from_dict(low_level_ticker_dict, orient='index')) + hl_ticker_frame = pd.concat(inner_frames, keys=hl_tickers) + outer_frames.append(hl_ticker_frame) + + universe_df = pd.concat(outer_frames, keys=timestamps) + return universe_df + +""" +A small set of dummy futures data will have this structure: + +{Timestamp('2013-05-13 07:45:49+0000', tz='UTC'): + {'YG': + {'F15': + {'Price': 180.00, + 'Open Index': 1000, + }, + 'N16': + {'Price': 250.75, + 'Open Index': 2000, + }, + }, + 'XSN': + {'F15': + {'Price': 360.00, + 'Open Index': 4682, + }, + 'N16': + {'Price': 405.75, + 'Open Index': 4001, + }, + }, + }, + Timestamp('2013-05-13 08:45:49+0000', tz='UTC'): + {'YG': + {'F15': + {'Price': 195.66, + 'Open Index': 996, + }, + 'N16': + {'Price': 266.75, + 'Open Index': 2003, + }, + }, + 'XSN': + {'F15': + {'Price': 358.08, + 'Open Index': 5000, + }, + 'N16': + {'Price': 402.75, + 'Open Index': 4002, + }, + }, + }, +} +""" \ No newline at end of file diff --git a/alephnull/finance/blotter.py b/alephnull/finance/blotter.py index c573a4d..eb8fc2b 100644 --- a/alephnull/finance/blotter.py +++ b/alephnull/finance/blotter.py @@ -300,9 +300,7 @@ def process_trade(self, trade_event): orders = self.open_orders[sid] orders = sorted(orders, key=lambda o: o.dt) # Only use orders for the current day or before - current_orders = filter( - lambda o: o.dt <= trade_event.dt, - orders) + current_orders = [o for o in orders if o.dt <= trade_event.dt] else: return diff --git a/alephnull/finance/blotter.py.bak 
b/alephnull/finance/blotter.py.bak new file mode 100644 index 0000000..c573a4d --- /dev/null +++ b/alephnull/finance/blotter.py.bak @@ -0,0 +1,455 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import math +import uuid +from copy import copy +from collections import defaultdict + +from logbook import Logger +import numpy as np + +import alephnull.errors +import alephnull.protocol as zp +from alephnull.finance.slippage import ( + VolumeShareSlippage, + transact_partial, + check_order_triggers + ) +from alephnull.finance.commission import PerShare +import alephnull.utils.math_utils as zp_math + + +log = Logger('Blotter') + +from alephnull.utils.protocol_utils import Enum + +ORDER_STATUS = Enum( + 'OPEN', + 'FILLED', + 'CANCELLED' +) + + +# On an order to buy, between .05 below to .95 above a penny, use that penny. +# On an order to sell, between .05 above to .95 below a penny, use that penny. +# buy: [.0095, .0195) -> round to .01, sell: (.0005, .0105] -> round to .01 +def round_for_minimum_price_variation(x, is_buy, diff=(0.0095 - .005)): + # relies on rounding half away from zero, unlike numpy's bankers' rounding + rounded = round(x - (diff if is_buy else -diff), 2) + if zp_math.tolerant_equals(rounded, 0.0): + return 0.0 + return rounded + + +class Blotter(object): + def __init__(self): + self.transact = transact_partial(VolumeShareSlippage(), PerShare()) + # these orders are aggregated by sid + self.open_orders = defaultdict(list) + # keep a dict of orders by their own id + self.orders = {} + # holding orders that have come in since the last + # event. + self.new_orders = [] + self.current_dt = None + self.max_shares = int(1e+11) + + def __repr__(self): + return """ +{class_name}( + transact_partial={transact_partial}, + open_orders={open_orders}, + orders={orders}, + new_orders={new_orders}, + current_dt={current_dt}) +""".strip().format(class_name=self.__class__.__name__, + transact_partial=self.transact.args, + open_orders=self.open_orders, + orders=self.orders, + new_orders=self.new_orders, + current_dt=self.current_dt) + + def set_date(self, dt): + self.current_dt = dt + + def order(self, sid, amount, limit_price, stop_price, order_id=None): + + # something could be done with amount to further divide + # between buy by share count OR buy shares up to a dollar amount + # numeric == share count AND "$dollar.cents" == cost amount + + """ + amount > 0 :: Buy/Cover + amount < 0 :: Sell/Short + Market order: order(sid, amount) + Limit order: order(sid, amount, limit_price) + Stop order: order(sid, amount, None, stop_price) + StopLimit order: order(sid, amount, limit_price, stop_price) + """ + + if type(sid) is tuple: + whole_sid = sid + contract = sid[1] + sid = sid[0] + else: + whole_sid = sid + + # Fractional shares are not supported. + amount = int(amount) + + + # just validates amount and passes rest on to TransactionSimulator + # Tell the user if they try to buy 0 shares of something. 
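[Editor's note: one migration hazard in this file that the diff does not touch. The comment on round_for_minimum_price_variation says it "relies on rounding half away from zero", which was round()'s behavior on Python 2. Python 3's built-in round() rounds half to even (banker's rounding), so exact-half inputs can now round differently. A quick demonstration:]

```python
# Python 3 rounds half to even; Python 2 rounded half away from zero.
print(round(0.5))  # 0 on Python 3 (1.0 on Python 2)
print(round(1.5))  # 2 on both
print(round(2.5))  # 2 on Python 3 (3.0 on Python 2)
```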
+ if amount == 0: + zero_message = "Requested to trade zero shares of {psid}".format( + psid=sid + ) + log.debug(zero_message) + # Don't bother placing orders for 0 shares. + return + elif amount > self.max_shares: + # Arbitrary limit of 100 billion (US) shares will never be + # exceeded except by a buggy algorithm. + raise OverflowError("Can't order more than %d shares" % + self.max_shares) + + if limit_price: + limit_price = round_for_minimum_price_variation(limit_price, + amount > 0) + + order = Order( + dt=self.current_dt, + sid=sid, + amount=amount, + filled=0, + stop=stop_price, + limit=limit_price, + id=order_id + ) + + if 'contract' in locals(): + order.contract = contract + + # initialized filled field. + order.filled = 0 + self.open_orders[whole_sid].append(order) + self.orders[order.id] = order + self.new_orders.append(order) + + return order.id + + def order_value(self, sid, value, last_price, + limit_price=None, stop_price=None): + """ + Place an order by desired value rather than desired number of shares. + If the requested sid is found in the universe, the requested value is + divided by its price to imply the number of shares to transact. + + value > 0 :: Buy/Cover + value < 0 :: Sell/Short + Market order: order(sid, value) + Limit order: order(sid, value, limit_price) + Stop order: order(sid, value, None, stop_price) + StopLimit order: order(sid, value, limit_price, stop_price) + """ + if np.allclose(last_price, 0): + zero_message = "Price of 0 for {psid}; can't infer value".format( + psid=sid + ) + log.debug(zero_message) + # Don't place any order + return + else: + amount = value / last_price + return self.order(sid, amount, limit_price, stop_price) + + def cancel(self, order_id): + if order_id not in self.orders: + return + + cur_order = self.orders[order_id] + if cur_order.open: + order_list = self.open_orders[cur_order.sid] + if cur_order in order_list: + order_list.remove(cur_order) + + if cur_order in self.new_orders: + self.new_orders.remove(cur_order) + cur_order.status = ORDER_STATUS.CANCELLED + cur_order.dt = self.current_dt + # we want this order's new status to be relayed out + # along with newly placed orders. 
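[Editor's note: order_value() above is a price-normalized wrapper around order(): divide the requested dollar value by the last price, guarding against a zero price. A standalone sketch of that arithmetic with hypothetical inputs; the real method logs and returns instead of raising.]

```python
import numpy as np

def shares_for_value(value, last_price):
    """Sketch of order_value(): a dollar value implies a share count.
    A zero price means no order can be inferred (the method logs and bails)."""
    if np.allclose(last_price, 0):
        return None
    return value / last_price

print(shares_for_value(10000, 171.5))  # ~58.3; order() later truncates to int
```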
+ self.new_orders.append(cur_order) + + def process_split(self, split_event): + if split_event.sid not in self.open_orders: + return + + orders_to_modify = self.open_orders[split_event.sid] + for order in orders_to_modify: + order.handle_split(split_event) + + def update_account(self, portfolio): + self.portfolio = portfolio + + def handle_leverage(self, txn, order): + leverage_err = 'INSUFFICIENT CAPITAL\n' \ + 'requested to transact ${} of {}, with ${} available \n' \ + 'filled {} of {} shares, outstanding {} shares cancelled' + + #if this offsets an existing position return + + if self.portfolio.positions[order.sid].amount + txn.amount != 0: + #test to see if this is a short position + + if txn.amount + self.portfolio.positions[order.sid].amount < 0: + if order.direction < 0: + if abs(txn.price * (txn.amount + order.filled)) * .5 > self.portfolio.portfolio_value: + log.info(leverage_err.format( + txn.price * (txn.amount + order.filled), + order.sid, self.portfolio.portfolio_value / .5, + order.filled, order.amount, + order.amount - order.filled)) + order.direction *= -1 + self.cancel(order.id) + txn.amount = -0 + txn.commission = 0 + + + #test to see if this is a long position + if txn.amount + self.portfolio.positions[order.sid].amount > 0: + if order.direction > 0: + if txn.price * (txn.amount + order.filled) > self.portfolio.cash: + log.info(leverage_err.format( + txn.price * (txn.amount + order.filled), + order.sid, self.portfolio.cash, + order.filled, order.amount, + order.amount - order.filled)) + self.cancel(order.id) + txn.amount = 0 + txn.commission = 0 + + def futures_handle_leverage(self, txn, order): + leverage_err = 'INSUFFICIENT CAPITAL\n' \ + 'requested to transact ${} of {}, with ${} available \n' \ + 'filled {} of {} shares, outstanding {} shares cancelled' + + #if this offsets an existing position return + + sid = (order.sid, order.contract) + + if self.portfolio.positions[sid].amount + txn.amount != 0: + #test to see if this is a short position + + if txn.amount + self.portfolio.positions[sid].amount < 0: + if order.direction < 0: + if abs(txn.price * (txn.amount + order.filled)) * .5 > self.portfolio.portfolio_value: + log.info(leverage_err.format( + txn.price * (txn.amount + order.filled), + order.sid, self.portfolio.portfolio_value / .5, + order.filled, order.amount, + order.amount - order.filled)) + order.direction *= -1 + self.cancel(order.id) + txn.amount = -0 + txn.commission = 0 + + #test to see if this is a long position + if txn.amount + self.portfolio.positions[sid].amount > 0: + if order.direction > 0: + if txn.price * (txn.amount + order.filled) > self.portfolio.cash: + log.info(leverage_err.format( + txn.price * (txn.amount + order.filled), + order.sid, self.portfolio.cash, + order.filled, order.amount, + order.amount - order.filled)) + self.cancel(order.id) + txn.amount = 0 + txn.commission = 0 + + def process_trade(self, trade_event): + + if trade_event.type != zp.DATASOURCE_TYPE.TRADE: + return + + if zp_math.tolerant_equals(trade_event.volume, 0): + # there are zero volume trade_events bc some stocks trade + # less frequently than once per minute. 
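[Editor's note: the two branches of handle_leverage() above reduce to a pair of checks: a short is cancelled when half of its notional exceeds portfolio value, and a long is cancelled when its cost exceeds available cash. Reduced here to pure functions with hypothetical numbers:]

```python
def short_allowed(price, total_shares, portfolio_value):
    """Shorts are cancelled when |price * shares| * 0.5 exceeds portfolio value."""
    return abs(price * total_shares) * .5 <= portfolio_value

def long_allowed(price, total_shares, cash):
    """Longs are cancelled when the transaction cost exceeds available cash."""
    return price * total_shares <= cash

print(short_allowed(100.0, -500, 20000.0))  # False: 25000.0 > 20000.0
print(long_allowed(100.0, 150, 10000.0))    # False: 15000.0 > 10000.0
```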
+ return + + if 'contract' in trade_event.__dict__: + sid = (trade_event.sid, trade_event.contract) + else: + sid = trade_event.sid + + if sid in self.open_orders: + orders = self.open_orders[sid] + orders = sorted(orders, key=lambda o: o.dt) + # Only use orders for the current day or before + current_orders = filter( + lambda o: o.dt <= trade_event.dt, + orders) + else: + return + + for order, txn in self.transact(trade_event, current_orders): + if txn.type == zp.DATASOURCE_TYPE.COMMISSION: + yield txn, order + continue + + if 'contract' in order.__dict__: + self.futures_handle_leverage(txn, order) + else: + self.handle_leverage(txn, order) + + transaction_cost = txn.amount * txn.price + self.portfolio.cash -= transaction_cost + self.portfolio.positions_value += transaction_cost + self.portfolio.portfolio_value = self.portfolio.cash + self.portfolio.positions_value + + # if txn.amount == 0: + # raise alephnull.errors.TransactionWithNoAmount(txn=txn) + + if math.copysign(1, txn.amount) != order.direction: + raise alephnull.errors.TransactionWithWrongDirection( + txn=txn, order=order) + if abs(txn.amount) > abs(self.orders[txn.order_id].amount): + raise alephnull.errors.TransactionVolumeExceedsOrder( + txn=txn, order=order) + + order.filled += txn.amount + # mark the date of the order to match the transaction + # that is filling it. + order.dt = txn.dt + + yield txn, order + + # update the open orders for the trade_event's sid + self.open_orders[sid] = \ + [order for order + in self.open_orders[sid] + if order.open] + + +class Order(object) : + def __init__(self, dt, sid, amount, stop=None, limit=None, filled=0, id=None, contract=None): + """ + @dt - datetime.datetime that the order was placed + @sid - stock sid of the order + @amount - the number of shares to buy/sell + a positive sign indicates a buy + a negative sign indicates a sell + @filled - how many shares of the order have been filled so far + """ + + if contract is not None: + self.contract = contract + # get a string representation of the uuid. + self.id = id or self.make_id() + self.dt = dt + self.created = dt + self.sid = sid + self.amount = amount + self.filled = filled + self.status = ORDER_STATUS.OPEN + self.stop = stop + self.limit = limit + self.stop_reached = False + self.limit_reached = False + self.direction = math.copysign(1, self.amount) + self.type = zp.DATASOURCE_TYPE.ORDER + + + def make_id(self): + return uuid.uuid4().hex + + def to_dict(self): + py = copy(self.__dict__) + for field in ['type', 'direction']: + del py[field] + return py + + def to_api_obj(self): + pydict = self.to_dict() + obj = zp.Order(initial_values=pydict) + return obj + + def check_triggers(self, event): + """ + Update internal state based on price triggers and the + trade event's price. + """ + stop_reached, limit_reached = \ + check_order_triggers(self, event) + if (stop_reached, limit_reached) \ + != (self.stop_reached, self.limit_reached): + self.dt = event.dt + self.stop_reached = stop_reached + self.limit_reached = limit_reached + + def handle_split(self, split_event): + ratio = split_event.ratio + + # update the amount, limit_price, and stop_price + # by the split's ratio + + # info here: http://finra.complinet.com/en/display/display_plain.html? 
+ # rbid=2403&element_id=8950&record_id=12208&print=1 + + # new_share_amount = old_share_amount / ratio + # new_price = old_price * ratio + + self.amount = int(self.amount / ratio) + + if self.limit: + self.limit = round(self.limit * ratio, 2) + + if self.stop: + self.stop = round(self.stop * ratio, 2) + + @property + def open(self): + if self.status == ORDER_STATUS.CANCELLED: + return False + + remainder = self.amount - self.filled + if remainder != 0: + self.status = ORDER_STATUS.OPEN + else: + self.status = ORDER_STATUS.FILLED + + return self.status == ORDER_STATUS.OPEN + + @property + def triggered(self): + """ + For a market order, True. + For a stop order, True IFF stop_reached. + For a limit order, True IFF limit_reached. + For a stop-limit order, True IFF (stop_reached AND limit_reached) + """ + if self.stop and not self.stop_reached: + return False + + if self.limit and not self.limit_reached: + return False + + return True + + @property + def open_amount(self): + return self.amount - self.filled diff --git a/alephnull/finance/performance/futures_period.py b/alephnull/finance/performance/futures_period.py index 07517cd..4688335 100644 --- a/alephnull/finance/performance/futures_period.py +++ b/alephnull/finance/performance/futures_period.py @@ -1,4 +1,4 @@ -from __future__ import division + import math from collections import OrderedDict, defaultdict @@ -97,7 +97,7 @@ def update_dividends(self, todays_date): payment has been disbursed. """ cash_payments = 0.0 - for sid, pos in self.positions.iteritems(): + for sid, pos in self.positions.items(): cash_payments += pos.update_dividends(todays_date) # credit our cash balance with the dividend payments, or @@ -206,7 +206,7 @@ def round_to_nearest(self, x, base=5): return int(base * round(float(x) / base)) def calculate_positions_value(self): - multipliers = [get_multiplier(symbol) for symbol in self._position_amounts.keys()] + multipliers = [get_multiplier(symbol) for symbol in list(self._position_amounts.keys())] result = 0 for amount, price, multiplier in zip(self._position_amounts, self._position_last_sale_prices, multipliers): result += amount * price * multiplier @@ -271,7 +271,7 @@ def to_dict(self, dt=None): else: transactions = \ [y.to_dict() - for x in self.processed_transactions.itervalues() + for x in self.processed_transactions.values() for y in x] rval['transactions'] = transactions @@ -279,9 +279,9 @@ def to_dict(self, dt=None): if dt: # only include orders modified as of the given dt. 
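[Editor's note: the futures_period.py hunks here all follow one rule of thumb. In Python 3, .items(), .values(), and .keys() return cheap views, so the iteritems/itervalues distinction disappears, and a list() wrap is only needed when the dict is mutated mid-iteration. A compact illustration with a toy dict:]

```python
positions = {'GC': 2, 'SI': 0, 'HG': 5}

for sid, amount in positions.items():  # view is fine for read-only loops
    pass

for sid in list(positions.keys()):     # snapshot: safe to delete while looping
    if positions[sid] == 0:
        del positions[sid]

print(sorted(positions))               # ['GC', 'HG']
```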
orders = [x.to_dict() - for x in self.orders_by_modified[dt].itervalues()] + for x in self.orders_by_modified[dt].values()] else: - orders = [x.to_dict() for x in self.orders_by_id.itervalues()] + orders = [x.to_dict() for x in self.orders_by_id.values()] rval['orders'] = orders return rval @@ -315,7 +315,7 @@ def as_portfolio(self): def get_positions(self): positions = self._positions_store - for sid, pos in self.positions.iteritems(): + for sid, pos in self.positions.items(): if sid not in positions: if type(sid) is tuple: @@ -331,7 +331,7 @@ def get_positions(self): def get_positions_list(self): positions = [] - for sid, pos in self.positions.iteritems(): + for sid, pos in self.positions.items(): if pos.amount != 0: positions.append(pos.to_dict()) return positions \ No newline at end of file diff --git a/alephnull/finance/performance/futures_period.py.bak b/alephnull/finance/performance/futures_period.py.bak new file mode 100644 index 0000000..07517cd --- /dev/null +++ b/alephnull/finance/performance/futures_period.py.bak @@ -0,0 +1,337 @@ +from __future__ import division +import math +from collections import OrderedDict, defaultdict + +import logbook +import numpy as np +import pandas as pd + +import alephnull.protocol as zp +from .position import positiondict + + +try: + from alephtools.connection import get_multiplier +except: + #Replace this with source to multiplier + get_multiplier = lambda x: 25 + +log = logbook.Logger('Performance') + + +class FuturesPerformancePeriod(object): + def __init__( + self, + starting_cash, + period_open=None, + period_close=None, + keep_transactions=True, + keep_orders=False, + serialize_positions=True): + + # * # + self.starting_mav = starting_cash + self.ending_mav = starting_cash + self.cash_adjustment = 0 + self.ending_total_value = 0.0 + self.pnl = 0.0 + # ** # + + self.period_open = period_open + self.period_close = period_close + + # sid => position object + self.positions = positiondict() + # rollover initializes a number of self's attributes: + self.rollover() + self.keep_transactions = keep_transactions + self.keep_orders = keep_orders + + # Arrays for quick calculations of positions value + self._position_amounts = pd.Series() + self._position_last_sale_prices = pd.Series() + + self.calculate_performance() + + # An object to recycle via assigning new values + # when returning portfolio information. 
+ # So as to avoid creating a new object for each event + self._portfolio_store = zp.Portfolio() + self._positions_store = zp.Positions() + self.serialize_positions = serialize_positions + + def rollover(self): + # * # + self.starting_mav = self.ending_mav + self.cash_adjustment = 0 + self.pnl = 0.0 + # ** # + + self.processed_transactions = defaultdict(list) + self.orders_by_modified = defaultdict(OrderedDict) + self.orders_by_id = OrderedDict() + self.cumulative_capital_used = 0.0 + self.max_capital_used = 0.0 + self.max_leverage = 0.0 + + def ensure_position_index(self, sid): + try: + _ = self._position_amounts[sid] + _ = self._position_last_sale_prices[sid] + except (KeyError, IndexError): + self._position_amounts = \ + self._position_amounts.append(pd.Series({sid: 0.0})) + self._position_last_sale_prices = \ + self._position_last_sale_prices.append(pd.Series({sid: 0.0})) + + def add_dividend(self, div): + pass + + def handle_split(self, split): + pass + + def update_dividends(self, todays_date): + """ + Check the payment date and ex date against today's date + to determine if we are owed a dividend payment or if the + payment has been disbursed. + """ + cash_payments = 0.0 + for sid, pos in self.positions.iteritems(): + cash_payments += pos.update_dividends(todays_date) + + # credit our cash balance with the dividend payments, or + # if we are short, debit our cash balance with the + # payments. + # debit our cumulative cash spent with the dividend + # payments, or credit our cumulative cash spent if we are + # short the stock. + self.handle_cash_payment(cash_payments) + + # recalculate performance, including the dividend + # payments + self.calculate_performance() + + def handle_cash_payment(self, payment_amount): + self.adjust_cash(payment_amount) + + def handle_commission(self, commission): + # Deduct from our total cash pool. + self.adjust_cash(-commission.cost) + # Adjust the cost basis of the stock if we own it + if commission.sid in self.positions: + self.positions[commission.sid]. \ + adjust_commission_cost_basis(commission) + + def adjust_cash(self, amount): + # * # + self.cash_adjustment += amount + # ** # + self.cumulative_capital_used -= amount + + def calculate_performance(self): + old_total_value = self.ending_total_value + old_mav = self.ending_mav + self.ending_total_value = self.calculate_positions_value() + total_value_difference = self.ending_total_value - old_total_value + self.ending_mav = old_mav + total_value_difference + self.cash_adjustment + self.cash_adjustment = 0 + + self.pnl = self.ending_mav - self.starting_mav + + if self.starting_mav != 0: + self.returns = self.pnl / self.starting_mav + else: + self.returns = 0.0 + + def record_order(self, order): + if self.keep_orders: + dt_orders = self.orders_by_modified[order.dt] + if order.id in dt_orders: + del dt_orders[order.id] + dt_orders[order.id] = order + # to preserve the order of the orders by modified date + # we delete and add back. (ordered dictionary is sorted by + # first insertion date).
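[Editor's note: the delete-and-re-add idiom documented at the end of record_order() above exists to push the most recently modified order to the end of the OrderedDict. Now that the code targets Python 3, OrderedDict.move_to_end() expresses the same intent directly; a behavior-equivalent sketch:]

```python
from collections import OrderedDict

orders = OrderedDict([('a', 1), ('b', 2), ('c', 3)])

del orders['a']          # the idiom used above:
orders['a'] = 1          # delete and re-add moves 'a' to the end

orders.move_to_end('b')  # Python 3.2+ equivalent, applied to 'b'
print(list(orders))      # ['c', 'a', 'b']
```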
+ if order.id in self.orders_by_id: + del self.orders_by_id[order.id] + self.orders_by_id[order.id] = order + + def execute_transaction(self, txn): + # Update Position + # ---------------- + if 'contract' in txn.__dict__: + sid = (txn.sid, txn.contract) + else: + sid = txn.sid + + position = self.positions[sid] + + position.update(txn) + self.ensure_position_index(sid) + self._position_amounts[sid] = position.amount + + # Max Leverage + # --------------- + # Calculate the maximum capital used and maximum leverage + transaction_cost = txn.price * txn.amount + self.cumulative_capital_used += transaction_cost + + # * # + # now we update ending_mav and ending_total_value such that the performance tracker doesn't think we + # profited when in fact we just entered another position. + # how? just put a negative balance into cash_adjustment equal to the value of the position entered + self.cash_adjustment -= txn.price * txn.amount * get_multiplier(sid) + + if math.fabs(self.cumulative_capital_used) > self.max_capital_used: + self.max_capital_used = math.fabs(self.cumulative_capital_used) + + # We want to convey a level, rather than a precise figure. + # round to the nearest 5,000 to keep the number easy on the eyes + self.max_capital_used = self.round_to_nearest( + self.max_capital_used, + base=5000 + ) + + # we're adding a 10% cushion to the capital used. + self.max_leverage = 1.1 * \ + self.max_capital_used / self.starting_mav + + # add transaction to the list of processed transactions + if self.keep_transactions: + self.processed_transactions[txn.dt].append(txn) + + + def round_to_nearest(self, x, base=5): + return int(base * round(float(x) / base)) + + def calculate_positions_value(self): + multipliers = [get_multiplier(symbol) for symbol in self._position_amounts.keys()] + result = 0 + for amount, price, multiplier in zip(self._position_amounts, self._position_last_sale_prices, multipliers): + result += amount * price * multiplier + return result + + def update_last_sale(self, event): + if 'contract' in event: + sid = (event.sid, event.contract) + else: + sid = event.sid + + is_trade = event.type == zp.DATASOURCE_TYPE.TRADE + has_price = not np.isnan(event.price) + # isnan check will keep the last price if it's not present + + if sid in self.positions and is_trade and has_price: + self.positions[sid].last_sale_price = event.price + self.ensure_position_index(sid) + self._position_last_sale_prices[sid] = event.price + self.positions[sid].last_sale_date = event.dt + + def __core_dict(self): + rval = { + 'ending_value': self.ending_total_value, + # this field is renamed to capital_used for backward + # compatibility. + 'capital_used': self.starting_mav, + 'starting_cash': self.starting_mav, + 'ending_cash': self.ending_mav, + 'portfolio_value': self.ending_mav, + 'cumulative_capital_used': self.cumulative_capital_used, + 'max_capital_used': self.max_capital_used, + 'max_leverage': self.max_leverage, + 'pnl': self.pnl, + 'returns': self.returns, + 'period_open': self.period_open, + 'period_close': self.period_close, + } + + return rval + + def to_dict(self, dt=None): + """ + Creates a dictionary representing the state of this performance + period. See header comments for a detailed description. + + Kwargs: + dt (datetime): If present, only return transactions for the dt.
+ """ + rval = self.__core_dict() + + if self.serialize_positions: + positions = self.get_positions_list() + rval['positions'] = positions + + # we want the key to be absent, not just empty + if self.keep_transactions: + if dt: + # Only include transactions for given dt + transactions = [x.to_dict() + for x in self.processed_transactions[dt]] + else: + transactions = \ + [y.to_dict() + for x in self.processed_transactions.itervalues() + for y in x] + rval['transactions'] = transactions + + if self.keep_orders: + if dt: + # only include orders modified as of the given dt. + orders = [x.to_dict() + for x in self.orders_by_modified[dt].itervalues()] + else: + orders = [x.to_dict() for x in self.orders_by_id.itervalues()] + rval['orders'] = orders + + return rval + + def as_portfolio(self): + """ + The purpose of this method is to provide a portfolio + object to algorithms running inside the same trading + client. The data needed is captured raw in a + PerformancePeriod, and in this method we rename some + fields for usability and remove extraneous fields. + """ + # Recycles containing objects' Portfolio object + # which is used for returning values. + # as_portfolio is called in an inner loop, + # so repeated object creation becomes too expensive + portfolio = self._portfolio_store + # maintaining the old name for the portfolio field for + # backward compatibility + portfolio.capital_used = self.starting_mav + portfolio.starting_cash = self.starting_mav + portfolio.portfolio_value = self.ending_mav + portfolio.pnl = self.pnl + portfolio.returns = self.returns + portfolio.cash = self.ending_mav + portfolio.start_date = self.period_open + portfolio.positions = self.get_positions() + portfolio.positions_value = self.ending_total_value + return portfolio + + def get_positions(self): + positions = self._positions_store + + for sid, pos in self.positions.iteritems(): + + if sid not in positions: + if type(sid) is tuple: + positions[sid] = zp.Position(sid[0], contract=sid[1]) + else: + positions[sid] = zp.Position(sid) + position = positions[sid] + position.amount = pos.amount + position.cost_basis = pos.cost_basis + position.last_sale_price = pos.last_sale_price + + return positions + + def get_positions_list(self): + positions = [] + for sid, pos in self.positions.iteritems(): + if pos.amount != 0: + positions.append(pos.to_dict()) + return positions \ No newline at end of file diff --git a/alephnull/finance/performance/period.py b/alephnull/finance/performance/period.py index 260fffb..9bf6598 100644 --- a/alephnull/finance/performance/period.py +++ b/alephnull/finance/performance/period.py @@ -72,7 +72,7 @@ """ -from __future__ import division + import math from collections import OrderedDict, defaultdict @@ -171,7 +171,7 @@ def update_dividends(self, todays_date): payment has been disbursed. """ cash_payments = 0.0 - for sid, pos in self.positions.iteritems(): + for sid, pos in self.positions.items(): cash_payments += pos.update_dividends(todays_date) # credit our cash balance with the dividend payments, or @@ -353,7 +353,7 @@ def to_dict(self, dt=None): else: transactions = \ [y.to_dict() - for x in self.processed_transactions.itervalues() + for x in self.processed_transactions.values() for y in x] rval['transactions'] = transactions @@ -361,9 +361,9 @@ def to_dict(self, dt=None): if dt: # only include orders modified as of the given dt. 
orders = [x.to_dict() - for x in self.orders_by_modified[dt].itervalues()] + for x in self.orders_by_modified[dt].values()] else: - orders = [x.to_dict() for x in self.orders_by_id.itervalues()] + orders = [x.to_dict() for x in self.orders_by_id.values()] rval['orders'] = orders return rval @@ -397,7 +397,7 @@ def as_portfolio(self): def get_positions(self): positions = self._positions_store - for sid, pos in self.positions.iteritems(): + for sid, pos in self.positions.items(): if sid not in positions: if type(sid) is tuple: @@ -413,7 +413,7 @@ def get_positions(self): def get_positions_list(self): positions = [] - for sid, pos in self.positions.iteritems(): + for sid, pos in self.positions.items(): if pos.amount != 0: positions.append(pos.to_dict()) return positions diff --git a/alephnull/finance/performance/period.py.bak b/alephnull/finance/performance/period.py.bak new file mode 100644 index 0000000..260fffb --- /dev/null +++ b/alephnull/finance/performance/period.py.bak @@ -0,0 +1,654 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" + +Performance Period +================== + +Performance Periods are updated with every trade. When calling +code needs a portfolio object that fulfills the algorithm +protocol, use the PerformancePeriod.as_portfolio method. See that +method for comments on the specific fields provided (and +omitted). + + +---------------+------------------------------------------------------+ + | key | value | + +===============+======================================================+ + | ending_value | the total market value of the positions held at the | + | | end of the period | + +---------------+------------------------------------------------------+ + | cash_flow | the cash flow in the period (negative means spent) | + | | from buying and selling securities in the period. | + | | Includes dividend payments in the period as well. | + +---------------+------------------------------------------------------+ + | starting_value| the total market value of the positions held at the | + | | start of the period | + +---------------+------------------------------------------------------+ + | starting_cash | cash on hand at the beginning of the period | + +---------------+------------------------------------------------------+ + | ending_cash | cash on hand at the end of the period | + +---------------+------------------------------------------------------+ + | positions | a list of dicts representing positions, see | + | | :py:meth:`Position.to_dict()` | + | | for details on the contents of the dict | + +---------------+------------------------------------------------------+ + | pnl | Dollar value profit and loss, for both realized and | + | | unrealized gains. 
| + +---------------+------------------------------------------------------+ + | returns | percentage returns for the entire portfolio over the | + | | period | + +---------------+------------------------------------------------------+ + | cumulative\ | The net capital used (positive is spent) during | + | _capital_used | the period | + +---------------+------------------------------------------------------+ + | max_capital\ | The maximum amount of capital deployed during the | + | _used | period. | + +---------------+------------------------------------------------------+ + | max_leverage | The maximum leverage used during the period. | + +---------------+------------------------------------------------------+ + | period_close | The last close of the market in period. datetime in | + | | pytz.utc timezone. | + +---------------+------------------------------------------------------+ + | period_open | The first open of the market in period. datetime in | + | | pytz.utc timezone. | + +---------------+------------------------------------------------------+ + | transactions | all the transactions that were acrued during this | + | | period. Unset/missing for cumulative periods. | + +---------------+------------------------------------------------------+ + + +""" + +from __future__ import division +import math +from collections import OrderedDict, defaultdict + +import logbook +import numpy as np +import pandas as pd + +import alephnull.protocol as zp +from . position import positiondict + + +log = logbook.Logger('Performance') + + +class PerformancePeriod(object): + + def __init__( + self, + starting_cash, + period_open=None, + period_close=None, + keep_transactions=True, + keep_orders=False, + serialize_positions=True): + + self.period_open = period_open + self.period_close = period_close + + self.ending_value = 0.0 + self.period_cash_flow = 0.0 + self.pnl = 0.0 + # sid => position object + self.positions = positiondict() + self.ending_cash = starting_cash + # rollover initializes a number of self's attributes: + self.rollover() + self.keep_transactions = keep_transactions + self.keep_orders = keep_orders + + # Arrays for quick calculations of positions value + self._position_amounts = pd.Series() + self._position_last_sale_prices = pd.Series() + + self.calculate_performance() + + # An object to recycle via assigning new values + # when returning portfolio information. + # So as not to avoid creating a new object for each event + self._portfolio_store = zp.Portfolio() + self._positions_store = zp.Positions() + self.serialize_positions = serialize_positions + + def rollover(self): + self.starting_value = self.ending_value + self.starting_cash = self.ending_cash + self.period_cash_flow = 0.0 + self.pnl = 0.0 + self.processed_transactions = defaultdict(list) + self.orders_by_modified = defaultdict(OrderedDict) + self.orders_by_id = OrderedDict() + self.cumulative_capital_used = 0.0 + self.max_capital_used = 0.0 + self.max_leverage = 0.0 + + def ensure_position_index(self, sid): + try: + self._position_amounts[sid] + self._position_last_sale_prices[sid] + except (KeyError, IndexError): + self._position_amounts = \ + self._position_amounts.append(pd.Series({sid: 0.0})) + self._position_last_sale_prices = \ + self._position_last_sale_prices.append(pd.Series({sid: 0.0})) + + def add_dividend(self, div): + # The dividend is received on midnight of the dividend + # declared date. We calculate the dividends based on the amount of + # stock owned on midnight of the ex dividend date. 
However, the cash + # is not disbursed until the payment date, which is + # included in the event. + self.positions[div.sid].add_dividend(div) + + def handle_split(self, split): + if split.sid in self.positions: + # Make the position object handle the split. It returns the + # leftover cash from a fractional share, if there is any. + leftover_cash = self.positions[split.sid].handle_split(split) + + if leftover_cash > 0: + self.handle_cash_payment(leftover_cash) + + def update_dividends(self, todays_date): + """ + Check the payment date and ex date against today's date + to determine if we are owed a dividend payment or if the + payment has been disbursed. + """ + cash_payments = 0.0 + for sid, pos in self.positions.iteritems(): + cash_payments += pos.update_dividends(todays_date) + + # credit our cash balance with the dividend payments, or + # if we are short, debit our cash balance with the + # payments. + # debit our cumulative cash spent with the dividend + # payments, or credit our cumulative cash spent if we are + # short the stock. + self.handle_cash_payment(cash_payments) + + # recalculate performance, including the dividend + # payments + self.calculate_performance() + + def handle_cash_payment(self, payment_amount): + self.adjust_cash(payment_amount) + + def handle_commission(self, commission): + # Deduct from our total cash pool. + self.adjust_cash(-commission.cost) + # Adjust the cost basis of the stock if we own it + if commission.sid in self.positions: + self.positions[commission.sid].\ + adjust_commission_cost_basis(commission) + + def adjust_cash(self, amount): + self.period_cash_flow += amount + self.cumulative_capital_used -= amount + + def calculate_performance(self): + self.ending_value = self.calculate_positions_value() + + total_at_start = self.starting_cash + self.starting_value + self.ending_cash = self.starting_cash + self.period_cash_flow + total_at_end = self.ending_cash + self.ending_value + + self.pnl = total_at_end - total_at_start + if total_at_start != 0: + self.returns = self.pnl / total_at_start + else: + self.returns = 0.0 + + def record_order(self, order): + if self.keep_orders: + dt_orders = self.orders_by_modified[order.dt] + if order.id in dt_orders: + del dt_orders[order.id] + dt_orders[order.id] = order + # to preserve the order of the orders by modified date + # we delete and add back. (an ordered dictionary is sorted by + # first insertion date).
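+ # Editor's illustration of the delete-and-re-add trick (assumed
+ # example, not part of the original source):
+ #     d = OrderedDict([('a', 1), ('b', 2)])
+ #     del d['a']
+ #     d['a'] = 1
+ #     list(d) == ['b', 'a']  # 'a' now reads as most recently modified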
+ if order.id in self.orders_by_id: + del self.orders_by_id[order.id] + self.orders_by_id[order.id] = order + + def update_position(self, sid, contract=None, amount=None, last_sale_price=None, + last_sale_date=None, cost_basis=None): + pos = self.positions[sid] + self.ensure_position_index(sid) + + if contract is not None: + pos.contract = contract + if amount is not None: + pos.amount = amount + self._position_amounts[sid] = amount + if last_sale_price is not None: + pos.last_sale_price = last_sale_price + self._position_last_sale_prices[sid] = last_sale_price + if last_sale_date is not None: + pos.last_sale_date = last_sale_date + if cost_basis is not None: + pos.cost_basis = cost_basis + + def execute_transaction(self, txn): + # Update Position + # ---------------- + if 'contract' in txn.__dict__: + sid = (txn.sid, txn.contract) + else: + sid = txn.sid + + position = self.positions[sid] + + position.update(txn) + self.ensure_position_index(sid) + self._position_amounts[sid] = position.amount + + self.period_cash_flow -= txn.price * txn.amount + + # Max Leverage + # --------------- + # Calculate the maximum capital used and maximum leverage + transaction_cost = txn.price * txn.amount + self.cumulative_capital_used += transaction_cost + + if math.fabs(self.cumulative_capital_used) > self.max_capital_used: + self.max_capital_used = math.fabs(self.cumulative_capital_used) + + # We want to convey a level, rather than a precise figure. + # Round to the nearest 5,000 to keep the number easy on the eyes. + self.max_capital_used = self.round_to_nearest( + self.max_capital_used, + base=5000 + ) + + # we're adding a 10% cushion to the capital used. + self.max_leverage = 1.1 * \ + self.max_capital_used / self.starting_cash + + # add transaction to the list of processed transactions + if self.keep_transactions: + self.processed_transactions[txn.dt].append(txn) + + def round_to_nearest(self, x, base=5): + return int(base * round(float(x) / base)) + + def calculate_positions_value(self): + return np.dot(self._position_amounts, self._position_last_sale_prices) + + def update_last_sale(self, event): + if 'contract' in event: + sid = (event.sid, event.contract) + else: + sid = event.sid + + is_trade = event.type == zp.DATASOURCE_TYPE.TRADE + has_price = not np.isnan(event.price) + # isnan check will keep the last price if it's not present + + is_contract_tracked = sid in self.positions + + if is_contract_tracked and is_trade and has_price: + self.positions[sid].last_sale_price = event.price + self.ensure_position_index(sid) + self._position_last_sale_prices[sid] = event.price + self.positions[sid].last_sale_date = event.dt + + def __core_dict(self): + rval = { + 'ending_value': self.ending_value, + # this field is renamed to capital_used for backward + # compatibility. + 'capital_used': self.period_cash_flow, + 'starting_value': self.starting_value, + 'starting_cash': self.starting_cash, + 'ending_cash': self.ending_cash, + 'portfolio_value': self.ending_cash + self.ending_value, + 'cumulative_capital_used': self.cumulative_capital_used, + 'max_capital_used': self.max_capital_used, + 'max_leverage': self.max_leverage, + 'pnl': self.pnl, + 'returns': self.returns, + 'period_open': self.period_open, + 'period_close': self.period_close + } + + return rval + + def to_dict(self, dt=None): + """ + Creates a dictionary representing the state of this performance + period. See header comments for a detailed description. + + Kwargs: + dt (datetime): If present, only return transactions for the dt.
+ """ + rval = self.__core_dict() + + if self.serialize_positions: + positions = self.get_positions_list() + rval['positions'] = positions + + # we want the key to be absent, not just empty + if self.keep_transactions: + if dt: + # Only include transactions for given dt + transactions = [x.to_dict() + for x in self.processed_transactions[dt]] + else: + transactions = \ + [y.to_dict() + for x in self.processed_transactions.itervalues() + for y in x] + rval['transactions'] = transactions + + if self.keep_orders: + if dt: + # only include orders modified as of the given dt. + orders = [x.to_dict() + for x in self.orders_by_modified[dt].itervalues()] + else: + orders = [x.to_dict() for x in self.orders_by_id.itervalues()] + rval['orders'] = orders + + return rval + + def as_portfolio(self): + """ + The purpose of this method is to provide a portfolio + object to algorithms running inside the same trading + client. The data needed is captured raw in a + PerformancePeriod, and in this method we rename some + fields for usability and remove extraneous fields. + """ + # Recycles containing objects' Portfolio object + # which is used for returning values. + # as_portfolio is called in an inner loop, + # so repeated object creation becomes too expensive + portfolio = self._portfolio_store + # maintaining the old name for the portfolio field for + # backward compatibility + portfolio.capital_used = self.period_cash_flow + portfolio.starting_cash = self.starting_cash + portfolio.portfolio_value = self.ending_cash + self.ending_value + portfolio.pnl = self.pnl + portfolio.returns = self.returns + portfolio.cash = self.ending_cash + portfolio.start_date = self.period_open + portfolio.positions = self.get_positions() + portfolio.positions_value = self.ending_value + return portfolio + + def get_positions(self): + positions = self._positions_store + + for sid, pos in self.positions.iteritems(): + + if sid not in positions: + if type(sid) is tuple: + positions[sid] = zp.Position(sid[0], contract=sid[1]) + else: + positions[sid] = zp.Position(sid) + position = positions[sid] + position.amount = pos.amount + position.cost_basis = pos.cost_basis + position.last_sale_price = pos.last_sale_price + + return positions + + def get_positions_list(self): + positions = [] + for sid, pos in self.positions.iteritems(): + if pos.amount != 0: + positions.append(pos.to_dict()) + return positions + + +"""class FuturesPerformancePeriod(object): + "We need to replicate: + * calculate_performance + * execute_transaction + * record_order + * update_last_sale + " + def __init__( + self, + starting_cash, + period_open=None, + period_close=None, + keep_transactions=True, + keep_orders=False, + serialize_positions=True): + self.backing_period = PerformancePeriod(starting_cash, period_open, period_close, keep_transactions, + keep_orders, serialize_positions) + + self.margin_account_value = starting_cash + self.owned_positions = {} # will have a format like {("GS", "N10"): {amount: 100, last_price: 0.25} + self.margin_history = {} # format like {Timestamp(...): 400.30} + + self.contract_multiplier = 100 + self.maintenance_margin_rate = 0.20 + self.initial_margin_rate = 0.30 + + self.contract_details = {} # set externally if at all + self.margin_data = {} # set externally if at all + self.margin_call = self.scale_back_positions # can be set to another function externally + self.gameover = False + + self.algo = None + + def get_initial_margin(self, sid, timestamp, contract_value): + return self.get_margin("initial", sid, timestamp, 
contract_value) + + def get_maintenance_margin(self, sid, timestamp, contract_value): + return self.get_margin("maintenance", sid, timestamp, contract_value) + + def get_margin(self, margin_type, sid, timestamp, contract_value): + # provides initial margin for a sid, basing it on the latest_price if there is no data available. + multiplier = {"initial": 0.25, "maintenance": 0.2}[margin_type] + + # the structure of self.margin_data is like so: + # self.margin_data["initial"]["GS"]["N14"][Timestamp] == 300.03 + # where the final dict in the nesting is a TimeSeries + if (margin_type in self.margin_data and + sid[0] in self.margin_data[margin_type] and + sid[1] in self.margin_data[margin_type][sid[0]]): + series = self.margin_data[margin_type][sid[0]][sid[1]] + previous_data = series[:timestamp] + if previous_data: + return previous_data[-1] + return contract_value * multiplier + + + def unit_multiplier(self, currency): + "Returns a number C such that given_price / C = value of item in dollars. + Another way of figuring is that 1 USD = C other currencies + Exchange rates are estimated based on the time this is programmed and are thus in no way accurate. + We're not really dealing with currencies so I don't much mind. + The ones that matter are mainly dollars and cents (C's of 1 and 100 of course) + + Defaults to 1 if we don't know what to do." + + return {'$': 1, + '$/GAL': 1, + '$/GRAM': 1, + '$/MBTU': 1, + '$/MWH': 1, + '$/TON': 1, + 'AU$': 1.12, + 'CD$': 1.07, + 'CHF': 0.91, + 'CZK': 20.18, + 'HUF': 220.32, + 'NOK': 6.17, + 'NZD': 1.21, + 'SEK': 6.51, + 'TRY': 2.17, + u'\xa3': 0.61, # Pound + u'\xa5': 104.49, # Yen + u'\xf3': 100, # Cents + u'\u20ac': 0.73, # Euro + }.get(currency, 1) + + def dollars_from_currency(self, price, unit): + return price / self.unit_multiplier(unit) + + def get_multiplier(self, sid): + # multiplier = contract_size * quoted_unit / $ + # i.e. what do I multiply price by to get the value of a single contract + + fallback = 1000 + + contract_size = self.contract_details.get(sid[0], {}).get('contract_size', str(fallback) + " UNITS") + quoted_unit = self.contract_details.get(sid[0], {}).get('quoted_unit', "$") + + def matches(pattern, text): + result = re.match(pattern, text) + if result is not None: + return result.group() == text + else: + return False + + # case 1: some number with units like "1,000 TONS" or "42,000 GAL" and a quoted unit that is simply "$" + # what "$" means is "$/UNIT", whether UNIT be GAL or LITERS or whatever. + + straight_currencies = {'$', 'AU$', 'CD$', 'CHF', 'CZK', 'HUF', 'NOK', 'NZD', 'SEK', 'TRY', + u'\xa3', # Pound + u'\xa5', # Yen + u'\xf3', # Cents + u'\u20ac', # Euro + } + + is_standard_quoted_unit = quoted_unit in straight_currencies or matches("\$\/.+", quoted_unit) + is_standard_contract_size = matches("[0-9,\\.]+ [A-Za-z\\. \$]+", contract_size) + + is_standard_pointwise_contract_size = matches("\$?[\\.0-9,]+[ ]+(X[ ]+INDEX|TIMES INDEX VALUE)", contract_size) + + if quoted_unit == 'PTS.' 
and is_standard_pointwise_contract_size: + result = "" + # remove currency + for n, ch in enumerate(contract_size): + if ch in '0123456789': + result = contract_size[n:] + break + result = result.replace(" ", "").replace("XINDEX", "").replace("TIMESINDEXVALUE", "") + return result + elif is_standard_quoted_unit and is_standard_contract_size: + chunks = [x for x in contract_size.split(" ") if x] + quantity = self.dollars_from_currency( + float(chunks[0].replace(",", "")), + quoted_unit) + return quantity + else: + return fallback + + def get_first_notice(self, sid): + # returns a Timestamp representing midnight at the day of first delivery + expiration = self.contract_details.get(sid[0], {}).get('contracts', {}).get(sid[1], {}).get('expiration_date') + if expiration is not None: + return Timestamp(expiration, tz='UTC') - timedelta(days=5) + else: + delivery_months = "FGHJKMNQUVXZ" + contract_delivery_month = delivery_months.find(sid[1][0]) + 1 + contract_delivery_year = int("20" + str(sid[1][1:])) + return Timestamp(contract_delivery_year + "-" + contract_delivery_month + "-01") - timedelta(days=3) + + + def record_order(self, order): + # self.owned_positions[order.sid] = order.amount + + self.backing_period.record_order(order) + + def execute_transaction(self, txn): + if self.gameover: + self.margin_history[txn.dt] = self.margin_account_value + return + + margin_for_new_txn = self.get_initial_margin(txn.sid, txn.dt, + txn.price * self.get_multiplier(txn.sid)) * txn.amount + + if txn.sid in self.owned_positions: + self.recalculate_margin_from_price_change(txn.sid, txn.price - txn.commission) + + if margin_for_new_txn <= self.margin_account_value - self.calculate_maintenance_margin(txn.dt): + self.owned_positions[txn.sid]['amount'] += txn.amount + self.margin_account_value -= txn.commission * txn.amount + else: # buying the first units of a contract + if margin_for_new_txn <= self.margin_account_value - self.calculate_maintenance_margin(txn.dt): + self.owned_positions[txn.sid] = {'amount': txn.amount, 'last_price': txn.price} + + def calculate_maintenance_margin(self, timestamp): + "Uses the owned_positions dictionary to calculate the minimum a margin account must meet in order for + new transactions to take place." 
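+ # A hedged worked example with assumed numbers (editor's note): two
+ # contracts at a last price of 50.0 and a multiplier of 1000 have a
+ # contract value of 50,000 each; at a 20% maintenance rate that is
+ # 10,000 per contract, so this sums to 2 * 10,000 = 20,000.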
+ + maintenance_margin = 0 + for sid, position in self.owned_positions.iteritems(): + maintenance_margin += self.get_maintenance_margin(sid, timestamp, + self.get_multiplier(sid) * position['last_price']) * position['amount'] + return maintenance_margin + + def update_last_sale(self, event): + if self.gameover: + self.margin_history[event.dt] = self.margin_account_value + return + + if event.sid in self.owned_positions: + self.recalculate_margin_from_price_change(event.sid, event.price) + if self.calculate_maintenance_margin(event.dt) > self.margin_account_value: + self.margin_call(event.dt) + + self.margin_history[event.dt] = self.margin_account_value + + def recalculate_margin_from_price_change(self, sid, new_price): + "Adjusts the margin account value to compensate with a change in price of an already-owned contract" + last_price = self.owned_positions[sid]['last_price'] + amount = self.owned_positions[sid]['amount'] + + delta = self.get_multiplier(sid) * (new_price - last_price) * amount + self.margin_account_value += delta + self.owned_positions[sid]['last_price'] = new_price + + # margin call logic + if self.margin_account_value <= 0: + self.gameover = True + + def scale_back_positions(self, timestamp): + # A default option for margin calls where it goes through positions alphabetically exits those positions until + # the maintenance margin is below the margin account value + + # we know that + # self.calculate_maintenance_margin() > self.margin_account_value + + positions = list(reversed(sorted(self.owned_positions.items()))) + while self.calculate_maintenance_margin(timestamp) > self.margin_account_value: + shortfall = self.calculate_maintenance_margin(timestamp) - self.margin_account_value + position = positions.pop() + sid = position[0] + details = position[1] + + contract_value = self.get_multiplier(sid) * details['last_price'] + margin_per_contract = self.get_maintenance_margin(sid, timestamp, contract_value) + contracts_to_exit_amount = int(shortfall / margin_per_contract) + 1 + if contracts_to_exit_amount >= details['amount']: + del self.owned_positions[sid] + else: + self.owned_positions[sid] -= contracts_to_exit_amount + + + def __getattr__(self, name): + return getattr(self.backing_period, name)""" \ No newline at end of file diff --git a/alephnull/finance/performance/position.py b/alephnull/finance/performance/position.py index e4a9a74..c853932 100644 --- a/alephnull/finance/performance/position.py +++ b/alephnull/finance/performance/position.py @@ -33,7 +33,7 @@ """ -from __future__ import division + import math import logbook diff --git a/alephnull/finance/performance/position.py.bak b/alephnull/finance/performance/position.py.bak new file mode 100644 index 0000000..e4a9a74 --- /dev/null +++ b/alephnull/finance/performance/position.py.bak @@ -0,0 +1,219 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +""" +Position Tracking +================= + + +-----------------+----------------------------------------------------+ + | key | value | + +=================+====================================================+ + | sid | the identifier for the security held in this | + | | position. | + +-----------------+----------------------------------------------------+ + | amount | whole number of shares in the position | + +-----------------+----------------------------------------------------+ + | last_sale_price | price at last sale of the security on the exchange | + +-----------------+----------------------------------------------------+ + | cost_basis | the volume weighted average price paid per share | + +-----------------+----------------------------------------------------+ + +""" + +from __future__ import division +import math + +import logbook + + +log = logbook.Logger('Performance') + + +class Position(object): + def __init__(self, sid, amount=0, cost_basis=0.0, + last_sale_price=0.0, last_sale_date=0.0, + dividends=None, contract=None): + self.sid = sid + if contract is not None: + self.contract = contract + self.amount = amount + self.cost_basis = cost_basis # per share + self.last_sale_price = last_sale_price + self.last_sale_date = last_sale_date + self.dividends = dividends or [] + + def update_dividends(self, midnight_utc): + """ + midnight_utc is the 0 hour for the current (not yet open) trading day. + This method will be invoked at the end of the market + close handling, before the next market open. + """ + payment = 0.0 + unpaid_dividends = [] + for dividend in self.dividends: + if midnight_utc == dividend.ex_date: + # if we own shares at midnight of the div_ex date + # we are entitled to the dividend. + dividend.amount_on_ex_date = self.amount + if dividend.net_amount: + dividend.payment = self.amount * dividend.net_amount + else: + dividend.payment = self.amount * dividend.gross_amount + + if midnight_utc == dividend.pay_date: + # if it is the payment date, include this + # dividend's actual payment (calculated on + # ex_date) + payment += dividend.payment + else: + unpaid_dividends.append(dividend) + + self.dividends = unpaid_dividends + return payment + + def add_dividend(self, dividend): + self.dividends.append(dividend) + + # Update the position by the split ratio, and return the + # resulting fractional share that will be converted into cash. + + # Returns the unused cash. 
+ def handle_split(self, split): + if (self.sid != split.sid): + raise NameError("updating split with the wrong sid!") + + ratio = split.ratio + + log.info("handling split for sid = " + str(split.sid) + + ", ratio = " + str(split.ratio)) + log.info("before split: " + str(self)) + + # adjust the # of shares by the ratio + # (if we had 100 shares, and the ratio is 3, + # we now have 33 shares) + # (old_share_count / ratio = new_share_count) + # (old_price * ratio = new_price) + + # ie, 33.333 + raw_share_count = self.amount / float(ratio) + + # ie, 33 + full_share_count = math.floor(raw_share_count) + + # ie, 0.333 + fractional_share_count = raw_share_count - full_share_count + + # adjust the cost basis to the nearest cent, ie, 60.0 + new_cost_basis = round(self.cost_basis * ratio, 2) + + # adjust the last sale price + new_last_sale_price = round(self.last_sale_price * ratio, 2) + + self.cost_basis = new_cost_basis + self.last_sale_price = new_last_sale_price + self.amount = full_share_count + + return_cash = round(float(fractional_share_count * new_cost_basis), 2) + + log.info("after split: " + str(self)) + log.info("returning cash: " + str(return_cash)) + + # return the leftover cash, which will be converted into cash + # (rounded to the nearest cent) + return return_cash + + def update(self, txn): + if self.sid != txn.sid: + raise Exception('updating position with txn for a ' + 'different sid') + + # we're covering a short or closing a position + if self.amount + txn.amount == 0: + self.cost_basis = 0.0 + self.amount = 0 + else: + prev_cost = self.cost_basis * self.amount + txn_cost = txn.amount * txn.price + total_cost = prev_cost + txn_cost + total_shares = self.amount + txn.amount + self.cost_basis = total_cost / total_shares + self.amount = total_shares + + def adjust_commission_cost_basis(self, commission): + """ + A note about cost-basis in zipline: all positions are considered + to share a cost basis, even if they were executed in different + transactions with different commission costs, different prices, etc. + + Due to limitations about how zipline handles positions, zipline will + currently spread an externally-delivered commission charge across + all shares in a position. + """ + + if commission.sid != self.sid: + raise Exception('Updating a commission for a different sid?') + if commission.cost == 0.0: + return + + # If we no longer hold this position, there is no cost basis to + # adjust. + if self.amount == 0: + return + + prev_cost = self.cost_basis * self.amount + new_cost = prev_cost + commission.cost + self.cost_basis = new_cost / self.amount + + def __repr__(self): + template = "sid: {sid}, amount: {amount}, cost_basis: {cost_basis}, \ +last_sale_price: {last_sale_price}" + return template.format( + sid=self.sid, + amount=self.amount, + cost_basis=self.cost_basis, + last_sale_price=self.last_sale_price + ) + + def to_dict(self): + """ + Creates a dictionary representing the state of this position. 
+ Returns a dict object of the form: + """ + if hasattr(self, 'contract'): + return { + 'sid': self.sid, + 'contract': self.contract, + 'amount': self.amount, + 'cost_basis': self.cost_basis, + 'last_sale_price': self.last_sale_price + } + return { + 'sid': self.sid, + 'amount': self.amount, + 'cost_basis': self.cost_basis, + 'last_sale_price': self.last_sale_price + } + + +class positiondict(dict): + + def __missing__(self, key): + if type(key) is tuple: + pos = Position(key[0], contract=key[1]) + else: + pos = Position(key) + self[key] = pos + return pos diff --git a/alephnull/finance/performance/tracker.py b/alephnull/finance/performance/tracker.py index d8d95f3..5db9614 100644 --- a/alephnull/finance/performance/tracker.py +++ b/alephnull/finance/performance/tracker.py @@ -57,7 +57,7 @@ """ -from __future__ import division + import logbook import pandas as pd diff --git a/alephnull/finance/performance/tracker.py.bak b/alephnull/finance/performance/tracker.py.bak new file mode 100644 index 0000000..d8d95f3 --- /dev/null +++ b/alephnull/finance/performance/tracker.py.bak @@ -0,0 +1,402 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" + +Performance Tracking +==================== + + +-----------------+----------------------------------------------------+ + | key | value | + +=================+====================================================+ + | period_start | The beginning of the period to be tracked. datetime| + | | in pytz.utc timezone. Will always be 0:00 on the | + | | date in UTC. The fact that the time may be on the | + | | prior day in the exchange's local time is ignored | + +-----------------+----------------------------------------------------+ + | period_end | The end of the period to be tracked. datetime | + | | in pytz.utc timezone. Will always be 23:59 on the | + | | date in UTC. The fact that the time may be on the | + | | next day in the exchange's local time is ignored | + +-----------------+----------------------------------------------------+ + | progress | percentage of test completed | + +-----------------+----------------------------------------------------+ + | capital_base | The initial capital assumed for this tracker. | + +-----------------+----------------------------------------------------+ + | cumulative_perf | A dictionary representing the cumulative | + | | performance through all the events delivered to | + | | this tracker. For details see the comments on | + | | :py:meth:`PerformancePeriod.to_dict` | + +-----------------+----------------------------------------------------+ + | todays_perf | A dictionary representing the cumulative | + | | performance through all the events delivered to | + | | this tracker with datetime stamps between last_open| + | | and last_close. For details see the comments on | + | | :py:meth:`PerformancePeriod.to_dict` | + | | TODO: adding this because we calculate it. May be | + | | overkill. 
| + +-----------------+----------------------------------------------------+ + | cumulative_risk | A dictionary representing the risk metrics | + | _metrics | calculated based on the positions aggregated | + | | through all the events delivered to this tracker. | + | | For details look at the comments for | + | | :py:meth:`zipline.finance.risk.RiskMetrics.to_dict`| + +-----------------+----------------------------------------------------+ + +""" + +from __future__ import division +import logbook + +import pandas as pd +from pandas.tseries.tools import normalize_date + +import alephnull.protocol as zp +import alephnull.finance.risk as risk +from alephnull.finance import trading +from . period import PerformancePeriod +from . futures_period import FuturesPerformancePeriod + +log = logbook.Logger('Performance') + + +class BasePerformanceTracker(object): + """ + Tracks the performance of the algorithm. + """ + + def __init__(self, sim_params, perf_tracker_class): + + self.sim_params = sim_params + self.perf_tracker_class = perf_tracker_class + + self.period_start = self.sim_params.period_start + self.period_end = self.sim_params.period_end + self.last_close = self.sim_params.last_close + first_day = self.sim_params.first_open + self.market_open, self.market_close = \ + trading.environment.get_open_and_close(first_day) + self.total_days = self.sim_params.days_in_period + self.capital_base = self.sim_params.capital_base + self.emission_rate = sim_params.emission_rate + + all_trading_days = trading.environment.trading_days + mask = ((all_trading_days >= normalize_date(self.period_start)) & + (all_trading_days <= normalize_date(self.period_end))) + + self.trading_days = all_trading_days[mask] + + self.perf_periods = [] + + if self.emission_rate == 'daily': + self.all_benchmark_returns = pd.Series( + index=self.trading_days) + self.intraday_risk_metrics = None + self.cumulative_risk_metrics = \ + risk.RiskMetricsCumulative(self.sim_params) + + elif self.emission_rate == 'minute': + self.all_benchmark_returns = pd.Series(index=pd.date_range( + self.sim_params.first_open, self.sim_params.last_close, + freq='Min')) + self.intraday_risk_metrics = \ + risk.RiskMetricsCumulative(self.sim_params) + + self.cumulative_risk_metrics = \ + risk.RiskMetricsCumulative(self.sim_params, + returns_frequency='daily', + create_first_day_stats=True) + + self.minute_performance = self.perf_tracker_class( + # initial cash is your capital base. + self.capital_base, + # the cumulative period will be calculated over the + # entire test. + self.period_start, + self.period_end, + # don't save the transactions for the cumulative + # period + keep_transactions=False, + keep_orders=False, + # don't serialize positions for cumualtive period + serialize_positions=False + ) + self.perf_periods.append(self.minute_performance) + + # this performance period will span the entire simulation from + # inception. + self.cumulative_performance = self.perf_tracker_class( + # initial cash is your capital base. + self.capital_base, + # the cumulative period will be calculated over the entire test. + self.period_start, + self.period_end, + # don't save the transactions for the cumulative + # period + keep_transactions=False, + keep_orders=False, + # don't serialize positions for cumualtive period + serialize_positions=False + ) + self.perf_periods.append(self.cumulative_performance) + + # this performance period will span just the current market day + self.todays_performance = self.perf_tracker_class( + # initial cash is your capital base. 
+ self.capital_base, + # the daily period will be calculated for the market day + self.market_open, + self.market_close, + keep_transactions=True, + keep_orders=True, + serialize_positions=True + ) + self.perf_periods.append(self.todays_performance) + + self.saved_dt = self.period_start + self.returns = pd.Series(index=self.trading_days) + # one indexed so that we reach 100% + self.day_count = 0.0 + self.txn_count = 0 + self.event_count = 0 + + def __repr__(self): + return "%s(%r)" % ( + self.__class__.__name__, + {'simulation parameters': self.sim_params}) + + @property + def progress(self): + if self.emission_rate == 'minute': + # Fake a value + return 1.0 + elif self.emission_rate == 'daily': + return self.day_count / self.total_days + + def set_date(self, date): + if self.emission_rate == 'minute': + self.saved_dt = date + self.todays_performance.period_close = self.saved_dt + + def update_performance(self): + # calculate performance as of last trade + for perf_period in self.perf_periods: + perf_period.calculate_performance() + + def get_portfolio(self): + self.update_performance() + return self.cumulative_performance.as_portfolio() + + def to_dict(self, emission_type=None): + """ + Creates a dictionary representing the state of this tracker. + Returns a dict object of the form described in header comments. + """ + if not emission_type: + emission_type = self.emission_rate + _dict = { + 'period_start': self.period_start, + 'period_end': self.period_end, + 'capital_base': self.capital_base, + 'cumulative_perf': self.cumulative_performance.to_dict(), + 'progress': self.progress, + 'cumulative_risk_metrics': self.cumulative_risk_metrics.to_dict() + } + if emission_type == 'daily': + _dict.update({'daily_perf': self.todays_performance.to_dict()}) + elif emission_type == 'minute': + _dict.update({ + 'intraday_risk_metrics': self.intraday_risk_metrics.to_dict(), + 'minute_perf': self.todays_performance.to_dict(self.saved_dt) + }) + + return _dict + + def process_event(self, event): + self.event_count += 1 + + if event.type == zp.DATASOURCE_TYPE.TRADE: + # update last sale + for perf_period in self.perf_periods: + perf_period.update_last_sale(event) + + elif event.type == zp.DATASOURCE_TYPE.TRANSACTION: + # Trade simulation always follows a transaction with the + # TRADE event that was used to simulate it, so we don't + # check for end of day rollover messages here. + self.txn_count += 1 + for perf_period in self.perf_periods: + perf_period.execute_transaction(event) + + elif event.type == zp.DATASOURCE_TYPE.DIVIDEND: + for perf_period in self.perf_periods: + perf_period.add_dividend(event) + + elif event.type == zp.DATASOURCE_TYPE.SPLIT: + for perf_period in self.perf_periods: + perf_period.handle_split(event) + + elif event.type == zp.DATASOURCE_TYPE.ORDER: + for perf_period in self.perf_periods: + perf_period.record_order(event) + + elif event.type == zp.DATASOURCE_TYPE.COMMISSION: + for perf_period in self.perf_periods: + perf_period.handle_commission(event) + + elif event.type == zp.DATASOURCE_TYPE.CUSTOM: + pass + elif event.type == zp.DATASOURCE_TYPE.BENCHMARK: + if ( + self.sim_params.data_frequency == 'minute' + and + self.sim_params.emission_rate == 'daily' + ): + # Minute data benchmarks should have a timestamp of market + # close, so that calculations are triggered at the right time. + # However, risk module uses midnight as the 'day' + # marker for returns, so adjust back to midnight.
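+ # e.g. (editor's illustration, timestamp assumed): an event at
+ # Timestamp('2013-06-03 20:00', tz='UTC') is adjusted back to
+ # Timestamp('2013-06-03 00:00', tz='UTC') before being recorded.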
+ midnight = event.dt.replace( + hour=0, + minute=0, + second=0, + microsecond=0) + else: + midnight = event.dt + + self.all_benchmark_returns[midnight] = event.returns + + def handle_minute_close(self, dt): + self.update_performance() + todays_date = normalize_date(dt) + + minute_returns = self.minute_performance.returns + self.minute_performance.rollover() + # the intraday risk is calculated on top of minute performance + # returns for the bench and the algo + self.intraday_risk_metrics.update(dt, + minute_returns, + self.all_benchmark_returns[dt]) + + bench_since_open = \ + self.intraday_risk_metrics.benchmark_period_returns[dt] + + # if we've reached market close, check on dividends + if dt == self.market_close: + for perf_period in self.perf_periods: + perf_period.update_dividends(todays_date) + + self.cumulative_risk_metrics.update(todays_date, + self.todays_performance.returns, + bench_since_open) + + # if this is the close, save the returns objects for cumulative + # risk calculations + if dt == self.market_close: + self.returns[todays_date] = self.todays_performance.returns + + def handle_intraday_close(self): + # update_performance should have been called in handle_minute_close + # so it is not repeated here. + self.intraday_risk_metrics = \ + risk.RiskMetricsCumulative(self.sim_params) + # increment the day counter before we move markers forward. + self.day_count += 1.0 + # move the market day markers forward + if self.market_close < trading.environment.last_trading_day: + self.market_open, self.market_close = \ + trading.environment.next_open_and_close(self.market_open) + else: + self.market_close = self.sim_params.last_close + + def handle_market_close(self): + self.update_performance() + # add the return results from today to the returns series + todays_date = normalize_date(self.market_close) + self.cumulative_performance.update_dividends(todays_date) + self.todays_performance.update_dividends(todays_date) + + self.returns[todays_date] = self.todays_performance.returns + + # update risk metrics for cumulative performance + self.cumulative_risk_metrics.update( + todays_date, + self.todays_performance.returns, + self.all_benchmark_returns[todays_date]) + + # increment the day counter before we move markers forward. + self.day_count += 1.0 + + # Take a snapshot of our current performance to return to the + # browser. + daily_update = self.to_dict() + + # On the last day of the test, don't create tomorrow's performance + # period. We may not be able to find the next trading day if we're + # at the end of our historical data + if self.market_close >= self.last_close: + return daily_update + + # move the market day markers forward + self.market_open, self.market_close = \ + trading.environment.next_open_and_close(self.market_open) + + # Roll over positions to current day. + self.todays_performance.rollover() + self.todays_performance.period_open = self.market_open + self.todays_performance.period_close = self.market_close + + # The dividend calculation for the daily needs to be made + # after the rollover. midnight_between is the last midnight + # hour between the close of markets and the next open. To + # make sure midnight_between matches identically with + # dividend data dates, it is in UTC. 
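+ # e.g. (editor's illustration, date assumed): a market_open of
+ # 2013-06-04 13:30 UTC yields a midnight_between of
+ # 2013-06-04 00:00 UTC, matching midnight-stamped dividend dates.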
+ midnight_between = self.market_open.replace(hour=0, minute=0, second=0, + microsecond=0) + self.cumulative_performance.update_dividends(midnight_between) + self.todays_performance.update_dividends(midnight_between) + + return daily_update + + def handle_simulation_end(self): + """ + When the simulation is complete, run the full period risk report + and send it out on the results socket. + """ + + log_msg = "Simulated {n} trading days out of {m}." + log.info(log_msg.format(n=int(self.day_count), m=self.total_days)) + log.info("first open: {d}".format( + d=self.sim_params.first_open)) + log.info("last close: {d}".format( + d=self.sim_params.last_close)) + + bms = self.cumulative_risk_metrics.benchmark_returns + ars = self.cumulative_risk_metrics.algorithm_returns + self.risk_report = risk.RiskReport( + ars, + self.sim_params, + benchmark_returns=bms) + + risk_dict = self.risk_report.to_dict() + return risk_dict + + +def PerformanceTracker(sim_params): + return BasePerformanceTracker(sim_params, PerformancePeriod) + + +def FuturesPerformanceTracker(sim_params): + return BasePerformanceTracker(sim_params, FuturesPerformancePeriod) \ No newline at end of file diff --git a/alephnull/finance/risk/cumulative.py b/alephnull/finance/risk/cumulative.py index 78a6a6a..7002958 100644 --- a/alephnull/finance/risk/cumulative.py +++ b/alephnull/finance/risk/cumulative.py @@ -359,7 +359,7 @@ def to_dict(self): return {k: None if check_entry(k, v) - else v for k, v in rval.iteritems()} + else v for k, v in rval.items()} def __repr__(self): statements = [] diff --git a/alephnull/finance/risk/cumulative.py.bak b/alephnull/finance/risk/cumulative.py.bak new file mode 100644 index 0000000..78a6a6a --- /dev/null +++ b/alephnull/finance/risk/cumulative.py.bak @@ -0,0 +1,497 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import logbook +import math +import numpy as np + +from alephnull.finance import trading +import alephnull.utils.math_utils as zp_math + +import pandas as pd +from pandas.tseries.tools import normalize_date + +from . risk import ( + alpha, + check_entry, + choose_treasury, +) + +log = logbook.Logger('Risk Cumulative') + + +choose_treasury = functools.partial(choose_treasury, lambda *args: '10year', + compound=False) + + +def sharpe_ratio(algorithm_volatility, annualized_return, treasury_return): + """ + http://en.wikipedia.org/wiki/Sharpe_ratio + + Args: + algorithm_volatility (float): Algorithm volatility. + algorithm_return (float): Algorithm return percentage. + treasury_return (float): Treasury return percentage. + + Returns: + float. The Sharpe ratio. + """ + if zp_math.tolerant_equals(algorithm_volatility, 0): + return np.nan + + return ( + (annualized_return - treasury_return) + # The square of the annualization factor is in the volatility, + # because the volatility is also annualized, + # i.e. the sqrt(annual factor) is in the volatility's numerator. 
+ # So to have the correct annualization factor for the + # Sharpe value's numerator, which should be the sqrt(annual factor). + # The square of the sqrt of the annual factor, i.e. the annual factor + # itself, is needed in the numerator to factor out the division by + # its square root. + / algorithm_volatility) + + +def sortino_ratio(annualized_algorithm_return, treasury_return, downside_risk): + """ + http://en.wikipedia.org/wiki/Sortino_ratio + + Args: + algorithm_returns (np.array-like): + Returns from algorithm lifetime. + algorithm_period_return (float): + Algorithm return percentage from latest period. + mar (float): Minimum acceptable return. + + Returns: + float. The Sortino ratio. + """ + if np.isnan(downside_risk) or zp_math.tolerant_equals(downside_risk, 0): + return 0.0 + + return (annualized_algorithm_return - treasury_return) / downside_risk + + +def information_ratio(algo_volatility, algorithm_return, benchmark_return): + """ + http://en.wikipedia.org/wiki/Information_ratio + + Args: + algorithm_returns (np.array-like): + All returns during algorithm lifetime. + benchmark_returns (np.array-like): + All benchmark returns during algo lifetime. + + Returns: + float. Information ratio. + """ + if zp_math.tolerant_equals(algo_volatility, 0): + return np.nan + + return ( + (algorithm_return - benchmark_return) + # The square of the annualization factor is in the volatility, + # because the volatility is also annualized, + # i.e. the sqrt(annual factor) is in the volatility's numerator. + # So to have the correct annualization factor for the + # Sharpe value's numerator, which should be the sqrt(annual factor). + # The square of the sqrt of the annual factor, i.e. the annual factor + # itself, is needed in the numerator to factor out the division by + # its square root. + / algo_volatility) + + +class RiskMetricsCumulative(object): + """ + :Usage: + Instantiate RiskMetricsCumulative once. + Call update() method on each dt to update the metrics. + """ + + METRIC_NAMES = ( + 'alpha', + 'beta', + 'sharpe', + 'algorithm_volatility', + 'benchmark_volatility', + 'downside_risk', + 'sortino', + 'information', + ) + + def __init__(self, sim_params, + returns_frequency=None, + create_first_day_stats=False): + """ + - @returns_frequency allows for configuration of whether + the benchmark and algorithm returns are in units of minutes or days, + if `None` defaults to the `emission_rate` in `sim_params`.
+ """ + + self.treasury_curves = trading.environment.treasury_curves + self.start_date = sim_params.period_start.replace( + hour=0, minute=0, second=0, microsecond=0 + ) + self.end_date = sim_params.period_end.replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + self.trading_days = trading.environment.days_in_range( + self.start_date, + self.end_date) + + last_day = normalize_date(sim_params.period_end) + if last_day not in self.trading_days: + last_day = pd.tseries.index.DatetimeIndex( + [last_day] + ) + self.trading_days = self.trading_days.append(last_day) + + self.sim_params = sim_params + + self.create_first_day_stats = create_first_day_stats + + if returns_frequency is None: + returns_frequency = self.sim_params.emission_rate + + self.returns_frequency = returns_frequency + + if returns_frequency == 'daily': + cont_index = self.get_daily_index() + elif returns_frequency == 'minute': + cont_index = self.get_minute_index(sim_params) + + self.cont_index = cont_index + + self.algorithm_returns_cont = pd.Series(index=cont_index) + self.benchmark_returns_cont = pd.Series(index=cont_index) + + # The returns at a given time are read and reset from the respective + # returns container. + self.algorithm_returns = None + self.benchmark_returns = None + self.mean_returns = None + self.annualized_mean_returns = None + self.mean_benchmark_returns = None + self.annualized_benchmark_returns = None + + self.compounded_log_returns = pd.Series(index=cont_index) + self.algorithm_period_returns = pd.Series(index=cont_index) + self.benchmark_period_returns = pd.Series(index=cont_index) + self.excess_returns = pd.Series(index=cont_index) + + self.latest_dt = cont_index[0] + + self.metrics = pd.DataFrame(index=cont_index, + columns=self.METRIC_NAMES) + + self.max_drawdown = 0 + self.current_max = -np.inf + self.daily_treasury = pd.Series(index=self.trading_days) + + def get_minute_index(self, sim_params): + """ + Stitches together multiple days worth of business minutes into + one continous index. + """ + trading_minutes = None + for day in self.trading_days: + minutes_for_day = trading.environment.market_minutes_for_day(day) + if trading_minutes is None: + # Create container for all minutes on first iteration + trading_minutes = minutes_for_day + else: + trading_minutes = trading_minutes + minutes_for_day + return trading_minutes + + def get_daily_index(self): + return self.trading_days + + def update(self, dt, algorithm_returns, benchmark_returns): + # Keep track of latest dt for use in to_dict and other methods + # that report current state. 
+ self.latest_dt = dt + + self.algorithm_returns_cont[dt] = algorithm_returns + self.algorithm_returns = self.algorithm_returns_cont.valid() + + if self.create_first_day_stats: + if len(self.algorithm_returns) == 1: + self.algorithm_returns = pd.Series( + {'null return': 0.0}).append(self.algorithm_returns) + + self.mean_returns = pd.rolling_mean(self.algorithm_returns, + window=len(self.algorithm_returns), + min_periods=1) + + self.annualized_mean_returns = self.mean_returns * 252 + + self.benchmark_returns_cont[dt] = benchmark_returns + self.benchmark_returns = self.benchmark_returns_cont.valid() + + self.mean_benchmark_returns = pd.rolling_mean( + self.benchmark_returns, + window=len(self.benchmark_returns), + min_periods=1) + + self.annualized_benchmark_returns = self.mean_benchmark_returns * 252 + + if self.create_first_day_stats: + if len(self.benchmark_returns) == 1: + self.benchmark_returns = pd.Series( + {'null return': 0.0}).append(self.benchmark_returns) + + self.mean_benchmark_returns = pd.rolling_mean( + self.benchmark_returns, + window=len(self.benchmark_returns), + min_periods=1) + + self.annualized_benchmark_returns = self.mean_benchmark_returns * 252 + + self.num_trading_days = len(self.algorithm_returns) + + self.update_compounded_log_returns() + + self.algorithm_period_returns[dt] = \ + self.calculate_period_returns(self.algorithm_returns) + self.benchmark_period_returns[dt] = \ + self.calculate_period_returns(self.benchmark_returns) + + if not self.algorithm_returns.index.equals( + self.benchmark_returns.index + ): + message = "Mismatch between benchmark_returns ({bm_count}) and \ +algorithm_returns ({algo_count}) in range {start} : {end} on {dt}" + message = message.format( + bm_count=len(self.benchmark_returns), + algo_count=len(self.algorithm_returns), + start=self.start_date, + end=self.end_date, + dt=dt + ) + raise Exception(message) + + self.update_current_max() + self.metrics.benchmark_volatility[dt] = \ + self.calculate_volatility(self.benchmark_returns) + self.metrics.algorithm_volatility[dt] = \ + self.calculate_volatility(self.algorithm_returns) + + # caching the treasury rates for the minutely case is a + # big speedup, because it avoids searching the treasury + # curves on every minute. + # In both minutely and daily, the daily curve is always used. 
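+ # Sketch of the effect (editor's note, counts assumed): over a
+ # 390-minute trading day, choose_treasury() runs once on the first
+ # minute; the remaining updates reuse self.daily_treasury[treasury_end].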
+ treasury_end = dt.replace(hour=0, minute=0) + if np.isnan(self.daily_treasury[treasury_end]): + treasury_period_return = choose_treasury( + self.treasury_curves, + self.start_date, + treasury_end + ) + self.daily_treasury[treasury_end] =\ + treasury_period_return + self.treasury_period_return = \ + self.daily_treasury[treasury_end] + self.excess_returns[self.latest_dt] = ( + self.algorithm_period_returns[self.latest_dt] + - + self.treasury_period_return) + self.metrics.beta[dt] = self.calculate_beta() + self.metrics.alpha[dt] = self.calculate_alpha(dt) + self.metrics.sharpe[dt] = self.calculate_sharpe() + self.metrics.downside_risk[dt] = self.calculate_downside_risk() + self.metrics.sortino[dt] = self.calculate_sortino() + self.metrics.information[dt] = self.calculate_information() + self.max_drawdown = self.calculate_max_drawdown() + + if self.create_first_day_stats: + # Remove placeholder 0 return + if 'null return' in self.algorithm_returns: + self.algorithm_returns = self.algorithm_returns.drop( + 'null return') + self.algorithm_returns.index = pd.to_datetime( + self.algorithm_returns.index) + if 'null return' in self.benchmark_returns: + self.benchmark_returns = self.benchmark_returns.drop( + 'null return') + self.benchmark_returns.index = pd.to_datetime( + self.benchmark_returns.index) + + def to_dict(self): + """ + Creates a dictionary representing the state of the risk report. + Returns a dict object of the form: + """ + dt = self.latest_dt + period_label = dt.strftime("%Y-%m") + rval = { + 'trading_days': len(self.algorithm_returns.valid()), + 'benchmark_volatility': + self.metrics.benchmark_volatility[dt], + 'algo_volatility': + self.metrics.algorithm_volatility[dt], + 'treasury_period_return': self.treasury_period_return, + 'algorithm_period_return': self.algorithm_period_returns[dt], + 'benchmark_period_return': self.benchmark_period_returns[dt], + 'beta': self.metrics.beta[dt], + 'alpha': self.metrics.alpha[dt], + 'excess_return': self.excess_returns[dt], + 'max_drawdown': self.max_drawdown, + 'period_label': period_label + } + + rval['sharpe'] = self.metrics.sharpe[dt] + rval['sortino'] = self.metrics.sortino[dt] + rval['information'] = self.metrics.information[dt] + + return {k: None + if check_entry(k, v) + else v for k, v in rval.iteritems()} + + def __repr__(self): + statements = [] + metrics = [ + "algorithm_period_returns", + "benchmark_period_returns", + "excess_returns", + "trading_days", + "benchmark_volatility", + "algorithm_volatility", + "sharpe", + "sortino", + "information", + "beta", + "alpha", + "max_drawdown", + "algorithm_returns", + "benchmark_returns", + ] + + for metric in metrics: + value = getattr(self, metric) + if isinstance(value, list): + if len(value) == 0: + value = np.nan + else: + value = value[-1] + statements.append("{m}:{v}".format(m=metric, v=value)) + + return '\n'.join(statements) + + def update_compounded_log_returns(self): + if len(self.algorithm_returns) == 0: + return + + try: + compound = math.log(1 + self.algorithm_returns[ + self.algorithm_returns.last_valid_index()]) + except ValueError: + compound = 0.0 + # BUG? Shouldn't this be set to log(1.0 + 0) ? + + if np.isnan(self.compounded_log_returns[self.latest_dt]): + self.compounded_log_returns[self.latest_dt] = compound + else: + self.compounded_log_returns[self.latest_dt] = \ + self.compounded_log_returns[self.latest_dt] + compound + + def calculate_period_returns(self, returns): + return (1. 
+ returns).prod() - 1 + + def update_current_max(self): + if len(self.compounded_log_returns) == 0: + return + if self.current_max < self.compounded_log_returns[self.latest_dt]: + self.current_max = self.compounded_log_returns[self.latest_dt] + + def calculate_max_drawdown(self): + if len(self.compounded_log_returns) == 0: + return self.max_drawdown + + cur_drawdown = 1.0 - math.exp( + self.compounded_log_returns[self.latest_dt] - + self.current_max) + + if self.max_drawdown < cur_drawdown: + return cur_drawdown + else: + return self.max_drawdown + + def calculate_sharpe(self): + """ + http://en.wikipedia.org/wiki/Sharpe_ratio + """ + return sharpe_ratio(self.metrics.algorithm_volatility[self.latest_dt], + self.annualized_mean_returns[self.latest_dt], + self.daily_treasury[self.latest_dt.date()]) + + def calculate_sortino(self): + """ + http://en.wikipedia.org/wiki/Sortino_ratio + """ + return sortino_ratio(self.annualized_mean_returns[self.latest_dt], + self.daily_treasury[self.latest_dt.date()], + self.metrics.downside_risk[self.latest_dt]) + + def calculate_information(self): + """ + http://en.wikipedia.org/wiki/Information_ratio + """ + return information_ratio( + self.metrics.algorithm_volatility[self.latest_dt], + self.annualized_mean_returns[self.latest_dt], + self.annualized_benchmark_returns[self.latest_dt]) + + def calculate_alpha(self, dt): + """ + http://en.wikipedia.org/wiki/Alpha_(investment) + """ + return alpha(self.annualized_mean_returns[self.latest_dt], + self.treasury_period_return, + self.annualized_benchmark_returns[self.latest_dt], + self.metrics.beta[dt]) + + def calculate_volatility(self, daily_returns): + return np.std(daily_returns) * math.sqrt(252) + + def calculate_downside_risk(self): + rets = self.algorithm_returns + mar = self.mean_returns + downside_diff = (rets[rets < mar] - mar).valid() + return np.std(downside_diff) * math.sqrt(252) + + def calculate_beta(self): + """ + + .. math:: + + \\beta_a = \\frac{\mathrm{Cov}(r_a,r_p)}{\mathrm{Var}(r_p)} + + http://en.wikipedia.org/wiki/Beta_(finance) + """ + # it doesn't make much sense to calculate beta for less than two days, + # so return none. + if len(self.annualized_mean_returns) < 2: + return 0.0 + + returns_matrix = np.vstack([self.annualized_mean_returns, + self.annualized_benchmark_returns]) + C = np.cov(returns_matrix, ddof=1) + algorithm_covariance = C[0][1] + benchmark_variance = C[1][1] + beta = algorithm_covariance / benchmark_variance + + return beta diff --git a/alephnull/finance/risk/period.py b/alephnull/finance/risk/period.py index ddbb5ee..8cf8707 100644 --- a/alephnull/finance/risk/period.py +++ b/alephnull/finance/risk/period.py @@ -24,7 +24,7 @@ import pandas as pd -import risk +from . import risk from . risk import ( alpha, check_entry, @@ -131,7 +131,7 @@ def to_dict(self): } return {k: None if check_entry(k, v) else v - for k, v in rval.iteritems()} + for k, v in rval.items()} def __repr__(self): statements = [] diff --git a/alephnull/finance/risk/period.py.bak b/alephnull/finance/risk/period.py.bak new file mode 100644 index 0000000..ddbb5ee --- /dev/null +++ b/alephnull/finance/risk/period.py.bak @@ -0,0 +1,281 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools + +import logbook +import math +import numpy as np +import numpy.linalg as la + +from alephnull.finance import trading + +import pandas as pd + +import risk +from . risk import ( + alpha, + check_entry, + information_ratio, + sharpe_ratio, + sortino_ratio, +) + +log = logbook.Logger('Risk Period') + +choose_treasury = functools.partial(risk.choose_treasury, + risk.select_treasury_duration) + + +class RiskMetricsPeriod(object): + def __init__(self, start_date, end_date, returns, + benchmark_returns=None): + + treasury_curves = trading.environment.treasury_curves + if treasury_curves.index[-1] >= start_date: + mask = ((treasury_curves.index >= start_date) & + (treasury_curves.index <= end_date)) + + self.treasury_curves = treasury_curves[mask] + else: + # our test is beyond the treasury curve history + # so we'll use the last available treasury curve + self.treasury_curves = treasury_curves[-1:] + + self.start_date = start_date + self.end_date = end_date + + if benchmark_returns is None: + br = trading.environment.benchmark_returns + benchmark_returns = br[(br.index >= returns.index[0]) & + (br.index <= returns.index[-1])] + + self.algorithm_returns = self.mask_returns_to_period(returns) + self.benchmark_returns = self.mask_returns_to_period(benchmark_returns) + self.calculate_metrics() + + def calculate_metrics(self): + + self.benchmark_period_returns = \ + self.calculate_period_returns(self.benchmark_returns) + + self.algorithm_period_returns = \ + self.calculate_period_returns(self.algorithm_returns) + + if not self.algorithm_returns.index.equals( + self.benchmark_returns.index + ): + message = "Mismatch between benchmark_returns ({bm_count}) and \ + algorithm_returns ({algo_count}) in range {start} : {end}" + message = message.format( + bm_count=len(self.benchmark_returns), + algo_count=len(self.algorithm_returns), + start=self.start_date, + end=self.end_date + ) + raise Exception(message) + + self.num_trading_days = len(self.benchmark_returns) + self.benchmark_volatility = self.calculate_volatility( + self.benchmark_returns) + self.algorithm_volatility = self.calculate_volatility( + self.algorithm_returns) + self.treasury_period_return = choose_treasury( + self.treasury_curves, + self.start_date, + self.end_date + ) + self.sharpe = self.calculate_sharpe() + self.sortino = self.calculate_sortino() + self.information = self.calculate_information() + self.beta, self.algorithm_covariance, self.benchmark_variance, \ + self.condition_number, self.eigen_values = self.calculate_beta() + self.alpha = self.calculate_alpha() + self.excess_return = self.algorithm_period_returns - \ + self.treasury_period_return + self.max_drawdown = self.calculate_max_drawdown() + + def to_dict(self): + """ + Creates a dictionary representing the state of the risk report. 
+ Returns a dict object of the form: + """ + period_label = self.end_date.strftime("%Y-%m") + rval = { + 'trading_days': self.num_trading_days, + 'benchmark_volatility': self.benchmark_volatility, + 'algo_volatility': self.algorithm_volatility, + 'treasury_period_return': self.treasury_period_return, + 'algorithm_period_return': self.algorithm_period_returns, + 'benchmark_period_return': self.benchmark_period_returns, + 'sharpe': self.sharpe, + 'sortino': self.sortino, + 'information': self.information, + 'beta': self.beta, + 'alpha': self.alpha, + 'excess_return': self.excess_return, + 'max_drawdown': self.max_drawdown, + 'period_label': period_label + } + + return {k: None if check_entry(k, v) else v + for k, v in rval.iteritems()} + + def __repr__(self): + statements = [] + metrics = [ + "algorithm_period_returns", + "benchmark_period_returns", + "excess_return", + "num_trading_days", + "benchmark_volatility", + "algorithm_volatility", + "sharpe", + "sortino", + "information", + "algorithm_covariance", + "benchmark_variance", + "beta", + "alpha", + "max_drawdown", + "algorithm_returns", + "benchmark_returns", + "condition_number", + "eigen_values" + ] + + for metric in metrics: + value = getattr(self, metric) + statements.append("{m}:{v}".format(m=metric, v=value)) + + return '\n'.join(statements) + + def mask_returns_to_period(self, daily_returns): + if isinstance(daily_returns, list): + returns = pd.Series([x.returns for x in daily_returns], + index=[x.date for x in daily_returns]) + else: # otherwise we're receiving an index already + returns = daily_returns + + trade_days = trading.environment.trading_days + trade_day_mask = returns.index.normalize().isin(trade_days) + + mask = ((returns.index >= self.start_date) & + (returns.index <= self.end_date) & trade_day_mask) + + returns = returns[mask] + return returns + + def calculate_period_returns(self, returns): + period_returns = (1. + returns).prod() - 1 + return period_returns + + def calculate_volatility(self, daily_returns): + return np.std(daily_returns, ddof=1) * math.sqrt(self.num_trading_days) + + def calculate_sharpe(self): + """ + http://en.wikipedia.org/wiki/Sharpe_ratio + """ + return sharpe_ratio(self.algorithm_volatility, + self.algorithm_period_returns, + self.treasury_period_return) + + def calculate_sortino(self, mar=None): + """ + http://en.wikipedia.org/wiki/Sortino_ratio + """ + if mar is None: + mar = self.treasury_period_return + + return sortino_ratio(self.algorithm_returns, + self.algorithm_period_returns, + mar) + + def calculate_information(self): + """ + http://en.wikipedia.org/wiki/Information_ratio + """ + return information_ratio(self.algorithm_returns, + self.benchmark_returns) + + def calculate_beta(self): + """ + + .. math:: + + \\beta_a = \\frac{\mathrm{Cov}(r_a,r_p)}{\mathrm{Var}(r_p)} + + http://en.wikipedia.org/wiki/Beta_(finance) + """ + # it doesn't make much sense to calculate beta for less than two days, + # so return none. 
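+        # (Illustration, not from the original module: beta is read off the
+        # 2x2 sample covariance matrix built below. For example,
+        #     returns_matrix = np.vstack([[.01, -.02, .03], [.02, -.01, .02]])
+        #     C = np.cov(returns_matrix, ddof=1)
+        #     beta = C[0][1] / C[1][1]   # Cov(r_a, r_p) / Var(r_p)
+        # gives the algorithm's sensitivity to the benchmark.)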
+ if len(self.algorithm_returns) < 2: + return 0.0, 0.0, 0.0, 0.0, [] + + returns_matrix = np.vstack([self.algorithm_returns, + self.benchmark_returns]) + C = np.cov(returns_matrix, ddof=1) + eigen_values = la.eigvals(C) + condition_number = max(eigen_values) / min(eigen_values) + algorithm_covariance = C[0][1] + benchmark_variance = C[1][1] + beta = algorithm_covariance / benchmark_variance + + return ( + beta, + algorithm_covariance, + benchmark_variance, + condition_number, + eigen_values + ) + + def calculate_alpha(self): + """ + http://en.wikipedia.org/wiki/Alpha_(investment) + """ + return alpha(self.algorithm_period_returns, + self.treasury_period_return, + self.benchmark_period_returns, + self.beta) + + def calculate_max_drawdown(self): + compounded_returns = [] + cur_return = 0.0 + for r in self.algorithm_returns: + try: + cur_return += math.log(1.0 + r) + # this is a guard for a single day returning -100% + except ValueError: + log.debug("{cur} return, zeroing the returns".format( + cur=cur_return)) + cur_return = 0.0 + # BUG? Shouldn't this be set to log(1.0 + 0) ? + compounded_returns.append(cur_return) + + cur_max = None + max_drawdown = None + for cur in compounded_returns: + if cur_max is None or cur > cur_max: + cur_max = cur + + drawdown = (cur - cur_max) + if max_drawdown is None or drawdown < max_drawdown: + max_drawdown = drawdown + + if max_drawdown is None: + return 0.0 + + return 1.0 - math.exp(max_drawdown) diff --git a/alephnull/finance/slippage.py b/alephnull/finance/slippage.py index dba348e..bd23e29 100644 --- a/alephnull/finance/slippage.py +++ b/alephnull/finance/slippage.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import division + import abc @@ -125,9 +125,7 @@ def create_transaction(event, order, price, amount): return transaction -class SlippageModel(object): - __metaclass__ = abc.ABCMeta - +class SlippageModel(object, metaclass=abc.ABCMeta): @property def volume_for_bar(self): return self._volume_for_bar diff --git a/alephnull/finance/slippage.py.bak b/alephnull/finance/slippage.py.bak new file mode 100644 index 0000000..dba348e --- /dev/null +++ b/alephnull/finance/slippage.py.bak @@ -0,0 +1,235 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import division + +import abc + +import math + +from copy import copy +from functools import partial +from alephnull.protocol import DATASOURCE_TYPE +import alephnull.utils.math_utils as zp_math + + +def check_order_triggers(order, event): + """ + Given an order and a trade event, return a tuple of + (stop_reached, limit_reached). + For market orders, will return (False, False). + For stop orders, limit_reached will always be False. + For limit orders, stop_reached will always be False. 
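+    For example, given the direction-signed comparison used below, a buy
+    limit at 10.0 is reached once event.price <= 10.0, and a sell limit
+    at 10.0 once event.price >= 10.0.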
+ + Orders that have been triggered already (price targets reached), + the order's current values are returned. + """ + if order.triggered: + return (order.stop_reached, order.limit_reached) + + stop_reached = False + limit_reached = False + # if the stop price is reached, simply set stop_reached + if order.stop is not None: + if (order.direction * (event.price - order.stop) <= 0): + # convert stop -> limit or market + stop_reached = True + + # if the limit price is reached, we execute this order at + # (event.price + simulated_impact) + # we skip this order with a continue when the limit is not reached + if order.limit is not None: + # if limit conditions not met, then continue + if (order.direction * (event.price - order.limit) <= 0): + limit_reached = True + + return (stop_reached, limit_reached) + + +def transact_stub(slippage, commission, event, open_orders): + """ + This is intended to be wrapped in a partial, so that the + slippage and commission models can be enclosed. + """ + for order, transaction in slippage(event, open_orders): + if ( + transaction + and not + zp_math.tolerant_equals(transaction.amount, 0) + ): + direction = math.copysign(1, transaction.amount) + per_share, total_commission = commission.calculate(transaction) + transaction.price = transaction.price + (per_share * direction) + transaction.commission = total_commission + yield order, transaction + + +def transact_partial(slippage, commission): + return partial(transact_stub, slippage, commission) + + +class Transaction(object): + def __init__(self, sid, amount, dt, price, order_id=None, commission=None, contract=None): + self.sid = sid + if contract is not None: + self.contract = contract + self.amount = amount + self.dt = dt + self.price = price + self.order_id = order_id + self.commission = commission + self.type = DATASOURCE_TYPE.TRANSACTION + + def __getitem__(self, name): + return self.__dict__[name] + + def to_dict(self): + py = copy(self.__dict__) + del py['type'] + return py + + +def create_transaction(event, order, price, amount): + # floor the amount to protect against non-whole number orders + # TODO: Investigate whether we can add a robust check in blotter + # and/or tradesimulation, as well. 
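+    # (Note, not in the original: int() truncates toward zero, so e.g. an
+    # order amount of -9.8 yields a magnitude of 9; anything that truncates
+    # below 1 is rejected just below.)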
+ amount_magnitude = int(abs(amount)) + + if amount_magnitude < 1: + raise Exception("Transaction magnitude must be at least 1.") + + txn = { + 'sid': event.sid, + 'amount': int(amount), + 'dt': event.dt, + 'price': price, + 'order_id': order.id + } + + if 'contract' in event: + txn['contract'] = event.contract + + transaction = Transaction(**txn) + + return transaction + + +class SlippageModel(object): + __metaclass__ = abc.ABCMeta + + @property + def volume_for_bar(self): + return self._volume_for_bar + + @abc.abstractproperty + def process_order(self, event, order): + pass + + def simulate(self, event, current_orders): + + self._volume_for_bar = 0 + + for order in current_orders: + + open_amount = order.amount - order.filled + + if zp_math.tolerant_equals(open_amount, 0): + continue + + order.check_triggers(event) + if not order.triggered: + continue + + txn = self.process_order(event, order) + if txn: + self._volume_for_bar += abs(txn.amount) + yield order, txn + + def __call__(self, event, current_orders, **kwargs): + + return self.simulate(event, current_orders, **kwargs) + + +class VolumeShareSlippage(SlippageModel): + def __init__(self, + volume_limit=.25, + price_impact=0.1): + + self.volume_limit = volume_limit + self.price_impact = price_impact + + def __repr__(self): + return """ +{class_name}( + volume_limit={volume_limit}, + price_impact={price_impact}) +""".strip().format(class_name=self.__class__.__name__, + volume_limit=self.volume_limit, + price_impact=self.price_impact) + + def process_order(self, event, order): + + max_volume = self.volume_limit * event.volume + + # price impact accounts for the total volume of transactions + # created against the current minute bar + remaining_volume = max_volume - self.volume_for_bar + if remaining_volume < 1: + # we can't fill any more transactions + return + + # the current order amount will be the min of the + # volume available in the bar or the open amount. + cur_volume = int(min(remaining_volume, abs(order.open_amount))) + + if cur_volume < 1: + return + + # tally the current amount into our total amount ordered. + # total amount will be used to calculate price impact + total_volume = self.volume_for_bar + cur_volume + + volume_share = min(total_volume / event.volume, + self.volume_limit) + + simulated_impact = (volume_share) ** 2 \ + * math.copysign(self.price_impact, order.direction) \ + * event.price + + return create_transaction( + event, + order, + # In the future, we may want to change the next line + # for limit pricing + event.price + simulated_impact, + math.copysign(cur_volume, order.direction) + ) + + +class FixedSlippage(SlippageModel): + def __init__(self, spread=0.0): + """ + Use the fixed slippage model, which will just add/subtract + a specified spread spread/2 will be added on buys and subtracted + on sells per share + """ + self.spread = spread + + def process_order(self, event, order): + return create_transaction( + event, + order, + event.price + (self.spread / 2.0 * order.direction), + order.amount, + ) diff --git a/alephnull/gens/composites.py b/alephnull/gens/composites.py index df2f167..f92274d 100644 --- a/alephnull/gens/composites.py +++ b/alephnull/gens/composites.py @@ -14,6 +14,7 @@ # limitations under the License. 
import heapq +from functools import reduce def _decorate_source(source): diff --git a/alephnull/gens/composites.py.bak b/alephnull/gens/composites.py.bak new file mode 100644 index 0000000..df2f167 --- /dev/null +++ b/alephnull/gens/composites.py.bak @@ -0,0 +1,56 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import heapq + + +def _decorate_source(source): + for message in source: + yield ((message.dt, message.source_id), message) + + +def date_sorted_sources(*sources): + """ + Takes an iterable of sources, generating namestrings and + piping their output into date_sort. + """ + sorted_stream = heapq.merge(*(_decorate_source(s) for s in sources)) + + # Strip out key decoration + for _, message in sorted_stream: + yield message + + +def sequential_transforms(stream_in, *transforms): + """ + Apply each transform in transforms sequentially to each event in stream_in. + Each transform application will add a new entry indexed to the transform's + hash string. + """ + # Recursively apply all transforms to the stream. + stream_out = reduce(lambda stream, tnfm: tnfm.transform(stream), + transforms, + stream_in) + + return stream_out + + +def alias_dt(stream_in): + """ + Alias the dt field to datetime on each message. + """ + for message in stream_in: + message['datetime'] = message['dt'] + yield message diff --git a/alephnull/gens/utils.py b/alephnull/gens/utils.py index eac8acd..a032cb9 100644 --- a/alephnull/gens/utils.py +++ b/alephnull/gens/utils.py @@ -26,7 +26,7 @@ def hash_args(*args, **kwargs): """Define a unique string for any set of representable args.""" arg_string = '_'.join([str(arg) for arg in args]) kwarg_string = '_'.join([str(key) + '=' + str(value) - for key, value in kwargs.iteritems()]) + for key, value in kwargs.items()]) combined = ':'.join([arg_string, kwarg_string]) hasher = md5() @@ -37,7 +37,7 @@ def hash_args(*args, **kwargs): def assert_datasource_protocol(event): """Assert that an event meets the protocol for datasource outputs.""" - assert isinstance(event.source_id, basestring) + assert isinstance(event.source_id, str) assert event.type in DATASOURCE_TYPE # Done packets have no dt. 
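Note that neither hunk shown for this file touches the body of hash_args past `hasher = md5()`: the next line in the source, `hasher.update(combined)`, still passes a str, and on Python 3 hashlib digests accept only bytes, so the call raises TypeError at runtime. A minimal sketch of the remaining fix (assuming UTF-8 is an acceptable encoding for the key string):

    hasher = md5()
    hasher.update(combined.encode('utf-8'))
    return hasher.hexdigest()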
@@ -59,17 +59,17 @@ def assert_trade_protocol(event): def assert_datasource_unframe_protocol(event): """Assert that an event is valid output of zp.DATASOURCE_UNFRAME.""" - assert isinstance(event.source_id, basestring) + assert isinstance(event.source_id, str) assert event.type in DATASOURCE_TYPE def assert_sort_protocol(event): """Assert that an event is valid input to zp.FEED_FRAME.""" - assert isinstance(event.source_id, basestring) + assert isinstance(event.source_id, str) assert event.type in DATASOURCE_TYPE def assert_sort_unframe_protocol(event): """Same as above.""" - assert isinstance(event.source_id, basestring) + assert isinstance(event.source_id, str) assert event.type in DATASOURCE_TYPE diff --git a/alephnull/gens/utils.py.bak b/alephnull/gens/utils.py.bak new file mode 100644 index 0000000..eac8acd --- /dev/null +++ b/alephnull/gens/utils.py.bak @@ -0,0 +1,75 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pytz +import numbers + +from hashlib import md5 +from datetime import datetime +from alephnull.protocol import DATASOURCE_TYPE + + +def hash_args(*args, **kwargs): + """Define a unique string for any set of representable args.""" + arg_string = '_'.join([str(arg) for arg in args]) + kwarg_string = '_'.join([str(key) + '=' + str(value) + for key, value in kwargs.iteritems()]) + combined = ':'.join([arg_string, kwarg_string]) + + hasher = md5() + hasher.update(combined) + return hasher.hexdigest() + + +def assert_datasource_protocol(event): + """Assert that an event meets the protocol for datasource outputs.""" + + assert isinstance(event.source_id, basestring) + assert event.type in DATASOURCE_TYPE + + # Done packets have no dt. 
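+    # (Illustration, not in the original file: a conforming dt is a
+    # timezone-aware UTC timestamp such as datetime(2013, 6, 6,
+    # tzinfo=pytz.utc), which is exactly what the assertions below check.)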
+ if not event.type == DATASOURCE_TYPE.DONE: + assert isinstance(event.dt, datetime) + assert event.dt.tzinfo == pytz.utc + + +def assert_trade_protocol(event): + """Assert that an event meets the protocol for datasource TRADE outputs.""" + assert_datasource_protocol(event) + + assert event.type == DATASOURCE_TYPE.TRADE + assert isinstance(event.sid, int) + assert isinstance(event.price, numbers.Real) + assert isinstance(event.volume, numbers.Integral) + assert isinstance(event.dt, datetime) + + +def assert_datasource_unframe_protocol(event): + """Assert that an event is valid output of zp.DATASOURCE_UNFRAME.""" + assert isinstance(event.source_id, basestring) + assert event.type in DATASOURCE_TYPE + + +def assert_sort_protocol(event): + """Assert that an event is valid input to zp.FEED_FRAME.""" + assert isinstance(event.source_id, basestring) + assert event.type in DATASOURCE_TYPE + + +def assert_sort_unframe_protocol(event): + """Same as above.""" + assert isinstance(event.source_id, basestring) + assert event.type in DATASOURCE_TYPE diff --git a/alephnull/live/broker.py b/alephnull/live/broker.py index d35339d..5b6fc15 100644 --- a/alephnull/live/broker.py +++ b/alephnull/live/broker.py @@ -120,7 +120,7 @@ def process_trade(self, trade_event): if order.amount - order.filled == 0: order.status = ORDER_STATUS.FILLED order.dt = txn.dt - print txn.__dict__ + print(txn.__dict__) yield txn, order self.open_orders[sid] = \ diff --git a/alephnull/live/broker.py.bak b/alephnull/live/broker.py.bak new file mode 100644 index 0000000..d35339d --- /dev/null +++ b/alephnull/live/broker.py.bak @@ -0,0 +1,273 @@ +__author__ = 'oglebrandon' + +from logbook import Logger + +from ib.ext.Contract import Contract +from ib.ext.ExecutionFilter import ExecutionFilter +from ib.ext.Order import Order as IBOrder +from alephnull.finance.blotter import Blotter +from alephnull.utils.protocol_utils import Enum +from alephnull.finance.slippage import Transaction +import alephnull.protocol as zp + + +# Medici fork of IbPy +# https://github.com/CarterBain/Medici +from ib.client.IBrokers import IBClient +import datetime as dt +import pytz + +log = Logger('Blotter') + +ORDER_STATUS = Enum( + 'OPEN', + 'FILLED', + 'CANCELLED' +) + + +def round_for_minimum_price_variation(x): + #Todo: modify to round to minimum tick + return x + + +class LiveBlotter(Blotter): + id_map = {} + + def __init__(self): + super(LiveBlotter, self).__init__() + + + def order(self, sid, amount, limit_price, stop_price, order_id=None): + id = super(LiveBlotter, self).order(sid, amount, limit_price, stop_price, order_id=None) + order_obj = self.orders[id] + + ib_order = IBOrder() + ib_order.m_transmit = True + ib_order.m_orderRef = order_obj.id + ib_order.m_totalQuantity = order_obj.amount + ib_order.m_action = ['BUY' if ib_order.m_totalQuantity > 0 else 'SELL'][0] + ib_order.m_tif = 'DAY' + #Todo: make the FA params configurable + ib_order.m_faGroup = 'ALL' + ib_order.m_faMethod = 'AvailableEquity' + + # infer order type + if order_obj.stop and not order_obj.limit: + ib_order.m_orderType = 'STP' + ib_order.m_auxPrice = float(order_obj.stop) + + elif order_obj.limit and not order_obj.stop: + ib_order.m_orderType = 'LMT' + ib_order.m_lmtPrice = float(order_obj.limit) + + elif order_obj.stop and order_obj.limit: + ib_order.m_orderType = 'STPLMT' + ib_order.m_auxPrice = float(order_obj.stop) + ib_order.m_lmtPrice = float(order_obj.limit) + + else: + ib_order.m_orderType = 'MKT' + + contract = Contract() + contract.m_symbol = order_obj.sid + 
contract.m_currency = 'USD' + + if hasattr(order_obj, 'contract'): + # This is a futures contract + contract.m_secType = 'FUT' + contract.m_exchange = 'GLOBEX' + contract.m_expiry = order_obj.contract + + else: + # This is a stock + contract.m_secType = 'STK' + contract.m_exchange = 'SMART' + + ib_id = self.place_order(contract, ib_order) + self.id_map[order_obj.id] = ib_id + + return order_obj.id + + def cancel(self, order_id): + ib_id = self.id_map[order_id] + self.cancel_order(ib_id) + super(Blotter, self).order(order_id) + + def process_trade(self, trade_event): + + # checks if event is trade + if trade_event.type != zp.DATASOURCE_TYPE.TRADE: + return + + # checks if is future contract + if hasattr(trade_event, 'contract'): + sid = (trade_event.sid, trade_event.cotract) + else: + sid = trade_event.sid + + if sid in self.open_orders: + orders = self.open_orders[sid] + # sort orders by datetime, and filter out future dates + # lambda x: sort([order.dt for order in orders]) + + else: + return + + for order, txn in self.get_transactions(trade_event, orders): + # check that not commission + order.filled += txn.amount + if order.amount - order.filled == 0: + order.status = ORDER_STATUS.FILLED + order.dt = txn.dt + print txn.__dict__ + yield txn, order + + self.open_orders[sid] = \ + [order for order + in self.open_orders[sid] + if order.open] + + +class LiveExecution(IBClient): + """Client connection to the Interactive Brokers API + inherits from IBClient in the Medici fork of IbPy + """ + + def __init__(self, call_msg): + super(LiveExecution, self).__init__(call_msg=call_msg) + self._blotter = LiveBlotter() + self._blotter.place_order = self.place_order + self._blotter.get_transactions = self.get_transactions + self._blotter.cancel_order = self.cancel_order + super(LiveExecution, self).__track_orders__() + + @property + def blotter(self): + return self._blotter + + + def __ib_to_aleph_sym_map__(self, contract): + decade = dt.date.today().strftime('%y')[0] + sym = contract.m_symbol + exp = contract.m_localSymbol.split(sym)[1] + exp = exp[0] + decade[0] + exp[1] + + return (sym, exp) + + + def total_cash(self): + cash = 0 + for acct in self.account.child_accounts: + try: + cash += float([x.value for x in self.account_details(acct) + if x.key == 'TotalCashValue'][0]) + except: + return self.total_cash() + + return cash + + def ib_portfolio(self): + + portfolio_store = zp.Portfolio() + positions_store = zp.Positions() + + for acct in self.account.child_accounts: + positions = self.portfolio(acct) + for pos in positions: + # Skip empty requests + if hasattr(pos, 'contract'): + contract = pos.contract + + # determine position sid + if contract.m_secType == 'STK': + sid = contract.m_localSymbol + if contract.m_secType == 'FUT': + sid = self.__ib_to_aleph_sym_map__(contract) + + # if sid not in positions create a new position object + if sid not in positions_store: + if type(sid) is tuple: + positions_store[sid] = zp.Position(sid[0], contract=sid[1]) + else: + positions_store[sid] = zp.Position(sid) + + positions_store[sid].amount = pos.position_size + positions_store[sid].last_sale_price = pos.market_price + positions_store[sid].cost_basis = pos.avg_cost + else: + current_size = positions_store[sid].amount + # adjust cost basis: + # this should never result in a different value unless + # IB doesn't enforce best execution + positions_store[sid].amount += pos.position_size + if positions_store[sid].amount != 0: + mkt_value = positions_store[sid].cost_basis * current_size + added_value = 
pos.avg_cost * pos.position_size
+                        positions_store[sid].cost_basis = (mkt_value + added_value) / \
+                            positions_store[sid].amount
+
+                portfolio_store.positions_value += pos.market_value
+                portfolio_store.pnl = pos.realized_pnl + pos.unrealized_pnl
+                portfolio_store.positions = positions_store
+
+        return portfolio_store
+
+    def get_transactions(self, event, orders):
+        import time
+
+        time.sleep(1)
+        efilter = ExecutionFilter()
+        efilter.m_symbol = event.sid
+
+        for order in orders:
+
+            # Todo: I need to refactor how executions are summoned, this is currently a huge bottleneck
+            # cycle through all executions matching the event sid
+            for execution in self.executions(efilter):
+                prior_execution = None
+
+                # further filter out any executions not matching the order.id
+                if execution.m_orderRef == order.id:
+
+                    # prevent processing of duplicate executions
+                    if execution != prior_execution:
+                        order_status_vals = (0, 0)
+
+                        # cycle through the order status messages to get transaction details
+                        for status in self.order_status(execution.m_orderId):
+
+                            # filter out duplicate transaction messages
+                            if (status['remaining'], status['filled']) != order_status_vals:
+
+                                # get execution date
+                                date = dt.datetime.strptime(execution.m_time,
+                                                            '%Y%m%d %H:%M:%S').replace(tzinfo=pytz.utc)
+                                amount = status['filled'] - order_status_vals[1]
+
+                                txn = {'sid': event.sid,
+                                       'amount': int(amount),
+                                       'dt': date,
+                                       'price': status['lastFillPrice'],
+                                       'order_id': order.id}
+
+                                transaction = Transaction(**txn)
+                                order_status_vals = (status['remaining'], status['filled'])
+                                #TODO: pretty sure there is still transactions are being duplicated still
+                                if order.status == ORDER_STATUS.OPEN:
+                                    yield order, transaction
+
+                        prior_execution = execution
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/alephnull/protocol.py b/alephnull/protocol.py
index 4fd3c3b..8df77f8 100644
--- a/alephnull/protocol.py
+++ b/alephnull/protocol.py
@@ -49,7 +49,7 @@ def __delitem__(self, name):
         delattr(self, name)
 
     def keys(self):
-        return self.__dict__.keys()
+        return list(self.__dict__.keys())
 
     def __eq__(self, other):
         return self.__dict__ == other.__dict__
@@ -177,7 +177,7 @@ def __delitem__(self, name):
         del self._data[name]
 
     def __iter__(self):
-        for sid, data in self._data.iteritems():
+        for sid, data in self._data.items():
             # Allow contains override to filter out sids.
             if sid in self:
                 if len(data):
@@ -185,25 +185,29 @@
     def iterkeys(self):
         # Allow contains override to filter out sids.
-        return (sid for sid in self._data.iterkeys() if sid in self)
+        return (sid for sid in self._data.keys() if sid in self)
 
     def keys(self):
         # Allow contains override to filter out sids.
+        # NB: keys(), values() and items() must keep delegating to their
+        # iter* counterparts; rewriting these bodies as list(self.keys()),
+        # list(self.values()) or list(self.items()) makes each method
+        # recurse into itself until the stack overflows.
         return list(self.iterkeys())
 
     def itervalues(self):
         return (value for sid, value in self.iteritems())
 
     def values(self):
         return list(self.itervalues())
 
     def iteritems(self):
         return ((sid, value) for sid, value
-                in self._data.iteritems()
+                in self._data.items()
                 if sid in self)
 
     def items(self):
         return list(self.iteritems())
 
     def __len__(self):
-        return len(self.keys())
+        return len(list(self.keys()))
diff --git a/alephnull/protocol.py.bak b/alephnull/protocol.py.bak
new file mode 100644
index 0000000..4fd3c3b
--- /dev/null
+++ b/alephnull/protocol.py.bak
@@ -0,0 +1,209 @@
+#
+# Copyright 2013 Quantopian, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . utils.protocol_utils import Enum + +# Datasource type should completely determine the other fields of a +# message with its type. +DATASOURCE_TYPE = Enum( + 'AS_TRADED_EQUITY', + 'MERGER', + 'SPLIT', + 'DIVIDEND', + 'TRADE', + 'TRANSACTION', + 'ORDER', + 'EMPTY', + 'DONE', + 'CUSTOM', + 'BENCHMARK', + 'COMMISSION' +) + + +class Event(object): + + def __init__(self, initial_values=None): + if initial_values: + self.__dict__ = initial_values + + def __getitem__(self, name): + return getattr(self, name) + + def __setitem__(self, name, value): + setattr(self, name, value) + + def __delitem__(self, name): + delattr(self, name) + + def keys(self): + return self.__dict__.keys() + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + def __contains__(self, name): + return name in self.__dict__ + + def __repr__(self): + return "Event({0})".format(self.__dict__) + + +class Order(Event): + pass + + +class Portfolio(object): + + def __init__(self): + self.capital_used = 0.0 + self.starting_cash = 0.0 + self.portfolio_value = 0.0 + self.pnl = 0.0 + self.returns = 0.0 + self.cash = 0.0 + self.positions = Positions() + self.start_date = None + self.positions_value = 0.0 + self.portfolio_value = 0.0 + + def __getitem__(self, key): + return self.__dict__[key] + + def __repr__(self): + return "Portfolio({0})".format(self.__dict__) + + +class Position(object): + + def __init__(self, sid, contract=None): + self.sid = sid + if contract is not None: + self.contract = contract + self.amount = 0 + self.cost_basis = 0.0 # per share + self.last_sale_price = 0.0 + + def __getitem__(self, key): + return self.__dict__[key] + + def __repr__(self): + return "Position({0})".format(self.__dict__) + + +class Positions(dict): + + def __missing__(self, key): + if type(key) is tuple: + pos = Position(key[0], contract=key[1]) + else: + pos = Position(key) + self[key] = pos + return pos + + + +class SIDData(object): + + def __init__(self, initial_values=None): + if initial_values: + self.__dict__ = initial_values + + def __getitem__(self, name): + return self.__dict__[name] + + def __setitem__(self, name, value): + self.__dict__[name] = value + + def __len__(self): + return len(self.__dict__) + + def __contains__(self, name): + return name in self.__dict__ + + def __repr__(self): + return "SIDData({0})".format(self.__dict__) + + +class BarData(object): + """ + Holds the event data for all sids for a given dt. + + This is what is passed as `data` to the `handle_data` function. + + Note: Many methods are analogues of dictionary because of historical + usage of what this replaced as a dictionary subclass. + """ + + def __init__(self): + self._data = {} + self._contains_override = None + + def __contains__(self, name): + if self._contains_override: + if self._contains_override(name): + return name in self._data + else: + return False + else: + return name in self._data + + def has_key(self, name): + """ + DEPRECATED: __contains__ is preferred, but this method is for + compatibility with existing algorithms. 
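+        For example, data.has_key(133) is equivalent to (133 in data).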
+ """ + return name in self + + def __setitem__(self, name, value): + self._data[name] = value + + def __getitem__(self, name): + return self._data[name] + + def __delitem__(self, name): + del self._data[name] + + def __iter__(self): + for sid, data in self._data.iteritems(): + # Allow contains override to filter out sids. + if sid in self: + if len(data): + yield sid + + def iterkeys(self): + # Allow contains override to filter out sids. + return (sid for sid in self._data.iterkeys() if sid in self) + + def keys(self): + # Allow contains override to filter out sids. + return list(self.iterkeys()) + + def itervalues(self): + return (value for sid, value in self.iteritems()) + + def values(self): + return list(self.itervalues()) + + def iteritems(self): + return ((sid, value) for sid, value + in self._data.iteritems() + if sid in self) + + def items(self): + return list(self.iteritems()) + + def __len__(self): + return len(self.keys()) diff --git a/alephnull/roll_method.py b/alephnull/roll_method.py index e622ed0..8665386 100644 --- a/alephnull/roll_method.py +++ b/alephnull/roll_method.py @@ -9,9 +9,9 @@ def wrap(func): def modified_func(self, data): positions = self.portfolio.positions frames = {} - for sym in data.keys(): + for sym in list(data.keys()): frames[sym] = DataFrame({k: Series(v.__dict__) for - k, v in data[sym].iteritems()}) + k, v in data[sym].items()}) all_ = pd.concat(frames, axis=1).T try: @@ -44,7 +44,7 @@ def modified_func(self, data): [self.order(exp, stack) for exp in front_months if exp[0] == sym[0]] bar_data = BarData() - bar_data.__dict__['_data'].update({k: SIDData(v) for k, v in data.iteritems()}) + bar_data.__dict__['_data'].update({k: SIDData(v) for k, v in data.items()}) return func(self, bar_data) diff --git a/alephnull/roll_method.py.bak b/alephnull/roll_method.py.bak new file mode 100644 index 0000000..e622ed0 --- /dev/null +++ b/alephnull/roll_method.py.bak @@ -0,0 +1,55 @@ +import pandas as pd +from pandas import Series, DataFrame + +from alephnull.protocol import BarData, SIDData + + +def roll(logic): + def wrap(func): + def modified_func(self, data): + positions = self.portfolio.positions + frames = {} + for sym in data.keys(): + frames[sym] = DataFrame({k: Series(v.__dict__) for + k, v in data[sym].iteritems()}) + + all_ = pd.concat(frames, axis=1).T + try: + all_ = all_.groupby(axis=0, level=0).apply(logic).reset_index( + level=(0, 2), drop=True) + except: + all_ = all_.groupby(axis=0, level=0).apply(logic) + + + #Todo: handle multiple contract returns + all_ = all_.groupby(axis=0, level=0).agg(lambda x: x.max()) + + #Todo: Data should be reconstructed into BarData object + data = all_.T.to_dict() + + front_months = [(sym, all_.ix[sym]['contract']) for sym in all_.index] + back_months = [sym for sym in self.perf_tracker.get_portfolio().positions + if sym not in front_months] + + offsets = {} + for sym in back_months: + offsets[sym] = 0 + for order_id in self.get_orders(sym): + order = self.blotter.orders[order_id] + if order.status != 3: + offsets[sym] += (order.amount - order.filled) + stack = self.perf_tracker.get_portfolio().positions[sym].amount + offsets[sym] + if stack != 0: + self.order(sym, -stack) + [self.order(exp, stack) for exp in front_months if exp[0] == sym[0]] + + bar_data = BarData() + bar_data.__dict__['_data'].update({k: SIDData(v) for k, v in data.iteritems()}) + + return func(self, bar_data) + + return modified_func + + return wrap + + diff --git a/alephnull/sources/data_frame_source.py b/alephnull/sources/data_frame_source.py 
index c307da9..1b9b2c0 100644 --- a/alephnull/sources/data_frame_source.py +++ b/alephnull/sources/data_frame_source.py @@ -142,7 +142,7 @@ def raw_data_gen(self): 'dt': dt, 'sid': sid, } - for field_name, value in series.iteritems(): + for field_name, value in series.items(): event[field_name] = value yield event diff --git a/alephnull/sources/data_frame_source.py.bak b/alephnull/sources/data_frame_source.py.bak new file mode 100644 index 0000000..c307da9 --- /dev/null +++ b/alephnull/sources/data_frame_source.py.bak @@ -0,0 +1,154 @@ + +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Tools to generate data sources. +""" +import pandas as pd + +from alephnull.gens.utils import hash_args + +from alephnull.sources.data_source import DataSource + + +class DataFrameSource(DataSource): + """ + Yields all events in event_list that match the given sid_filter. + If no event_list is specified, generates an internal stream of events + to filter. Returns all events if filter is None. + + Configuration options: + + sids : list of values representing simulated internal sids + start : start date + delta : timedelta between internal events + filter : filter to remove the sids + """ + + def __init__(self, data, **kwargs): + assert isinstance(data.index, pd.tseries.index.DatetimeIndex) + + self.data = data + # Unpack config dictionary with default values. + self.sids = kwargs.get('sids', data.columns) + self.start = kwargs.get('start', data.index[0]) + self.end = kwargs.get('end', data.index[-1]) + + # Hash_value for downstream sorting. + self.arg_string = hash_args(data, **kwargs) + + self._raw_data = None + + @property + def mapping(self): + return { + 'dt': (lambda x: x, 'dt'), + 'sid': (lambda x: x, 'sid'), + 'price': (float, 'price'), + 'volume': (int, 'volume'), + } + + @property + def instance_hash(self): + return self.arg_string + + def raw_data_gen(self): + for dt, series in self.data.iterrows(): + for sid, price in series.iterkv(): + if sid in self.sids: + event = { + 'dt': dt, + 'sid': sid, + 'price': price, + 'volume': 1000, + } + yield event + + @property + def raw_data(self): + if not self._raw_data: + self._raw_data = self.raw_data_gen() + return self._raw_data + + +class DataPanelSource(DataSource): + """ + Yields all events in event_list that match the given sid_filter. + If no event_list is specified, generates an internal stream of events + to filter. Returns all events if filter is None. + + Configuration options: + + sids : list of values representing simulated internal sids + start : start date + delta : timedelta between internal events + filter : filter to remove the sids + """ + + def __init__(self, data, **kwargs): + assert isinstance(data.major_axis, pd.tseries.index.DatetimeIndex) + + self.data = data + # Unpack config dictionary with default values. 
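+        # (Panel axes, for reference: items -> sids, major_axis -> timestamps,
+        # minor_axis -> per-sid fields, which is why sids default to
+        # data.items below.)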
+ self.sids = kwargs.get('sids', data.items) + self.start = kwargs.get('start', data.major_axis[0]) + self.end = kwargs.get('end', data.major_axis[-1]) + + # Hash_value for downstream sorting. + self.arg_string = hash_args(data, **kwargs) + + self._raw_data = None + + @property + def mapping(self): + mapping = { + 'dt': (lambda x: x, 'dt'), + 'sid': (lambda x: x, 'sid'), + 'price': (float, 'price'), + 'volume': (int, 'volume'), + } + + # Add additional fields. + for field_name in self.data.minor_axis: + if field_name in ['price', 'volume', 'dt', 'sid']: + continue + mapping[field_name] = (lambda x: x, field_name) + + return mapping + + @property + def instance_hash(self): + return self.arg_string + + def raw_data_gen(self): + for dt in self.data.major_axis: + df = self.data.major_xs(dt) + for sid, series in df.iterkv(): + if sid in self.sids: + event = { + 'dt': dt, + 'sid': sid, + } + for field_name, value in series.iteritems(): + event[field_name] = value + + yield event + + @property + def raw_data(self): + if not self._raw_data: + self._raw_data = self.raw_data_gen() + return self._raw_data diff --git a/alephnull/sources/data_source.py b/alephnull/sources/data_source.py index 2a711dc..ad808d7 100644 --- a/alephnull/sources/data_source.py +++ b/alephnull/sources/data_source.py @@ -7,9 +7,7 @@ from alephnull.protocol import Event -class DataSource(object): - - __metaclass__ = ABCMeta +class DataSource(object, metaclass=ABCMeta): @property def event_type(self): @@ -47,7 +45,7 @@ def apply_mapping(self, raw_row): """ row = {target: mapping_func(raw_row[source_key]) for target, (mapping_func, source_key) - in self.mapping.items()} + in list(self.mapping.items())} row.update({'source_id': self.get_hash()}) row.update({'type': self.event_type}) return row @@ -60,5 +58,5 @@ def mapped_data(self): def __iter__(self): return self - def next(self): - return self.mapped_data.next() + def __next__(self): + return next(self.mapped_data) diff --git a/alephnull/sources/data_source.py.bak b/alephnull/sources/data_source.py.bak new file mode 100644 index 0000000..2a711dc --- /dev/null +++ b/alephnull/sources/data_source.py.bak @@ -0,0 +1,64 @@ +from abc import ( + ABCMeta, + abstractproperty +) + +from alephnull.protocol import DATASOURCE_TYPE +from alephnull.protocol import Event + + +class DataSource(object): + + __metaclass__ = ABCMeta + + @property + def event_type(self): + return DATASOURCE_TYPE.TRADE + + @property + def mapping(self): + """ + Mappings of the form: + target_key: (mapping_function, source_key) + """ + return {} + + @abstractproperty + def raw_data(self): + """ + An iterator that yields the raw datasource, + in chronological order of data, one event at a time. + """ + NotImplemented + + @abstractproperty + def instance_hash(self): + """ + A hash that represents the unique args to the source. + """ + pass + + def get_hash(self): + return self.__class__.__name__ + "-" + self.instance_hash + + def apply_mapping(self, raw_row): + """ + Override this to hand craft conversion of row. 
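+        For example, with mapping {'price': (float, 'price')}, a raw row
+        {'price': '10.5'} becomes {'price': 10.5} before source_id and
+        type are stamped on below.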
+ """ + row = {target: mapping_func(raw_row[source_key]) + for target, (mapping_func, source_key) + in self.mapping.items()} + row.update({'source_id': self.get_hash()}) + row.update({'type': self.event_type}) + return row + + @property + def mapped_data(self): + for row in self.raw_data: + yield Event(self.apply_mapping(row)) + + def __iter__(self): + return self + + def next(self): + return self.mapped_data.next() diff --git a/alephnull/sources/futures_data_frame_source.py b/alephnull/sources/futures_data_frame_source.py index f770133..b2febd9 100644 --- a/alephnull/sources/futures_data_frame_source.py +++ b/alephnull/sources/futures_data_frame_source.py @@ -70,7 +70,7 @@ def raw_data_gen(self): if sid not in events: events[sid] = {'dt': dt, 'sid': sid} events[sid][metric] = value - for event in events.itervalues(): + for event in events.values(): yield event @property diff --git a/alephnull/sources/futures_data_frame_source.py.bak b/alephnull/sources/futures_data_frame_source.py.bak new file mode 100644 index 0000000..f770133 --- /dev/null +++ b/alephnull/sources/futures_data_frame_source.py.bak @@ -0,0 +1,80 @@ +import pandas as pd + +from alephnull.gens.utils import hash_args + +from alephnull.sources.data_source import DataSource + + +class FuturesDataFrameSource(DataSource): + """ + Yields all events in event_list that match the given sid_filter. + If no event_list is specified, generates an internal stream of events + to filter. Returns all events if filter is None. + + Configuration options: + + sids : list of values representing simulated internal sids + start : start date + delta : timedelta between internal events + filter : filter to remove the sids + """ + + def __init__(self, data, **kwargs): + """ + Data must be a DataFrame formatted like this: + + ################################################################################################# + # # GS # TW # + # # N10 # H10 # G14 # H14 # + # # Price # Volume # Price # Volume # Price # Metric3 # Price # Metric3 # + # 2013-12-20 00:09:15 # 101.00 # 1000 # 60.34 # 2500 # 400.00 # -0.0034 # Price # -5.0 # + # 2013-12-20 00:09:17 # 201.00 # 2000 # 20.34 # 2500 # 200.00 # -2.0034 # Price # -2.0 # + # etc... # + ################################################################################################# + + """ + assert isinstance(data.index, pd.tseries.index.DatetimeIndex) + + self.data = data + # Unpack config dictionary with default values. + self.sids = kwargs.get('sids', list(set(['.'.join(tup[:2]) for tup in data.columns]))) + self.start = kwargs.get('start', data.index[0]) + self.end = kwargs.get('end', data.index[-1]) + + # Hash_value for downstream sorting. 
+ self.arg_string = hash_args(data, **kwargs) + + self._raw_data = None + + @property + def mapping(self): + return { + 'dt': (lambda x: x, 'dt'), + 'sid': (lambda x: x[:x.find(".")], 'sid'), + 'contract': (lambda x: x[x.find(".") + 1:], 'sid'), + 'price': (float, 'price'), + 'volume': (int, 'volume'), + 'open_interest': (int, 'open_interest'), + } + + @property + def instance_hash(self): + return self.arg_string + + def raw_data_gen(self): + for dt, series in self.data.iterrows(): + events = {} + for (underlying, exp, metric), value in series.iterkv(): + sid = '.'.join([underlying, exp]) + if sid in self.sids: + if sid not in events: + events[sid] = {'dt': dt, 'sid': sid} + events[sid][metric] = value + for event in events.itervalues(): + yield event + + @property + def raw_data(self): + if not self._raw_data: + self._raw_data = self.raw_data_gen() + return self._raw_data \ No newline at end of file diff --git a/alephnull/sources/test_source.py b/alephnull/sources/test_source.py index b532e4a..1f9ff19 100644 --- a/alephnull/sources/test_source.py +++ b/alephnull/sources/test_source.py @@ -19,7 +19,7 @@ import pytz -from itertools import cycle, ifilter, izip +from itertools import cycle from datetime import datetime, timedelta import numpy as np @@ -68,9 +68,9 @@ def date_gen(start=datetime(2006, 6, 6, 12, tzinfo=pytz.utc), # during trading hours. # NB: Being inside of trading hours is currently dependent upon the # count parameter being less than the number of trading minutes in a day - for i in xrange(count): + for i in range(count): if repeats: - for j in xrange(repeats): + for j in range(repeats): yield cur else: yield cur @@ -90,7 +90,7 @@ def mock_prices(count): Utility to generate a stream of mock prices. By default cycles through values from 0.0 to 10.0, n times. """ - return (float(i % 10) + 1.0 for i in xrange(count)) + return (float(i % 10) + 1.0 for i in range(count)) def mock_volumes(count): @@ -98,7 +98,7 @@ def mock_volumes(count): Utility to generate a set of volumes. By default cycles through values from 100 to 1000, incrementing by 50. """ - return ((i * 50) % 900 + 100 for i in xrange(count)) + return ((i * 50) % 900 + 100 for i in range(count)) class SpecificEquityTrades(object): @@ -160,8 +160,8 @@ def __init__(self, *args, **kwargs): def __iter__(self): return self - def next(self): - return self.generator.next() + def __next__(self): + return next(self.generator) def rewind(self): self.generator = self.create_fresh_generator() @@ -204,7 +204,7 @@ def create_fresh_generator(self): sids = cycle(self.sids) # Combine the iterators into a single iterator of arguments - arg_gen = izip(sids, prices, volumes, dates) + arg_gen = zip(sids, prices, volumes, dates) # Convert argument packages into events. unfiltered = (create_trade(*args, source_id=self.get_hash()) @@ -213,7 +213,7 @@ def create_fresh_generator(self): # If we specified a sid filter, filter out elements that don't # match the filter. if self.filter: - filtered = ifilter( + filtered = filter( lambda event: event.sid in self.filter, unfiltered) # Otherwise just use all events. diff --git a/alephnull/sources/test_source.py.bak b/alephnull/sources/test_source.py.bak new file mode 100644 index 0000000..b532e4a --- /dev/null +++ b/alephnull/sources/test_source.py.bak @@ -0,0 +1,224 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +A source to be used in testing. +""" + +import pytz + +from itertools import cycle, ifilter, izip +from datetime import datetime, timedelta +import numpy as np + +from alephnull.protocol import ( + Event, + DATASOURCE_TYPE +) +from alephnull.gens.utils import hash_args +from alephnull.utils.tradingcalendar import trading_days + + +def create_trade(sid, price, amount, datetime, source_id="test_factory"): + + trade = Event() + + trade.source_id = source_id + trade.type = DATASOURCE_TYPE.TRADE + trade.sid = sid + trade.dt = datetime + trade.price = price + trade.close_price = price + trade.open_price = price + trade.low = price * .95 + trade.high = price * 1.05 + trade.volume = amount + + return trade + + +def date_gen(start=datetime(2006, 6, 6, 12, tzinfo=pytz.utc), + delta=timedelta(minutes=1), + count=100, + repeats=None): + """ + Utility to generate a stream of dates. + """ + one_day = timedelta(days=1) + cur = start + if delta == one_day: + # if we are producing daily timestamps, we + # use midnight + cur = cur.replace(hour=0, minute=0, second=0, + microsecond=0) + + # yield count trade events, all on trading days, and + # during trading hours. + # NB: Being inside of trading hours is currently dependent upon the + # count parameter being less than the number of trading minutes in a day + for i in xrange(count): + if repeats: + for j in xrange(repeats): + yield cur + else: + yield cur + + cur = cur + delta + cur_midnight = cur.replace(hour=0, minute=0, second=0, microsecond=0) + # skip over any non-trading days + while cur_midnight not in trading_days: + cur = cur + one_day + cur_midnight = cur.replace(hour=0, minute=0, second=0, + microsecond=0) + cur = cur.replace(day=cur_midnight.day) + + +def mock_prices(count): + """ + Utility to generate a stream of mock prices. By default + cycles through values from 0.0 to 10.0, n times. + """ + return (float(i % 10) + 1.0 for i in xrange(count)) + + +def mock_volumes(count): + """ + Utility to generate a set of volumes. By default cycles + through values from 100 to 1000, incrementing by 50. + """ + return ((i * 50) % 900 + 100 for i in xrange(count)) + + +class SpecificEquityTrades(object): + """ + Yields all events in event_list that match the given sid_filter. + If no event_list is specified, generates an internal stream of events + to filter. Returns all events if filter is None. + + Configuration options: + + count : integer representing number of trades + sids : list of values representing simulated internal sids + start : start date + delta : timedelta between internal events + filter : filter to remove the sids + """ + + def __init__(self, *args, **kwargs): + # We shouldn't get any positional arguments. + assert len(args) == 0 + + # Default to None for event_list and filter. + self.event_list = kwargs.get('event_list') + self.filter = kwargs.get('filter') + + if self.event_list is not None: + # If event_list is provided, extract parameters from there + # This isn't really clean and ultimately I think this + # class should serve a single purpose (either take an + # event_list or autocreate events). 
+ self.count = kwargs.get('count', len(self.event_list)) + self.sids = kwargs.get( + 'sids', + np.unique([event.sid for event in self.event_list]).tolist()) + self.start = kwargs.get('start', self.event_list[0].dt) + self.end = kwargs.get('start', self.event_list[-1].dt) + self.delta = kwargs.get( + 'delta', + self.event_list[1].dt - self.event_list[0].dt) + self.concurrent = kwargs.get('concurrent', False) + + else: + # Unpack config dictionary with default values. + self.count = kwargs.get('count', 500) + self.sids = kwargs.get('sids', [1, 2]) + self.start = kwargs.get( + 'start', + datetime(2008, 6, 6, 15, tzinfo=pytz.utc)) + self.delta = kwargs.get( + 'delta', + timedelta(minutes=1)) + self.concurrent = kwargs.get('concurrent', False) + + # Hash_value for downstream sorting. + self.arg_string = hash_args(*args, **kwargs) + + self.generator = self.create_fresh_generator() + + def __iter__(self): + return self + + def next(self): + return self.generator.next() + + def rewind(self): + self.generator = self.create_fresh_generator() + + def get_hash(self): + return self.__class__.__name__ + "-" + self.arg_string + + def update_source_id(self, gen): + for event in gen: + event.source_id = self.get_hash() + yield event + + def create_fresh_generator(self): + + if self.event_list: + event_gen = (event for event in self.event_list) + unfiltered = self.update_source_id(event_gen) + + # Set up iterators for each expected field. + else: + if self.concurrent: + # in this context the count is the number of + # trades per sid, not the total. + dates = date_gen( + count=self.count, + start=self.start, + delta=self.delta, + repeats=len(self.sids), + ) + else: + dates = date_gen( + count=self.count, + start=self.start, + delta=self.delta + ) + + prices = mock_prices(self.count) + volumes = mock_volumes(self.count) + + sids = cycle(self.sids) + + # Combine the iterators into a single iterator of arguments + arg_gen = izip(sids, prices, volumes, dates) + + # Convert argument packages into events. + unfiltered = (create_trade(*args, source_id=self.get_hash()) + for args in arg_gen) + + # If we specified a sid filter, filter out elements that don't + # match the filter. + if self.filter: + filtered = ifilter( + lambda event: event.sid in self.filter, unfiltered) + + # Otherwise just use all events. + else: + filtered = unfiltered + + # Return the filtered event stream. + return filtered diff --git a/alephnull/test_algorithms.py b/alephnull/test_algorithms.py index 2a80698..d7f210f 100644 --- a/alephnull/test_algorithms.py +++ b/alephnull/test_algorithms.py @@ -191,7 +191,7 @@ def initialize(self, sid): def handle_data(self, data): # Unless we're running on some sort of # supercomputer this will hit timeout. - for i in xrange(1000000000): + for i in range(1000000000): self.foo = i @@ -340,7 +340,7 @@ def handle_data(self, data): self.target_shares = 10 return else: - print self.portfolio + print(self.portfolio) assert self.portfolio.positions[0]['amount'] == \ self.target_shares, "Orders not filled immediately." 
assert self.portfolio.positions[0]['last_sale_price'] == \ @@ -543,7 +543,7 @@ def handle_data(self, data): self.return_nan.handle_data(data)) else: nan_data = deepcopy(data) - for sid in nan_data.iterkeys(): + for sid in nan_data.keys(): nan_data[sid].price = np.nan self.history_return_nan.append( self.return_nan.handle_data(nan_data)) diff --git a/alephnull/test_algorithms.py.bak b/alephnull/test_algorithms.py.bak new file mode 100644 index 0000000..2a80698 --- /dev/null +++ b/alephnull/test_algorithms.py.bak @@ -0,0 +1,633 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +Algorithm Protocol +=================== + +For a class to be passed as a trading algorithm to the +:py:class:`zipline.lines.SimulatedTrading` zipline it must follow an +implementation protocol. Examples of this algorithm protocol are provided +below. + +The algorithm must expose methods: + + - initialize: method that takes no args, no returns. Simply called to + enable the algorithm to set any internal state needed. + + - get_sid_filter: method that takes no args, and returns a list of valid + sids. List must have a length between 1 and 10. If None is returned the + filter will block all events. + + - handle_data: method that accepts a :py:class:`zipline.protocol.BarData` + of the current state of the simulation universe. An example data object: + + .. This outputs the table as an HTML table but for some reason there + is no bounding box. Make the previous paragraph ending colon a + double-colon to turn this back into blockquoted table in ASCII art. + + +-----------------+--------------+----------------+-------------------+ + | | sid(133) | sid(134) | sid(135) | + +=================+==============+================+===================+ + | price | $10.10 | $22.50 | $13.37 | + +-----------------+--------------+----------------+-------------------+ + | volume | 10,000 | 5,000 | 50,000 | + +-----------------+--------------+----------------+-------------------+ + | mvg_avg_30 | $9.97 | $22.61 | $13.37 | + +-----------------+--------------+----------------+-------------------+ + | dt | 6/30/2012 | 6/30/2011 | 6/29/2012 | + +-----------------+--------------+----------------+-------------------+ + + - set_order: method that accepts a callable. Will be set as the value of the + order method of trading_client. An algorithm can then place orders with a + valid sid and a number of shares:: + + self.order(sid(133), share_count) + + - set_performance: property which can be set equal to the + cumulative_trading_performance property of the trading_client. An + algorithm can then check position information with the + Portfolio object:: + + self.Portfolio[sid(133)]['cost_basis'] + + - set_transact_setter: method that accepts a callable. Will + be set as the value of the set_transact_setter method of + the trading_client. This allows an algorithm to change the + slippage model used to predict transactions based on orders + and trade events. 
+ +""" +from copy import deepcopy +import numpy as np + +from alephnull.algorithm import TradingAlgorithm +from alephnull.finance.slippage import FixedSlippage + + +class TestAlgorithm(TradingAlgorithm): + """ + This algorithm will send a specified number of orders, to allow unit tests + to verify the orders sent/received, transactions created, and positions + at the close of a simulation. + """ + + def initialize(self, sid, amount, order_count, sid_filter=None): + self.count = order_count + self.sid = sid + self.amount = amount + self.incr = 0 + + if sid_filter: + self.sid_filter = sid_filter + else: + self.sid_filter = [self.sid] + + def handle_data(self, data): + # place an order for 100 shares of sid + if self.incr < self.count: + self.order(self.sid, self.amount) + self.incr += 1 + + +class HeavyBuyAlgorithm(TradingAlgorithm): + """ + This algorithm will send a specified number of orders, to allow unit tests + to verify the orders sent/received, transactions created, and positions + at the close of a simulation. + """ + + def initialize(self, sid, amount): + self.sid = sid + self.amount = amount + self.incr = 0 + + def handle_data(self, data): + # place an order for 100 shares of sid + self.order(self.sid, self.amount) + self.incr += 1 + + +class NoopAlgorithm(TradingAlgorithm): + """ + Dolce fa niente. + """ + def get_sid_filter(self): + return [] + + def set_transact_setter(self, txn_sim_callable): + pass + + +class ExceptionAlgorithm(TradingAlgorithm): + """ + Throw an exception from the method name specified in the + constructor. + """ + + def initialize(self, throw_from, sid): + + self.throw_from = throw_from + self.sid = sid + + if self.throw_from == "initialize": + raise Exception("Algo exception in initialize") + else: + pass + + def set_portfolio(self, portfolio): + if self.throw_from == "set_portfolio": + raise Exception("Algo exception in set_portfolio") + else: + pass + + def handle_data(self, data): + if self.throw_from == "handle_data": + raise Exception("Algo exception in handle_data") + else: + pass + + def get_sid_filter(self): + if self.throw_from == "get_sid_filter": + raise Exception("Algo exception in get_sid_filter") + else: + return [self.sid] + + def set_transact_setter(self, txn_sim_callable): + pass + + +class DivByZeroAlgorithm(TradingAlgorithm): + + def initialize(self, sid): + self.sid = sid + self.incr = 0 + + def handle_data(self, data): + self.incr += 1 + if self.incr > 4: + 5 / 0 + pass + + +class TooMuchProcessingAlgorithm(TradingAlgorithm): + + def initialize(self, sid): + self.sid = sid + + def handle_data(self, data): + # Unless we're running on some sort of + # supercomputer this will hit timeout. + for i in xrange(1000000000): + self.foo = i + + +class TimeoutAlgorithm(TradingAlgorithm): + + def initialize(self, sid): + self.sid = sid + self.incr = 0 + + def handle_data(self, data): + if self.incr > 4: + import time + time.sleep(100) + pass + + +class RecordAlgorithm(TradingAlgorithm): + def initialize(self): + self.incr = 0 + + def handle_data(self, data): + self.incr += 1 + self.record(incr=self.incr) + + +class TestOrderAlgorithm(TradingAlgorithm): + def initialize(self): + self.incr = 0 + self.sale_price = None + + def handle_data(self, data): + if self.incr == 0: + assert 0 not in self.portfolio.positions + else: + assert self.portfolio.positions[0]['amount'] == \ + self.incr, "Orders not filled immediately." + assert self.portfolio.positions[0]['last_sale_price'] == \ + data[0].price, "Orders not filled at current price." 
+ self.incr += 1 + self.order(0, 1) + + +class TestOrderInstantAlgorithm(TradingAlgorithm): + def initialize(self): + self.incr = 0 + self.last_price = None + + def handle_data(self, data): + if self.incr == 0: + assert 0 not in self.portfolio.positions + else: + assert self.portfolio.positions[0]['amount'] == \ + self.incr, "Orders not filled immediately." + assert self.portfolio.positions[0]['last_sale_price'] == \ + self.last_price, "Orders was not filled at last price." + self.incr += 2 + self.order_value(0, data[0].price * 2.) + self.last_price = data[0].price + + +class TestOrderValueAlgorithm(TradingAlgorithm): + def initialize(self): + self.incr = 0 + self.sale_price = None + + def handle_data(self, data): + if self.incr == 0: + assert 0 not in self.portfolio.positions + else: + assert self.portfolio.positions[0]['amount'] == \ + self.incr, "Orders not filled immediately." + assert self.portfolio.positions[0]['last_sale_price'] == \ + data[0].price, "Orders not filled at current price." + self.incr += 2 + self.order_value(0, data[0].price * 2.) + + +class TestTargetAlgorithm(TradingAlgorithm): + def initialize(self): + self.target_shares = 0 + self.sale_price = None + + def handle_data(self, data): + if self.target_shares == 0: + assert 0 not in self.portfolio.positions + else: + assert self.portfolio.positions[0]['amount'] == \ + self.target_shares, "Orders not filled immediately." + assert self.portfolio.positions[0]['last_sale_price'] == \ + data[0].price, "Orders not filled at current price." + self.target_shares = np.random.randint(1, 30) + self.order_target(0, self.target_shares) + + +class TestOrderPercentAlgorithm(TradingAlgorithm): + def initialize(self): + self.target_shares = 0 + self.sale_price = None + + def handle_data(self, data): + if self.target_shares == 0: + assert 0 not in self.portfolio.positions + self.order(0, 10) + self.target_shares = 10 + return + else: + assert self.portfolio.positions[0]['amount'] == \ + self.target_shares, "Orders not filled immediately." + assert self.portfolio.positions[0]['last_sale_price'] == \ + data[0].price, "Orders not filled at current price." + + self.order_percent(0, .001) + self.target_shares += np.floor((.001 * + self.portfolio.portfolio_value) + / data[0].price) + + +class TestTargetPercentAlgorithm(TradingAlgorithm): + def initialize(self): + self.target_shares = 0 + self.sale_price = None + + def handle_data(self, data): + if self.target_shares == 0: + assert 0 not in self.portfolio.positions + self.target_shares = 1 + else: + assert np.round(self.portfolio.portfolio_value * 0.002) == \ + self.portfolio.positions[0]['amount'] * self.sale_price, \ + "Orders not filled correctly." + assert self.portfolio.positions[0]['last_sale_price'] == \ + data[0].price, "Orders not filled at current price." + self.sale_price = data[0].price + self.order_target_percent(0, .002) + + +class TestTargetValueAlgorithm(TradingAlgorithm): + def initialize(self): + self.target_shares = 0 + self.sale_price = None + + def handle_data(self, data): + if self.target_shares == 0: + assert 0 not in self.portfolio.positions + self.order(0, 10) + self.target_shares = 10 + return + else: + print self.portfolio + assert self.portfolio.positions[0]['amount'] == \ + self.target_shares, "Orders not filled immediately." + assert self.portfolio.positions[0]['last_sale_price'] == \ + data[0].price, "Orders not filled at current price." 
+ + self.order_target_value(0, 20) + self.target_shares = np.round(20 / data[0].price) + + +from alephnull.algorithm import TradingAlgorithm +from alephnull.transforms import BatchTransform, batch_transform +from alephnull.transforms import MovingAverage + + +class TestRegisterTransformAlgorithm(TradingAlgorithm): + def initialize(self, *args, **kwargs): + self.add_transform(MovingAverage, 'mavg', ['price'], + market_aware=True, + window_length=2) + + self.set_slippage(FixedSlippage()) + + def handle_data(self, data): + pass + + +########################################## +# Algorithm using simple batch transforms + +class ReturnPriceBatchTransform(BatchTransform): + def get_value(self, data): + assert data.shape[1] == self.window_length, \ + "data shape={0} does not equal window_length={1} for data={2}".\ + format(data.shape[1], self.window_length, data) + return data.price + + +@batch_transform +def return_price_batch_decorator(data): + return data.price + + +@batch_transform +def return_args_batch_decorator(data, *args, **kwargs): + return args, kwargs + + +@batch_transform +def return_data(data, *args, **kwargs): + return data + + +@batch_transform +def uses_ufunc(data, *args, **kwargs): + # ufuncs like np.log should not crash + return np.log(data) + + +@batch_transform +def price_multiple(data, multiplier, extra_arg=1): + return data.price * multiplier * extra_arg + + +class BatchTransformAlgorithm(TradingAlgorithm): + def initialize(self, *args, **kwargs): + self.refresh_period = kwargs.pop('refresh_period', 1) + self.window_length = kwargs.pop('window_length', 3) + + self.args = args + self.kwargs = kwargs + + self.history_return_price_class = [] + self.history_return_price_decorator = [] + self.history_return_args = [] + self.history_return_arbitrary_fields = [] + self.history_return_nan = [] + self.history_return_sid_filter = [] + self.history_return_field_filter = [] + self.history_return_field_no_filter = [] + self.history_return_ticks = [] + self.history_return_not_full = [] + + self.return_price_class = ReturnPriceBatchTransform( + refresh_period=self.refresh_period, + window_length=self.window_length, + clean_nans=False + ) + + self.return_price_decorator = return_price_batch_decorator( + refresh_period=self.refresh_period, + window_length=self.window_length, + clean_nans=False + ) + + self.return_args_batch = return_args_batch_decorator( + refresh_period=self.refresh_period, + window_length=self.window_length, + clean_nans=False + ) + + self.return_arbitrary_fields = return_data( + refresh_period=self.refresh_period, + window_length=self.window_length, + clean_nans=False + ) + + self.return_nan = return_price_batch_decorator( + refresh_period=self.refresh_period, + window_length=self.window_length, + clean_nans=True + ) + + self.return_sid_filter = return_price_batch_decorator( + refresh_period=self.refresh_period, + window_length=self.window_length, + clean_nans=True, + sids=[0] + ) + + self.return_field_filter = return_data( + refresh_period=self.refresh_period, + window_length=self.window_length, + clean_nans=True, + fields=['price'] + ) + + self.return_field_no_filter = return_data( + refresh_period=self.refresh_period, + window_length=self.window_length, + clean_nans=True + ) + + self.return_not_full = return_data( + refresh_period=1, + window_length=self.window_length, + compute_only_full=False + ) + + self.uses_ufunc = uses_ufunc( + refresh_period=self.refresh_period, + window_length=self.window_length, + clean_nans=False + ) + + self.price_multiple = price_multiple( 
+ refresh_period=self.refresh_period, + window_length=self.window_length, + clean_nans=False + ) + + self.iter = 0 + + self.set_slippage(FixedSlippage()) + + def handle_data(self, data): + self.history_return_price_class.append( + self.return_price_class.handle_data(data)) + self.history_return_price_decorator.append( + self.return_price_decorator.handle_data(data)) + self.history_return_args.append( + self.return_args_batch.handle_data( + data, *self.args, **self.kwargs)) + self.history_return_not_full.append( + self.return_not_full.handle_data(data)) + self.uses_ufunc.handle_data(data) + + # check that calling transforms with the same arguments + # is idempotent + self.price_multiple.handle_data(data, 1, extra_arg=1) + + if self.price_multiple.full: + pre = self.price_multiple.rolling_panel.get_current().shape[0] + result1 = self.price_multiple.handle_data(data, 1, extra_arg=1) + post = self.price_multiple.rolling_panel.get_current().shape[0] + assert pre == post, "batch transform is appending redundant events" + result2 = self.price_multiple.handle_data(data, 1, extra_arg=1) + assert result1 is result2, "batch transform is not idempotent" + + # check that calling transform with the same data, but + # different supplemental arguments results in new + # results. + result3 = self.price_multiple.handle_data(data, 2, extra_arg=1) + assert result1 is not result3, \ + "batch transform is not updating for new args" + + result4 = self.price_multiple.handle_data(data, 1, extra_arg=2) + assert result1 is not result4,\ + "batch transform is not updating for new kwargs" + + new_data = deepcopy(data) + for sid in new_data: + new_data[sid]['arbitrary'] = 123 + + self.history_return_arbitrary_fields.append( + self.return_arbitrary_fields.handle_data(new_data)) + + # nan every second event price + if self.iter % 2 == 0: + self.history_return_nan.append( + self.return_nan.handle_data(data)) + else: + nan_data = deepcopy(data) + for sid in nan_data.iterkeys(): + nan_data[sid].price = np.nan + self.history_return_nan.append( + self.return_nan.handle_data(nan_data)) + + self.iter += 1 + + # Add a new sid to check that it does not get included + extra_sid_data = deepcopy(data) + extra_sid_data[1] = extra_sid_data[0] + self.history_return_sid_filter.append( + self.return_sid_filter.handle_data(extra_sid_data) + ) + + # Add a field to check that it does not get included + extra_field_data = deepcopy(data) + extra_field_data[0]['ignore'] = extra_sid_data[0]['price'] + self.history_return_field_filter.append( + self.return_field_filter.handle_data(extra_field_data) + ) + self.history_return_field_no_filter.append( + self.return_field_no_filter.handle_data(extra_field_data) + ) + + +class BatchTransformAlgorithmMinute(TradingAlgorithm): + def initialize(self, *args, **kwargs): + self.refresh_period = kwargs.pop('refresh_period', 1) + self.window_length = kwargs.pop('window_length', 3) + + self.args = args + self.kwargs = kwargs + + self.history = [] + + self.batch_transform = return_price_batch_decorator( + refresh_period=self.refresh_period, + window_length=self.window_length, + clean_nans=False, + bars='minute' + ) + + def handle_data(self, data): + self.history.append(self.batch_transform.handle_data(data)) + + +class SetPortfolioAlgorithm(TradingAlgorithm): + """ + An algorithm that tries to set the portfolio directly. + + The portfolio should be treated as a read-only object + within the algorithm. 
+ """ + + def initialize(self, *args, **kwargs): + pass + + def handle_data(self, data): + self.portfolio = 3 + + +class TALIBAlgorithm(TradingAlgorithm): + """ + An algorithm that applies a TA-Lib transform. The transform object can be + passed at initialization with the 'talib' keyword argument. The results are + stored in the talib_results array. + """ + def initialize(self, *args, **kwargs): + + if 'talib' not in kwargs: + raise KeyError('No TA-LIB transform specified ' + '(use keyword \'talib\').') + elif not isinstance(kwargs['talib'], (list, tuple)): + self.talib_transforms = (kwargs['talib'],) + else: + self.talib_transforms = kwargs['talib'] + + self.talib_results = dict((t, []) for t in self.talib_transforms) + + def handle_data(self, data): + for t in self.talib_transforms: + result = t.handle_data(data) + if result is None: + if len(t.talib_fn.output_names) == 1: + result = np.nan + else: + result = (np.nan,) * len(t.talib_fn.output_names) + self.talib_results[t].append(result) diff --git a/alephnull/transforms/batch_transform.py b/alephnull/transforms/batch_transform.py index 38f3ced..8d24a41 100644 --- a/alephnull/transforms/batch_transform.py +++ b/alephnull/transforms/batch_transform.py @@ -187,7 +187,7 @@ def __init__(self, # enter the batch transform's window IFF a sid filter is not # specified. if sids is not None: - if isinstance(sids, (basestring, Integral)): + if isinstance(sids, (str, Integral)): self.static_sids = set([sids]) else: self.static_sids = set(sids) @@ -195,7 +195,7 @@ def __init__(self, self.static_sids = None self.initial_field_names = fields - if isinstance(self.initial_field_names, basestring): + if isinstance(self.initial_field_names, str): self.initial_field_names = [self.initial_field_names] self.field_names = set() @@ -230,7 +230,7 @@ def handle_data(self, data, *args, **kwargs): Point of entry. Process an event frame. """ # extract dates - dts = [event.datetime for event in data._data.itervalues()] + dts = [event.datetime for event in data._data.values()] # we have to provide the event with a dt. This is only for # checking if the event is outside the window or not so a # couple of seconds shouldn't matter. We don't add it to @@ -238,7 +238,7 @@ def handle_data(self, data, *args, **kwargs): # sid keys. event = Event() event.dt = max(dts) - event.data = {k: v.__dict__ for k, v in data._data.iteritems() + event.data = {k: v.__dict__ for k, v in data._data.items() # Need to check if data has a 'length' to filter # out sids without trade data available. # TODO: expose more of 'no trade available' @@ -419,8 +419,8 @@ def _extract_field_names(self, event): # extract field names from sids (price, volume etc), make sure # every sid has the same fields. sid_keys = [] - for sid in event.data.itervalues(): - keys = set([name for name, value in sid.items() + for sid in event.data.values(): + keys = set([name for name, value in list(sid.items()) if isinstance(value, (int, float, diff --git a/alephnull/transforms/batch_transform.py.bak b/alephnull/transforms/batch_transform.py.bak new file mode 100644 index 0000000..38f3ced --- /dev/null +++ b/alephnull/transforms/batch_transform.py.bak @@ -0,0 +1,459 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +Generator versions of transforms. +""" +import functools +import logbook + +import numpy + +from numbers import Integral + +import pandas as pd + +from alephnull.utils.data import RollingPanel +from alephnull.protocol import Event + +from alephnull.finance import trading + +from . utils import check_window_length + +log = logbook.Logger('BatchTransform') +func_map = {'open_price': 'first', + 'close_price': 'last', + 'low': 'min', + 'high': 'max', + 'volume': 'sum' + } + + +def get_sample_func(item): + if item in func_map: + return func_map[item] + else: + return 'last' + + +def downsample_panel(minute_rp, daily_rp, mkt_close): + """ + @minute_rp is a rolling panel, which should have minutely rows + @daily_rp is a rolling panel, which should have daily rows + @dt is the timestamp to use when adding a frame to daily_rp + + Using the history in minute_rp, a new daily bar is created by + downsampling. The data from the daily bar is then added to the + daily rolling panel using add_frame. + """ + + cur_panel = minute_rp.get_current() + sids = minute_rp.minor_axis + day_frame = pd.DataFrame(columns=sids, index=cur_panel.items) + dt1 = trading.environment.normalize_date(mkt_close) + dt2 = trading.environment.next_trading_day(mkt_close) + by_close = functools.partial(get_date, mkt_close, dt1, dt2) + for item in minute_rp.items: + frame = cur_panel[item] + func = get_sample_func(item) + # group by trading day, using the market close of the current + # day. If events occurred after the last close (yesterday) but + # before today's close, group them into today. + dframe = frame.groupby(lambda d: by_close(d)).agg(func) + for stock in sids: + day_frame[stock][item] = dframe[stock].ix[dt1] + # store the frame at midnight instead of the close + daily_rp.add_frame(dt1, day_frame) + + +def get_date(mkt_close, d1, d2, d): + if d > mkt_close: + return d2 + else: + return d1 + + +class BatchTransform(object): + """Base class for batch transforms with a trailing window of + variable length. As opposed to pure EventWindows that get a stream + of events and are bound to a single SID, this class creates stream + of pandas DataFrames with each colum representing a sid. + + There are two ways to create a new batch window: + (i) Inherit from BatchTransform and overload get_value(data). + E.g.: + ``` + class MyBatchTransform(BatchTransform): + def get_value(self, data): + # compute difference between the means of sid 0 and sid 1 + return data[0].mean() - data[1].mean() + ``` + + (ii) Use the batch_transform decorator. 
+ E.g.: + ``` + @batch_transform + def my_batch_transform(data): + return data[0].mean() - data[1].mean() + + ``` + + In your algorithm you would then have to instantiate + this in the initialize() method: + ``` + self.my_batch_transform = MyBatchTransform() + ``` + + To then use it, inside of the algorithm handle_data(), call the + handle_data() of the BatchTransform and pass it the current event: + ``` + result = self.my_batch_transform(data) + ``` + + """ + + def __init__(self, + func=None, + refresh_period=0, + window_length=None, + clean_nans=True, + sids=None, + fields=None, + compute_only_full=True, + bars='daily', + downsample=False): + + """Instantiate new batch_transform object. + + :Arguments: + func : python function + If supplied will be called after each refresh_period + with the data panel and all args and kwargs supplied + to the handle_data() call. + refresh_period : int + Interval to wait between advances in the window. + window_length : int + How many days the trailing window should have. + clean_nans : bool + Whether to (forward) fill in nans. + sids : list + Which sids to include in the moving window. If not + supplied sids will be extracted from incoming + events. + fields : list + Which fields to include in the moving window + (e.g. 'price'). If not supplied, fields will be + extracted from incoming events. + compute_only_full : bool + Only call the user-defined function once the window is + full. Returns None if window is not full yet. + downsample : bool + If true, downsample bars to daily bars. Otherwise, do nothing. + """ + if func is not None: + self.compute_transform_value = func + else: + self.compute_transform_value = self.get_value + + self.clean_nans = clean_nans + self.compute_only_full = compute_only_full + # no need to down sample if the bars are already daily + self.downsample = downsample and (bars == 'minute') + + # How many bars are in a day + self.bars = bars + if self.bars == 'daily': + self.bars_in_day = 1 + elif self.bars == 'minute': + self.bars_in_day = int(6.5 * 60) + else: + raise ValueError('%s bars not understood.' % self.bars) + + # The following logic is to allow pre-specified sid filters + # to operate on the data, but to also allow new symbols to + # enter the batch transform's window IFF a sid filter is not + # specified. + if sids is not None: + if isinstance(sids, (basestring, Integral)): + self.static_sids = set([sids]) + else: + self.static_sids = set(sids) + else: + self.static_sids = None + + self.initial_field_names = fields + if isinstance(self.initial_field_names, basestring): + self.initial_field_names = [self.initial_field_names] + self.field_names = set() + + self.refresh_period = refresh_period + + check_window_length(window_length) + self.window_length = window_length + + self.trading_days_total = 0 + self.window = None + + self.full = False + # Set to -inf essentially to cause update on first attempt. + self.last_dt = pd.Timestamp('1900-1-1', tz='UTC') + + self.updated = False + self.cached = None + self.last_args = None + self.last_kwargs = None + + # Data panel that provides bar information to fill in the window, + # when no bar ticks are available from the data source generator + # Used in universes that 'rollover', e.g. one that has a different + # set of stocks per quarter + self.supplemental_data = None + + self.rolling_panel = None + self.daily_rolling_panel = None + + def handle_data(self, data, *args, **kwargs): + """ + Point of entry. Process an event frame. 
+ """ + # extract dates + dts = [event.datetime for event in data._data.itervalues()] + # we have to provide the event with a dt. This is only for + # checking if the event is outside the window or not so a + # couple of seconds shouldn't matter. We don't add it to + # the data parameter, because it would mix dt with the + # sid keys. + event = Event() + event.dt = max(dts) + event.data = {k: v.__dict__ for k, v in data._data.iteritems() + # Need to check if data has a 'length' to filter + # out sids without trade data available. + # TODO: expose more of 'no trade available' + # functionality to zipline + if len(v)} + + # only modify the trailing window if this is + # a new event. This is intended to make handle_data + # idempotent. + if self.last_dt < event.dt: + self.updated = True + self._append_to_window(event) + else: + self.updated = False + + # return newly computed or cached value + return self.get_transform_value(*args, **kwargs) + + def _init_panels(self, sids): + if self.downsample: + self.rolling_panel = RollingPanel(self.bars_in_day, + self.field_names, sids) + + self.daily_rolling_panel = RollingPanel(self.window_length, + self.field_names, sids) + else: + self.rolling_panel = RollingPanel(self.window_length * + self.bars_in_day, + self.field_names, sids) + + def _append_to_window(self, event): + self.field_names = self._get_field_names(event) + + if self.static_sids is None: + sids = set(event.data.keys()) + else: + sids = self.static_sids + + # the panel sent to the transform code will have + # columns masked with this set of sids. This is how + # we guarantee that all (and only) the sids sent to the + # algorithm's handle_data and passed to the batch + # transform. See the get_data method to see it applied. + # N.B. that the underlying panel grows monotonically + # if the set of sids changes over time. + self.latest_sids = sids + # Create rolling panel if not existant + if self.rolling_panel is None: + self._init_panels(sids) + + # Store event in rolling frame + self.rolling_panel.add_frame(event.dt, + pd.DataFrame(event.data, + index=self.field_names, + columns=sids)) + + # update trading day counters + # we may get events from non-trading sources which occurr on + # non-trading days. The book-keeping for market close and + # trading day counting should only consider trading days. + if trading.environment.is_trading_day(event.dt): + _, mkt_close = trading.environment.get_open_and_close(event.dt) + if self.bars == 'daily': + # Daily bars have their dt set to midnight. + mkt_close = trading.environment.normalize_date(mkt_close) + if event.dt == mkt_close: + if self.downsample: + downsample_panel(self.rolling_panel, + self.daily_rolling_panel, + mkt_close + ) + self.trading_days_total += 1 + self.mkt_close = mkt_close + + self.last_dt = event.dt + + if self.trading_days_total >= self.window_length: + self.full = True + + def get_transform_value(self, *args, **kwargs): + """Call user-defined batch-transform function passing all + arguments. + + Note that this will only call the transform if the datapanel + has actually been updated. Otherwise, the previously, cached + value will be returned. + """ + if self.compute_only_full and not self.full: + return None + + ################################################# + # Determine whether we should call the transform + # 0. Support historical/legacy usage of '0' signaling, + # 'update on every bar' + if self.refresh_period == 0: + period_signals_update = True + else: + # 1. Is the refresh period over? 
+ period_signals_update = ( + self.trading_days_total % self.refresh_period == 0) + # 2. Have the args or kwargs been changed since last time? + args_updated = args != self.last_args or kwargs != self.last_kwargs + # 3. Is this a downsampled batch, and is the last event mkt close? + downsample_ready = not self.downsample or \ + self.last_dt == self.mkt_close + + recalculate_needed = downsample_ready and \ + (args_updated or (period_signals_update and self.updated)) + ################################################### + + if recalculate_needed: + self.cached = self.compute_transform_value( + self.get_data(), + *args, + **kwargs + ) + + self.last_args = args + self.last_kwargs = kwargs + return self.cached + + def get_data(self): + """Create a pandas.Panel (i.e. 3d DataFrame) from the + events in the current window. + + Returns: + The resulting panel looks like this: + index : field_name (e.g. price) + major axis/rows : dt + minor axis/colums : sid + """ + if self.downsample: + data = self.daily_rolling_panel.get_current() + else: + data = self.rolling_panel.get_current() + + if self.supplemental_data: + for item in data.items: + if item not in self.supplemental_data.items: + continue + for dt in data.major_axis: + try: + supplemental_for_dt = self.supplemental_data.ix[ + item, dt, :] + except KeyError: + # Only filling in data available in supplemental data. + supplemental_for_dt = None + + if supplemental_for_dt is not None: + data[item].ix[dt] = \ + supplemental_for_dt.combine_first( + data[item].ix[dt]) + + # screen out sids no longer in the multiverse + data = data.ix[:, :, self.latest_sids] + if self.clean_nans: + # Fills in gaps of missing data during transform + # of multiple stocks. E.g. we may be missing + # minute data because of illiquidity of one stock + data = data.fillna(method='ffill') + + # Hold on to a reference to the data, + # so that it's easier to find the current data when stepping + # through with a debugger + self._curr_data = data + + return data + + def get_value(self, *args, **kwargs): + raise NotImplementedError( + "Either overwrite get_value or provide a func argument.") + + def __call__(self, f): + self.compute_transform_value = f + return self.handle_data + + def _extract_field_names(self, event): + # extract field names from sids (price, volume etc), make sure + # every sid has the same fields. + sid_keys = [] + for sid in event.data.itervalues(): + keys = set([name for name, value in sid.items() + if isinstance(value, + (int, + float, + numpy.integer, + numpy.float, + numpy.long)) + ]) + sid_keys.append(keys) + + # with CUSTOM data events, there may be different fields + # per sid. So the allowable keys are the union of all events. + union = set.union(*sid_keys) + unwanted_fields = set(['portfolio', 'sid', 'dt', 'type', + 'datetime', 'source_id']) + return union - unwanted_fields + + def _get_field_names(self, event): + if self.initial_field_names is not None: + return self.initial_field_names + else: + self.latest_names = self._extract_field_names(event) + return set.union(self.field_names, self.latest_names) + + +def batch_transform(func): + """Decorator function to use instead of inheriting from BatchTransform. + For an example on how to use this, see the doc string of BatchTransform. 
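+
+    A minimal sketch (the transform name and parameter values here are
+    illustrative, not part of the library)::
+
+        @batch_transform
+        def mean_price(data):
+            # data is a pandas.Panel of the trailing window
+            return data['price'].mean()
+
+        # in an algorithm's initialize():
+        #     self.mean_price = mean_price(refresh_period=1, window_length=3)
+        # and in handle_data():
+        #     result = self.mean_price.handle_data(data)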
+ """ + + @functools.wraps(func) + def create_window(*args, **kwargs): + # passes the user defined function to BatchTransform which it + # will call instead of self.get_value() + return BatchTransform(*args, func=func, **kwargs) + + return create_window diff --git a/alephnull/transforms/mavg.py b/alephnull/transforms/mavg.py index 8fe996a..c0b8230 100644 --- a/alephnull/transforms/mavg.py +++ b/alephnull/transforms/mavg.py @@ -19,19 +19,18 @@ from alephnull.errors import WrongDataForTransform -class MovingAverage(object): +class MovingAverage(object, metaclass=TransformMeta): """ Class that maintains a dictionary from sids to MovingAverageEventWindows. For each sid, we maintain moving averages over any number of distinct fields (For example, we can maintain a sid's average volume as well as its average price.) """ - __metaclass__ = TransformMeta def __init__(self, fields='price', market_aware=True, window_length=None, delta=None): - if isinstance(fields, basestring): + if isinstance(fields, str): fields = [fields] self.fields = fields diff --git a/alephnull/transforms/mavg.py.bak b/alephnull/transforms/mavg.py.bak new file mode 100644 index 0000000..8fe996a --- /dev/null +++ b/alephnull/transforms/mavg.py.bak @@ -0,0 +1,159 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import defaultdict + +from alephnull.transforms.utils import EventWindow, TransformMeta +from alephnull.errors import WrongDataForTransform + + +class MovingAverage(object): + """ + Class that maintains a dictionary from sids to + MovingAverageEventWindows. For each sid, we maintain moving + averages over any number of distinct fields (For example, we can + maintain a sid's average volume as well as its average price.) + """ + __metaclass__ = TransformMeta + + def __init__(self, fields='price', + market_aware=True, window_length=None, delta=None): + + if isinstance(fields, basestring): + fields = [fields] + self.fields = fields + + self.market_aware = market_aware + + self.delta = delta + self.window_length = window_length + + # Market-aware mode only works with full-day windows. + if self.market_aware: + assert self.window_length and not self.delta,\ + "Market-aware mode only works with full-day windows." + + # Non-market-aware mode requires a timedelta. + else: + assert self.delta and not self.window_length, \ + "Non-market-aware mode requires a timedelta." + + # No way to pass arguments to the defaultdict factory, so we + # need to define a method to generate the correct EventWindows. + self.sid_windows = defaultdict(self.create_window) + + def create_window(self): + """ + Factory method for self.sid_windows. + """ + return MovingAverageEventWindow( + self.fields, + self.market_aware, + self.window_length, + self.delta + ) + + def update(self, event): + """ + Update the event window for this event's sid. Return a dict + from tracked fields to moving averages. + """ + # This will create a new EventWindow if this is the first + # message for this sid. 
+ window = self.sid_windows[event.sid] + window.update(event) + return window.get_averages() + + +class Averages(object): + """ + Container for averages. + """ + + def __getitem__(self, name): + """ + Allow dictionary lookup. + """ + return self.__dict__[name] + + +class MovingAverageEventWindow(EventWindow): + """ + Iteratively calculates moving averages for a particular sid over a + given time window. We can maintain averages for arbitrarily many + fields on a single sid. (For example, we might track average + price as well as average volume for a single sid.) The expected + functionality of this class is to be instantiated inside a + MovingAverage transform. + """ + + def __init__(self, fields, market_aware, days, delta): + + # Call the superclass constructor to set up base EventWindow + # infrastructure. + EventWindow.__init__(self, market_aware, days, delta) + + # We maintain a dictionary of totals for each of our tracked + # fields. + self.fields = fields + self.totals = defaultdict(float) + + # Subclass customization for adding new events. + def handle_add(self, event): + # Sanity check on the event. + self.assert_required_fields(event) + # Increment our running totals with data from the event. + for field in self.fields: + self.totals[field] += event[field] + + # Subclass customization for removing expired events. + def handle_remove(self, event): + # Decrement our running totals with data from the event. + for field in self.fields: + self.totals[field] -= event[field] + + def average(self, field): + """ + Calculate the average value of our ticks over a single field. + """ + # Sanity check. + assert field in self.fields + + # Averages are None by convention if we have no ticks. + if len(self.ticks) == 0: + return 0.0 + + # Calculate and return the average. len(self.ticks) is O(1). + else: + return self.totals[field] / len(self.ticks) + + def get_averages(self): + """ + Return a dict of all our tracked averages. + """ + out = Averages() + for field in self.fields: + out.__dict__[field] = self.average(field) + return out + + def assert_required_fields(self, event): + """ + We only allow events with all of our tracked fields. + """ + for field in self.fields: + if field not in event: + raise WrongDataForTransform( + transform="MovingAverageEventWindow", + fields=self.fields) diff --git a/alephnull/transforms/returns.py b/alephnull/transforms/returns.py index d9a96ea..3ec4af4 100644 --- a/alephnull/transforms/returns.py +++ b/alephnull/transforms/returns.py @@ -18,12 +18,11 @@ from collections import defaultdict, deque -class Returns(object): +class Returns(object, metaclass=TransformMeta): """ Class that maintains a dictionary from sids to the sid's closing price N trading days ago. """ - __metaclass__ = TransformMeta def __init__(self, window_length): self.window_length = window_length diff --git a/alephnull/transforms/returns.py.bak b/alephnull/transforms/returns.py.bak new file mode 100644 index 0000000..d9a96ea --- /dev/null +++ b/alephnull/transforms/returns.py.bak @@ -0,0 +1,101 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from alephnull.errors import WrongDataForTransform +from alephnull.transforms.utils import TransformMeta +from collections import defaultdict, deque + + +class Returns(object): + """ + Class that maintains a dictionary from sids to the sid's + closing price N trading days ago. + """ + __metaclass__ = TransformMeta + + def __init__(self, window_length): + self.window_length = window_length + self.mapping = defaultdict(self._create) + + def update(self, event): + """ + Update and return the calculated returns for this event's sid. + """ + tracker = self.mapping[event.sid] + tracker.update(event) + + return tracker.returns + + def _create(self): + return ReturnsFromPriorClose( + self.window_length + ) + + +class ReturnsFromPriorClose(object): + """ + Records the last N closing events for a given security as well as the + last event for the security. When we get an event for a new day, we + treat the last event seen as the close for the previous day. + """ + + def __init__(self, window_length): + self.closes = deque() + self.last_event = None + self.returns = 0.0 + self.window_length = window_length + + def update(self, event): + self.assert_required_fields(event) + if self.last_event: + + # Day has changed since the last event we saw. Treat + # the last event as the closing price for its day and + # clear out the oldest close if it has expired. + if self.last_event.dt.date() != event.dt.date(): + + self.closes.append(self.last_event) + + # We keep an event for the end of each trading day, so + # if the number of stored events is greater than the + # number of days we want to track, the oldest close + # is expired and should be discarded. + while len(self.closes) > self.window_length: + # Pop the oldest event. + self.closes.popleft() + + # We only generate a return value once we've seen enough days + # to give a sensible value. Would be nice if we could query + # db for closes prior to our initial event, but that would + # require giving this transform database creds, which we want + # to avoid. + + if len(self.closes) == self.window_length: + last_close = self.closes[0].price + change = event.price - last_close + self.returns = change / last_close + + # the current event is now the last_event + self.last_event = event + + def assert_required_fields(self, event): + """ + We only allow events with a price field to be run through + the returns transform. + """ + if 'price' not in event: + raise WrongDataForTransform( + transform="ReturnsEventWindow", + fields='price') diff --git a/alephnull/transforms/stddev.py b/alephnull/transforms/stddev.py index 6826d2b..34a1c7b 100644 --- a/alephnull/transforms/stddev.py +++ b/alephnull/transforms/stddev.py @@ -21,14 +21,13 @@ import alephnull.utils.math_utils as zp_math -class MovingStandardDev(object): +class MovingStandardDev(object, metaclass=TransformMeta): """ Class that maintains a dictionary from sids to MovingStandardDevWindows. For each sid, we maintain a the standard deviation of all events falling within the specified window. """ - __metaclass__ = TransformMeta def __init__(self, market_aware=True, window_length=None, delta=None): diff --git a/alephnull/transforms/stddev.py.bak b/alephnull/transforms/stddev.py.bak new file mode 100644 index 0000000..6826d2b --- /dev/null +++ b/alephnull/transforms/stddev.py.bak @@ -0,0 +1,123 @@ +# +# Copyright 2013 Quantopian, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import defaultdict +from math import sqrt + +from alephnull.errors import WrongDataForTransform +from alephnull.transforms.utils import EventWindow, TransformMeta +import alephnull.utils.math_utils as zp_math + + +class MovingStandardDev(object): + """ + Class that maintains a dictionary from sids to + MovingStandardDevWindows. For each sid, we maintain a the + standard deviation of all events falling within the specified + window. + """ + __metaclass__ = TransformMeta + + def __init__(self, market_aware=True, window_length=None, delta=None): + + self.market_aware = market_aware + + self.delta = delta + self.window_length = window_length + + # Market-aware mode only works with full-day windows. + if self.market_aware: + assert self.window_length and not self.delta,\ + "Market-aware mode only works with full-day windows." + + # Non-market-aware mode requires a timedelta. + else: + assert self.delta and not self.window_length, \ + "Non-market-aware mode requires a timedelta." + + # No way to pass arguments to the defaultdict factory, so we + # need to define a method to generate the correct EventWindows. + self.sid_windows = defaultdict(self.create_window) + + def create_window(self): + """ + Factory method for self.sid_windows. + """ + return MovingStandardDevWindow( + self.market_aware, + self.window_length, + self.delta + ) + + def update(self, event): + """ + Update the event window for this event's sid. Return a dict + from tracked fields to moving averages. + """ + # This will create a new EventWindow if this is the first + # message for this sid. + window = self.sid_windows[event.sid] + window.update(event) + return window.get_stddev() + + def assert_required_fields(self, event): + """ + We only allow events with a price field to be run through + the returns transform. + """ + if 'price' not in event: + raise WrongDataForTransform( + transform="StdDevEventWindow", + fields='price') + + +class MovingStandardDevWindow(EventWindow): + """ + Iteratively calculates standard deviation for a particular sid + over a given time window. The expected functionality of this + class is to be instantiated inside a MovingStandardDev. + """ + + def __init__(self, market_aware=True, window_length=None, delta=None): + # Call the superclass constructor to set up base EventWindow + # infrastructure. + EventWindow.__init__(self, market_aware, window_length, delta) + + self.sum = 0.0 + self.sum_sqr = 0.0 + + def handle_add(self, event): + self.sum += event.price + self.sum_sqr += event.price ** 2 + + def handle_remove(self, event): + self.sum -= event.price + self.sum_sqr -= event.price ** 2 + + def get_stddev(self): + # Sample standard deviation is undefined for a single event or + # no events. 
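+        # With running sums S = sum(price) and Q = sum(price ** 2) over the
+        # n ticks in the window, the branch below computes the sample
+        # variance s^2 = (Q - S * mean) / (n - 1), which equals the textbook
+        # (Q - S**2 / n) / (n - 1) because mean = S / n.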
+ if len(self) <= 1: + return None + + else: + average = self.sum / len(self) + s_squared = (self.sum_sqr - self.sum * average) \ + / (len(self) - 1) + + if zp_math.tolerant_equals(0, s_squared): + return 0.0 + stddev = sqrt(s_squared) + return stddev diff --git a/alephnull/transforms/ta.py b/alephnull/transforms/ta.py index 3b949c5..cc7efd3 100644 --- a/alephnull/transforms/ta.py +++ b/alephnull/transforms/ta.py @@ -45,7 +45,7 @@ def zipline_wrapper(talib_fn, key_map, data): for sid in data.minor_axis: # build talib_data from zipline data talib_data = dict() - for talib_key, zipline_key in key_map.iteritems(): + for talib_key, zipline_key in key_map.items(): # if zipline_key is found, add it to talib_data if zipline_key in data: values = data[zipline_key][sid].values @@ -90,7 +90,7 @@ def make_transform(talib_fn, name): talib_docs = getattr(talib, talib_fn.info['name']).__doc__ divider1 = '\n#---- Default mapping (TA-Lib : Zipline)\n\n' mappings = '\n'.join(' {0} : {1}'.format(k, v) - for k, v in talib_fn.input_names.items()) + for k, v in list(talib_fn.input_names.items())) divider2 = '\n\n#---- Zipline docs\n' help_str = (header + talib_docs + divider1 + mappings + divider2) @@ -168,7 +168,7 @@ def __init__(self, self.talib_fn = copy.deepcopy(talib_fn) # set the parameters - for param in self.talib_fn.get_parameters().keys(): + for param in list(self.talib_fn.get_parameters().keys()): if param in kwargs: self.talib_fn.set_parameters({param: kwargs[param]}) diff --git a/alephnull/transforms/ta.py.bak b/alephnull/transforms/ta.py.bak new file mode 100644 index 0000000..3b949c5 --- /dev/null +++ b/alephnull/transforms/ta.py.bak @@ -0,0 +1,209 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import functools +import math + +import numpy as np +import pandas as pd +import talib +import copy +from alephnull.transforms import BatchTransform + + +def zipline_wrapper(talib_fn, key_map, data): + # get required TA-Lib input names + if 'price' in talib_fn.input_names: + req_inputs = [talib_fn.input_names['price']] + elif 'prices' in talib_fn.input_names: + req_inputs = talib_fn.input_names['prices'] + else: + req_inputs = [] + + # If there are multiple output names then the results are named, + # if there is only one output name, it usually 'real' is best represented + # by a float. + # Use a DataFrame to map sid to named values, and a Series map sid + # to floats. + if len(talib_fn.output_names) > 1: + all_results = pd.DataFrame(index=talib_fn.output_names, + columns=data.minor_axis) + else: + all_results = pd.Series(index=data.minor_axis) + + for sid in data.minor_axis: + # build talib_data from zipline data + talib_data = dict() + for talib_key, zipline_key in key_map.iteritems(): + # if zipline_key is found, add it to talib_data + if zipline_key in data: + values = data[zipline_key][sid].values + # Do not include sids that have only nans, passing only nans + # is incompatible with many of the underlying TALib functions. 
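+                # (For instance, a sid with no trades anywhere in the
+                # window would supply an all-NaN array; the break below
+                # stops collecting inputs for that sid.)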
+ if pd.isnull(values).all(): + break + else: + talib_data[talib_key] = data[zipline_key][sid].values + # if zipline_key is not found and not required, add zeros + elif talib_key not in req_inputs: + talib_data[talib_key] = np.zeros(data.shape[1]) + # if zipline key is not found and required, raise error + else: + raise KeyError( + 'Tried to set required TA-Lib data with key ' + '\'{0}\' but no Zipline data is available under ' + 'expected key \'{1}\'.'.format( + talib_key, zipline_key)) + + # call talib + if talib_data: + talib_result = talib_fn(talib_data) + + # keep only the most recent result + if isinstance(talib_result, (list, tuple)): + sid_result = tuple([r[-1] for r in talib_result]) + else: + sid_result = talib_result[-1] + + all_results[sid] = sid_result + + return all_results + + +def make_transform(talib_fn, name): + """ + A factory for BatchTransforms based on TALIB abstract functions. + """ + # make class docstring + header = '\n#---- TA-Lib docs\n\n' + talib_docs = getattr(talib, talib_fn.info['name']).__doc__ + divider1 = '\n#---- Default mapping (TA-Lib : Zipline)\n\n' + mappings = '\n'.join(' {0} : {1}'.format(k, v) + for k, v in talib_fn.input_names.items()) + divider2 = '\n\n#---- Zipline docs\n' + help_str = (header + talib_docs + divider1 + mappings + + divider2) + + class TALibTransform(BatchTransform): + __doc__ = help_str + """ + TA-Lib keyword arguments must be passed at initialization. For + example, to construct a moving average with timeperiod of 5, pass + "timeperiod=5" during initialization. + + All abstract TA-Lib functions accept a data dictionary containing + 'open', 'high', 'low', 'close', and 'volume' keys, even if they do + not require those keys to run. For example, talib.MA (moving + average) is always computed using the data under the 'close' + key. By default, Zipline constructs this data dictionary with the + appropriate sid data, but users may overwrite this by passing + mappings as keyword arguments. For example, to compute the moving + average of the sid's high, provide "close = 'high'" and Zipline's + 'high' data will be used as TA-Lib's 'close' data. Similarly, if a + user had a data column named 'Oil', they could compute its moving + average by passing "close='Oil'". + + + **Example** + + A moving average of a data column called 'Oil' with timeperiod 5, + talib.transforms.ta.MA(close='Oil', timeperiod=5) + + The user could find the default arguments and mappings by calling: + help(zipline.transforms.ta.MA) + + + **Arguments** + + open : string, default 'open' + high : string, default 'high' + low : string, default 'low' + close : string, default 'price' + volume : string, default 'volume' + + refresh_period : int, default 0 + The refresh_period of the BatchTransform determines the number + of iterations that pass before the BatchTransform updates its + internal data. + + \*\*kwargs : any arguments to be passed to the TA-Lib function. + """ + + def __init__(self, + close='price', + open='open', + high='high', + low='low', + volume='volume', + refresh_period=0, + bars='daily', + **kwargs): + + key_map = {'high': high, + 'low': low, + 'open': open, + 'volume': volume, + 'close': close} + + self.call_kwargs = kwargs + + # Make deepcopy of talib abstract function. + # This is necessary because talib abstract functions remember + # state, including parameters, and we need to set the parameters + # in order to compute the lookback period that will determine the + # BatchTransform window_length. 
TALIB has no way to restore default
+            # parameters, so the deepcopy lets us change this function's
+            # parameters without affecting other TALibTransforms of the same
+            # function.
+            self.talib_fn = copy.deepcopy(talib_fn)
+
+            # set the parameters
+            for param in self.talib_fn.get_parameters().keys():
+                if param in kwargs:
+                    self.talib_fn.set_parameters({param: kwargs[param]})
+
+            # get the lookback
+            self.lookback = self.talib_fn.lookback
+
+            self.bars = bars
+            if bars == 'daily':
+                lookback = self.lookback + 1
+            elif bars == 'minute':
+                lookback = int(math.ceil(self.lookback / (6.5 * 60)))
+
+            # Ensure that window_length is at least 1 day's worth of data.
+            window_length = max(lookback, 1)
+
+            transform_func = functools.partial(
+                zipline_wrapper, self.talib_fn, key_map)
+
+            super(TALibTransform, self).__init__(
+                func=transform_func,
+                refresh_period=refresh_period,
+                window_length=window_length,
+                compute_only_full=False,
+                bars=bars)
+
+        def __repr__(self):
+            return 'Zipline BatchTransform: {0}'.format(
+                self.talib_fn.info['name'])
+
+    TALibTransform.__name__ = name
+    # return class
+    return TALibTransform
+
+
+# add all TA-Lib functions to locals
+for name in talib.abstract.__FUNCTION_NAMES:
+    fn = getattr(talib.abstract, name)
+    locals()[name] = make_transform(fn, name)
diff --git a/alephnull/transforms/utils.py b/alephnull/transforms/utils.py
index 2e5bfb2..a36736e 100644
--- a/alephnull/transforms/utils.py
+++ b/alephnull/transforms/utils.py
@@ -92,7 +92,7 @@ class StatefulTransform(object):
     Otherwise only dt, tnfm_id, and tnfm_value are forwarded.
     """
     def __init__(self, tnfm_class, *args, **kwargs):
-        assert isinstance(tnfm_class, (types.ObjectType, types.ClassType)), \
+        assert isinstance(tnfm_class, type), \
             "Stateful transform requires a class."
         assert hasattr(tnfm_class, 'update'), \
             "Stateful transform requires the class to have an update method"
@@ -150,7 +150,7 @@ def _gen(self, stream_in):
             yield out_message
 
 
-class EventWindow(object):
+class EventWindow(object, metaclass=ABCMeta):
     """
     Abstract base class for transform classes that calculate iterative
     metrics on events within a given timedelta. Maintains a list of
@@ -168,8 +168,6 @@ class EventWindow(object):
     implementations of moving average and volume-weighted average
     price.
     """
-    # Mark this as an abstract base class.
-    __metaclass__ = ABCMeta
 
     def __init__(self, market_aware=True, window_length=None, delta=None):
 
diff --git a/alephnull/transforms/utils.py.bak b/alephnull/transforms/utils.py.bak
new file mode 100644
index 0000000..2e5bfb2
--- /dev/null
+++ b/alephnull/transforms/utils.py.bak
@@ -0,0 +1,259 @@
+#
+# Copyright 2013 Quantopian, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""
+Generator versions of transforms.
+""" +import types +import logbook + + +from numbers import Integral + +from datetime import datetime +from collections import deque +from abc import ABCMeta, abstractmethod + +from alephnull.protocol import DATASOURCE_TYPE +from alephnull.gens.utils import assert_sort_unframe_protocol, hash_args +from alephnull.finance import trading + +log = logbook.Logger('Transform') + + +class UnsupportedEventWindowFlagValue(Exception): + """ + Error state when an EventWindow option is attempted to be set + to a value that is no longer supported by the library. + + This is to help enforce deprecation of the market_aware and delta flags, + without completely removing it and breaking existing algorithms. + """ + pass + + +class InvalidWindowLength(Exception): + """ + Error raised when the window length is unusable. + """ + pass + + +def check_window_length(window_length): + """ + Ensure the window length provided to a transform is valid. + """ + if window_length is None: + raise InvalidWindowLength("window_length must be provided") + if not isinstance(window_length, Integral): + raise InvalidWindowLength( + "window_length must be an integer-like number") + if window_length == 0: + raise InvalidWindowLength("window_length must be non-zero") + if window_length < 0: + raise InvalidWindowLength("window_length must be positive") + + +class TransformMeta(type): + """ + Metaclass that automatically packages a class inside of + StatefulTransform on initialization. Specifically, if Foo is a + class with its __metaclass__ attribute set to TransformMeta, then + calling Foo(*args, **kwargs) will return StatefulTransform(Foo, + *args, **kwargs) instead of an instance of Foo. (Note that you can + still recover an instance of a "raw" Foo by introspecting the + resulting StatefulTransform's 'state' field.) + """ + + def __call__(cls, *args, **kwargs): + return StatefulTransform(cls, *args, **kwargs) + + +class StatefulTransform(object): + """ + Generic transform generator that takes each message from an + in-stream and passes it to a state object. For each call to + update, the state class must produce a message to be fed + downstream. Any transform class with the FORWARDER class variable + set to true will forward all fields in the original message. + Otherwise only dt, tnfm_id, and tnfm_value are forwarded. + """ + def __init__(self, tnfm_class, *args, **kwargs): + assert isinstance(tnfm_class, (types.ObjectType, types.ClassType)), \ + "Stateful transform requires a class." + assert hasattr(tnfm_class, 'update'), \ + "Stateful transform requires the class to have an update method" + + # Create an instance of our transform class. + if isinstance(tnfm_class, TransformMeta): + # Classes derived TransformMeta have their __call__ + # attribute overridden. Since this is what is usually + # used to create an instance, we have to delegate the + # responsibility of creating an instance to + # TransformMeta's parent class, which is 'type'. This is + # what is implicitly done behind the scenes by the python + # interpreter for most classes anyway, but here we have to + # be explicit because we've overridden the method that + # usually resolves to our super call. + self.state = super(TransformMeta, tnfm_class).__call__( + *args, **kwargs) + # Normal object instantiation. + else: + self.state = tnfm_class(*args, **kwargs) + # save the window_length of the state for external access. + self.window_length = self.state.window_length + # Create the string associated with this generator's output. 
+ self.namestring = tnfm_class.__name__ + hash_args(*args, **kwargs) + + def get_hash(self): + return self.namestring + + def transform(self, stream_in): + return self._gen(stream_in) + + def _gen(self, stream_in): + # IMPORTANT: Messages may contain pointers that are shared with + # other streams. Transforms that modify their input + # messages should only manipulate copies. + for message in stream_in: + # we only handle TRADE events. + if (hasattr(message, 'type') + and message.type not in ( + DATASOURCE_TYPE.TRADE, + DATASOURCE_TYPE.CUSTOM)): + yield message + continue + # allow upstream generators to yield None to avoid + # blocking. + if message is None: + continue + + assert_sort_unframe_protocol(message) + + tnfm_value = self.state.update(message) + + out_message = message + out_message[self.namestring] = tnfm_value + yield out_message + + +class EventWindow(object): + """ + Abstract base class for transform classes that calculate iterative + metrics on events within a given timedelta. Maintains a list of + events that are within a certain timedelta of the most recent + tick. Calls self.handle_add(event) for each event added to the + window. Calls self.handle_remove(event) for each event removed + from the window. Subclass these methods along with init(*args, + **kwargs) to calculate metrics over the window. + + If the market_aware flag is True, the EventWindow drops old events + based on the number of elapsed trading days between newest and oldest. + Otherwise old events are dropped based on a raw timedelta. + + See zipline/transforms/mavg.py and zipline/transforms/vwap.py for example + implementations of moving average and volume-weighted average + price. + """ + # Mark this as an abstract base class. + __metaclass__ = ABCMeta + + def __init__(self, market_aware=True, window_length=None, delta=None): + + check_window_length(window_length) + self.window_length = window_length + + self.ticks = deque() + + # Only Market-aware mode is now supported. + if not market_aware: + raise UnsupportedEventWindowFlagValue( + "Non-'market aware' mode is no longer supported." + ) + if delta: + raise UnsupportedEventWindowFlagValue( + "delta values are no longer supported." + ) + # Set the behavior for dropping events from the back of the + # event window. + self.drop_condition = self.out_of_market_window + + @abstractmethod + def handle_add(self, event): + raise NotImplementedError() + + @abstractmethod + def handle_remove(self, event): + raise NotImplementedError() + + def __len__(self): + return len(self.ticks) + + def update(self, event): + + if (hasattr(event, 'type') + and event.type not in ( + DATASOURCE_TYPE.TRADE, + DATASOURCE_TYPE.CUSTOM)): + return + + self.assert_well_formed(event) + # Add new event and increment totals. + self.ticks.append(event) + + # Subclasses should override handle_add to define behavior for + # adding new ticks. + self.handle_add(event) + # Clear out any expired events. + # + # oldest newest + # | | + # V V + while self.drop_condition(self.ticks[0].dt, self.ticks[-1].dt): + + # popleft removes and returns the oldest tick in self.ticks + popped = self.ticks.popleft() + + # Subclasses should override handle_remove to define + # behavior for removing ticks. 
+ self.handle_remove(popped) + + def out_of_market_window(self, oldest, newest): + oldest_index = \ + trading.environment.trading_days.searchsorted(oldest) + newest_index = \ + trading.environment.trading_days.searchsorted(newest) + + trading_days_between = newest_index - oldest_index + + # "Put back" a day if oldest is earlier in its day than newest, + # reflecting the fact that we haven't yet completed the last + # day in the window. + if oldest.time() > newest.time(): + trading_days_between -= 1 + + return trading_days_between >= self.window_length + + # All event windows expect to receive events with datetime fields + # that arrive in sorted order. + def assert_well_formed(self, event): + assert isinstance(event.dt, datetime), \ + "Bad dt in EventWindow:%s" % event + if len(self.ticks) > 0: + # Something is wrong if new event is older than previous. + assert event.dt >= self.ticks[-1].dt, \ + "Events arrived out of order in EventWindow: %s -> %s" % \ + (event, self.ticks[0]) diff --git a/alephnull/transforms/vwap.py b/alephnull/transforms/vwap.py index 063d8c5..1dc83f8 100644 --- a/alephnull/transforms/vwap.py +++ b/alephnull/transforms/vwap.py @@ -19,11 +19,10 @@ from alephnull.transforms.utils import EventWindow, TransformMeta -class MovingVWAP(object): +class MovingVWAP(object, metaclass=TransformMeta): """ Class that maintains a dictionary from sids to VWAPEventWindows. """ - __metaclass__ = TransformMeta def __init__(self, market_aware=True, delta=None, window_length=None): diff --git a/alephnull/transforms/vwap.py.bak b/alephnull/transforms/vwap.py.bak new file mode 100644 index 0000000..063d8c5 --- /dev/null +++ b/alephnull/transforms/vwap.py.bak @@ -0,0 +1,105 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import defaultdict + +from alephnull.errors import WrongDataForTransform +from alephnull.transforms.utils import EventWindow, TransformMeta + + +class MovingVWAP(object): + """ + Class that maintains a dictionary from sids to VWAPEventWindows. + """ + __metaclass__ = TransformMeta + + def __init__(self, market_aware=True, delta=None, window_length=None): + + self.market_aware = market_aware + self.delta = delta + self.window_length = window_length + + # Market-aware mode only works with full-day windows. + if self.market_aware: + assert self.window_length and not self.delta,\ + "Market-aware mode only works with full-day windows." + + # Non-market-aware mode requires a timedelta. + else: + assert self.delta and not self.window_length, \ + "Non-market-aware mode requires a timedelta." + + # No way to pass arguments to the defaultdict factory, so we + # need to define a method to generate the correct EventWindows. 
+ self.sid_windows = defaultdict(self.create_window) + + def create_window(self): + """Factory method for self.sid_windows.""" + return VWAPEventWindow( + self.market_aware, + window_length=self.window_length, + delta=self.delta + ) + + def update(self, event): + """ + Update the event window for this event's sid. Returns the + current vwap for the sid. + """ + # This will create a new EventWindow if this is the first + # message for this sid. + window = self.sid_windows[event.sid] + window.update(event) + return window.get_vwap() + + +class VWAPEventWindow(EventWindow): + """ + Iteratively maintains a vwap for a single sid over a given + timedelta. + """ + def __init__(self, market_aware=True, window_length=None, delta=None): + EventWindow.__init__(self, market_aware, window_length, delta) + self.flux = 0.0 + self.totalvolume = 0.0 + + # Subclass customization for adding new events. + def handle_add(self, event): + # Sanity check on the event. + self.assert_required_fields(event) + self.flux += event.volume * event.price + self.totalvolume += event.volume + + # Subclass customization for removing expired events. + def handle_remove(self, event): + self.flux -= event.volume * event.price + self.totalvolume -= event.volume + + def get_vwap(self): + """ + Return the calculated vwap for this sid. + """ + # By convention, vwap is None if we have no events. + if len(self.ticks) == 0: + return None + else: + return (self.flux / self.totalvolume) + + # We need numerical price and volume to calculate a vwap. + def assert_required_fields(self, event): + if 'price' not in event or 'volume' not in event: + raise WrongDataForTransform( + transform="VWAPEventWindow", + fields=self.fields) diff --git a/alephnull/utils/data.py b/alephnull/utils/data.py index 67a3c4a..7c54cbd 100644 --- a/alephnull/utils/data.py +++ b/alephnull/utils/data.py @@ -50,7 +50,7 @@ def __init__(self, window, items, sids, cap_multiple=2, def _create_buffer(self): return pd.Panel(items=self.items, minor_axis=self.minor_axis, - major_axis=range(self.cap), + major_axis=list(range(self.cap)), dtype=self.dtype) def _update_buffer(self, frame): diff --git a/alephnull/utils/data.py.bak b/alephnull/utils/data.py.bak new file mode 100644 index 0000000..67a3c4a --- /dev/null +++ b/alephnull/utils/data.py.bak @@ -0,0 +1,114 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
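The two metaclass hunks above (EventWindow gaining metaclass=ABCMeta, MovingVWAP gaining metaclass=TransformMeta) are the part of this port that fails silently if missed: Python 3 treats a class-body __metaclass__ assignment as an ordinary attribute, so ABCMeta would stop rejecting incomplete subclasses and TransformMeta.__call__ would stop wrapping MovingVWAP in a StatefulTransform. A minimal standalone sketch of the difference, with invented class names and no dependence on this codebase:

from abc import ABCMeta, abstractmethod


class BrokenWindow(object):
    __metaclass__ = ABCMeta  # ignored by Python 3: just a class attribute

    @abstractmethod
    def handle_add(self, event):
        raise NotImplementedError()


class FixedWindow(metaclass=ABCMeta):
    @abstractmethod
    def handle_add(self, event):
        raise NotImplementedError()


BrokenWindow()       # instantiates silently; abstract enforcement is lost
try:
    FixedWindow()    # TypeError: can't instantiate abstract class
except TypeError as exc:
    print(exc)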
+ +import numpy as np +import pandas as pd +from copy import deepcopy + + +def _ensure_index(x): + if not isinstance(x, pd.Index): + x = pd.Index(x) + + return x + + +class RollingPanel(object): + """ + Preallocation strategies for rolling window over expanding data set + + Restrictions: major_axis can only be a DatetimeIndex for now + """ + + def __init__(self, window, items, sids, cap_multiple=2, + dtype=np.float64): + self.pos = 0 + self.window = window + + self.items = _ensure_index(items) + self.minor_axis = _ensure_index(sids) + + self.cap_multiple = cap_multiple + self.cap = cap_multiple * window + + self.dtype = dtype + self.index_buf = np.empty(self.cap, dtype='M8[ns]') + + self.buffer = self._create_buffer() + + def _create_buffer(self): + return pd.Panel(items=self.items, minor_axis=self.minor_axis, + major_axis=range(self.cap), + dtype=self.dtype) + + def _update_buffer(self, frame): + # Drop outdated, nan-filled minors (sids) and items (fields) + non_nan_cols = set(self.buffer.dropna(axis=1).minor_axis) + new_cols = set(frame.columns) + self.minor_axis = _ensure_index(new_cols.union(non_nan_cols)) + + non_nan_items = set(self.buffer.dropna(axis=1).items) + new_items = set(frame.index) + self.items = _ensure_index(new_items.union(non_nan_items)) + + new_buffer = self._create_buffer() + # Copy old values we want to keep + # .update() is pretty slow. Ideally we would be using + # new_buffer.loc[non_nan_items, :, non_nan_cols] = + # but this triggers a bug in Pandas 0.11. Update + # this when 0.12 is released. + # https://github.com/pydata/pandas/issues/3777 + new_buffer.update( + self.buffer.loc[non_nan_items, :, non_nan_cols]) + + self.buffer = new_buffer + + def add_frame(self, tick, frame): + """ + """ + if self.pos == self.cap: + self._roll_data() + + if set(frame.columns).difference(set(self.minor_axis)) or \ + set(frame.index).difference(set(self.items)): + self._update_buffer(frame) + + self.buffer.loc[:, self.pos, :] = frame.ix[self.items].T + + self.index_buf[self.pos] = tick + + self.pos += 1 + + def get_current(self): + """ + Get a Panel that is the current data in view. It is not safe to persist + these objects because internal data might change + """ + where = slice(max(self.pos - self.window, 0), self.pos) + major_axis = pd.DatetimeIndex(deepcopy(self.index_buf[where]), + tz='utc') + + return pd.Panel(self.buffer.values[:, where, :], self.items, + major_axis, self.minor_axis) + + def _roll_data(self): + """ + Roll window worth of data up to position zero. + Save the effort of having to expensively roll at each iteration + """ + self.buffer.values[:, :self.window, :] = \ + self.buffer.values[:, -self.window:] + self.index_buf[:self.window] = self.index_buf[-self.window:] + self.pos = self.window diff --git a/alephnull/utils/protocol_utils.py b/alephnull/utils/protocol_utils.py index f453117..2babfc4 100644 --- a/alephnull/utils/protocol_utils.py +++ b/alephnull/utils/protocol_utils.py @@ -24,5 +24,5 @@ def Enum(*options): """ class cstruct(Structure): _fields_ = [(o, c_ubyte) for o in options] - __iter__ = lambda s: iter(range(len(options))) - return cstruct(*range(len(options))) + __iter__ = lambda s: iter(list(range(len(options)))) + return cstruct(*list(range(len(options)))) diff --git a/alephnull/utils/protocol_utils.py.bak b/alephnull/utils/protocol_utils.py.bak new file mode 100644 index 0000000..f453117 --- /dev/null +++ b/alephnull/utils/protocol_utils.py.bak @@ -0,0 +1,28 @@ +# +# Copyright 2012 Quantopian, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ctypes import Structure, c_ubyte + + +def Enum(*options): + """ + Fast enums are very important when we want really tight + loops. These are probably going to evolve into pure C structs + anyways so might as well get going on that. + """ + class cstruct(Structure): + _fields_ = [(o, c_ubyte) for o in options] + __iter__ = lambda s: iter(range(len(options))) + return cstruct(*range(len(options))) diff --git a/alephnull/utils/test_utils.py b/alephnull/utils/test_utils.py index a543c80..22ed2ea 100644 --- a/alephnull/utils/test_utils.py +++ b/alephnull/utils/test_utils.py @@ -57,7 +57,7 @@ def assert_single_position(test, zipline): for order in update['daily_perf']['orders']: orders_by_id[order['id']] = order - for order in orders_by_id.itervalues(): + for order in orders_by_id.values(): test.assertEqual( order['status'], ORDER_STATUS.FILLED, @@ -90,7 +90,7 @@ def get_hash(self): def __iter__(self): return self - def next(self): + def __next__(self): 5 / 0 diff --git a/alephnull/utils/test_utils.py.bak b/alephnull/utils/test_utils.py.bak new file mode 100644 index 0000000..a543c80 --- /dev/null +++ b/alephnull/utils/test_utils.py.bak @@ -0,0 +1,107 @@ +from logbook import FileHandler +from alephnull.finance.blotter import ORDER_STATUS + + +def setup_logger(test, path='test.log'): + test.log_handler = FileHandler(path) + test.log_handler.push_application() + + +def teardown_logger(test): + test.log_handler.pop_application() + test.log_handler.close() + + +def drain_zipline(test, zipline): + output = [] + transaction_count = 0 + msg_counter = 0 + # start the simulation + for update in zipline: + msg_counter += 1 + output.append(update) + if 'daily_perf' in update: + transaction_count += \ + len(update['daily_perf']['transactions']) + + return output, transaction_count + + +def assert_single_position(test, zipline): + + output, transaction_count = drain_zipline(test, zipline) + + if 'expected_transactions' in test.zipline_test_config: + test.assertEqual( + test.zipline_test_config['expected_transactions'], + transaction_count + ) + else: + test.assertEqual( + test.zipline_test_config['order_count'], + transaction_count + ) + + # the final message is the risk report, the second to + # last is the final day's results. Positions is a list of + # dicts. + closing_positions = output[-2]['daily_perf']['positions'] + + # confirm that all orders were filled. + # iterate over the output updates, overwriting + # orders when they are updated. Then check the status on all. + orders_by_id = {} + for update in output: + if 'daily_perf' in update: + if 'orders' in update['daily_perf']: + for order in update['daily_perf']['orders']: + orders_by_id[order['id']] = order + + for order in orders_by_id.itervalues(): + test.assertEqual( + order['status'], + ORDER_STATUS.FILLED, + "") + + test.assertEqual( + len(closing_positions), + 1, + "Portfolio should have one position." 
+ ) + + sid = test.zipline_test_config['sid'] + test.assertEqual( + closing_positions[0]['sid'], + sid, + "Portfolio should have one position in " + str(sid) + ) + + return output, transaction_count + + +class ExceptionSource(object): + + def __init__(self): + pass + + def get_hash(self): + return "ExceptionSource" + + def __iter__(self): + return self + + def next(self): + 5 / 0 + + +class ExceptionTransform(object): + + def __init__(self): + self.window_length = 1 + pass + + def get_hash(self): + return "ExceptionTransform" + + def update(self, event): + assert False, "An assertion message" diff --git a/alephnull/utils/tradingcalendar.py b/alephnull/utils/tradingcalendar.py index 552566a..f9d775c 100644 --- a/alephnull/utils/tradingcalendar.py +++ b/alephnull/utils/tradingcalendar.py @@ -283,7 +283,7 @@ def get_early_closes(start, end): # 4th Friday isn't correct if month starts on Friday, so restrict to # day range: byweekday=(rrule.FR), - bymonthday=range(23, 30), + bymonthday=list(range(23, 30)), cache=True, dtstart=start, until=end @@ -397,7 +397,7 @@ def get_open_and_closes(trading_days, early_closes): get_o_and_c = partial(get_open_and_close, early_closes=early_closes) open_and_closes['market_open'], open_and_closes['market_close'] = \ - zip(*open_and_closes.index.map(get_o_and_c)) + list(zip(*open_and_closes.index.map(get_o_and_c))) return open_and_closes diff --git a/alephnull/utils/tradingcalendar.py.bak b/alephnull/utils/tradingcalendar.py.bak new file mode 100644 index 0000000..552566a --- /dev/null +++ b/alephnull/utils/tradingcalendar.py.bak @@ -0,0 +1,404 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pandas as pd +import pytz + +from datetime import datetime, timedelta +from dateutil import rrule +from functools import partial + +start = pd.Timestamp('1990-01-01', tz='UTC') +end_base = pd.Timestamp('today', tz='UTC') +# Give an aggressive buffer for logic that needs to use the next trading +# day or minute. +end = end_base + timedelta(days=365) + + +def canonicalize_datetime(dt): + # Strip out any HHMMSS or timezone info in the user's datetime, so that + # all the datetimes we return will be 00:00:00 UTC. 
+ return datetime(dt.year, dt.month, dt.day, tzinfo=pytz.utc) + + +def get_non_trading_days(start, end): + non_trading_rules = [] + + start = canonicalize_datetime(start) + end = canonicalize_datetime(end) + + weekends = rrule.rrule( + rrule.YEARLY, + byweekday=(rrule.SA, rrule.SU), + cache=True, + dtstart=start, + until=end + ) + non_trading_rules.append(weekends) + + new_years = rrule.rrule( + rrule.MONTHLY, + byyearday=1, + cache=True, + dtstart=start, + until=end + ) + non_trading_rules.append(new_years) + + new_years_sunday = rrule.rrule( + rrule.MONTHLY, + byyearday=2, + byweekday=rrule.MO, + cache=True, + dtstart=start, + until=end + ) + non_trading_rules.append(new_years_sunday) + + mlk_day = rrule.rrule( + rrule.MONTHLY, + bymonth=1, + byweekday=(rrule.MO(+3)), + cache=True, + dtstart=datetime(1998, 1, 1, tzinfo=pytz.utc), + until=end + ) + non_trading_rules.append(mlk_day) + + presidents_day = rrule.rrule( + rrule.MONTHLY, + bymonth=2, + byweekday=(rrule.MO(3)), + cache=True, + dtstart=start, + until=end + ) + non_trading_rules.append(presidents_day) + + good_friday = rrule.rrule( + rrule.DAILY, + byeaster=-2, + cache=True, + dtstart=start, + until=end + ) + non_trading_rules.append(good_friday) + + memorial_day = rrule.rrule( + rrule.MONTHLY, + bymonth=5, + byweekday=(rrule.MO(-1)), + cache=True, + dtstart=start, + until=end + ) + non_trading_rules.append(memorial_day) + + july_4th = rrule.rrule( + rrule.MONTHLY, + bymonth=7, + bymonthday=4, + cache=True, + dtstart=start, + until=end + ) + non_trading_rules.append(july_4th) + + july_4th_sunday = rrule.rrule( + rrule.MONTHLY, + bymonth=7, + bymonthday=5, + byweekday=rrule.MO, + cache=True, + dtstart=start, + until=end + ) + non_trading_rules.append(july_4th_sunday) + + july_4th_saturday = rrule.rrule( + rrule.MONTHLY, + bymonth=7, + bymonthday=3, + byweekday=rrule.FR, + cache=True, + dtstart=start, + until=end + ) + non_trading_rules.append(july_4th_saturday) + + labor_day = rrule.rrule( + rrule.MONTHLY, + bymonth=9, + byweekday=(rrule.MO(1)), + cache=True, + dtstart=start, + until=end + ) + non_trading_rules.append(labor_day) + + thanksgiving = rrule.rrule( + rrule.MONTHLY, + bymonth=11, + byweekday=(rrule.TH(4)), + cache=True, + dtstart=start, + until=end + ) + non_trading_rules.append(thanksgiving) + + christmas = rrule.rrule( + rrule.MONTHLY, + bymonth=12, + bymonthday=25, + cache=True, + dtstart=start, + until=end + ) + non_trading_rules.append(christmas) + + christmas_sunday = rrule.rrule( + rrule.MONTHLY, + bymonth=12, + bymonthday=26, + byweekday=rrule.MO, + cache=True, + dtstart=start, + until=end + ) + non_trading_rules.append(christmas_sunday) + + # If Christmas is a Saturday then 24th, a Friday is observed. + christmas_saturday = rrule.rrule( + rrule.MONTHLY, + bymonth=12, + bymonthday=24, + byweekday=rrule.FR, + cache=True, + dtstart=start, + until=end + ) + non_trading_rules.append(christmas_saturday) + + non_trading_ruleset = rrule.rruleset() + + for rule in non_trading_rules: + non_trading_ruleset.rrule(rule) + + non_trading_days = non_trading_ruleset.between(start, end, inc=True) + + # Add September 11th closings + # http://en.wikipedia.org/wiki/Aftermath_of_the_September_11_attacks + # Due to the terrorist attacks, the stock market did not open on 9/11/2001 + # It did not open again until 9/17/2001. 
+ # + # September 2001 + # Su Mo Tu We Th Fr Sa + # 1 + # 2 3 4 5 6 7 8 + # 9 10 11 12 13 14 15 + # 16 17 18 19 20 21 22 + # 23 24 25 26 27 28 29 + # 30 + + for day_num in range(11, 17): + non_trading_days.append( + datetime(2001, 9, day_num, tzinfo=pytz.utc)) + + # Add closings due to Hurricane Sandy in 2012 + # http://en.wikipedia.org/wiki/Hurricane_sandy + # + # The stock exchange was closed due to Hurricane Sandy's + # impact on New York. + # It closed on 10/29 and 10/30, reopening on 10/31 + # October 2012 + # Su Mo Tu We Th Fr Sa + # 1 2 3 4 5 6 + # 7 8 9 10 11 12 13 + # 14 15 16 17 18 19 20 + # 21 22 23 24 25 26 27 + # 28 29 30 31 + + for day_num in range(29, 31): + non_trading_days.append( + datetime(2012, 10, day_num, tzinfo=pytz.utc)) + + # Misc closings from NYSE listing. + # http://www.nyse.com/pdfs/closings.pdf + # + # National Days of Mourning + # - President Richard Nixon + non_trading_days.append(datetime(1994, 4, 27, tzinfo=pytz.utc)) + # - President Ronald W. Reagan - June 11, 2004 + non_trading_days.append(datetime(2004, 6, 11, tzinfo=pytz.utc)) + # - President Gerald R. Ford - Jan 2, 2007 + non_trading_days.append(datetime(2007, 1, 2, tzinfo=pytz.utc)) + + non_trading_days.sort() + return pd.DatetimeIndex(non_trading_days) + +non_trading_days = get_non_trading_days(start, end) +trading_day = pd.tseries.offsets.CDay(holidays=non_trading_days) + + +def get_trading_days(start, end, trading_day=trading_day): + return pd.date_range(start=start.date(), + end=end.date(), + freq=trading_day).tz_localize('UTC') + +trading_days = get_trading_days(start, end) + + +def get_early_closes(start, end): + # 1:00 PM close rules based on + # http://quant.stackexchange.com/questions/4083/nyse-early-close-rules-july-4th-and-dec-25th # noqa + # and verified against http://www.nyse.com/pdfs/closings.pdf + + # These rules are valid starting in 1993 + + start = canonicalize_datetime(start) + end = canonicalize_datetime(end) + + start = max(start, datetime(1993, 1, 1, tzinfo=pytz.utc)) + end = max(end, datetime(1993, 1, 1, tzinfo=pytz.utc)) + + # Not included here are early closes prior to 1993 + # or unplanned early closes + + early_close_rules = [] + + day_after_thanksgiving = rrule.rrule( + rrule.MONTHLY, + bymonth=11, + # 4th Friday isn't correct if month starts on Friday, so restrict to + # day range: + byweekday=(rrule.FR), + bymonthday=range(23, 30), + cache=True, + dtstart=start, + until=end + ) + early_close_rules.append(day_after_thanksgiving) + + christmas_eve = rrule.rrule( + rrule.MONTHLY, + bymonth=12, + bymonthday=24, + byweekday=(rrule.MO, rrule.TU, rrule.WE, rrule.TH), + cache=True, + dtstart=start, + until=end + ) + early_close_rules.append(christmas_eve) + + friday_after_christmas = rrule.rrule( + rrule.MONTHLY, + bymonth=12, + bymonthday=26, + byweekday=rrule.FR, + cache=True, + dtstart=start, + # valid 1993-2007 + until=min(end, datetime(2007, 12, 31, tzinfo=pytz.utc)) + ) + early_close_rules.append(friday_after_christmas) + + day_before_independence_day = rrule.rrule( + rrule.MONTHLY, + bymonth=7, + bymonthday=3, + byweekday=(rrule.MO, rrule.TU, rrule.TH), + cache=True, + dtstart=start, + until=end + ) + early_close_rules.append(day_before_independence_day) + + day_after_independence_day = rrule.rrule( + rrule.MONTHLY, + bymonth=7, + bymonthday=5, + byweekday=rrule.FR, + cache=True, + dtstart=start, + # starting in 2013: wednesday before independence day + until=min(end, datetime(2012, 12, 31, tzinfo=pytz.utc)) + ) + early_close_rules.append(day_after_independence_day) + + 
wednesday_before_independence_day = rrule.rrule( + rrule.MONTHLY, + bymonth=7, + bymonthday=3, + byweekday=rrule.WE, + cache=True, + # starting in 2013 + dtstart=max(start, datetime(2013, 1, 1, tzinfo=pytz.utc)), + until=max(end, datetime(2013, 1, 1, tzinfo=pytz.utc)) + ) + early_close_rules.append(wednesday_before_independence_day) + + early_close_ruleset = rrule.rruleset() + + for rule in early_close_rules: + early_close_ruleset.rrule(rule) + early_closes = early_close_ruleset.between(start, end, inc=True) + + # Misc early closings from NYSE listing. + # http://www.nyse.com/pdfs/closings.pdf + # + # New Year's Eve + nye_1999 = datetime(1999, 12, 31, tzinfo=pytz.utc) + if start <= nye_1999 and nye_1999 <= end: + early_closes.append(nye_1999) + + early_closes.sort() + return pd.DatetimeIndex(early_closes) + +early_closes = get_early_closes(start, end) + + +def get_open_and_close(day, early_closes): + market_open = pd.Timestamp( + datetime( + year=day.year, + month=day.month, + day=day.day, + hour=9, + minute=31), + tz='US/Eastern').tz_convert('UTC') + # 1 PM if early close, 4 PM otherwise + close_hour = 13 if day in early_closes else 16 + market_close = pd.Timestamp( + datetime( + year=day.year, + month=day.month, + day=day.day, + hour=close_hour), + tz='US/Eastern').tz_convert('UTC') + + return market_open, market_close + + +def get_open_and_closes(trading_days, early_closes): + open_and_closes = pd.DataFrame(index=trading_days, + columns=('market_open', 'market_close')) + + get_o_and_c = partial(get_open_and_close, early_closes=early_closes) + + open_and_closes['market_open'], open_and_closes['market_close'] = \ + zip(*open_and_closes.index.map(get_o_and_c)) + + return open_and_closes + +open_and_closes = get_open_and_closes(trading_days, early_closes) diff --git a/docs/conf.py b/docs/conf.py index 2c61ea9..046bab9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -46,8 +46,8 @@ master_doc = 'index' # General information about the project. -project = u'Zipline' -copyright = u'2012, Quantopian: jean, fawce, sdiehl' +project = 'Zipline' +copyright = '2012, Quantopian: jean, fawce, sdiehl' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -189,8 +189,8 @@ # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'QSim.tex', u'QSim Documentation', - u'Quantopian: jean, fawce, sdiehl', 'manual'), + ('index', 'QSim.tex', 'QSim Documentation', + 'Quantopian: jean, fawce, sdiehl', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of @@ -219,8 +219,8 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - ('index', 'zipline', u'QSim Documentation', - [u'Quantopian: jean, fawce, sdiehl'], 1) + ('index', 'zipline', 'QSim Documentation', + ['Quantopian: jean, fawce, sdiehl'], 1) ] # If true, show URL addresses after external links. 
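As a quick sanity check on the open/close construction at the end of tradingcalendar.py.bak above, here is a standalone sketch assuming only pandas and pytz (the module's own dependencies). 2012-07-03 falls under the day_before_independence_day early-close rule, and Eastern daylight time is UTC-4, so the 9:31 AM open and 1:00 PM close land at 13:31 and 17:00 UTC:

import datetime

import pandas as pd

day = datetime.date(2012, 7, 3)
market_open = pd.Timestamp(
    datetime.datetime(day.year, day.month, day.day, hour=9, minute=31),
    tz='US/Eastern').tz_convert('UTC')
market_close = pd.Timestamp(
    datetime.datetime(day.year, day.month, day.day, hour=13),
    tz='US/Eastern').tz_convert('UTC')

print(market_open)   # 2012-07-03 13:31:00+00:00
print(market_close)  # 2012-07-03 17:00:00+00:00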
@@ -233,8 +233,8 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'QSim', u'QSim Documentation', - u'Quantopian: jean, fawce, sdiehl', 'QSim', 'One line description of project.', + ('index', 'QSim', 'QSim Documentation', + 'Quantopian: jean, fawce, sdiehl', 'QSim', 'One line description of project.', 'Miscellaneous'), ] diff --git a/docs/conf.py.bak b/docs/conf.py.bak new file mode 100644 index 0000000..2c61ea9 --- /dev/null +++ b/docs/conf.py.bak @@ -0,0 +1,252 @@ +# -*- coding: utf-8 -*- +# +# QSim documentation build configuration file, created by +# sphinx-quickstart on Wed Feb 8 15:29:56 2012. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +sys.path.append(os.path.abspath('..')) + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.intersphinx', + 'sphinx.ext.todo', + 'sphinx.ext.coverage', + 'sphinx.ext.viewcode' +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'Zipline' +copyright = u'2012, Quantopian: jean, fawce, sdiehl' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.0' +# The full version, including alpha/beta/rc tags. +release = 'dev' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. 
+pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'nature' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'QSimdoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). 
+latex_documents = [ + ('index', 'QSim.tex', u'QSim Documentation', + u'Quantopian: jean, fawce, sdiehl', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'zipline', u'QSim Documentation', + [u'Quantopian: jean, fawce, sdiehl'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------------ + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'QSim', u'QSim Documentation', + u'Quantopian: jean, fawce, sdiehl', 'QSim', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + + +# Example configuration for intersphinx: refer to the Python standard library. 
+intersphinx_mapping = {'http://docs.python.org/': None} diff --git a/tests/finance/test_slippage.py b/tests/finance/test_slippage.py index 7f0871c..0519f0e 100644 --- a/tests/finance/test_slippage.py +++ b/tests/finance/test_slippage.py @@ -137,7 +137,7 @@ def test_orders_limit(self): self.assertIsNotNone(txn) - for key, value in expected_txn.items(): + for key, value in list(expected_txn.items()): self.assertEquals(value, txn[key]) # short, does not trade @@ -189,7 +189,7 @@ def test_orders_limit(self): self.assertIsNotNone(txn) - for key, value in expected_txn.items(): + for key, value in list(expected_txn.items()): self.assertEquals(value, txn[key]) STOP_ORDER_CASES = { @@ -320,7 +320,7 @@ def test_orders_limit(self): @parameterized.expand([ (name, case['order'], case['event'], case['expected']) - for name, case in STOP_ORDER_CASES.items() + for name, case in list(STOP_ORDER_CASES.items()) ]) def test_orders_stop(self, name, order_data, event_data, expected): order = Order(**order_data) @@ -329,7 +329,7 @@ def test_orders_stop(self, name, order_data, event_data, expected): slippage_model = VolumeShareSlippage() try: - _, txn = slippage_model.simulate(event, [order]).next() + _, txn = next(slippage_model.simulate(event, [order])) except StopIteration: txn = None @@ -338,7 +338,7 @@ def test_orders_stop(self, name, order_data, event_data, expected): else: self.assertIsNotNone(txn) - for key, value in expected['transaction'].items(): + for key, value in list(expected['transaction'].items()): self.assertEquals(value, txn[key]) def test_orders_stop_limit(self): @@ -407,7 +407,7 @@ def test_orders_stop_limit(self): 'sid': int(133) } - for key, value in expected_txn.items(): + for key, value in list(expected_txn.items()): self.assertEquals(value, txn[key]) # short, does not trade @@ -471,7 +471,7 @@ def test_orders_stop_limit(self): 'sid': int(133) } - for key, value in expected_txn.items(): + for key, value in list(expected_txn.items()): self.assertEquals(value, txn[key]) def gen_trades(self): diff --git a/tests/finance/test_slippage.py.bak b/tests/finance/test_slippage.py.bak new file mode 100644 index 0000000..7f0871c --- /dev/null +++ b/tests/finance/test_slippage.py.bak @@ -0,0 +1,556 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
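The .next() to next(...) change in test_orders_stop above is the iterator-protocol half of this port: Python 3 renames the method to __next__ and advances iterators only through the next() builtin, so a leftover Py2-style .next() call raises AttributeError at runtime. A standalone sketch with an invented class:

class TradeStream(object):
    def __init__(self, prices):
        self._it = iter(prices)

    def __iter__(self):
        return self

    def __next__(self):          # the Py2 spelling was: def next(self)
        return next(self._it)


stream = TradeStream([3.0, 3.5, 4.0])
print(next(stream))              # 3.0 -- the builtin dispatches to __next__
print(next(stream, None))        # 3.5 -- the default is used only at exhaustion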
+ +""" +Unit tests for finance.slippage +""" +import datetime + +import pytz + +from unittest import TestCase + +from nose_parameterized import parameterized + +import pandas as pd + +from zipline.finance.slippage import VolumeShareSlippage + +from zipline.protocol import Event, DATASOURCE_TYPE +from zipline.finance.blotter import Order + + +class SlippageTestCase(TestCase): + + def test_volume_share_slippage(self): + event = Event( + {'volume': 200, + 'type': 4, + 'price': 3.0, + 'datetime': datetime.datetime( + 2006, 1, 5, 14, 31, tzinfo=pytz.utc), + 'high': 3.15, + 'low': 2.85, + 'sid': 133, + 'source_id': 'test_source', + 'close': 3.0, + 'dt': + datetime.datetime(2006, 1, 5, 14, 31, tzinfo=pytz.utc), + 'open': 3.0} + ) + + slippage_model = VolumeShareSlippage() + + open_orders = [ + Order(dt=datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc), + amount=100, + filled=0, + sid=133) + ] + + orders_txns = list(slippage_model.simulate( + event, + open_orders + )) + + self.assertEquals(len(orders_txns), 1) + _, txn = orders_txns[0] + + expected_txn = { + 'price': float(3.01875), + 'dt': datetime.datetime( + 2006, 1, 5, 14, 31, tzinfo=pytz.utc), + 'amount': int(50), + 'sid': int(133), + 'commission': None, + 'type': DATASOURCE_TYPE.TRANSACTION, + 'order_id': open_orders[0].id + } + + self.assertIsNotNone(txn) + + # TODO: Make expected_txn an Transaction object and ensure there + # is a __eq__ for that class. + self.assertEquals(expected_txn, txn.__dict__) + + def test_orders_limit(self): + + events = self.gen_trades() + + slippage_model = VolumeShareSlippage() + + # long, does not trade + + open_orders = [ + Order(**{ + 'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc), + 'amount': 100, + 'filled': 0, + 'sid': 133, + 'limit': 3.5}) + ] + + orders_txns = list(slippage_model.simulate( + events[2], + open_orders + )) + self.assertEquals(len(orders_txns), 0) + + # long, does trade + open_orders = [ + Order(**{ + 'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc), + 'amount': 100, + 'filled': 0, + 'sid': 133, + 'limit': 3.5}) + ] + + orders_txns = list(slippage_model.simulate( + events[3], + open_orders + )) + + self.assertEquals(len(orders_txns), 1) + txn = orders_txns[0][1] + + expected_txn = { + 'price': float(3.500875), + 'dt': datetime.datetime( + 2006, 1, 5, 14, 34, tzinfo=pytz.utc), + 'amount': int(100), + 'sid': int(133), + 'order_id': open_orders[0].id + } + + self.assertIsNotNone(txn) + + for key, value in expected_txn.items(): + self.assertEquals(value, txn[key]) + + # short, does not trade + + open_orders = [ + Order(**{ + 'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc), + 'amount': -100, + 'filled': 0, + 'sid': 133, + 'limit': 3.5}) + ] + + orders_txns = list(slippage_model.simulate( + events[0], + open_orders + )) + + expected_txn = {} + + self.assertEquals(len(orders_txns), 0) + + # short, does trade + + open_orders = [ + Order(**{ + 'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc), + 'amount': -100, + 'filled': 0, + 'sid': 133, + 'limit': 3.5}) + ] + + orders_txns = list(slippage_model.simulate( + events[1], + open_orders + )) + + self.assertEquals(len(orders_txns), 1) + _, txn = orders_txns[0] + + expected_txn = { + 'price': float(3.499125), + 'dt': datetime.datetime( + 2006, 1, 5, 14, 32, tzinfo=pytz.utc), + 'amount': int(-100), + 'sid': int(133) + } + + self.assertIsNotNone(txn) + + for key, value in expected_txn.items(): + self.assertEquals(value, txn[key]) + + STOP_ORDER_CASES = { + # Stop orders can be long/short and have their 
price greater or + # less than the stop. + # + # A stop being reached is conditional on the order direction. + # Long orders reach the stop when the price is greater than the stop. + # Short orders reach the stop when the price is less than the stop. + # + # Which leads to the following 4 cases: + # + # | long | short | + # | price > stop | | | + # | price < stop | | | + # + # Currently the slippage module acts according to the following table, + # where 'X' represents triggering a transaction + # | long | short | + # | price > stop | | X | + # | price < stop | X | | + # + # However, the following behavior *should* be followed. + # + # | long | short | + # | price > stop | X | | + # | price < stop | | X | + + 'long | price gt stop': { + 'order': { + 'dt': pd.Timestamp('2006-01-05 14:30', tz='UTC'), + 'amount': 100, + 'filled': 0, + 'sid': 133, + 'stop': 3.5 + }, + 'event': { + 'dt': pd.Timestamp('2006-01-05 14:31', tz='UTC'), + 'volume': 2000, + 'price': 4.0, + 'high': 3.15, + 'low': 2.85, + 'sid': 133, + 'close': 4.0, + 'open': 3.5 + }, + 'expected': { + 'transaction': { + 'price': 4.001, + 'dt': pd.Timestamp('2006-01-05 14:31', tz='UTC'), + 'amount': 100, + 'sid': 133, + } + } + }, + 'long | price lt stop': { + 'order': { + 'dt': pd.Timestamp('2006-01-05 14:30', tz='UTC'), + 'amount': 100, + 'filled': 0, + 'sid': 133, + 'stop': 3.6 + }, + 'event': { + 'dt': pd.Timestamp('2006-01-05 14:31', tz='UTC'), + 'volume': 2000, + 'price': 3.5, + 'high': 3.15, + 'low': 2.85, + 'sid': 133, + 'close': 3.5, + 'open': 4.0 + }, + 'expected': { + 'transaction': None + } + }, + 'short | price gt stop': { + 'order': { + 'dt': pd.Timestamp('2006-01-05 14:30', tz='UTC'), + 'amount': -100, + 'filled': 0, + 'sid': 133, + 'stop': 3.4 + }, + 'event': { + 'dt': pd.Timestamp('2006-01-05 14:31', tz='UTC'), + 'volume': 2000, + 'price': 3.5, + 'high': 3.15, + 'low': 2.85, + 'sid': 133, + 'close': 3.5, + 'open': 3.0 + }, + 'expected': { + 'transaction': None + } + }, + 'short | price lt stop': { + 'order': { + 'dt': pd.Timestamp('2006-01-05 14:30', tz='UTC'), + 'amount': -100, + 'filled': 0, + 'sid': 133, + 'stop': 3.5 + }, + 'event': { + 'dt': pd.Timestamp('2006-01-05 14:31', tz='UTC'), + 'volume': 2000, + 'price': 3.0, + 'high': 3.15, + 'low': 2.85, + 'sid': 133, + 'close': 3.0, + 'open': 3.0 + }, + 'expected': { + 'transaction': { + 'price': 2.99925, + 'dt': pd.Timestamp('2006-01-05 14:31', tz='UTC'), + 'amount': -100, + 'sid': 133, + } + } + }, + } + + @parameterized.expand([ + (name, case['order'], case['event'], case['expected']) + for name, case in STOP_ORDER_CASES.items() + ]) + def test_orders_stop(self, name, order_data, event_data, expected): + order = Order(**order_data) + event = Event(initial_values=event_data) + + slippage_model = VolumeShareSlippage() + + try: + _, txn = slippage_model.simulate(event, [order]).next() + except StopIteration: + txn = None + + if expected['transaction'] is None: + self.assertIsNone(txn) + else: + self.assertIsNotNone(txn) + + for key, value in expected['transaction'].items(): + self.assertEquals(value, txn[key]) + + def test_orders_stop_limit(self): + + events = self.gen_trades() + slippage_model = VolumeShareSlippage() + + # long, does not trade + + open_orders = [ + Order(**{ + 'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc), + 'amount': 100, + 'filled': 0, + 'sid': 133, + 'stop': 4.0, + 'limit': 3.0}) + ] + + orders_txns = list(slippage_model.simulate( + events[2], + open_orders + )) + + self.assertEquals(len(orders_txns), 0) + + orders_txns = 
list(slippage_model.simulate( + events[3], + open_orders + )) + + self.assertEquals(len(orders_txns), 0) + + # long, does trade + + open_orders = [ + Order(**{ + 'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc), + 'amount': 100, + 'filled': 0, + 'sid': 133, + 'stop': 4.0, + 'limit': 3.5}) + ] + + orders_txns = list(slippage_model.simulate( + events[2], + open_orders + )) + + self.assertEquals(len(orders_txns), 0) + + orders_txns = list(slippage_model.simulate( + events[3], + open_orders + )) + + self.assertEquals(len(orders_txns), 1) + _, txn = orders_txns[0] + + expected_txn = { + 'price': float(3.500875), + 'dt': datetime.datetime( + 2006, 1, 5, 14, 34, tzinfo=pytz.utc), + 'amount': int(100), + 'sid': int(133) + } + + for key, value in expected_txn.items(): + self.assertEquals(value, txn[key]) + + # short, does not trade + + open_orders = [ + Order(**{ + 'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc), + 'amount': -100, + 'filled': 0, + 'sid': 133, + 'stop': 3.0, + 'limit': 4.0}) + ] + + orders_txns = list(slippage_model.simulate( + events[0], + open_orders + )) + + self.assertEquals(len(orders_txns), 0) + + orders_txns = list(slippage_model.simulate( + events[1], + open_orders + )) + + self.assertEquals(len(orders_txns), 0) + + # short, does trade + + open_orders = [ + Order(**{ + 'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc), + 'amount': -100, + 'filled': 0, + 'sid': 133, + 'stop': 3.0, + 'limit': 3.5}) + ] + + orders_txns = list(slippage_model.simulate( + events[0], + open_orders + )) + + self.assertEquals(len(orders_txns), 0) + + orders_txns = list(slippage_model.simulate( + events[1], + open_orders + )) + + self.assertEquals(len(orders_txns), 1) + _, txn = orders_txns[0] + + expected_txn = { + 'price': float(3.499125), + 'dt': datetime.datetime( + 2006, 1, 5, 14, 32, tzinfo=pytz.utc), + 'amount': int(-100), + 'sid': int(133) + } + + for key, value in expected_txn.items(): + self.assertEquals(value, txn[key]) + + def gen_trades(self): + # create a sequence of trades + events = [ + Event({ + 'volume': 2000, + 'type': 4, + 'price': 3.0, + 'datetime': datetime.datetime( + 2006, 1, 5, 14, 31, tzinfo=pytz.utc), + 'high': 3.15, + 'low': 2.85, + 'sid': 133, + 'source_id': 'test_source', + 'close': 3.0, + 'dt': + datetime.datetime(2006, 1, 5, 14, 31, tzinfo=pytz.utc), + 'open': 3.0 + }), + Event({ + 'volume': 2000, + 'type': 4, + 'price': 3.5, + 'datetime': datetime.datetime( + 2006, 1, 5, 14, 32, tzinfo=pytz.utc), + 'high': 3.15, + 'low': 2.85, + 'sid': 133, + 'source_id': 'test_source', + 'close': 3.5, + 'dt': + datetime.datetime(2006, 1, 5, 14, 32, tzinfo=pytz.utc), + 'open': 3.0 + }), + Event({ + 'volume': 2000, + 'type': 4, + 'price': 4.0, + 'datetime': datetime.datetime( + 2006, 1, 5, 14, 33, tzinfo=pytz.utc), + 'high': 3.15, + 'low': 2.85, + 'sid': 133, + 'source_id': 'test_source', + 'close': 4.0, + 'dt': + datetime.datetime(2006, 1, 5, 14, 33, tzinfo=pytz.utc), + 'open': 3.5 + }), + Event({ + 'volume': 2000, + 'type': 4, + 'price': 3.5, + 'datetime': datetime.datetime( + 2006, 1, 5, 14, 34, tzinfo=pytz.utc), + 'high': 3.15, + 'low': 2.85, + 'sid': 133, + 'source_id': 'test_source', + 'close': 3.5, + 'dt': + datetime.datetime(2006, 1, 5, 14, 34, tzinfo=pytz.utc), + 'open': 4.0 + }), + Event({ + 'volume': 2000, + 'type': 4, + 'price': 3.0, + 'datetime': datetime.datetime( + 2006, 1, 5, 14, 35, tzinfo=pytz.utc), + 'high': 3.15, + 'low': 2.85, + 'sid': 133, + 'source_id': 'test_source', + 'close': 3.0, + 'dt': + datetime.datetime(2006, 1, 5, 14, 
35, tzinfo=pytz.utc), + 'open': 3.5 + }) + ] + return events diff --git a/tests/risk/answer_key.py b/tests/risk/answer_key.py index a1eb8ec..6e1d13b 100644 --- a/tests/risk/answer_key.py +++ b/tests/risk/answer_key.py @@ -258,10 +258,10 @@ def __init__(self): 'Sim Cumulative') self.sheets['s_p'] = self.workbook.sheet_by_name('s_p') - for name, index in self.INDEXES.items(): + for name, index in list(self.INDEXES.items()): if isinstance(index, dict): subvalues = {} - for subkey, subindex in index.items(): + for subkey, subindex in list(index.items()): subvalues[subkey] = self.get_values(subindex) setattr(self, name, subvalues) else: @@ -288,7 +288,7 @@ def value_type_to_value_func(self): def get_values(self, data_index): value_parser = self.value_type_to_value_func[data_index.value_type] - return map(value_parser, self.get_raw_values(data_index)) + return list(map(value_parser, self.get_raw_values(data_index))) ANSWER_KEY = AnswerKey() @@ -296,24 +296,24 @@ def get_values(self, data_index): BENCHMARK_DATES = ANSWER_KEY.BENCHMARK['Dates'] BENCHMARK_RETURNS = ANSWER_KEY.BENCHMARK['Returns'] DATES = [datetime.datetime(*x, tzinfo=pytz.UTC) for x in BENCHMARK_DATES] -BENCHMARK = pd.Series(dict(zip(DATES, BENCHMARK_RETURNS))) +BENCHMARK = pd.Series(dict(list(zip(DATES, BENCHMARK_RETURNS)))) ALGORITHM_RETURNS = pd.Series( - dict(zip(DATES, ANSWER_KEY.ALGORITHM_RETURN_VALUES))) + dict(list(zip(DATES, ANSWER_KEY.ALGORITHM_RETURN_VALUES)))) RETURNS_DATA = pd.DataFrame({'Benchmark Returns': BENCHMARK, 'Algorithm Returns': ALGORITHM_RETURNS}) RISK_CUMULATIVE = pd.DataFrame({ - 'volatility': pd.Series(dict(zip( - DATES, ANSWER_KEY.ALGORITHM_CUMULATIVE_VOLATILITY))), - 'sharpe': pd.Series(dict(zip( - DATES, ANSWER_KEY.ALGORITHM_CUMULATIVE_SHARPE))), - 'downside_risk': pd.Series(dict(zip( - DATES, ANSWER_KEY.CUMULATIVE_DOWNSIDE_RISK))), - 'sortino': pd.Series(dict(zip( - DATES, ANSWER_KEY.CUMULATIVE_SORTINO))), - 'information': pd.Series(dict(zip( - DATES, ANSWER_KEY.CUMULATIVE_INFORMATION))), - 'alpha': pd.Series(dict(zip( - DATES, ANSWER_KEY.CUMULATIVE_ALPHA))), - 'beta': pd.Series(dict(zip( - DATES, ANSWER_KEY.CUMULATIVE_BETA))), + 'volatility': pd.Series(dict(list(zip( + DATES, ANSWER_KEY.ALGORITHM_CUMULATIVE_VOLATILITY)))), + 'sharpe': pd.Series(dict(list(zip( + DATES, ANSWER_KEY.ALGORITHM_CUMULATIVE_SHARPE)))), + 'downside_risk': pd.Series(dict(list(zip( + DATES, ANSWER_KEY.CUMULATIVE_DOWNSIDE_RISK)))), + 'sortino': pd.Series(dict(list(zip( + DATES, ANSWER_KEY.CUMULATIVE_SORTINO)))), + 'information': pd.Series(dict(list(zip( + DATES, ANSWER_KEY.CUMULATIVE_INFORMATION)))), + 'alpha': pd.Series(dict(list(zip( + DATES, ANSWER_KEY.CUMULATIVE_ALPHA)))), + 'beta': pd.Series(dict(list(zip( + DATES, ANSWER_KEY.CUMULATIVE_BETA)))), }) diff --git a/tests/risk/answer_key.py.bak b/tests/risk/answer_key.py.bak new file mode 100644 index 0000000..a1eb8ec --- /dev/null +++ b/tests/risk/answer_key.py.bak @@ -0,0 +1,319 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
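The list(...) wraps that dominate the answer_key.py hunk above exist because map and zip return one-shot iterators in Python 3: anything that is indexed, measured with len(), or consumed more than once has to be materialized first (the wraps feeding dict(...) directly are merely harmless 2to3 caution). A standalone sketch with invented data:

dates = ['2013-01-02', '2013-01-03']
returns = ['0.01', '-0.02']

pairs = zip(dates, returns)
assert dict(pairs) == {'2013-01-02': '0.01', '2013-01-03': '-0.02'}
assert dict(pairs) == {}   # the first dict() already exhausted the iterator

values = map(float, returns)
# values[0] would raise TypeError: 'map' object is not subscriptable
values = list(map(float, returns))
assert values[0] == 0.01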
+import datetime +import hashlib +import os + +import numpy as np +import pandas as pd +import pytz +import xlrd +import requests + + +def col_letter_to_index(col_letter): + # Only supports single letter, + # but answer key doesn't need multi-letter, yet. + index = 0 + for i, char in enumerate(reversed(col_letter)): + index += ((ord(char) - 65) + 1) * pow(26, i) + return index + +DIR = os.path.dirname(os.path.realpath(__file__)) + +ANSWER_KEY_CHECKSUMS_PATH = os.path.join(DIR, 'risk-answer-key-checksums') +ANSWER_KEY_CHECKSUMS = open(ANSWER_KEY_CHECKSUMS_PATH, 'r').read().splitlines() + +ANSWER_KEY_FILENAME = 'risk-answer-key.xlsx' + +ANSWER_KEY_PATH = os.path.join(DIR, ANSWER_KEY_FILENAME) + +ANSWER_KEY_BUCKET_NAME = 'zipline-test_data' + +ANSWER_KEY_DL_TEMPLATE = """ +https://s3.amazonaws.com/zipline-test-data/risk/{md5}/risk-answer-key.xlsx +""".strip() + +LATEST_ANSWER_KEY_URL = ANSWER_KEY_DL_TEMPLATE.format( + md5=ANSWER_KEY_CHECKSUMS[-1]) + + +def answer_key_signature(): + with open(ANSWER_KEY_PATH, 'r') as f: + md5 = hashlib.md5() + while True: + buf = f.read(1024) + if not buf: + break + md5.update(buf) + return md5.hexdigest() + + +def ensure_latest_answer_key(): + """ + Get the latest answer key from a publically available location. + + Logic for determining what and when to download is as such: + + - If there is no local spreadsheet file, then get the lastest answer key, + as defined by the last row in the checksum file. + - If there is a local spreadsheet file: + -- If the spreadsheet's checksum is in the checksum file: + --- If the spreadsheet's checksum does not match the latest, then grab the + the latest checksum and replace the local checksum file. + --- If the spreadsheet's checksum matches the latest, then skip download, + and use the local spreadsheet as a cached copy. + -- If the spreadsheet's checksum is not in the checksum file, then leave + the local file alone, assuming that the local xls's md5 is not in the list + due to local modifications during development. + + It is possible that md5's could collide, if that is ever case, we should + then find an alternative naming scheme. + + The spreadsheet answer sheet is not kept in SCM, as every edit would + increase the repo size by the file size, since it is treated as a binary. + """ + + answer_key_dl_checksum = None + + local_answer_key_exists = os.path.exists(ANSWER_KEY_PATH) + if local_answer_key_exists: + local_hash = answer_key_signature() + + if local_hash in ANSWER_KEY_CHECKSUMS: + # Assume previously downloaded version. + # Check for latest. + if local_hash != ANSWER_KEY_CHECKSUMS[-1]: + # More recent checksum, download + answer_key_dl_checksum = ANSWER_KEY_CHECKSUMS[-1] + else: + # Assume local copy that is being developed on + answer_key_dl_checksum = None + else: + answer_key_dl_checksum = ANSWER_KEY_CHECKSUMS[-1] + + if answer_key_dl_checksum: + res = requests.get( + ANSWER_KEY_DL_TEMPLATE.format(md5=answer_key_dl_checksum)) + with open(ANSWER_KEY_PATH, 'w') as f: + f.write(res.content) + +# Get latest answer key on load. +ensure_latest_answer_key() + + +class DataIndex(object): + """ + Coordinates for the spreadsheet, using the values as seen in the notebook. + The python-excel libraries use 0 index, while the spreadsheet in a GUI + uses a 1 index. 
+ """ + def __init__(self, sheet_name, col, row_start, row_end, + value_type='float'): + self.sheet_name = sheet_name + self.col = col + self.row_start = row_start + self.row_end = row_end + self.value_type = value_type + + @property + def col_index(self): + return col_letter_to_index(self.col) - 1 + + @property + def row_start_index(self): + return self.row_start - 1 + + @property + def row_end_index(self): + return self.row_end - 1 + + def __str__(self): + return "'{sheet_name}'!{col}{row_start}:{col}{row_end}".format( + sheet_name=self.sheet_name, + col=self.col, + row_start=self.row_start, + row_end=self.row_end + ) + + +class AnswerKey(object): + + INDEXES = { + 'RETURNS': DataIndex('Sim Period', 'D', 4, 255), + + 'BENCHMARK': { + 'Dates': DataIndex('s_p', 'A', 4, 254, value_type='date'), + 'Returns': DataIndex('s_p', 'H', 4, 254) + }, + + # Below matches the inconsistent capitalization in spreadsheet + 'BENCHMARK_PERIOD_RETURNS': { + 'Monthly': DataIndex('s_p', 'P', 8, 19), + '3-Month': DataIndex('s_p', 'Q', 10, 19), + '6-month': DataIndex('s_p', 'R', 13, 19), + 'year': DataIndex('s_p', 'S', 19, 19), + }, + + 'BENCHMARK_PERIOD_VOLATILITY': { + 'Monthly': DataIndex('s_p', 'T', 8, 19), + '3-Month': DataIndex('s_p', 'U', 10, 19), + '6-month': DataIndex('s_p', 'V', 13, 19), + 'year': DataIndex('s_p', 'W', 19, 19), + }, + + 'ALGORITHM_PERIOD_RETURNS': { + 'Monthly': DataIndex('Sim Period', 'V', 23, 34), + '3-Month': DataIndex('Sim Period', 'W', 25, 34), + '6-month': DataIndex('Sim Period', 'X', 28, 34), + 'year': DataIndex('Sim Period', 'Y', 34, 34), + }, + + 'ALGORITHM_PERIOD_VOLATILITY': { + 'Monthly': DataIndex('Sim Period', 'Z', 23, 34), + '3-Month': DataIndex('Sim Period', 'AA', 25, 34), + '6-month': DataIndex('Sim Period', 'AB', 28, 34), + 'year': DataIndex('Sim Period', 'AC', 34, 34), + }, + + 'ALGORITHM_PERIOD_SHARPE': { + 'Monthly': DataIndex('Sim Period', 'AD', 23, 34), + '3-Month': DataIndex('Sim Period', 'AE', 25, 34), + '6-month': DataIndex('Sim Period', 'AF', 28, 34), + 'year': DataIndex('Sim Period', 'AG', 34, 34), + }, + + 'ALGORITHM_PERIOD_BETA': { + 'Monthly': DataIndex('Sim Period', 'AH', 23, 34), + '3-Month': DataIndex('Sim Period', 'AI', 25, 34), + '6-month': DataIndex('Sim Period', 'AJ', 28, 34), + 'year': DataIndex('Sim Period', 'AK', 34, 34), + }, + + 'ALGORITHM_PERIOD_ALPHA': { + 'Monthly': DataIndex('Sim Period', 'AL', 23, 34), + '3-Month': DataIndex('Sim Period', 'AM', 25, 34), + '6-month': DataIndex('Sim Period', 'AN', 28, 34), + 'year': DataIndex('Sim Period', 'AO', 34, 34), + }, + + 'ALGORITHM_PERIOD_BENCHMARK_VARIANCE': { + 'Monthly': DataIndex('Sim Period', 'BB', 23, 34), + '3-Month': DataIndex('Sim Period', 'BC', 25, 34), + '6-month': DataIndex('Sim Period', 'BD', 28, 34), + 'year': DataIndex('Sim Period', 'BE', 34, 34), + }, + + 'ALGORITHM_PERIOD_COVARIANCE': { + 'Monthly': DataIndex('Sim Period', 'AX', 23, 34), + '3-Month': DataIndex('Sim Period', 'AY', 25, 34), + '6-month': DataIndex('Sim Period', 'AZ', 28, 34), + 'year': DataIndex('Sim Period', 'BA', 34, 34), + }, + + 'ALGORITHM_RETURN_VALUES': DataIndex( + 'Sim Cumulative', 'D', 4, 254), + + 'ALGORITHM_CUMULATIVE_VOLATILITY': DataIndex( + 'Sim Cumulative', 'P', 4, 254), + + 'ALGORITHM_CUMULATIVE_SHARPE': DataIndex( + 'Sim Cumulative', 'R', 4, 254), + + 'CUMULATIVE_DOWNSIDE_RISK': DataIndex( + 'Sim Cumulative', 'U', 4, 254), + + 'CUMULATIVE_SORTINO': DataIndex( + 'Sim Cumulative', 'V', 4, 254), + + 'CUMULATIVE_INFORMATION': DataIndex( + 'Sim Cumulative', 'Y', 4, 254), + + 'CUMULATIVE_BETA': DataIndex( 
+ 'Sim Cumulative', 'AB', 4, 254), + + 'CUMULATIVE_ALPHA': DataIndex( + 'Sim Cumulative', 'AC', 4, 254), + + } + + def __init__(self): + self.workbook = xlrd.open_workbook(ANSWER_KEY_PATH) + + self.sheets = {} + self.sheets['Sim Period'] = self.workbook.sheet_by_name('Sim Period') + self.sheets['Sim Cumulative'] = self.workbook.sheet_by_name( + 'Sim Cumulative') + self.sheets['s_p'] = self.workbook.sheet_by_name('s_p') + + for name, index in self.INDEXES.items(): + if isinstance(index, dict): + subvalues = {} + for subkey, subindex in index.items(): + subvalues[subkey] = self.get_values(subindex) + setattr(self, name, subvalues) + else: + setattr(self, name, self.get_values(index)) + + def parse_date_value(self, value): + return xlrd.xldate_as_tuple(value, 0) + + def parse_float_value(self, value): + return value if value != '' else np.nan + + def get_raw_values(self, data_index): + return self.sheets[data_index.sheet_name].col_values( + data_index.col_index, + data_index.row_start_index, + data_index.row_end_index + 1) + + @property + def value_type_to_value_func(self): + return { + 'float': self.parse_float_value, + 'date': self.parse_date_value, + } + + def get_values(self, data_index): + value_parser = self.value_type_to_value_func[data_index.value_type] + return map(value_parser, self.get_raw_values(data_index)) + + +ANSWER_KEY = AnswerKey() + +BENCHMARK_DATES = ANSWER_KEY.BENCHMARK['Dates'] +BENCHMARK_RETURNS = ANSWER_KEY.BENCHMARK['Returns'] +DATES = [datetime.datetime(*x, tzinfo=pytz.UTC) for x in BENCHMARK_DATES] +BENCHMARK = pd.Series(dict(zip(DATES, BENCHMARK_RETURNS))) +ALGORITHM_RETURNS = pd.Series( + dict(zip(DATES, ANSWER_KEY.ALGORITHM_RETURN_VALUES))) +RETURNS_DATA = pd.DataFrame({'Benchmark Returns': BENCHMARK, + 'Algorithm Returns': ALGORITHM_RETURNS}) +RISK_CUMULATIVE = pd.DataFrame({ + 'volatility': pd.Series(dict(zip( + DATES, ANSWER_KEY.ALGORITHM_CUMULATIVE_VOLATILITY))), + 'sharpe': pd.Series(dict(zip( + DATES, ANSWER_KEY.ALGORITHM_CUMULATIVE_SHARPE))), + 'downside_risk': pd.Series(dict(zip( + DATES, ANSWER_KEY.CUMULATIVE_DOWNSIDE_RISK))), + 'sortino': pd.Series(dict(zip( + DATES, ANSWER_KEY.CUMULATIVE_SORTINO))), + 'information': pd.Series(dict(zip( + DATES, ANSWER_KEY.CUMULATIVE_INFORMATION))), + 'alpha': pd.Series(dict(zip( + DATES, ANSWER_KEY.CUMULATIVE_ALPHA))), + 'beta': pd.Series(dict(zip( + DATES, ANSWER_KEY.CUMULATIVE_BETA))), +}) diff --git a/tests/risk/test_risk_cumulative.py b/tests/risk/test_risk_cumulative.py index 97be2f7..16fb3a9 100644 --- a/tests/risk/test_risk_cumulative.py +++ b/tests/risk/test_risk_cumulative.py @@ -23,7 +23,7 @@ from zipline.finance.trading import SimulationParameters -import answer_key +from . import answer_key ANSWER_KEY = answer_key.ANSWER_KEY diff --git a/tests/risk/test_risk_cumulative.py.bak b/tests/risk/test_risk_cumulative.py.bak new file mode 100644 index 0000000..97be2f7 --- /dev/null +++ b/tests/risk/test_risk_cumulative.py.bak @@ -0,0 +1,118 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import datetime +import numpy as np +import pytz +import zipline.finance.risk as risk +from zipline.utils import factory + +from zipline.finance.trading import SimulationParameters + +import answer_key +ANSWER_KEY = answer_key.ANSWER_KEY + + +class TestRisk(unittest.TestCase): + + def setUp(self): + start_date = datetime.datetime( + year=2006, + month=1, + day=1, + hour=0, + minute=0, + tzinfo=pytz.utc) + end_date = datetime.datetime( + year=2006, month=12, day=29, tzinfo=pytz.utc) + + self.sim_params = SimulationParameters( + period_start=start_date, + period_end=end_date + ) + + self.algo_returns_06 = factory.create_returns_from_list( + answer_key.ALGORITHM_RETURNS.values, + self.sim_params + ) + + self.cumulative_metrics_06 = risk.RiskMetricsCumulative( + self.sim_params) + + for dt, returns in answer_key.RETURNS_DATA.iterrows(): + self.cumulative_metrics_06.update(dt, + returns['Algorithm Returns'], + returns['Benchmark Returns']) + + def test_algorithm_volatility_06(self): + np.testing.assert_almost_equal( + ANSWER_KEY.ALGORITHM_CUMULATIVE_VOLATILITY, + self.cumulative_metrics_06.metrics.algorithm_volatility.values) + + def test_sharpe_06(self): + for dt, value in answer_key.RISK_CUMULATIVE.sharpe.iterkv(): + np.testing.assert_almost_equal( + value, + self.cumulative_metrics_06.metrics.sharpe[dt], + decimal=2, + err_msg="Mismatch at %s" % (dt,)) + + def test_downside_risk_06(self): + for dt, value in answer_key.RISK_CUMULATIVE.downside_risk.iterkv(): + np.testing.assert_almost_equal( + self.cumulative_metrics_06.metrics.downside_risk[dt], + value, + decimal=2, + err_msg="Mismatch at %s" % (dt,)) + + def test_sortino_06(self): + for dt, value in answer_key.RISK_CUMULATIVE.sortino.iterkv(): + np.testing.assert_almost_equal( + self.cumulative_metrics_06.metrics.sortino[dt], + value, + decimal=2, + err_msg="Mismatch at %s" % (dt,)) + + def test_information_06(self): + for dt, value in answer_key.RISK_CUMULATIVE.information.iterkv(): + np.testing.assert_almost_equal( + self.cumulative_metrics_06.metrics.information[dt], + value, + decimal=2, + err_msg="Mismatch at %s" % (dt,)) + + def test_alpha_06(self): + for dt, value in answer_key.RISK_CUMULATIVE.alpha.iterkv(): + np.testing.assert_almost_equal( + self.cumulative_metrics_06.metrics.alpha[dt], + value, + decimal=2, + err_msg="Mismatch at %s" % (dt,)) + + def test_beta_06(self): + for dt, value in answer_key.RISK_CUMULATIVE.beta.iterkv(): + np.testing.assert_almost_equal( + self.cumulative_metrics_06.metrics.beta[dt], + value, + decimal=2, + err_msg="Mismatch at %s" % (dt,)) + + def test_max_drawdown_calculated(self): + # We don't track max_drawdown by day, so it doesn't make sense to + # generate a full answer key for it. For now, ensure it's just + # "not zero" + self.assertNotEqual(self.cumulative_metrics_06.max_drawdown, 0.0) diff --git a/tests/risk/upload_answer_key.py b/tests/risk/upload_answer_key.py index 9c4f532..e4327d2 100644 --- a/tests/risk/upload_answer_key.py +++ b/tests/risk/upload_answer_key.py @@ -50,8 +50,8 @@ def main(): bucket_name=BUCKET_NAME, key=key.key) - print("Uploaded to key: {key}".format(key=key.key)) - print("Download link: {download_link}".format(download_link=download_link)) + print(("Uploaded to key: {key}".format(key=key.key))) + print(("Download link: {download_link}".format(download_link=download_link))) # Now update checksum file with the recently added answer key. 
# The checksum file update will then need to be committed via git.
diff --git a/tests/risk/upload_answer_key.py.bak b/tests/risk/upload_answer_key.py.bak
new file mode 100644
index 0000000..9c4f532
--- /dev/null
+++ b/tests/risk/upload_answer_key.py.bak
@@ -0,0 +1,63 @@
+#
+# Copyright 2013 Quantopian, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Utility script for maintainer use to upload current version of the answer key
+spreadsheet to S3.
+"""
+import hashlib
+
+import boto
+
+from . import answer_key
+
+BUCKET_NAME = 'zipline-test-data'
+
+
+def main():
+    with open(answer_key.ANSWER_KEY_PATH, 'r') as f:
+        md5 = hashlib.md5()
+        while True:
+            buf = f.read(1024)
+            if not buf:
+                break
+            md5.update(buf)
+    local_hash = md5.hexdigest()
+
+    s3_conn = boto.connect_s3()
+
+    bucket = s3_conn.get_bucket(BUCKET_NAME)
+    key = boto.s3.key.Key(bucket)
+
+    key.key = "risk/{local_hash}/risk-answer-key.xlsx".format(
+        local_hash=local_hash)
+    key.set_contents_from_filename(answer_key.ANSWER_KEY_PATH)
+    key.set_acl('public-read')
+
+    download_link = "http://s3.amazonaws.com/{bucket_name}/{key}".format(
+        bucket_name=BUCKET_NAME,
+        key=key.key)
+
+    print("Uploaded to key: {key}".format(key=key.key))
+    print("Download link: {download_link}".format(download_link=download_link))
+
+    # Now update checksum file with the recently added answer key.
+    # The checksum file update will then need to be committed via git.
+    with open(answer_key.ANSWER_KEY_CHECKSUMS_PATH, 'a') as checksum_file:
+        checksum_file.write(local_hash)
+        checksum_file.write("\n")
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_algorithm.py b/tests/test_algorithm.py
index 146ddcb..fd1ddfb 100644
--- a/tests/test_algorithm.py
+++ b/tests/test_algorithm.py
@@ -58,7 +58,7 @@ def test_record_incr(self):
         output = algo.run(self.source)
 
         np.testing.assert_array_equal(output['incr'].values,
-                                      range(1, len(output) + 1))
+                                      list(range(1, len(output) + 1)))
 
 
 class TestTransformAlgorithm(TestCase):
diff --git a/tests/test_algorithm.py.bak b/tests/test_algorithm.py.bak
new file mode 100644
index 0000000..146ddcb
--- /dev/null
+++ b/tests/test_algorithm.py.bak
@@ -0,0 +1,196 @@
+#
+# Copyright 2013 Quantopian, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
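In the test_record_incr hunk above, 2to3 rewrites range(1, len(output) + 1) as list(range(...)). Python 2's range() returned a list; Python 3's returns a lazy range object. numpy can generally consume either, so the list() wrapper here is conservative rather than strictly required; it simply pins the comparison to the eager Python 2 value. A small sketch with a toy array standing in for the algorithm's recorded output:

import numpy as np

incr = np.array([1, 2, 3, 4])
# Both forms pass: assert_array_equal coerces its arguments to
# arrays, and it accepts a range object as well as a list.
np.testing.assert_array_equal(incr, list(range(1, 5)))
np.testing.assert_array_equal(incr, range(1, 5))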
+ +from unittest import TestCase +from datetime import timedelta +import numpy as np + +from zipline.utils.test_utils import setup_logger +import zipline.utils.factory as factory +from zipline.test_algorithms import (TestRegisterTransformAlgorithm, + RecordAlgorithm, + TestOrderAlgorithm, + TestOrderInstantAlgorithm, + TestOrderValueAlgorithm, + TestTargetAlgorithm, + TestOrderPercentAlgorithm, + TestTargetPercentAlgorithm, + TestTargetValueAlgorithm) + +from zipline.sources import (SpecificEquityTrades, + DataFrameSource, + DataPanelSource) +from zipline.transforms import MovingAverage +from zipline.finance.trading import SimulationParameters + + +class TestRecordAlgorithm(TestCase): + def setUp(self): + self.sim_params = factory.create_simulation_parameters(num_days=4) + trade_history = factory.create_trade_history( + 133, + [10.0, 10.0, 11.0, 11.0], + [100, 100, 100, 300], + timedelta(days=1), + self.sim_params + ) + + self.source = SpecificEquityTrades(event_list=trade_history) + self.df_source, self.df = \ + factory.create_test_df_source(self.sim_params) + + def test_record_incr(self): + algo = RecordAlgorithm( + sim_params=self.sim_params, + data_frequency='daily') + output = algo.run(self.source) + + np.testing.assert_array_equal(output['incr'].values, + range(1, len(output) + 1)) + + +class TestTransformAlgorithm(TestCase): + def setUp(self): + setup_logger(self) + self.sim_params = factory.create_simulation_parameters(num_days=4) + setup_logger(self) + + trade_history = factory.create_trade_history( + 133, + [10.0, 10.0, 11.0, 11.0], + [100, 100, 100, 300], + timedelta(days=1), + self.sim_params + ) + self.source = SpecificEquityTrades(event_list=trade_history) + + self.df_source, self.df = \ + factory.create_test_df_source(self.sim_params) + + self.panel_source, self.panel = \ + factory.create_test_panel_source(self.sim_params) + + def test_source_as_input(self): + algo = TestRegisterTransformAlgorithm( + sim_params=self.sim_params, + sids=[133] + ) + algo.run(self.source) + self.assertEqual(len(algo.sources), 1) + assert isinstance(algo.sources[0], SpecificEquityTrades) + + def test_multi_source_as_input_no_start_end(self): + algo = TestRegisterTransformAlgorithm( + sids=[133] + ) + + with self.assertRaises(AssertionError): + algo.run([self.source, self.df_source]) + + def test_multi_source_as_input(self): + sim_params = SimulationParameters( + self.df.index[0], + self.df.index[-1] + ) + algo = TestRegisterTransformAlgorithm( + sim_params=sim_params, + sids=[0, 1, 133] + ) + algo.run([self.source, self.df_source]) + self.assertEqual(len(algo.sources), 2) + + def test_df_as_input(self): + algo = TestRegisterTransformAlgorithm( + sim_params=self.sim_params, + sids=[0, 1] + ) + algo.run(self.df) + assert isinstance(algo.sources[0], DataFrameSource) + + def test_panel_as_input(self): + algo = TestRegisterTransformAlgorithm( + sim_params=self.sim_params, + sids=[0, 1]) + algo.run(self.panel) + assert isinstance(algo.sources[0], DataPanelSource) + + def test_run_twice(self): + algo = TestRegisterTransformAlgorithm( + sim_params=self.sim_params, + sids=[0, 1] + ) + + res1 = algo.run(self.df) + res2 = algo.run(self.df) + + np.testing.assert_array_equal(res1, res2) + + def test_transform_registered(self): + algo = TestRegisterTransformAlgorithm( + sim_params=self.sim_params, + sids=[133] + ) + + algo.run(self.source) + assert 'mavg' in algo.registered_transforms + assert algo.registered_transforms['mavg']['args'] == (['price'],) + assert algo.registered_transforms['mavg']['kwargs'] == \ 
+ {'window_length': 2, 'market_aware': True} + assert algo.registered_transforms['mavg']['class'] is MovingAverage + + def test_data_frequency_setting(self): + algo = TestRegisterTransformAlgorithm( + sim_params=self.sim_params, + data_frequency='daily' + ) + self.assertEqual(algo.data_frequency, 'daily') + self.assertEqual(algo.annualizer, 250) + + algo = TestRegisterTransformAlgorithm( + sim_params=self.sim_params, + data_frequency='minute' + ) + self.assertEqual(algo.data_frequency, 'minute') + self.assertEqual(algo.annualizer, 250 * 6 * 60) + + algo = TestRegisterTransformAlgorithm( + sim_params=self.sim_params, + data_frequency='minute', + annualizer=10 + ) + self.assertEqual(algo.data_frequency, 'minute') + self.assertEqual(algo.annualizer, 10) + + def test_order_methods(self): + AlgoClasses = [TestOrderAlgorithm, + TestOrderValueAlgorithm, + TestTargetAlgorithm, + TestOrderPercentAlgorithm, + TestTargetPercentAlgorithm, + TestTargetValueAlgorithm] + + for AlgoClass in AlgoClasses: + algo = AlgoClass( + sim_params=self.sim_params, + data_frequency='daily' + ) + algo.run(self.df) + + def test_order_instant(self): + algo = TestOrderInstantAlgorithm(sim_params=self.sim_params, + data_frequency='daily', + instant_fill=True) + + algo.run(self.df) diff --git a/tests/test_data_util.py b/tests/test_data_util.py index ded2c8c..87234bf 100644 --- a/tests/test_data_util.py +++ b/tests/test_data_util.py @@ -61,8 +61,8 @@ def test_basics(self): def f(option='clever', n=500, copy=False): - items = range(5) - minor = range(20) + items = list(range(5)) + minor = list(range(20)) window = 100 periods = n diff --git a/tests/test_data_util.py.bak b/tests/test_data_util.py.bak new file mode 100644 index 0000000..ded2c8c --- /dev/null +++ b/tests/test_data_util.py.bak @@ -0,0 +1,108 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
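The test_data_util.py hunk above turns the benchmark helper's axis labels into real lists. Unlike the numpy case, this rewrite can matter: a Python 3 range supports len() and indexing but is immutable and cannot be concatenated, so any downstream code that treats items or minor as a plain list would break. A standalone sketch of the difference:

r = range(5)
assert len(r) == 5 and r[-1] == 4   # the sequence protocol still works
try:
    r + [5]                          # but it is not a list:
except TypeError:
    pass                             # concatenation raises TypeError

items = list(range(5))               # eager Python 2 behavior restored
assert items + [5] == [0, 1, 2, 3, 4, 5]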
+ +import unittest + +from collections import deque + +import numpy as np + +import pandas as pd +import pandas.util.testing as tm + +from zipline.utils.data import RollingPanel + + +class TestRollingPanel(unittest.TestCase): + + def test_basics(self): + items = ['foo', 'bar', 'baz'] + minor = ['A', 'B', 'C', 'D'] + + window = 10 + + rp = RollingPanel(window, items, minor, cap_multiple=2) + + dates = pd.date_range('2000-01-01', periods=30, tz='utc') + + major_deque = deque() + + frames = {} + + for i in range(30): + frame = pd.DataFrame(np.random.randn(3, 4), index=items, + columns=minor) + date = dates[i] + + rp.add_frame(date, frame) + + frames[date] = frame + major_deque.append(date) + + if i >= window: + major_deque.popleft() + + result = rp.get_current() + expected = pd.Panel(frames, items=list(major_deque), + major_axis=items, minor_axis=minor) + tm.assert_panel_equal(result, expected.swapaxes(0, 1)) + + +def f(option='clever', n=500, copy=False): + items = range(5) + minor = range(20) + window = 100 + periods = n + + dates = pd.date_range('2000-01-01', periods=periods, tz='utc') + frames = {} + + if option == 'clever': + rp = RollingPanel(window, items, minor, cap_multiple=2) + major_deque = deque() + dummy = pd.DataFrame(np.random.randn(len(items), len(minor)), + index=items, columns=minor) + + for i in range(periods): + frame = dummy * (1 + 0.001 * i) + date = dates[i] + + rp.add_frame(date, frame) + + frames[date] = frame + major_deque.append(date) + + if i >= window: + del frames[major_deque.popleft()] + + result = rp.get_current() + if copy: + result = result.copy() + else: + major_deque = deque() + dummy = pd.DataFrame(np.random.randn(len(items), len(minor)), + index=items, columns=minor) + + for i in range(periods): + frame = dummy * (1 + 0.001 * i) + date = dates[i] + frames[date] = frame + major_deque.append(date) + + if i >= window: + del frames[major_deque.popleft()] + + result = pd.Panel(frames, items=list(major_deque), + major_axis=items, minor_axis=minor) diff --git a/tests/test_events_through_risk.py b/tests/test_events_through_risk.py index 6026c4a..5e141ca 100644 --- a/tests/test_events_through_risk.py +++ b/tests/test_events_through_risk.py @@ -293,7 +293,7 @@ def test_minute_buy_and_hold(self): crm = algo.perf_tracker.cumulative_risk_metrics - first_msg = gen.next() + first_msg = next(gen) self.assertIsNotNone(first_msg, "There should be a message emitted.") @@ -310,7 +310,7 @@ def test_minute_buy_and_hold(self): crm.metrics.algorithm_volatility[algo.datetime.date()], "On the first day algorithm volatility does not exist.") - second_msg = gen.next() + second_msg = next(gen) self.assertIsNotNone(second_msg, "There should be a message " "emitted.") @@ -325,7 +325,7 @@ def test_minute_buy_and_hold(self): crm.algorithm_returns[-1], decimal=6) - third_msg = gen.next() + third_msg = next(gen) self.assertEqual(1, len(algo.portfolio.positions), "Number of positions should stay the same.") diff --git a/tests/test_events_through_risk.py.bak b/tests/test_events_through_risk.py.bak new file mode 100644 index 0000000..6026c4a --- /dev/null +++ b/tests/test_events_through_risk.py.bak @@ -0,0 +1,341 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import datetime +import pytz + +import numpy as np + +from zipline.finance.trading import SimulationParameters +from zipline.finance import trading +from zipline.algorithm import TradingAlgorithm +from zipline.protocol import ( + Event, + DATASOURCE_TYPE +) + + +class BuyAndHoldAlgorithm(TradingAlgorithm): + + SID_TO_BUY_AND_HOLD = 1 + + def initialize(self): + self.holding = False + + def handle_data(self, data): + if not self.holding: + self.order(self.SID_TO_BUY_AND_HOLD, 100) + self.holding = True + + +class TestEventsThroughRisk(unittest.TestCase): + + def test_daily_buy_and_hold(self): + + start_date = datetime.datetime( + year=2006, + month=1, + day=3, + hour=0, + minute=0, + tzinfo=pytz.utc) + end_date = datetime.datetime( + year=2006, + month=1, + day=5, + hour=0, + minute=0, + tzinfo=pytz.utc) + + sim_params = SimulationParameters( + period_start=start_date, + period_end=end_date, + emission_rate='daily' + ) + + algo = BuyAndHoldAlgorithm( + sim_params=sim_params, + data_frequency='daily') + + first_date = datetime.datetime(2006, 1, 3, tzinfo=pytz.utc) + second_date = datetime.datetime(2006, 1, 4, tzinfo=pytz.utc) + third_date = datetime.datetime(2006, 1, 5, tzinfo=pytz.utc) + + trade_bar_data = [ + Event({ + 'open_price': 10, + 'close_price': 15, + 'price': 15, + 'volume': 1000, + 'sid': 1, + 'dt': first_date, + 'source_id': 'test-trade-source', + 'type': DATASOURCE_TYPE.TRADE + }), + Event({ + 'open_price': 15, + 'close_price': 20, + 'price': 20, + 'volume': 2000, + 'sid': 1, + 'dt': second_date, + 'source_id': 'test_list', + 'type': DATASOURCE_TYPE.TRADE + }), + Event({ + 'open_price': 20, + 'close_price': 15, + 'price': 15, + 'volume': 1000, + 'sid': 1, + 'dt': third_date, + 'source_id': 'test_list', + 'type': DATASOURCE_TYPE.TRADE + }), + ] + benchmark_data = [ + Event({ + 'returns': 0.1, + 'dt': first_date, + 'source_id': 'test-benchmark-source', + 'type': DATASOURCE_TYPE.BENCHMARK + }), + Event({ + 'returns': 0.2, + 'dt': second_date, + 'source_id': 'test-benchmark-source', + 'type': DATASOURCE_TYPE.BENCHMARK + }), + Event({ + 'returns': 0.4, + 'dt': third_date, + 'source_id': 'test-benchmark-source', + 'type': DATASOURCE_TYPE.BENCHMARK + }), + ] + + algo.benchmark_return_source = benchmark_data + algo.sources = list([trade_bar_data]) + gen = algo._create_generator(sim_params) + + # TODO: Hand derive these results. + # Currently, the output from the time of this writing to + # at least be an early warning against changes. + expected_algorithm_returns = { + first_date: 0.0, + second_date: -0.000350, + third_date: -0.050018 + } + + # TODO: Hand derive these results. + # Currently, the output from the time of this writing to + # at least be an early warning against changes. 
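The test_events_through_risk.py hunk replaces gen.next() with next(gen). Python 3 renamed the generator method to __next__() and made the next() builtin (available since Python 2.6) the portable way to advance an iterator, so the call-site rewrite is mechanical. A minimal standalone generator sketch:

def messages():
    yield 'first'
    yield 'second'

gen = messages()
first_msg = next(gen)          # Python 3 spelling; gen.next() would raise
assert first_msg == 'first'    # AttributeError, the method is __next__()
assert gen.__next__() == 'second'  # equivalent, but next(gen) is idiomatic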
+ expected_sharpe = { + first_date: np.nan, + second_date: -31.56903265, + third_date: -11.459888981, + } + + for bar in gen: + current_dt = algo.datetime + crm = algo.perf_tracker.cumulative_risk_metrics + + np.testing.assert_almost_equal( + crm.algorithm_returns[current_dt], + expected_algorithm_returns[current_dt], + decimal=6) + + np.testing.assert_almost_equal( + crm.metrics.sharpe[current_dt], + expected_sharpe[current_dt], + decimal=6, + err_msg="Mismatch at %s" % (current_dt,)) + + def test_minute_buy_and_hold(self): + with trading.TradingEnvironment(): + start_date = datetime.datetime( + year=2006, + month=1, + day=3, + hour=0, + minute=0, + tzinfo=pytz.utc) + end_date = datetime.datetime( + year=2006, + month=1, + day=5, + hour=0, + minute=0, + tzinfo=pytz.utc) + + sim_params = SimulationParameters( + period_start=start_date, + period_end=end_date, + emission_rate='daily', + data_frequency='minute') + + algo = BuyAndHoldAlgorithm( + sim_params=sim_params, + data_frequency='minute') + + first_date = datetime.datetime(2006, 1, 3, tzinfo=pytz.utc) + first_open, first_close = \ + trading.environment.get_open_and_close(first_date) + + second_date = datetime.datetime(2006, 1, 4, tzinfo=pytz.utc) + second_open, second_close = \ + trading.environment.get_open_and_close(second_date) + + third_date = datetime.datetime(2006, 1, 5, tzinfo=pytz.utc) + third_open, third_close = \ + trading.environment.get_open_and_close(third_date) + + benchmark_data = [ + Event({ + 'returns': 0.1, + 'dt': first_close, + 'source_id': 'test-benchmark-source', + 'type': DATASOURCE_TYPE.BENCHMARK + }), + Event({ + 'returns': 0.2, + 'dt': second_close, + 'source_id': 'test-benchmark-source', + 'type': DATASOURCE_TYPE.BENCHMARK + }), + Event({ + 'returns': 0.4, + 'dt': third_close, + 'source_id': 'test-benchmark-source', + 'type': DATASOURCE_TYPE.BENCHMARK + }), + ] + + trade_bar_data = [ + Event({ + 'open_price': 10, + 'close_price': 15, + 'price': 15, + 'volume': 1000, + 'sid': 1, + 'dt': first_open, + 'source_id': 'test-trade-source', + 'type': DATASOURCE_TYPE.TRADE + }), + Event({ + 'open_price': 10, + 'close_price': 15, + 'price': 15, + 'volume': 1000, + 'sid': 1, + 'dt': first_open + datetime.timedelta(minutes=10), + 'source_id': 'test-trade-source', + 'type': DATASOURCE_TYPE.TRADE + }), + Event({ + 'open_price': 15, + 'close_price': 20, + 'price': 20, + 'volume': 2000, + 'sid': 1, + 'dt': second_open, + 'source_id': 'test-trade-source', + 'type': DATASOURCE_TYPE.TRADE + }), + Event({ + 'open_price': 15, + 'close_price': 20, + 'price': 20, + 'volume': 2000, + 'sid': 1, + 'dt': second_open + datetime.timedelta(minutes=10), + 'source_id': 'test-trade-source', + 'type': DATASOURCE_TYPE.TRADE + }), + Event({ + 'open_price': 20, + 'close_price': 15, + 'price': 15, + 'volume': 1000, + 'sid': 1, + 'dt': third_open, + 'source_id': 'test-trade-source', + 'type': DATASOURCE_TYPE.TRADE + }), + Event({ + 'open_price': 20, + 'close_price': 15, + 'price': 15, + 'volume': 1000, + 'sid': 1, + 'dt': third_open + datetime.timedelta(minutes=10), + 'source_id': 'test-trade-source', + 'type': DATASOURCE_TYPE.TRADE + }), + ] + + algo.benchmark_return_source = benchmark_data + algo.sources = list([trade_bar_data]) + gen = algo._create_generator(sim_params) + + crm = algo.perf_tracker.cumulative_risk_metrics + + first_msg = gen.next() + + self.assertIsNotNone(first_msg, + "There should be a message emitted.") + + # Protects against bug where the positions appeared to be + # a day late, because benchmarks were triggering + # 
calculations before the events for the day were + # processed. + self.assertEqual(1, len(algo.portfolio.positions), "There should " + "be one position after the first day.") + + self.assertEquals( + 0, + crm.metrics.algorithm_volatility[algo.datetime.date()], + "On the first day algorithm volatility does not exist.") + + second_msg = gen.next() + + self.assertIsNotNone(second_msg, "There should be a message " + "emitted.") + + self.assertEqual(1, len(algo.portfolio.positions), + "Number of positions should stay the same.") + + # TODO: Hand derive. Current value is just a canary to + # detect changes. + np.testing.assert_almost_equal( + 0.050022510129558301, + crm.algorithm_returns[-1], + decimal=6) + + third_msg = gen.next() + + self.assertEqual(1, len(algo.portfolio.positions), + "Number of positions should stay the same.") + + self.assertIsNotNone(third_msg, "There should be a message " + "emitted.") + + # TODO: Hand derive. Current value is just a canary to + # detect changes. + np.testing.assert_almost_equal( + -0.047639464532418657, + crm.algorithm_returns[-1], + decimal=6) diff --git a/tests/test_finance.py b/tests/test_finance.py index 689edd7..91db876 100644 --- a/tests/test_finance.py +++ b/tests/test_finance.py @@ -314,7 +314,7 @@ def transaction_sim(self, **params): alternator = 1 order_date = start_date - for i in xrange(order_count): + for i in range(order_count): blotter.set_date(order_date) blotter.order(sid, order_amount * alternator ** i, None, None) @@ -334,7 +334,7 @@ def transaction_sim(self, **params): order_list = oo[sid] self.assertEqual(order_count, len(order_list)) - for i in xrange(order_count): + for i in range(order_count): order = order_list[i] self.assertEqual(order.sid, sid) self.assertEqual(order.amount, order_amount * alternator ** i) @@ -372,7 +372,7 @@ def transaction_sim(self, **params): self.assertEqual(len(transactions), len(order_list)) total_volume = 0 - for i in xrange(len(transactions)): + for i in range(len(transactions)): txn = transactions[i] total_volume += txn.amount if complete_fill: diff --git a/tests/test_finance.py.bak b/tests/test_finance.py.bak new file mode 100644 index 0000000..689edd7 --- /dev/null +++ b/tests/test_finance.py.bak @@ -0,0 +1,428 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
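The test_finance.py hunk above is the most mechanical rename in this migration: Python 2's xrange was already a lazy sequence, and Python 3's range has the same semantics, so for i in xrange(order_count) becomes for i in range(order_count) with identical behavior and memory use (unlike the bare range() sites elsewhere, which 2to3 wraps in list() because their Python 2 result was an eager list). A trivial sketch:

# Lazy in both spellings: constant memory no matter how large n is.
total = 0
for i in range(10):   # Python 2: xrange(10)
    total += i
assert total == 45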
+ +""" +Tests for the zipline.finance package +""" +import itertools +import operator + +import pytz + +from unittest import TestCase +from datetime import datetime, timedelta + +import numpy as np + +from nose.tools import timed + +import zipline.protocol +from zipline.protocol import Event, DATASOURCE_TYPE + +import zipline.utils.factory as factory +import zipline.utils.simfactory as simfactory + +from zipline.finance.blotter import Blotter +from zipline.gens.composites import date_sorted_sources + +from zipline.finance import trading +from zipline.finance.trading import SimulationParameters + +from zipline.finance.performance import PerformanceTracker +from zipline.utils.test_utils import( + setup_logger, + teardown_logger, + assert_single_position +) + +DEFAULT_TIMEOUT = 15 # seconds +EXTENDED_TIMEOUT = 90 + + +class FinanceTestCase(TestCase): + + def setUp(self): + self.zipline_test_config = { + 'sid': 133, + } + + setup_logger(self) + + def tearDown(self): + teardown_logger(self) + + @timed(DEFAULT_TIMEOUT) + def test_factory_daily(self): + sim_params = factory.create_simulation_parameters() + trade_source = factory.create_daily_trade_source( + [133], + 200, + sim_params + ) + prev = None + for trade in trade_source: + if prev: + self.assertTrue(trade.dt > prev.dt) + prev = trade + + @timed(DEFAULT_TIMEOUT) + def test_trading_environment(self): + # holidays taken from: http://www.nyse.com/press/1191407641943.html + new_years = datetime(2008, 1, 1, tzinfo=pytz.utc) + mlk_day = datetime(2008, 1, 21, tzinfo=pytz.utc) + presidents = datetime(2008, 2, 18, tzinfo=pytz.utc) + good_friday = datetime(2008, 3, 21, tzinfo=pytz.utc) + memorial_day = datetime(2008, 5, 26, tzinfo=pytz.utc) + july_4th = datetime(2008, 7, 4, tzinfo=pytz.utc) + labor_day = datetime(2008, 9, 1, tzinfo=pytz.utc) + tgiving = datetime(2008, 11, 27, tzinfo=pytz.utc) + christmas = datetime(2008, 5, 25, tzinfo=pytz.utc) + a_saturday = datetime(2008, 8, 2, tzinfo=pytz.utc) + a_sunday = datetime(2008, 10, 12, tzinfo=pytz.utc) + holidays = [ + new_years, + mlk_day, + presidents, + good_friday, + memorial_day, + july_4th, + labor_day, + tgiving, + christmas, + a_saturday, + a_sunday + ] + + for holiday in holidays: + self.assertTrue(not trading.environment.is_trading_day(holiday)) + + first_trading_day = datetime(2008, 1, 2, tzinfo=pytz.utc) + last_trading_day = datetime(2008, 12, 31, tzinfo=pytz.utc) + workdays = [first_trading_day, last_trading_day] + + for workday in workdays: + self.assertTrue(trading.environment.is_trading_day(workday)) + + def test_simulation_parameters(self): + env = SimulationParameters( + period_start=datetime(2008, 1, 1, tzinfo=pytz.utc), + period_end=datetime(2008, 12, 31, tzinfo=pytz.utc), + capital_base=100000, + ) + + self.assertTrue(env.last_close.month == 12) + self.assertTrue(env.last_close.day == 31) + + @timed(DEFAULT_TIMEOUT) + def test_sim_params_days_in_period(self): + + # January 2008 + # Su Mo Tu We Th Fr Sa + # 1 2 3 4 5 + # 6 7 8 9 10 11 12 + # 13 14 15 16 17 18 19 + # 20 21 22 23 24 25 26 + # 27 28 29 30 31 + + env = SimulationParameters( + period_start=datetime(2007, 12, 31, tzinfo=pytz.utc), + period_end=datetime(2008, 1, 7, tzinfo=pytz.utc), + capital_base=100000, + ) + + expected_trading_days = ( + datetime(2007, 12, 31, tzinfo=pytz.utc), + # Skip new years + # holidays taken from: http://www.nyse.com/press/1191407641943.html + datetime(2008, 1, 2, tzinfo=pytz.utc), + datetime(2008, 1, 3, tzinfo=pytz.utc), + datetime(2008, 1, 4, tzinfo=pytz.utc), + # Skip Saturday + # Skip Sunday 
+ datetime(2008, 1, 7, tzinfo=pytz.utc) + ) + + num_expected_trading_days = 5 + self.assertEquals(num_expected_trading_days, env.days_in_period) + np.testing.assert_array_equal(expected_trading_days, + env.trading_days.tolist()) + + @timed(EXTENDED_TIMEOUT) + def test_full_zipline(self): + # provide enough trades to ensure all orders are filled. + self.zipline_test_config['order_count'] = 100 + # making a small order amount, so that each order is filled + # in a single transaction, and txn_count == order_count. + self.zipline_test_config['order_amount'] = 25 + # No transactions can be filled on the first trade, so + # we have one extra trade to ensure all orders are filled. + self.zipline_test_config['trade_count'] = 101 + full_zipline = simfactory.create_test_zipline( + **self.zipline_test_config) + assert_single_position(self, full_zipline) + + # TODO: write tests for short sales + # TODO: write a test to do massive buying or shorting. + + @timed(DEFAULT_TIMEOUT) + def test_partially_filled_orders(self): + + # create a scenario where order size and trade size are equal + # so that orders must be spread out over several trades. + params = { + 'trade_count': 360, + 'trade_amount': 100, + 'trade_interval': timedelta(minutes=1), + 'order_count': 2, + 'order_amount': 100, + 'order_interval': timedelta(minutes=1), + # because we placed an order for 100 shares, and the volume + # of each trade is 100, the simulator should spread the order + # into 4 trades of 25 shares per order. + 'expected_txn_count': 8, + 'expected_txn_volume': 2 * 100 + } + + self.transaction_sim(**params) + + # same scenario, but with short sales + params2 = { + 'trade_count': 360, + 'trade_amount': 100, + 'trade_interval': timedelta(minutes=1), + 'order_count': 2, + 'order_amount': -100, + 'order_interval': timedelta(minutes=1), + 'expected_txn_count': 8, + 'expected_txn_volume': 2 * -100 + } + + self.transaction_sim(**params2) + + @timed(DEFAULT_TIMEOUT) + def test_collapsing_orders(self): + # create a scenario where order.amount <<< trade.volume + # to test that several orders can be covered properly by one trade, + # but are represented by multiple transactions. + params1 = { + 'trade_count': 6, + 'trade_amount': 100, + 'trade_interval': timedelta(hours=1), + 'order_count': 24, + 'order_amount': 1, + 'order_interval': timedelta(minutes=1), + # because we placed an orders totaling less than 25% of one trade + # the simulator should produce just one transaction. + 'expected_txn_count': 24, + 'expected_txn_volume': 24 + } + self.transaction_sim(**params1) + + # second verse, same as the first. except short! + params2 = { + 'trade_count': 6, + 'trade_amount': 100, + 'trade_interval': timedelta(hours=1), + 'order_count': 24, + 'order_amount': -1, + 'order_interval': timedelta(minutes=1), + 'expected_txn_count': 24, + 'expected_txn_volume': -24 + } + self.transaction_sim(**params2) + + # Runs the collapsed trades over daily trade intervals. + # Ensuring that our delay works for daily intervals as well. 
+ params3 = { + 'trade_count': 6, + 'trade_amount': 100, + 'trade_interval': timedelta(days=1), + 'order_count': 24, + 'order_amount': 1, + 'order_interval': timedelta(minutes=1), + 'expected_txn_count': 24, + 'expected_txn_volume': 24 + } + self.transaction_sim(**params3) + + @timed(DEFAULT_TIMEOUT) + def test_alternating_long_short(self): + # create a scenario where we alternate buys and sells + params1 = { + 'trade_count': int(6.5 * 60 * 4), + 'trade_amount': 100, + 'trade_interval': timedelta(minutes=1), + 'order_count': 4, + 'order_amount': 10, + 'order_interval': timedelta(hours=24), + 'alternate': True, + 'complete_fill': True, + 'expected_txn_count': 4, + 'expected_txn_volume': 0 # equal buys and sells + } + self.transaction_sim(**params1) + + def transaction_sim(self, **params): + """ This is a utility method that asserts expected + results for conversion of orders to transactions given a + trade history""" + + trade_count = params['trade_count'] + trade_interval = params['trade_interval'] + order_count = params['order_count'] + order_amount = params['order_amount'] + order_interval = params['order_interval'] + expected_txn_count = params['expected_txn_count'] + expected_txn_volume = params['expected_txn_volume'] + # optional parameters + # --------------------- + # if present, alternate between long and short sales + alternate = params.get('alternate') + # if present, expect transaction amounts to match orders exactly. + complete_fill = params.get('complete_fill') + + sid = 1 + sim_params = factory.create_simulation_parameters() + blotter = Blotter() + price = [10.1] * trade_count + volume = [100] * trade_count + start_date = sim_params.first_open + + generated_trades = factory.create_trade_history( + sid, + price, + volume, + trade_interval, + sim_params + ) + + if alternate: + alternator = -1 + else: + alternator = 1 + + order_date = start_date + for i in xrange(order_count): + + blotter.set_date(order_date) + blotter.order(sid, order_amount * alternator ** i, None, None) + + order_date = order_date + order_interval + # move after market orders to just after market next + # market open. 
+ if order_date.hour >= 21: + if order_date.minute >= 00: + order_date = order_date + timedelta(days=1) + order_date = order_date.replace(hour=14, minute=30) + + # there should now be one open order list stored under the sid + oo = blotter.open_orders + self.assertEqual(len(oo), 1) + self.assertTrue(sid in oo) + order_list = oo[sid] + self.assertEqual(order_count, len(order_list)) + + for i in xrange(order_count): + order = order_list[i] + self.assertEqual(order.sid, sid) + self.assertEqual(order.amount, order_amount * alternator ** i) + + tracker = PerformanceTracker(sim_params) + + benchmark_returns = [ + Event({'dt': dt, + 'returns': ret, + 'type': + zipline.protocol.DATASOURCE_TYPE.BENCHMARK, + 'source_id': 'benchmarks'}) + for dt, ret in trading.environment.benchmark_returns.iterkv() + if dt.date() >= sim_params.period_start.date() + and dt.date() <= sim_params.period_end.date() + ] + + generated_events = date_sorted_sources(generated_trades, + benchmark_returns) + + # this approximates the loop inside TradingSimulationClient + transactions = [] + for dt, events in itertools.groupby(generated_events, + operator.attrgetter('dt')): + for event in events: + if event.type == DATASOURCE_TYPE.TRADE: + + for txn, order in blotter.process_trade(event): + transactions.append(txn) + tracker.process_event(txn) + + tracker.process_event(event) + + if complete_fill: + self.assertEqual(len(transactions), len(order_list)) + + total_volume = 0 + for i in xrange(len(transactions)): + txn = transactions[i] + total_volume += txn.amount + if complete_fill: + order = order_list[i] + self.assertEqual(order.amount, txn.amount) + + self.assertEqual(total_volume, expected_txn_volume) + self.assertEqual(len(transactions), expected_txn_count) + + cumulative_pos = tracker.cumulative_performance.positions[sid] + self.assertEqual(total_volume, cumulative_pos.amount) + + # the open orders should now be empty + oo = blotter.open_orders + self.assertTrue(sid in oo) + order_list = oo[sid] + self.assertEqual(0, len(order_list)) + + def test_blotter_processes_splits(self): + sim_params = factory.create_simulation_parameters() + blotter = Blotter() + blotter.set_date(sim_params.period_start) + + # set up two open limit orders with very low limit prices, + # one for sid 1 and one for sid 2 + blotter.order(1, 100, 10, None, None) + blotter.order(2, 100, 10, None, None) + + # send in a split for sid 2 + split_event = factory.create_split(2, 0.33333, + sim_params.period_start + + timedelta(days=1)) + + blotter.process_split(split_event) + + for sid in [1, 2]: + order_lists = blotter.open_orders[sid] + self.assertIsNotNone(order_lists) + self.assertEqual(1, len(order_lists)) + + aapl_order = blotter.open_orders[1][0].to_dict() + fls_order = blotter.open_orders[2][0].to_dict() + + # make sure the aapl order didn't change + self.assertEqual(100, aapl_order['amount']) + self.assertEqual(10, aapl_order['limit']) + self.assertEqual(1, aapl_order['sid']) + + # make sure the fls order did change + # to 300 shares at 3.33 + self.assertEqual(300, fls_order['amount']) + self.assertEqual(3.33, fls_order['limit']) + self.assertEqual(2, fls_order['sid']) diff --git a/tests/test_perf_tracking.py b/tests/test_perf_tracking.py index 738c80f..088c9b8 100644 --- a/tests/test_perf_tracking.py +++ b/tests/test_perf_tracking.py @@ -431,7 +431,7 @@ def test_ending_before_pay_date(self): pay_date = self.sim_params.first_open # find pay date that is much later. 
- for i in xrange(30): + for i in range(30): pay_date = factory.get_next_trading_dt(pay_date, oneday) dividend = factory.create_dividend( 1, diff --git a/tests/test_perf_tracking.py.bak b/tests/test_perf_tracking.py.bak new file mode 100644 index 0000000..738c80f --- /dev/null +++ b/tests/test_perf_tracking.py.bak @@ -0,0 +1,1282 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections +import heapq +import logging +import operator + +import unittest +from nose_parameterized import parameterized +import datetime +import pytz +import itertools + +import zipline.utils.factory as factory +import zipline.finance.performance as perf +from zipline.finance.slippage import Transaction, create_transaction +import zipline.utils.math_utils as zp_math + +from zipline.gens.composites import date_sorted_sources +from zipline.finance.trading import SimulationParameters +from zipline.finance.blotter import Order +from zipline.finance import trading +from zipline.protocol import DATASOURCE_TYPE +from zipline.utils.factory import create_random_simulation_parameters +import zipline.protocol +from zipline.protocol import Event + +logger = logging.getLogger('Test Perf Tracking') + +onesec = datetime.timedelta(seconds=1) +oneday = datetime.timedelta(days=1) +tradingday = datetime.timedelta(hours=6, minutes=30) + + +def create_txn(event, price, amount): + mock_order = Order(None, None, event.sid, id=None) + return create_transaction(event, mock_order, price, amount) + + +def benchmark_events_in_range(sim_params): + return [ + Event({'dt': dt, + 'returns': ret, + 'type': + zipline.protocol.DATASOURCE_TYPE.BENCHMARK, + 'source_id': 'benchmarks'}) + for dt, ret in trading.environment.benchmark_returns.iterkv() + if dt.date() >= sim_params.period_start.date() + and dt.date() <= sim_params.period_end.date() + ] + + +def calculate_results(host, events): + + perf_tracker = perf.PerformanceTracker(host.sim_params) + + all_events = heapq.merge( + ((event.dt, event) for event in events), + ((event.dt, event) for event in host.benchmark_events)) + + filtered_events = [(date, filt_event) for (date, filt_event) + in all_events if date <= events[-1].dt] + filtered_events.sort(key=lambda x: x[0]) + grouped_events = itertools.groupby(filtered_events, lambda x: x[0]) + results = [] + + bm_updated = False + for date, group in grouped_events: + for _, event in group: + perf_tracker.process_event(event) + if event.type == DATASOURCE_TYPE.BENCHMARK: + bm_updated = True + if bm_updated: + msg = perf_tracker.handle_market_close() + results.append(msg) + bm_updated = False + return results + + +class TestSplitPerformance(unittest.TestCase): + def setUp(self): + self.sim_params, self.dt, self.end_dt = \ + create_random_simulation_parameters() + + # start with $10,000 + self.sim_params.capital_base = 10e3 + + self.benchmark_events = benchmark_events_in_range(self.sim_params) + + def test_split_long_position(self): + with trading.TradingEnvironment() as env: + events = 
factory.create_trade_history( + 1, + [20, 20], + [100, 100], + oneday, + self.sim_params + ) + + # set up a long position in sid 1 + # 100 shares at $20 apiece = $2000 position + events.insert(0, create_txn(events[0], 20, 100)) + + # set up a split with ratio 3 + events.append(factory.create_split(1, 3, + env.next_trading_day(events[1].dt))) + + results = calculate_results(self, events) + + # should have 33 shares (at $60 apiece) and $20 in cash + self.assertEqual(2, len(results)) + + latest_positions = results[1]['daily_perf']['positions'] + self.assertEqual(1, len(latest_positions)) + + # check the last position to make sure it's been updated + position = latest_positions[0] + + self.assertEqual(1, position['sid']) + self.assertEqual(33, position['amount']) + self.assertEqual(60, position['cost_basis']) + self.assertEqual(60, position['last_sale_price']) + + # since we started with $10000, and we spent $2000 on the + # position, but then got $20 back, we should have $8020 + # (or close to it) in cash. + + # we won't get exactly 8020 because sometimes a split is + # denoted as a ratio like 0.3333, and we lose some digits + # of precision. thus, make sure we're pretty close. + daily_perf = results[1]['daily_perf'] + + self.assertTrue( + zp_math.tolerant_equals(8020, + daily_perf['ending_cash'], 1)) + + +class TestCommissionEvents(unittest.TestCase): + + def setUp(self): + self.sim_params, self.dt, self.end_dt = \ + create_random_simulation_parameters() + + logger.info("sim_params: %s, dt: %s, end_dt: %s" % + (self.sim_params, self.dt, self.end_dt)) + + self.sim_params.capital_base = 10e3 + + self.benchmark_events = benchmark_events_in_range(self.sim_params) + + def test_commission_event(self): + with trading.TradingEnvironment(): + events = factory.create_trade_history( + 1, + [10, 10, 10, 10, 10], + [100, 100, 100, 100, 100], + oneday, + self.sim_params + ) + + cash_adj_dt = self.sim_params.first_open \ + + datetime.timedelta(hours=3) + cash_adjustment = factory.create_commission(1, 300.0, + cash_adj_dt) + + # Insert a purchase order. + events.insert(0, create_txn(events[0], 20, 1)) + + events.insert(1, cash_adjustment) + results = calculate_results(self, events) + # Validate that we lost 320 dollars from our cash pool. + self.assertEqual(results[-1]['cumulative_perf']['ending_cash'], + 9680) + # Validate that the cost basis of our position changed. + self.assertEqual(results[-1]['daily_perf']['positions'] + [0]['cost_basis'], 320.0) + + def test_commission_zero_position(self): + """ + Ensure no div-by-zero errors. + """ + with trading.TradingEnvironment(): + events = factory.create_trade_history( + 1, + [10, 10, 10, 10, 10], + [100, 100, 100, 100, 100], + oneday, + self.sim_params + ) + + cash_adj_dt = self.sim_params.first_open \ + + datetime.timedelta(hours=3) + cash_adjustment = factory.create_commission(1, 300.0, + cash_adj_dt) + + # Insert a purchase order. + events.insert(0, create_txn(events[0], 20, 1)) + + # Sell that order. + events.insert(1, create_txn(events[1], 20, -1)) + + events.insert(2, cash_adjustment) + results = calculate_results(self, events) + # Validate that we lost 300 dollars from our cash pool. + self.assertEqual(results[-1]['cumulative_perf']['ending_cash'], + 9700) + + def test_commission_no_position(self): + """ + Ensure no position-not-found or sid-not-found errors. 
+ """ + with trading.TradingEnvironment(): + events = factory.create_trade_history( + 1, + [10, 10, 10, 10, 10], + [100, 100, 100, 100, 100], + oneday, + self.sim_params + ) + + cash_adj_dt = self.sim_params.first_open \ + + datetime.timedelta(hours=3) + cash_adjustment = factory.create_commission(1, 300.0, + cash_adj_dt) + + events.insert(0, cash_adjustment) + results = calculate_results(self, events) + # Validate that we lost 300 dollars from our cash pool. + self.assertEqual(results[-1]['cumulative_perf']['ending_cash'], + 9700) + + +class TestDividendPerformance(unittest.TestCase): + + def setUp(self): + + self.sim_params, self.dt, self.end_dt = \ + create_random_simulation_parameters() + + self.sim_params.capital_base = 10e3 + + self.benchmark_events = benchmark_events_in_range(self.sim_params) + + def test_market_hours_calculations(self): + with trading.TradingEnvironment(): + # DST in US/Eastern began on Sunday March 14, 2010 + before = datetime.datetime(2010, 3, 12, 14, 31, tzinfo=pytz.utc) + after = factory.get_next_trading_dt( + before, + datetime.timedelta(days=1) + ) + self.assertEqual(after.hour, 13) + + def test_long_position_receives_dividend(self): + with trading.TradingEnvironment(): + # post some trades in the market + events = factory.create_trade_history( + 1, + [10, 10, 10, 10, 10], + [100, 100, 100, 100, 100], + oneday, + self.sim_params + ) + + dividend = factory.create_dividend( + 1, + 10.00, + # declared date, when the algorithm finds out about + # the dividend + events[1].dt, + # ex_date, when the algorithm is credited with the + # dividend + events[1].dt, + # pay date, when the algorithm receives the dividend. + events[2].dt + ) + + txn = create_txn(events[0], 10.0, 100) + events.insert(0, txn) + events.insert(1, dividend) + results = calculate_results(self, events) + + self.assertEqual(len(results), 5) + cumulative_returns = \ + [event['cumulative_perf']['returns'] for event in results] + self.assertEqual(cumulative_returns, [0.0, 0.0, 0.1, 0.1, 0.1]) + daily_returns = [event['daily_perf']['returns'] + for event in results] + self.assertEqual(daily_returns, [0.0, 0.0, 0.10, 0.0, 0.0]) + cash_flows = [event['daily_perf']['capital_used'] + for event in results] + self.assertEqual(cash_flows, [-1000, 0, 1000, 0, 0]) + cumulative_cash_flows = \ + [event['cumulative_perf']['capital_used'] for event in results] + self.assertEqual(cumulative_cash_flows, [-1000, -1000, 0, 0, 0]) + cash_pos = \ + [event['cumulative_perf']['ending_cash'] for event in results] + self.assertEqual(cash_pos, [9000, 9000, 10000, 10000, 10000]) + + def test_post_ex_long_position_receives_no_dividend(self): + # post some trades in the market + events = factory.create_trade_history( + 1, + [10, 10, 10, 10, 10], + [100, 100, 100, 100, 100], + oneday, + self.sim_params + ) + + dividend = factory.create_dividend( + 1, + 10.00, + events[0].dt, + events[1].dt, + events[2].dt + ) + + events.insert(1, dividend) + txn = create_txn(events[3], 10.0, 100) + events.insert(4, txn) + results = calculate_results(self, events) + + self.assertEqual(len(results), 5) + cumulative_returns = \ + [event['cumulative_perf']['returns'] for event in results] + self.assertEqual(cumulative_returns, [0, 0, 0, 0, 0]) + daily_returns = [event['daily_perf']['returns'] for event in results] + self.assertEqual(daily_returns, [0, 0, 0, 0, 0]) + cash_flows = [event['daily_perf']['capital_used'] for event in results] + self.assertEqual(cash_flows, [0, 0, -1000, 0, 0]) + cumulative_cash_flows = \ + 
[event['cumulative_perf']['capital_used'] for event in results] + self.assertEqual(cumulative_cash_flows, [0, 0, -1000, -1000, -1000]) + + def test_selling_before_dividend_payment_still_gets_paid(self): + # post some trades in the market + events = factory.create_trade_history( + 1, + [10, 10, 10, 10, 10], + [100, 100, 100, 100, 100], + oneday, + self.sim_params + ) + + dividend = factory.create_dividend( + 1, + 10.00, + events[0].dt, + events[1].dt, + events[3].dt + ) + + buy_txn = create_txn(events[0], 10.0, 100) + events.insert(1, buy_txn) + sell_txn = create_txn(events[3], 10.0, -100) + events.insert(4, sell_txn) + events.insert(0, dividend) + results = calculate_results(self, events) + + self.assertEqual(len(results), 5) + cumulative_returns = \ + [event['cumulative_perf']['returns'] for event in results] + self.assertEqual(cumulative_returns, [0, 0, 0, 0.1, 0.1]) + daily_returns = [event['daily_perf']['returns'] for event in results] + self.assertEqual(daily_returns, [0, 0, 0, 0.1, 0]) + cash_flows = [event['daily_perf']['capital_used'] for event in results] + self.assertEqual(cash_flows, [-1000, 0, 1000, 1000, 0]) + cumulative_cash_flows = \ + [event['cumulative_perf']['capital_used'] for event in results] + self.assertEqual(cumulative_cash_flows, [-1000, -1000, 0, 1000, 1000]) + + def test_buy_and_sell_before_ex(self): + # post some trades in the market + events = factory.create_trade_history( + 1, + [10, 10, 10, 10, 10, 10], + [100, 100, 100, 100, 100, 100], + oneday, + self.sim_params + ) + + dividend = factory.create_dividend( + 1, + 10.00, + events[3].dt, + events[4].dt, + events[5].dt + ) + + buy_txn = create_txn(events[1], 10.0, 100) + events.insert(1, buy_txn) + sell_txn = create_txn(events[3], 10.0, -100) + events.insert(3, sell_txn) + events.insert(1, dividend) + results = calculate_results(self, events) + + self.assertEqual(len(results), 6) + cumulative_returns = \ + [event['cumulative_perf']['returns'] for event in results] + self.assertEqual(cumulative_returns, [0, 0, 0, 0, 0, 0]) + daily_returns = [event['daily_perf']['returns'] for event in results] + self.assertEqual(daily_returns, [0, 0, 0, 0, 0, 0]) + cash_flows = [event['daily_perf']['capital_used'] for event in results] + self.assertEqual(cash_flows, [0, -1000, 1000, 0, 0, 0]) + cumulative_cash_flows = \ + [event['cumulative_perf']['capital_used'] for event in results] + self.assertEqual(cumulative_cash_flows, [0, -1000, 0, 0, 0, 0]) + + def test_ending_before_pay_date(self): + # post some trades in the market + events = factory.create_trade_history( + 1, + [10, 10, 10, 10, 10], + [100, 100, 100, 100, 100], + oneday, + self.sim_params + ) + + pay_date = self.sim_params.first_open + # find pay date that is much later. 
+ for i in xrange(30):
+ pay_date = factory.get_next_trading_dt(pay_date, oneday)
+ dividend = factory.create_dividend(
+ 1,
+ 10.00,
+ events[0].dt,
+ events[1].dt,
+ pay_date
+ )
+
+ buy_txn = create_txn(events[1], 10.0, 100)
+ events.insert(2, buy_txn)
+ events.insert(1, dividend)
+ results = calculate_results(self, events)
+
+ self.assertEqual(len(results), 5)
+ cumulative_returns = \
+ [event['cumulative_perf']['returns'] for event in results]
+ self.assertEqual(cumulative_returns, [0, 0, 0, 0.0, 0.0])
+ daily_returns = [event['daily_perf']['returns'] for event in results]
+ self.assertEqual(daily_returns, [0, 0, 0, 0, 0])
+ cash_flows = [event['daily_perf']['capital_used'] for event in results]
+ self.assertEqual(cash_flows, [0, -1000, 0, 0, 0])
+ cumulative_cash_flows = \
+ [event['cumulative_perf']['capital_used'] for event in results]
+ self.assertEqual(
+ cumulative_cash_flows,
+ [0, -1000, -1000, -1000, -1000]
+ )
+
+ def test_short_position_pays_dividend(self):
+ # post some trades in the market
+ events = factory.create_trade_history(
+ 1,
+ [10, 10, 10, 10, 10],
+ [100, 100, 100, 100, 100],
+ oneday,
+ self.sim_params
+ )
+
+ dividend = factory.create_dividend(
+ 1,
+ 10.00,
+ # declared at the open of the test
+ events[0].dt,
+ # ex_date same as trade 2
+ events[2].dt,
+ events[3].dt
+ )
+
+ txn = create_txn(events[1], 10.0, -100)
+ events.insert(1, txn)
+ events.insert(0, dividend)
+ results = calculate_results(self, events)
+
+ self.assertEqual(len(results), 5)
+ cumulative_returns = \
+ [event['cumulative_perf']['returns'] for event in results]
+ self.assertEqual(cumulative_returns, [0.0, 0.0, 0.0, -0.1, -0.1])
+ daily_returns = [event['daily_perf']['returns'] for event in results]
+ self.assertEqual(daily_returns, [0.0, 0.0, 0.0, -0.1, 0.0])
+ cash_flows = [event['daily_perf']['capital_used'] for event in results]
+ self.assertEqual(cash_flows, [0, 1000, 0, -1000, 0])
+ cumulative_cash_flows = \
+ [event['cumulative_perf']['capital_used'] for event in results]
+ self.assertEqual(cumulative_cash_flows, [0, 1000, 1000, 0, 0])
+
+ def test_no_position_receives_no_dividend(self):
+ # post some trades in the market
+ events = factory.create_trade_history(
+ 1,
+ [10, 10, 10, 10, 10],
+ [100, 100, 100, 100, 100],
+ oneday,
+ self.sim_params
+ )
+
+ dividend = factory.create_dividend(
+ 1,
+ 10.00,
+ events[0].dt,
+ events[1].dt,
+ events[2].dt
+ )
+
+ events.insert(1, dividend)
+ results = calculate_results(self, events)
+
+ self.assertEqual(len(results), 5)
+ cumulative_returns = \
+ [event['cumulative_perf']['returns'] for event in results]
+ self.assertEqual(cumulative_returns, [0.0, 0.0, 0.0, 0.0, 0.0])
+ daily_returns = [event['daily_perf']['returns'] for event in results]
+ self.assertEqual(daily_returns, [0.0, 0.0, 0.0, 0.0, 0.0])
+ cash_flows = [event['daily_perf']['capital_used'] for event in results]
+ self.assertEqual(cash_flows, [0, 0, 0, 0, 0])
+ cumulative_cash_flows = \
+ [event['cumulative_perf']['capital_used'] for event in results]
+ self.assertEqual(cumulative_cash_flows, [0, 0, 0, 0, 0])
+
+
+class TestDividendPerformanceHolidayStyle(TestDividendPerformance):
+
+ # The holiday tests begin the simulation on the day
+ # before Thanksgiving, so that the next trading day is
+ # two days ahead. Any tests that hard-code events
+ # to be start + oneday will fail, since those events will
+ # be skipped by the simulation.
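The dividend tests above all encode the same rule: entitlement is fixed by the shares held at the ex-date, and cash moves only on the pay date. A minimal standalone sketch of that arithmetic (`dividend_payment` is a hypothetical helper, not the tracker's API):

```python
def dividend_payment(shares_at_ex_date, gross_amount, net_amount=None):
    """Cash credited (or debited, for a short) on the pay date."""
    per_share = net_amount if net_amount is not None else gross_amount
    return shares_at_ex_date * per_share

# The cases the tests assert on, for a $10.00 dividend:
assert dividend_payment(100, 10.00) == 1000.0    # long 100 shares is credited
assert dividend_payment(-100, 10.00) == -1000.0  # short 100 shares is debited
assert dividend_payment(0, 10.00) == 0.0         # no position, no payment
```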
+
+ def setUp(self):
+ self.dt = datetime.datetime(2003, 11, 30, tzinfo=pytz.utc)
+ self.end_dt = datetime.datetime(2004, 11, 25, tzinfo=pytz.utc)
+ self.sim_params = SimulationParameters(
+ self.dt,
+ self.end_dt)
+ self.benchmark_events = benchmark_events_in_range(self.sim_params)
+
+
+class TestPositionPerformance(unittest.TestCase):
+
+ def setUp(self):
+ self.sim_params, self.dt, self.end_dt = \
+ create_random_simulation_parameters()
+
+ self.benchmark_events = benchmark_events_in_range(self.sim_params)
+
+ def test_long_position(self):
+ """
+ verify that the performance period calculates properly for a
+ single buy transaction
+ """
+ # post some trades in the market
+ trades = factory.create_trade_history(
+ 1,
+ [10, 10, 10, 11],
+ [100, 100, 100, 100],
+ onesec,
+ self.sim_params
+ )
+
+ txn = create_txn(trades[1], 10.0, 100)
+ pp = perf.PerformancePeriod(1000.0)
+
+ pp.execute_transaction(txn)
+ for trade in trades:
+ pp.update_last_sale(trade)
+
+ pp.calculate_performance()
+
+ self.assertEqual(
+ pp.period_cash_flow,
+ -1 * txn.price * txn.amount,
+ "capital used should be equal to the opposite of the transaction \
+ cost of sole txn in test"
+ )
+
+ self.assertEqual(
+ len(pp.positions),
+ 1,
+ "should be just one position")
+
+ self.assertEqual(
+ pp.positions[1].sid,
+ txn.sid,
+ "position should be in security with id 1")
+
+ self.assertEqual(
+ pp.positions[1].amount,
+ txn.amount,
+ "should have a position of {sharecount} shares".format(
+ sharecount=txn.amount
+ )
+ )
+
+ self.assertEqual(
+ pp.positions[1].cost_basis,
+ txn.price,
+ "should have a cost basis of 10"
+ )
+
+ self.assertEqual(
+ pp.positions[1].last_sale_price,
+ trades[-1]['price'],
+ "last sale should be same as last trade. \
+ expected {exp} actual {act}".format(
+ exp=trades[-1]['price'],
+ act=pp.positions[1].last_sale_price)
+ )
+
+ self.assertEqual(
+ pp.ending_value,
+ 1100,
+ "ending value should be price of last trade times number of \
+ shares in position"
+ )
+
+ self.assertEqual(pp.pnl, 100, "gain of 1 on 100 shares should be 100")
+
+ def test_short_position(self):
+ """verify that the performance period calculates properly for a \
+single short-sale transaction"""
+ trades = factory.create_trade_history(
+ 1,
+ [10, 10, 10, 11, 10, 9],
+ [100, 100, 100, 100, 100, 100],
+ onesec,
+ self.sim_params
+ )
+
+ trades_1 = trades[:-2]
+
+ txn = create_txn(trades[1], 10.0, -100)
+ pp = perf.PerformancePeriod(1000.0)
+
+ pp.execute_transaction(txn)
+ for trade in trades_1:
+ pp.update_last_sale(trade)
+
+ pp.calculate_performance()
+
+ self.assertEqual(
+ pp.period_cash_flow,
+ -1 * txn.price * txn.amount,
+ "capital used should be equal to the opposite of the transaction\
+ cost of sole txn in test"
+ )
+
+ self.assertEqual(
+ len(pp.positions),
+ 1,
+ "should be just one position")
+
+ self.assertEqual(
+ pp.positions[1].sid,
+ txn.sid,
+ "position should be in security from the transaction"
+ )
+
+ self.assertEqual(
+ pp.positions[1].amount,
+ -100,
+ "should have a position of -100 shares"
+ )
+
+ self.assertEqual(
+ pp.positions[1].cost_basis,
+ txn.price,
+ "should have a cost basis of 10"
+ )
+
+ self.assertEqual(
+ pp.positions[1].last_sale_price,
+ trades_1[-1]['price'],
+ "last sale should be price of last trade"
+ )
+
+ self.assertEqual(
+ pp.ending_value,
+ -1100,
+ "ending value should be price of last trade times number of \
+ shares in position"
+ )
+
+ self.assertEqual(pp.pnl, -100,
+ "loss of 1 on a 100-share short should be -100")
+
+ # simulate additional trades, and ensure that the
position value + # reflects the new price + trades_2 = trades[-2:] + + # simulate a rollover to a new period + pp.rollover() + + for trade in trades_2: + pp.update_last_sale(trade) + + pp.calculate_performance() + + self.assertEqual( + pp.period_cash_flow, + 0, + "capital used should be zero, there were no transactions in \ + performance period" + ) + + self.assertEqual( + len(pp.positions), + 1, + "should be just one position" + ) + + self.assertEqual( + pp.positions[1].sid, + txn.sid, + "position should be in security from the transaction" + ) + + self.assertEqual( + pp.positions[1].amount, + -100, + "should have a position of -100 shares" + ) + + self.assertEqual( + pp.positions[1].cost_basis, + txn.price, + "should have a cost basis of 10" + ) + + self.assertEqual( + pp.positions[1].last_sale_price, + trades_2[-1].price, + "last sale should be price of last trade" + ) + + self.assertEqual( + pp.ending_value, + -900, + "ending value should be price of last trade times number of \ + shares in position") + + self.assertEqual( + pp.pnl, + 200, + "drop of 2 on -100 shares should be 200" + ) + + # now run a performance period encompassing the entire trade sample. + ppTotal = perf.PerformancePeriod(1000.0) + + for trade in trades_1: + ppTotal.update_last_sale(trade) + + ppTotal.execute_transaction(txn) + + for trade in trades_2: + ppTotal.update_last_sale(trade) + + ppTotal.calculate_performance() + + self.assertEqual( + ppTotal.period_cash_flow, + -1 * txn.price * txn.amount, + "capital used should be equal to the opposite of the transaction \ +cost of sole txn in test" + ) + + self.assertEqual( + len(ppTotal.positions), + 1, + "should be just one position" + ) + self.assertEqual( + ppTotal.positions[1].sid, + txn.sid, + "position should be in security from the transaction" + ) + + self.assertEqual( + ppTotal.positions[1].amount, + -100, + "should have a position of -100 shares" + ) + + self.assertEqual( + ppTotal.positions[1].cost_basis, + txn.price, + "should have a cost basis of 10" + ) + + self.assertEqual( + ppTotal.positions[1].last_sale_price, + trades_2[-1].price, + "last sale should be price of last trade" + ) + + self.assertEqual( + ppTotal.ending_value, + -900, + "ending value should be price of last trade times number of \ + shares in position") + + self.assertEqual( + ppTotal.pnl, + 100, + "drop of 1 on -100 shares should be 100" + ) + + def test_covering_short(self): + """verify performance where short is bought and covered, and shares \ +trade after cover""" + + trades = factory.create_trade_history( + 1, + [10, 10, 10, 11, 9, 8, 7, 8, 9, 10], + [100, 100, 100, 100, 100, 100, 100, 100, 100, 100], + onesec, + self.sim_params + ) + + short_txn = create_txn( + trades[1], + 10.0, + -100, + ) + + cover_txn = create_txn(trades[6], 7.0, 100) + pp = perf.PerformancePeriod(1000.0) + + pp.execute_transaction(short_txn) + pp.execute_transaction(cover_txn) + + for trade in trades: + pp.update_last_sale(trade) + + pp.calculate_performance() + + short_txn_cost = short_txn.price * short_txn.amount + cover_txn_cost = cover_txn.price * cover_txn.amount + + self.assertEqual( + pp.period_cash_flow, + -1 * short_txn_cost - cover_txn_cost, + "capital used should be equal to the net transaction costs" + ) + + self.assertEqual( + len(pp.positions), + 1, + "should be just one position" + ) + + self.assertEqual( + pp.positions[1].sid, + short_txn.sid, + "position should be in security from the transaction" + ) + + self.assertEqual( + pp.positions[1].amount, + 0, + "should have a position of -100 
shares" + ) + + self.assertEqual( + pp.positions[1].cost_basis, + 0, + "a covered position should have a cost basis of 0" + ) + + self.assertEqual( + pp.positions[1].last_sale_price, + trades[-1].price, + "last sale should be price of last trade" + ) + + self.assertEqual( + pp.ending_value, + 0, + "ending value should be price of last trade times number of \ +shares in position" + ) + + self.assertEqual( + pp.pnl, + 300, + "gain of 1 on 100 shares should be 300" + ) + + def test_cost_basis_calc(self): + trades = factory.create_trade_history( + 1, + [10, 11, 11, 12], + [100, 100, 100, 100], + onesec, + self.sim_params + ) + + transactions = factory.create_txn_history( + 1, + [10, 11, 11, 12], + [100, 100, 100, 100], + onesec, + self.sim_params + ) + + pp = perf.PerformancePeriod(1000.0) + + for txn in transactions: + pp.execute_transaction(txn) + + for trade in trades: + pp.update_last_sale(trade) + + pp.calculate_performance() + + self.assertEqual( + pp.positions[1].last_sale_price, + trades[-1].price, + "should have a last sale of 12, got {val}".format( + val=pp.positions[1].last_sale_price) + ) + + self.assertEqual( + pp.positions[1].cost_basis, + 11, + "should have a cost basis of 11" + ) + + self.assertEqual( + pp.pnl, + 400 + ) + + down_tick = factory.create_trade( + 1, + 10.0, + 100, + trades[-1].dt + onesec) + + saleTxn = create_txn( + down_tick, + 10.0, + -100) + + pp.rollover() + + pp.execute_transaction(saleTxn) + pp.update_last_sale(down_tick) + + pp.calculate_performance() + self.assertEqual( + pp.positions[1].last_sale_price, + 10, + "should have a last sale of 10, was {val}".format( + val=pp.positions[1].last_sale_price) + ) + + self.assertEqual( + round(pp.positions[1].cost_basis, 2), + 11.33, + "should have a cost basis of 11.33" + ) + + # print "second period pnl is {pnl}".format(pnl=pp2.pnl) + self.assertEqual(pp.pnl, -800, "this period goes from +400 to -400") + + pp3 = perf.PerformancePeriod(1000.0) + + transactions.append(saleTxn) + for txn in transactions: + pp3.execute_transaction(txn) + + trades.append(down_tick) + for trade in trades: + pp3.update_last_sale(trade) + + pp3.calculate_performance() + self.assertEqual( + pp3.positions[1].last_sale_price, + 10, + "should have a last sale of 10" + ) + + self.assertEqual( + round(pp3.positions[1].cost_basis, 2), + 11.33, + "should have a cost basis of 11.33" + ) + + self.assertEqual( + pp3.pnl, + -400, + "should be -400 for all trades and transactions in period" + ) + + +class TestPerformanceTracker(unittest.TestCase): + + NumDaysToDelete = collections.namedtuple( + 'NumDaysToDelete', ('start', 'middle', 'end')) + + @parameterized.expand([ + ("Don't delete any events", + NumDaysToDelete(start=0, middle=0, end=0)), + ("Delete first day of events", + NumDaysToDelete(start=1, middle=0, end=0)), + ("Delete first two days of events", + NumDaysToDelete(start=2, middle=0, end=0)), + ("Delete one day of events from the middle", + NumDaysToDelete(start=0, middle=1, end=0)), + ("Delete two events from the middle", + NumDaysToDelete(start=0, middle=2, end=0)), + ("Delete last day of events", + NumDaysToDelete(start=0, middle=0, end=1)), + ("Delete last two days of events", + NumDaysToDelete(start=0, middle=0, end=2)), + ("Delete all but one event.", + NumDaysToDelete(start=2, middle=1, end=2)), + ]) + def test_tracker(self, parameter_comment, days_to_delete): + """ + @days_to_delete - configures which days in the data set we should + remove, used for ensuring that we still return performance messages + even when there is no data. 
+ """ + # This date range covers Columbus day, + # however Columbus day is not a market holiday + # + # October 2008 + # Su Mo Tu We Th Fr Sa + # 1 2 3 4 + # 5 6 7 8 9 10 11 + # 12 13 14 15 16 17 18 + # 19 20 21 22 23 24 25 + # 26 27 28 29 30 31 + start_dt = datetime.datetime(year=2008, + month=10, + day=9, + tzinfo=pytz.utc) + end_dt = datetime.datetime(year=2008, + month=10, + day=16, + tzinfo=pytz.utc) + + trade_count = 6 + sid = 133 + price = 10.1 + price_list = [price] * trade_count + volume = [100] * trade_count + trade_time_increment = datetime.timedelta(days=1) + + sim_params = SimulationParameters( + period_start=start_dt, + period_end=end_dt + ) + + benchmark_events = benchmark_events_in_range(sim_params) + + trade_history = factory.create_trade_history( + sid, + price_list, + volume, + trade_time_increment, + sim_params, + source_id="factory1" + ) + + sid2 = 134 + price2 = 12.12 + price2_list = [price2] * trade_count + trade_history2 = factory.create_trade_history( + sid2, + price2_list, + volume, + trade_time_increment, + sim_params, + source_id="factory2" + ) + # 'middle' start of 3 depends on number of days == 7 + middle = 3 + + # First delete from middle + if days_to_delete.middle: + del trade_history[middle:(middle + days_to_delete.middle)] + del trade_history2[middle:(middle + days_to_delete.middle)] + + # Delete start + if days_to_delete.start: + del trade_history[:days_to_delete.start] + del trade_history2[:days_to_delete.start] + + # Delete from end + if days_to_delete.end: + del trade_history[-days_to_delete.end:] + del trade_history2[-days_to_delete.end:] + + sim_params.first_open = \ + sim_params.calculate_first_open() + sim_params.last_close = \ + sim_params.calculate_last_close() + sim_params.capital_base = 1000.0 + sim_params.frame_index = [ + 'sid', + 'volume', + 'dt', + 'price', + 'changed'] + perf_tracker = perf.PerformanceTracker( + sim_params + ) + + events = date_sorted_sources(trade_history, trade_history2) + + events = [event for event in + self.trades_with_txns(events, trade_history[0].dt)] + + # Extract events with transactions to use for verification. 
+ txns = [event for event in
+ events if event.type == DATASOURCE_TYPE.TRANSACTION]
+
+ orders = [event for event in
+ events if event.type == DATASOURCE_TYPE.ORDER]
+
+ all_events = (msg[1] for msg in heapq.merge(
+ ((event.dt, event) for event in events),
+ ((event.dt, event) for event in benchmark_events)))
+
+ filtered_events = [filt_event for filt_event
+ in all_events if filt_event.dt <= end_dt]
+ filtered_events.sort(key=lambda x: x.dt)
+ grouped_events = itertools.groupby(filtered_events, lambda x: x.dt)
+ perf_messages = []
+
+ for date, group in grouped_events:
+ for event in group:
+ perf_tracker.process_event(event)
+ msg = perf_tracker.handle_market_close()
+ perf_messages.append(msg)
+
+ self.assertEqual(perf_tracker.txn_count, len(txns))
+ self.assertEqual(perf_tracker.txn_count, len(orders))
+
+ cumulative_pos = perf_tracker.cumulative_performance.positions[sid]
+ expected_size = len(txns) / 2 * -25
+ self.assertEqual(cumulative_pos.amount, expected_size)
+
+ self.assertEqual(len(perf_messages),
+ sim_params.days_in_period)
+
+ def trades_with_txns(self, events, no_txn_dt):
+ for event in events:
+
+ # create a transaction for all but the
+ # first trade in each sid, to simulate a trade with no transaction
+ if event.dt != no_txn_dt:
+ order = Order(
+ sid=event.sid,
+ amount=-25,
+ dt=event.dt
+ )
+ yield order
+ yield event
+ txn = Transaction(
+ sid=event.sid,
+ amount=-25,
+ dt=event.dt,
+ price=10.0,
+ commission=0.50,
+ order_id=order.id
+ )
+ yield txn
+ else:
+ yield event
+
+ def test_minute_tracker(self):
+ """ Tests minute performance tracking."""
+ with trading.TradingEnvironment():
+ start_dt = trading.environment.exchange_dt_in_utc(
+ datetime.datetime(2013, 3, 1, 9, 31))
+ end_dt = trading.environment.exchange_dt_in_utc(
+ datetime.datetime(2013, 3, 1, 16, 0))
+
+ sim_params = SimulationParameters(
+ period_start=start_dt,
+ period_end=end_dt,
+ emission_rate='minute'
+ )
+ tracker = perf.PerformanceTracker(sim_params)
+
+ foo_event_1 = factory.create_trade('foo', 10.0, 20, start_dt)
+ order_event_1 = Order(sid=foo_event_1.sid,
+ amount=-25,
+ dt=foo_event_1.dt)
+ bar_event_1 = factory.create_trade('bar', 100.0, 200, start_dt)
+ txn_event_1 = Transaction(sid=foo_event_1.sid,
+ amount=-25,
+ dt=foo_event_1.dt,
+ price=10.0,
+ commission=0.50,
+ order_id=order_event_1.id)
+ benchmark_event_1 = Event({
+ 'dt': start_dt,
+ 'returns': 0.01,
+ 'type': DATASOURCE_TYPE.BENCHMARK
+ })
+
+ foo_event_2 = factory.create_trade(
+ 'foo', 11.0, 20, start_dt + datetime.timedelta(minutes=1))
+ bar_event_2 = factory.create_trade(
+ 'bar', 11.0, 20, start_dt + datetime.timedelta(minutes=1))
+ benchmark_event_2 = Event({
+ 'dt': start_dt + datetime.timedelta(minutes=1),
+ 'returns': 0.02,
+ 'type': DATASOURCE_TYPE.BENCHMARK
+ })
+
+ events = [
+ foo_event_1,
+ order_event_1,
+ benchmark_event_1,
+ txn_event_1,
+ bar_event_1,
+ foo_event_2,
+ benchmark_event_2,
+ bar_event_2,
+ ]
+
+ grouped_events = itertools.groupby(
+ events, operator.attrgetter('dt'))
+
+ messages = {}
+ for date, group in grouped_events:
+ tracker.set_date(date)
+ for event in group:
+ tracker.process_event(event)
+ tracker.handle_minute_close(date)
+ msg = tracker.to_dict()
+ messages[date] = msg
+
+ self.assertEquals(2, len(messages))
+
+ msg_1 = messages[foo_event_1.dt]
+ msg_2 = messages[foo_event_2.dt]
+
+ self.assertEquals(1, len(msg_1['minute_perf']['transactions']),
+ "The first message should contain one "
+ "transaction.")
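The daily loop above and this minute test share one contract: events arrive sorted by `dt`, `itertools.groupby` slices them into per-period batches, and one message is emitted per batch. A schematic of that loop, with plain dicts standing in for Event objects and the tracker calls marked as stand-ins:

```python
import itertools
import operator
from datetime import datetime, timedelta

t0 = datetime(2013, 3, 1, 9, 31)
events = [{'dt': t0, 'sid': 'foo'},
          {'dt': t0, 'sid': 'bar'},
          {'dt': t0 + timedelta(minutes=1), 'sid': 'foo'}]

# groupby only merges *adjacent* equal keys, so the stream must already
# be sorted by dt -- the same assumption the tracker tests make.
messages = []
for dt, group in itertools.groupby(events, operator.itemgetter('dt')):
    batch = list(group)                # stand-in for process_event() calls
    messages.append((dt, len(batch)))  # stand-in for handle_minute_close()

assert messages == [(t0, 2), (t0 + timedelta(minutes=1), 1)]
```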
+ # Check that transactions aren't emitted for previous events.
+ self.assertEquals(0, len(msg_2['minute_perf']['transactions']),
+ "The second message should have no "
+ "transactions.")
+
+ self.assertEquals(1, len(msg_1['minute_perf']['orders']),
+ "The first message should contain one order.")
+ # Check that orders aren't emitted for previous events.
+ self.assertEquals(0, len(msg_2['minute_perf']['orders']),
+ "The second message should have no orders.")
+
+ # Ensure that period_close moves through time.
+ # Also, ensure that the period_closes are the expected dts.
+ self.assertEquals(foo_event_1.dt,
+ msg_1['minute_perf']['period_close'])
+ self.assertEquals(foo_event_2.dt,
+ msg_2['minute_perf']['period_close'])
+
+ # Ensure that a Sharpe value for cumulative metrics is being
+ # created.
+ self.assertIsNotNone(msg_1['cumulative_risk_metrics']['sharpe'])
+ self.assertIsNotNone(msg_2['cumulative_risk_metrics']['sharpe'])
diff --git a/tests/test_sources.py b/tests/test_sources.py
index e109613..38b2ec5 100644
--- a/tests/test_sources.py
+++ b/tests/test_sources.py
@@ -29,7 +29,7 @@ def test_df_source(self):
         assert isinstance(source.end, pd.lib.Timestamp)
 
         for expected_dt, expected_price in df.iterrows():
-            sid0 = source.next()
+            sid0 = next(source)
 
             assert expected_dt == sid0.dt
             assert expected_price[0] == sid0.price
@@ -71,5 +71,5 @@ def test_yahoo_bars_to_panel_source(self):
     for event in source:
         for check_field in check_fields:
             self.assertIn(check_field, event)
-        self.assertTrue(isinstance(event['volume'], (int, long)))
-        self.assertEqual(stocks_iter.next(), event['sid'])
+        self.assertTrue(isinstance(event['volume'], int))
+        self.assertEqual(next(stocks_iter), event['sid'])
diff --git a/tests/test_sources.py.bak b/tests/test_sources.py.bak
new file mode 100644
index 0000000..e109613
--- /dev/null
+++ b/tests/test_sources.py.bak
@@ -0,0 +1,75 @@
+#
+# Copyright 2013 Quantopian, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import pandas as pd
+import pytz
+from itertools import cycle
+
+from unittest import TestCase
+
+import zipline.utils.factory as factory
+from zipline.sources import DataFrameSource, DataPanelSource
+
+
+class TestDataFrameSource(TestCase):
+ def test_df_source(self):
+ source, df = factory.create_test_df_source()
+ assert isinstance(source.start, pd.lib.Timestamp)
+ assert isinstance(source.end, pd.lib.Timestamp)
+
+ for expected_dt, expected_price in df.iterrows():
+ sid0 = source.next()
+
+ assert expected_dt == sid0.dt
+ assert expected_price[0] == sid0.price
+
+ def test_df_sid_filtering(self):
+ _, df = factory.create_test_df_source()
+ source = DataFrameSource(df, sids=[0])
+ assert 1 not in [event.sid for event in source], \
+ "DataFrameSource should only stream selected sid 0, not sid 1."
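The `source.next()` to `next(source)` hunk above is the Python 3 iterator-protocol change: the method was renamed `__next__`, and the portable spelling is the `next()` builtin. A minimal illustration with a generator-backed source:

```python
def trade_stream():
    for price in (10.0, 10.0, 11.0):
        yield {'price': price}

source = trade_stream()
assert next(source)['price'] == 10.0  # works on Python 2 and 3
# source.next() existed only on Python 2; on Python 3 the method is
# source.__next__(), which the next() builtin calls for us.
assert next(source)['price'] == 10.0
```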
+ + def test_panel_source(self): + source, panel = factory.create_test_panel_source() + assert isinstance(source.start, pd.lib.Timestamp) + assert isinstance(source.end, pd.lib.Timestamp) + for event in source: + self.assertTrue('sid' in event) + self.assertTrue('arbitrary' in event) + self.assertTrue('volume' in event) + self.assertTrue('price' in event) + self.assertEquals(event['arbitrary'], 1.) + self.assertEquals(event['volume'], 1000) + self.assertEquals(event['sid'], 0) + self.assertTrue(isinstance(event['volume'], int)) + self.assertTrue(isinstance(event['arbitrary'], float)) + + def test_yahoo_bars_to_panel_source(self): + stocks = ['AAPL', 'GE'] + start = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc) + end = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc) + data = factory.load_bars_from_yahoo(stocks=stocks, + indexes={}, + start=start, + end=end) + + check_fields = ['sid', 'open', 'high', 'low', 'close', + 'volume', 'price'] + source = DataPanelSource(data) + stocks_iter = cycle(stocks) + for event in source: + for check_field in check_fields: + self.assertIn(check_field, event) + self.assertTrue(isinstance(event['volume'], (int, long))) + self.assertEqual(stocks_iter.next(), event['sid']) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 5aeed71..5ee1e56 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -64,7 +64,7 @@ def setUp(self): self.monday = datetime(2012, 7, 9, 16, tzinfo=pytz.utc) self.eleven_normal_days = [self.monday + i * timedelta(days=1) - for i in xrange(11)] + for i in range(11)] # Modify the end of the period slightly to exercise the # incomplete day logic. @@ -75,7 +75,7 @@ def setUp(self): # Second set of dates to test holiday handling. self.jul4_monday = datetime(2012, 7, 2, 16, tzinfo=pytz.utc) self.week_of_jul4 = [self.jul4_monday + i * timedelta(days=1) - for i in xrange(5)] + for i in range(5)] def test_market_aware_window_normal_week(self): window = NoopEventWindow( @@ -300,7 +300,7 @@ def test_talib_with_default_params(self): and n not in BLACKLIST] for name in names: - print name + print(name) zipline_transform = getattr(ta, name)(sid=0) talib_fn = getattr(talib.abstract, name) @@ -337,7 +337,7 @@ def test_talib_with_default_params(self): and np.all(np.isnan(expected_result))): self.assertTrue(np.allclose(zipline_result, expected_result)) else: - print '--- NAN' + print('--- NAN') # reset generator so next iteration has data # self.source, self.panel = \ diff --git a/tests/test_transforms.py.bak b/tests/test_transforms.py.bak new file mode 100644 index 0000000..5aeed71 --- /dev/null +++ b/tests/test_transforms.py.bak @@ -0,0 +1,398 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
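The `xrange` to `range` and `print` hunks above are mechanical, but the same migration also turns `filter()` and `dict.items()` into lazy one-shot views, so call sites that index or iterate twice must materialize them. A short illustration of the pattern (the comprehension form is what the blotter change later in this diff uses):

```python
orders = [{'dt': 1}, {'dt': 3}, {'dt': 2}]

# Python 3 filter() is a one-shot iterator; use list() or a comprehension
# before indexing or iterating the result more than once.
current = [o for o in orders if o['dt'] <= 2]
assert len(current) == 2
assert current[0]['dt'] == 1

days = list(range(5))  # range() is also lazy; list() restores the old behavior
assert days == [0, 1, 2, 3, 4]
```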
+ +import pytz +import numpy as np +import pandas as pd + +from datetime import timedelta, datetime +from unittest import TestCase + +from zipline.utils.test_utils import setup_logger + +from zipline.protocol import Event +from zipline.sources import SpecificEquityTrades +from zipline.transforms.utils import StatefulTransform, EventWindow +from zipline.transforms import MovingVWAP +from zipline.transforms import MovingAverage +from zipline.transforms import MovingStandardDev +from zipline.transforms import Returns +import zipline.utils.factory as factory + +from zipline.test_algorithms import TALIBAlgorithm + + +def to_dt(msg): + return Event({'dt': msg}) + + +class NoopEventWindow(EventWindow): + """ + A no-op EventWindow subclass for testing the base EventWindow logic. + Keeps lists of all added and dropped events. + """ + def __init__(self, market_aware, days, delta): + EventWindow.__init__(self, market_aware, days, delta) + + self.added = [] + self.removed = [] + + def handle_add(self, event): + self.added.append(event) + + def handle_remove(self, event): + self.removed.append(event) + + +class TestEventWindow(TestCase): + def setUp(self): + self.sim_params = factory.create_simulation_parameters() + + setup_logger(self) + + self.monday = datetime(2012, 7, 9, 16, tzinfo=pytz.utc) + self.eleven_normal_days = [self.monday + i * timedelta(days=1) + for i in xrange(11)] + + # Modify the end of the period slightly to exercise the + # incomplete day logic. + self.eleven_normal_days[-1] -= timedelta(minutes=1) + self.eleven_normal_days.append(self.monday + + timedelta(days=11, seconds=1)) + + # Second set of dates to test holiday handling. + self.jul4_monday = datetime(2012, 7, 2, 16, tzinfo=pytz.utc) + self.week_of_jul4 = [self.jul4_monday + i * timedelta(days=1) + for i in xrange(5)] + + def test_market_aware_window_normal_week(self): + window = NoopEventWindow( + market_aware=True, + delta=None, + days=3 + ) + events = [to_dt(date) for date in self.eleven_normal_days] + lengths = [] + # Run the events. + for event in events: + window.update(event) + # Record the length of the window after each event. + lengths.append(len(window.ticks)) + + # The window stretches out during the weekend because we wait + # to drop events until the weekend ends. The last window is + # briefly longer because it doesn't complete a full day. The + # window then shrinks once the day completes + self.assertEquals(lengths, [1, 2, 3, 3, 3, 4, 5, 5, 5, 3, 4, 3]) + self.assertEquals(window.added, events) + self.assertEquals(window.removed, events[:-3]) + + def test_market_aware_window_holiday(self): + window = NoopEventWindow( + market_aware=True, + delta=None, + days=2 + ) + events = [to_dt(date) for date in self.week_of_jul4] + lengths = [] + + # Run the events. + for event in events: + window.update(event) + # Record the length of the window after each event. 
+ lengths.append(len(window.ticks)) + + self.assertEquals(lengths, [1, 2, 3, 3, 2]) + self.assertEquals(window.added, events) + self.assertEquals(window.removed, events[:-2]) + + def tearDown(self): + setup_logger(self) + + +class TestFinanceTransforms(TestCase): + + def setUp(self): + self.sim_params = factory.create_simulation_parameters() + setup_logger(self) + + trade_history = factory.create_trade_history( + 133, + [10.0, 10.0, 11.0, 11.0], + [100, 100, 100, 300], + timedelta(days=1), + self.sim_params + ) + self.source = SpecificEquityTrades(event_list=trade_history) + + def tearDown(self): + self.log_handler.pop_application() + + def test_vwap(self): + vwap = MovingVWAP( + market_aware=True, + window_length=2 + ) + transformed = list(vwap.transform(self.source)) + + # Output values + tnfm_vals = [message[vwap.get_hash()] for message in transformed] + # "Hand calculated" values. + expected = [ + (10.0 * 100) / 100.0, + ((10.0 * 100) + (10.0 * 100)) / (200.0), + # We should drop the first event here. + ((10.0 * 100) + (11.0 * 100)) / (200.0), + # We should drop the second event here. + ((11.0 * 100) + (11.0 * 300)) / (400.0) + ] + + # Output should match the expected. + self.assertEquals(tnfm_vals, expected) + + def test_returns(self): + # Daily returns. + returns = Returns(1) + + transformed = list(returns.transform(self.source)) + tnfm_vals = [message[returns.get_hash()] for message in transformed] + + # No returns for the first event because we don't have a + # previous close. + expected = [0.0, 0.0, 0.1, 0.0] + + self.assertEquals(tnfm_vals, expected) + + # Two-day returns. An extra kink here is that the + # factory will automatically skip a weekend for the + # last event. Results shouldn't notice this blip. + + trade_history = factory.create_trade_history( + 133, + [10.0, 15.0, 13.0, 12.0, 13.0], + [100, 100, 100, 300, 100], + timedelta(days=1), + self.sim_params + ) + self.source = SpecificEquityTrades(event_list=trade_history) + + returns = StatefulTransform(Returns, 2) + + transformed = list(returns.transform(self.source)) + tnfm_vals = [message[returns.get_hash()] for message in transformed] + + expected = [ + 0.0, + 0.0, + (13.0 - 10.0) / 10.0, + (12.0 - 15.0) / 15.0, + (13.0 - 13.0) / 13.0 + ] + + self.assertEquals(tnfm_vals, expected) + + def test_moving_average(self): + + mavg = MovingAverage( + market_aware=True, + fields=['price', 'volume'], + window_length=2 + ) + + transformed = list(mavg.transform(self.source)) + # Output values. + tnfm_prices = [message[mavg.get_hash()].price + for message in transformed] + tnfm_volumes = [message[mavg.get_hash()].volume + for message in transformed] + + # "Hand-calculated" values + expected_prices = [ + ((10.0) / 1.0), + ((10.0 + 10.0) / 2.0), + # First event should get dropped here. + ((10.0 + 11.0) / 2.0), + # Second event should get dropped here. + ((11.0 + 11.0) / 2.0) + ] + expected_volumes = [ + ((100.0) / 1.0), + ((100.0 + 100.0) / 2.0), + # First event should get dropped here. + ((100.0 + 100.0) / 2.0), + # Second event should get dropped here. 
+ ((100.0 + 300.0) / 2.0) + ] + + self.assertEquals(tnfm_prices, expected_prices) + self.assertEquals(tnfm_volumes, expected_volumes) + + def test_moving_stddev(self): + trade_history = factory.create_trade_history( + 133, + [10.0, 15.0, 13.0, 12.0], + [100, 100, 100, 100], + timedelta(days=1), + self.sim_params + ) + + stddev = MovingStandardDev( + market_aware=True, + window_length=3, + ) + + self.source = SpecificEquityTrades(event_list=trade_history) + + transformed = list(stddev.transform(self.source)) + + vals = [message[stddev.get_hash()] for message in transformed] + + expected = [ + None, + np.std([10.0, 15.0], ddof=1), + np.std([10.0, 15.0, 13.0], ddof=1), + np.std([15.0, 13.0, 12.0], ddof=1), + ] + + # np has odd rounding behavior, cf. + # http://docs.scipy.org/doc/np/reference/generated/np.std.html + for v1, v2 in zip(vals, expected): + + if v1 is None: + self.assertIsNone(v2) + continue + self.assertEquals(round(v1, 5), round(v2, 5)) + + +############################################################ +# Test TALIB + +import talib +import zipline.transforms.ta as ta + + +class TestTALIB(TestCase): + def setUp(self): + setup_logger(self) + sim_params = factory.create_simulation_parameters( + start=datetime(1990, 1, 1, tzinfo=pytz.utc), + end=datetime(1990, 3, 30, tzinfo=pytz.utc)) + self.source, self.panel = \ + factory.create_test_panel_ohlc_source(sim_params) + + def test_talib_with_default_params(self): + BLACKLIST = ['make_transform', 'BatchTransform', + # TODO: Figure out why MAVP generates a KeyError + 'MAVP'] + names = [n for n in dir(ta) if n[0].isupper() + and n not in BLACKLIST] + + for name in names: + print name + zipline_transform = getattr(ta, name)(sid=0) + talib_fn = getattr(talib.abstract, name) + + start = datetime(1990, 1, 1, tzinfo=pytz.utc) + end = start + timedelta(days=zipline_transform.lookback + 10) + sim_params = factory.create_simulation_parameters( + start=start, end=end) + source, panel = \ + factory.create_test_panel_ohlc_source(sim_params) + + algo = TALIBAlgorithm(talib=zipline_transform) + algo.run(source) + + zipline_result = np.array( + algo.talib_results[zipline_transform][-1]) + + talib_data = dict() + data = zipline_transform.window + # TODO: Figure out if we are clobbering the tests by this + # protection against empty windows + if not data: + continue + for key in ['open', 'high', 'low', 'volume']: + if key in data: + talib_data[key] = data[key][0].values + talib_data['close'] = data['price'][0].values + expected_result = talib_fn(talib_data) + + if isinstance(expected_result, list): + expected_result = np.array([e[-1] for e in expected_result]) + else: + expected_result = np.array(expected_result[-1]) + if not (np.all(np.isnan(zipline_result)) + and np.all(np.isnan(expected_result))): + self.assertTrue(np.allclose(zipline_result, expected_result)) + else: + print '--- NAN' + + # reset generator so next iteration has data + # self.source, self.panel = \ + # factory.create_test_panel_ohlc_source(self.sim_params) + + def test_multiple_talib_with_args(self): + zipline_transforms = [ta.MA(timeperiod=10), + ta.MA(timeperiod=25)] + talib_fn = talib.abstract.MA + algo = TALIBAlgorithm(talib=zipline_transforms) + algo.run(self.source) + # Test if computed values match those computed by pandas rolling mean. 
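Before the assertions continue, the trailing-average rule behind the hand-calculated MovingAverage expectations above (and behind the pandas rolling-mean comparison) can be checked standalone; `moving_average` here is an illustrative helper, not the zipline transform:

```python
import numpy as np

def moving_average(values, window):
    """Trailing mean over the last `window` observations (shorter at the start)."""
    return np.array([values[max(0, i + 1 - window):i + 1].mean()
                     for i in range(len(values))])

prices = np.array([10.0, 10.0, 11.0, 11.0])
# Matches the hand-calculated two-event price expectations above.
assert np.allclose(moving_average(prices, 2), [10.0, 10.0, 10.5, 11.0])
```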
+ sid = 0 + talib_values = np.array([x[sid] for x in + algo.talib_results[zipline_transforms[0]]]) + np.testing.assert_array_equal(talib_values, + pd.rolling_mean(self.panel[0]['price'], + 10).values) + talib_values = np.array([x[sid] for x in + algo.talib_results[zipline_transforms[1]]]) + np.testing.assert_array_equal(talib_values, + pd.rolling_mean(self.panel[0]['price'], + 25).values) + for t in zipline_transforms: + talib_result = np.array(algo.talib_results[t][-1]) + talib_data = dict() + data = t.window + # TODO: Figure out if we are clobbering the tests by this + # protection against empty windows + if not data: + continue + for key in ['open', 'high', 'low', 'volume']: + if key in data: + talib_data[key] = data[key][0].values + talib_data['close'] = data['price'][0].values + expected_result = talib_fn(talib_data, **t.call_kwargs)[-1] + np.testing.assert_allclose(talib_result, expected_result) + + def test_talib_with_minute_data(self): + + ma_one_day_minutes = ta.MA(timeperiod=10, bars='minute') + + # Assert that the BatchTransform window length is enough to cover + # the amount of minutes in the timeperiod. + + # Here, 10 minutes only needs a window length of 1. + self.assertEquals(1, ma_one_day_minutes.window_length) + + # With minutes greater than the 390, i.e. one trading day, we should + # have a window_length of two days. + ma_two_day_minutes = ta.MA(timeperiod=490, bars='minute') + self.assertEquals(2, ma_two_day_minutes.window_length) + + # TODO: Ensure that the lookback into the datapanel is returning + # expected results. + # Requires supplying minute instead of day data to the unit test. + # When adding test data, should add more minute events than the + # timeperiod to ensure that lookback is behaving properly. diff --git a/zipline/finance/blotter.py b/zipline/finance/blotter.py index 07f3a03..ce3de72 100644 --- a/zipline/finance/blotter.py +++ b/zipline/finance/blotter.py @@ -176,9 +176,7 @@ def process_trade(self, trade_event): orders = self.open_orders[trade_event.sid] orders = sorted(orders, key=lambda o: o.dt) # Only use orders for the current day or before - current_orders = filter( - lambda o: o.dt <= trade_event.dt, - orders) + current_orders = [o for o in orders if o.dt <= trade_event.dt] for order, txn in self.process_transactions(trade_event, current_orders): diff --git a/zipline/finance/blotter.py.bak b/zipline/finance/blotter.py.bak new file mode 100644 index 0000000..07f3a03 --- /dev/null +++ b/zipline/finance/blotter.py.bak @@ -0,0 +1,329 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
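In the `Blotter` listing that follows, open orders are keyed by sid in a `defaultdict(list)`, so a brand-new sid can accept its first order without any initialization step:

```python
from collections import defaultdict

open_orders = defaultdict(list)
open_orders[133].append({'amount': 100, 'dt': 1})
open_orders[133].append({'amount': -25, 'dt': 2})

assert len(open_orders[133]) == 2
# An unseen sid yields an empty list rather than raising KeyError
# (and the lookup itself creates the entry).
assert open_orders[7] == []
```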
+import math +import uuid + +from copy import copy +from logbook import Logger +from collections import defaultdict + +import zipline.errors +import zipline.protocol as zp + +from zipline.finance.slippage import ( + VolumeShareSlippage, + transact_partial, + check_order_triggers +) +from zipline.finance.commission import PerShare +import zipline.utils.math_utils as zp_math + +log = Logger('Blotter') + +from zipline.utils.protocol_utils import Enum + +ORDER_STATUS = Enum( + 'OPEN', + 'FILLED', + 'CANCELLED' +) + + +# On an order to buy, between .05 below to .95 above a penny, use that penny. +# On an order to sell, between .05 above to .95 below a penny, use that penny. +# buy: [.0095, .0195) -> round to .01, sell: (.0005, .0105] -> round to .01 +def round_for_minimum_price_variation(x, is_buy, diff=(0.0095 - .005)): + # relies on rounding half away from zero, unlike numpy's bankers' rounding + rounded = round(x - (diff if is_buy else -diff), 2) + if zp_math.tolerant_equals(rounded, 0.0): + return 0.0 + return rounded + + +class Blotter(object): + + def __init__(self): + self.transact = transact_partial(VolumeShareSlippage(), PerShare()) + # these orders are aggregated by sid + self.open_orders = defaultdict(list) + # keep a dict of orders by their own id + self.orders = {} + # holding orders that have come in since the last + # event. + self.new_orders = [] + self.current_dt = None + self.max_shares = int(1e+11) + + def __repr__(self): + return """ +{class_name}( + transact_partial={transact_partial}, + open_orders={open_orders}, + orders={orders}, + new_orders={new_orders}, + current_dt={current_dt}) +""".strip().format(class_name=self.__class__.__name__, + transact_partial=self.transact.args, + open_orders=self.open_orders, + orders=self.orders, + new_orders=self.new_orders, + current_dt=self.current_dt) + + def set_date(self, dt): + self.current_dt = dt + + def order(self, sid, amount, limit_price, stop_price, order_id=None): + + # something could be done with amount to further divide + # between buy by share count OR buy shares up to a dollar amount + # numeric == share count AND "$dollar.cents" == cost amount + + """ + amount > 0 :: Buy/Cover + amount < 0 :: Sell/Short + Market order: order(sid, amount) + Limit order: order(sid, amount, limit_price) + Stop order: order(sid, amount, None, stop_price) + StopLimit order: order(sid, amount, limit_price, stop_price) + """ + + # Fractional shares are not supported. + amount = int(amount) + + # just validates amount and passes rest on to TransactionSimulator + # Tell the user if they try to buy 0 shares of something. + if amount == 0: + zero_message = "Requested to trade zero shares of {psid}".format( + psid=sid + ) + log.debug(zero_message) + # Don't bother placing orders for 0 shares. + return + elif amount > self.max_shares: + # Arbitrary limit of 100 billion (US) shares will never be + # exceeded except by a buggy algorithm. 
+ raise OverflowError("Can't order more than %d shares" % + self.max_shares) + + if limit_price: + limit_price = round_for_minimum_price_variation(limit_price, + amount > 0) + order = Order( + dt=self.current_dt, + sid=sid, + amount=amount, + stop=stop_price, + limit=limit_price, + id=order_id + ) + + self.open_orders[order.sid].append(order) + self.orders[order.id] = order + self.new_orders.append(order) + + return order.id + + def cancel(self, order_id): + if order_id not in self.orders: + return + + cur_order = self.orders[order_id] + if cur_order.open: + order_list = self.open_orders[cur_order.sid] + if cur_order in order_list: + order_list.remove(cur_order) + + if cur_order in self.new_orders: + self.new_orders.remove(cur_order) + cur_order.cancel() + cur_order.dt = self.current_dt + # we want this order's new status to be relayed out + # along with newly placed orders. + self.new_orders.append(cur_order) + + def process_split(self, split_event): + if split_event.sid not in self.open_orders: + return + + orders_to_modify = self.open_orders[split_event.sid] + for order in orders_to_modify: + order.handle_split(split_event) + + def process_trade(self, trade_event): + if trade_event.type != zp.DATASOURCE_TYPE.TRADE: + return + + if trade_event.sid not in self.open_orders: + return + + if trade_event.volume < 1: + # there are zero volume trade_events bc some stocks trade + # less frequently than once per minute. + return + + orders = self.open_orders[trade_event.sid] + orders = sorted(orders, key=lambda o: o.dt) + # Only use orders for the current day or before + current_orders = filter( + lambda o: o.dt <= trade_event.dt, + orders) + + for order, txn in self.process_transactions(trade_event, + current_orders): + yield order, txn + + # update the open orders for the trade_event's sid + self.open_orders[trade_event.sid] = \ + [order for order + in self.open_orders[trade_event.sid] + if order.open] + + def process_transactions(self, trade_event, current_orders): + for order, txn in self.transact(trade_event, current_orders): + if txn.type == zp.DATASOURCE_TYPE.COMMISSION: + order.commission = (order.commission or 0.0) + txn.cost + else: + if txn.amount == 0: + raise zipline.errors.TransactionWithNoAmount(txn=txn) + if math.copysign(1, txn.amount) != order.direction: + raise zipline.errors.TransactionWithWrongDirection( + txn=txn, order=order) + if abs(txn.amount) > abs(self.orders[txn.order_id].amount): + raise zipline.errors.TransactionVolumeExceedsOrder( + txn=txn, order=order) + + order.filled += txn.amount + if txn.commission is not None: + order.commission = ((order.commission or 0.0) + + txn.commission) + + # mark the date of the order to match the transaction + # that is filling it. + order.dt = txn.dt + + yield txn, order + + +class Order(object): + def __init__(self, dt, sid, amount, stop=None, limit=None, filled=0, + commission=None, id=None): + """ + @dt - datetime.datetime that the order was placed + @sid - stock sid of the order + @amount - the number of shares to buy/sell + a positive sign indicates a buy + a negative sign indicates a sell + @filled - how many shares of the order have been filled so far + """ + # get a string representation of the uuid. 
+ self.id = id or self.make_id() + self.dt = dt + self.created = dt + self.sid = sid + self.amount = amount + self.filled = filled + self.commission = commission + self._cancelled = False + self.stop = stop + self.limit = limit + self.stop_reached = False + self.limit_reached = False + self.direction = math.copysign(1, self.amount) + self.type = zp.DATASOURCE_TYPE.ORDER + + def make_id(self): + return uuid.uuid4().hex + + def to_dict(self): + py = copy(self.__dict__) + for field in ['type', 'direction', '_cancelled']: + del py[field] + py['status'] = self.status + return py + + def to_api_obj(self): + pydict = self.to_dict() + obj = zp.Order(initial_values=pydict) + return obj + + def check_triggers(self, event): + """ + Update internal state based on price triggers and the + trade event's price. + """ + stop_reached, limit_reached, sl_stop_reached = \ + check_order_triggers(self, event) + if (stop_reached, limit_reached) \ + != (self.stop_reached, self.limit_reached): + self.dt = event.dt + self.stop_reached = stop_reached + self.limit_reached = limit_reached + if sl_stop_reached: + # Change the STOP LIMIT order into a LIMIT order + self.stop = None + + def handle_split(self, split_event): + ratio = split_event.ratio + + # update the amount, limit_price, and stop_price + # by the split's ratio + + # info here: http://finra.complinet.com/en/display/display_plain.html? + # rbid=2403&element_id=8950&record_id=12208&print=1 + + # new_share_amount = old_share_amount / ratio + # new_price = old_price * ratio + + self.amount = int(self.amount / ratio) + + if self.limit: + self.limit = round(self.limit * ratio, 2) + + if self.stop: + self.stop = round(self.stop * ratio, 2) + + @property + def status(self): + if self._cancelled: + return ORDER_STATUS.CANCELLED + + return ORDER_STATUS.FILLED \ + if not self.open_amount else ORDER_STATUS.OPEN + + def cancel(self): + self._cancelled = True + + @property + def open(self): + return self.status == ORDER_STATUS.OPEN + + @property + def triggered(self): + """ + For a market order, True. + For a stop order, True IFF stop_reached. + For a limit order, True IFF limit_reached. + """ + if self.stop and not self.stop_reached: + return False + + if self.limit and not self.limit_reached: + return False + + return True + + @property + def open_amount(self): + return self.amount - self.filled diff --git a/zipline/finance/performance/position.py b/zipline/finance/performance/position.py index 3ad0ff3..160e999 100644 --- a/zipline/finance/performance/position.py +++ b/zipline/finance/performance/position.py @@ -32,7 +32,7 @@ """ -from __future__ import division + import logbook import math diff --git a/zipline/finance/performance/position.py.bak b/zipline/finance/performance/position.py.bak new file mode 100644 index 0000000..3ad0ff3 --- /dev/null +++ b/zipline/finance/performance/position.py.bak @@ -0,0 +1,204 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
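The `Order` bookkeeping above reduces to one invariant: an order stays open while `open_amount = amount - filled` is nonzero, and its status flips to FILLED once fills exhaust the amount. A stripped-down model (`ToyOrder` is hypothetical, not the class above):

```python
class ToyOrder:
    def __init__(self, amount):
        self.amount = amount  # signed: positive buys, negative sells
        self.filled = 0

    @property
    def open_amount(self):
        return self.amount - self.filled

order = ToyOrder(100)
order.filled += 60
assert order.open_amount == 40  # partially filled, still OPEN
order.filled += 40
assert order.open_amount == 0   # fully filled, status becomes FILLED
```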
+ +""" +Position Tracking +================= + + +-----------------+----------------------------------------------------+ + | key | value | + +=================+====================================================+ + | sid | the identifier for the security held in this | + | | position. | + +-----------------+----------------------------------------------------+ + | amount | whole number of shares in the position | + +-----------------+----------------------------------------------------+ + | last_sale_price | price at last sale of the security on the exchange | + +-----------------+----------------------------------------------------+ + | cost_basis | the volume weighted average price paid per share | + +-----------------+----------------------------------------------------+ + +""" + +from __future__ import division +import logbook +import math + +log = logbook.Logger('Performance') + + +class Position(object): + + def __init__(self, sid, amount=0, cost_basis=0.0, + last_sale_price=0.0, last_sale_date=None, + dividends=None): + self.sid = sid + self.amount = amount + self.cost_basis = cost_basis # per share + self.last_sale_price = last_sale_price + self.last_sale_date = last_sale_date + self.dividends = dividends or [] + + def update_dividends(self, midnight_utc): + """ + midnight_utc is the 0 hour for the current (not yet open) trading day. + This method will be invoked at the end of the market + close handling, before the next market open. + """ + payment = 0.0 + unpaid_dividends = [] + for dividend in self.dividends: + if midnight_utc == dividend.ex_date: + # if we own shares at midnight of the div_ex date + # we are entitled to the dividend. + dividend.amount_on_ex_date = self.amount + if dividend.net_amount: + dividend.payment = self.amount * dividend.net_amount + else: + dividend.payment = self.amount * dividend.gross_amount + + if midnight_utc == dividend.pay_date: + # if it is the payment date, include this + # dividend's actual payment (calculated on + # ex_date) + payment += dividend.payment + else: + unpaid_dividends.append(dividend) + + self.dividends = unpaid_dividends + return payment + + def add_dividend(self, dividend): + self.dividends.append(dividend) + + # Update the position by the split ratio, and return the + # resulting fractional share that will be converted into cash. + + # Returns the unused cash. 
+ def handle_split(self, split): + if (self.sid != split.sid): + raise NameError("updating split with the wrong sid!") + + ratio = split.ratio + + log.info("handling split for sid = " + str(split.sid) + + ", ratio = " + str(split.ratio)) + log.info("before split: " + str(self)) + + # adjust the # of shares by the ratio + # (if we had 100 shares, and the ratio is 3, + # we now have 33 shares) + # (old_share_count / ratio = new_share_count) + # (old_price * ratio = new_price) + + # ie, 33.333 + raw_share_count = self.amount / float(ratio) + + # ie, 33 + full_share_count = math.floor(raw_share_count) + + # ie, 0.333 + fractional_share_count = raw_share_count - full_share_count + + # adjust the cost basis to the nearest cent, ie, 60.0 + new_cost_basis = round(self.cost_basis * ratio, 2) + + # adjust the last sale price + new_last_sale_price = round(self.last_sale_price * ratio, 2) + + self.cost_basis = new_cost_basis + self.last_sale_price = new_last_sale_price + self.amount = full_share_count + + return_cash = round(float(fractional_share_count * new_cost_basis), 2) + + log.info("after split: " + str(self)) + log.info("returning cash: " + str(return_cash)) + + # return the leftover cash, which will be converted into cash + # (rounded to the nearest cent) + return return_cash + + def update(self, txn): + if(self.sid != txn.sid): + raise Exception('updating position with txn for a ' + 'different sid') + + # we're covering a short or closing a position + if(self.amount + txn.amount == 0): + self.cost_basis = 0.0 + self.amount = 0 + else: + prev_cost = self.cost_basis * self.amount + txn_cost = txn.amount * txn.price + total_cost = prev_cost + txn_cost + total_shares = self.amount + txn.amount + self.cost_basis = total_cost / total_shares + self.amount = total_shares + + def adjust_commission_cost_basis(self, commission): + """ + A note about cost-basis in zipline: all positions are considered + to share a cost basis, even if they were executed in different + transactions with different commission costs, different prices, etc. + + Due to limitations about how zipline handles positions, zipline will + currently spread an externally-delivered commission charge across + all shares in a position. + """ + + if commission.sid != self.sid: + raise Exception('Updating a commission for a different sid?') + if commission.cost == 0.0: + return + + # If we no longer hold this position, there is no cost basis to + # adjust. + if self.amount == 0: + return + + prev_cost = self.cost_basis * self.amount + new_cost = prev_cost + commission.cost + self.cost_basis = new_cost / self.amount + + def __repr__(self): + template = "sid: {sid}, amount: {amount}, cost_basis: {cost_basis}, \ +last_sale_price: {last_sale_price}" + return template.format( + sid=self.sid, + amount=self.amount, + cost_basis=self.cost_basis, + last_sale_price=self.last_sale_price + ) + + def to_dict(self): + """ + Creates a dictionary representing the state of this position. + Returns a dict object of the form: + """ + return { + 'sid': self.sid, + 'amount': self.amount, + 'cost_basis': self.cost_basis, + 'last_sale_price': self.last_sale_price + } + + +class positiondict(dict): + + def __missing__(self, key): + pos = Position(key) + self[key] = pos + return pos diff --git a/zipline/finance/slippage.py b/zipline/finance/slippage.py index 25d396f..8240010 100644 --- a/zipline/finance/slippage.py +++ b/zipline/finance/slippage.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -from __future__ import division + import abc @@ -152,9 +152,7 @@ def create_transaction(event, order, price, amount): return transaction -class SlippageModel(object): - - __metaclass__ = abc.ABCMeta +class SlippageModel(object, metaclass=abc.ABCMeta): @property def volume_for_bar(self): diff --git a/zipline/finance/slippage.py.bak b/zipline/finance/slippage.py.bak new file mode 100644 index 0000000..25d396f --- /dev/null +++ b/zipline/finance/slippage.py.bak @@ -0,0 +1,263 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import division + +import abc + +import math + +from copy import copy +from functools import partial +from zipline.protocol import DATASOURCE_TYPE +import zipline.utils.math_utils as zp_math + +SELL = 1 << 0 +BUY = 1 << 1 +STOP = 1 << 2 +LIMIT = 1 << 3 + + +def check_order_triggers(order, event): + """ + Given an order and a trade event, return a tuple of + (stop_reached, limit_reached). + For market orders, will return (False, False). + For stop orders, limit_reached will always be False. + For limit orders, stop_reached will always be False. + For stop limit orders a Boolean is returned to flag + that the stop has been reached. + + Orders that have been triggered already (price targets reached), + the order's current values are returned. + """ + if order.triggered: + return (order.stop_reached, order.limit_reached, False) + + stop_reached = False + limit_reached = False + sl_stop_reached = False + + order_type = 0 + + if order.amount > 0: + order_type |= BUY + else: + order_type |= SELL + + if order.stop is not None: + order_type |= STOP + + if order.limit is not None: + order_type |= LIMIT + + if order_type == BUY | STOP | LIMIT: + if event.price >= order.stop: + sl_stop_reached = True + if event.price <= order.limit: + limit_reached = True + elif order_type == SELL | STOP | LIMIT: + if event.price <= order.stop: + sl_stop_reached = True + if event.price >= order.limit: + limit_reached = True + elif order_type == BUY | STOP: + if event.price >= order.stop: + stop_reached = True + elif order_type == SELL | STOP: + if event.price <= order.stop: + stop_reached = True + elif order_type == BUY | LIMIT: + if event.price <= order.limit: + limit_reached = True + elif order_type == SELL | LIMIT: + # This is a SELL LIMIT order + if event.price >= order.limit: + limit_reached = True + + return (stop_reached, limit_reached, sl_stop_reached) + + +def transact_stub(slippage, commission, event, open_orders): + """ + This is intended to be wrapped in a partial, so that the + slippage and commission models can be enclosed. 
+ """ + for order, transaction in slippage(event, open_orders): + if ( + transaction + and not + zp_math.tolerant_equals(transaction.amount, 0) + ): + direction = math.copysign(1, transaction.amount) + per_share, total_commission = commission.calculate(transaction) + transaction.price = transaction.price + (per_share * direction) + transaction.commission = total_commission + yield order, transaction + + +def transact_partial(slippage, commission): + return partial(transact_stub, slippage, commission) + + +class Transaction(object): + + def __init__(self, sid, amount, dt, price, order_id, commission=None): + self.sid = sid + self.amount = amount + self.dt = dt + self.price = price + self.order_id = order_id + self.commission = commission + self.type = DATASOURCE_TYPE.TRANSACTION + + def __getitem__(self, name): + return self.__dict__[name] + + def to_dict(self): + py = copy(self.__dict__) + del py['type'] + return py + + +def create_transaction(event, order, price, amount): + + # floor the amount to protect against non-whole number orders + # TODO: Investigate whether we can add a robust check in blotter + # and/or tradesimulation, as well. + amount_magnitude = int(abs(amount)) + + if amount_magnitude < 1: + raise Exception("Transaction magnitude must be at least 1.") + + transaction = Transaction( + sid=event.sid, + amount=int(amount), + dt=event.dt, + price=price, + order_id=order.id + ) + + return transaction + + +class SlippageModel(object): + + __metaclass__ = abc.ABCMeta + + @property + def volume_for_bar(self): + return self._volume_for_bar + + @abc.abstractproperty + def process_order(self, event, order): + pass + + def simulate(self, event, current_orders): + + self._volume_for_bar = 0 + + for order in current_orders: + + if zp_math.tolerant_equals(order.open_amount, 0): + continue + + order.check_triggers(event) + if not order.triggered: + continue + + txn = self.process_order(event, order) + + if txn: + self._volume_for_bar += abs(txn.amount) + yield order, txn + + def __call__(self, event, current_orders, **kwargs): + return self.simulate(event, current_orders, **kwargs) + + +class VolumeShareSlippage(SlippageModel): + + def __init__(self, + volume_limit=.25, + price_impact=0.1): + + self.volume_limit = volume_limit + self.price_impact = price_impact + + def __repr__(self): + return """ +{class_name}( + volume_limit={volume_limit}, + price_impact={price_impact}) +""".strip().format(class_name=self.__class__.__name__, + volume_limit=self.volume_limit, + price_impact=self.price_impact) + + def process_order(self, event, order): + + max_volume = self.volume_limit * event.volume + + # price impact accounts for the total volume of transactions + # created against the current minute bar + remaining_volume = max_volume - self.volume_for_bar + if remaining_volume < 1: + # we can't fill any more transactions + return + + # the current order amount will be the min of the + # volume available in the bar or the open amount. + cur_volume = int(min(remaining_volume, abs(order.open_amount))) + + if cur_volume < 1: + return + + # tally the current amount into our total amount ordered. 
+        # total amount will be used to calculate price impact
+        total_volume = self.volume_for_bar + cur_volume
+
+        volume_share = min(total_volume / event.volume,
+                           self.volume_limit)
+
+        simulated_impact = (volume_share) ** 2 \
+            * math.copysign(self.price_impact, order.direction) \
+            * event.price
+
+        return create_transaction(
+            event,
+            order,
+            # In the future, we may want to change the next line
+            # for limit pricing
+            event.price + simulated_impact,
+            math.copysign(cur_volume, order.direction)
+        )
+
+
+class FixedSlippage(SlippageModel):
+
+    def __init__(self, spread=0.0):
+        """
+        Use the fixed slippage model, which will just add/subtract
+        a specified spread: spread/2 will be added on buys and
+        subtracted on sells, per share.
+        """
+        self.spread = spread
+
+    def process_order(self, event, order):
+        return create_transaction(
+            event,
+            order,
+            event.price + (self.spread / 2.0 * order.direction),
+            order.amount,
+        )
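To make the volume-share mechanics above concrete, here is a minimal sketch that re-derives the arithmetic of `VolumeShareSlippage.process_order` with made-up numbers. The bar volume, bar price, and order size are hypothetical; `volume_limit=.25` and `price_impact=0.1` are the constructor defaults shown above.

```
# Hypothetical inputs: a 1000-share bar at $100.00 and one open buy
# order for 500 shares, with nothing filled against this bar yet.
volume_limit = 0.25    # default from VolumeShareSlippage.__init__
price_impact = 0.1     # default from VolumeShareSlippage.__init__
bar_volume = 1000
bar_price = 100.00
open_amount = 500      # positive => buy, so direction is +1
volume_for_bar = 0

# the fill is capped at volume_limit * bar volume
max_volume = volume_limit * bar_volume                       # 250.0
remaining_volume = max_volume - volume_for_bar               # 250.0
cur_volume = int(min(remaining_volume, abs(open_amount)))    # 250

# price impact is quadratic in the share of bar volume consumed
total_volume = volume_for_bar + cur_volume                   # 250
volume_share = min(total_volume / bar_volume, volume_limit)  # 0.25
simulated_impact = volume_share ** 2 * price_impact * bar_price

fill_price = bar_price + simulated_impact
assert cur_volume == 250
assert abs(fill_price - 100.625) < 1e-9  # buy fills $0.625 above the bar price
```

Only 250 of the 500 shares fill here; the remainder stays open and is eligible against later bars, since `simulate` resets `_volume_for_bar` at the start of each bar.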