Source code for clairvoyant.backtest

"""Backtest provides a way of exploring and testing various parameterizations.

This module provides classes that allow clients to experiment with different
machine learning parameterizations and test those on historical stock data.
"""
from numpy import meshgrid, arange, c_
from sklearn.preprocessing import StandardScaler
from numpy import vstack, hstack
from pytz import timezone
from clairvoyant import Clair
import matplotlib
matplotlib.use('Agg')


class Backtest(Clair):
    """Backtest is a type of machine learning classifier.

    The purpose of ``Backtest`` is to collect statistics on the performance of
    learned classifications while providing a quick and easy way to vary
    parameters for rapid experimentation. Backtest also provides some
    convenience functions for visualizing collected statistics.

    :param variables: A list of columns that represent learning features.
    :param trainStart: A datetime as a string that should be consistent with
        the ``tz`` parameter. Defines the start date for model training.
    :param trainEnd: A datetime as a string that should be consistent with the
        ``tz`` parameter. Defines the end date for model training.
    :param testStart: A datetime as a string that should be consistent with
        the ``tz`` parameter. Defines the start date for model testing.
    :param testEnd: A datetime as a string that should be consistent with the
        ``tz`` parameter. Defines the end date for model testing.
    :param buyThreshold: Defines the confidence level at which Clair will
        recommend a buy. Default 0.65.
    :param sellThreshold: Defines the confidence level at which Clair will
        recommend a sell. Default 0.65.
    :param C: A penalty parameter for false positives. See scikit-learn
        documentation for more details. Default 1.
    :param gamma: The kernel coefficient for machine learning. See
        scikit-learn documentation for more details. Default 10.
    :param continuedTraining: Determine if data from the testing period should
        be used to continue training the model during the testing phase.
        Default False.
    :param tz: The timezone associated with the datetime parameters. Default
        UTC.

    :ivar debug: A boolean value that determines if debug strings will be
        printed as backtesting is run. Warning: may result in a lot of output.
    :ivar stocks: Labels for the runs, printed by ``displayStats`` and used by
        ``visualizeModel`` for the plot title and file name. No method of this
        class appends to it; callers are expected to fill it in.
    :ivar dates: One ``[trainStart, trainEnd, testStart, testEnd]`` entry
        (formatted ``%m/%d/%Y``) per ``runModel`` call.
    """

    def __init__(
        self, variables, trainStart, trainEnd, testStart, testEnd,
        buyThreshold=0.65, sellThreshold=0.65, C=1, gamma=10,
        continuedTraining=False, tz=timezone('UTC')
    ):
        super().__init__(
            variables, trainStart, trainEnd, testStart, testEnd,
            buyThreshold=buyThreshold, sellThreshold=sellThreshold, C=C,
            gamma=gamma, continuedTraining=continuedTraining, tz=tz
        )

        # Stats
        self.stocks = []
        self.dates = []
        self.totalBuys = 0
        self.correctBuys = 0
        self.totalSells = 0
        self.correctSells = 0
        self.increases = 0
        self.decreases = 0
        self.periods = 0
        self.debug = False

        # Visualize: filled in by runModel, consumed by visualizeModel.
        self.XX = None
        self.yy = None
        self.model = None

    def runModel(self, data):
        """Run backtesting.

        Calls ``self.learn`` and ``self.execute`` (not defined in this class,
        presumably inherited from ``Clair``), records the train/test date
        range in ``self.dates`` and keeps the model and its stacked training
        data for ``visualizeModel``.

        :param data: A ``History`` of stock data that includes observations in
            both the training and test phases.
        """
        # Learn and execute
        model, X, y = self.learn(data)
        self.execute(data, model, X, y)

        # Save for visualization purposes
        self.dates.append([
            self.trainStart.strftime('%m/%d/%Y'),
            self.trainEnd.strftime('%m/%d/%Y'),
            self.testStart.strftime('%m/%d/%Y'),
            self.testEnd.strftime('%m/%d/%Y')
        ])
        self.XX = vstack(X)
        self.yy = hstack(y)
        self.model = model

    def buyLogic(self, *args, **kwargs):
        """Increment the buy count.

        The parent implementation is only invoked when ``self.debug`` is set.
        """
        self.totalBuys += 1
        if self.debug:
            super().buyLogic(*args, **kwargs)

    def sellLogic(self, *args, **kwargs):
        """Increment the sell count.

        The parent implementation is only invoked when ``self.debug`` is set.
        """
        self.totalSells += 1
        if self.debug:
            super().sellLogic(*args, **kwargs)

    def nextPeriodLogic(self, prediction, performance, *args, **kwargs):
        """Collect statistics on correct and incorrect buys and sells.

        A period with ``performance`` of exactly zero is counted in
        ``self.periods`` but neither as an increase nor as a decrease.

        :param prediction: A 1 or -1 representing an up or down performance.
        :param performance: A positive or negative value representing the
            actual observed performance.
        """
        self.periods += 1
        if performance > 0:
            self.increases += 1
            if prediction == 1:
                self.correctBuys += 1
        elif performance < 0:
            self.decreases += 1
            if prediction == -1:
                self.correctSells += 1
        if self.debug:
            super().nextPeriodLogic(prediction, performance, *args, **kwargs)

    def clearStats(self):
        """Reset all collected statistics.

        ``self.stocks``, ``self.debug`` and the saved visualization state
        (``XX``, ``yy``, ``model``) are left untouched.
        """
        self.dates = []
        self.totalBuys = 0
        self.correctBuys = 0
        self.totalSells = 0
        self.correctSells = 0
        self.increases = 0
        self.decreases = 0
        self.periods = 0

    @staticmethod
    def _accuracy(correct, total):
        """Return ``correct / total`` as a percentage rounded to 2 places.

        Returns ``0.0`` when ``total`` is zero.
        """
        try:
            return round((float(correct) / total) * 100, 2)
        except ZeroDivisionError:
            return 0.0

    @staticmethod
    def _colorize(percent):
        """Format a percentage: green above 50, red below 50, plain at 50."""
        gre, red, end = '\033[92m', '\033[91m', '\033[0m'
        if percent > 50:
            return f"{gre}{percent}%{end}"
        if percent < 50:
            return f"{red}{percent}%{end}"
        return f"{percent}%"

    def buyStats(self):
        """Return the collected buy statistics.

        :returns: Percentage of buys that were correct, rounded to two
            decimals; ``0.0`` if no buys were recorded.
        """
        return self._accuracy(self.correctBuys, self.totalBuys)

    def sellStats(self):
        """Return the collected sell statistics.

        :returns: Percentage of sells that were correct, rounded to two
            decimals; ``0.0`` if no sells were recorded.
        """
        return self._accuracy(self.correctSells, self.totalSells)

    def displayConditions(self):
        """Print the learning and testing parameters."""
        bld, end = '\033[1m', '\033[0m'

        print(f'{bld}Conditions{end}')
        for i, var in enumerate(self.variables, start=1):
            print(f"X{i}: {var}")

        print(f"Buy Threshold: {self.buyThreshold*100}%")
        print(f"Sell Threshold: {self.sellThreshold*100}%")
        print(f"C: {self.C}")
        print(f"gamma: {self.gamma}")
        print(f"Continued Training: {self.continuedTraining}")
        print(f"Total Testing Periods: {self.periods}")
        print(f"Total Price Increases: {self.increases}")
        print(f"Total Price Decreases: {self.decreases}")

    def displayStats(self):
        """Print the collected backtesting statistics.

        Prints an error and returns if no run has been recorded in
        ``self.dates``.
        """
        bld, end = '\033[1m', '\033[0m'

        if not self.dates:
            print("Error: Please run model before displaying stats")
            return

        print(f'{bld}Stats{end}')
        print("Stock(s):")
        # Pair labels with recorded runs via zip: ``stocks`` is maintained
        # separately from ``dates`` (clearStats resets only the latter), so
        # indexing ``dates`` by stock position could raise IndexError.
        for stock, dates in zip(self.stocks, self.dates):
            print(f'{stock} | ',
                  f"Training: {dates[0]}-{dates[1]}",
                  f"Testing: {dates[2]}-{dates[3]}")

        print(f"\nTotal Buys: {self.totalBuys}")
        print(f"Buy Accuracy: {self._colorize(self.buyStats())}")
        print(f"Total Sells: {self.totalSells}")
        print(f"Sell Accuracy: {self._colorize(self.sellStats())}")

    def visualizeModel(self, width=5, height=5, stepsize=0.02):
        """Output a visualization of the backtesting results.

        The diagram overlays the saved observations on top of a color coded
        contour of the model's decision function, and is written to
        ``<last stock>.svg`` (a relative path).

        Note that the saved model is refit in place on the standardized
        observations before plotting.

        :param width: Figure width passed to matplotlib's ``figsize``.
        :param height: Figure height passed to matplotlib's ``figsize``.
        :param stepsize: Spacing of the evaluation grid, in standardized
            feature units.
        """
        import matplotlib.pyplot as plt
        from matplotlib.colors import ListedColormap

        if len(self.variables) != 2:
            print("Error: Plotting is restricted to 2 dimensions")
            return
        if self.XX is None or self.yy is None or self.model is None:
            print("Error: Please run model before visualizing")
            return
        # Check up front: the stock label is needed for the title and file
        # name, and failing after the refit would leave a half-built figure.
        if not self.stocks:
            print("Error: Please add a stock before visualizing")
            return

        stock = self.stocks[-1]
        y = self.yy
        X = StandardScaler().fit_transform(self.XX)
        self.model.fit(X, y)

        # Extend the grid slightly past the data so edge points are visible.
        pad = 0.5
        x_min, x_max = X[:, 0].min() - pad, X[:, 0].max() + pad
        y_min, y_max = X[:, 1].min() - pad, X[:, 1].max() + pad
        xx, yy = meshgrid(
            arange(x_min, x_max, stepsize),
            arange(y_min, y_max, stepsize)
        )

        fig = plt.figure(figsize=(width, height))
        try:
            redBlue = ListedColormap(['#FF312E', '#6E8894'])
            axes = fig.add_subplot(1, 1, 1)

            Z = self.model.decision_function(c_[xx.ravel(), yy.ravel()])
            Z = Z.reshape(xx.shape)

            axes.set_title(stock)
            axes.contourf(xx, yy, Z, cmap=plt.cm.RdBu, alpha=0.75)
            axes.scatter(X[:, 0], X[:, 1], c=y, cmap=redBlue)
            axes.set_xlim(xx.min(), xx.max())
            axes.set_ylim(yy.min(), yy.max())

            fig.savefig(stock + '.svg', format='svg')
        finally:
            # pyplot keeps every figure alive until it is closed explicitly;
            # release it so repeated calls do not accumulate figures.
            plt.close(fig)