"""Backtest provides a way of exploring and testing various parameterizations.
This module provides classes that allow clients to experiment with different
machine learning parameterizations and test those on historical stock data.
"""
from numpy import meshgrid, arange, c_
from sklearn.preprocessing import StandardScaler
from numpy import vstack, hstack
from pytz import timezone
from clairvoyant import Clair
import matplotlib
# Select the non-interactive Agg backend so figures can be rendered straight
# to files without a display. This is done at import time, before
# ``matplotlib.pyplot`` is imported inside ``Backtest.visualizeModel``.
matplotlib.use('Agg')
class Backtest(Clair):
    """Backtest is a type of machine learning classifier.

    The purpose of ``Backtest`` is to collect statistics on the performance of
    learned classifications while providing a quick and easy way to vary
    parameters for rapid experimentation. Backtest also provides some
    convenience functions for visualizing collected statistics.

    :param variables: A list of columns that represent learning features.
    :param trainStart: A datetime as a string that should be consistent with
                       the ``tz`` parameter. Defines the start date for model
                       training.
    :param trainEnd: A datetime as a string that should be consistent with the
                     ``tz`` parameter. Defines the end date for model training.
    :param testStart: A datetime as a string that should be consistent with the
                      ``tz`` parameter. Defines the start date for model
                      testing.
    :param testEnd: A datetime as a string that should be consistent with the
                    ``tz`` parameter. Defines the end date for model testing.
    :param buyThreshold: Defines the confidence level at which Clair will
                         recommend a buy. Default 0.65.
    :param sellThreshold: Defines the confidence level at which Clair will
                          recommend a sell. Default 0.65.
    :param C: A penalty parameter for false positives. See scikit-learn
              documentation for more details. Default 1.
    :param gamma: The kernel coefficient for machine learning. See scikit-learn
                  documentation for more details. Default 10.
    :param continuedTraining: Determine if data from the testing period should
                              be used to continue training the model during the
                              testing phase. Default False.
    :param tz: The timezone associated with the datetime parameters. Default
               UTC.

    :ivar debug: A boolean value that determines if debug strings will be
                 printed as backtesting is run. Warning: may result in a lot of
                 output.
    :ivar stocks: A list of stock names used to label statistics and the
                  visualization. This class never appends to it, so callers
                  are expected to populate it themselves.
    :ivar dates: One ``[trainStart, trainEnd, testStart, testEnd]`` entry
                 (formatted ``%m/%d/%Y``) per call to ``runModel``.
    """

    def __init__(
        self, variables, trainStart, trainEnd, testStart, testEnd,
        buyThreshold=0.65, sellThreshold=0.65, C=1, gamma=10,
        continuedTraining=False, tz=timezone('UTC')
    ):
        super().__init__(
            variables, trainStart, trainEnd, testStart, testEnd,
            buyThreshold=buyThreshold, sellThreshold=sellThreshold, C=C,
            gamma=gamma, continuedTraining=continuedTraining, tz=tz
        )

        # Stats
        self.stocks = []
        self.dates = []
        self.totalBuys = 0
        self.correctBuys = 0
        self.totalSells = 0
        self.correctSells = 0
        self.increases = 0
        self.decreases = 0
        self.periods = 0
        self.debug = False

        # Visualize: filled in by ``runModel`` and read by ``visualizeModel``.
        self.XX = None
        self.yy = None
        self.model = None

    def runModel(self, data):
        """Run backtesting.

        Learns a model from ``data``, executes it, records the training and
        testing date range of this run, and keeps the stacked observations
        and the model for later visualization.

        :param data: A ``History`` of stock data that includes observations in
                     both the training and test phases.
        """
        # Learn and execute
        model, X, y = self.learn(data)
        self.execute(data, model, X, y)

        # Save for visualization purposes
        self.dates.append([
            self.trainStart.strftime('%m/%d/%Y'),
            self.trainEnd.strftime('%m/%d/%Y'),
            self.testStart.strftime('%m/%d/%Y'),
            self.testEnd.strftime('%m/%d/%Y')
        ])
        self.XX = vstack(X)
        self.yy = hstack(y)
        self.model = model

    def buyLogic(self, *args, **kwargs):
        """Increment the buy count.

        The parent implementation is only invoked when ``debug`` is set.
        """
        self.totalBuys += 1
        if self.debug:
            super().buyLogic(*args, **kwargs)

    def sellLogic(self, *args, **kwargs):
        """Increment the sell count.

        The parent implementation is only invoked when ``debug`` is set.
        """
        self.totalSells += 1
        if self.debug:
            super().sellLogic(*args, **kwargs)

    def nextPeriodLogic(self, prediction, performance, *args, **kwargs):
        """Collect statistics on correct and incorrect buys and sells.

        A ``performance`` of exactly zero counts as a period but as neither
        an increase nor a decrease.

        :param prediction: A 1 or -1 representing an up or down performance.
        :param performance: A positive or negative value representing the
                            actual observed performance.
        """
        self.periods += 1
        if performance > 0:
            self.increases += 1
            if prediction == 1:
                self.correctBuys += 1
        elif performance < 0:
            self.decreases += 1
            if prediction == -1:
                self.correctSells += 1
        if self.debug:
            super().nextPeriodLogic(prediction, performance, *args, **kwargs)

    def clearStats(self):
        """Reset all collected statistics.

        ``stocks`` and the data saved for visualization are left untouched.
        """
        self.dates = []
        self.totalBuys = 0
        self.correctBuys = 0
        self.totalSells = 0
        self.correctSells = 0
        self.increases = 0
        self.decreases = 0
        self.periods = 0

    @staticmethod
    def _accuracy(correct, total):
        """Return ``correct`` as a percentage of ``total``, rounded to 2 places.

        Returns ``0.0`` when ``total`` is zero instead of raising.
        """
        try:
            return round((float(correct) / total) * 100, 2)
        except ZeroDivisionError:
            return float(0)

    @staticmethod
    def _formatAccuracy(accuracy):
        """Return ``accuracy`` as a percent string, colored for a terminal.

        Green above 50, red below 50, uncolored at exactly 50.
        """
        gre, red, end = '\033[92m', '\033[91m', '\033[0m'
        if accuracy > 50:
            return f"{gre}{accuracy}%{end}"
        if accuracy < 50:
            return f"{red}{accuracy}%{end}"
        return f"{accuracy}%"

    def buyStats(self):
        """Return the collected buy statistics.

        :return: Percentage of buys that were correct, rounded to two decimal
                 places; ``0.0`` if no buys were recorded.
        """
        return self._accuracy(self.correctBuys, self.totalBuys)

    def sellStats(self):
        """Return the collected sell statistics.

        :return: Percentage of sells that were correct, rounded to two decimal
                 places; ``0.0`` if no sells were recorded.
        """
        return self._accuracy(self.correctSells, self.totalSells)

    def displayConditions(self):
        """Print the learning and testing parameters."""
        bld, end = '\033[1m', '\033[0m'

        print(f'{bld}Conditions{end}')
        for i, var in enumerate(self.variables, start=1):
            print(f"X{i}: {var}")

        print(f"Buy Threshold: {self.buyThreshold*100}%")
        print(f"Sell Threshold: {self.sellThreshold*100}%")
        print(f"C: {self.C}")
        print(f"gamma: {self.gamma}")
        print(f"Continued Training: {self.continuedTraining}")
        print(f"Total Testing Periods: {self.periods}")
        print(f"Total Price Increases: {self.increases}")
        print(f"Total Price Decreases: {self.decreases}")

    def displayStats(self):
        """Print the collected backtesting statistics.

        Prints an error and returns if ``runModel`` has not been called since
        the statistics were last cleared.
        """
        bld, end = '\033[1m', '\033[0m'

        if not self.dates:
            print("Error: Please run model before displaying stats")
            return

        print(f'{bld}Stats{end}')
        print("Stock(s):")
        # Pair each stock with the run at the same position. ``zip`` stops at
        # the shorter list, so a stock with no recorded run is skipped rather
        # than raising an IndexError.
        for stock, dates in zip(self.stocks, self.dates):
            print(f'{stock} | ',
                  f"Training: {dates[0]}-{dates[1]}",
                  f"Testing: {dates[2]}-{dates[3]}")

        print(f"\nTotal Buys: {self.totalBuys}")
        print(f"Buy Accuracy: {self._formatAccuracy(self.buyStats())}")
        print(f"Total Sells: {self.totalSells}")
        print(f"Sell Accuracy: {self._formatAccuracy(self.sellStats())}")

    def visualizeModel(self, width=5, height=5, stepsize=0.02):
        """Output a visualization of the backtesting results.

        The diagram overlays the saved observations on top of a color coded
        representation of the model's decision function. The figure is titled
        with the last entry of ``stocks`` and saved as ``<stock>.svg``.

        Note that the saved model is refit on the standardized observations
        as part of plotting.

        :param width: Figure width passed to matplotlib's ``figsize``.
        :param height: Figure height passed to matplotlib's ``figsize``.
        :param stepsize: Spacing of the mesh grid on which the decision
                         function is evaluated, in standardized feature units.
        """
        import matplotlib.pyplot as plt
        from matplotlib.colors import ListedColormap

        if len(self.variables) != 2:
            print("Error: Plotting is restricted to 2 dimensions")
            return
        if self.XX is None or self.yy is None or self.model is None:
            print("Error: Please run model before visualizing")
            return
        if not self.stocks:
            # The last stock name is used for the title and the file name, so
            # fail up front instead of after refitting the model.
            print("Error: Please add a stock name before visualizing")
            return

        stock = self.stocks[-1]

        X, y = self.XX, self.yy
        X = StandardScaler().fit_transform(X)
        self.model.fit(X, y)

        # Pad the plotted area by half a unit around the observations.
        x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
        y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
        xx, yy = meshgrid(
            arange(x_min, x_max, stepsize), arange(y_min, y_max, stepsize)
        )

        cm = plt.cm.RdBu
        RedBlue = ListedColormap(['#FF312E', '#6E8894'])

        fig = plt.figure(figsize=(width, height))
        try:
            axes = fig.add_subplot(1, 1, 1)

            Z = self.model.decision_function(c_[xx.ravel(), yy.ravel()])
            Z = Z.reshape(xx.shape)

            axes.set_title(stock)
            axes.contourf(xx, yy, Z, cmap=cm, alpha=0.75)
            axes.scatter(X[:, 0], X[:, 1], c=y, cmap=RedBlue)
            axes.set_xlim(xx.min(), xx.max())
            axes.set_ylim(yy.min(), yy.max())

            fig.savefig(stock+'.svg', format='svg')
        finally:
            # pyplot keeps every figure alive until it is closed; release it
            # so repeated calls do not accumulate figures in memory.
            plt.close(fig)