Source code for clairvoyant.clair

"""Clair provides the machine learning brains behind Clairvoyant.

Classes defined by this module provide a framework for implementing machine
learning algorithms for stock data.
"""

from sklearn.svm import SVC
from numpy import vstack, hstack
from pytz import timezone
from pandas import to_datetime
from abc import ABCMeta, abstractmethod
from clairvoyant import History


class Strategy(metaclass=ABCMeta):
    """Abstract interface for acting on model-supplied recommendations.

    Subclass Strategy when your classifier decides on buying and selling
    shares and needs its own logic for reacting to each recommendation. All
    three hooks are abstract, so a concrete subclass must override each of
    them; the bodies defined here check their arguments and print a one-line
    report, and remain reachable through ``super()``.
    """

    @staticmethod
    def _dateOf(row, attrs):
        """Check the argument types and return the date stored in ``row``.

        :param row: A named tuple containing a row from ``History`` data.
        :param attrs: A dict map of attribute names to common names; its
            ``'Date'`` entry names the attribute of ``row`` to read.
        """
        assert isinstance(row, tuple)
        assert isinstance(attrs, dict)
        return getattr(row, attrs['Date'])

    @abstractmethod
    def buyLogic(self, prob, row, attrs):
        """React to a buy recommendation.

        :param prob: The probability of being in the buy classification.
        :param row: A named tuple containing a row from ``History`` data.
        :param attrs: A dict map of attribute names to common names.
        """
        dt = Strategy._dateOf(row, attrs)
        print(f'[{dt}] buy with {prob} likelihood.')

    @abstractmethod
    def sellLogic(self, prob, row, attrs):
        """React to a sell recommendation.

        :param prob: The probability of being in the sell classification.
        :param row: A named tuple containing a row from ``History`` data.
        :param attrs: A dict map of attribute names to common names.
        """
        dt = Strategy._dateOf(row, attrs)
        print(f'[{dt}] sell with {prob} likelihood.')

    @abstractmethod
    def nextPeriodLogic(self, prediction, performance, row, attrs):
        """Compare a prediction with what happened in the next period.

        Mainly useful on testing data, to retrospectively evaluate how
        effective buying and selling based on particular logic would have
        been.

        :param prediction: A prediction of performance in the next period.
        :param performance: Actual stock performance in the next period.
        :param row: A named tuple containing a row from ``History`` data.
        :param attrs: A dict map of attribute names to common names.
        """
        dt = Strategy._dateOf(row, attrs)
        print(f'[{dt}] prediction: {prediction}, performance: {performance}')


class Clair(Strategy):
    """Cla.I.R. - Classifier Inferred Recommendations.

    Clair uses the support vector machine supplied by the sk-learn library to
    infer buy and sell classifications for stocks using a client-supplied
    feature specification. Clair uses the default Radial Basis Function kernel
    provided by SVC. For more details, see the `scikit learn documentation.
    <http://scikit-learn.org/stable/modules/svm.html#parameters-of-the-rbf-kernel>`_

    Clients need to provide a date range for the training phase and another
    range for the testing phase. The learning phase determines classification
    probabilities that are used in the testing phase. Once the model is
    reliably trained, clients may use the :func:`predict` function to predict
    a result given an observed support vector.

    :param variables: A list of columns that represent learning features.
    :param trainStart: A datetime as a string that should be consistent with
        the ``tz`` parameter. Defines the start date for model training.
    :param trainEnd: A datetime as a string that should be consistent with the
        ``tz`` parameter. Defines the end date for model training.
    :param testStart: A datetime as a string that should be consistent with
        the ``tz`` parameter. Defines the start date for model testing.
    :param testEnd: A datetime as a string that should be consistent with the
        ``tz`` parameter. Defines the end date for model testing.
    :param buyThreshold: Defines the confidence level at which Clair will
        recommend a buy. Default 0.65.
    :param sellThreshold: Defines the confidence level at which Clair will
        recommend a sell. Default 0.65.
    :param C: The penalty parameter passed to ``SVC``. See scikit-learn
        documentation for more details. Default 1.
    :param gamma: The kernel coefficient passed to ``SVC``. See scikit-learn
        documentation for more details. Default 10.
    :param continuedTraining: Determine if data from the testing period should
        be used to continue training the model during the testing phase.
        Default False.
    :param tz: The timezone associated with the datetime parameters. Default
        UTC.
    """

    def __init__(
        self, variables, trainStart, trainEnd, testStart, testEnd,
        buyThreshold=0.65, sellThreshold=0.65, C=1, gamma=10,
        continuedTraining=False, tz=timezone('UTC')
            ):
        # Conditions
        self.variables = variables
        # The date strings are parsed as naive datetimes and then localized,
        # so every boundary is timezone-aware in ``tz``.
        self.trainStart = tz.localize(to_datetime(trainStart))
        self.trainEnd = tz.localize(to_datetime(trainEnd))
        self.testStart = tz.localize(to_datetime(testStart))
        self.testEnd = tz.localize(to_datetime(testEnd))
        self.buyThreshold = buyThreshold
        self.sellThreshold = sellThreshold
        self.C = C
        self.gamma = gamma
        self.continuedTraining = continuedTraining

    def _features(self, data, row):
        """Return the values of ``self.variables`` read from ``row``."""
        return [getattr(row, data._col_map[var]) for var in self.variables]

    @staticmethod
    def _label(performance):
        """Return the class label: 1 for a positive return, otherwise 0."""
        return 1 if performance > 0 else 0

    def learn(self, data, X=None, y=None):
        """Start the learning phase.

        :param data: A ``History`` object containing stock data along with
            training features.
        :param X: Optional preprocessed support vectors. A supplied list is
            extended in place; when omitted a new list is created.
        :param y: Optional preprocessed target values. Should coincide with
            the ``X`` parameter. A supplied list is extended in place.
        :returns: A ``(model, X, y)`` tuple holding the fitted ``SVC`` and the
            accumulated support vectors and target values.
        """
        # Fresh lists per call: a shared ``[]`` default would carry samples
        # over from one call (and one instance) to the next.
        X = [] if X is None else X
        y = [] if y is None else y

        assert(isinstance(data, History))
        assert(len(X) == len(y))

        lastIndex = len(data) - 1
        for row in data[self.trainStart:self.trainEnd]:
            # The target is the next period's return, so a row with no
            # successor cannot be labelled; skip it, as ``execute`` does.
            if row.Index >= lastIndex:
                continue
            X.append(self._features(data, row))
            y.append(self._label(data.return_rate[row.Index + 1]))

        model = SVC(C=self.C, gamma=self.gamma, probability=True)
        model.fit(vstack(X), hstack(y))

        return model, X, y

    def predict(self, model, Xs):
        """Calculate the probability of a buy or sell classification.

        :param model: A trained model.
        :param Xs: A list containing support vector data for a single vector.
        :returns: A ``(negative, positive)`` tuple taken from the first two
            entries of ``model.predict_proba`` for ``Xs``.
        """
        prediction = model.predict_proba([Xs])[0]
        negative = prediction[0]
        positive = prediction[1]
        return negative, positive

    def execute(self, data, model, X=None, y=None):
        """Execute the strategy logic using a trained model and input data.

        :param data: A ``History`` object containing testing data.
        :param model: A trained model.
        :param X: Optional preprocessed support vectors used for continued
            training. A supplied list is extended in place.
        :param y: Optional preprocessed target values corresponding to any
            supplied support vectors. A supplied list is extended in place.
        """
        # Fresh lists per call; see ``learn`` for why ``[]`` defaults are
        # avoided.
        X = [] if X is None else X
        y = [] if y is None else y

        assert(isinstance(data, History))
        assert(len(X) == len(y))

        lastIndex = len(data) - 1
        for row in data[self.testStart:self.testEnd]:
            Xs = self._features(data, row)
            neg, pos = self.predict(model, Xs)

            # The buy threshold is checked first, so it wins when both
            # probabilities clear their thresholds.
            if pos >= self.buyThreshold:
                prediction = 1
                self.buyLogic(pos, row, data._col_map)
            elif neg >= self.sellThreshold:
                prediction = -1
                self.sellLogic(neg, row, data._col_map)
            else:
                prediction = 0

            # The final row has no following period to evaluate against.
            if row.Index < lastIndex:
                nextPeriodPerformance = data.return_rate[row.Index + 1]
                self.nextPeriodLogic(
                    prediction, nextPeriodPerformance, row, data._col_map
                    )

                if self.continuedTraining is True:
                    # Refit on everything seen so far, including this row.
                    X.append(Xs)
                    y.append(self._label(nextPeriodPerformance))
                    model.fit(vstack(X), hstack(y))

    def buyLogic(self, prob, row, attrs):
        """Override this function to provide your own logic."""
        super().buyLogic(prob, row, attrs)

    def sellLogic(self, prob, row, attrs):
        """Override this function to provide your own logic."""
        super().sellLogic(prob, row, attrs)

    def nextPeriodLogic(self, prediction, performance, row, attrs):
        """Override this function to provide your own logic."""
        super().nextPeriodLogic(prediction, performance, row, attrs)