# Source code for clairvoyant.clair

"""Clair provides the machine learning brains behind Clairvoyant.

Classes defined by this module provide a framework for implementing machine
learning algorithms for stock data.
"""

from sklearn.svm import SVC
from numpy import vstack, hstack
from pytz import timezone
from pandas import to_datetime
from abc import ABCMeta, abstractmethod
from clairvoyant import History


class Strategy(metaclass=ABCMeta):
    """Abstract interface for acting on model-supplied recommendations.

    Subclass Strategy when your classifier decides on buying and selling
    shares and needs custom logic to react to each recommendation. All three
    hooks are abstract, so a concrete subclass must define each of them; the
    bodies here are default implementations that print a one-line report and
    can be reached through ``super()``.
    """

    @staticmethod
    def _rowDate(row, attrs):
        """Check the hook arguments and return the row's date value.

        :param row: A named tuple containing a row from ``History`` data.
        :param attrs: A dict map of attribute names to common names. Its
                      ``'Date'`` entry names the attribute read from ``row``.
        """
        assert isinstance(row, tuple)
        assert isinstance(attrs, dict)
        return getattr(row, attrs['Date'])

    @abstractmethod
    def buyLogic(self, prob, row, attrs):
        """React to a buy recommendation.

        The default implementation prints the row's date and ``prob``.

        :param prob: The probability of being in the buy classification.
        :param row: A named tuple containing a row from ``History`` data.
        :param attrs: A dict map of attribute names to common names.
        """
        when = Strategy._rowDate(row, attrs)
        print(f'[{when}] buy with {prob} likelihood.')

    @abstractmethod
    def sellLogic(self, prob, row, attrs):
        """React to a sell recommendation.

        The default implementation prints the row's date and ``prob``.

        :param prob: The probability value supplied with the recommendation.
        :param row: A named tuple containing a row from ``History`` data.
        :param attrs: A dict map of attribute names to common names.
        """
        when = Strategy._rowDate(row, attrs)
        print(f'[{when}] sell with {prob} likelihood.')

    @abstractmethod
    def nextPeriodLogic(self, prediction, performance, row, attrs):
        """Decide what to do once the next period's outcome is known.

        This is primarily used on testing data to retrospectively evaluate
        the effectiveness of buying and selling based on particular logic.
        The default implementation prints the row's date together with the
        prediction and the performance.

        :param prediction: A prediction of performance in the next period.
        :param performance: Actual stock performance in the next period.
        :param row: A named tuple containing a row from ``History`` data.
        :param attrs: A dict map of attribute names to common names.
        """
        when = Strategy._rowDate(row, attrs)
        print(f'[{when}] prediction: {prediction}, performance: {performance}')


class Clair(Strategy):
    """Cla.I.R. - Classifier Inferred Recommendations.

    Clair uses the support vector machine supplied by the sk-learn library
    to infer buy and sell classifications for stocks using a client-supplied
    feature specification. Clair uses the default Radial Basis Function kernel
    provided by SVC. For more details, see the `scikit learn documentation.
    <http://scikit-learn.org/stable/modules/svm.html#parameters-of-the-rbf-kernel>`_

    Clients need to provide a date range for the training phase and another
    range for the testing phase. The learning phase determines classification
    probabilities that are used in the testing phase.

    Once the model is reliably trained, clients may use the :func:`predict`
    function to predict a result given an observed support vector.

    :param variables: A list of columns that represent learning features.
    :param trainStart: A datetime as a string that should be consistent with
                       the ``tz`` parameter. Defines the start date for model
                       training.
    :param trainEnd: A datetime as a string that should be consistent with the
                     ``tz`` parameter. Defines the end date for model training.
    :param testStart: A datetime as a string that should be consistent with the
                      ``tz`` parameter. Defines the start date for model
                      testing.
    :param testEnd: A datetime as a string that should be consistent with the
                    ``tz`` parameter. Defines the end date for model testing.
    :param buyThreshold: Defines the confidence level at which Clair will
                         recommend a buy. Default 0.65.
    :param sellThreshold: Defines the confidence level at which Clair will
                          recommend a sell. Default 0.65.
    :param C: The ``C`` penalty parameter handed to ``SVC``. See scikit-learn
              documentation for more details. Default 1.
    :param gamma: The kernel coefficient handed to ``SVC``. See scikit-learn
                  documentation for more details. Default 10.
    :param continuedTraining: Determine if data from the testing period should
                              be used to continue training the model during the
                              testing phase. Only the literal ``True`` enables
                              it. Default False.
    :param tz: The timezone associated with the datetime parameters; its
               ``localize`` method is applied to each parsed datetime.
               Default UTC.
    """

    def __init__(self, variables, trainStart, trainEnd, testStart, testEnd,
                 buyThreshold=0.65, sellThreshold=0.65, C=1, gamma=10,
                 continuedTraining=False, tz=timezone('UTC')):

        # Conditions
        self.variables = variables
        # Each boundary is parsed with pandas and then made timezone-aware.
        self.trainStart = tz.localize(to_datetime(trainStart))
        self.trainEnd = tz.localize(to_datetime(trainEnd))
        self.testStart = tz.localize(to_datetime(testStart))
        self.testEnd = tz.localize(to_datetime(testEnd))
        self.buyThreshold = buyThreshold
        self.sellThreshold = sellThreshold
        self.C = C
        self.gamma = gamma
        self.continuedTraining = continuedTraining

    def _features(self, row, data):
        """Return the feature values of ``row`` in ``self.variables`` order.

        :param row: A named tuple containing a row from ``History`` data.
        :param data: The ``History`` object the row came from; its
                     ``_col_map`` translates each variable name into the
                     attribute name used on ``row``.
        """
        return [getattr(row, data._col_map[var]) for var in self.variables]

    def learn(self, data, X=None, y=None):
        """Start the learning phase.

        Every row between ``trainStart`` and ``trainEnd`` contributes one
        feature vector, labelled 1 when the following period's return rate is
        positive and 0 otherwise. Rows that have no following period in
        ``data`` cannot be labelled and are skipped.

        :param data: A ``History`` object containing stock data along with
                     training features.
        :param X: Optional preprocessed support vectors. When supplied, the
                  list is extended in place.
        :param y: Optional preprocessed target values. Should coincide with the
                  ``X`` parameter. When supplied, the list is extended in
                  place.
        :returns: A ``(model, X, y)`` tuple holding the fitted ``SVC`` and the
                  accumulated vectors and targets.
        """
        assert isinstance(data, History)
        # Build fresh lists per call: a mutable default would be shared by
        # every call and would leak training rows from one run into the next.
        X = [] if X is None else X
        y = [] if y is None else y
        assert len(X) == len(y)

        lastIndex = len(data) - 1
        for row in data[self.trainStart:self.trainEnd]:
            i = row.Index
            if i >= lastIndex:
                # Same bound as execute(): there is no next period to label
                # this row with, and X and y must stay the same length.
                continue
            X.append(self._features(row, data))
            y.append(1 if data.return_rate[i + 1] > 0 else 0)

        model = SVC(C=self.C, gamma=self.gamma, probability=True)
        model.fit(vstack(X), hstack(y))

        return model, X, y

    def predict(self, model, Xs):
        """Calculate the probability of a buy or sell classification.

        :param model: A trained model.
        :param Xs: A list containing support vector data for a single vector.
        :returns: A ``(negative, positive)`` tuple taken from the first two
                  entries of the model's class probabilities.
        """
        # NOTE(review): column order follows the model's class ordering; this
        # assumes a model trained by learn() on both the 0 and 1 labels.
        prediction = model.predict_proba([Xs])[0]
        return prediction[0], prediction[1]

    def execute(self, data, model, X=None, y=None):
        """Execute the strategy logic using a trained model and input data.

        For every row between ``testStart`` and ``testEnd`` the model's
        probabilities are compared against the thresholds, the matching
        buy/sell hook is called, and, when a following period exists,
        :func:`nextPeriodLogic` is called with its actual return rate.

        :param data: A ``History`` object containing testing data.
        :param model: A trained model.
        :param X: Optional preprocessed support vectors used for continued
                  training. When supplied, the list is extended in place.
        :param y: Optional preprocessed target values corresponding to any
                  supplied support vectors. When supplied, the list is
                  extended in place.
        """
        assert isinstance(data, History)
        # Fresh lists per call; see learn() for why defaults are not lists.
        X = [] if X is None else X
        y = [] if y is None else y
        assert len(X) == len(y)

        lastIndex = len(data) - 1
        for row in data[self.testStart:self.testEnd]:
            Xs = self._features(row, data)
            neg, pos = self.predict(model, Xs)

            # The buy threshold is checked first, so it wins when both
            # probabilities clear their thresholds.
            if pos >= self.buyThreshold:
                prediction = 1
                self.buyLogic(pos, row, data._col_map)
            elif neg >= self.sellThreshold:
                prediction = -1
                self.sellLogic(neg, row, data._col_map)
            else:
                prediction = 0

            if row.Index >= lastIndex:
                # No realised next-period return exists for this row, so there
                # is nothing to evaluate and no label to continue training on.
                continue

            nextPeriodPerformance = data.return_rate[row.Index + 1]
            self.nextPeriodLogic(
                prediction, nextPeriodPerformance, row, data._col_map
                )

            if self.continuedTraining is True:
                X.append(Xs)
                y.append(1 if nextPeriodPerformance > 0 else 0)
                # Refit on everything seen so far, including this row.
                model.fit(vstack(X), hstack(y))

    def buyLogic(self, prob, row, attrs):
        """Override this function to provide your own logic."""
        super().buyLogic(prob, row, attrs)

    def sellLogic(self, prob, row, attrs):
        """Override this function to provide your own logic."""
        super().sellLogic(prob, row, attrs)

    def nextPeriodLogic(self, prediction, performance, row, attrs):
        """Override this function to provide your own logic."""
        super().nextPeriodLogic(prediction, performance, row, attrs)