# coding:utf-8
"""
References:
A Practical Guide to Training Restricted Boltzmann Machines
https://www.cs.toronto.edu/~hinton/absps/guideTR.pdf
"""
import logging

import numpy as np
from scipy.special import expit

from mla.base import BaseEstimator
from mla.utils import batch_iterator

np.random.seed(9999)

sigmoid = expit

class RBM(BaseEstimator):
    y_required = False

    def __init__(self, n_hidden=128, learning_rate=0.1, batch_size=10, max_epochs=100):
        """Bernoulli Restricted Boltzmann Machine (RBM).

        Parameters
        ----------
        n_hidden : int, default 128
            The number of hidden units.
        learning_rate : float, default 0.1
            Step size for the contrastive divergence updates.
        batch_size : int, default 10
            Number of training examples per mini-batch.
        max_epochs : int, default 100
            Number of passes over the training set.
        """
        self.max_epochs = max_epochs
        self.batch_size = batch_size
        self.lr = learning_rate
        self.n_hidden = n_hidden

    def fit(self, X, y=None):
        self.n_visible = X.shape[1]
        self._init_weights()
        self._setup_input(X, y)
        self._train()

    def _init_weights(self):
        self.W = np.random.randn(self.n_visible, self.n_hidden) * 0.1

        # Bias for visible and hidden units
        self.bias_v = np.zeros(self.n_visible, dtype=np.float32)
        self.bias_h = np.zeros(self.n_hidden, dtype=np.float32)

        self.errors = []
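
    # The CD-1 update rules implemented in `_train`, with v0 a data batch,
    # h0 = p(h|v0), v1 a reconstruction sampled via h0, and h1 = p(h|v1):
    #     W      += lr * (v0^T h0 - v1^T h1) / batch_size
    #     bias_h += lr * (sum(h0) - sum(h1)) / batch_size
    #     bias_v += lr * (sum(v0) - sum(v1)) / batch_size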
    def _train(self):
        """CD-1 training: the positive phase uses the exact hidden activations
        given the data; the negative phase uses a single step of block Gibbs
        sampling (no burn-in) to approximate the model distribution."""
        for i in range(self.max_epochs):
            error = 0
            for batch in batch_iterator(self.X, batch_size=self.batch_size):
                positive_hidden = sigmoid(np.dot(batch, self.W) + self.bias_h)
                hidden_states = self._sample(positive_hidden)  # sample hidden states h0
                positive_associations = np.dot(batch.T, positive_hidden)

                negative_visible = sigmoid(np.dot(hidden_states, self.W.T) + self.bias_v)
                negative_visible = self._sample(negative_visible)  # use h0 to sample v1
                negative_hidden = sigmoid(np.dot(negative_visible, self.W) + self.bias_h)
                negative_associations = np.dot(negative_visible.T, negative_hidden)

                # Scale the learning rate by the batch size so updates are
                # averages over the batch rather than sums
                lr = self.lr / float(batch.shape[0])
                self.W += lr * (positive_associations - negative_associations)
                self.bias_h += lr * (positive_hidden.sum(axis=0) - negative_hidden.sum(axis=0))
                self.bias_v += lr * (np.asarray(batch.sum(axis=0)).squeeze() - negative_visible.sum(axis=0))

                error += np.sum((batch - negative_visible) ** 2)

            self.errors.append(error)
            logging.info("Iteration %s, error %s", i, error)
        logging.debug("Weights: %s", self.W)
        logging.debug("Hidden bias: %s", self.bias_h)
        logging.debug("Visible bias: %s", self.bias_v)

    def _sample(self, X):
        # Sample binary states: each unit turns on with probability X
        # (returns a boolean array, which NumPy upcasts in later dot products)
        return X > np.random.random_sample(size=X.shape)

    def _predict(self, X=None):
        # Hidden-unit activation probabilities for the given visible data
        return sigmoid(np.dot(X, self.W) + self.bias_h)
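

# A minimal usage sketch (assumptions: binary input data, and that calling the
# internal `_predict` directly is acceptable; the data below is synthetic and
# purely illustrative).
if __name__ == "__main__":
    X = (np.random.rand(100, 6) > 0.5).astype(np.float64)  # random binary data
    rbm = RBM(n_hidden=4, learning_rate=0.05, batch_size=10, max_epochs=30)
    rbm.fit(X)                      # y is not required (y_required = False)
    hidden_probs = rbm._predict(X)  # probabilities of each hidden unit being on
    print(hidden_probs.shape)       # (100, 4)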