Source code for touvlo.nnet.sgl_parm

"""
.. module:: sgl_parm
    :synopsis: Provides routines to construct a Single Parameter based
        Classification Neural Network.

.. moduleauthor:: Benardi Nunes <benardinunes@gmail.com>
"""

from math import sqrt

from numpy.random import uniform
from numpy import (float64, ones, append, sum, dot, log,
                   power, zeros, reshape, empty)

from touvlo.utils import g, g_grad


def feed_forward(X, theta, n_hidden_layers=1):
    """Applies forward propagation to calculate model's hypothesis.

    Args:
        X (numpy.array): Features' dataset, intercept column included.
        theta (numpy.array(numpy.array)): Array of model's weight
            matrices by layer.
        n_hidden_layers (int): Number of hidden layers in network.

    Returns:
        (numpy.array(numpy.array), numpy.array(numpy.array)): A 2-tuple
            consisting of an array of parameters prior to activation by
            layer and an array of activation matrices by layer.
    """
    z = empty((n_hidden_layers + 2), dtype=object)
    a = empty((n_hidden_layers + 2), dtype=object)

    # Input layer
    a[0] = X

    # Hidden unit layers
    for l in range(1, (len(a) - 1)):
        z[l] = a[l - 1].dot(theta[l - 1].T)
        a[l] = g(z[l])
        a[l] = append(ones((len(a[l]), 1), float64),  # add intercept
                      a[l], axis=1)

    # Output layer
    z[len(a) - 1] = a[(len(a) - 2)].dot(theta[(len(a) - 2)].T)
    a[len(a) - 1] = g(z[len(a) - 1])  # hypothesis

    return z, a
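
# Illustrative sketch (not part of the original module): one forward pass
# through a 1-hidden-layer network with made-up sizes. Note that `X` must
# already carry its intercept column, as `cost_function` below arranges.
def _example_feed_forward():
    theta = empty(2, dtype=object)
    theta[0] = ones((3, 3), dtype=float64)    # input (2 + bias) -> 3 hidden
    theta[1] = ones((2, 4), dtype=float64)    # hidden (3 + bias) -> 2 outputs
    X = ones((4, 3), dtype=float64)           # 4 examples, intercept included
    z, a = feed_forward(X, theta, n_hidden_layers=1)
    assert a[2].shape == (4, 2)               # one score per class per example
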
def back_propagation(y, theta, a, z, num_labels, n_hidden_layers=1):
    """Applies back propagation to calculate the model's error terms by layer.

    Args:
        y (numpy.array): Column vector of expected values.
        theta (numpy.array(numpy.array)): Array of model's weight
            matrices by layer.
        a (numpy.array(numpy.array)): Array of activation matrices
            by layer.
        z (numpy.array(numpy.array)): Array of parameters prior to
            sigmoid by layer.
        num_labels (int): Number of classes in multiclass classification.
        n_hidden_layers (int): Number of hidden layers in network.

    Returns:
        numpy.array(numpy.array): Array of matrices of 'error values'
            by layer.
    """
    delta = empty((n_hidden_layers + 2), dtype=object)
    L = n_hidden_layers + 1  # last layer

    delta[L] = zeros(shape=a[L].shape, dtype=float64)
    for c in range(num_labels):
        delta[L][:, c] = a[L][:, c] - (y == c)

    for l in range(L, 1, -1):
        delta[l - 1] = delta[l].dot(theta[l - 1])[:, 1:] * g_grad(z[l - 1])

    return delta
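
# Illustrative sketch (not part of the original module): error terms for a
# single training example, mirroring how `grad` below drives this function.
# Weights and labels are arbitrary placeholders.
def _example_back_propagation():
    theta = empty(2, dtype=object)
    theta[0] = ones((3, 3), dtype=float64)
    theta[1] = ones((2, 4), dtype=float64)
    X = ones((1, 3), dtype=float64)            # one example, intercept included
    y = zeros((1, 1), dtype=float64)           # its class is 0
    z, a = feed_forward(X, theta, n_hidden_layers=1)
    delta = back_propagation(y[0, :], theta, a, z, num_labels=2)
    assert delta[2].shape == (1, 2)            # output-layer error terms
    assert delta[1].shape == (1, 3)            # hidden-layer error terms
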
def h(X, theta, n_hidden_layers=1):
    """Classification Neural Network hypothesis.

    Args:
        X (numpy.array): Features' dataset, intercept column included.
        theta (numpy.array(numpy.array)): Array of model's weight
            matrices by layer.
        n_hidden_layers (int): Number of hidden layers in network.

    Returns:
        numpy.array: The probability that each entry belongs to each class.
    """
    _, a = feed_forward(X, theta, n_hidden_layers)
    L = n_hidden_layers + 1  # last layer
    hypothesis = a[L]
    return hypothesis
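
# Illustrative sketch (not part of the original module): `h` is just the
# activation of the last layer, so every row holds one score per class.
def _example_h():
    theta = empty(2, dtype=object)
    theta[0] = ones((3, 3), dtype=float64)
    theta[1] = ones((2, 4), dtype=float64)
    X = ones((4, 3), dtype=float64)        # intercept column already added
    probabilities = h(X, theta, n_hidden_layers=1)
    assert probabilities.shape == (4, 2)   # 4 examples, 2 classes
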
def cost_function(X, y, theta, _lambda, num_labels, n_hidden_layers=1):
    """Computes the cost function J for a Neural Network.

    Args:
        X (numpy.array): Features' dataset.
        y (numpy.array): Column vector of expected values.
        theta (numpy.array(numpy.array)): Array of model's weight
            matrices by layer.
        _lambda (float): The regularization hyperparameter.
        num_labels (int): Number of classes in multiclass classification.
        n_hidden_layers (int): Number of hidden layers in network.

    Returns:
        float: Computed cost.
    """
    m, n = X.shape
    intercept = ones((m, 1), dtype=float64)
    X = append(intercept, X, axis=1)
    _h = h(X, theta, n_hidden_layers)  # model hypothesis

    J = 0
    for c in range(num_labels):
        _J = dot(1 - (y == c).T, log(1 - _h[:, c]))
        _J = _J + dot((y == c).T, log(_h[:, c]))
        J = J - (1 / m) * sum(_J)

    # regularization term (bias columns excluded)
    theta_squared_term = 0
    for j in range(len(theta)):
        theta_squared_term += sum(power(theta[j][:, 1:], 2))

    J = J + (_lambda / (2 * m)) * theta_squared_term

    return J
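
# Illustrative sketch (not part of the original module): the regularized
# cost for a toy dataset. Unlike `feed_forward`, this function appends the
# intercept column itself, so `X` holds only the raw features.
def _example_cost_function():
    theta = empty(2, dtype=object)
    theta[0] = ones((3, 3), dtype=float64)
    theta[1] = ones((2, 4), dtype=float64)
    X = ones((4, 2), dtype=float64)               # raw features, no intercept
    y = zeros((4, 1), dtype=float64)              # all examples in class 0
    J = cost_function(X, y, theta, _lambda=1, num_labels=2)
    assert J > 0                                  # cost is a positive scalar
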
def grad(X, y, nn_params, _lambda, input_layer_size,
         hidden_layer_size, num_labels, n_hidden_layers=1):
    """Calculates the gradient of the neural network's parameters.

    Args:
        X (numpy.array): Features' dataset.
        y (numpy.array): Column vector of expected values.
        nn_params (numpy.array): Column vector of model's parameters.
        _lambda (float): The regularization hyperparameter.
        input_layer_size (int): Number of units in the input layer.
        hidden_layer_size (int): Number of units in a hidden layer.
        num_labels (int): Number of classes in multiclass classification.
        n_hidden_layers (int): Number of hidden layers in network.

    Returns:
        numpy.array: Flattened array with the gradient of every weight.
    """
    theta = unravel_params(nn_params, input_layer_size,
                           hidden_layer_size, num_labels, n_hidden_layers)

    # Init gradient with zeros
    theta_grad = empty((n_hidden_layers + 1), dtype=object)
    for i in range(len(theta)):
        theta_grad[i] = zeros(shape=theta[i].shape, dtype=float64)

    m, n = X.shape
    intercept = ones((m, 1), dtype=float64)
    X = append(intercept, X, axis=1)

    for t in range(m):
        z, a = feed_forward(X[[t], :], theta, n_hidden_layers)
        delta = back_propagation(y[t, :], theta, a, z,
                                 num_labels, n_hidden_layers)

        for l in range(len(theta_grad)):
            theta_grad[l] = theta_grad[l] + dot(delta[l + 1].T, a[l])

    for i in range(len(theta_grad)):
        theta_grad[i] = (1 / m) * theta_grad[i]

    # regularization (bias columns excluded)
    for i in range(len(theta_grad)):
        theta_grad[i][:, 1:] = theta_grad[i][:, 1:] + \
            (_lambda / m) * theta[i][:, 1:]

    flat_theta_grad = append(theta_grad[0].flatten(),
                             theta_grad[1].flatten())
    for i in range(2, len(theta_grad)):
        flat_theta_grad = append(flat_theta_grad, theta_grad[i].flatten())

    return flat_theta_grad
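
# Illustrative sketch (not part of the original module): gradient of a toy
# network, fed the flattened parameter vector that `grad` expects. The
# returned vector has one entry per weight, ready for a descent step.
def _example_grad():
    nn_params = init_nn_weights(2, 3, 2)          # sizes chosen arbitrarily
    nn_params = append(nn_params[0].flatten(), nn_params[1].flatten())
    X = ones((4, 2), dtype=float64)               # raw features, no intercept
    y = zeros((4, 1), dtype=float64)
    gradient = grad(X, y, nn_params, _lambda=1, input_layer_size=2,
                    hidden_layer_size=3, num_labels=2)
    assert gradient.shape == (3 * 3 + 2 * 4,)     # one entry per weight
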
def rand_init_weights(L_in, L_out):
    """Initializes weight matrix with random values.

    Args:
        L_in (int): Number of units in previous layer.
        L_out (int): Number of units in next layer.

    Returns:
        numpy.array: Matrix of random values with conforming dimensions.
    """
    # plus 1 column for the bias term
    epsilon_init = sqrt(6) / sqrt((L_in + 1) + L_out)
    W = uniform(size=(L_out, 1 + L_in)) * 2 * epsilon_init - epsilon_init
    return W
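
# Illustrative sketch (not part of the original module): weights are drawn
# uniformly from [-epsilon, epsilon], where epsilon shrinks as the layers
# grow, keeping early activations away from the sigmoid's flat regions.
def _example_rand_init_weights():
    W = rand_init_weights(L_in=2, L_out=3)
    epsilon = sqrt(6) / sqrt((2 + 1) + 3)
    assert W.shape == (3, 3)                  # L_out rows, L_in + 1 columns
    assert (abs(W) <= epsilon).all()
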
def unravel_params(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, n_hidden_layers=1):
    """Unravels flattened array into array of weight matrices.

    Args:
        nn_params (numpy.array): Row vector of model's parameters.
        input_layer_size (int): Number of units in the input layer.
        hidden_layer_size (int): Number of units in a hidden layer.
        num_labels (int): Number of classes in multiclass classification.
        n_hidden_layers (int): Number of hidden layers in network.

    Returns:
        numpy.array(numpy.array): Array with model's weight matrices.
    """
    input_layer_n_units = hidden_layer_size * (input_layer_size + 1)
    hidden_layer_n_units = hidden_layer_size * (hidden_layer_size + 1)
    theta = empty((n_hidden_layers + 1), dtype=object)

    # input layer to hidden layer
    theta[0] = nn_params[0:input_layer_n_units]
    theta[0] = reshape(theta[0], (hidden_layer_size,
                                  (input_layer_size + 1)))

    # hidden layer to hidden layer
    for i in range(1, n_hidden_layers):
        start = input_layer_n_units + (i - 1) * hidden_layer_n_units
        end = input_layer_n_units + i * hidden_layer_n_units
        theta[i] = nn_params[start:end]
        theta[i] = reshape(theta[i],
                           (hidden_layer_size, (hidden_layer_size + 1)))

    # hidden layer to output layer
    start = input_layer_n_units + (n_hidden_layers - 1) * hidden_layer_n_units
    theta[n_hidden_layers] = nn_params[start:]
    theta[n_hidden_layers] = reshape(theta[n_hidden_layers],
                                     (num_labels, (hidden_layer_size + 1)))

    return theta
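
# Illustrative sketch (not part of the original module): rebuilding weight
# matrices from a flat vector, the inverse of the flattening that `grad`
# performs on its result.
def _example_unravel_params():
    flat = ones(3 * 3 + 2 * 4, dtype=float64)      # 17 parameters in a row
    theta = unravel_params(flat, input_layer_size=2, hidden_layer_size=3,
                           num_labels=2, n_hidden_layers=1)
    assert theta[0].shape == (3, 3)                # input -> hidden weights
    assert theta[1].shape == (2, 4)                # hidden -> output weights
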
def init_nn_weights(input_layer_size, hidden_layer_size, num_labels,
                    n_hidden_layers=1):
    """Initializes the weight matrices of a network with random values.

    Args:
        input_layer_size (int): Number of units in the input layer.
        hidden_layer_size (int): Number of units in a hidden layer.
        num_labels (int): Number of classes in multiclass classification.
        n_hidden_layers (int): Number of hidden layers in network.

    Returns:
        numpy.array(numpy.array): Array of weight matrices with random
            values.
    """
    theta = empty((n_hidden_layers + 1), dtype=object)

    theta[0] = rand_init_weights(input_layer_size, hidden_layer_size)

    for l in range(1, n_hidden_layers):
        theta[l] = rand_init_weights(hidden_layer_size, hidden_layer_size)

    theta[n_hidden_layers] = rand_init_weights(hidden_layer_size, num_labels)

    return theta
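
# Illustrative sketch (not part of the original module): a deeper network
# just stacks square hidden-to-hidden matrices between the first and last.
def _example_init_nn_weights():
    theta = init_nn_weights(input_layer_size=2, hidden_layer_size=3,
                            num_labels=2, n_hidden_layers=2)
    assert theta[0].shape == (3, 3)     # input (2 + bias) -> hidden
    assert theta[1].shape == (3, 4)     # hidden (3 + bias) -> hidden
    assert theta[2].shape == (2, 4)     # hidden (3 + bias) -> output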