Source code for touvlo.nnet.sgl_parm

"""
.. module:: sgl_parm
    :synopsis: Provides routines to construct a Single Parameter based
        Classification Neural Network.

.. moduleauthor:: Benardi Nunes <benardinunes@gmail.com>
"""

from math import sqrt

from numpy.random import uniform
from numpy import (float64, ones, append, sum, dot, log,
                   power, zeros, reshape, empty)

from touvlo.utils import g, g_grad


def feed_forward(X, theta, n_hidden_layers=1):
    """Applies forward propagation to calculate model's hypothesis.

    Args:
        X (numpy.array): Features' dataset, intercept column included.
        theta (numpy.array(numpy.array)): Array of model's weight
            matrices by layer.
        n_hidden_layers (int): Number of hidden layers in network.

    Returns:
        (numpy.array(numpy.array), numpy.array(numpy.array)): A 2-tuple
            consisting of an array of parameters prior to activation by
            layer and an array of activation matrices by layer.
    """
    z = empty((n_hidden_layers + 2), dtype=object)
    a = empty((n_hidden_layers + 2), dtype=object)

    # Input layer
    a[0] = X

    # Hidden unit layers
    for l in range(1, (len(a) - 1)):
        z[l] = a[l - 1].dot(theta[l - 1].T)
        a[l] = g(z[l])
        a[l] = append(ones((len(a[l]), 1), float64),  # add intercept
                      a[l], axis=1)

    # Output layer
    z[len(a) - 1] = a[(len(a) - 2)].dot(theta[(len(a) - 2)].T)
    a[len(a) - 1] = g(z[len(a) - 1])  # hypothesis

    return z, a
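
# Illustrative sketch (not part of the original module): one forward pass
# through a 1-hidden-layer network with made-up sizes. Note that `X` must
# already carry its intercept column, as `cost_function` below arranges.
def _example_feed_forward():
    theta = empty(2, dtype=object)
    theta[0] = ones((3, 3), dtype=float64)    # input (2 + bias) -> 3 hidden
    theta[1] = ones((2, 4), dtype=float64)    # hidden (3 + bias) -> 2 outputs
    X = ones((4, 3), dtype=float64)           # 4 examples, intercept included
    z, a = feed_forward(X, theta, n_hidden_layers=1)
    assert a[2].shape == (4, 2)               # one score per class per example
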
def back_propagation(y, theta, a, z, num_labels, n_hidden_layers=1):
    """Applies back propagation to calculate the model's error terms by layer.

    Args:
        y (numpy.array): Column vector of expected values.
        theta (numpy.array(numpy.array)): Array of model's weight
            matrices by layer.
        a (numpy.array(numpy.array)): Array of activation matrices
            by layer.
        z (numpy.array(numpy.array)): Array of parameters prior to
            sigmoid by layer.
        num_labels (int): Number of classes in multiclass classification.
        n_hidden_layers (int): Number of hidden layers in network.

    Returns:
        numpy.array(numpy.array): Array of matrices of 'error values'
            by layer.
    """
    delta = empty((n_hidden_layers + 2), dtype=object)
    L = n_hidden_layers + 1  # last layer

    delta[L] = zeros(shape=a[L].shape, dtype=float64)
    for c in range(num_labels):
        delta[L][:, c] = a[L][:, c] - (y == c)

    for l in range(L, 1, -1):
        delta[l - 1] = delta[l].dot(theta[l - 1])[:, 1:] * g_grad(z[l - 1])

    return delta
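
# Illustrative sketch (not part of the original module): error terms for a
# single training example, mirroring how `grad` below drives this function.
# Weights and labels are arbitrary placeholders.
def _example_back_propagation():
    theta = empty(2, dtype=object)
    theta[0] = ones((3, 3), dtype=float64)
    theta[1] = ones((2, 4), dtype=float64)
    X = ones((1, 3), dtype=float64)            # one example, intercept included
    y = zeros((1, 1), dtype=float64)           # its class is 0
    z, a = feed_forward(X, theta, n_hidden_layers=1)
    delta = back_propagation(y[0, :], theta, a, z, num_labels=2)
    assert delta[2].shape == (1, 2)            # output-layer error terms
    assert delta[1].shape == (1, 3)            # hidden-layer error terms
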
def h(X, theta, n_hidden_layers=1):
    """Classification Neural Network hypothesis.

    Args:
        X (numpy.array): Features' dataset, intercept column included.
        theta (numpy.array(numpy.array)): Array of model's weight
            matrices by layer.
        n_hidden_layers (int): Number of hidden layers in network.

    Returns:
        numpy.array: The probability that each entry belongs to each class.
    """
    _, a = feed_forward(X, theta, n_hidden_layers)
    L = n_hidden_layers + 1  # last layer
    hypothesis = a[L]
    return hypothesis
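
# Illustrative sketch (not part of the original module): `h` is just the
# activation of the last layer, so every row holds one score per class.
def _example_h():
    theta = empty(2, dtype=object)
    theta[0] = ones((3, 3), dtype=float64)
    theta[1] = ones((2, 4), dtype=float64)
    X = ones((4, 3), dtype=float64)        # intercept column already added
    probabilities = h(X, theta, n_hidden_layers=1)
    assert probabilities.shape == (4, 2)   # 4 examples, 2 classes
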
def cost_function(X, y, theta, _lambda, num_labels, n_hidden_layers=1):
    """Computes the cost function J for a Neural Network.

    Args:
        X (numpy.array): Features' dataset.
        y (numpy.array): Column vector of expected values.
        theta (numpy.array(numpy.array)): Array of model's weight
            matrices by layer.
        _lambda (float): The regularization hyperparameter.
        num_labels (int): Number of classes in multiclass classification.
        n_hidden_layers (int): Number of hidden layers in network.

    Returns:
        float: Computed cost.
    """
    m, n = X.shape
    intercept = ones((m, 1), dtype=float64)
    X = append(intercept, X, axis=1)
    _h = h(X, theta, n_hidden_layers)  # model hypothesis

    J = 0
    for c in range(num_labels):
        _J = dot(1 - (y == c).T, log(1 - _h[:, c]))
        _J = _J + dot((y == c).T, log(_h[:, c]))
        J = J - (1 / m) * sum(_J)

    # regularization term (bias columns excluded)
    theta_squared_term = 0
    for j in range(len(theta)):
        theta_squared_term += sum(power(theta[j][:, 1:], 2))

    J = J + (_lambda / (2 * m)) * theta_squared_term

    return J
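
# Illustrative sketch (not part of the original module): the regularized
# cost for a toy dataset. Unlike `feed_forward`, this function appends the
# intercept column itself, so `X` holds only the raw features.
def _example_cost_function():
    theta = empty(2, dtype=object)
    theta[0] = ones((3, 3), dtype=float64)
    theta[1] = ones((2, 4), dtype=float64)
    X = ones((4, 2), dtype=float64)               # raw features, no intercept
    y = zeros((4, 1), dtype=float64)              # all examples in class 0
    J = cost_function(X, y, theta, _lambda=1, num_labels=2)
    assert J > 0                                  # cost is a positive scalar
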
def grad(X, y, nn_params, _lambda, input_layer_size,
         hidden_layer_size, num_labels, n_hidden_layers=1):
    """Calculates the gradient of the neural network's parameters.

    Args:
        X (numpy.array): Features' dataset.
        y (numpy.array): Column vector of expected values.
        nn_params (numpy.array): Column vector of model's parameters.
        _lambda (float): The regularization hyperparameter.
        input_layer_size (int): Number of units in the input layer.
        hidden_layer_size (int): Number of units in a hidden layer.
        num_labels (int): Number of classes in multiclass classification.
        n_hidden_layers (int): Number of hidden layers in network.

    Returns:
        numpy.array: Flattened array with the gradient of every weight.
    """
    theta = unravel_params(nn_params, input_layer_size,
                           hidden_layer_size, num_labels, n_hidden_layers)

    # Init gradient with zeros
    theta_grad = empty((n_hidden_layers + 1), dtype=object)
    for i in range(len(theta)):
        theta_grad[i] = zeros(shape=theta[i].shape, dtype=float64)

    m, n = X.shape
    intercept = ones((m, 1), dtype=float64)
    X = append(intercept, X, axis=1)

    for t in range(m):
        z, a = feed_forward(X[[t], :], theta, n_hidden_layers)
        delta = back_propagation(y[t, :], theta, a, z,
                                 num_labels, n_hidden_layers)

        for l in range(len(theta_grad)):
            theta_grad[l] = theta_grad[l] + dot(delta[l + 1].T, a[l])

    for i in range(len(theta_grad)):
        theta_grad[i] = (1 / m) * theta_grad[i]

    # regularization (bias columns excluded)
    for i in range(len(theta_grad)):
        theta_grad[i][:, 1:] = theta_grad[i][:, 1:] + \
            (_lambda / m) * theta[i][:, 1:]

    flat_theta_grad = append(theta_grad[0].flatten(),
                             theta_grad[1].flatten())
    for i in range(2, len(theta_grad)):
        flat_theta_grad = append(flat_theta_grad, theta_grad[i].flatten())

    return flat_theta_grad
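
# Illustrative sketch (not part of the original module): gradient of a toy
# network, fed the flattened parameter vector that `grad` expects. The
# returned vector has one entry per weight, ready for a descent step.
def _example_grad():
    nn_params = init_nn_weights(2, 3, 2)          # sizes chosen arbitrarily
    nn_params = append(nn_params[0].flatten(), nn_params[1].flatten())
    X = ones((4, 2), dtype=float64)               # raw features, no intercept
    y = zeros((4, 1), dtype=float64)
    gradient = grad(X, y, nn_params, _lambda=1, input_layer_size=2,
                    hidden_layer_size=3, num_labels=2)
    assert gradient.shape == (3 * 3 + 2 * 4,)     # one entry per weight
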
def rand_init_weights(L_in, L_out):
    """Initializes weight matrix with random values.

    Args:
        L_in (int): Number of units in previous layer.
        L_out (int): Number of units in next layer.

    Returns:
        numpy.array: Matrix of random values with conforming dimensions.
    """
    # plus 1 column for the bias term
    epsilon_init = sqrt(6) / sqrt((L_in + 1) + L_out)
    W = uniform(size=(L_out, 1 + L_in)) * 2 * epsilon_init - epsilon_init
    return W
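
# Illustrative sketch (not part of the original module): weights are drawn
# uniformly from [-epsilon, epsilon], where epsilon shrinks as the layers
# grow, keeping early activations away from the sigmoid's flat regions.
def _example_rand_init_weights():
    W = rand_init_weights(L_in=2, L_out=3)
    epsilon = sqrt(6) / sqrt((2 + 1) + 3)
    assert W.shape == (3, 3)                  # L_out rows, L_in + 1 columns
    assert (abs(W) <= epsilon).all()
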
def unravel_params(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, n_hidden_layers=1):
    """Unravels flattened array into array of weight matrices.

    Args:
        nn_params (numpy.array): Row vector of model's parameters.
        input_layer_size (int): Number of units in the input layer.
        hidden_layer_size (int): Number of units in a hidden layer.
        num_labels (int): Number of classes in multiclass classification.
        n_hidden_layers (int): Number of hidden layers in network.

    Returns:
        numpy.array(numpy.array): Array with model's weight matrices.
    """
    input_layer_n_units = hidden_layer_size * (input_layer_size + 1)
    hidden_layer_n_units = hidden_layer_size * (hidden_layer_size + 1)
    theta = empty((n_hidden_layers + 1), dtype=object)

    # input layer to hidden layer
    theta[0] = nn_params[0:input_layer_n_units]
    theta[0] = reshape(theta[0], (hidden_layer_size,
                                  (input_layer_size + 1)))

    # hidden layer to hidden layer
    for i in range(1, n_hidden_layers):
        start = input_layer_n_units + (i - 1) * hidden_layer_n_units
        end = input_layer_n_units + i * hidden_layer_n_units
        theta[i] = nn_params[start:end]
        theta[i] = reshape(theta[i],
                           (hidden_layer_size, (hidden_layer_size + 1)))

    # hidden layer to output layer
    start = input_layer_n_units + (n_hidden_layers - 1) * hidden_layer_n_units
    theta[n_hidden_layers] = nn_params[start:]
    theta[n_hidden_layers] = reshape(theta[n_hidden_layers],
                                     (num_labels, (hidden_layer_size + 1)))

    return theta
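
# Illustrative sketch (not part of the original module): rebuilding weight
# matrices from a flat vector, the inverse of the flattening that `grad`
# performs on its result.
def _example_unravel_params():
    flat = ones(3 * 3 + 2 * 4, dtype=float64)      # 17 parameters in a row
    theta = unravel_params(flat, input_layer_size=2, hidden_layer_size=3,
                           num_labels=2, n_hidden_layers=1)
    assert theta[0].shape == (3, 3)                # input -> hidden weights
    assert theta[1].shape == (2, 4)                # hidden -> output weights
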
def init_nn_weights(input_layer_size, hidden_layer_size, num_labels,
                    n_hidden_layers=1):
    """Initializes the weight matrices of a network with random values.

    Args:
        input_layer_size (int): Number of units in the input layer.
        hidden_layer_size (int): Number of units in a hidden layer.
        num_labels (int): Number of classes in multiclass classification.
        n_hidden_layers (int): Number of hidden layers in network.

    Returns:
        numpy.array(numpy.array): Array of weight matrices with random
            values.
    """
    theta = empty((n_hidden_layers + 1), dtype=object)

    theta[0] = rand_init_weights(input_layer_size, hidden_layer_size)

    for l in range(1, n_hidden_layers):
        theta[l] = rand_init_weights(hidden_layer_size, hidden_layer_size)

    theta[n_hidden_layers] = rand_init_weights(hidden_layer_size, num_labels)

    return theta
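
# Illustrative sketch (not part of the original module): a deeper network
# just stacks square hidden-to-hidden matrices between the first and last.
def _example_init_nn_weights():
    theta = init_nn_weights(input_layer_size=2, hidden_layer_size=3,
                            num_labels=2, n_hidden_layers=2)
    assert theta[0].shape == (3, 3)     # input (2 + bias) -> hidden
    assert theta[1].shape == (3, 4)     # hidden (3 + bias) -> hidden
    assert theta[2].shape == (2, 4)     # hidden (3 + bias) -> output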