Source code for touvlo.utils

"""
.. module:: utils
    :synopsis: Provides routines of interest to different ML models.

.. moduleauthor:: Benardi Nunes <benardinunes@gmail.com>
"""

from numpy import (zeros, copy, std, mean, float64, exp, seterr,
                   where, array, maximum)


# sigmoid function
def g(x):
    """This function applies the sigmoid function on a given value.

    Args:
        x (obj): Input value or object containing value.

    Returns:
        obj: Sigmoid function at value.
    """
    return 1 / (1 + exp(-x))

# sigmoid gradient function
def g_grad(x):
    """This function calculates the sigmoid gradient at a given value.

    Args:
        x (obj): Input value or object containing value.

    Returns:
        obj: Sigmoid gradient at value.
    """
    s = g(x)
    return s * (1 - s)

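# A minimal usage sketch (not part of the original module): at x = 0 the
# sigmoid evaluates to 0.5 and its gradient to 0.25, which is a quick sanity
# check for g and g_grad. The helper name below is hypothetical.
def _demo_sigmoid():
    from numpy import array

    x = array([-1.0, 0.0, 1.0])
    print(g(x))       # approximately [0.2689, 0.5, 0.7311]
    print(g_grad(x))  # approximately [0.1966, 0.25, 0.1966]
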
def sigmoid(Z):
    """Implements the sigmoid activation in numpy.

    Arguments:
        Z -- numpy array of any shape

    Returns:
        A -- output of sigmoid(Z), same shape as Z
        cache -- returns Z as well, useful during backpropagation
    """
    A = 1 / (1 + exp(-Z))
    cache = Z

    return A, cache

def relu(Z):
    """Implements the RELU function.

    Arguments:
        Z -- Output of the linear layer, of any shape

    Returns:
        A -- Post-activation parameter, of the same shape as Z
        cache -- returns Z; stored for computing the backward pass efficiently
    """
    A = maximum(0, Z)
    assert A.shape == Z.shape

    cache = Z
    return A, cache

def relu_backward(dA, cache):
    """Implements the backward propagation for a single RELU unit.

    Arguments:
        dA -- post-activation gradient, of any shape
        cache -- 'Z', stored during the forward pass for computing backward
            propagation efficiently

    Returns:
        dZ -- Gradient of the cost with respect to Z
    """
    Z = cache
    dZ = array(dA, copy=True)  # just converting dA to a correct object

    # Where Z <= 0, the gradient is 0 as well.
    dZ[Z <= 0] = 0
    assert dZ.shape == Z.shape

    return dZ

def sigmoid_backward(dA, cache):
    """Implements the backward propagation for a single SIGMOID unit.

    Arguments:
        dA -- post-activation gradient, of any shape
        cache -- 'Z', stored during the forward pass for computing backward
            propagation efficiently

    Returns:
        dZ -- Gradient of the cost with respect to Z
    """
    Z = cache
    s = 1 / (1 + exp(-Z))
    dZ = dA * s * (1 - s)
    assert dZ.shape == Z.shape

    return dZ

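# A minimal sketch (not part of the original module) of how the forward
# activations and their *_backward counterparts pair up through the cache.
# The variable values and the helper name are illustrative only.
def _demo_activation_backward():
    from numpy import array

    Z = array([[-2.0, 0.5], [1.0, -0.1]])
    dA = array([[1.0, 1.0], [1.0, 1.0]])

    A_relu, cache = relu(Z)
    dZ_relu = relu_backward(dA, cache)    # zero wherever Z <= 0

    A_sig, cache = sigmoid(Z)
    dZ_sig = sigmoid_backward(dA, cache)  # dA scaled by sigmoid'(Z)

    print(A_relu, dZ_relu, A_sig, dZ_sig, sep="\n")
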
def BGD(X, y, grad, initial_theta, alpha, num_iters, **kwargs):
    """Performs parameter optimization via Batch Gradient Descent.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        y (numpy.array): Column vector of expected values.
        grad (Callable): Routine that generates the partial derivatives
            given theta.
        initial_theta (numpy.array): Initial value for parameters to be
            optimized.
        alpha (float): Learning rate or step size of the optimization.
        num_iters (int): Number of times the optimization will be performed.

    Returns:
        numpy.array: Optimized model parameters.
    """
    theta = copy(initial_theta)
    for _ in range(num_iters):
        theta = theta - alpha * grad(X, y, theta, **kwargs)

    return theta

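# A minimal sketch (not part of the original module) of running BGD with a
# hand-written least-squares gradient. The gradient routine, data and helper
# name below are hypothetical; touvlo's own models supply their own grad
# functions.
def _demo_bgd():
    import numpy as np

    def mse_grad(X, y, theta):
        m = len(y)
        return (1 / m) * X.T.dot(X.dot(theta) - y)

    X = np.array([[1.0, 0.0], [1.0, 1.0], [1.0, 2.0]])  # bias column + feature
    y = np.array([[1.0], [3.0], [5.0]])                 # y = 1 + 2x
    theta0 = np.zeros((2, 1))

    theta = BGD(X, y, mse_grad, theta0, alpha=0.1, num_iters=1000)
    print(theta)  # should approach [[1.], [2.]]
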
def SGD(X, y, grad, initial_theta, alpha, num_iters, **kwargs):
    """Performs parameter optimization via Stochastic Gradient Descent.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        y (numpy.array): Column vector of expected values.
        grad (Callable): Routine that generates the partial derivatives
            given theta.
        initial_theta (numpy.array): Initial value for parameters to be
            optimized.
        alpha (float): Learning rate or step size of the optimization.
        num_iters (int): Number of times the optimization will be performed.

    Returns:
        numpy.array: Optimized model parameters.
    """
    m = len(y)
    theta = copy(initial_theta)

    for _ in range(num_iters):
        for i in range(m):
            theta = theta - alpha * grad(X[[i], :], y[[i], :], theta,
                                         **kwargs)

    return theta

def MBGD(X, y, grad, initial_theta, alpha, num_iters, b, **kwargs):
    """Performs parameter optimization via Mini-Batch Gradient Descent.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        y (numpy.array): Column vector of expected values.
        grad (Callable): Routine that generates the partial derivatives
            given theta.
        initial_theta (numpy.array): Initial value for parameters to be
            optimized.
        alpha (float): Learning rate or step size of the optimization.
        num_iters (int): Number of times the optimization will be performed.
        b (int): Number of examples in each mini-batch.

    Returns:
        numpy.array: Optimized model parameters.
    """
    m = len(y)
    theta = copy(initial_theta)
    _steps = list(range(0, m, b))

    for _ in range(num_iters):
        for _step in _steps[:-1]:
            theta = theta - alpha * grad(X[_step:(_step + b), :],
                                         y[_step:(_step + b), :],
                                         theta, **kwargs)
        # the last mini-batch may hold fewer than b examples
        theta = theta - alpha * grad(X[_steps[-1]:, :],
                                     y[_steps[-1]:, :],
                                     theta, **kwargs)

    return theta

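# A minimal sketch (not part of the original module) contrasting SGD and MBGD
# on the same hypothetical least-squares gradient; MBGD consumes the data b
# examples at a time, with a possibly smaller final batch. Data, parameters
# and helper name are illustrative only.
def _demo_sgd_mbgd():
    import numpy as np

    def mse_grad(X, y, theta):
        m = len(y)
        return (1 / m) * X.T.dot(X.dot(theta) - y)

    rng = np.random.default_rng(0)
    x = rng.uniform(0, 1, size=(50, 1))
    X = np.hstack([np.ones((50, 1)), x])
    y = 1 + 2 * x                                   # exact linear target
    theta0 = np.zeros((2, 1))

    theta_sgd = SGD(X, y, mse_grad, theta0, alpha=0.05, num_iters=50)
    theta_mbgd = MBGD(X, y, mse_grad, theta0, alpha=0.1, num_iters=500, b=16)
    print(theta_sgd, theta_mbgd, sep="\n")          # both should approach [[1.], [2.]]
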
def numerical_grad(J, theta, err):
    """Numerically calculates the gradient of a given cost function.

    Args:
        J (Callable): Function handle that computes cost given theta.
        theta (numpy.array): Model parameters.
        err (float): Distance between points where J is evaluated.

    Returns:
        numpy.array: Computed numeric gradient.
    """
    num_grad = zeros(theta.shape, dtype=float64)
    perturb = zeros(theta.shape, dtype=float64)

    for i in range(len(theta)):
        perturb[i] = err
        loss1 = J(theta - perturb)
        loss2 = J(theta + perturb)
        num_grad[i] = (loss2 - loss1) / (2 * err)
        perturb[i] = 0

    return num_grad

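# A minimal gradient-checking sketch (not part of the original module): for
# the hypothetical cost J(theta) = sum(theta ** 2) the analytic gradient is
# 2 * theta, so numerical_grad should reproduce it closely.
def _demo_grad_check():
    import numpy as np

    def J(theta):
        return float((theta ** 2).sum())

    theta = np.array([1.0, -2.0, 0.5])
    analytic = 2 * theta
    numeric = numerical_grad(J, theta, err=1e-4)
    print(np.max(np.abs(numeric - analytic)))  # should be close to 0
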
def feature_normalize(X):
    """Performs Z score normalization in a numeric dataset.

    Args:
        X (numpy.array): Features' dataset plus bias column.

    Returns:
        (numpy.array, numpy.array, numpy.array): A 3-tuple of X_norm,
            normalized features' dataset, mu, mean of each feature, and
            sigma, standard deviation of each feature.
    """
    seterr(divide='ignore', invalid='ignore')
    mu = mean(X, axis=0)
    sigma = std(X, axis=0, ddof=1)
    X_norm = (X - mu) / sigma

    return X_norm, mu, sigma

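# A minimal sketch (not part of the original module): each column of the
# hypothetical matrix below ends up with mean 0 and unit sample standard
# deviation after Z-score normalization.
def _demo_feature_normalize():
    import numpy as np

    X = np.array([[1.0, 200.0], [2.0, 300.0], [3.0, 400.0]])
    X_norm, mu, sigma = feature_normalize(X)
    print(mu)      # [  2. 300.]
    print(sigma)   # [  1. 100.]
    print(X_norm)  # columns have mean 0 and unit sample std
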
def mean_normlztn(Y, R):
    """Performs mean normalization in a numeric dataset.

    :param Y: Scores' dataset.
    :type Y: numpy.array
    :param R: Dataset of 0s and 1s (whether there's a rating).
    :type R: numpy.array
    :returns:
        - Y_norm - Normalized scores' dataset (row wise).
        - Y_mean - Column vector of calculated means.
    :rtype:
        - Y_norm (:py:class: numpy.array)
        - Y_mean (:py:class: numpy.array)
    """
    m, n = Y.shape
    Y_mean = zeros((m, 1))
    Y_norm = zeros((m, n))

    for i in range(len(R)):
        idx = where(R[i, :] == 1)[0]
        Y_mean[i] = mean(Y[i, idx])
        Y_norm[i, idx] = Y[i, idx] - Y_mean[i]

    return Y_norm, Y_mean
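
# A minimal sketch (not part of the original module): rows are items and
# columns are users in the hypothetical ratings matrix below; only entries
# flagged in R are averaged and shifted, the rest stay at 0 in Y_norm.
def _demo_mean_normlztn():
    import numpy as np

    Y = np.array([[5.0, 0.0, 4.0],
                  [3.0, 1.0, 0.0]])
    R = np.array([[1, 0, 1],
                  [1, 1, 0]])
    Y_norm, Y_mean = mean_normlztn(Y, R)
    print(Y_mean)  # [[4.5], [2.]]
    print(Y_norm)  # [[ 0.5  0.  -0.5], [ 1.  -1.   0. ]]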