"""
.. module:: utils
:synopsis: Provides routines of interest to different ML models.
.. moduleauthor:: Benardi Nunes <benardinunes@gmail.com>
"""
from numpy import (zeros, copy, std, mean, float64, exp, seterr,
                   where, array, maximum)


# sigmoid function
def g(x):
    """Applies the sigmoid function to a given value.

    Args:
        x (obj): Input value or object containing values.

    Returns:
        obj: Sigmoid function evaluated at the value.
    """
    return 1 / (1 + exp(-x))


# sigmoid gradient function
def g_grad(x):
    """Calculates the sigmoid gradient at a given value.

    Args:
        x (obj): Input value or object containing values.

    Returns:
        obj: Sigmoid gradient at the value.
    """
    s = g(x)
    return s * (1 - s)
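

# A minimal usage sketch (illustrative, not part of the original module):
# at x = 0 the sigmoid is 0.5, so its gradient g(0) * (1 - g(0)) is 0.25.
def _demo_g():
    x = array([-2.0, 0.0, 2.0])
    return g(x), g_grad(x)  # g(0.0) == 0.5, g_grad(0.0) == 0.25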


def sigmoid(Z):
    """Implements the sigmoid activation in numpy.

    Arguments:
        Z -- numpy array of any shape

    Returns:
        A -- output of sigmoid(Z), same shape as Z
        cache -- returns Z as well, useful during backpropagation
    """
    A = 1 / (1 + exp(-Z))
    cache = Z

    return A, cache


def relu(Z):
    """Implements the ReLU function.

    Arguments:
        Z -- Output of the linear layer, of any shape

    Returns:
        A -- Post-activation parameter, of the same shape as Z
        cache -- the input Z, stored for computing the backward
            pass efficiently
    """
    A = maximum(0, Z)
    assert A.shape == Z.shape

    cache = Z
    return A, cache
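

# A hedged usage sketch (illustrative values, not part of the original
# module): both activations return the post-activation array A together
# with Z cached for the backward pass.
def _demo_activations():
    Z = array([[-1.0, 0.0, 3.0]])
    A_sig, cache = sigmoid(Z)  # cache is simply Z
    A_relu, _ = relu(Z)        # [[0., 0., 3.]]
    return A_sig, A_relu, cache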


def relu_backward(dA, cache):
    """Implements the backward propagation for a single ReLU unit.

    Arguments:
        dA -- post-activation gradient, of any shape
        cache -- 'Z', stored during the forward pass for computing
            backward propagation efficiently

    Returns:
        dZ -- Gradient of the cost with respect to Z
    """
    Z = cache
    dZ = array(dA, copy=True)  # just converting dA to a correct object

    # When Z <= 0, set dZ to 0 as well.
    dZ[Z <= 0] = 0
    assert dZ.shape == Z.shape

    return dZ


def sigmoid_backward(dA, cache):
    """Implements the backward propagation for a single sigmoid unit.

    Arguments:
        dA -- post-activation gradient, of any shape
        cache -- 'Z', stored during the forward pass for computing
            backward propagation efficiently

    Returns:
        dZ -- Gradient of the cost with respect to Z
    """
    Z = cache
    s = 1 / (1 + exp(-Z))
    dZ = dA * s * (1 - s)
    assert dZ.shape == Z.shape

    return dZ
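

# A hedged sketch of the forward/backward pairing (values illustrative):
# the cache returned by the forward pass is fed back into the matching
# *_backward routine along with the upstream gradient dA.
def _demo_backward():
    Z = array([[-1.0, 2.0]])
    dA = array([[1.0, 1.0]])              # upstream gradient, illustrative
    _, cache_r = relu(Z)
    _, cache_s = sigmoid(Z)
    dZ_relu = relu_backward(dA, cache_r)  # [[0., 1.]]: gradient gated by Z > 0
    dZ_sig = sigmoid_backward(dA, cache_s)
    return dZ_relu, dZ_sig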


def BGD(X, y, grad, initial_theta,
        alpha, num_iters, **kwargs):
    """Performs parameter optimization via Batch Gradient Descent.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        y (numpy.array): Column vector of expected values.
        grad (Callable): Routine that computes the partial derivatives
            given theta.
        initial_theta (numpy.array): Initial value for the parameters
            to be optimized.
        alpha (float): Learning rate or step size of the optimization.
        num_iters (int): Number of times the optimization will be performed.

    Returns:
        numpy.array: Optimized model parameters.
    """
    theta = copy(initial_theta)
    for _ in range(num_iters):
        theta = theta - alpha * grad(X, y, theta, **kwargs)

    return theta
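

# A minimal BGD sketch (the data and ``mse_grad`` helper are illustrative,
# not part of the module): fits y = 1 + 2x by descending the gradient of
# the mean squared error.
def _demo_bgd():
    X = array([[1.0, 0.0], [1.0, 1.0], [1.0, 2.0]])  # bias column + feature
    y = array([[1.0], [3.0], [5.0]])

    def mse_grad(X, y, theta):
        # Gradient of (1 / 2m) * ||X @ theta - y||^2 with respect to theta.
        return X.T @ (X @ theta - y) / len(y)

    return BGD(X, y, mse_grad, zeros((2, 1)), 0.1, 1000)  # ~ [[1.], [2.]]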


def SGD(X, y, grad, initial_theta,
        alpha, num_iters, **kwargs):
    """Performs parameter optimization via Stochastic Gradient Descent.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        y (numpy.array): Column vector of expected values.
        grad (Callable): Routine that computes the partial derivatives
            given theta.
        initial_theta (numpy.array): Initial value for the parameters
            to be optimized.
        alpha (float): Learning rate or step size of the optimization.
        num_iters (int): Number of times the optimization will be performed.

    Returns:
        numpy.array: Optimized model parameters.
    """
    m = len(y)
    theta = copy(initial_theta)
    for _ in range(num_iters):
        for i in range(m):
            theta = theta - alpha * grad(X[[i], :], y[[i], :], theta,
                                         **kwargs)
    return theta
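

# A hedged SGD sketch reusing the setup above (illustrative data): note
# that X[[i], :] and y[[i], :] keep their 2-D shape, so ``grad`` always
# sees a one-row dataset.
def _demo_sgd():
    X = array([[1.0, 0.0], [1.0, 1.0], [1.0, 2.0]])
    y = array([[1.0], [3.0], [5.0]])

    def mse_grad(X, y, theta):
        return X.T @ (X @ theta - y) / len(y)

    return SGD(X, y, mse_grad, zeros((2, 1)), 0.05, 500)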


def MBGD(X, y, grad, initial_theta,
         alpha, num_iters, b, **kwargs):
    """Performs parameter optimization via Mini-Batch Gradient Descent.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        y (numpy.array): Column vector of expected values.
        grad (Callable): Routine that computes the partial derivatives
            given theta.
        initial_theta (numpy.array): Initial value for the parameters
            to be optimized.
        alpha (float): Learning rate or step size of the optimization.
        num_iters (int): Number of times the optimization will be performed.
        b (int): Number of examples in each mini-batch.

    Returns:
        numpy.array: Optimized model parameters.
    """
    m = len(y)
    theta = copy(initial_theta)
    steps = list(range(0, m, b))

    for _ in range(num_iters):
        for step in steps[:-1]:
            theta = theta - alpha * grad(X[step:(step + b), :],
                                         y[step:(step + b), :],
                                         theta, **kwargs)
        theta = theta - alpha * grad(X[steps[-1]:, :],
                                     y[steps[-1]:, :],
                                     theta, **kwargs)
    return theta
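

# A hedged MBGD sketch (illustrative data): with b = 2 and m = 4 there are
# two mini-batches per epoch, and the trailing update inside MBGD handles
# the final (possibly shorter) batch.
def _demo_mbgd():
    X = array([[1.0, 0.0], [1.0, 1.0], [1.0, 2.0], [1.0, 3.0]])
    y = array([[1.0], [3.0], [5.0], [7.0]])

    def mse_grad(X, y, theta):
        return X.T @ (X @ theta - y) / len(y)

    return MBGD(X, y, mse_grad, zeros((2, 1)), 0.05, 500, b=2)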


def numerical_grad(J, theta, err):
    """Numerically calculates the gradient of a given cost function.

    Args:
        J (Callable): Function handle that computes cost given theta.
        theta (numpy.array): Model parameters.
        err (float): Distance between the points where J is evaluated.

    Returns:
        numpy.array: Computed numeric gradient.
    """
    num_grad = zeros(theta.shape, dtype=float64)
    perturb = zeros(theta.shape, dtype=float64)

    for i in range(len(theta)):
        perturb[i] = err
        loss1 = J(theta - perturb)
        loss2 = J(theta + perturb)
        num_grad[i] = (loss2 - loss1) / (2 * err)
        perturb[i] = 0

    return num_grad
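

# A hedged gradient-check sketch: for J(theta) = 0.5 * theta.T @ theta the
# analytic gradient is theta itself, so ``numerical_grad`` should roughly
# reproduce theta. The cost function here is illustrative.
def _demo_numerical_grad():
    theta = array([[1.0], [-2.0], [3.0]])

    def J(t):
        return 0.5 * (t.T @ t).item()

    return numerical_grad(J, theta, 1e-4)  # close to theta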


def feature_normalize(X):
    """Performs Z-score normalization on a numeric dataset.

    Args:
        X (numpy.array): Features' dataset plus bias column.

    Returns:
        (numpy.array, numpy.array, numpy.array): A 3-tuple of X_norm,
            the normalized features' dataset, mu, the mean of each
            feature, and sigma, the standard deviation of each feature.
    """
    seterr(divide='ignore', invalid='ignore')
    mu = mean(X, axis=0)
    sigma = std(X, axis=0, ddof=1)
    X_norm = (X - mu) / sigma

    return X_norm, mu, sigma
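

# A hedged sketch (illustrative matrix): after Z-score normalization each
# column of X_norm has mean 0 and sample standard deviation 1 (ddof=1).
def _demo_feature_normalize():
    X = array([[1.0, 200.0], [2.0, 300.0], [3.0, 400.0]])
    X_norm, mu, sigma = feature_normalize(X)
    return X_norm, mu, sigma  # mu == [2., 300.], sigma == [1., 100.]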


def mean_normlztn(Y, R):
    """Performs mean normalization on a numeric dataset.

    :param Y: Scores' dataset.
    :type Y: numpy.array
    :param R: Dataset of 0s and 1s (whether there's a rating).
    :type R: numpy.array
    :returns:
        - Y_norm - Normalized scores' dataset (row wise).
        - Y_mean - Column vector of calculated means.
    :rtype:
        - Y_norm (:py:class:`numpy.array`)
        - Y_mean (:py:class:`numpy.array`)
    """
    m, n = Y.shape
    Y_mean = zeros((m, 1))
    Y_norm = zeros((m, n))

    for i in range(len(R)):
        idx = where(R[i, :] == 1)[0]
        Y_mean[i] = mean(Y[i, idx])
        Y_norm[i, idx] = Y[i, idx] - Y_mean[i]

    return Y_norm, Y_mean
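

# A hedged sketch (toy ratings matrix, illustrative): only the rated
# entries (R == 1) are centered; unrated entries stay 0 in Y_norm.
def _demo_mean_normlztn():
    Y = array([[5.0, 4.0, 0.0],
               [0.0, 2.0, 3.0]])
    R = array([[1, 1, 0],
               [0, 1, 1]])
    Y_norm, Y_mean = mean_normlztn(Y, R)
    return Y_norm, Y_mean  # Y_mean == [[4.5], [2.5]]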