"""
.. module:: sgl_parm
:synopsis: Provides routines to construct a Logistic Regression of
single parameter theta.
.. moduleauthor:: Benardi Nunes <benardinunes@gmail.com>
"""
from numpy import log, zeros
from touvlo.utils import g
# predict function
def p(x, threshold=0.5):
    """Maps a probability to a binary class label.

    Args:
        x (obj): Probability that example belongs to class 1.
        threshold (float): Cut-off value; a probability greater than or
            equal to it is deemed of class 1.

    Returns:
        int: 1 when ``x >= threshold``, 0 otherwise.
    """
    # The comparison is inclusive: a probability equal to the threshold
    # is labelled class 1.
    return 1 if x >= threshold else 0
def h(X, theta):
    """Evaluates the Logistic Regression hypothesis.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        theta (numpy.array): Column vector of model's parameters.

    Raises:
        ValueError: Presumably when the shapes of ``X`` and ``theta`` do
            not allow the product ``X.dot(theta)`` (to be confirmed).

    Returns:
        numpy.array: The probability that each entry belongs to class 1,
        obtained by applying ``g`` to the product of ``X`` and ``theta``.
    """
    linear_term = X.dot(theta)
    return g(linear_term)
def cost_func(X, Y, theta):
    """Computes the cost function J for Logistic Regression.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        Y (numpy.array): Column vector of expected values.
        theta (numpy.array): Column vector of model's parameters.

    Returns:
        numpy.array: Computed cost. With column-vector inputs the dot
        products yield a single-element array holding the cost value.
    """
    m = len(Y)
    # Evaluate the hypothesis once; both log terms reuse the same values.
    prob = h(X, theta)
    J = (1 / m) * ((-Y.T).dot(log(prob))
                   - ((1 - Y).T).dot(log(1 - prob)))
    return J
def reg_cost_func(X, Y, theta, _lambda):
    """Computes the regularized cost function J for Logistic Regression.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        Y (numpy.array): Column vector of expected values.
        theta (numpy.array): Column vector of model's parameters. It is
            indexed as ``theta[1:, :]``, so it must be two-dimensional.
        _lambda (float): The regularization hyperparameter.

    Returns:
        numpy.array: Computed cost with regularization. With column-vector
        inputs the dot products yield a single-element array.
    """
    m = len(Y)
    # Evaluate the hypothesis once; both log terms reuse the same values.
    prob = h(X, theta)
    J = - (1 / m) * ((Y.T).dot(log(prob))
                     + (1 - Y.T).dot(log(1 - prob)))
    # The first row of theta is left out of the penalty (presumably the
    # parameter paired with the bias column).
    penalized = theta[1:, :]
    J = J + (_lambda / (2 * m)) * (penalized.T).dot(penalized)
    return J
def grad(X, Y, theta):
    """Computes the gradient of the cost with respect to theta.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        Y (numpy.array): Column vector of expected values.
        theta (numpy.array): Column vector of model's parameters.

    Returns:
        numpy.array: Gradient column vector.
    """
    scale = 1 / len(Y)
    # Difference between the hypothesis output and the expected values.
    residual = h(X, theta) - Y
    return scale * (X.T).dot(residual)
def reg_grad(X, Y, theta, _lambda):
    """Computes the regularized gradient for Logistic Regression.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        Y (numpy.array): Column vector of expected values.
        theta (numpy.array): Column vector of model's parameters. It is
            indexed as ``theta[1:, :]``, so it must be two-dimensional.
        _lambda (float): The regularization hyperparameter.

    Returns:
        numpy.array: Regularized gradient column vector.
    """
    m = len(Y)
    # No pre-allocation is needed: the dot product builds a fresh array.
    gradient = (1 / m) * (X.T).dot(h(X, theta) - Y)
    # The first row is left unpenalized (presumably the parameter paired
    # with the bias column); every other row gets the regularization term.
    gradient[1:, :] = gradient[1:, :] + (_lambda / m) * theta[1:, :]
    return gradient
def predict_prob(X, theta):
    """Produces the probability that the entries belong to class 1.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        theta (numpy.array): Column vector of model's parameters.

    Raises:
        ValueError: Presumably when the shapes of ``X`` and ``theta`` do
            not allow the product ``X.dot(theta)`` (to be confirmed).

    Returns:
        numpy.array: The probability that each entry belongs to class 1.
    """
    # The predicted probability is exactly the model hypothesis, so reuse
    # it instead of repeating the formula.
    return h(X, theta)
def predict(X, theta):
    """Classifies each entry as class 1 or class 0.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        theta (numpy.array): Column vector of model's parameters.

    Returns:
        numpy.array: Integer array with each entry's classification, in
        the same shape as the probabilities from ``predict_prob``.
    """
    from numpy import vectorize

    # ``p`` compares a single probability inside an ``if``; handing it a
    # whole array raises ValueError as soon as there is more than one
    # entry, so it is applied element by element. ``otypes`` fixes the
    # output dtype, which also lets an empty input through.
    classify = vectorize(p, otypes=[int])
    return classify(predict_prob(X, theta))