# Source code for touvlo.lgx_rg.sgl_parm

"""
.. module:: sgl_parm
    :synopsis: Provides routines to construct a Logistic Regression of
        single parameter theta.

.. moduleauthor:: Benardi Nunes <benardinunes@gmail.com>
"""

from numpy import asarray, log, vectorize, zeros

from touvlo.utils import g


# predict function
def p(x, threshold=0.5):
    """Predicts whether a probability falls into class 1.

    Accepts a single probability or an array of probabilities; arrays are
    thresholded element-wise.

    Args:
        x (float or numpy.array): Probability (or probabilities) that an
            example belongs to class 1.
        threshold (float): Point at or above which a probability is deemed
            of class 1.

    Returns:
        int or numpy.array: 1 or 0 for a scalar input; an integer array with
        the same shape as ``x`` for an array input.
    """
    # Compare element-wise so a whole column of probabilities can be
    # classified at once; branching on ``x >= threshold`` directly fails for
    # arrays with more than one element (ambiguous truth value).
    labels = (asarray(x) >= threshold).astype(int)
    if labels.ndim == 0:
        # Scalar input: keep returning a plain Python int.
        return int(labels)
    return labels


def h(X, theta):
    """Logistic regression hypothesis.

    Applies ``g`` (imported from ``touvlo.utils``; expected to be the
    logistic sigmoid -- confirm against that module) to the linear
    combination ``X.dot(theta)``.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        theta (numpy.array): Column vector of model's parameters.

    Raises:
        ValueError: If the shapes of ``X`` and ``theta`` are not aligned for
            the dot product.

    Returns:
        numpy.array: The probability that each entry belong to class 1.
    """
    return g(X.dot(theta))


def cost_func(X, Y, theta):
    """Computes the cost function J for Logistic Regression.

    Evaluates the cross-entropy expression
    ``(1 / m) * (-Y' log(h) - (1 - Y)' log(1 - h))`` where ``h`` is the
    hypothesis for ``X`` and ``theta`` and ``m`` is the number of examples.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        Y (numpy.array): Column vector of expected values.
        theta (numpy.array): Column vector of model's parameters.

    Returns:
        float: Computed cost. NOTE(review): with column-vector inputs the dot
        products yield a 1x1 numpy.array rather than a bare float.
    """
    m = len(Y)
    # Evaluate the hypothesis once and reuse it in both log terms.
    prob = h(X, theta)
    J = (1 / m) * ((-Y.T).dot(log(prob))
                   - ((1 - Y).T).dot(log(1 - prob)))
    return J


def reg_cost_func(X, Y, theta, _lambda):
    """Computes the regularized cost function J for Logistic Regression.

    Adds an L2 penalty ``(_lambda / (2 * m)) * theta[1:]' theta[1:]`` to the
    cross-entropy cost; the first row of ``theta`` is excluded from the
    penalty.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        Y (numpy.array): Column vector of expected values.
        theta (numpy.array): Column vector of model's parameters. Must be
            two-dimensional, since it is sliced as ``theta[1:, :]``.
        _lambda (float): The regularization hyperparameter.

    Returns:
        float: Computed cost with regularization. NOTE(review): with
        column-vector inputs the dot products yield a 1x1 numpy.array rather
        than a bare float.
    """
    m = len(Y)
    # Evaluate the hypothesis once and reuse it in both log terms.
    prob = h(X, theta)
    J = - (1 / m) * ((Y.T).dot(log(prob))
                     + (1 - Y.T).dot(log(1 - prob)))

    # Row 0 of theta is left out of the penalty term.
    penalized = theta[1:, :]
    J = J + (_lambda / (2 * m)) * (penalized.T).dot(penalized)
    return J


def grad(X, Y, theta):
    """Computes the gradient for the parameters theta.

    Evaluates ``(1 / m) * X' (h(X, theta) - Y)`` where ``m`` is the number
    of examples.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        Y (numpy.array): Column vector of expected values.
        theta (numpy.array): Column vector of model's parameters.

    Returns:
        numpy.array: Gradient column vector.
    """
    m = len(Y)
    # Named distinctly so the local does not shadow this function's name.
    gradient = (1 / m) * (X.T).dot(h(X, theta) - Y)
    return gradient


def reg_grad(X, Y, theta, _lambda):
    """Computes the regularized gradient for Logistic Regression.

    Starts from the unregularized gradient ``(1 / m) * X' (h(X, theta) - Y)``
    and adds ``(_lambda / m) * theta`` to every row except the first.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        Y (numpy.array): Column vector of expected values.
        theta (numpy.array): Column vector of model's parameters. Must be
            two-dimensional, since it is sliced as ``theta[1:, :]``.
        _lambda (float): The regularization hyperparameter.

    Returns:
        numpy.array: Regularized gradient column vector.
    """
    m = len(Y)
    gradient = (1 / m) * (X.T).dot(h(X, theta) - Y)

    # Row 0 (presumably the bias term -- confirm against how X is built) is
    # left unregularized; the update is written into the freshly computed
    # gradient array, so no caller-owned data is mutated.
    gradient[1:, :] = gradient[1:, :] + (_lambda / m) * theta[1:, :]

    return gradient


def predict_prob(X, theta):
    """Produces the probability that the entries belong to class 1.

    Applies ``g`` (imported from ``touvlo.utils``) to ``X.dot(theta)``.

    Args:
        X (numpy.array): Features' dataset plus bias column.
        theta (numpy.array): Column vector of model's parameters.

    Raises:
        ValueError: If the shapes of ``X`` and ``theta`` are not aligned for
            the dot product.

    Returns:
        numpy.array: The probability that each entry belong to class 1.
    """
    return g(X.dot(theta))


def predict(X, theta):
    """Classifies each entry as class 1 or class 0.

    Computes the class-1 probability of every entry with ``predict_prob``
    and thresholds each one with ``p`` (using its default threshold).

    Args:
        X (numpy.array): Features' dataset plus bias column.
        theta (numpy.array): Column vector of model's parameters.

    Returns:
        numpy.array: Column vector with each entry classification.
    """
    # Apply p entry by entry: handing it the whole probability array makes
    # it branch on the truth value of an array, which numpy rejects as soon
    # as there is more than one entry. otypes keeps empty inputs working.
    classify = vectorize(p, otypes=[int])
    return classify(predict_prob(X, theta))