Source code for touvlo.nnet.cmpt_grf

"""
.. module:: cmpt_grf
    :synopsis: Provides routines to construct a Computation Graph based
        Classification Neural Network.

.. moduleauthor:: Benardi Nunes <benardinunes@gmail.com>
"""

from numpy.random import seed, randn
from numpy import (dot, log, divide, zeros, squeeze)
from numpy import sum as add

from touvlo.utils import sigmoid, sigmoid_backward, relu, relu_backward


[docs]def init_params(layer_dims, _seed=1): """Creates numpy arrays to to represent the weight matrices and intercepts of the Neural Network. Args: layer_dims (list[int]): List of numbers representing the dimensions of each layer in our network. _seed (int): Seed to make function reproducible despite randomness. Returns: dict: Single dictionary containing your parameters "W1", "b1", ..., "WL", "bL" where Wl is a weight matrix of shape (layer_dims[l], layer_dims[l-1]) and bl is the bias vector of shape (layer_dims[l], 1). """ seed(_seed) parameters = {} L = len(layer_dims) # number of layers in the network for l in range(1, L): parameters['W' + str(l)] = randn(layer_dims[l], layer_dims[l - 1]) * 0.01 parameters['b' + str(l)] = zeros(shape=(layer_dims[l], 1)) return parameters
[docs]def linear_forward(A, W, b): """ Implement the linear part of a layer's forward propagation. Args: A (numpy.array): activations from previous layer (or input data): (size of previous layer, number of examples). W (numpy.array): weights matrix: numpy array of shape (size of current layer, size of previous layer). b (numpy.array): bias vector, numpy array of shape (size of the current layer, 1). Returns: (numpy.array, dict): A 2-tuple consisting of the input of the activation function, also called pre-activation parameter and a python tuple containing "A", "W" and "b" ; stored for computing the backward pass efficiently. """ Z = dot(W, A) + b cache = (A, W, b) return Z, cache
[docs]def linear_activation_forward(A_prev, W, b, activation): """ Implement the forward propagation for the LINEAR->ACTIVATION layer Args: A_prev (numpy.array): activations from previous layer (or input data): (size of previous layer, number of examples) W (numpy.array) weights matrix: numpy array of shape (size of current layer, size of previous layer) b (float): bias vector, numpy array of shape (size of the current layer, 1) activation (str): the activation to be used in this layer, stored as a text string: "sigmoid" or "relu" Returns: (numpy.array, dict): A 2-tuple consisting of the output of the activation function, also called the post-activation value and a python tuple containing "linear_cache" and "activation_cache"; stored for computing the backward pass efficiently. """ if activation == "sigmoid": # Inputs: "A_prev, W, b". Outputs: "A, activation_cache". Z, linear_cache = linear_forward(A_prev, W, b) A, activation_cache = sigmoid(Z) elif activation == "relu": # Inputs: "A_prev, W, b". Outputs: "A, activation_cache". Z, linear_cache = linear_forward(A_prev, W, b) A, activation_cache = relu(Z) cache = (linear_cache, activation_cache) return A, cache
[docs]def L_model_forward(X, parameters): """ Implements forward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID computation. Args: X (numpy.array) data of shape (input size, number of examples) parameters (dict) output of initialize_parameters_deep() Returns: (numpy.array, list[tuple]): A 2-tuple consisting of the last post-activation value and a list of caches containing every cache of linear_activation_forward(), (there are L-1 of them, indexed from 0 to L-1). """ caches = [] A = X # number of layers in the neural network L = len(parameters) // 2 # Implement [LINEAR -> RELU]*(L-1). Add "cache" to the "caches" list. for l in range(1, L): A_prev = A A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], "relu") caches.append(cache) # Implement LINEAR -> SIGMOID. Add "cache" to the "caches" list. AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], "sigmoid") caches.append(cache) return AL, caches
[docs]def compute_cost(AL, Y): """ Implements the cost function. Args: AL (numpy.array): probability vector corresponding to your label predictions, shape (1, number of examples) Y (numpy.array): true "label" vector (for example: containing 0 if non-cat, 1 if cat), shape (1, number of examples) Returns: float: cross-entropy cost """ m = Y.shape[1] # Compute loss from aL and y. cost = -(1 / m) * add(Y * log(AL) + (1 - Y) * log(1 - AL)) # this turns [[17]] into 17 cost = squeeze(cost) return cost
[docs]def linear_backward(dZ, cache): """ Implements the linear portion of backward propagation for a single layer (layer l). Args: dZ (numpy.array): Gradient of the cost with respect to the linear output (of current layer l). cache (tuple): values (A_prev, W, b) coming from the forward propagation in the current layer. Returns: (numpy.array, numpy.array, float): A 3-tuple consisting of the Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev, the Gradient of the cost with respect to W (current layer l),same shape as W and the Gradient of the cost with respect to b (current layer l), same shape as b. """ A_prev, W, b = cache m = A_prev.shape[1] dW = (1 / m) * dot(dZ, A_prev.T) db = (1 / m) * add(dZ, axis=1, keepdims=True) dA_prev = dot(W.T, dZ) return dA_prev, dW, db
[docs]def linear_activation_backward(dA, cache, activation): """ Implements the backward propagation for the LINEAR -> ACTIVATION layer. Args: dA (numpy.array): post-activation gradient for current layer l. cache (tuple): values (linear_cache, activation_cache) we store for computing backward propagation efficiently activation (str): the activation to be used in this layer, stored as a text string: "sigmoid" or "relu". Returns: (numpy.array, numpy.array, float): A 3-tuple consisting of the Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev, the Gradient of the cost with respect to W (current layer l), same shape as W and the Gradient of the cost with respect to b (current layer l), same shape as b. """ linear_cache, activation_cache = cache if activation == "relu": dZ = relu_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) elif activation == "sigmoid": dZ = sigmoid_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) return dA_prev, dW, db
[docs]def L_model_backward(AL, Y, caches): """ Implements the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group. Args: AL (numpy.array): probability vector, output of the forward propagation (L_model_forward()). Y (numpy.array): true "label" vector (containing 0 if non-cat, 1 if cat) caches (list(tuple)): list of caches containing every cache of linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1) i.e l = 0...L-2) the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1]). Returns: (dict): A dictionary with the gradients: - grads["dA" + str(l)] = ... - grads["dW" + str(l)] = ... - grads["db" + str(l)] = ... """ grads = {} L = len(caches) # the number of layers Y = Y.reshape(AL.shape) # after this line, Y is the same shape as AL # Initializing the backpropagation dAL = - (divide(Y, AL) - divide(1 - Y, 1 - AL)) # Lth layer (SIGMOID -> LINEAR) gradients. # Inputs: "dAL, current_cache". # Outputs: "grads["dAL-1"], grads["dWL"], grads["dbL"] current_cache = caches[L - 1] (grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)]) = linear_activation_backward(dAL, current_cache, "sigmoid") # Loop from l=L-2 to l=0 for l in reversed(range(L - 1)): # lth layer: (RELU -> LINEAR) gradients. # Inputs: "grads["dA" + str(l + 1)], current_cache". # Outputs: "grads["dA" + str(l)] , # grads["dW" + str(l + 1)], # grads["db" + str(l + 1)] current_cache = caches[l] dA_prev_temp, dW_temp, db_temp = linear_activation_backward( grads["dA" + str(l + 1)], current_cache, "relu") grads["dA" + str(l)] = dA_prev_temp grads["dW" + str(l + 1)] = dW_temp grads["db" + str(l + 1)] = db_temp return grads
[docs]def update_parameters(parameters, grads, learning_rate): """ Updates parameters using gradient descent. Args: parameters (dict): dictionary containing your parameters grads (dict): dictionary containing your gradients, output of L_model_backward Returns: (dict) dictionary containing your updated parameters - parameters["W" + str(l)] = ... - parameters["b" + str(l)] = ... """ L = len(parameters) // 2 # number of layers in the neural network # Update rule for each parameter. Use a for loop. for l in range(L): parameters["W" + str(l + 1)] -= learning_rate * \ grads["dW" + str(l + 1)] parameters["b" + str(l + 1)] -= learning_rate * \ grads["db" + str(l + 1)] return parameters