"""
.. module:: anmly_detc
:synopsis: Provides routines to perform Anomaly Detection.
.. moduleauthor:: Benardi Nunes <benardinunes@gmail.com>
"""
from functools import reduce
from math import pi
from numpy import array, mean, var, sqrt, exp, power, sum, multiply
from numpy.linalg import det, inv
# predict function
def is_anomaly(p, threshold=0.5):
    """Flags which probabilities fall into class 1 (anomaly).

    Args:
        p (numpy.array): Probability of each example under the fitted
            distribution. A 1-D input is treated as a column vector.
        threshold (float): Point strictly below which an example is
            considered of class 1 (anomaly).

    Returns:
        numpy.array: Integer array holding 1 where the probability is
            below the threshold and 0 elsewhere. A 1-D or (m x 1) input
            yields an (m x 1) column vector.
    """
    probabilities = array(p)
    # Keep the column-vector output the rest of the module works with.
    if probabilities.ndim == 1:
        probabilities = probabilities.reshape(-1, 1)
    # One vectorized comparison replaces the per-element Python loop.
    return (probabilities < threshold).astype(int)
def cov_matrix(X, mu):
    """Calculates the covariance matrix for matrix X (m x n).

    The result is normalized by the number of examples m (not m - 1).

    Args:
        X (numpy.array): Features' dataset (m x n).
        mu (numpy.array): Mean of each feature/column of X.

    Returns:
        numpy.array: Covariance matrix (n x n).
    """
    m, _ = X.shape  # unpacking also enforces a 2-D dataset
    centered = X - mu
    return (1 / m) * centered.T.dot(centered)
def estimate_uni_gaussian(X):
    """Estimates parameters for Univariate Gaussian distribution.

    Args:
        X (numpy.array): Features' dataset (examples in rows, features in
            columns).

    Returns:
        (numpy.array, numpy.array): A 2-tuple of mu, the mean of each
            feature/column of X, and sigma2, the variance of each
            feature/column of X (computed with numpy's default `var`
            settings).
    """
    mu = mean(X, axis=0)
    sigma2 = var(X, axis=0)
    return mu, sigma2
def estimate_multi_gaussian(X):
    """Estimates parameters for Multivariate Gaussian distribution.

    Args:
        X (numpy.array): Features' dataset (m x n).

    Returns:
        (numpy.array, numpy.array): A 2-tuple of mu, the mean of each
            feature/column of X, and sigma, the covariance matrix for X
            as computed by `cov_matrix`.
    """
    mu = mean(X, axis=0)
    return mu, cov_matrix(X, mu)
def uni_gaussian(X, mu, sigma2):
    """Evaluates the Univariate Gaussian density of each example.

    Each feature is scored with its own one-dimensional Gaussian and the
    per-feature densities of an example are multiplied together.

    Args:
        X (numpy.array): Features' dataset (m x n).
        mu (numpy.array): Mean of each feature/column of X.
        sigma2 (numpy.array): Variance of each feature/column of X.

    Returns:
        numpy.array: Column vector (m x 1) with the probability density
            of each example.
    """
    normalizer = 1 / sqrt(2 * pi * sigma2)
    per_feature = normalizer * exp(-power(X - mu, 2) / (2 * sigma2))
    # Row-wise product in numpy instead of a Python-level reduce per row.
    return per_feature.prod(axis=1, keepdims=True)
def multi_gaussian(X, mu, sigma):
    """Evaluates the Multivariate Gaussian density of each example.

    Args:
        X (numpy.array): Features' dataset (m x n).
        mu (numpy.array): Mean of each feature/column of X.
        sigma (numpy.array): Covariance matrix for X (n x n); it is
            inverted, so it must be non-singular.

    Returns:
        numpy.array: Column vector (m x 1) with the probability density
            of each example.
    """
    _, n = X.shape
    centered = X - mu
    # Row-wise quadratic form (x - mu)^T sigma^-1 (x - mu), one per example.
    quadratic = multiply(centered.dot(inv(sigma)), centered).sum(
        axis=1, keepdims=True)
    normalizer = 1 / (power(2 * pi, n / 2) * sqrt(det(sigma)))
    return normalizer * exp(-(1 / 2) * quadratic)
def predict(X, epsilon, gaussian, **kwargs):
    """Predicts whether examples are anomalies.

    Args:
        X (numpy.array): Features' dataset.
        epsilon (float): Point below which an example is considered of
            class 1 (anomaly).
        gaussian (callable): Function that estimates the probability of
            each example; it is called as `gaussian(X=X, **kwargs)`.
        **kwargs: Extra keyword arguments forwarded unchanged to
            `gaussian` (e.g. the distribution parameters it expects).

    Returns:
        numpy.array: Column vector of classification, as produced by
            `is_anomaly`.
    """
    probabilities = gaussian(X=X, **kwargs)
    return is_anomaly(probabilities, threshold=epsilon)