Source code for touvlo.unsupv.pca

"""
.. module:: pca
    :synopsis: Provides routines to perform Principal Component Analysis.

.. moduleauthor:: Benardi Nunes <benardinunes@gmail.com>
"""

from numpy.linalg import svd
from numpy import diag


def pca(X):
    """Runs Principal Component Analysis on dataset

    The matrix ``(1 / m) * X.T.dot(X)`` is built from ``X`` exactly as given
    (no centering or scaling is applied here) and then decomposed with SVD.

    Args:
        X (numpy.array): Features' dataset, one sample per row.

    Returns:
        (numpy.array, numpy.array): A 2-tuple of U, eigenvectors of
            covariance matrix (one per column), and S, eigenvalues (on
            diagonal) of covariance matrix.
    """
    # Unpacking into two names requires X to be 2-dimensional.
    n_samples, _ = X.shape
    covariance = (1 / n_samples) * X.T.dot(X)

    # svd returns the values as a 1-D vector; diag expands it into a matrix.
    eigenvectors, eigenvalues, _ = svd(covariance)
    return eigenvectors, diag(eigenvalues)


def project_data(X, U, k):
    """Computes reduced data representation (projected data)

    Args:
        X (numpy.array): Normalized features' dataset
        U (numpy.array): eigenvectors of covariance matrix
        k (int): Number of features in reduced data representation

    Returns:
        numpy.array: Reduced data representation (projection), i.e. ``X``
            multiplied by the first ``k`` columns of ``U``.
    """
    # Only the leading k columns of U take part in the projection.
    return X.dot(U[:, :k])


def recover_data(Z, U, k):
    """Recovers an approximation of original data using the projected data

    Args:
        Z (numpy.array): Reduced data representation (projection)
        U (numpy.array): eigenvectors of covariance matrix
        k (int): Number of features in reduced data representation

    Returns:
        numpy.array: Approximated features' dataset, i.e. ``Z`` multiplied by
            the transpose of the first ``k`` columns of ``U``.
    """
    # Same leading-k column slice that the projection step takes from U.
    basis = U[:, :k]
    return Z.dot(basis.T)