Analyse en composantes principales avec python

Published: 14 novembre 2016

DMCA.com Protection Status

Quelques exemples/tests pour comprendre/faire une analyse en composantes principales (PCA Principal component analysis ) avec python:

Exemple 1 avec sklearn

Analyse en composantes principales en passant par scikit-learn

pca with sklearn
pca with sklearn

from sklearn.decomposition import PCA

import numpy as np
import matplotlib.pyplot as plt

#----------------------------------------------------------------------------------------#

mean = [0,0]
cov = [[40,25],[10,1]]

x1,x2 = np.random.multivariate_normal(mean,cov,1000).T

X = np.c_[x1,x2]

plt.xlim(-25.0,25.0)
plt.ylim(-25.0,25.0)
plt.grid()

plt.scatter(X[:,0],X[:,1])

#----------------------------------------------------------------------------------------#

pca = PCA(n_components=2)

pca.fit(X)

#print(pca.explained_variance_ratio_) 
#print(pca.components_)

axis = pca.components_.T
axis /= axis.std()
x_axis, y_axis = axis

plt.plot(0.1 * x_axis, 0.1 * y_axis, linewidth=1 )
plt.quiver(0, 0, x_axis, y_axis, zorder=11, width=0.01, scale=6, color='red')

plt.savefig('pca_example_1.png')
plt.show()
Exemple 2 ICA et PCA avec sklearn

Comparaison entre ICA (independent component analysis) et PCA (Principal component analysis) (source)

comparison ICA PCA avec sklearn
comparison ICA PCA avec sklearn

# Authors: Alexandre Gramfort, Gael Varoquaux
# License: BSD 3 clause

import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA, FastICA

rng = np.random.RandomState(42)
S = rng.standard_t(1.5, size=(20000, 2))
S[:, 0] *= 2.

# Mix data
A = np.array([[1, 1], [0, 2]])  # Mixing matrix

X = np.dot(S, A.T)  # Generate observations

pca = PCA()
S_pca_ = pca.fit(X).transform(X)

ica = FastICA(random_state=rng)
S_ica_ = ica.fit(X).transform(X)  # Estimate the sources

S_ica_ /= S_ica_.std(axis=0)

def plot_samples(S, axis_list=None):
    plt.scatter(S[:, 0], S[:, 1], s=2, marker='o', zorder=10,
                color='steelblue', alpha=0.5)
    if axis_list is not None:
        colors = ['orange', 'red']
        for color, axis in zip(colors, axis_list):
            axis /= axis.std()
            x_axis, y_axis = axis
            # Trick to get legend to work
            plt.plot(0.1 * x_axis, 0.1 * y_axis, linewidth=2, color=color)
            plt.quiver(0, 0, x_axis, y_axis, zorder=11, width=0.01, scale=6,
                       color=color)

    plt.hlines(0, -3, 3)
    plt.vlines(0, -3, 3)
    plt.xlim(-3, 3)
    plt.ylim(-3, 3)
    plt.xlabel('x')
    plt.ylabel('y')

plt.figure()
plt.subplot(2, 2, 1)
plot_samples(S / S.std())
plt.title('True Independent Sources')

axis_list = [pca.components_.T, ica.mixing_]

plt.subplot(2, 2, 2)
plot_samples(X / np.std(X), axis_list=axis_list)
legend = plt.legend(['PCA', 'ICA'], loc='upper right')
legend.set_zorder(100)

plt.title('Observations')

plt.subplot(2, 2, 3)
plot_samples(S_pca_ / np.std(S_pca_, axis=0))
plt.title('PCA recovered signals')

plt.subplot(2, 2, 4)
plot_samples(S_ica_ / np.std(S_ica_))
plt.title('ICA recovered signals')

plt.subplots_adjust(0.09, 0.04, 0.94, 0.94, 0.26, 0.36)

plt.savefig('pca_example_2.png',bbox_inches='tight')
plt.show()
Exemple 3 avec numpy

Exemple d'analyse en composantes principales en utilisant numpy (source by unutbu)

pca using numpy size:30
pca using numpy size:30

# http://stackoverflow.com/questions/18299523/basic-example-for-pca-with-matplotlib
# unutbu

import numpy as np
import matplotlib.pyplot as plt

N = 1000
xTrue = np.linspace(0, 1000, N)
yTrue = 3 * xTrue
xData = xTrue + np.random.normal(0, 100, N)
yData = yTrue + np.random.normal(0, 100, N)
xData = np.reshape(xData, (N, 1))
yData = np.reshape(yData, (N, 1))
data = np.hstack((xData, yData))

mu = data.mean(axis=0)
data = data - mu
# data = (data - mu)/data.std(axis=0)  # Uncommenting this reproduces mlab.PCA results
eigenvectors, eigenvalues, V = np.linalg.svd(data.T, full_matrices=False)
projected_data = np.dot(data, eigenvectors)
sigma = projected_data.std(axis=0).mean()
print(eigenvectors)

fig, ax = plt.subplots()
ax.scatter(xData, yData)
for axis in eigenvectors:
    start, end = mu, mu + sigma * axis
    ax.annotate(
        '', xy=end, xycoords='data',
        xytext=start, textcoords='data',
        arrowprops=dict(facecolor='red', width=2.0))
ax.set_aspect('equal')

plt.grid()
plt.xticks(rotation=90)
plt.savefig('pca_example_3.png',bbox_inches='tight')
plt.show()

Recherches associées

Image

of