Quelques exemples/tests pour comprendre/faire une analyse en composantes principales (PCA, Principal Component Analysis) avec Python :
Exemple 1 avec sklearn
Analyse en composantes principales en passant par scikit-learn
# Example 1: PCA on a correlated 2-D Gaussian cloud, drawing the two
# principal axes over the scatter plot.
from sklearn.decomposition import PCA
import numpy as np
import matplotlib.pyplot as plt

# ---------------------------------------------------------------------------#
# Sample 1000 points from a 2-D Gaussian.
mean = [0, 0]
# BUG FIX: the original covariance [[40, 25], [10, 1]] was not symmetric,
# hence not a valid covariance matrix; np.random.multivariate_normal warns
# ("covariance is not symmetric positive-semidefinite") and the resulting
# sample distribution is undefined. Use a symmetric positive-definite
# matrix with the same kind of strong x1/x2 correlation instead.
cov = [[40, 10], [10, 5]]
x1, x2 = np.random.multivariate_normal(mean, cov, 1000).T
X = np.c_[x1, x2]  # stack coordinates into a (1000, 2) observation matrix

plt.xlim(-25.0, 25.0)
plt.ylim(-25.0, 25.0)
plt.grid()
plt.scatter(X[:, 0], X[:, 1])
# ---------------------------------------------------------------------------#
# Fit a full 2-component PCA on the cloud.
pca = PCA(n_components=2)
pca.fit(X)
# print(pca.explained_variance_ratio_)
# print(pca.components_)

axis = pca.components_.T  # columns of .T = the two principal directions
axis /= axis.std()        # rescale so the drawn arrows have a visible length
x_axis, y_axis = axis
# Short segments first (keeps a handle for a legend), then arrows from the
# origin along each principal direction.
plt.plot(0.1 * x_axis, 0.1 * y_axis, linewidth=1)
plt.quiver(0, 0, x_axis, y_axis, zorder=11, width=0.01, scale=6, color='red')

plt.savefig('pca_example_1.png')
plt.show()
Exemple 2 ICA et PCA avec sklearn
Comparaison entre ICA (Independent Component Analysis) et PCA (Principal Component Analysis) (source)

# Authors: Alexandre Gramfort, Gael Varoquaux# License: BSD 3 clauseimport numpy as npimport matplotlib.pyplot as pltfrom sklearn.decomposition import PCA, FastICArng = np.random.RandomState(42)S = rng.standard_t(1.5, size=(20000, 2))S[:, 0] *= 2.# Mix dataA = np.array([[1, 1], [0, 2]]) # Mixing matrixX = np.dot(S, A.T) # Generate observationspca = PCA()S_pca_ = pca.fit(X).transform(X)ica = FastICA(random_state=rng)S_ica_ = ica.fit(X).transform(X) # Estimate the sourcesS_ica_ /= S_ica_.std(axis=0)def plot_samples(S, axis_list=None):plt.scatter(S[:, 0], S[:, 1], s=2, marker='o', zorder=10,color='steelblue', alpha=0.5)if axis_list is not None:colors = ['orange', 'red']for color, axis in zip(colors, axis_list):axis /= axis.std()x_axis, y_axis = axis# Trick to get legend to workplt.plot(0.1 * x_axis, 0.1 * y_axis, linewidth=2, color=color)plt.quiver(0, 0, x_axis, y_axis, zorder=11, width=0.01, scale=6,color=color)plt.hlines(0, -3, 3)plt.vlines(0, -3, 3)plt.xlim(-3, 3)plt.ylim(-3, 3)plt.xlabel('x')plt.ylabel('y')plt.figure()plt.subplot(2, 2, 1)plot_samples(S / S.std())plt.title('True Independent Sources')axis_list = [pca.components_.T, ica.mixing_]plt.subplot(2, 2, 2)plot_samples(X / np.std(X), axis_list=axis_list)legend = plt.legend(['PCA', 'ICA'], loc='upper right')legend.set_zorder(100)plt.title('Observations')plt.subplot(2, 2, 3)plot_samples(S_pca_ / np.std(S_pca_, axis=0))plt.title('PCA recovered signals')plt.subplot(2, 2, 4)plot_samples(S_ica_ / np.std(S_ica_))plt.title('ICA recovered signals')plt.subplots_adjust(0.09, 0.04, 0.94, 0.94, 0.26, 0.36)plt.savefig('pca_example_2.png',bbox_inches='tight')plt.show()
Exemple 3 avec numpy
Exemple d'analyse en composantes principales en utilisant numpy (source by unutbu)
# Example 3: PCA by hand with numpy's SVD, drawing the principal axes over
# noisy data scattered around the line y = 3x.
# http://stackoverflow.com/questions/18299523/basic-example-for-pca-with-matplotlib
# unutbu
import numpy as np
import matplotlib.pyplot as plt

N = 1000
xTrue = np.linspace(0, 1000, N)
yTrue = 3 * xTrue
xData = xTrue + np.random.normal(0, 100, N)
yData = yTrue + np.random.normal(0, 100, N)
xData = np.reshape(xData, (N, 1))
yData = np.reshape(yData, (N, 1))
data = np.hstack((xData, yData))  # (N, 2) observation matrix

# Center the data (PCA requires zero-mean data).
mu = data.mean(axis=0)
data = data - mu
# data = (data - mu)/data.std(axis=0)  # Uncommenting this reproduces mlab.PCA results

# data.T = U * diag(s) * V  =>  the COLUMNS of U (here `eigenvectors`) are
# the principal directions of `data`.
eigenvectors, eigenvalues, V = np.linalg.svd(data.T, full_matrices=False)
projected_data = np.dot(data, eigenvectors)  # coordinates in the PCA basis
sigma = projected_data.std(axis=0).mean()    # average spread, used as arrow length
print(eigenvectors)

fig, ax = plt.subplots()
ax.scatter(xData, yData)
# BUG FIX: the principal directions are the columns of `eigenvectors`, but
# the original loop (`for axis in eigenvectors:`) iterated its ROWS, which
# for a 2x2 rotation matrix are the mirror image of the true axes. Iterate
# the transpose so each `axis` is one column (one principal direction).
for axis in eigenvectors.T:
    start, end = mu, mu + sigma * axis
    ax.annotate(
        '', xy=end, xycoords='data',
        xytext=start, textcoords='data',
        arrowprops=dict(facecolor='red', width=2.0))
ax.set_aspect('equal')
plt.grid()
plt.xticks(rotation=90)
plt.savefig('pca_example_3.png', bbox_inches='tight')
plt.show()


