import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.decomposition import PCA, NMF
from sklearn.preprocessing import scale
# Load the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()
# PCA / NNMF Example
## Initial imports
## PCA
# Feature matrix and class labels from the loaded dataset.
X = iris.data
y = iris.target
# Scaled copy of the features (sklearn.preprocessing.scale), used for PCA.
X_scale = scale(X)
# Class names, used as legend labels when plotting.
target_names = iris.target_names
# Fit a two-component PCA on the scaled features, then project the same
# data onto those components; X_r holds the resulting scores.
pca = PCA(n_components=2)
X_r = pca.fit(X_scale).transform(X_scale)
# Same as above, fitting and transforming in a single call
X_r = pca.fit_transform(X_scale)
# Print the first feature's loading on each of the two components.
# NOTE(review): components_ is indexed [component, feature], so [:, 0] is not
# the full PC1 loading vector (that would be pca.components_[0]) — confirm
# which one is intended.
print(pca.components_[:, 0])
# Print scores of first data point
print(X_r[0, :])
[0.52106591 0.37741762]
[-2.26470281 0.4800266 ]
# Fraction of the total variance explained by each component
# (values are ratios, not percentages)
print(pca.explained_variance_ratio_)
[0.72962445 0.22850762]
colors = ['navy', 'turquoise', 'darkorange']

# One scatter series per class: rows of X_r whose label equals i, drawn in
# that class's color and labelled with its name for the legend.
for color, i, target_name in zip(colors, [0, 1, 2], target_names):
    plt.scatter(X_r[y == i, 0], X_r[y == i, 1], color=color, alpha=.8, label=target_name)
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('PCA of IRIS dataset')
plt.xlabel("PC 1")
plt.ylabel("PC 2")
## NNMF
# Two-component non-negative matrix factorization, fitted on the unscaled
# features X (presumably because the scaled copy contains negative values,
# which NMF does not accept — confirm).
# NOTE(review): the recorded run emits a ConvergenceWarning at the default
# iteration limit; consider passing a larger max_iter.
nn = NMF(n_components=2)
X_n = nn.fit_transform(X)
/home/runner/work/ml-for-bioe/ml-for-bioe/site/content/examples/.venv/lib/python3.12/site-packages/sklearn/decomposition/_nmf.py:1742: ConvergenceWarning:
Maximum number of iterations 200 reached. Increase it to improve convergence.
# Per-class scatter of the two NMF components: rows of X_n whose label
# equals i, drawn in that class's color and labelled with its name.
for color, i, target_name in zip(colors, [0, 1, 2], target_names):
    plt.scatter(X_n[y == i, 0], X_n[y == i, 1], color=color, alpha=.8, label=target_name)
plt.legend(loc='best', shadow=False, scatterpoints=1)
# Title and axis labels name NMF: the plotted data are NMF components,
# not principal components.
plt.title('NMF of IRIS dataset')
plt.xlabel("NMF component 1")
plt.ylabel("NMF component 2")
# Percent variance explained
# NOTE(review): this evaluates 1 - (reconstruction_err_ / norm of X_n), i.e.
# it normalizes by the transformed data X_n rather than the input X, and it
# yields a fraction rather than a percentage — confirm this is the intended
# measure.
1 - nn.reconstruction_err_ / np.linalg.norm(X_n)
np.float64(0.6367169303502034)