PCA / NNMF Example

Initial imports

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.decomposition import PCA, NMF
from sklearn.preprocessing import scale

# Load the bundled iris dataset (feature matrix, class labels, class names).
iris = datasets.load_iris()

PCA

# Pull the feature matrix, class labels and class names out of the dataset.
X, y = iris.data, iris.target
target_names = iris.target_names

# Standardize every feature (zero mean, unit variance) ahead of PCA.
X_scale = scale(X)

# Two-step form: fit the 2-component model, then project the data onto it.
pca = PCA(n_components=2)
pca.fit(X_scale)
X_r = pca.transform(X_scale)

# One-step form of the same computation.
X_r = pca.fit_transform(X_scale)

# Loadings of the first principal component
print(pca.components_[0])

# Scores of the first sample on both components
print(X_r[0])
[ 0.52106591 -0.26934744  0.5804131   0.56485654]
[-2.26470281  0.4800266 ]
# Fraction (0-1, not percent) of the total variance explained by each component
print(pca.explained_variance_ratio_)
[0.72962445 0.22850762]

scale(X) centers each feature to mean 0 and scales it to unit variance before PCA.

# One color per iris class, listed in class-label order (0, 1, 2).
colors = ['navy', 'turquoise', 'darkorange']

# Draw each class as its own scatter series so it gets its own legend entry.
for i, (color, target_name) in enumerate(zip(colors, target_names)):
    plt.scatter(
        X_r[y == i, 0],
        X_r[y == i, 1],
        color=color,
        alpha=0.8,
        label=target_name,
    )
plt.legend(loc="best", shadow=False, scatterpoints=1)
plt.title("PCA of IRIS dataset")
plt.xlabel("PC 1")
plt.ylabel("PC 2");

NNMF

# Fit a 2-component non-negative factorization X ~= W @ H. NMF needs
# non-negative input, so it is fit on the raw X rather than the standardized
# X_scale; X_n holds the per-sample weights W.
# The default max_iter=200 stops before convergence on this data
# (ConvergenceWarning), so give the solver more iterations.
nn = NMF(n_components=2, max_iter=1000)
X_n = nn.fit_transform(X)
/home/runner/work/ml-for-bioe/ml-for-bioe/.venv/lib/python3.14/site-packages/sklearn/decomposition/_nmf.py:1720: ConvergenceWarning: Maximum number of iterations 200 reached. Increase it to improve convergence.
  warnings.warn(
# Scatter the two NMF component weights, one series (and legend entry) per class.
for i, (color, target_name) in enumerate(zip(colors, target_names)):
    plt.scatter(
        X_n[y == i, 0],
        X_n[y == i, 1],
        color=color,
        alpha=0.8,
        label=target_name,
    )
plt.legend(loc="best", shadow=False, scatterpoints=1)
plt.title("NMF of IRIS dataset")
plt.xlabel("Component 1")
plt.ylabel("Component 2");

# Fraction of variance explained by the NMF reconstruction (R^2).
# reconstruction_err_ is the Frobenius norm ||X - WH||, so it is squared to get
# the residual sum of squares and compared with the total sum of squares of the
# mean-centered data X -- not with the norm of the scores X_n.
# NOTE: output recorded for the earlier formula is stale; re-run to refresh it.
1 - nn.reconstruction_err_ ** 2 / np.linalg.norm(X - X.mean(axis=0)) ** 2
np.float64(0.636716930350203)