import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from ipywidgets import interact, fixed
%matplotlib inline
import matplotlib_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
# use seaborn plotting defaults
import seaborn as sns; sns.set_theme()
SVM Example
This notebook contains an excerpt from the Python Data Science Handbook by Jake VanderPlas; the content is available on GitHub. The text is released under the CC-BY-NC-ND license, and code is released under the MIT license.
In-Depth: Support Vector Machines
Motivating Support Vector Machines
from sklearn.datasets import make_blobs
X, y = make_blobs(n_samples=50, centers=2,
                  random_state=0, cluster_std=0.60)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn');
xfit = np.linspace(-1, 3.5)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
plt.plot([0.6], [2.1], 'x', color='red', markeredgewidth=2, markersize=10)

for m, b in [(1, 0.65), (0.5, 1.6), (-0.2, 2.9)]:
    plt.plot(xfit, m * xfit + b, '-k')

plt.xlim(-1, 3.5);
Support Vector Machines: Maximizing the Margin
Support vector machines offer one way to improve on this. The intuition is this: rather than simply drawing a zero-width line between the classes, we can draw around each line a margin of some width, up to the nearest point. Here is an example of how this might look:
xfit = np.linspace(-1, 3.5)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')

for m, b, d in [(1, 0.65, 0.33), (0.5, 1.6, 0.55), (-0.2, 2.9, 0.2)]:
    yfit = m * xfit + b
    plt.plot(xfit, yfit, '-k')
    plt.fill_between(xfit, yfit - d, yfit + d, edgecolor='none',
                     color='#AAAAAA', alpha=0.4)

plt.xlim(-1, 3.5);
Fitting a support vector machine
Let’s see the result of an actual fit to this data: we will use Scikit-Learn’s support vector classifier to train an SVM model on this data. For the time being, we will use a linear kernel and set the C
parameter to a very large number (we’ll discuss the meaning of these in more depth momentarily).
from sklearn.svm import SVC # "Support vector classifier"
model = SVC(kernel='linear', C=1E10)
model.fit(X, y)
SVC(C=10000000000.0, kernel='linear')
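As a quick aside (not part of the original notebook): for a linear kernel, the fitted coefficients give the margin size directly, since the margin edges sit at w·x + b = ±1 and the full margin width is 2/‖w‖. A minimal sketch, assuming the model fitted above:
# Illustrative check (assumption: the linear model fitted above).
# The decision boundary is w·x + b = 0; the margin edges are at w·x + b = ±1,
# so the distance between the two dashed margin lines is 2 / ||w||.
w = model.coef_[0]                      # weight vector of the linear decision function
margin_width = 2 / np.linalg.norm(w)    # width of the margin shown in the plots below
print(margin_width)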
To better visualize what’s happening here, let’s create a quick convenience function that will plot SVM decision boundaries for us:
def plot_svc_decision_function(model, ax=None, plot_support=True):
    """Plot the decision function for a 2D SVC"""
    if ax is None:
        ax = plt.gca()
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    # create grid to evaluate model
    x = np.linspace(xlim[0], xlim[1], 30)
    y = np.linspace(ylim[0], ylim[1], 30)
    Y, X = np.meshgrid(y, x)
    xy = np.vstack([X.ravel(), Y.ravel()]).T
    P = model.decision_function(xy).reshape(X.shape)

    # plot decision boundary and margins
    ax.contour(X, Y, P, colors='k',
               levels=[-1, 0, 1], alpha=0.5,
               linestyles=['--', '-', '--'])

    # plot support vectors
    if plot_support:
        ax.scatter(model.support_vectors_[:, 0],
                   model.support_vectors_[:, 1],
                   s=300, linewidth=1, facecolors='none')
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
plot_svc_decision_function(model);
model.support_vectors_
array([[0.44359863, 3.11530945],
[2.33812285, 3.43116792],
[2.06156753, 1.96918596]])
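As a quick illustrative check (not in the original text), these support vectors should lie exactly on the margin edges for this hard-margin fit (very large C), so the decision function evaluated at them should come out very close to ±1:
# Assumption for illustration: the hard-margin model fitted above.
# Support vectors sit on the margin boundaries, where the decision
# function equals +1 or -1 (up to numerical tolerance).
print(model.decision_function(model.support_vectors_))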
def plot_svm(N=10, ax=None):
    X, y = make_blobs(n_samples=200, centers=2,
                      random_state=0, cluster_std=0.60)
    X = X[:N]
    y = y[:N]
    model = SVC(kernel='linear', C=1E10)
    model.fit(X, y)

    ax = ax or plt.gca()
    ax.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
    ax.set_xlim(-1, 4)
    ax.set_ylim(-1, 6)
    plot_svc_decision_function(model, ax)

fig, ax = plt.subplots(1, 2, figsize=(16, 6))
fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1)
for axi, N in zip(ax, [60, 120]):
    plot_svm(N, axi)
    axi.set_title('N = {0}'.format(N))
from ipywidgets import interact, fixed
interact(plot_svm, N=[10, 200], ax=fixed(None));
Beyond linear boundaries: Kernel SVM
Where SVM becomes extremely powerful is when it is combined with kernels. We have seen a version of kernels before, in the basis function regressions of In Depth: Linear Regression. There we projected our data into higher-dimensional space defined by polynomials and Gaussian basis functions, and thereby were able to fit for nonlinear relationships with a linear classifier.
In SVM models, we can use a version of the same idea. To motivate the need for kernels, let’s look at some data that is not linearly separable:
from sklearn.datasets import make_circles
X, y = make_circles(100, factor=.1, noise=.1)

clf = SVC(kernel='linear').fit(X, y)

plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
plot_svc_decision_function(clf, plot_support=False);
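As a quick illustrative check (not in the original text), the linear classifier's training accuracy on this data is far from perfect, confirming visually and numerically that no linear boundary separates the two rings:
# Assumption for illustration: the linear fit above. Its training accuracy
# stays well below 1.0 because the classes are concentric circles.
print(clf.score(X, y))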
One simple way to make the classes separable is to project the data into a higher dimension, for example with a radial basis function centered on the middle clump:
r = np.exp(-(X ** 2).sum(1))
We can visualize this extra data dimension using a three-dimensional plot—if you are running this notebook live, you will be able to use the sliders to rotate the plot:
def plot_3D_interactive(X=X, y=y):
    fig = go.Figure(data=[go.Scatter3d(
        x=X[:, 0],
        y=X[:, 1],
        z=np.exp(-(X ** 2).sum(1)),  # use the 'r' value calculated earlier
        mode='markers',
        marker=dict(
            size=5,
            color=y,  # use 'y' for color mapping
            # colorscale='Autumn',  # choose a colorscale
            opacity=0.8,
            colorbar=dict(title='Class')
        )
    )])
    fig.update_layout(
        scene=dict(
            xaxis_title='x',
            yaxis_title='y',
            zaxis_title='r',
            aspectratio=dict(x=1, y=1, z=1)
        ),
        title='Interactive 3D Scatter Plot',
        margin=dict(l=0, r=0, b=0, t=50)
    )
    return fig

plot_3D_interactive(X=X, y=y)
clf = SVC(kernel='rbf', C=1E6)
clf.fit(X, y)
SVC(C=1000000.0)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
plot_svc_decision_function(clf)
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
            s=300, lw=1, facecolors='none');
Tuning the SVM: Softening Margins
Our discussion thus far has centered around very clean datasets, in which a perfect decision boundary exists. But what if your data has some amount of overlap? For example, you may have data like this:
X, y = make_blobs(n_samples=100, centers=2,
                  random_state=0, cluster_std=1.2)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn');
X, y = make_blobs(n_samples=100, centers=2,
                  random_state=0, cluster_std=0.8)

fig, ax = plt.subplots(1, 2, figsize=(16, 6))
fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1)

for axi, C in zip(ax, [10.0, 0.1]):
    model = SVC(kernel='linear', C=C).fit(X, y)
    axi.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
    plot_svc_decision_function(model, axi)
    axi.scatter(model.support_vectors_[:, 0],
                model.support_vectors_[:, 1],
                s=300, lw=1, facecolors='none')
    axi.set_title('C = {0:.1f}'.format(C), size=14)
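In practice the value of C is usually chosen by cross-validation rather than by eye. A minimal sketch (an addition, not part of the original notebook) using scikit-learn's GridSearchCV on the data above, with a small hand-picked grid of C values; a real search would typically cover a wider logarithmic range:
from sklearn.model_selection import GridSearchCV

# Illustrative assumption: a small grid of candidate C values.
param_grid = {'C': [0.01, 0.1, 1, 10, 100]}
grid = GridSearchCV(SVC(kernel='linear'), param_grid, cv=5)
grid.fit(X, y)

print(grid.best_params_)  # the C value with the best cross-validated accuracy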