# Cross-Validation

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score, cross_val_predict, KFold
from sklearn.metrics import r2_score
from sklearn.utils import resample

In [None]:
# Synthetic regression data: 50 points with x uniform on [0, 10) and
# y = 2x - 5 plus standard-normal noise. The generator is seeded so the
# notebook produces the same data on every run.
rng = np.random.RandomState(1)
x = 10 * rng.rand(50)
noise = rng.randn(50)
y = 2 * x - 5 + noise

# scikit-learn estimators expect a 2-D (n_samples, n_features) design matrix.
X = x.reshape(-1, 1)

plt.scatter(x, y);

In [None]:
# Fit a straight line to all of the data and print the score on that same
# (training) data.
model = LinearRegression(fit_intercept=True).fit(X, y)
print(model.score(X, y))

# Evaluate the fitted line on a dense grid so it can be drawn over the points.
xfit = np.linspace(0, 10, 1000)
yfit = model.predict(xfit.reshape(-1, 1))

plt.scatter(x, y)
plt.plot(xfit, yfit);

In [None]:
# Score the model on each of 3 held-out folds (one score per fold).
fold_scores = cross_val_score(model, X, y, cv=3)
print(fold_scores)

In [None]:
# Out-of-fold predictions: each y_pred[i] comes from a model that was fit
# without sample i. A single R^2 is then computed over all of them.
y_pred = cross_val_predict(model, X, y, cv=3)
r2_score(y_true=y, y_pred=y_pred)

# More manual cross-validation

In [None]:
# Simulated binding measurements: 15 concentrations log-spaced from 1e-3 to
# 1e2, a saturating curve X / (1 + 0.4 X), plus Gaussian noise scaled by 0.5.
# NOTE: this rebinds X (the 2-D regression design matrix above) to a 1-D
# concentration array; Y is a new name, distinct from the earlier y.
X = np.logspace(-3, 2, num=15)
noise_free = X / (1.0 + 0.4 * X)
Y = noise_free + 0.5 * rng.randn(*X.shape)

plt.semilogx(X, Y, '.')
plt.xlabel('Concentration [nM]')
plt.ylabel('Binding');

In [None]:
def klotz1(xx, lig):
    """Evaluate the two-parameter binding curve.

    Computes ``xx[1] * lig / (1 + xx[0] * lig)``.

    Parameters
    ----------
    xx : indexable of length >= 2
        Curve parameters: ``xx[0]`` scales ``lig`` in the denominator and
        ``xx[1]`` scales ``lig`` in the numerator.
    lig : scalar or array
        Ligand concentration(s) at which to evaluate the curve.

    Returns
    -------
    scalar or array
        Predicted binding at each value of ``lig``.
    """
    numerator = xx[1] * lig
    denominator = 1 + xx[0] * lig
    return numerator / denominator

def ls_obj_k1(xx, ligs, data):
    """Residuals of the ``klotz1`` curve against measured data.

    Intended as the objective passed to a least-squares solver.

    Parameters
    ----------
    xx : indexable of length >= 2
        Candidate curve parameters, forwarded to ``klotz1``.
    ligs : scalar or array
        Ligand concentration(s) at which ``data`` was measured.
    data : scalar or array
        Observed binding values.

    Returns
    -------
    scalar or array
        ``data`` minus the prediction (observed - predicted).
    """
    predicted = klotz1(xx, ligs)
    return data - predicted

In [None]:
# Fit the binding curve to all of the data, starting from the initial guess x0.
x0 = np.array([1.1, 0.9])
res = sp.optimize.least_squares(ls_obj_k1, x0, args=(X, Y))
assert res.success

# Fitted values against measurements; the red line marks a perfect fit.
Y_fit = klotz1(res.x, X)
plt.plot(Y, Y_fit, '.')
plt.plot(Y, Y, 'r-')
plt.xlabel("Actual")
plt.ylabel("Fit");

In [None]:
# Two-fold cross-validation of the binding-curve fit: fit the parameters on
# one half of the data and predict the held-out half.
# A fixed random_state makes the shuffled split, and so the plot below,
# reproducible from run to run.
kf = KFold(n_splits=2, shuffle=True, random_state=1)
Y_pred = np.empty_like(Y)

for train_index, test_index in kf.split(X):
    # Refit from the same starting guess using only the training fold.
    res = sp.optimize.least_squares(ls_obj_k1, x0, args=(X[train_index], Y[train_index]))

    # Every sample is in exactly one test fold, so the loop fills all of Y_pred.
    Y_pred[test_index] = klotz1(res.x, X[test_index])

# Out-of-fold predictions against measurements; the red line marks perfect
# prediction.
plt.plot(Y, Y_pred, '.')
plt.plot(Y, Y, 'r-')
plt.xlabel("Actual")
plt.ylabel("Predicted");

# Bootstrap

In [None]:
# Bootstrap the fitted parameters: refit the curve on data resampled with
# replacement, and store one parameter pair per column of p_boot.
n_boot = 1000
p_boot = np.empty((2, n_boot))

# One seeded generator shared by all iterations keeps the whole bootstrap
# reproducible while still drawing a different resample each time. An integer
# seed passed to resample() would repeat the same draw on every iteration.
boot_rng = np.random.RandomState(1)
sample_indices = np.arange(X.shape[0])

for bootstrapi in range(n_boot):
    # Indices drawn with replacement, applied to X and Y together so each
    # (concentration, binding) pair stays matched.
    resamp = resample(sample_indices, random_state=boot_rng)

    # NOTE: res.success is not checked here; the parameters are recorded
    # whether or not the solver reports convergence.
    res = sp.optimize.least_squares(ls_obj_k1, x0, args=(X[resamp], Y[resamp]))
    p_boot[:, bootstrapi] = res.x

In [None]:
# Joint bootstrap distribution of the two fitted parameters: row 0 of p_boot
# on the horizontal axis, row 1 on the vertical axis.
first_param, second_param = p_boot[0], p_boot[1]
plt.scatter(first_param, second_param, s=0.2)
plt.xlabel(r"$x_1$")
plt.ylabel(r"$x_2$");