An introduction to supervised learning using scikit-learn¶
Maximilian Kasy, 2024
This tutorial is partially based on Chapter 5 of Python Data Science Handbook by Jake VanderPlas.
You can find comprehensive documentation of scikit-learn at https://scikit-learn.org.
Load standard packages for numerical calculations and for plotting:
import numpy as np
import matplotlib.pyplot as plt
Load functions from scikit-learn, a well-developed package for classic machine learning:
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score, LeaveOneOut, validation_curve, GridSearchCV
from sklearn.preprocessing import SplineTransformer
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LinearRegression, Ridge, Lasso
A motivating example: Digit recognition¶
Load example dataset:
from sklearn.datasets import load_digits
digits = load_digits()
digits.images.shape
(1797, 8, 8)
The images data is a three-dimensional array: 1,797 samples each consisting of an 8 × 8 grid of pixels.
Visualize the first hundred of these:
fig, axes = plt.subplots(10, 10, figsize=(8, 8),
                         subplot_kw={'xticks': [], 'yticks': []},
                         gridspec_kw=dict(hspace=0.1, wspace=0.1))

for i, ax in enumerate(axes.flat):
    ax.imshow(digits.images[i], cmap='binary', interpolation='nearest')
    ax.text(0.05, 0.05, str(digits.target[i]),
            transform=ax.transAxes, color='green')
scikit-learn requires a two-dimensional [n_samples, n_features] representation. Such a representation is already available in the example dataset (treating each pixel as a component of X):
X = digits.data
X.shape
(1797, 64)
y = digits.target
y.shape
(1797,)
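As a quick sanity check (a small sketch), flattening each 8 × 8 image into a row of 64 pixel values reproduces exactly this feature matrix:
# Flatten each 8x8 image into a row of 64 pixel features;
# this reproduces the built-in digits.data matrix.
X_from_images = digits.images.reshape(len(digits.images), -1)
X_from_images.shape, np.allclose(X_from_images, digits.data)  # expect ((1797, 64), True)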
Basic recipe for fitting a supervised learning model¶
- Choose a class of model.
- Choose model hyperparameters.
- Fit the model to the training data.
- Use the model to predict labels for new data.
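In scikit-learn, these four steps always follow the same estimator interface. Here is a minimal generic sketch (with a tiny made-up dataset; the digits example below fills in these steps with real data):
# Generic fit/predict pattern shared by all scikit-learn estimators
# (toy data for illustration only).
Xtoy = np.array([[0.], [1.], [9.], [10.]])   # features
ytoy = np.array([0, 0, 1, 1])                # labels
toy_model = GaussianNB()                     # steps 1-2: model class and hyperparameters (defaults here)
toy_model.fit(Xtoy, ytoy)                    # step 3: fit the model to the training data
toy_model.predict([[0.5], [9.5]])            # step 4: predict labels for new data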
Split the dataset into training data and test data (for model validation):
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=0)
Choose a class of model (Gaussian naive Bayes, for this example):
model = GaussianNB()
Fit the model to the training data:
model.fit(Xtrain, ytrain)
GaussianNB()
Predict outcomes for the test data:
y_predicted_test = model.predict(Xtest)
Assess model accuracy by comparing the true values in the test set to the predictions:
accuracy_score(ytest, y_predicted_test)
0.8333333333333334
We get 83% accuracy for classification of the digits!
The confusion matrix tells us where the predictions went wrong, for discrete outcomes:
mat = confusion_matrix(ytest, y_predicted_test)
Plot the confusion matrix using a heatmap (from the Seaborn package):
from seaborn import heatmap
heatmap(mat, square=True, annot=True, cbar=False, cmap='Blues')
plt.xlabel('Predicted value')
plt.ylabel('True value');
Model Validation¶
Above, we assessed model accuracy by forming predictions on the test set, and comparing them to the true values.
We could alternatively just form predictions on our training data, and assess their accuracy:
y_predicted_train = model.predict(Xtrain)
Compute the fraction of correctly labeled points in the training data:
accuracy_score(ytrain, y_predicted_train)
0.8574610244988864
The training sample accuracy score is higher than the test sample accuracy score. This is typical: Using the same data for training and validation over-estimates performance.
Flip the role of training and test set to get two estimates of accuracy:
X1, X2, y1, y2 = train_test_split(X, y, random_state=0,
                                  train_size=0.5)
y2_model = model.fit(X1, y1).predict(X2)
y1_model = model.fit(X2, y2).predict(X1)
accuracy_score(y1, y1_model), accuracy_score(y2, y2_model)
(0.844097995545657, 0.8342602892102335)
Use the cross_val_score function to do this for us automatically, for more splits:
cross_val_score(model, X, y, cv=5)
array([0.78055556, 0.78333333, 0.79387187, 0.8718663 , 0.80501393])
We can use as many folds as there are data points (leave-one-out cross-validation). This is statistically optimal, but computationally more demanding:
scores = cross_val_score(model, X, y, cv=LeaveOneOut())
scores
array([1., 1., 0., ..., 1., 1., 1.])
The average performance is the mean of these:
scores.mean()
0.8408458542014469
Linear regression with regularization¶
Generate new data, for illustration:
rng = np.random.RandomState(2)
x = 10 * rng.rand(200)
y = np.sin(x) + rng.randn(200)
X = x[:, np.newaxis]
Generate spline features with 20 knots, which we will use for the regressions below:
features = SplineTransformer(degree=2, n_knots=20)
X = features.fit_transform(x[:, None])
X[:3]
array([[0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.2859855 , 0.68431665, 0.02969786, 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.21978553, 0.72343049, 0.05678398, 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.15939406, 0.74582513, 0.09478081, 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        , 0.        ]])
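A brief note on the dimensions (with scikit-learn's default include_bias=True): a degree-2 spline basis with 20 knots has n_knots + degree - 1 = 21 basis functions, so each observation is represented by 21 features:
X.shape  # expect (200, 21): 200 observations, 21 spline basis functions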
Run a linear regression using these spline features:
OLS_model = LinearRegression()
OLS_model.fit(X, y)
LinearRegression()
Plot the fit from this regression.
xfit = np.linspace(0, 10, 1000)
Xfit = features.transform(xfit[:, None])  # transform (not fit_transform), to reuse the knots fitted on the training data
yfit = OLS_model.predict(Xfit)
plt.scatter(x, y)
plt.plot(xfit, yfit);
This does not look like a great model. We overfit the data.
One way to solve this problem is regularization.
A simple way to regularize is Ridge regression, where we minimize the OLS objective, plus a penalty term:
$\hat{\beta} = \operatorname{argmin}_{\beta} \sum_i (Y_i - X_i \beta)^2 + \alpha \|\beta\|^2$
Ridge_model = Ridge(alpha=10)
Ridge_model.fit(X, y)
yfit = Ridge_model.predict(Xfit)
plt.scatter(x, y)
plt.plot(xfit, yfit);
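To see the effect of the penalty directly, one can compare the size of the fitted coefficient vectors (a quick check; with these nearly collinear spline features, the unpenalized OLS coefficients are typically much larger):
# The Ridge penalty shrinks the coefficient vector toward zero,
# so its norm is much smaller than that of the unpenalized OLS fit.
np.linalg.norm(OLS_model.coef_), np.linalg.norm(Ridge_model.coef_)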
An alternative to Ridge is the Lasso, which uses a penalty based on the absolute values of the coefficients:
$\hat{\beta} = \operatorname{argmin}_{\beta} \sum_i (Y_i - X_i \beta)^2 + \alpha \sum_j |\beta_j|$
from sklearn.linear_model import Lasso
Lasso_model = Lasso(alpha=.02)
Lasso_model.fit(X, y)
yfit = Lasso_model.predict(Xfit)
plt.scatter(x, y)
plt.plot(xfit, yfit);
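In contrast to Ridge, the absolute-value penalty sets some coefficients exactly to zero. A quick check (a sketch) counts how many of the spline coefficients are zeroed out:
# The Lasso penalty produces sparse solutions: some coefficients are exactly zero.
np.sum(Lasso_model.coef_ == 0), Lasso_model.coef_.size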
Model selection¶
The penalty parameter $\alpha$, for both Ridge and Lasso, determines how we trade off variance and bias:
- A large penalty leads to estimators with small variance, but large bias. Such estimators are said to underfit the data.
- A small penalty leads to estimators with large variance, but small bias. Such estimators are said to overfit the data.
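To make this concrete, here is a small sketch (reusing the spline features X and outcomes y from above, with penalty values chosen purely for illustration) that compares in-sample and cross-validated mean squared error for a very small, a moderate, and a very large penalty. One would expect the small penalty to achieve the lowest training error but not the lowest cross-validated error, and the large penalty to do poorly on both:
# Compare training MSE and 10-fold cross-validated MSE for three
# illustrative penalty values.
for a in [0.001, 3.0, 1000.0]:
    m = Ridge(alpha=a)
    cv_mse = -cross_val_score(m, X, y, cv=10,
                              scoring="neg_mean_squared_error").mean()
    train_mse = np.mean((y - m.fit(X, y).predict(X)) ** 2)
    print(f"alpha={a}: training MSE = {train_mse:.3f}, CV MSE = {cv_mse:.3f}")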
We can use the penalty parameter which gives the best predictive performance on validation data.
Calculate how cross-validated mean squared error depends on $\alpha$, for Ridge regression in our data:
ridge_model = Ridge()
param_range = np.linspace(.2, 10, 50)
train_scores, test_scores = validation_curve(
    ridge_model, X, y,
    param_name="alpha", param_range=param_range,
    cv=10, scoring="neg_mean_squared_error"
)
# Average across folds
train_scores_mean = np.mean(train_scores, axis=1)
test_scores_mean = np.mean(test_scores, axis=1)
Plot the resulting validation curve:
# Plot the validation curve
plt.figure()
plt.title("Validation Curve for Ridge")
plt.xlabel("Alpha")
plt.ylabel("Score")
plt.plot(param_range, train_scores_mean, label="Training score", color="darkorange")
plt.plot(param_range, test_scores_mean, label="Cross-validation score", color="navy")
plt.legend(loc="best")
We see a number of interesting things in this picture:
- The training score over-estimates performance, relative to the (unbiased) cross-validation score.
- A larger penalty (more regularization) always makes in-sample performance (the training score) worse.
- A larger penalty first increases, then decreases the cross-validation score.
The optimal penalty is somewhere in the middle.
We can automatically search for the best penalty parameter using grid search:
grid = GridSearchCV(ridge_model, {'alpha': param_range}, cv=10)
grid.fit(X, y)
grid.best_params_
{'alpha': 3.2}
bestmodel = grid.best_estimator_
yfit = bestmodel.predict(Xfit)
plt.scatter(x, y)
plt.plot(xfit, yfit);
Do the same exercise for Lasso regression.
lasso_model = Lasso()
param_range = np.linspace(.00125, .05, 40)
train_scores, test_scores = validation_curve(
    lasso_model, X, y,
    param_name="alpha", param_range=param_range,
    cv=10, scoring="neg_mean_squared_error"
)
# Average across folds
train_scores_mean = np.mean(train_scores, axis=1)
test_scores_mean = np.mean(test_scores, axis=1)
# Plot the validation curve
plt.figure()
plt.title("Validation Curve for Lasso")
plt.xlabel("Alpha")
plt.ylabel("Score")
plt.plot(param_range, train_scores_mean, label="Training score", color="darkorange")
plt.plot(param_range, test_scores_mean, label="Cross-validation score", color="navy")
plt.legend(loc="best")
grid = GridSearchCV(lasso_model, {'alpha': param_range}, cv=10)
grid.fit(X, y)
grid.best_params_
{'alpha': 0.01}
bestmodel = grid.best_estimator_
yfit = bestmodel.predict(Xfit)
plt.scatter(x, y)
plt.plot(xfit, yfit);
Summary¶
Let us summarize a typical workflow for supervised learning:
- Transform your input data to create a matrix of features X and a vector of outcomes y.
- Select a model, e.g. GaussianNB or Ridge. Such models typically have hyperparameters that determine the degree of regularization or model complexity, which trade off variance and bias.
- Evaluate out-of-sample performance using sample splitting or cross-validation, using cross_val_score.
- Choose hyperparameters by doing a grid search to minimize cross-validated prediction loss, using GridSearchCV.
- Then use the estimator with optimized hyperparameters for predictions on new observations.
In the next part of the class, we will discuss when and why this approach works.
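As a compact illustration of this workflow, here is a sketch that chains the steps above using a scikit-learn Pipeline (reusing the simulated spline-regression data from above; the step names 'splines' and 'ridge' are arbitrary labels chosen here):
# End-to-end sketch: feature construction, model choice, cross-validated
# hyperparameter search, and prediction, chained in a single Pipeline.
from sklearn.pipeline import Pipeline

rng = np.random.RandomState(2)
x = 10 * rng.rand(200)
y = np.sin(x) + rng.randn(200)

pipe = Pipeline([
    ('splines', SplineTransformer(degree=2, n_knots=20)),  # transform inputs into features
    ('ridge', Ridge()),                                     # a regularized model
])

# Choose the penalty by cross-validated grid search over the pipeline
grid = GridSearchCV(pipe, {'ridge__alpha': np.linspace(.2, 10, 50)}, cv=10)
grid.fit(x[:, None], y)
print(grid.best_params_)

# Predict for new observations with the tuned estimator
xnew = np.linspace(0, 10, 5)
grid.best_estimator_.predict(xnew[:, None])
A Pipeline also guards against re-fitting the feature transformer on new data: the spline knots learned during fitting are reused automatically at prediction time.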