from __future__ import print_function

import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import cross_validate
from sklearn.model_selection import KFold

# ========== HW12 SOLUTION [Python2/3] ========== #

np.random.seed(1)
x = np.random.random(20) * 2.0
noise = np.random.normal(size=20)
y = 2.0 * x - 3.2 + noise
# plt.figure()
# plt.plot(x, y, 'o')
# plt.show()
X = x.reshape(-1, 1)

# linear model
linreg_fit = LinearRegression(fit_intercept=True)

# polynomial model (degree=2)
poly2 = PolynomialFeatures(degree=2)
X_poly2 = poly2.fit_transform(X)
poly2_fit = LinearRegression(fit_intercept=False)

# polynomial model (degree=10)
poly10 = PolynomialFeatures(degree=10)
X_poly10 = poly10.fit_transform(X)
poly10_fit = LinearRegression(fit_intercept=False)

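# Note on fit_intercept=False: PolynomialFeatures already emits a bias
# column of ones, so the polynomial models skip the separate intercept to
# avoid fitting a redundant constant term.
#
# Optional sketch (not part of the required solution): the same model can
# be expressed with a Pipeline so the raw X is passed directly; `poly2_pipe`
# is an illustrative name only. Uncomment to try it.
# from sklearn.pipeline import make_pipeline
# poly2_pipe = make_pipeline(PolynomialFeatures(degree=2),
#                            LinearRegression(fit_intercept=False))
# poly2_pipe.fit(X, y)            # transform + fit in one call
# print(poly2_pipe.score(X, y))   # R^2 on the training data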

# option 1: one loop for everything (faster)

# for plotting purposes
x_linspace = np.linspace(np.min(X), np.max(X), 100)
X_linspace = x_linspace.reshape(-1, 1)
X_linspace_poly2 = poly2.transform(X_linspace)
X_linspace_poly10 = poly10.transform(X_linspace)

scores_linreg_fit = []
scores_poly2_fit = []
scores_poly10_fit = []
kf = KFold(n_splits=5)
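# Aside: KFold splits in index order by default; the samples above were
# drawn randomly, so that is fine here. For pre-ordered data one would
# typically shuffle, e.g. KFold(n_splits=5, shuffle=True, random_state=1)
# (the random_state value is an arbitrary choice for reproducibility).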
for train, test in kf.split(X):
    X_test, X_train = X[test], X[train]
    y_test, y_train = y[test], y[train]

    # subset training data
    X_lin_train = X[train]
    X_poly2_train = X_poly2[train]
    X_poly10_train = X_poly10[train]

    # subset testing data
    X_lin_test = X[test]
    X_poly2_test = X_poly2[test]
    X_poly10_test = X_poly10[test]

    # fit models
    linreg_fit.fit(X_lin_train, y_train)
    poly2_fit.fit(X_poly2_train, y_train)
    poly10_fit.fit(X_poly10_train, y_train)

    # predict models for plots
    y_hat_lin = linreg_fit.predict(X_linspace)
    y_hat_poly2 = poly2_fit.predict(X_linspace_poly2)
    y_hat_poly10 = poly10_fit.predict(X_linspace_poly10)

    # compute R^2 scores and append to lists
    lin_score = linreg_fit.score(X_lin_test, y_test)
    scores_linreg_fit.append(lin_score)

    poly2_score = poly2_fit.score(X_poly2_test, y_test)
    scores_poly2_fit.append(poly2_score)

    poly10_score = poly10_fit.score(X_poly10_test, y_test)
    scores_poly10_fit.append(poly10_score)

    # uncomment to show visualization for each cross-validation step
    # plt.figure()
    # plt.plot(X_train, y_train, 'ok', label='train')
    # plt.plot(X_test, y_test, 'xb', label='test')
    # plt.plot(X_linspace, y_hat_lin, '.-', label='Linear model')
    # plt.plot(X_linspace, y_hat_poly2, '.-', label='Quadratic model')
    # plt.plot(X_linspace, y_hat_poly10, '.-', label='Degree-10 model')
    # plt.ylim((-10, 10))
    # plt.legend()
    # plt.show()

# mean R^2 over the 5 folds: linear, degree-2, degree-10
print(np.mean(scores_linreg_fit))
print(np.mean(scores_poly2_fit))
print(np.mean(scores_poly10_fit))
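# Optional sketch (not required by the homework): the per-fold scores are
# already collected above, so the spread across folds can be inspected
# alongside the mean. Uncomment to print it.
# print("degree 10 per-fold:", scores_poly10_fit)
# print("degree 10 std:     ", np.std(scores_poly10_fit))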

# option 2: one-liner for each model (more readable)

scores_linreg_fit = cross_validate(linreg_fit, X, y, cv=5,
                                   return_train_score=False)
scores_poly2_fit = cross_validate(poly2_fit, X_poly2, y, cv=5,
                                  return_train_score=False)
scores_poly10_fit = cross_validate(poly10_fit, X_poly10, y, cv=5,
                                   return_train_score=False)

# cross_validate returns a dict of arrays; 'test_score' holds per-fold R^2
print(np.mean(scores_linreg_fit['test_score']))
print(np.mean(scores_poly2_fit['test_score']))
print(np.mean(scores_poly10_fit['test_score']))
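# Optional sketch (assumes Pipelines are allowed, which the assignment may
# not cover): option 2 combined with make_pipeline avoids precomputing
# X_poly2/X_poly10 by hand, and cross_val_score returns just the array of
# test scores. `poly2_pipe` is an illustrative name only.
# from sklearn.pipeline import make_pipeline
# from sklearn.model_selection import cross_val_score
# poly2_pipe = make_pipeline(PolynomialFeatures(degree=2),
#                            LinearRegression(fit_intercept=False))
# print(np.mean(cross_val_score(poly2_pipe, X, y, cv=5)))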