
Commit 7403a63

add hw12, hw13 solutions
add solution files, update readme
1 parent bae7b0d commit 7403a63

3 files changed: +154 −2 lines changed

homework_solutions/hw12_solution.py

+108
@@ -0,0 +1,108 @@
from __future__ import print_function

import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import cross_validate
from sklearn.model_selection import KFold

# ========== HW12 SOLUTION [Python2/3] ========== #

# generate noisy linear data
np.random.seed(1)
x = np.random.random(20) * 2.0
noise = np.random.normal(size=20)
y = 2.0 * x - 3.2 + noise
# plt.figure()
# plt.plot(x, y, 'o')
# plt.show()
X = x.reshape(-1, 1)

# linear model
linreg_fit = LinearRegression(fit_intercept=True)

# polynomial model (degree=2); the bias column added by PolynomialFeatures
# plays the role of the intercept, hence fit_intercept=False
poly2 = PolynomialFeatures(degree=2)
X_poly2 = poly2.fit_transform(X)
poly2_fit = LinearRegression(fit_intercept=False)

# polynomial model (degree=10)
poly10 = PolynomialFeatures(degree=10)
X_poly10 = poly10.fit_transform(X)
poly10_fit = LinearRegression(fit_intercept=False)


# option 1: one loop for everything (faster)

# dense grid for plotting the fitted curves
x_linspace = np.linspace(np.min(X), np.max(X), 100)
X_linspace = x_linspace.reshape(-1, 1)
X_linspace_poly2 = poly2.transform(X_linspace)
X_linspace_poly10 = poly10.transform(X_linspace)

scores_linreg_fit = []
scores_poly2_fit = []
scores_poly10_fit = []
kf = KFold(n_splits=5)
for train, test in kf.split(X):
    X_test, X_train = X[test], X[train]
    y_test, y_train = y[test], y[train]

    # subset training data
    X_lin_train = X[train]
    X_poly2_train = X_poly2[train]
    X_poly10_train = X_poly10[train]

    # subset testing data
    X_lin_test = X[test]
    X_poly2_test = X_poly2[test]
    X_poly10_test = X_poly10[test]

    # fit models
    linreg_fit.fit(X_lin_train, y_train)
    poly2_fit.fit(X_poly2_train, y_train)
    poly10_fit.fit(X_poly10_train, y_train)

    # predict models for plots
    y_hat_lin = linreg_fit.predict(X_linspace)
    y_hat_poly2 = poly2_fit.predict(X_linspace_poly2)
    y_hat_poly10 = poly10_fit.predict(X_linspace_poly10)

    # compute R^2 scores and append to lists
    lin_score = linreg_fit.score(X_lin_test, y_test)
    scores_linreg_fit.append(lin_score)

    poly2_score = poly2_fit.score(X_poly2_test, y_test)
    scores_poly2_fit.append(poly2_score)

    poly10_score = poly10_fit.score(X_poly10_test, y_test)
    scores_poly10_fit.append(poly10_score)

    # uncomment to show visualization for each cross-validation step
    # plt.figure()
    # plt.plot(X_train, y_train, 'ok', label='train')
    # plt.plot(X_test, y_test, 'xb', label='test')
    # plt.plot(X_linspace, y_hat_lin, '.-', label='Linear model')
    # plt.plot(X_linspace, y_hat_poly2, '.-', label='Quadratic model')
    # plt.plot(X_linspace, y_hat_poly10, '.-', label='Degree-10 model')
    # plt.ylim((-10, 10))
    # plt.legend()
    # plt.show()

# mean cross-validated R^2 for each model
print(np.mean(scores_linreg_fit))
print(np.mean(scores_poly2_fit))
print(np.mean(scores_poly10_fit))

# option 2: one-liner for each model (more readable)

scores_linreg_fit = cross_validate(linreg_fit, X, y, cv=5,
                                   return_train_score=False)
scores_poly2_fit = cross_validate(poly2_fit, X_poly2, y, cv=5,
                                  return_train_score=False)
scores_poly10_fit = cross_validate(poly10_fit, X_poly10, y, cv=5,
                                   return_train_score=False)

print(np.mean(scores_linreg_fit['test_score']))
print(np.mean(scores_poly2_fit['test_score']))
print(np.mean(scores_poly10_fit['test_score']))
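
Editor's aside: the committed file transforms the feature matrices by hand; the same five-fold comparison could also be written with scikit-learn's Pipeline, which bundles the polynomial expansion and the regression into a single estimator. A minimal sketch, not part of the committed solution; it reuses X, y, and the imports from above, and the degree values are illustrative.

# ---- hedged sketch, not in the committed file: Pipeline variant ---- #
# make_pipeline chains PolynomialFeatures and LinearRegression, so
# cross_validate applies the expansion inside each fold and no X_poly*
# matrices need to be built by hand; assumes X, y defined as above
from sklearn.pipeline import make_pipeline

for degree in (1, 2, 10):
    model = make_pipeline(PolynomialFeatures(degree=degree),
                          LinearRegression(fit_intercept=False))
    scores = cross_validate(model, X, y, cv=5)
    print(degree, np.mean(scores['test_score']))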

homework_solutions/hw13_solution.py

+44
@@ -0,0 +1,44 @@
from __future__ import print_function

import pandas as pd
import matplotlib.pyplot as plt

# ========== HW13 SOLUTION [Python2/3] ========== #

# read in data
df_aapl = pd.read_csv('AAPL.csv', na_values='null', index_col='Date')
df_msft = pd.read_csv('MSFT.csv', na_values='null', index_col='Date')
df_pg = pd.read_csv('PG.csv', na_values='null', index_col='Date')
# convert index to datetime type (needed for date-based slicing and plotting);
# pd.to_datetime is more robust than astype('datetime64') across pandas versions
df_aapl.index = pd.to_datetime(df_aapl.index)
df_msft.index = pd.to_datetime(df_msft.index)
df_pg.index = pd.to_datetime(df_pg.index)

# calculate Range; abs() is defensive only, since High >= Low can be assumed
df_aapl['Range'] = abs(df_aapl['High'] - df_aapl['Low'])
df_msft['Range'] = abs(df_msft['High'] - df_msft['Low'])
df_pg['Range'] = abs(df_pg['High'] - df_pg['Low'])

# write output files
df_aapl.to_csv('AAPL_range.csv')
df_msft.to_csv('MSFT_range.csv')
df_pg.to_csv('PG_range.csv')

# print summary statistics
print(df_aapl.Range.describe())
print(df_msft.Range.describe())
print(df_pg.Range.describe())

# subset Close prices for the year 2008 (partial-string indexing on a DatetimeIndex)
close_aapl = df_aapl['Close'].loc['2008']
close_msft = df_msft['Close'].loc['2008']
close_pg = df_pg['Close'].loc['2008']

# plot the 2008 closing prices for all three tickers
plt.figure()
plt.plot(close_aapl.index, close_aapl, label='AAPL')
plt.plot(close_msft.index, close_msft, label='MSFT')
plt.plot(close_pg.index, close_pg, label='PG')
plt.legend()
plt.xticks(rotation=30)
plt.show()
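
Editor's aside on the .loc['2008'] trick above: pandas supports partial-string indexing on a DatetimeIndex, so a bare year string selects every row whose timestamp falls in that year. A minimal self-contained sketch with made-up dates and values, for illustration only:

# ---- hedged sketch, not in the committed file ---- #
# partial-string indexing: with a DatetimeIndex, .loc['2008'] keeps
# every row whose timestamp falls in calendar year 2008
import pandas as pd

idx = pd.date_range('2007-12-30', periods=5, freq='D')  # Dec 30 .. Jan 3
s = pd.Series([1, 2, 3, 4, 5], index=idx)
print(s.loc['2008'])  # only the 2008-01-01 .. 2008-01-03 rows remain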

readme.md

+2 −2
@@ -39,8 +39,8 @@ Lectures were performed using the presentation slides along with a Jupyter Notebook
 | Python read and write: opening and modifying text/csv files | [lecture09.pdf](lectures/lecture09/lecture09.pdf) | [lecture09.ipynb](lectures/lecture09/lecture09.ipynb) | [hw09.pdf](lectures/lecture09/hw09.pdf) | [HW09 Solution](homework_solutions/hw09_solution.py) |
 | Symbolic math with SymPy, DOE with pyDOE (Second quiz 15 mins before end of class) | [lecture10.pdf](lectures/lecture10/lecture10.pdf) | [lecture10.ipynb](lectures/lecture10/lecture10.ipynb) | [hw10.pdf](lectures/lecture10/hw10.pdf) | [HW10 Solution](homework_solutions/hw10_solution.py) |
 | Scikit-learn: surrogate modeling | [lecture11.pdf](lectures/lecture11/lecture11.pdf) | [lecture11.ipynb](lectures/lecture11/lecture11.ipynb) | [hw11.pdf](lectures/lecture11/hw11.pdf) | [HW11 Solution](homework_solutions/hw11_solution.py) |
-| Scikit-learn: surrogate modeling and machine learning | [lecture12.pdf](lectures/lecture12/lecture12.pdf) | [lecture12.ipynb](lectures/lecture12/lecture12.ipynb) | [hw12.pdf](lectures/lecture12/hw12.pdf) | soon |
-| Pandas and DataFrames / Review for final | [lecture13.pdf](lectures/lecture13/lecture13.pdf) | [lecture13.ipynb](lectures/lecture13/lecture13.ipynb) | [hw13.pdf](lectures/lecture13/hw13.pdf) | soon |
+| Scikit-learn: surrogate modeling and machine learning | [lecture12.pdf](lectures/lecture12/lecture12.pdf) | [lecture12.ipynb](lectures/lecture12/lecture12.ipynb) | [hw12.pdf](lectures/lecture12/hw12.pdf) | [HW12 Solution](homework_solutions/hw12_solution.py) |
+| Pandas and DataFrames / Review for final | [lecture13.pdf](lectures/lecture13/lecture13.pdf) | [lecture13.ipynb](lectures/lecture13/lecture13.ipynb) | [hw13.pdf](lectures/lecture13/hw13.pdf) | [HW13 Solution](homework_solutions/hw13_solution.py) |

 [Quiz](/quiz)
