Econometrics: Methods and Applications Erasmus University Rotterdam https://www.coursera.org/learn/erasmus-econometrics/home/welcome
Dataset TrainExer13 contains the winning times (W) of the Olympic 100-meter finals (for men) from 1948 to 2004.
The calendar years 1948-2004 are transformed to games (G) 1-15 to simplify computations. A simple regression model for the trend in winning times is $W_i=\alpha + \beta G_i + \epsilon_i$
(a) Compute $\alpha$ and $\beta$, and determine the values of $R^2$ and $s$
(b) Are you confident in the predictive ability of this model? Motivate your answer.
(c) What prediction do you get for 2008, 2012, and 2016? Compare your predictions with the actual winning times.
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
https://pandas.pydata.org/docs/getting_started/intro_tutorials/03_subset_data.html
https://numpy.org/doc/stable/reference/generated/numpy.reshape.html
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html
# Column G = game number (1948 → 1, ..., 2004 → 15); column T = winning time W in seconds
TrainExer13 = pd.read_csv('TrainExer13.txt', sep='\t', header=0, names=['G', 'T'], index_col=1)
TrainExer13
Year | G | T
---|---|---
1948 | 1 | 10.30
1952 | 2 | 10.40
1956 | 3 | 10.50
1960 | 4 | 10.20
1964 | 5 | 10.00
1968 | 6 | 9.95
1972 | 7 | 10.14
1976 | 8 | 10.06
1980 | 9 | 10.25
1984 | 10 | 9.99
1988 | 11 | 9.92
1992 | 12 | 9.96
1996 | 13 | 9.84
2000 | 14 | 9.87
2004 | 15 | 9.85
X = TrainExer13['G'].to_numpy().reshape(-1, 1)  # regressor: game number G, as a column vector
y = TrainExer13['T'].to_numpy()                 # response: winning time W in seconds
model = LinearRegression().fit(X, y)
model.intercept_
10.386000000000001
model.coef_
array([-0.038])
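As a cross-check (not part of the original exercise), the intercept and slope above can be reproduced from the closed-form OLS formulas $\hat{\beta} = \sum_i (G_i - \bar{G})(W_i - \bar{W}) / \sum_i (G_i - \bar{G})^2$ and $\hat{\alpha} = \bar{W} - \hat{\beta}\bar{G}$. The sketch below hardcodes the table data so it runs standalone:

```python
import numpy as np

# Winning times W for games G = 1..15 (1948-2004), copied from the table above
G = np.arange(1, 16)
W = np.array([10.30, 10.40, 10.50, 10.20, 10.00, 9.95, 10.14, 10.06,
              10.25, 9.99, 9.92, 9.96, 9.84, 9.87, 9.85])

# Closed-form OLS: slope = S_GW / S_GG, intercept = mean(W) - slope * mean(G)
beta = np.sum((G - G.mean()) * (W - W.mean())) / np.sum((G - G.mean()) ** 2)
alpha = W.mean() - beta * G.mean()
print(alpha, beta)  # matches model.intercept_ (10.386) and model.coef_ (-0.038) above
```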
plt.scatter(X,y)
plt.plot(X, model.predict(X))
plt.xlabel('Game G')
plt.ylabel('Winning time W (s)')
plt.title('Winning times (W) of the Olympic 100-meter finals (for men) from 1948 to 2004')
residuals = y - model.predict(X)
s2 = np.sum(np.square(residuals)) / (y.size - 2)  # residual variance s^2, with n - 2 degrees of freedom
s = np.sqrt(s2)                                   # standard error of regression s
s
0.1228257...
r_sq = model.score(X, y)
r_sq
0.6733728599027362
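The same $R^2$ can be recovered from its definition $R^2 = 1 - \mathrm{SSR}/\mathrm{SST}$, where SSR is the residual sum of squares and SST the total sum of squares around $\bar{W}$. A standalone sketch with the data and the fitted coefficients hardcoded:

```python
import numpy as np

G = np.arange(1, 16)
W = np.array([10.30, 10.40, 10.50, 10.20, 10.00, 9.95, 10.14, 10.06,
              10.25, 9.99, 9.92, 9.96, 9.84, 9.87, 9.85])

fitted = 10.386 - 0.038 * G          # alpha and beta from the regression above
ssr = np.sum((W - fitted) ** 2)      # residual sum of squares
sst = np.sum((W - W.mean()) ** 2)    # total sum of squares
r_sq = 1 - ssr / sst
print(r_sq)  # ~0.673, matching model.score(X, y)
```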
model.predict(np.array([16,]).reshape((-1,1)))
array([9.778])
model.predict(np.array([17,]).reshape((-1,1)))
array([9.74])
model.predict(np.array([18,]).reshape((-1,1)))
array([9.702])
https://olympics.com/en/olympic-games/olympic-results
Olympic Games Beijing 2008: 9.890 https://olympics.com/en/olympic-games/beijing-2008/results/athletics/100m-men
Olympic Games London 2012: 9.630 https://olympics.com/en/olympic-games/london-2012/results/athletics/100m-men
Olympic Games Rio 2016: 9.810 https://olympics.com/en/olympic-games/rio-2016/results/athletics/100m-men
print("Residual for 2008 prediction is " + str(9.890 - 9.778))
Residual for 2008 prediction is 0.1120000000000001
print("Residual for 2012 prediction is " + str(9.630 - 9.74))
Residual for 2012 prediction is -0.10999999999999943
print("Residual for 2016 prediction is " + str(9.810 - 9.702))
Residual for 2016 prediction is 0.10800000000000054
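To judge whether these forecast errors are large, they can be compared with the standard error of an out-of-sample forecast, $s\sqrt{1 + 1/n + (G_0 - \bar{G})^2 / \sum_i (G_i - \bar{G})^2}$ (the usual simple-regression forecast formula; this is an addition to the exercise, not part of it). A standalone sketch:

```python
import numpy as np

G = np.arange(1, 16)
W = np.array([10.30, 10.40, 10.50, 10.20, 10.00, 9.95, 10.14, 10.06,
              10.25, 9.99, 9.92, 9.96, 9.84, 9.87, 9.85])
n = G.size

# Refit OLS and recompute s so the sketch is self-contained
beta = np.sum((G - G.mean()) * (W - W.mean())) / np.sum((G - G.mean()) ** 2)
alpha = W.mean() - beta * G.mean()
resid = W - (alpha + beta * G)
s = np.sqrt(np.sum(resid ** 2) / (n - 2))

for g0, actual in [(16, 9.890), (17, 9.630), (18, 9.810)]:
    pred = alpha + beta * g0
    se = s * np.sqrt(1 + 1 / n + (g0 - G.mean()) ** 2 / np.sum((G - G.mean()) ** 2))
    print(g0, round(pred, 3), round(se, 3), round(actual - pred, 3))
```

All three forecast errors turn out to be smaller than one forecast standard error (roughly 0.14 s at these horizons), so they are not statistically surprising on their own, even though the forecast errors are sizeable relative to the margins by which these races are decided.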
Any model of a quantity that cannot be negative, such as the duration of a 100 m race, must eventually break down: a negative linear trend extrapolated far enough predicts negative winning times. But even within this a priori limited domain the model shows a weakness. Although the residuals are roughly balanced between positive and negative, their absolute size shrinks as $G$ increases: the residuals are large for low $G$ and small for high $G$. This heteroskedastic pattern is a symptom of a misspecified model; the improvement in winning times appears to level off over time rather than decline at a constant rate.
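The shrinking residuals can be quantified with a crude split-sample check in the spirit of a Goldfeld-Quandt test (again an addition, not part of the exercise): compare the average absolute residual over the first and second halves of the sample.

```python
import numpy as np

G = np.arange(1, 16)
W = np.array([10.30, 10.40, 10.50, 10.20, 10.00, 9.95, 10.14, 10.06,
              10.25, 9.99, 9.92, 9.96, 9.84, 9.87, 9.85])

beta = np.sum((G - G.mean()) * (W - W.mean())) / np.sum((G - G.mean()) ** 2)
alpha = W.mean() - beta * G.mean()
resid = np.abs(W - (alpha + beta * G))

# Average absolute residual, first half (G = 1..7) vs second half (G = 9..15)
early, late = resid[:7].mean(), resid[8:].mean()
print(round(early, 3), round(late, 3))  # early errors are roughly twice the late ones
```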