Simple Linear Regression
Implement simple linear regression using gradient descent in python. Read training dataset from
file, build linear regression model and predict the output for few unseen data samples.
Use minimum three datasets for experimentation. Salary_Data.csv is provided as an example.
In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [8]:
plt.rcParams['figure.figsize'] = (6.0, 4.0)
In [9]:
data = pd.read_csv('Salary_Data.csv')
In [10]:
X = data.iloc[:, 0]
Y = data.iloc[:, 1]
plt.scatter(X, Y)
plt.xlabel('x')
plt.ylabel('y')
plt.title('Training Data')
plt.show()
In [25]:
theta1 = 9449.967223163576
theta0 = 25792.16716735611
alpha = 0.01 # The learning Rate
m = float(len(X)) # Number of elements in X
epsilon = 0.000001
CostOld=9999
diff = 1
iterations = 1
In [26]:
while abs(diff) > epsilon:
Y_pred = theta1*X + theta0 # The current predicted value of Y
Cost = (1/m) * sum((Y_pred-Y)**2)
# C.append(Cost)
D_theta1 = (1/m) * sum(X * (Y_pred - Y)) # Derivative wrt theta1
D_theta0 = (1/m) * sum(Y_pred - Y) # Derivative wrt theta0
theta1 = theta1 - alpha * D_theta1 # Update theta1
theta0 = theta0 - alpha * D_theta0 # Update theta0
#print (Cost)
diff = CostOld - Cost
CostOld = Cost
iterations = iterations + 1
print ("Iterations = ", str(iterations))
print ("theta0 = ", theta0, "theta1 = ", theta1)
#plt.plot(C)
plt.scatter(X, Y)
plt.plot(X, Y_pred, color = 'red')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Fitting the Linear Hypothesis')
plt.show()
Iterations = 4 theta0 = 25792.167376520094 theta1 = 9449.967192124517
In [28]:
test_X = 1.2
Y_pred = theta1*test_X + theta0
print("Salary Prediction for Experience of ", str(test_X), " years =", round(Y_pred,2))
Salary Prediction for Experience of 1.2 years = 37132.13
In [ ]:
Tags:
Machine Learning
