Understanding Linear Regression in Supervised Learning
Introduction
Machine learning: learning from data and improving decision-making over time without explicit human intervention.
Types of Learning In Machine Learning
There are two main types of learning in machine learning:
- Supervised: learns from labeled data.
- Unsupervised: learns from unlabeled data.
In this blog, you will learn about supervised learning.
There are two sub-categories of supervised learning:
- Regression
- Classification
Regression: used to predict continuous values, such as house prices or stock prices.
Classification: used to predict discrete categories, such as whether an email is spam or not.
To build intuition, let's work through house price prediction with a linear regression model.
Understanding Linear Regression
Linear Regression Model: fitting a straight line to the data.
Let’s start with a problem that you can address using linear regression. Say you want to predict the price of a house based on the size of the house.
Here we have a graph where the horizontal axis is the size of the house in square feet, and the vertical axis is the price of a house in thousands of dollars.
Now, let's say you are a real estate agent helping a client sell her house. She asks you, "How much do you think I can get for this house?" With the help of a linear regression model trained on this dataset, you can estimate the price she could get for it.
Your model will fit a straight line to the data, which might look like this.
Based on this straight-line fit to the data, if the house is 1200 square feet, you can find where that size intersects the best-fit line and trace across to the vertical axis on the left: the predicted price is around $170,000.
This is an example of what's called a supervised learning model. We call it supervised learning because you first train the model on data that contains the right answers: you give the model examples of houses with both the size of the house and the price the model should predict for each house.
Note that if the fitted line does not match the data well, the predicted prices can come out too high or too low.
Now that we understand what a training dataset looks like, let's look at the process of how supervised learning works.
Process of How Supervised Learning Works
How to represent f?
In the equation \(f(x) = wx + b\), think of \(x\) as your input. The function transforms this input using fixed values for \(w\) and \(b\) to produce the output, \(\hat{y}\), which is your estimated result.
In this equation, all you need to do is find the right \(w\) and \(b\) so that \(\hat{y}\) gives good predictions for your dataset.
We can find the right \(w\) and \(b\) with the help of a cost function.
Explore how adjustments to the weights (\(w\)) and bias (\(b\)) affect the predictions by using the interactive linear regression model below.
Interactive Linear Regression
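If you would rather experiment in code, here is a minimal sketch (assuming numpy and matplotlib are available, with a few arbitrary \(w\), \(b\) pairs chosen only for illustration) that plots \(f(x) = wx + b\) for different parameter settings:

```python
import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(0, 3000, 100)                    # house sizes in square feet
settings = [(0.10, 0), (0.15, 20), (0.20, -50)]  # arbitrary (w, b) pairs to compare

for w, b in settings:
    plt.plot(x, w * x + b, label=f"w={w}, b={b}")  # each pair gives a different line

plt.xlabel("Size in feet²")
plt.ylabel("Price in $1000's")
plt.legend()
plt.show()
```

Changing \(w\) tilts the line, while changing \(b\) shifts it up or down.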
Cost Function
The cost function tells us how well the model is doing, so that we can try to make it do better.
Let's first take a look at how to measure how well a line fits the training data. To do that, we're going to construct a cost function. The cost function takes the prediction \(\hat{y}\) and compares it to the target \(y\) by taking \(\hat{y}\) minus \(y\).
\[\begin{equation} \text{Error} = (\hat{y} - y) \end{equation}\]This difference is called the error; it measures how far off the prediction is from the target. Next, let's compute the square of this error. We'll also want to compute this term for every training example \(i\) in the training set.
For each example \(i\), we compute this squared error term. Finally, we want to measure the error across the entire training set, so we sum the squared errors over all \(m\) training examples. Remember, \(m\) is the number of training examples, which is 100 for this dataset.
By convention, to build a cost function that doesn't automatically get bigger as the training set grows, we compute the average squared error instead of the total squared error. We do that by dividing by \(m\), or by \(2m\), like this:
\[J(w,b) = \frac{1}{m} \sum_{i=0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})^2\] \[\text{or}\] \[J(w,b) = \frac{1}{2m} \sum_{i=0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})^2\]This is called the squared error cost function. Dividing by \(2m\) instead of \(m\) simply makes the later derivative expressions a bit neater and does not change where the minimum is.
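To see the formula in action, here is a minimal sketch that computes \(J(w,b)\) by hand on a tiny made-up dataset (three points chosen only for illustration, not the house data):

```python
import numpy as np

# Tiny made-up dataset: three (x, y) pairs
x = np.array([1.0, 2.0, 3.0])
y = np.array([2.0, 2.5, 3.5])

w, b = 0.75, 1.0                       # candidate parameters
y_hat = w * x + b                      # predictions: [1.75, 2.5, 3.25]
cost = np.sum((y_hat - y) ** 2) / (2 * len(x))
print(cost)                            # (0.25**2 + 0**2 + 0.25**2) / 6 ≈ 0.0208
```

A better-fitting pair \((w, b)\) would drive this number closer to zero.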
In machine learning, different people use different cost functions for different applications, but the squared error cost function is the most commonly used for linear regression, and for that matter for regression problems in general, where it seems to give good results for many applications.
The goal of linear regression is to minimize the cost function \(J(w,b)\); we do this with the help of the gradient descent algorithm.
\[\min_{w,b} \quad J(w, b)\]
Gradient Descent
Gradient descent is used all over the place in machine learning, not just for linear regression. Here's an overview of what we'll do with gradient descent. You have the cost function \(J(w,b)\) that you want to minimize.
Algorithm:
- Start with some initial \(w\), \(b\)
- Keep changing \(w\) and \(b\) to reduce \(J(w,b)\)
- Until we settle at or near a minimum
Gradient Descent Algorithm
Gradient descent was described as:
\[\begin{align*} \text{repeat until convergence:} \; \{ \newline \; w &= w - \alpha \frac{\partial J(w,b)}{\partial w} \; \newline b &= b - \alpha \frac{\partial J(w,b)}{\partial b} \newline \} \end{align*}\]where, parameters \(w\), \(b\) are updated simultaneously. The gradient is defined as:
\[\begin{align} \frac{\partial J(w,b)}{\partial w} &= \frac{1}{m} \sum_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})x^{(i)} \\ \frac{\partial J(w,b)}{\partial b} &= \frac{1}{m} \sum_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)}) \\ \end{align}\]Here simultaneously means that you calculate the partial derivatives for all the parameters before updating any of the parameters.
Don't worry if these equations look intimidating; once you work through them in code, you'll find they are simpler than they first appear.
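To make the update rule concrete, here is a minimal sketch of a single simultaneous update on a tiny made-up dataset (three points and a learning rate of 0.1, chosen only for illustration):

```python
import numpy as np

x = np.array([1.0, 2.0, 3.0])
y = np.array([2.0, 2.5, 3.5])
w, b = 0.0, 0.0
alpha = 0.1                             # assumed learning rate
m = len(x)

error = (w * x + b) - y                 # f(x) - y for every example
dj_dw = np.dot(error, x) / m            # partial derivative of J with respect to w
dj_db = np.sum(error) / m               # partial derivative of J with respect to b

# Simultaneous update: both gradients are computed before either parameter changes
w, b = w - alpha * dj_dw, b - alpha * dj_db
print(w, b)                             # w ≈ 0.583, b ≈ 0.267
```

Repeating this step many times moves \(w\) and \(b\) toward the values that minimize \(J(w,b)\), which is exactly what the full implementation below does.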
Implementation
Let’s build a linear regression model from scratch. You can follow along in the notebook linked below:
# !wget https://raw.githubusercontent.com/ravikumarmn/Introduction-to-Machine-Learning/main/house_price_data.csv
# %pip install numpy pandas matplotlib
import numpy as np
import pandas as pd
import copy
import math
import matplotlib.pyplot as plt
house_price_data = pd.read_csv("house_price_data.csv")
house_price_data.head()
|   | size | price |
|---|---|---|
| 0 | 1646.440512 | 156.0 |
| 1 | 2145.568099 | 310.0 |
| 2 | 1808.290128 | 254.0 |
| 3 | 1634.649549 | 137.0 |
| 4 | 1270.964398 | 252.0 |
plt.figure(figsize=(10, 5))
plt.scatter(house_price_data['size'],house_price_data['price'], color='red', marker='x')
plt.title('House sizes and prices')
plt.xlabel('Size in feet²')
plt.ylabel('Price in $1000\'s')
plt.xlim(left=0)
plt.ylim(bottom=0)
plt.legend(["Data Points"], fontsize="x-large")
plt.show()
# Normalize data
size_mean = house_price_data['size'].mean()
size_std = house_price_data['size'].std()
price_mean = house_price_data['price'].mean()
price_std = house_price_data['price'].std()
house_price_data['size_normalized'] = (house_price_data['size'] - size_mean) / size_std
house_price_data['price_normalized'] = (house_price_data['price'] - price_mean) / price_std
x_train = np.array(house_price_data['size_normalized'])
y_train = np.array(house_price_data['price_normalized'])
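As a quick, optional sanity check, the normalized arrays should have a mean close to 0 and a spread close to 1:

```python
# Sanity check: normalized data should be approximately zero-mean with unit spread
print(x_train.mean(), x_train.std())   # both should be ≈ 0 and ≈ 1
print(y_train.mean(), y_train.std())
```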
$$ f_{w,b}(x) = wx + b $$
def linear_fn(x_train, weights, bias):
# f(x) = wx + b
return weights * x_train + bias
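For example, calling linear_fn with an arbitrary weight of 0.5 and bias of 0.1 (values chosen only for illustration) maps each input to a prediction:

```python
# Arbitrary example values for w and b, just to illustrate the output
linear_fn(np.array([-1.0, 0.0, 1.0]), 0.5, 0.1)   # array([-0.4,  0.1,  0.6])
```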
In linear regression, you use the input training data to fit the parameters $w$ and $b$ by minimizing a measure of the error between your predictions $f_{w,b}(x^{(i)})$ and the actual data $y^{(i)}$. This measure is called the cost, $J(w,b)$. In training, you measure the cost over all of the training samples $x^{(i)}, y^{(i)}$:
def compute_cost(x_train, y_train, weights, bias):
m = x_train.shape[0]
f_wb = linear_fn(x_train,weights,bias)
cost = np.sum((f_wb - y_train)**2)
total_cost = (1 / (2 * m)) * cost
return total_cost
# compute_cost(x_train, y_train, 1, 1)
The compute_gradient function below implements the partial derivatives of $J(w,b)$ from the gradient descent update rule defined earlier, with the parameters $w$ and $b$ updated simultaneously.
def compute_gradient(x_train, y_train, weights, bias):
m = x_train.shape[0]
f_wb = linear_fn(x_train, weights, bias)
error = f_wb - y_train
dj_dw = np.dot(error, x_train)/m # We use the dot product, which inherently includes the sum of the element-wise products.
dj_db = np.sum(error) / m # sum the error terms.
return dj_dw, dj_db
# compute_gradient(x_train, y_train, 1, 1) # (-646.0, -397.5)
def gradient_descent(x_train, y_train, w_init=0, b_init=0, alpha = 0.001, num_iters=10000):
    w = copy.deepcopy(w_init)  # copy so the caller's initial value is not modified
    b = b_init
J_history = list()
p_history = list()
for i in range(num_iters):
dj_dw, dj_db = compute_gradient(x_train, y_train, w, b)
b = b - alpha * dj_db
w = w - alpha * dj_dw
if i<100000: # prevent resource exhaustion
J_history.append(compute_cost(x_train, y_train, w , b))
p_history.append([w,b])
if i% math.ceil(num_iters/10) == 0:
print(f"Iteration {i:4}: Cost {J_history[-1]:0.2e} ",
f"dj_dw: {dj_dw: 0.3e}, dj_db: {dj_db: 0.3e} ",
f"w: {w: 0.3e}, b:{b: 0.5e}")
return w, b, J_history, p_history
w_final, b_final, J_hist, p_hist = gradient_descent(x_train ,y_train)
print(f"(w,b) found by gradient descent: ({w_final:8.4f},{b_final:8.4f})")
Iteration    0: Cost 4.94e-01  dj_dw: -8.558e-01, dj_db:  1.554e-17   w:  8.558e-04, b:-1.55431e-20
Iteration 1000: Cost 1.76e-01  dj_dw: -3.178e-01, dj_db: -4.996e-17   w:  5.437e-01, b: 3.49232e-17
Iteration 2000: Cost 1.32e-01  dj_dw: -1.180e-01, dj_db: -3.220e-17   w:  7.453e-01, b: 7.10865e-17
Iteration 3000: Cost 1.26e-01  dj_dw: -4.384e-02, dj_db: -5.773e-17   w:  8.202e-01, b: 1.03145e-16
Iteration 4000: Cost 1.25e-01  dj_dw: -1.628e-02, dj_db: -4.108e-17   w:  8.480e-01, b: 1.11047e-16
Iteration 5000: Cost 1.25e-01  dj_dw: -6.047e-03, dj_db:  2.554e-17   w:  8.583e-01, b: 1.11023e-16
Iteration 6000: Cost 1.25e-01  dj_dw: -2.246e-03, dj_db: -5.218e-17   w:  8.622e-01, b: 1.11070e-16
Iteration 7000: Cost 1.25e-01  dj_dw: -8.341e-04, dj_db:  2.109e-17   w:  8.636e-01, b: 1.11020e-16
Iteration 8000: Cost 1.25e-01  dj_dw: -3.098e-04, dj_db:  1.776e-17   w:  8.641e-01, b: 1.11036e-16
Iteration 9000: Cost 1.25e-01  dj_dw: -1.151e-04, dj_db: -4.885e-17   w:  8.643e-01, b: 1.11069e-16
(w,b) found by gradient descent: (  0.8644,  0.0000)
fig, (ax1, ax2) = plt.subplots(1, 2, constrained_layout=True, figsize=(12,4))
ax1.plot(J_hist[:100])
ax2.plot(1000 + np.arange(len(J_hist[1000:])), J_hist[1000:])
ax1.set_title("Cost vs. iteration (start)"); ax2.set_title("Cost vs. iteration (end)")
ax1.set_ylabel('Cost') ; ax2.set_ylabel('Cost')
ax1.set_xlabel('iteration step') ; ax2.set_xlabel('iteration step')
plt.show()
# Optimal values
print(f"Optimal value of weight is: {w_final}]\nOptimal value of bias is: {b_final}")
Optimal value of weight is: 0.8644087726498778] Optimal value of bias is: 1.1105671937627857e-16
# Plotting the results
plt.figure(figsize=(10, 5))
plt.scatter(house_price_data['size'], house_price_data['price'], color='red', marker='x')
sizes = np.linspace(min(house_price_data['size']), max(house_price_data['size']), 100)
normalized_sizes = (sizes - size_mean) / size_std
predicted_prices = linear_fn(normalized_sizes, w_final, b_final) * price_std + price_mean
plt.plot(sizes, predicted_prices, color='blue')
plt.title('House Size vs. Price with Fit Line')
plt.xlabel('Size in feet²')
plt.ylabel('Price in $1000\'s')
plt.show()
Predictions
Now that you have found the optimal values of the parameters $w$ and $b$, you can use the model to predict house prices with the learned parameters. As expected, the predicted prices are close to the training prices for houses in the dataset, and a prediction for a size that does not appear in the training set is also in line with what we would expect.
def predict_house_price(house_size, w_final, b_final):
"""
Predict the house price for a given house size.
Parameters:
- house_size (float): The size of the house in square feet.
- w_final (float): Final weight from the trained model.
- b_final (float): Final bias from the trained model.
Returns:
- predicted_price (float): The predicted price of the house in the original price scale.
"""
size_mean = house_price_data['size'].mean()
size_std = house_price_data['size'].std()
price_mean = house_price_data['price'].mean()
price_std = house_price_data['price'].std()
# Normalize the input house size
normalized_size = (house_size - size_mean) / size_std
# Predict the normalized price using the model
normalized_price = w_final * normalized_size + b_final
# Unnormalize the predicted price
predicted_price = normalized_price * price_std + price_mean
print(f"Predicted price for a {house_size} sqft house is ${predicted_price:.2f}")
predict_house_price(1000, w_final, b_final)
predict_house_price(1200, w_final, b_final)
predict_house_price(2000, w_final, b_final)
Predicted price for a 1000 sqft house is $160.07 thousand
Predicted price for a 1200 sqft house is $179.87 thousand
Predicted price for a 2000 sqft house is $259.10 thousand
If you found this content helpful, feel free to share the blog.
References
- Supervised Learning: Comprehensive details about supervised learning can be found in the Scikit-learn documentation. Learn more about Supervised Learning.
- Linear Model - Regression: For an in-depth understanding of linear models in regression, refer to the Scikit-learn guide on linear regression. Read about Linear Model-Regression.
- Gradient Descent by Andrew Ng: This video by Andrew Ng provides a foundational explanation of the gradient descent algorithm. Watch the video on Gradient Descent.
- Machine Learning Specialization by Andrew Ng: Explore the Machine Learning Specialization on Coursera to dive deeper into machine learning concepts taught by Andrew Ng. Enroll in the Machine Learning Specialization.