Intro
I implemented linear regression with gradient descent in NumPy.
Since it achieves performance that holds up against the open-source implementation, I'm sharing it here.
I wrote the code carefully, so I hope it makes a useful reference.
Linear Regression with Gradient Descent in NumPy
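For reference, the updater function below performs plain batch gradient descent on the half mean squared error. With n training samples, feature matrix X, coefficient vector m, intercept b, and learning rate \alpha, each step computes:

J(b, m) = \frac{1}{2n} \sum_{i=1}^{n} \left( \hat{y}_i - y_i \right)^2, \qquad \hat{y}_i = m^\top x_i + b

\frac{\partial J}{\partial b} = \frac{1}{n} \sum_{i=1}^{n} \left( \hat{y}_i - y_i \right), \qquad \frac{\partial J}{\partial m} = \frac{1}{n} X^\top (\hat{y} - y)

b \leftarrow b - \alpha \, \frac{\partial J}{\partial b}, \qquad m \leftarrow m - \alpha \, \frac{\partial J}{\partial m}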
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston  # NOTE: removed in scikit-learn 1.2; requires an older version
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
def updater(b_current, m_current, X_vector, y_vector, learning_rate):
    # Predictions with the current intercept b and coefficient vector m
    y_pred = np.dot(X_vector, m_current) + b_current
    residual = y_pred - y_vector
    # Half mean squared error: the cost being minimized
    mse = np.sum(residual ** 2) / (2 * len(X_vector))
    # Gradients of the cost with respect to b and m
    gradient_b = np.sum(residual) / len(X_vector)
    gradient_m = np.dot(X_vector.T, residual) / len(X_vector)
    # One gradient-descent step
    b_current -= learning_rate * gradient_b
    m_current -= learning_rate * gradient_m
    return b_current, m_current, mse
def gradient_descent_runner(X_vector, y_vector, learning_rate, num_iterations, starting_m, starting_b):
    b = starting_b
    m = starting_m
    for i in range(num_iterations):
        b, m, mse = updater(b, m, X_vector, y_vector, learning_rate)
    # Report the final state once training has finished
    print("After {0} iterations b = {1}, m = {2}, error = {3}".format(num_iterations, b, m, mse))
    return b, m
def main():
    # Use two features of the Boston housing dataset:
    # average rooms (RM) and % lower-status population (LSTAT)
    boston = load_boston()
    df = pd.DataFrame(boston.data, columns=boston.feature_names)[["RM", "LSTAT"]]
    df["target"] = boston.target
    m_dimension = df.drop("target", axis=1).shape[1]
    X = df.drop("target", axis=1).values
    y = df["target"].ravel()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=100)
    learning_rate = 0.0001
    num_iterations = 100000
    initial_m = np.zeros(m_dimension)
    initial_b = 0
    b, m = gradient_descent_runner(X_train, y_train, learning_rate, num_iterations, initial_m, initial_b)
    return b, m, X_train, X_test, y_train, y_test
if __name__ == '__main__':
    b, m, X_train, X_test, y_train, y_test = main()
    y_prediction_from_scratch = np.dot(X_test, m) + b
    print("Linear regression with gradient descent implemented in NumPy")
    print("-----------------------------------------------------")
    print("R2 Score: {}".format(round(r2_score(y_test, y_prediction_from_scratch), 4)))
    print("RMSE: {}".format(round(np.sqrt(mean_squared_error(y_test, y_prediction_from_scratch)), 4)))
    print("-----------------------------------------------------")
Open-Source Linear Regression with Ordinary Least Squares
from sklearn.linear_model import LinearRegression

# Fit scikit-learn's ordinary-least-squares baseline on the same split
lr = LinearRegression().fit(X_train, y_train)
print("Open-source linear regression (scikit-learn)")
print("-----------------------------------------------------")
print("R2 Score: {}".format(round(r2_score(y_test, lr.predict(X_test)), 4)))
print("RMSE: {}".format(round(np.sqrt(mean_squared_error(y_test, lr.predict(X_test))), 4)))
print("-----------------------------------------------------")