Linear Regression¶

In [1]:

Copied!

# Uncomment the next line and run this cell to install sorix
#!pip install 'sorix @ git+https://github.com/Mitchell-Mirano/sorix.git@main'
# Uncomment the next line and run this cell to install sorix
#!pip install 'sorix @ git+https://github.com/Mitchell-Mirano/sorix.git@main'

In [2]:

Copied!





import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import sorix
from sorix.nn import Linear, MSELoss
from sorix.optim import RMSprop, Adam
from sorix.metrics import regression_report,r2_score
from sorix.model_selection import train_test_split
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import sorix
from sorix.nn import Linear, MSELoss
from sorix.optim import RMSprop, Adam
from sorix.metrics import regression_report,r2_score
from sorix.model_selection import train_test_split

In [3]:

Copied!

device = 'cuda' if sorix.cuda.is_available() else 'cpu'
device
device = 'cuda' if sorix.cuda.is_available() else 'cpu'
device

✅ GPU basic operation passed
✅ GPU available: NVIDIA GeForce RTX 4070 Laptop GPU
CUDA runtime version: 13000
CuPy version: 13.6.0

Out[3]:

'cuda'

In [4]:

Copied!





n = 10000
x=np.linspace(2,20,n)
y=2*x + 1 + 2*np.sin(x) + np.random.randn(n)

data = pd.DataFrame({'x':x, 'y':y})
data.head()
n = 10000
x=np.linspace(2,20,n)
y=2*x + 1 + 2*np.sin(x) + np.random.randn(n)

data = pd.DataFrame({'x':x, 'y':y})
data.head()

Out[4]:

	x	y
0	2.000000	6.031291
1	2.001800	7.039682
2	2.003600	7.738353
3	2.005401	7.783779
4	2.007201	8.820567

In [5]:

Copied!

plt.figure(figsize=(12,8))
plt.scatter(data['x'],data['y'],s=50)
plt.figure(figsize=(12,8))
plt.scatter(data['x'],data['y'],s=50)

Out[5]:

<matplotlib.collections.PathCollection at 0x7f9f9e150050>

No description has been provided for this image

In [6]:

Copied!

data_train, data_test = train_test_split(data, test_size=0.2)

X_train = data_train['x'].values.reshape(-1,1)
y_train = data_train['y'].values.reshape(-1,1)

X_test = data_test['x'].values.reshape(-1,1)
y_test = data_test['y'].values.reshape(-1,1)
data_train, data_test = train_test_split(data, test_size=0.2)

X_train = data_train['x'].values.reshape(-1,1)
y_train = data_train['y'].values.reshape(-1,1)

X_test = data_test['x'].values.reshape(-1,1)
y_test = data_test['y'].values.reshape(-1,1)

In [7]:

Copied!

X_train_tensor = sorix.tensor(X_train).to(device)
y_train_tensor = sorix.tensor(y_train).to(device)

X_test_tensor = sorix.tensor(X_test).to(device)
y_test_tensor = sorix.tensor(y_test).to(device)

print(X_train_tensor.shape, y_train_tensor.shape, X_test_tensor.shape, y_test_tensor.shape)
X_train_tensor = sorix.tensor(X_train).to(device)
y_train_tensor = sorix.tensor(y_train).to(device)

X_test_tensor = sorix.tensor(X_test).to(device)
y_test_tensor = sorix.tensor(y_test).to(device)

print(X_train_tensor.shape, y_train_tensor.shape, X_test_tensor.shape, y_test_tensor.shape)

sorix.Size([8000, 1]) sorix.Size([8000, 1]) sorix.Size([2000, 1]) sorix.Size([2000, 1])

In [8]:

Copied!

model = Linear(1,1).to(device)

loss_fn = MSELoss()
optimizer = Adam(model.parameters(), lr=0.01)
model = Linear(1,1).to(device)

loss_fn = MSELoss()
optimizer = Adam(model.parameters(), lr=0.01)

In [9]:

Copied!





for itr in range(1000+1):
    y_pred = model(X_train_tensor)
    loss = loss_fn(y_pred, y_train_tensor)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if itr % 100 == 0:
        print(f"Epoch: {itr:5d} | Loss: {loss.data:.6f}")
for itr in range(1000+1):
    y_pred = model(X_train_tensor)
    loss = loss_fn(y_pred, y_train_tensor)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if itr % 100 == 0:
        print(f"Epoch: {itr:5d} | Loss: {loss.data:.6f}")

Epoch:     0 | Loss: 10.239796

Epoch:   100 | Loss: 3.091258
Epoch:   200 | Loss: 3.012487

Epoch:   300 | Loss: 2.964709

Epoch:   400 | Loss: 2.942883
Epoch:   500 | Loss: 2.934947

Epoch:   600 | Loss: 2.932604

Epoch:   700 | Loss: 2.932038
Epoch:   800 | Loss: 2.931925

Epoch:   900 | Loss: 2.931907

Epoch:  1000 | Loss: 2.931905

In [10]:

Copied!





with sorix.no_grad():
    y_pred_train = model(X_train_tensor)
    y_pred_test = model(X_test_tensor)

print("Train")
print(regression_report(y_train_tensor, y_pred_train))
print("\nTest")
print(regression_report(y_test_tensor, y_pred_test))
with sorix.no_grad():
    y_pred_train = model(X_train_tensor)
    y_pred_test = model(X_test_tensor)

print("Train")
print(regression_report(y_train_tensor, y_pred_train))
print("\nTest")
print(regression_report(y_test_tensor, y_pred_test))

Train
Metric |     Score |    Range
-----------------------------
R2     |    0.9735 | [0,   1]
MAE    |    1.4306 | [0,  ∞) 
MSE    |    2.9319 | [0,  ∞) 
RMSE   |    1.7123 | [0,  ∞) 
MAPE   |    8.6057 | [0, 100]

Test
Metric |     Score |    Range
-----------------------------
R2     |    0.9741 | [0,   1]
MAE    |    1.4141 | [0,  ∞) 
MSE    |    2.8980 | [0,  ∞) 
RMSE   |    1.7023 | [0,  ∞) 
MAPE   |    8.2353 | [0, 100]

In [11]:

Copied!

model.coef_, model.intercept_
model.coef_, model.intercept_

Out[11]:

(array([1.9982972], dtype=float32), 0.8980032205581665)

In [12]:

Copied!





r2_test = r2_score(y_test_tensor, y_pred_test)

plt.scatter(X_test_tensor,y_test_tensor,s=50)
plt.scatter(X_test_tensor,y_pred_test,s=50)
plt.title(f'Linear Regression on Test Data(Accuracy:{r2_test*100:.3f}%)')
plt.text(5, 5, f'y = {model.coef_[0]:.2f}x + {model.intercept_:.3f}')
r2_test = r2_score(y_test_tensor, y_pred_test)

plt.scatter(X_test_tensor,y_test_tensor,s=50)
plt.scatter(X_test_tensor,y_pred_test,s=50)
plt.title(f'Linear Regression on Test Data(Accuracy:{r2_test*100:.3f}%)')
plt.text(5, 5, f'y = {model.coef_[0]:.2f}x + {model.intercept_:.3f}')

Out[12]:

Text(5, 5, 'y = 2.00x + 0.898')

Save Model¶

In [13]:

Copied!

sorix.save(model.state_dict(),"regression_model.sor")
sorix.save(model.state_dict(),"regression_model.sor")

In [14]:

Copied!

model2 = Linear(1,1)
model2.load_state_dict(sorix.load("regression_model.sor"))
model2.to(device)
model2 = Linear(1,1)
model2.load_state_dict(sorix.load("regression_model.sor"))
model2.to(device)

Out[14]:

Linear(in_features=1, out_features=1, bias=True)

In [15]:

Copied!





with sorix.no_grad():
    r2_train = r2_score(y_train_tensor, model2(X_train_tensor))
    r2_test =  r2_score(y_test_tensor, model2(X_test_tensor))
    print(f"R2 Train: {100*r2_train:5.2f} % | R2 Test: {100*r2_test:5.2f} %")
with sorix.no_grad():
    r2_train = r2_score(y_train_tensor, model2(X_train_tensor))
    r2_test =  r2_score(y_test_tensor, model2(X_test_tensor))
    print(f"R2 Train: {100*r2_train:5.2f} % | R2 Test: {100*r2_test:5.2f} %")

R2 Train: 97.35 % | R2 Test: 97.41 %