Multiclass Classification¶
In [1]:
Copied!
# Uncomment the following line to install GPU version with Cupy backend
#!pip install 'sorix @ git+https://github.com/Mitchell-Mirano/sorix.git@main'
# Uncomment the following line to install GPU version with Cupy backend
#!pip install 'sorix @ git+https://github.com/Mitchell-Mirano/sorix.git@main'
In [2]:
Copied!
import joblib
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sorix
from sorix.nn import ReLU,Linear
from sorix.optim import SGDMomentum,RMSprop,Adam,SGD
from sorix import tensor,Tensor
from sorix.nn import Module
from sorix.nn import CrossEntropyLoss
from sorix.metrics import confusion_matrix,classification_report
from sorix.model_selection import train_test_split
import joblib
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sorix
from sorix.nn import ReLU,Linear
from sorix.optim import SGDMomentum,RMSprop,Adam,SGD
from sorix import tensor,Tensor
from sorix.nn import Module
from sorix.nn import CrossEntropyLoss
from sorix.metrics import confusion_matrix,classification_report
from sorix.model_selection import train_test_split
In [3]:
Copied!
# Select the GPU when available (sorix mirrors the torch.cuda API); fall back to CPU.
device = 'cuda' if sorix.cuda.is_available() else 'cpu'
device
# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
device = 'cuda' if sorix.cuda.is_available() else 'cpu'
device
✅ GPU basic operation passed ✅ GPU available: NVIDIA GeForce RTX 4070 Laptop GPU CUDA runtime version: 13000 CuPy version: 13.6.0
Out[3]:
'cuda'
Data¶
In [4]:
Copied!
# Synthetic dataset: two concentric noisy rings (classes A, B) centred at the
# origin, plus a third ring (class C) offset to (5, 5), so one class is
# linearly separable and the other two are not.
r1 = 0.5   # radius of the class A ring
r2 = 1.5   # radius of the class B ring
r3 = 1     # radius of the class C ring
num_points = 10000
thetas = np.linspace(0, 2 * np.pi, num_points)
# Gaussian jitter (std 0.1) moves each point off its ideal circle.
# NOTE(review): no np.random.seed is set, so the data differs on every run.
x1 = r1 * np.cos(thetas) + 0.1 * np.random.randn(num_points)
y1 = r1 * np.sin(thetas) + 0.1 * np.random.randn(num_points)
x2 = r2 * np.cos(thetas) + 0.1 * np.random.randn(num_points)
y2 = r2 * np.sin(thetas) + 0.1 * np.random.randn(num_points)
x3 = 5 + r3 * np.cos(thetas) + 0.1 * np.random.randn(num_points)
y3 = 5 + r3 * np.sin(thetas) + 0.1 * np.random.randn(num_points)
plt.figure(figsize=(8, 7))
plt.scatter(x1, y1, s=50, label="Clase A", alpha=0.8)  # class A points
plt.scatter(x2, y2, s=50, label="Clase B", alpha=0.8)  # class B points
plt.scatter(x3, y3, s=50, label="Clase C", alpha=0.8)  # class C points
plt.xlabel("Característica X")
plt.ylabel("Característica Y")
plt.title("Dataset de Círculos con 3 Clases")
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)
plt.axis('equal')  # equal aspect ratio so the rings render as circles
plt.show()
# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
r1 = 0.5
r2 = 1.5
r3 = 1
num_points = 10000
thetas = np.linspace(0, 2 * np.pi, num_points)
x1 = r1 * np.cos(thetas) + 0.1 * np.random.randn(num_points)
y1 = r1 * np.sin(thetas) + 0.1 * np.random.randn(num_points)
x2 = r2 * np.cos(thetas) + 0.1 * np.random.randn(num_points)
y2 = r2 * np.sin(thetas) + 0.1 * np.random.randn(num_points)
x3 = 5 + r3 * np.cos(thetas) + 0.1 * np.random.randn(num_points)
y3 = 5 + r3 * np.sin(thetas) + 0.1 * np.random.randn(num_points)
plt.figure(figsize=(8, 7))
plt.scatter(x1, y1, s=50, label="Clase A", alpha=0.8)  # class A points
plt.scatter(x2, y2, s=50, label="Clase B", alpha=0.8)  # class B points
plt.scatter(x3, y3, s=50, label="Clase C", alpha=0.8)  # class C points
plt.xlabel("Característica X")
plt.ylabel("Característica Y")
plt.title("Dataset de Círculos con 3 Clases")
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)
plt.axis('equal')
plt.show()
In [5]:
Copied!
# Stack the three rings into one long frame: columns x, y, and the class letter.
df = pd.DataFrame(
    {"x": x1.tolist()+x2.tolist()+x3.tolist(),
     "y": y1.tolist()+y2.tolist()+y3.tolist(),
     "labels":['A' for _ in range(num_points)]+['B' for _ in range(num_points)]+['C' for _ in range(num_points)]
    })
# Encode the string labels as integer ids; keep the inverse map so predictions
# can be translated back to letters for plotting later.
labels = df["labels"].unique()
labels2id = {label: i for i, label in enumerate(labels)}
id2labels = {v: k for k, v in labels2id.items()}
df["labels"] = df["labels"].map(labels2id)
df.head()
# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
df = pd.DataFrame(
    {"x": x1.tolist()+x2.tolist()+x3.tolist(),
     "y": y1.tolist()+y2.tolist()+y3.tolist(),
     "labels":['A' for _ in range(num_points)]+['B' for _ in range(num_points)]+['C' for _ in range(num_points)]
    })
labels = df["labels"].unique()
labels2id = {label: i for i, label in enumerate(labels)}
id2labels = {v: k for k, v in labels2id.items()}
df["labels"] = df["labels"].map(labels2id)
df.head()
Out[5]:
| x | y | labels | |
|---|---|---|---|
| 0 | 0.566616 | 0.095752 | 0 |
| 1 | 0.512442 | -0.089349 | 0 |
| 2 | 0.437397 | 0.018412 | 0 |
| 3 | 0.560465 | 0.056711 | 0 |
| 4 | 0.581462 | 0.145193 | 0 |
In [6]:
Copied!
# 80/20 split; the fixed random_state keeps the split reproducible.
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
# Features are the two coordinates; targets are the integer class ids.
# Tensors are moved to the selected device (GPU here).
X_train = tensor(df_train[["x","y"]].values).to(device)
Y_train = tensor(df_train[["labels"]].values).to(device)
X_test = tensor(df_test[["x","y"]].values).to(device)
Y_test = tensor(df_test[["labels"]].values).to(device)
print(f"X_train shape: {X_train.shape}, device: {X_train.device}")
print(f"Y_train shape: {Y_train.shape}, device: {Y_train.device}")
print(f"X_test shape: {X_test.shape}, device: {X_test.device}")
print(f"Y_test shape: {Y_test.shape}, device: {Y_test.device}")
# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
X_train = tensor(df_train[["x","y"]].values).to(device)
Y_train = tensor(df_train[["labels"]].values).to(device)
X_test = tensor(df_test[["x","y"]].values).to(device)
Y_test = tensor(df_test[["labels"]].values).to(device)
print(f"X_train shape: {X_train.shape}, device: {X_train.device}")
print(f"Y_train shape: {Y_train.shape}, device: {Y_train.device}")
print(f"X_test shape: {X_test.shape}, device: {X_test.device}")
print(f"Y_test shape: {Y_test.shape}, device: {Y_test.device}")
X_train shape: sorix.Size([24000, 2]), device: cuda:0 Y_train shape: sorix.Size([24000, 1]), device: cuda:0 X_test shape: sorix.Size([6000, 2]), device: cuda:0 Y_test shape: sorix.Size([6000, 1]), device: cuda:0
In [7]:
Copied!
# Inspect the gradient buffer — no backward() has run yet, so this is
# expected to be unset (the cell produced no output above).
X_train.grad
X_train.grad
In [8]:
Copied!
class Network(Module):
    """Small MLP for 3-class classification of 2-D points: 2 -> 4 -> 4 -> 3."""

    def __init__(self):
        super().__init__()
        # Attribute names (fc1/relu/fc2/fc3) become the state_dict keys used
        # by the save/load cells later, so they must not be renamed.
        self.fc1 = Linear(2, 4)
        self.relu = ReLU()
        self.fc2 = Linear(4, 4)
        self.fc3 = Linear(4, 3)

    def forward(self, x: Tensor) -> Tensor:
        """Return raw class logits (no softmax applied here)."""
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

net = Network().to(device)
net.parameters()

# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
class Network(Module):
    """Small MLP for 3-class classification of 2-D points: 2 -> 4 -> 4 -> 3."""

    def __init__(self):
        super().__init__()
        self.fc1 = Linear(2, 4)
        self.relu = ReLU()
        self.fc2 = Linear(4, 4)
        self.fc3 = Linear(4, 3)

    def forward(self, x: Tensor) -> Tensor:
        """Return raw class logits (no softmax applied here)."""
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

net = Network().to(device)
net.parameters()
Out[8]:
[tensor([[ 0.8218481 , 1.6584973 , -0.21329844, -0.22710423],
[-1.283966 , 1.0882515 , -0.06972136, -0.03704953]], device='cuda:0', requires_grad=True),
tensor([[0., 0., 0., 0.]], device='cuda:0', requires_grad=True),
tensor([[ 0.30648765, 0.21343027, -0.09919398, 0.9340736 ],
[-1.133978 , 0.14940731, 0.28791216, -0.29160607],
[ 0.43589577, 0.4688745 , 0.78475726, -0.0788846 ],
[-0.15354739, 0.38126808, -0.13964173, -1.0757526 ]], device='cuda:0', requires_grad=True),
tensor([[0., 0., 0., 0.]], device='cuda:0', requires_grad=True),
tensor([[ 0.60982543, -1.2970355 , 0.28402847],
[ 1.7949859 , 0.37703976, -0.49080712],
[-0.31916267, 0.26207057, -0.01511212],
[ 0.5347513 , 1.025364 , 0.9294297 ]], device='cuda:0', requires_grad=True),
tensor([[0., 0., 0.]], device='cuda:0', requires_grad=True)]
In [9]:
Copied!
# Cross-entropy over the raw logits; RMSprop with learning rate 1e-2.
criterion = CrossEntropyLoss()
optimizer = RMSprop(net.parameters(), lr=1e-2)
# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
criterion = CrossEntropyLoss()
optimizer = RMSprop(net.parameters(), lr=1e-2)
In [10]:
Copied!
# Sanity-check forward pass before training: one row of 3 logits per sample.
logits = net(X_train)
logits
logits = net(X_train)
logits
Out[10]:
tensor([[ 0.09201065, 0.06878142, -0.04054321],
[ 2.65121776, 1.98188497, -1.16822201],
[ 2.44292547, 1.82617866, -1.07644093],
...,
[ 2.14173553, 1.60102785, -0.9437258 ],
[ 2.14797085, 1.60568898, -0.9464733 ],
[ 0.21315635, 0.14280145, -0.08877873]], device='cuda:0', dtype=sorix.float64, requires_grad=True)
In [11]:
Copied!
# Predicted class = index of the largest logit; keepdims matches Y's (N, 1) shape.
preds = sorix.argmax(logits, axis=1, keepdims=True)
preds
preds = sorix.argmax(logits, axis=1, keepdims=True)
preds
Out[11]:
tensor([[0],
[0],
[0],
...,
[0],
[0],
[0]], device='cuda:0', dtype=sorix.int64)
Training¶
In [12]:
Copied!
# Full-batch training loop with periodic evaluation and early stopping.
for epoch in range(10000 + 1):
    logits = net(X_train)
    loss = criterion(logits, Y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Report metrics every 10 epochs.
    if epoch % 10 == 0:
        preds = sorix.argmax(logits, axis=1, keepdims=True)
        acc_train = (preds == Y_train).mean()
        # Disable autograd for the evaluation forward pass.
        with sorix.no_grad():
            logits = net(X_test)
        preds = sorix.argmax(logits, axis=1, keepdims=True)
        acc_test = (preds == Y_test).mean()
        # f-string keeps the progress columns aligned.
        print(f"[{device}] Epoch {epoch:5d} | Loss: {loss.item():.4f} | Acc Train: {acc_train.item()*100:.2f}% | Acc Test: {acc_test.item()*100:.2f}%")
        if acc_test.item() >= 0.98:  # early stop once test accuracy reaches 98%
            print(f"Entrenamiento completado en {epoch} epochs!")
            break
# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
for epoch in range(10000 + 1):
    logits = net(X_train)
    loss = criterion(logits, Y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        preds = sorix.argmax(logits, axis=1, keepdims=True)
        acc_train = (preds == Y_train).mean()
        with sorix.no_grad():
            logits = net(X_test)
        preds = sorix.argmax(logits, axis=1, keepdims=True)
        acc_test = (preds == Y_test).mean()
        print(f"[{device}] Epoch {epoch:5d} | Loss: {loss.item():.4f} | Acc Train: {acc_train.item()*100:.2f}% | Acc Test: {acc_test.item()*100:.2f}%")
        if acc_test.item() >= 0.98:  # early stop once test accuracy reaches 98%
            print(f"Entrenamiento completado en {epoch} epochs!")
            break
[cuda] Epoch 0 | Loss: 2.0361 | Acc Train: 33.66% | Acc Test: 33.15% [cuda] Epoch 10 | Loss: 0.4788 | Acc Train: 79.40% | Acc Test: 79.10% [cuda] Epoch 20 | Loss: 0.3751 | Acc Train: 83.17% | Acc Test: 83.05% [cuda] Epoch 30 | Loss: 0.3218 | Acc Train: 89.33% | Acc Test: 89.33% [cuda] Epoch 40 | Loss: 0.2806 | Acc Train: 93.16% | Acc Test: 92.45% [cuda] Epoch 50 | Loss: 0.2433 | Acc Train: 94.77% | Acc Test: 94.17% [cuda] Epoch 60 | Loss: 0.2077 | Acc Train: 96.01% | Acc Test: 95.27%
[cuda] Epoch 70 | Loss: 0.1763 | Acc Train: 97.19% | Acc Test: 96.32% [cuda] Epoch 80 | Loss: 0.1470 | Acc Train: 97.98% | Acc Test: 97.58% [cuda] Epoch 90 | Loss: 0.1227 | Acc Train: 98.64% | Acc Test: 98.47% Entrenamiento completado en 90 epochs!
Prediction¶
In [13]:
Copied!
# Evaluate on the held-out set and scatter the points coloured by PREDICTED class.
with sorix.no_grad():
    logits = net(X_test)
    preds = sorix.argmax(logits, axis=1, keepdims=True)
    acc = (preds == Y_test).mean()

# Map integer predictions back to the original letter labels for the legend.
y_pred_labels = [id2labels[y.item()] for y in preds]
df_test['pred_labels'] = y_pred_labels

for label in df_test['pred_labels'].unique():
    x = df_test[df_test['pred_labels'] == label]['x']
    y = df_test[df_test['pred_labels'] == label]['y']
    plt.scatter(x, y, s=50, label=label)

# Fixed title typo: "Tetst" -> "Test".
plt.title(f"Circles Test Dataset: Accuracy: {100*acc.item():.2f}%")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
with sorix.no_grad():
    logits = net(X_test)
    preds = sorix.argmax(logits, axis=1, keepdims=True)
    acc = (preds == Y_test).mean()
y_pred_labels = [id2labels[y.item()] for y in preds]
df_test['pred_labels'] = y_pred_labels
for label in df_test['pred_labels'].unique():
    x = df_test[df_test['pred_labels'] == label]['x']
    y = df_test[df_test['pred_labels'] == label]['y']
    plt.scatter(x, y, s=50, label=label)
plt.title(f"Circles Test Dataset: Accuracy: {100*acc.item():.2f}%")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
Out[13]:
<matplotlib.legend.Legend at 0x7fc42ad13610>
In [14]:
Copied!
# Confusion matrix of true test labels vs. predictions, rendered as counts.
sns.heatmap(confusion_matrix(Y_test, preds), annot=True, cmap="Blues", fmt=".0f")
sns.heatmap(confusion_matrix(Y_test, preds), annot=True, cmap="Blues", fmt=".0f")
Out[14]:
<Axes: >
In [15]:
Copied!
# Per-class precision / recall / F1 on the test split.
print(classification_report(Y_test, preds))
print(classification_report(Y_test, preds))
precision recall f1-score support 0 0.95 1.00 0.98 1917 1 1.00 0.95 0.98 1989 2 1.00 1.00 1.00 2094 accuracy 0.98 6000 macro avg 0.98 0.98 0.98 6000 weighted avg 0.99 0.98 0.98 6000
Save and Load Model¶
In [16]:
Copied!
# Persist only the learned parameters (state dict), not the whole model object.
sorix.save(net.state_dict(),"model_weights.sor")
sorix.save(net.state_dict(),"model_weights.sor")
CPU¶
In [17]:
Copied!
# Rebuild the network on CPU and load the saved weights into it.
net2 = Network()
net2.load_state_dict(sorix.load("model_weights.sor"))
# Move the input to CPU if needed so it matches the model's device.
# NOTE(review): earlier output printed X_test.device as 'cuda:0', while these
# branches compare against 'cpu'/'cuda' — confirm sorix's device equality
# semantics make the intended branch fire.
if X_test.device == 'cpu':
    with sorix.no_grad():
        logits = net2(X_test)
if X_test.device == 'cuda':
    with sorix.no_grad():
        logits = net2(X_test.to('cpu'))
logits
# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
net2 = Network()
net2.load_state_dict(sorix.load("model_weights.sor"))
if X_test.device == 'cpu':
    with sorix.no_grad():
        logits = net2(X_test)
if X_test.device == 'cuda':
    with sorix.no_grad():
        logits = net2(X_test.to('cpu'))
logits
Out[17]:
tensor([[-19.54701513, 5.20614781, 9.78048857],
[-21.92400001, 5.05538736, 10.33117134],
[-19.03847451, 4.6375706 , 9.03179574],
...,
[ 4.73455059, 2.95294746, -2.148889 ],
[ 2.27274471, 2.09316105, -1.57196888],
[-24.52207742, 5.4836166 , 11.55275332]], dtype=sorix.float64)
GPU¶
In [18]:
Copied!
# Rebuild the network, load the saved weights, then move the model to GPU.
net2 = Network()
net2.load_state_dict(sorix.load("model_weights.sor"))
net2.to('cuda')
# Move the input to GPU if needed so it matches the model's device.
# NOTE(review): earlier output printed X_test.device as 'cuda:0', while these
# branches compare against 'cpu'/'cuda' — confirm sorix's device equality
# semantics make the intended branch fire.
if X_test.device == 'cpu':
    with sorix.no_grad():
        logits = net2(X_test.to('cuda'))
if X_test.device == 'cuda':
    with sorix.no_grad():
        logits = net2(X_test)
logits
# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
net2 = Network()
net2.load_state_dict(sorix.load("model_weights.sor"))
net2.to('cuda')
if X_test.device == 'cpu':
    with sorix.no_grad():
        logits = net2(X_test.to('cuda'))
if X_test.device == 'cuda':
    with sorix.no_grad():
        logits = net2(X_test)
logits
Out[18]:
tensor([[-19.54701513, 5.20614781, 9.78048857],
[-21.92400001, 5.05538736, 10.33117134],
[-19.03847451, 4.6375706 , 9.03179574],
...,
[ 4.73455059, 2.95294746, -2.148889 ],
[ 2.27274471, 2.09316105, -1.57196888],
[-24.52207742, 5.4836166 , 11.55275332]], device='cuda:0', dtype=sorix.float64)