Multiclass Classification¶
In [1]:
Copied!
# Uncomment the following line to install GPU version with Cupy backend
#!pip install 'sorix @ git+https://github.com/Mitchell-Mirano/sorix.git@main'
# Uncomment the following line to install GPU version with Cupy backend
#!pip install 'sorix @ git+https://github.com/Mitchell-Mirano/sorix.git@main'
In [2]:
Copied!
import joblib
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sorix
from sorix.nn import ReLU,Linear
from sorix.optim import SGDMomentum,RMSprop,Adam,SGD
from sorix import tensor,Tensor
from sorix.nn import Module
from sorix.nn import CrossEntropyLoss
from sorix.metrics import confusion_matrix,classification_report
from sorix.model_selection import train_test_split
import joblib
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sorix
from sorix.nn import ReLU,Linear
from sorix.optim import SGDMomentum,RMSprop,Adam,SGD
from sorix import tensor,Tensor
from sorix.nn import Module
from sorix.nn import CrossEntropyLoss
from sorix.metrics import confusion_matrix,classification_report
from sorix.model_selection import train_test_split
In [3]:
Copied!
# Select the GPU when available (sorix mirrors the torch.cuda API); fall back to CPU.
device = 'cuda' if sorix.cuda.is_available() else 'cpu'
device
# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
device = 'cuda' if sorix.cuda.is_available() else 'cpu'
device
✅ GPU basic operation passed ✅ GPU available: NVIDIA GeForce RTX 4070 Laptop GPU CUDA runtime version: 13000 CuPy version: 13.6.0
Out[3]:
'cuda'
Data¶
In [4]:
Copied!
# Synthetic dataset: two concentric noisy rings (classes A, B) centred at the
# origin, plus a third ring (class C) offset to (5, 5), so one class is
# linearly separable and the other two are not.
r1 = 0.5   # radius of the class A ring
r2 = 1.5   # radius of the class B ring
r3 = 1     # radius of the class C ring
num_points = 10000
thetas = np.linspace(0, 2 * np.pi, num_points)
# Gaussian jitter (std 0.1) moves each point off its ideal circle.
# NOTE(review): no np.random.seed is set, so the data differs on every run.
x1 = r1 * np.cos(thetas) + 0.1 * np.random.randn(num_points)
y1 = r1 * np.sin(thetas) + 0.1 * np.random.randn(num_points)
x2 = r2 * np.cos(thetas) + 0.1 * np.random.randn(num_points)
y2 = r2 * np.sin(thetas) + 0.1 * np.random.randn(num_points)
x3 = 5 + r3 * np.cos(thetas) + 0.1 * np.random.randn(num_points)
y3 = 5 + r3 * np.sin(thetas) + 0.1 * np.random.randn(num_points)
plt.figure(figsize=(8, 7))
plt.scatter(x1, y1, s=50, label="Clase A", alpha=0.8)  # class A points
plt.scatter(x2, y2, s=50, label="Clase B", alpha=0.8)  # class B points
plt.scatter(x3, y3, s=50, label="Clase C", alpha=0.8)  # class C points
plt.xlabel("Característica X")
plt.ylabel("Característica Y")
plt.title("Dataset de Círculos con 3 Clases")
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)
plt.axis('equal')  # equal aspect ratio so the rings render as circles
plt.show()
# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
r1 = 0.5
r2 = 1.5
r3 = 1
num_points = 10000
thetas = np.linspace(0, 2 * np.pi, num_points)
x1 = r1 * np.cos(thetas) + 0.1 * np.random.randn(num_points)
y1 = r1 * np.sin(thetas) + 0.1 * np.random.randn(num_points)
x2 = r2 * np.cos(thetas) + 0.1 * np.random.randn(num_points)
y2 = r2 * np.sin(thetas) + 0.1 * np.random.randn(num_points)
x3 = 5 + r3 * np.cos(thetas) + 0.1 * np.random.randn(num_points)
y3 = 5 + r3 * np.sin(thetas) + 0.1 * np.random.randn(num_points)
plt.figure(figsize=(8, 7))
plt.scatter(x1, y1, s=50, label="Clase A", alpha=0.8)  # class A points
plt.scatter(x2, y2, s=50, label="Clase B", alpha=0.8)  # class B points
plt.scatter(x3, y3, s=50, label="Clase C", alpha=0.8)  # class C points
plt.xlabel("Característica X")
plt.ylabel("Característica Y")
plt.title("Dataset de Círculos con 3 Clases")
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)
plt.axis('equal')
plt.show()
In [5]:
Copied!
# Stack the three rings into one long frame: columns x, y, and the class letter.
df = pd.DataFrame(
    {"x": x1.tolist()+x2.tolist()+x3.tolist(),
     "y": y1.tolist()+y2.tolist()+y3.tolist(),
     "labels":['A' for _ in range(num_points)]+['B' for _ in range(num_points)]+['C' for _ in range(num_points)]
    })
# Encode the string labels as integer ids; keep the inverse map so predictions
# can be translated back to letters for plotting later.
labels = df["labels"].unique()
labels2id = {label: i for i, label in enumerate(labels)}
id2labels = {v: k for k, v in labels2id.items()}
df["labels"] = df["labels"].map(labels2id)
df.head()
# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
df = pd.DataFrame(
    {"x": x1.tolist()+x2.tolist()+x3.tolist(),
     "y": y1.tolist()+y2.tolist()+y3.tolist(),
     "labels":['A' for _ in range(num_points)]+['B' for _ in range(num_points)]+['C' for _ in range(num_points)]
    })
labels = df["labels"].unique()
labels2id = {label: i for i, label in enumerate(labels)}
id2labels = {v: k for k, v in labels2id.items()}
df["labels"] = df["labels"].map(labels2id)
df.head()
Out[5]:
| x | y | labels | |
|---|---|---|---|
| 0 | 0.566616 | 0.095752 | 0 |
| 1 | 0.512442 | -0.089349 | 0 |
| 2 | 0.437397 | 0.018412 | 0 |
| 3 | 0.560465 | 0.056711 | 0 |
| 4 | 0.581462 | 0.145193 | 0 |
In [6]:
Copied!
# 80/20 split; the fixed random_state keeps the split reproducible.
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
# Features are the two coordinates; targets are the integer class ids.
# Tensors are moved to the selected device (GPU here).
X_train = tensor(df_train[["x","y"]].values).to(device)
Y_train = tensor(df_train[["labels"]].values).to(device)
X_test = tensor(df_test[["x","y"]].values).to(device)
Y_test = tensor(df_test[["labels"]].values).to(device)
print(f"X_train shape: {X_train.shape}, device: {X_train.device}")
print(f"Y_train shape: {Y_train.shape}, device: {Y_train.device}")
print(f"X_test shape: {X_test.shape}, device: {X_test.device}")
print(f"Y_test shape: {Y_test.shape}, device: {Y_test.device}")
# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
X_train = tensor(df_train[["x","y"]].values).to(device)
Y_train = tensor(df_train[["labels"]].values).to(device)
X_test = tensor(df_test[["x","y"]].values).to(device)
Y_test = tensor(df_test[["labels"]].values).to(device)
print(f"X_train shape: {X_train.shape}, device: {X_train.device}")
print(f"Y_train shape: {Y_train.shape}, device: {Y_train.device}")
print(f"X_test shape: {X_test.shape}, device: {X_test.device}")
print(f"Y_test shape: {Y_test.shape}, device: {Y_test.device}")
X_train shape: sorix.Size([24000, 2]), device: cuda:0 Y_train shape: sorix.Size([24000, 1]), device: cuda:0 X_test shape: sorix.Size([6000, 2]), device: cuda:0 Y_test shape: sorix.Size([6000, 1]), device: cuda:0
In [7]:
Copied!
# Inspect the gradient buffer — no backward() has run yet, so this is
# expected to be unset (the cell produced no output above).
X_train.grad
X_train.grad
In [8]:
Copied!
class Network(Module):
    """Small MLP for 3-class classification of 2-D points: 2 -> 4 -> 4 -> 3."""

    def __init__(self):
        super().__init__()
        # Attribute names (fc1/relu/fc2/fc3) become the state_dict keys used
        # by the save/load cells later, so they must not be renamed.
        self.fc1 = Linear(2, 4)
        self.relu = ReLU()
        self.fc2 = Linear(4, 4)
        self.fc3 = Linear(4, 3)

    def forward(self, x: Tensor) -> Tensor:
        """Return raw class logits (no softmax applied here)."""
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

net = Network().to(device)
net.parameters()

# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
class Network(Module):
    """Small MLP for 3-class classification of 2-D points: 2 -> 4 -> 4 -> 3."""

    def __init__(self):
        super().__init__()
        self.fc1 = Linear(2, 4)
        self.relu = ReLU()
        self.fc2 = Linear(4, 4)
        self.fc3 = Linear(4, 3)

    def forward(self, x: Tensor) -> Tensor:
        """Return raw class logits (no softmax applied here)."""
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

net = Network().to(device)
net.parameters()
Out[8]:
[tensor([[ 0.8218481 , 1.6584973 , -0.21329844, -0.22710423],
[-1.283966 , 1.0882515 , -0.06972136, -0.03704953]], device='cuda:0', requires_grad=True),
tensor([[0., 0., 0., 0.]], device='cuda:0', requires_grad=True),
tensor([[ 0.30648765, 0.21343027, -0.09919398, 0.9340736 ],
[-1.133978 , 0.14940731, 0.28791216, -0.29160607],
[ 0.43589577, 0.4688745 , 0.78475726, -0.0788846 ],
[-0.15354739, 0.38126808, -0.13964173, -1.0757526 ]], device='cuda:0', requires_grad=True),
tensor([[0., 0., 0., 0.]], device='cuda:0', requires_grad=True),
tensor([[ 0.60982543, -1.2970355 , 0.28402847],
[ 1.7949859 , 0.37703976, -0.49080712],
[-0.31916267, 0.26207057, -0.01511212],
[ 0.5347513 , 1.025364 , 0.9294297 ]], device='cuda:0', requires_grad=True),
tensor([[0., 0., 0.]], device='cuda:0', requires_grad=True)]
In [9]:
Copied!
# Cross-entropy over the raw logits; RMSprop with learning rate 1e-2.
criterion = CrossEntropyLoss()
optimizer = RMSprop(net.parameters(), lr=1e-2)
# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
criterion = CrossEntropyLoss()
optimizer = RMSprop(net.parameters(), lr=1e-2)
In [10]:
Copied!
# Sanity-check forward pass before training: one row of 3 logits per sample.
logits = net(X_train)
logits
logits = net(X_train)
logits
Out[10]:
tensor([[ 0.09201065, 0.06878142, -0.04054321],
[ 2.65121776, 1.98188497, -1.16822201],
[ 2.44292547, 1.82617866, -1.07644093],
...,
[ 2.14173553, 1.60102785, -0.9437258 ],
[ 2.14797085, 1.60568898, -0.9464733 ],
[ 0.21315635, 0.14280145, -0.08877873]], device='cuda:0', dtype=sorix.float64, requires_grad=True)
In [11]:
Copied!
# Predicted class = index of the largest logit; keepdims matches Y's (N, 1) shape.
preds = sorix.argmax(logits, axis=1, keepdims=True)
preds
preds = sorix.argmax(logits, axis=1, keepdims=True)
preds
Out[11]:
tensor([[0],
[0],
[0],
...,
[0],
[0],
[0]], device='cuda:0', dtype=sorix.int64)
Training¶
In [12]:
Copied!
# Full-batch training loop with periodic evaluation and early stopping.
for epoch in range(10000 + 1):
    logits = net(X_train)
    loss = criterion(logits, Y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Report metrics every 10 epochs.
    if epoch % 10 == 0:
        preds = sorix.argmax(logits, axis=1, keepdims=True)
        acc_train = (preds == Y_train).mean()
        # Disable autograd for the evaluation forward pass.
        with sorix.no_grad():
            logits = net(X_test)
        preds = sorix.argmax(logits, axis=1, keepdims=True)
        acc_test = (preds == Y_test).mean()
        # f-string keeps the progress columns aligned.
        print(f"[{device}] Epoch {epoch:5d} | Loss: {loss.item():.4f} | Acc Train: {acc_train.item()*100:.2f}% | Acc Test: {acc_test.item()*100:.2f}%")
        if acc_test.item() >= 0.98:  # early stop once test accuracy reaches 98%
            print(f"Entrenamiento completado en {epoch} epochs!")
            break
# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
for epoch in range(10000 + 1):
    logits = net(X_train)
    loss = criterion(logits, Y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        preds = sorix.argmax(logits, axis=1, keepdims=True)
        acc_train = (preds == Y_train).mean()
        with sorix.no_grad():
            logits = net(X_test)
        preds = sorix.argmax(logits, axis=1, keepdims=True)
        acc_test = (preds == Y_test).mean()
        print(f"[{device}] Epoch {epoch:5d} | Loss: {loss.item():.4f} | Acc Train: {acc_train.item()*100:.2f}% | Acc Test: {acc_test.item()*100:.2f}%")
        if acc_test.item() >= 0.98:  # early stop once test accuracy reaches 98%
            print(f"Entrenamiento completado en {epoch} epochs!")
            break
[cuda] Epoch 0 | Loss: 2.0361 | Acc Train: 33.66% | Acc Test: 33.15% [cuda] Epoch 10 | Loss: 0.4788 | Acc Train: 79.40% | Acc Test: 79.10% [cuda] Epoch 20 | Loss: 0.3751 | Acc Train: 83.17% | Acc Test: 83.05% [cuda] Epoch 30 | Loss: 0.3218 | Acc Train: 89.33% | Acc Test: 89.33% [cuda] Epoch 40 | Loss: 0.2806 | Acc Train: 93.16% | Acc Test: 92.45% [cuda] Epoch 50 | Loss: 0.2433 | Acc Train: 94.77% | Acc Test: 94.17% [cuda] Epoch 60 | Loss: 0.2077 | Acc Train: 96.01% | Acc Test: 95.27%
[cuda] Epoch 70 | Loss: 0.1763 | Acc Train: 97.19% | Acc Test: 96.32% [cuda] Epoch 80 | Loss: 0.1470 | Acc Train: 97.98% | Acc Test: 97.58% [cuda] Epoch 90 | Loss: 0.1227 | Acc Train: 98.64% | Acc Test: 98.47% Entrenamiento completado en 90 epochs!
Prediction¶
In [13]:
Copied!
# Evaluate on the held-out set and scatter the points coloured by PREDICTED class.
with sorix.no_grad():
    logits = net(X_test)
    preds = sorix.argmax(logits, axis=1, keepdims=True)
    acc = (preds == Y_test).mean()

# Map integer predictions back to the original letter labels for the legend.
y_pred_labels = [id2labels[y.item()] for y in preds]
df_test['pred_labels'] = y_pred_labels

for label in df_test['pred_labels'].unique():
    x = df_test[df_test['pred_labels'] == label]['x']
    y = df_test[df_test['pred_labels'] == label]['y']
    plt.scatter(x, y, s=50, label=label)

# Fixed title typo: "Tetst" -> "Test".
plt.title(f"Circles Test Dataset: Accuracy: {100*acc.item():.2f}%")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
with sorix.no_grad():
    logits = net(X_test)
    preds = sorix.argmax(logits, axis=1, keepdims=True)
    acc = (preds == Y_test).mean()
y_pred_labels = [id2labels[y.item()] for y in preds]
df_test['pred_labels'] = y_pred_labels
for label in df_test['pred_labels'].unique():
    x = df_test[df_test['pred_labels'] == label]['x']
    y = df_test[df_test['pred_labels'] == label]['y']
    plt.scatter(x, y, s=50, label=label)
plt.title(f"Circles Test Dataset: Accuracy: {100*acc.item():.2f}%")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
Out[13]:
<matplotlib.legend.Legend at 0x7fc42ad13610>
In [14]:
Copied!
# Confusion matrix of true test labels vs. predictions, rendered as counts.
sns.heatmap(confusion_matrix(Y_test, preds), annot=True, cmap="Blues", fmt=".0f")
sns.heatmap(confusion_matrix(Y_test, preds), annot=True, cmap="Blues", fmt=".0f")
Out[14]:
<Axes: >
In [15]:
Copied!
# Per-class precision / recall / F1 on the test split.
print(classification_report(Y_test, preds))
print(classification_report(Y_test, preds))
precision recall f1-score support 0 0.95 1.00 0.98 1917 1 1.00 0.95 0.98 1989 2 1.00 1.00 1.00 2094 accuracy 0.98 6000 macro avg 0.98 0.98 0.98 6000 weighted avg 0.99 0.98 0.98 6000
Save and Load Model¶
In [16]:
Copied!
# Persist only the learned parameters (state dict), not the whole model object.
sorix.save(net.state_dict(),"model_weights.sor")
sorix.save(net.state_dict(),"model_weights.sor")
CPU¶
In [17]:
Copied!
# Rebuild the network on CPU and load the saved weights into it.
net2 = Network()
net2.load_state_dict(sorix.load("model_weights.sor"))
# Move the input to CPU if needed so it matches the model's device.
# NOTE(review): earlier output printed X_test.device as 'cuda:0', while these
# branches compare against 'cpu'/'cuda' — confirm sorix's device equality
# semantics make the intended branch fire.
if X_test.device == 'cpu':
    with sorix.no_grad():
        logits = net2(X_test)
if X_test.device == 'cuda':
    with sorix.no_grad():
        logits = net2(X_test.to('cpu'))
logits
# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
net2 = Network()
net2.load_state_dict(sorix.load("model_weights.sor"))
if X_test.device == 'cpu':
    with sorix.no_grad():
        logits = net2(X_test)
if X_test.device == 'cuda':
    with sorix.no_grad():
        logits = net2(X_test.to('cpu'))
logits
Out[17]:
tensor([[-19.54701513, 5.20614781, 9.78048857],
[-21.92400001, 5.05538736, 10.33117134],
[-19.03847451, 4.6375706 , 9.03179574],
...,
[ 4.73455059, 2.95294746, -2.148889 ],
[ 2.27274471, 2.09316105, -1.57196888],
[-24.52207742, 5.4836166 , 11.55275332]], dtype=sorix.float64)
GPU¶
In [18]:
Copied!
# Rebuild the network, load the saved weights, then move the model to GPU.
net2 = Network()
net2.load_state_dict(sorix.load("model_weights.sor"))
net2.to('cuda')
# Move the input to GPU if needed so it matches the model's device.
# NOTE(review): earlier output printed X_test.device as 'cuda:0', while these
# branches compare against 'cpu'/'cuda' — confirm sorix's device equality
# semantics make the intended branch fire.
if X_test.device == 'cpu':
    with sorix.no_grad():
        logits = net2(X_test.to('cuda'))
if X_test.device == 'cuda':
    with sorix.no_grad():
        logits = net2(X_test)
logits
# NOTE(review): the lines below duplicate the cell above — an artifact of the notebook export.
net2 = Network()
net2.load_state_dict(sorix.load("model_weights.sor"))
net2.to('cuda')
if X_test.device == 'cpu':
    with sorix.no_grad():
        logits = net2(X_test.to('cuda'))
if X_test.device == 'cuda':
    with sorix.no_grad():
        logits = net2(X_test)
logits
Out[18]:
tensor([[-19.54701513, 5.20614781, 9.78048857],
[-21.92400001, 5.05538736, 10.33117134],
[-19.03847451, 4.6375706 , 9.03179574],
...,
[ 4.73455059, 2.95294746, -2.148889 ],
[ 2.27274471, 2.09316105, -1.57196888],
[-24.52207742, 5.4836166 , 11.55275332]], device='cuda:0', dtype=sorix.float64)