MNIST Dataset¶
In [1]:
Copied!
# Uncomment the following line to install the GPU version with the CuPy backend
# NOTE(review): the install is unpinned (@main) — pin a tag or commit hash for reproducibility
#!pip install 'sorix @ git+https://github.com/Mitchell-Mirano/sorix.git@main'
# Uncomment the following line to install the GPU version with the CuPy backend
# NOTE(review): the install is unpinned (@main) — pin a tag or commit hash for reproducibility
#!pip install 'sorix @ git+https://github.com/Mitchell-Mirano/sorix.git@main'
In [2]:
Copied!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sorix
from sorix import tensor
from sorix.nn import Module,Linear, CrossEntropyLoss,ReLU,BatchNorm1d,Dropout
from sorix.optim import SGDMomentum, RMSprop, Adam
from sorix.model_selection import train_test_split
from sorix.utils.data import Dataset, DataLoader
from sorix.metrics import confusion_matrix,classification_report
from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sorix
from sorix import tensor
from sorix.nn import Module,Linear, CrossEntropyLoss,ReLU,BatchNorm1d,Dropout
from sorix.optim import SGDMomentum, RMSprop, Adam
from sorix.model_selection import train_test_split
from sorix.utils.data import Dataset, DataLoader
from sorix.metrics import confusion_matrix,classification_report
from datetime import datetime
In [3]:
Copied!
# Pick the compute device: CUDA when sorix detects a GPU, otherwise CPU.
device = 'cuda' if sorix.cuda.is_available() else "cpu"
# NOTE(review): this line overrides the detection above and forces CPU
# (the training log below prints "[cpu]") — remove it to actually use the GPU.
device = 'cpu'
device = 'cuda' if sorix.cuda.is_available() else "cpu"
# NOTE(review): this line overrides the detection above and forces CPU
# (the training log below prints "[cpu]") — remove it to actually use the GPU.
device = 'cpu'
✅ GPU basic operation passed ✅ GPU available: NVIDIA GeForce RTX 4070 Laptop GPU CUDA runtime version: 13000 CuPy version: 13.6.0
In [4]:
Copied!
# Load the digit-recognizer training CSV: 785 columns — one `label` plus
# 784 pixel-intensity columns (28x28 flattened), as shown by data.head() below.
data = pd.read_csv("../data/digit-recognizer/train.csv")
data = pd.read_csv("../data/digit-recognizer/train.csv")
In [5]:
Copied!
# Preview the first rows to confirm the expected label + pixel0..pixel783 layout.
data.head()
data.head()
Out[5]:
| label | pixel0 | pixel1 | pixel2 | pixel3 | pixel4 | pixel5 | pixel6 | pixel7 | pixel8 | ... | pixel774 | pixel775 | pixel776 | pixel777 | pixel778 | pixel779 | pixel780 | pixel781 | pixel782 | pixel783 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 785 columns
In [6]:
Copied!
# Hold out 20% of the rows for evaluation.
# NOTE(review): no random seed is passed, so the split changes on every run —
# confirm whether sorix's train_test_split accepts a seed/random_state argument.
data_train,data_test = train_test_split(data,test_size=0.2)
data_train,data_test = train_test_split(data,test_size=0.2)
In [7]:
Copied!
# Split each partition into features (784 pixel columns) and labels.
X_train = data_train.drop("label",axis=1).values
Y_train = data_train[["label"]].values
X_test = data_test.drop("label",axis=1).values
Y_test = data_test[["label"]].values

# PEP 8 (E731): use a def rather than assigning a lambda to a name —
# same behavior, but the function gets a real name in tracebacks.
def transform(x):
    """Scale raw pixel intensities from [0, 255] down to [0.0, 1.0]."""
    return x / 255.0

train_dataset = Dataset(X_train, Y_train, transform=transform)
test_dataset = Dataset(X_test, Y_test, transform=transform)

# Shuffle only the training batches; evaluation order does not affect metrics.
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=128)
# Split each partition into features (784 pixel columns) and labels.
X_train = data_train.drop("label",axis=1).values
Y_train = data_train[["label"]].values
X_test = data_test.drop("label",axis=1).values
Y_test = data_test[["label"]].values

# PEP 8 (E731): use a def rather than assigning a lambda to a name —
# same behavior, but the function gets a real name in tracebacks.
def transform(x):
    """Scale raw pixel intensities from [0, 255] down to [0.0, 1.0]."""
    return x / 255.0

train_dataset = Dataset(X_train, Y_train, transform=transform)
test_dataset = Dataset(X_test, Y_test, transform=transform)

# Shuffle only the training batches; evaluation order does not affect metrics.
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=128)
In [8]:
Copied!
# Sanity-check the batch shapes: pull just the first batch from the loader.
x, y = next(iter(train_dataloader))
print(x.shape, y.shape)
# Sanity-check the batch shapes: pull just the first batch from the loader.
x, y = next(iter(train_dataloader))
print(x.shape, y.shape)
sorix.Size([128, 784]) sorix.Size([128, 1])
In [9]:
Copied!
class Model(Module):
    """Fully-connected classifier for flattened 28x28 digit images.

    Architecture: 784 -> Linear(128, no bias) -> BatchNorm -> ReLU
                  -> Linear(64) -> ReLU -> Dropout(0.2) -> Linear(10).
    """

    def __init__(self):
        super().__init__()
        # bias=False: BatchNorm1d immediately follows and applies its own
        # learned shift, which would make a linear bias redundant.
        self.linear1 = Linear(784,128,bias=False)
        self.bn1 = BatchNorm1d(128)
        self.linear2 = Linear(128,64)
        self.linear3 = Linear(64,10)
        self.relu = ReLU()
        self.dropout = Dropout(p=0.2)

    def forward(self,x):
        # Returns raw class logits — presumably CrossEntropyLoss applies the
        # softmax internally (as in torch); confirm against sorix docs.
        x = self.linear1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.linear2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.linear3(x)
        return x

model = Model()
model.to(device)
loss_fn = CrossEntropyLoss()
optimizer = RMSprop(model.parameters(), lr=1e-3)
class Model(Module):
    """Fully-connected classifier for flattened 28x28 digit images.

    Architecture: 784 -> Linear(128, no bias) -> BatchNorm -> ReLU
                  -> Linear(64) -> ReLU -> Dropout(0.2) -> Linear(10).
    """

    def __init__(self):
        super().__init__()
        # bias=False: BatchNorm1d immediately follows and applies its own
        # learned shift, which would make a linear bias redundant.
        self.linear1 = Linear(784,128,bias=False)
        self.bn1 = BatchNorm1d(128)
        self.linear2 = Linear(128,64)
        self.linear3 = Linear(64,10)
        self.relu = ReLU()
        self.dropout = Dropout(p=0.2)

    def forward(self,x):
        # Returns raw class logits — presumably CrossEntropyLoss applies the
        # softmax internally (as in torch); confirm against sorix docs.
        x = self.linear1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.linear2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.linear3(x)
        return x

model = Model()
model.to(device)
loss_fn = CrossEntropyLoss()
optimizer = RMSprop(model.parameters(), lr=1e-3)
In [10]:
Copied!
# The DataLoader now yields tensors automatically
for X, Y in train_dataloader:
    # Move the batch to the selected device (GPU if available)
    X, Y = X.to(device), Y.to(device)
    print(X.shape, Y.shape)
    break
# The DataLoader now yields tensors automatically
for X, Y in train_dataloader:
    # Move the batch to the selected device (GPU if available)
    X, Y = X.to(device), Y.to(device)
    print(X.shape, Y.shape)
    break
sorix.Size([128, 784]) sorix.Size([128, 1])
In [11]:
Copied!
start = datetime.now()
epochs = 100

for epoch in range(epochs+1):
    model.train()  # enable Dropout / BatchNorm training behavior
    total_train_loss = 0.0
    correct_train = 0
    total_train = 0

    for x, y in train_dataloader:
        # Move the tensors to the selected device (GPU/CPU)
        x, y = x.to(device), y.to(device)

        # Forward pass
        logits = model(x)
        loss = loss_fn(logits, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate loss (weighted by batch size) and accuracy counts
        total_train_loss += loss.item() * len(y)
        preds = sorix.argmax(logits, axis=1, keepdims=True)
        correct_train += (preds == y).sum().item()
        total_train += len(y)

    # NOTE(review): avg_train_loss is computed but never reported —
    # the log line below prints the *test* loss only.
    avg_train_loss = total_train_loss / total_train
    avg_train_acc = correct_train / total_train

    # --- Validation/Test (evaluated every 5 epochs) ---
    if epoch % 5 == 0:
        with sorix.no_grad():
            model.eval()  # inference mode for Dropout / BatchNorm
            total_test_loss = 0.0
            correct_test = 0
            total_test = 0
            for x, y in test_dataloader:
                x, y = x.to(device), y.to(device)
                logits = model(x)
                loss = loss_fn(logits, y)
                total_test_loss += loss.item() * len(y)
                preds = sorix.argmax(logits, axis=1, keepdims=True)
                correct_test += (preds == y).sum().item()
                total_test += len(y)

            avg_test_loss = total_test_loss / total_test
            avg_test_acc = correct_test / total_test

        print(f"[{device}] [{epoch:3d}/{epochs:3d}] | Loss: {avg_test_loss:.4f} | Acc Train: {100*avg_train_acc:.2f}% | Acc Test: {100*avg_test_acc:.2f}%")

        # Early stop once held-out accuracy exceeds 97%
        if avg_test_acc > 0.97:
            break

end = datetime.now()
delta = end-start
tiempo = delta.total_seconds()
print(f"Tiempo:{tiempo} segundos = {tiempo/60:.2f} min ")
start = datetime.now()
epochs = 100

for epoch in range(epochs+1):
    model.train()  # enable Dropout / BatchNorm training behavior
    total_train_loss = 0.0
    correct_train = 0
    total_train = 0

    for x, y in train_dataloader:
        # Move the tensors to the selected device (GPU/CPU)
        x, y = x.to(device), y.to(device)

        # Forward pass
        logits = model(x)
        loss = loss_fn(logits, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate loss (weighted by batch size) and accuracy counts
        total_train_loss += loss.item() * len(y)
        preds = sorix.argmax(logits, axis=1, keepdims=True)
        correct_train += (preds == y).sum().item()
        total_train += len(y)

    # NOTE(review): avg_train_loss is computed but never reported —
    # the log line below prints the *test* loss only.
    avg_train_loss = total_train_loss / total_train
    avg_train_acc = correct_train / total_train

    # --- Validation/Test (evaluated every 5 epochs) ---
    if epoch % 5 == 0:
        with sorix.no_grad():
            model.eval()  # inference mode for Dropout / BatchNorm
            total_test_loss = 0.0
            correct_test = 0
            total_test = 0
            for x, y in test_dataloader:
                x, y = x.to(device), y.to(device)
                logits = model(x)
                loss = loss_fn(logits, y)
                total_test_loss += loss.item() * len(y)
                preds = sorix.argmax(logits, axis=1, keepdims=True)
                correct_test += (preds == y).sum().item()
                total_test += len(y)

            avg_test_loss = total_test_loss / total_test
            avg_test_acc = correct_test / total_test

        print(f"[{device}] [{epoch:3d}/{epochs:3d}] | Loss: {avg_test_loss:.4f} | Acc Train: {100*avg_train_acc:.2f}% | Acc Test: {100*avg_test_acc:.2f}%")

        # Early stop once held-out accuracy exceeds 97%
        if avg_test_acc > 0.97:
            break

end = datetime.now()
delta = end-start
tiempo = delta.total_seconds()
print(f"Tiempo:{tiempo} segundos = {tiempo/60:.2f} min ")
[cpu] [ 0/100] | Loss: 0.1517 | Acc Train: 90.63% | Acc Test: 95.64%
[cpu] [ 5/100] | Loss: 0.0982 | Acc Train: 98.12% | Acc Test: 97.23% Tiempo:8.815667 segundos = 0.15 min
In [12]:
Copied!
# Collect predictions and targets over the whole test set for the report below.
# Accumulate per-batch arrays in lists and concatenate once at the end:
# the original np.append re-allocated and copied the full array on every
# batch (quadratic work overall).
pred_batches = []
target_batches = []

with sorix.no_grad():
    model.eval()
    for x, y in test_dataloader:
        x, y = x.to(device), y.to(device)

        # Predicted class per row (argmax over the 10 logits)
        logits = model(x)
        preds = sorix.argmax(logits, axis=1, keepdims=True).cpu()

        pred_batches.append(np.asarray(preds.data).ravel())
        target_batches.append(np.asarray(y.cpu().data).ravel())

# np.append produced a flat float64 array; keep the same shape/dtype here.
all_preds = np.concatenate(pred_batches).astype(np.float64) if pred_batches else np.array([])
all_targets = np.concatenate(target_batches).astype(np.float64) if target_batches else np.array([])
# Collect predictions and targets over the whole test set for the report below.
# Accumulate per-batch arrays in lists and concatenate once at the end:
# the original np.append re-allocated and copied the full array on every
# batch (quadratic work overall).
pred_batches = []
target_batches = []

with sorix.no_grad():
    model.eval()
    for x, y in test_dataloader:
        x, y = x.to(device), y.to(device)

        # Predicted class per row (argmax over the 10 logits)
        logits = model(x)
        preds = sorix.argmax(logits, axis=1, keepdims=True).cpu()

        pred_batches.append(np.asarray(preds.data).ravel())
        target_batches.append(np.asarray(y.cpu().data).ravel())

# np.append produced a flat float64 array; keep the same shape/dtype here.
all_preds = np.concatenate(pred_batches).astype(np.float64) if pred_batches else np.array([])
all_targets = np.concatenate(target_batches).astype(np.float64) if target_batches else np.array([])
In [13]:
Copied!
# Per-class precision/recall/F1 on the held-out 20% split.
print(classification_report(all_targets,all_preds))
print(classification_report(all_targets,all_preds))
precision recall f1-score support 0.0 0.96 0.99 0.98 798 1.0 0.98 0.99 0.99 920 2.0 0.96 0.98 0.97 797 3.0 0.97 0.95 0.96 858 4.0 0.98 0.97 0.97 813 5.0 0.97 0.97 0.97 772 6.0 0.99 0.97 0.98 849 7.0 0.98 0.98 0.98 929 8.0 0.97 0.95 0.96 810 9.0 0.95 0.97 0.96 854 accuracy 0.97 8400 macro avg 0.97 0.97 0.97 8400 weighted avg 0.97 0.97 0.97 8400
In [14]:
Copied!
# Confusion-matrix heatmap. Use the explicit fig/ax interface and label the
# figure so it stands alone when the notebook is skimmed.
# NOTE(review): assumes sorix's confusion_matrix follows the usual
# rows = true label, cols = predicted label convention — confirm.
fig, ax = plt.subplots(figsize=(8, 5))
sns.heatmap(confusion_matrix(all_targets, all_preds), annot=True, cmap="Blues", ax=ax)
ax.set(title="MNIST test-set confusion matrix", xlabel="Predicted label", ylabel="True label");
# Confusion-matrix heatmap. Use the explicit fig/ax interface and label the
# figure so it stands alone when the notebook is skimmed.
# NOTE(review): assumes sorix's confusion_matrix follows the usual
# rows = true label, cols = predicted label convention — confirm.
fig, ax = plt.subplots(figsize=(8, 5))
sns.heatmap(confusion_matrix(all_targets, all_preds), annot=True, cmap="Blues", ax=ax)
ax.set(title="MNIST test-set confusion matrix", xlabel="Predicted label", ylabel="True label");
Out[14]:
<Axes: >