MNIST Dataset¶
In [1]:
Copied!
# Uncomment the following line to install the GPU version with the CuPy backend
# NOTE(review): the install is unpinned (@main) — pin a tag or commit hash for reproducibility
#!pip install 'sorix @ git+https://github.com/Mitchell-Mirano/sorix.git@main'
# Uncomment the following line to install the GPU version with the CuPy backend
# NOTE(review): the install is unpinned (@main) — pin a tag or commit hash for reproducibility
#!pip install 'sorix @ git+https://github.com/Mitchell-Mirano/sorix.git@main'
In [2]:
Copied!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sorix
from sorix import tensor
from sorix.nn import Module,Linear, CrossEntropyLoss,ReLU,BatchNorm1d,Dropout
from sorix.optim import SGDMomentum, RMSprop, Adam
from sorix.model_selection import train_test_split
from sorix.utils.data import Dataset, DataLoader
from sorix.metrics import confusion_matrix,classification_report
from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sorix
from sorix import tensor
from sorix.nn import Module,Linear, CrossEntropyLoss,ReLU,BatchNorm1d,Dropout
from sorix.optim import SGDMomentum, RMSprop, Adam
from sorix.model_selection import train_test_split
from sorix.utils.data import Dataset, DataLoader
from sorix.metrics import confusion_matrix,classification_report
from datetime import datetime
In [3]:
Copied!
# Pick the compute device: CUDA when sorix detects a GPU, otherwise CPU.
device = 'cuda' if sorix.cuda.is_available() else "cpu"
# NOTE(review): this line overrides the detection above and forces CPU
# (the training log below prints "[cpu]") — remove it to actually use the GPU.
device = 'cpu'
device = 'cuda' if sorix.cuda.is_available() else "cpu"
# NOTE(review): this line overrides the detection above and forces CPU
# (the training log below prints "[cpu]") — remove it to actually use the GPU.
device = 'cpu'
✅ GPU basic operation passed ✅ GPU available: NVIDIA GeForce RTX 4070 Laptop GPU CUDA runtime version: 13000 CuPy version: 13.6.0
In [4]:
Copied!
# Load the digit-recognizer training CSV: 785 columns — one `label` plus
# 784 pixel-intensity columns (28x28 flattened), as shown by data.head() below.
data = pd.read_csv("../data/digit-recognizer/train.csv")
data = pd.read_csv("../data/digit-recognizer/train.csv")
In [5]:
Copied!
# Preview the first rows to confirm the expected label + pixel0..pixel783 layout.
data.head()
data.head()
Out[5]:
| label | pixel0 | pixel1 | pixel2 | pixel3 | pixel4 | pixel5 | pixel6 | pixel7 | pixel8 | ... | pixel774 | pixel775 | pixel776 | pixel777 | pixel778 | pixel779 | pixel780 | pixel781 | pixel782 | pixel783 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 785 columns
In [6]:
Copied!
# Hold out 20% of the rows for evaluation.
# NOTE(review): no random seed is passed, so the split changes on every run —
# confirm whether sorix's train_test_split accepts a seed/random_state argument.
data_train,data_test = train_test_split(data,test_size=0.2)
data_train,data_test = train_test_split(data,test_size=0.2)
In [7]:
Copied!
# Split each partition into features (784 pixel columns) and labels.
X_train = data_train.drop("label",axis=1).values
Y_train = data_train[["label"]].values
X_test = data_test.drop("label",axis=1).values
Y_test = data_test[["label"]].values

# PEP 8 (E731): use a def rather than assigning a lambda to a name —
# same behavior, but the function gets a real name in tracebacks.
def transform(x):
    """Scale raw pixel intensities from [0, 255] down to [0.0, 1.0]."""
    return x / 255.0

train_dataset = Dataset(X_train, Y_train, transform=transform)
test_dataset = Dataset(X_test, Y_test, transform=transform)

# Shuffle only the training batches; evaluation order does not affect metrics.
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=128)
# Split each partition into features (784 pixel columns) and labels.
X_train = data_train.drop("label",axis=1).values
Y_train = data_train[["label"]].values
X_test = data_test.drop("label",axis=1).values
Y_test = data_test[["label"]].values

# PEP 8 (E731): use a def rather than assigning a lambda to a name —
# same behavior, but the function gets a real name in tracebacks.
def transform(x):
    """Scale raw pixel intensities from [0, 255] down to [0.0, 1.0]."""
    return x / 255.0

train_dataset = Dataset(X_train, Y_train, transform=transform)
test_dataset = Dataset(X_test, Y_test, transform=transform)

# Shuffle only the training batches; evaluation order does not affect metrics.
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=128)
In [8]:
Copied!
# Sanity-check the batch shapes: pull just the first batch from the loader.
x, y = next(iter(train_dataloader))
print(x.shape, y.shape)
# Sanity-check the batch shapes: pull just the first batch from the loader.
x, y = next(iter(train_dataloader))
print(x.shape, y.shape)
sorix.Size([128, 784]) sorix.Size([128, 1])
In [9]:
Copied!
class Model(Module):
    """Fully-connected classifier for flattened 28x28 digit images.

    Architecture: 784 -> Linear(128, no bias) -> BatchNorm -> ReLU
                  -> Linear(64) -> ReLU -> Dropout(0.2) -> Linear(10).
    """

    def __init__(self):
        super().__init__()
        # bias=False: BatchNorm1d immediately follows and applies its own
        # learned shift, which would make a linear bias redundant.
        self.linear1 = Linear(784,128,bias=False)
        self.bn1 = BatchNorm1d(128)
        self.linear2 = Linear(128,64)
        self.linear3 = Linear(64,10)
        self.relu = ReLU()
        self.dropout = Dropout(p=0.2)

    def forward(self,x):
        # Returns raw class logits — presumably CrossEntropyLoss applies the
        # softmax internally (as in torch); confirm against sorix docs.
        x = self.linear1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.linear2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.linear3(x)
        return x

model = Model()
model.to(device)
loss_fn = CrossEntropyLoss()
optimizer = RMSprop(model.parameters(), lr=1e-3)
class Model(Module):
    """Fully-connected classifier for flattened 28x28 digit images.

    Architecture: 784 -> Linear(128, no bias) -> BatchNorm -> ReLU
                  -> Linear(64) -> ReLU -> Dropout(0.2) -> Linear(10).
    """

    def __init__(self):
        super().__init__()
        # bias=False: BatchNorm1d immediately follows and applies its own
        # learned shift, which would make a linear bias redundant.
        self.linear1 = Linear(784,128,bias=False)
        self.bn1 = BatchNorm1d(128)
        self.linear2 = Linear(128,64)
        self.linear3 = Linear(64,10)
        self.relu = ReLU()
        self.dropout = Dropout(p=0.2)

    def forward(self,x):
        # Returns raw class logits — presumably CrossEntropyLoss applies the
        # softmax internally (as in torch); confirm against sorix docs.
        x = self.linear1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.linear2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.linear3(x)
        return x

model = Model()
model.to(device)
loss_fn = CrossEntropyLoss()
optimizer = RMSprop(model.parameters(), lr=1e-3)
In [10]:
Copied!
# The DataLoader now yields tensors automatically
for X, Y in train_dataloader:
    # Move the batch to the selected device (GPU if available)
    X, Y = X.to(device), Y.to(device)
    print(X.shape, Y.shape)
    break
# The DataLoader now yields tensors automatically
for X, Y in train_dataloader:
    # Move the batch to the selected device (GPU if available)
    X, Y = X.to(device), Y.to(device)
    print(X.shape, Y.shape)
    break
sorix.Size([128, 784]) sorix.Size([128, 1])
In [11]:
Copied!
start = datetime.now()
epochs = 100

for epoch in range(epochs+1):
    model.train()  # enable Dropout / BatchNorm training behavior
    total_train_loss = 0.0
    correct_train = 0
    total_train = 0

    for x, y in train_dataloader:
        # Move the tensors to the selected device (GPU/CPU)
        x, y = x.to(device), y.to(device)

        # Forward pass
        logits = model(x)
        loss = loss_fn(logits, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate loss (weighted by batch size) and accuracy counts
        total_train_loss += loss.item() * len(y)
        preds = sorix.argmax(logits, axis=1, keepdims=True)
        correct_train += (preds == y).sum().item()
        total_train += len(y)

    # NOTE(review): avg_train_loss is computed but never reported —
    # the log line below prints the *test* loss only.
    avg_train_loss = total_train_loss / total_train
    avg_train_acc = correct_train / total_train

    # --- Validation/Test (evaluated every 5 epochs) ---
    if epoch % 5 == 0:
        with sorix.no_grad():
            model.eval()  # inference mode for Dropout / BatchNorm
            total_test_loss = 0.0
            correct_test = 0
            total_test = 0
            for x, y in test_dataloader:
                x, y = x.to(device), y.to(device)
                logits = model(x)
                loss = loss_fn(logits, y)
                total_test_loss += loss.item() * len(y)
                preds = sorix.argmax(logits, axis=1, keepdims=True)
                correct_test += (preds == y).sum().item()
                total_test += len(y)

            avg_test_loss = total_test_loss / total_test
            avg_test_acc = correct_test / total_test

        print(f"[{device}] [{epoch:3d}/{epochs:3d}] | Loss: {avg_test_loss:.4f} | Acc Train: {100*avg_train_acc:.2f}% | Acc Test: {100*avg_test_acc:.2f}%")

        # Early stop once held-out accuracy exceeds 97%
        if avg_test_acc > 0.97:
            break

end = datetime.now()
delta = end-start
tiempo = delta.total_seconds()
print(f"Tiempo:{tiempo} segundos = {tiempo/60:.2f} min ")
start = datetime.now()
epochs = 100

for epoch in range(epochs+1):
    model.train()  # enable Dropout / BatchNorm training behavior
    total_train_loss = 0.0
    correct_train = 0
    total_train = 0

    for x, y in train_dataloader:
        # Move the tensors to the selected device (GPU/CPU)
        x, y = x.to(device), y.to(device)

        # Forward pass
        logits = model(x)
        loss = loss_fn(logits, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate loss (weighted by batch size) and accuracy counts
        total_train_loss += loss.item() * len(y)
        preds = sorix.argmax(logits, axis=1, keepdims=True)
        correct_train += (preds == y).sum().item()
        total_train += len(y)

    # NOTE(review): avg_train_loss is computed but never reported —
    # the log line below prints the *test* loss only.
    avg_train_loss = total_train_loss / total_train
    avg_train_acc = correct_train / total_train

    # --- Validation/Test (evaluated every 5 epochs) ---
    if epoch % 5 == 0:
        with sorix.no_grad():
            model.eval()  # inference mode for Dropout / BatchNorm
            total_test_loss = 0.0
            correct_test = 0
            total_test = 0
            for x, y in test_dataloader:
                x, y = x.to(device), y.to(device)
                logits = model(x)
                loss = loss_fn(logits, y)
                total_test_loss += loss.item() * len(y)
                preds = sorix.argmax(logits, axis=1, keepdims=True)
                correct_test += (preds == y).sum().item()
                total_test += len(y)

            avg_test_loss = total_test_loss / total_test
            avg_test_acc = correct_test / total_test

        print(f"[{device}] [{epoch:3d}/{epochs:3d}] | Loss: {avg_test_loss:.4f} | Acc Train: {100*avg_train_acc:.2f}% | Acc Test: {100*avg_test_acc:.2f}%")

        # Early stop once held-out accuracy exceeds 97%
        if avg_test_acc > 0.97:
            break

end = datetime.now()
delta = end-start
tiempo = delta.total_seconds()
print(f"Tiempo:{tiempo} segundos = {tiempo/60:.2f} min ")
[cpu] [ 0/100] | Loss: 0.1517 | Acc Train: 90.63% | Acc Test: 95.64%
[cpu] [ 5/100] | Loss: 0.0982 | Acc Train: 98.12% | Acc Test: 97.23% Tiempo:8.815667 segundos = 0.15 min
In [12]:
Copied!
# Collect predictions and targets over the whole test set for the report below.
# Accumulate per-batch arrays in lists and concatenate once at the end:
# the original np.append re-allocated and copied the full array on every
# batch (quadratic work overall).
pred_batches = []
target_batches = []

with sorix.no_grad():
    model.eval()
    for x, y in test_dataloader:
        x, y = x.to(device), y.to(device)

        # Predicted class per row (argmax over the 10 logits)
        logits = model(x)
        preds = sorix.argmax(logits, axis=1, keepdims=True).cpu()

        pred_batches.append(np.asarray(preds.data).ravel())
        target_batches.append(np.asarray(y.cpu().data).ravel())

# np.append produced a flat float64 array; keep the same shape/dtype here.
all_preds = np.concatenate(pred_batches).astype(np.float64) if pred_batches else np.array([])
all_targets = np.concatenate(target_batches).astype(np.float64) if target_batches else np.array([])
# Collect predictions and targets over the whole test set for the report below.
# Accumulate per-batch arrays in lists and concatenate once at the end:
# the original np.append re-allocated and copied the full array on every
# batch (quadratic work overall).
pred_batches = []
target_batches = []

with sorix.no_grad():
    model.eval()
    for x, y in test_dataloader:
        x, y = x.to(device), y.to(device)

        # Predicted class per row (argmax over the 10 logits)
        logits = model(x)
        preds = sorix.argmax(logits, axis=1, keepdims=True).cpu()

        pred_batches.append(np.asarray(preds.data).ravel())
        target_batches.append(np.asarray(y.cpu().data).ravel())

# np.append produced a flat float64 array; keep the same shape/dtype here.
all_preds = np.concatenate(pred_batches).astype(np.float64) if pred_batches else np.array([])
all_targets = np.concatenate(target_batches).astype(np.float64) if target_batches else np.array([])
In [13]:
Copied!
# Per-class precision/recall/F1 on the held-out 20% split.
print(classification_report(all_targets,all_preds))
print(classification_report(all_targets,all_preds))
precision recall f1-score support 0.0 0.96 0.99 0.98 798 1.0 0.98 0.99 0.99 920 2.0 0.96 0.98 0.97 797 3.0 0.97 0.95 0.96 858 4.0 0.98 0.97 0.97 813 5.0 0.97 0.97 0.97 772 6.0 0.99 0.97 0.98 849 7.0 0.98 0.98 0.98 929 8.0 0.97 0.95 0.96 810 9.0 0.95 0.97 0.96 854 accuracy 0.97 8400 macro avg 0.97 0.97 0.97 8400 weighted avg 0.97 0.97 0.97 8400
In [14]:
Copied!
# Confusion-matrix heatmap. Use the explicit fig/ax interface and label the
# figure so it stands alone when the notebook is skimmed.
# NOTE(review): assumes sorix's confusion_matrix follows the usual
# rows = true label, cols = predicted label convention — confirm.
fig, ax = plt.subplots(figsize=(8, 5))
sns.heatmap(confusion_matrix(all_targets, all_preds), annot=True, cmap="Blues", ax=ax)
ax.set(title="MNIST test-set confusion matrix", xlabel="Predicted label", ylabel="True label");
# Confusion-matrix heatmap. Use the explicit fig/ax interface and label the
# figure so it stands alone when the notebook is skimmed.
# NOTE(review): assumes sorix's confusion_matrix follows the usual
# rows = true label, cols = predicted label convention — confirm.
fig, ax = plt.subplots(figsize=(8, 5))
sns.heatmap(confusion_matrix(all_targets, all_preds), annot=True, cmap="Blues", ax=ax)
ax.set(title="MNIST test-set confusion matrix", xlabel="Predicted label", ylabel="True label");
Out[14]:
<Axes: >