Skip to content

MNIST with PyTorch

Train a simple CNN on MNIST across multiple learning rates, tracking each as a separate GoodSeed run.

Terminal window
pip install goodseed torch torchvision
python examples/mnist.py

Then view results:

Terminal window
goodseed serve
"""Train a simple CNN on MNIST and track the experiment with GoodSeed."""
import argparse
from pathlib import Path
import goodseed
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
class Net(nn.Module):
    """Small convolutional classifier for the MNIST example.

    Two 3x3 convolutions, each followed by ReLU and 2x2 max-pooling, feed two
    fully connected layers that emit 10 scores per image. ``fc1`` expects
    ``64 * 7 * 7`` features, which is what a 1x28x28 input produces after the
    two pooling stages (28 -> 14 -> 7).
    """

    def __init__(self):
        super().__init__()
        # padding=1 keeps the spatial size through each 3x3 convolution, so
        # only the pooling layers shrink the feature maps.
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return the raw class scores (logits), one row of 10 per input image."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # flatten keeps the batch dimension and, unlike view, does not require
        # the pooled activations to be contiguous in memory.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)
def train_epoch(model, loader, optimizer, device):
    """Train ``model`` for one pass over ``loader`` and report epoch averages.

    Args:
        model: Network to optimise; it is switched to training mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer whose ``zero_grad``/``step`` run once per batch.
        device: Device each batch is moved to before the forward pass.

    Returns:
        A ``(mean_loss, accuracy)`` tuple of Python floats: the cross-entropy
        loss averaged over all samples seen, and the fraction of samples whose
        arg-max prediction matched the label. Both are measured on each batch
        before that batch's optimizer step is applied.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.train()
    total_loss = 0.0
    correct = 0
    total = 0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        output = model(images)
        loss = F.cross_entropy(output, labels)
        loss.backward()
        optimizer.step()

        batch_size = images.size(0)
        # cross_entropy returns the batch mean; weight it by the batch size so
        # a smaller final batch does not skew the epoch average.
        total_loss += loss.item() * batch_size
        correct += (output.argmax(1) == labels).sum().item()
        total += batch_size

    if total == 0:
        raise ValueError("train_epoch: loader yielded no samples")
    return total_loss / total, correct / total
@torch.no_grad()
def evaluate(model, loader, device):
    """Measure loss and accuracy of ``model`` on ``loader`` without training.

    Runs under ``torch.no_grad()`` with the model in evaluation mode, so no
    gradients are recorded and no parameters are updated.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        A ``(mean_loss, accuracy)`` tuple of Python floats: the cross-entropy
        loss averaged over all samples, and the fraction of samples whose
        arg-max prediction matched the label.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        output = model(images)

        batch_size = images.size(0)
        # Weight the batch-mean loss by the batch size so batches of unequal
        # length contribute proportionally to the average.
        total_loss += F.cross_entropy(output, labels).item() * batch_size
        correct += (output.argmax(1) == labels).sum().item()
        total += batch_size

    if total == 0:
        raise ValueError("evaluate: loader yielded no samples")
    return total_loss / total, correct / total
# Learning rates swept by main(); each value is trained as its own GoodSeed run.
LEARNING_RATES = [1e-2, 1e-3, 1e-4]
def run_experiment(lr, epochs, batch_size, device, train_loader, val_loader):
    """Train a fresh ``Net`` with one learning rate inside its own GoodSeed run.

    Args:
        lr: Learning rate passed to the Adam optimizer; also part of the run name.
        epochs: Number of train/evaluate passes to perform.
        batch_size: Recorded in the run config only; the loaders are built by
            the caller and are not re-batched here.
        device: Device the model and batches are placed on.
        train_loader: Batches used by ``train_epoch``.
        val_loader: Batches used by ``evaluate``.
    """
    model = Net().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # Stays None when epochs == 0, so the final metric is skipped instead of
    # failing on an unbound name.
    val_acc = None

    with goodseed.Run(name=f"mnist-lr{lr}", tags=["mnist", "pytorch"]) as run:
        # Hyperparameters are stored as plain config values on the run.
        run["learning_rate"] = lr
        run["epochs"] = epochs
        run["batch_size"] = batch_size
        run["model"] = "CNN (2 conv + 2 fc)"
        run["device"] = str(device)

        for epoch in range(epochs):
            train_loss, train_acc = train_epoch(model, train_loader, optimizer, device)
            val_loss, val_acc = evaluate(model, val_loader, device)

            # Metric series are indexed by the zero-based epoch number.
            run["train/loss"].log(train_loss, step=epoch)
            run["train/accuracy"].log(train_acc, step=epoch)
            run["val/loss"].log(val_loss, step=epoch)
            run["val/accuracy"].log(val_acc, step=epoch)

            print(
                f"  Epoch {epoch + 1}/{epochs}  "
                f"train_loss={train_loss:.4f}  train_acc={train_acc:.4f}  "
                f"val_loss={val_loss:.4f}  val_acc={val_acc:.4f}"
            )

        # Validation accuracy of the last epoch, stored as a single value.
        if val_acc is not None:
            run["final/val_accuracy"] = val_acc
def main(argv=None):
    """Parse CLI options, load MNIST, and train once per learning rate.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, so running the script from
            the command line behaves as before.
    """
    parser = argparse.ArgumentParser(description="MNIST + GoodSeed example")
    parser.add_argument("--epochs", type=int, default=5)
    parser.add_argument("--batch-size", type=int, default=64)
    args = parser.parse_args(argv)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])

    # The dataset is downloaded next to this script, into ./data.
    data_dir = Path(__file__).parent / "data"
    train_data = datasets.MNIST(data_dir, train=True, download=True, transform=transform)
    val_data = datasets.MNIST(data_dir, train=False, download=True, transform=transform)

    # The loaders are built once and shared by every run; only the training
    # loader is shuffled and uses the --batch-size option.
    train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=1000)

    for lr in LEARNING_RATES:
        print(f"\n=== Run: lr={lr} ===")
        run_experiment(lr, args.epochs, args.batch_size, device, train_loader, val_loader)
# Start the learning-rate sweep only when executed as a script, not on import.
if __name__ == "__main__":
    main()

The script loops over three learning rates, creating a separate GoodSeed run for each:

LEARNING_RATES = [1e-2, 1e-3, 1e-4]
for lr in LEARNING_RATES:
run_experiment(lr, args.epochs, args.batch_size, device, train_loader, val_loader)

Each run is opened as a context manager with a descriptive name and tags:

with goodseed.Run(name=f"mnist-lr{lr}", tags=["mnist", "pytorch"]) as run:

Hyperparameters are logged as individual config keys:

run["learning_rate"] = lr
run["epochs"] = epochs
run["batch_size"] = batch_size
run["model"] = "CNN (2 conv + 2 fc)"
run["device"] = str(device)

Inside the training loop, metrics are logged per epoch using .log() with step=epoch:

run["train/loss"].log(train_loss, step=epoch)
run["train/accuracy"].log(train_acc, step=epoch)
run["val/loss"].log(val_loss, step=epoch)
run["val/accuracy"].log(val_acc, step=epoch)

After training, a final scalar is stored as a config:

run["final/val_accuracy"] = val_acc
Summary of what each run records:

| Category | Fields |
| --- | --- |
| Configs | learning_rate, epochs, batch_size, model, device |
| Metrics | train/loss, train/accuracy, val/loss, val/accuracy (per epoch) |
| Final | final/val_accuracy |
| Auto | CPU/memory usage, stdout, git state (via GoodSeed monitoring) |

Customize the run with --epochs and --batch-size:

Terminal window
python examples/mnist.py --epochs 10 --batch-size 128