MNIST with PyTorch
Train a simple CNN on MNIST across multiple learning rates, tracking each as a separate GoodSeed run.
Run it

pip install goodseed torch torchvision
python examples/mnist.py

Then view results:
goodseed serve

Full source

"""Train a simple CNN on MNIST and track the experiment with GoodSeed."""
import argparsefrom pathlib import Path
import goodseedimport torchimport torch.nn as nnimport torch.nn.functional as Ffrom torch.utils.data import DataLoaderfrom torchvision import datasets, transforms
class Net(nn.Module):
    """Small convolutional classifier: two conv/pool stages, then two linear layers.

    The first linear layer expects ``64 * 7 * 7`` features, which matches
    single-channel 28x28 inputs after the two 2x2 max-pool steps.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as they are applied in forward().
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return the 10 unnormalised class scores for each sample in ``x``."""
        # Convolutional stages: conv -> ReLU -> 2x2 max-pool, applied twice.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))

        # Collapse everything except the batch dimension for the dense head.
        batch = x.size(0)
        flat = x.view(batch, -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)
def train_epoch(model, loader, optimizer, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Module to optimise; it is switched to training mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        A ``(mean_loss, accuracy)`` tuple, both averaged over every sample
        seen during the pass.
    """
    model.train()
    loss_sum, hits, seen = 0.0, 0, 0

    for images, labels in loader:
        images = images.to(device)
        labels = labels.to(device)
        batch_len = images.size(0)

        optimizer.zero_grad()
        logits = model(images)
        batch_loss = F.cross_entropy(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # cross_entropy returns the batch mean, so weight it by the batch
        # size to get a per-sample average at the end.
        loss_sum += batch_loss.item() * batch_len
        predictions = logits.argmax(1)
        hits += (predictions == labels).sum().item()
        seen += batch_len

    return loss_sum / seen, hits / seen
@torch.no_grad()
def evaluate(model, loader, device):
    """Measure loss and accuracy of ``model`` on ``loader`` without training.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        A ``(mean_loss, accuracy)`` tuple, both averaged over every sample
        in ``loader``.
    """
    model.eval()
    loss_sum, hits, seen = 0.0, 0, 0

    for images, labels in loader:
        images = images.to(device)
        labels = labels.to(device)
        batch_len = images.size(0)

        logits = model(images)
        # cross_entropy returns the batch mean; scale by batch size so the
        # final division yields a per-sample average.
        batch_loss = F.cross_entropy(logits, labels).item()
        loss_sum += batch_loss * batch_len

        matches = logits.argmax(1) == labels
        hits += matches.sum().item()
        seen += batch_len

    return loss_sum / seen, hits / seen
# Learning rates swept by main(); each value gets its own run_experiment() call.
LEARNING_RATES = [1e-2, 1e-3, 1e-4]
def run_experiment(lr, epochs, batch_size, device, train_loader, val_loader):
    """Train a fresh ``Net`` at one learning rate and record it as a GoodSeed run.

    Args:
        lr: Learning rate passed to the Adam optimizer.
        epochs: Number of train/evaluate passes to perform.
        batch_size: Recorded in the run's config only; the loaders are
            already built by the caller.
        device: Device the model and batches are placed on.
        train_loader: Batches used by ``train_epoch``.
        val_loader: Batches used by ``evaluate``.
    """
    model = Net().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    with goodseed.Run(name=f"mnist-lr{lr}", tags=["mnist", "pytorch"]) as run:
        # Hyperparameters are stored as individual keys on the run.
        run["learning_rate"] = lr
        run["epochs"] = epochs
        run["batch_size"] = batch_size
        run["model"] = "CNN (2 conv + 2 fc)"
        run["device"] = str(device)

        # Stays None when no epoch runs (e.g. ``--epochs 0``); previously the
        # final assignment raised UnboundLocalError in that case.
        val_acc = None

        for epoch in range(epochs):
            train_loss, train_acc = train_epoch(model, train_loader, optimizer, device)
            val_loss, val_acc = evaluate(model, val_loader, device)

            run["train/loss"].log(train_loss, step=epoch)
            run["train/accuracy"].log(train_acc, step=epoch)
            run["val/loss"].log(val_loss, step=epoch)
            run["val/accuracy"].log(val_acc, step=epoch)

            print(
                f"  Epoch {epoch + 1}/{epochs}  "
                f"train_loss={train_loss:.4f}  train_acc={train_acc:.4f}  "
                f"val_loss={val_loss:.4f}  val_acc={val_acc:.4f}"
            )

        # Only record a final accuracy when at least one epoch produced one.
        if val_acc is not None:
            run["final/val_accuracy"] = val_acc
def main():
    """Parse CLI options, load MNIST, and run one experiment per learning rate.

    Recognised options are ``--epochs`` (default 5) and ``--batch-size``
    (default 64). The dataset is downloaded into a ``data`` directory next
    to this script if it is not already there.
    """
    cli = argparse.ArgumentParser(description="MNIST + GoodSeed example")
    cli.add_argument("--epochs", type=int, default=5)
    cli.add_argument("--batch-size", type=int, default=64)
    opts = cli.parse_args()

    # Prefer the GPU when one is available.
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(device_name)

    # NOTE(review): 0.1307 / 0.3081 are presumably the MNIST pixel mean and
    # standard deviation - confirm before reusing on another dataset.
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])

    data_root = Path(__file__).parent / "data"
    mnist_train = datasets.MNIST(data_root, train=True, download=True, transform=preprocess)
    mnist_val = datasets.MNIST(data_root, train=False, download=True, transform=preprocess)

    # Only the training batches are shuffled; validation uses fixed batches of 1000.
    train_loader = DataLoader(mnist_train, batch_size=opts.batch_size, shuffle=True)
    val_loader = DataLoader(mnist_val, batch_size=1000)

    # The same loaders are shared by every run in the sweep.
    for lr in LEARNING_RATES:
        print(f"\n=== Run: lr={lr} ===")
        run_experiment(lr, opts.epochs, opts.batch_size, device, train_loader, val_loader)
if __name__ == "__main__":
    main()

Walkthrough

The script loops over three learning rates, creating a separate GoodSeed run for each:
LEARNING_RATES = [1e-2, 1e-3, 1e-4]
for lr in LEARNING_RATES:
    run_experiment(lr, args.epochs, args.batch_size, device, train_loader, val_loader)

Each run is opened as a context manager with a descriptive name and tags:
with goodseed.Run(name=f"mnist-lr{lr}", tags=["mnist", "pytorch"]) as run:

Hyperparameters are logged as individual config keys:
run["learning_rate"] = lr
run["epochs"] = epochs
run["batch_size"] = batch_size
run["model"] = "CNN (2 conv + 2 fc)"
run["device"] = str(device)

Inside the training loop, metrics are logged per epoch using .log() with step=epoch:
run["train/loss"].log(train_loss, step=epoch)
run["train/accuracy"].log(train_acc, step=epoch)
run["val/loss"].log(val_loss, step=epoch)
run["val/accuracy"].log(val_acc, step=epoch)

After training, a final scalar is stored as a config:
run["final/val_accuracy"] = val_acc

What gets tracked

| Category | Fields |
|---|---|
| Configs | learning_rate, epochs, batch_size, model, device |
| Metrics | train/loss, train/accuracy, val/loss, val/accuracy (per epoch) |
| Final | final/val_accuracy |
| Auto | CPU/memory usage, stdout, git state (via GoodSeed monitoring) |
Customize the run with --epochs and --batch-size:
python examples/mnist.py --epochs 10 --batch-size 128