Skip to content

Text Classification with Hugging Face

Fine-tune DistilBERT on IMDB sentiment classification and track training with GoodSeed via a Trainer callback.

Terminal window
pip install goodseed transformers datasets torch
python examples/hf_text_classification.py

Then view results:

Terminal window
goodseed serve
"""Fine-tune DistilBERT on IMDB sentiment and track with GoodSeed."""
import argparse
from pathlib import Path
import goodseed
from datasets import load_dataset
from transformers import (
AutoModelForSequenceClassification,
AutoTokenizer,
Trainer,
TrainerCallback,
TrainingArguments,
)
class GoodSeedCallback(TrainerCallback):
    """Trainer callback that mirrors logged metrics into a GoodSeed run."""

    def __init__(self, run):
        """Remember the run object that will receive the metric values."""
        self.run = run

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Forward every int/float entry of ``logs`` to the run.

        Each value is logged under its own key, tagged with the trainer's
        current global step. Non-numeric entries are ignored, and nothing
        happens when ``logs`` is ``None``.
        """
        if logs is None:
            return

        current_step = state.global_step
        for name, metric in logs.items():
            if not isinstance(metric, (int, float)):
                continue
            self.run[name].log(metric, step=current_step)
def main():
    """Fine-tune DistilBERT on IMDB sentiment and record the run with GoodSeed.

    Command-line options:
        --epochs: number of training epochs (default 3).
        --batch-size: per-device batch size for training and evaluation
            (default 16).
        --lr: learning rate (default 2e-5).
        --max-samples: number of examples kept per split for fast iteration;
            0 uses the full dataset. Values larger than a split are clamped
            to that split's size.
    """
    parser = argparse.ArgumentParser(description="HF text classification + GoodSeed")
    parser.add_argument("--epochs", type=int, default=3)
    parser.add_argument("--batch-size", type=int, default=16)
    parser.add_argument("--lr", type=float, default=2e-5)
    parser.add_argument("--max-samples", type=int, default=2000,
                        help="Subset size for fast iteration (0 = full dataset)")
    args = parser.parse_args()

    model_name = "distilbert-base-uncased"
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    dataset = load_dataset("imdb")
    # IMDB also ships an unlabeled "unsupervised" split that this example
    # never uses; drop it so it is not tokenized below.
    dataset.pop("unsupervised", None)

    if args.max_samples > 0:
        for split in ("train", "test"):
            # Clamp the request so asking for more examples than the split
            # holds does not make select() fail on out-of-range indices.
            subset_size = min(args.max_samples, len(dataset[split]))
            dataset[split] = dataset[split].shuffle(seed=42).select(range(subset_size))

    def tokenize(batch):
        """Tokenize a batch of reviews, padded/truncated to 256 tokens."""
        return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=256)

    dataset = dataset.map(tokenize, batched=True)
    dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

    # Trainer output goes next to this script.
    output_dir = Path(__file__).parent / "hf_output"
    training_args = TrainingArguments(
        output_dir=str(output_dir),
        num_train_epochs=args.epochs,
        per_device_train_batch_size=args.batch_size,
        per_device_eval_batch_size=args.batch_size,
        learning_rate=args.lr,
        eval_strategy="steps",
        eval_steps=20,
        logging_steps=1,
        save_strategy="no",
        # Built-in reporters are disabled; GoodSeedCallback does the tracking.
        report_to="none",
    )

    with goodseed.Run(name=f"imdb-{model_name}", tags=["hf", "imdb", "distilbert"]) as run:
        run["parameters"] = args
        run["model"] = model_name
        run["dataset"] = "imdb"
        run["device"] = str(training_args.device)

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=dataset["train"],
            eval_dataset=dataset["test"],
            callbacks=[GoodSeedCallback(run)],
        )
        trainer.train()

        # Final evaluation pass; its loss is stored on the run as a single value.
        eval_result = trainer.evaluate()
        run["eval/loss"] = eval_result["eval_loss"]
        print(f"\nEval loss: {eval_result['eval_loss']:.4f}")
# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    main()

The key pattern here is a TrainerCallback that bridges Hugging Face’s Trainer logging to GoodSeed:

class GoodSeedCallback(TrainerCallback):
    """Trainer callback that mirrors logged metrics into a GoodSeed run."""

    def __init__(self, run):
        """Remember the run object that will receive the metric values."""
        self.run = run

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Forward every int/float entry of ``logs`` to the run.

        Each value is logged under its own key, tagged with the trainer's
        current global step. Non-numeric entries are ignored, and nothing
        happens when ``logs`` is ``None``.
        """
        if logs is None:
            return

        current_step = state.global_step
        for name, metric in logs.items():
            if not isinstance(metric, (int, float)):
                continue
            self.run[name].log(metric, step=current_step)

The on_log hook fires whenever the Trainer logs metrics. It iterates the logs dict and forwards every numeric value to GoodSeed with the current global step.

The run is opened as a context manager, and configs are logged using argparse namespace flattening:

# Open the run as a context manager and record the configuration on it.
with goodseed.Run(name=f"imdb-{model_name}", tags=["hf", "imdb", "distilbert"]) as run:
    # The parsed argparse.Namespace is assigned as-is under "parameters".
    run["parameters"] = args
    run["model"] = model_name
    run["dataset"] = "imdb"
    # Device chosen by TrainingArguments, stored as a string.
    run["device"] = str(training_args.device)

Passing args (an argparse.Namespace) to run["parameters"] automatically flattens it into parameters/epochs, parameters/batch_size, parameters/lr, and parameters/max_samples.

The callback is passed to the Trainer:

# Build the Trainer with the GoodSeed callback attached to the open run.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    callbacks=[GoodSeedCallback(run)],
)

After training, a final eval metric is stored as a config:

# Run one more evaluation pass and store its loss on the run under "eval/loss".
eval_result = trainer.evaluate()
run["eval/loss"] = eval_result["eval_loss"]
The run records the following fields:

| Category | Fields |
| --- | --- |
| Configs | parameters/epochs, parameters/batch_size, parameters/lr, parameters/max_samples, model, dataset, device |
| Metrics | loss, learning_rate, grad_norm, eval_loss, eval_runtime, etc. (all numeric Trainer metrics, per step) |
| Final | eval/loss |
| Auto | CPU/memory usage, stdout, git state (via GoodSeed monitoring) |

Customize the run:

Terminal window
python examples/hf_text_classification.py --epochs 5 --lr 5e-5 --batch-size 32 --max-samples 5000