mllam · sudhansu-24 · Apr 4, 2026 · Apr 13, 2026
diff --git a/.gitignore b/.gitignore
@@ -1,7 +1,6 @@
 ### Project Specific ###
-wandb
-saved_models
-lightning_logs
+runs/
+lightning_logs/
 data
 graphs
 *.sif

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -27,6 +27,7 @@ This release introduces new features including GIF animation support, wandb run
 
 ### Changed
 
+- Consolidate all training/evaluation run outputs (checkpoints, logger files, plots) into a single `runs/<run-name>/` directory instead of scattering them across `saved_models/`, `lightning_logs/`, `wandb/`, and `mlruns/` [\#293](https://github.com/mllam/neural-lam/issues/293) @sudhansu-24
 - Change the default ensemble-loading behavior in `WeatherDataset` / `WeatherDataModule` to use all ensemble members as independent samples for ensemble datastores (with matching ensemble-member selection for forcing when available); single-member behavior now requires explicitly opting in via `--load_single_member` [\#332](https://github.com/mllam/neural-lam/pull/332) @kshirajahere
 
 - Refactor graph loading: move zero-indexing out of the model and update plotting to prepare using the research-branch graph I/O [\#184](https://github.com/mllam/neural-lam/pull/184) @zweihuehner

diff --git a/neural_lam/custom_loggers.py b/neural_lam/custom_loggers.py
@@ -1,4 +1,5 @@
 # Standard library
+import os
 import sys
 
 # Third-party
@@ -15,10 +16,11 @@ class CustomMLFlowLogger(pl.loggers.MLFlowLogger):
     of version `2.0.3` at least.
     """
 
-    def __init__(self, experiment_name, tracking_uri, run_name):
+    def __init__(self, experiment_name, tracking_uri, run_name, save_dir=None):
         super().__init__(
             experiment_name=experiment_name, tracking_uri=tracking_uri
         )
+        self._save_dir = save_dir or "mlruns"
 
         mlflow.start_run(run_id=self.run_id, log_system_metrics=True)
         mlflow.set_tag("mlflow.runName", run_name)
@@ -35,7 +37,7 @@ def save_dir(self):
         str
             Path to the directory where the artifacts are saved.
         """
-        return "mlruns"
+        return self._save_dir
 
     def log_image(self, key, images, step=None):
         """
@@ -57,7 +59,8 @@ def log_image(self, key, images, step=None):
 
         # Need to save the image to a temporary file, then log that file
         # mlflow.log_image, should do this automatically, but is buggy
-        temporary_image = f"{key}.png"
+        os.makedirs(self.save_dir, exist_ok=True)
+        temporary_image = os.path.join(self.save_dir, f"{key}.png")
         images[0].savefig(temporary_image)
 
         img = Image.open(temporary_image)

diff --git a/neural_lam/train_model.py b/neural_lam/train_model.py
@@ -1,5 +1,6 @@
 # Standard library
 import json
+import os
 import random
 import time
 from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
@@ -338,12 +339,14 @@ def main(input_args=None):
             f"{time.strftime('%m_%d_%H')}-{random_run_id:04d}"
         )
 
+    run_dir = os.path.join("runs", run_name)
+
     training_logger = utils.setup_training_logger(
-        datastore=datastore, args=args, run_name=run_name
+        datastore=datastore, args=args, run_name=run_name, run_dir=run_dir
     )
 
     checkpoint_callback = pl.callbacks.ModelCheckpoint(
-        dirpath=f"saved_models/{run_name}",
+        dirpath=os.path.join(run_dir, "checkpoints"),
         filename="min_val_loss",
         monitor="val_mean_loss",
         mode="min",
@@ -352,6 +355,7 @@ def main(input_args=None):
     trainer = pl.Trainer(
         max_epochs=args.epochs,
         deterministic=True,
+        default_root_dir=run_dir,
         strategy="auto",
         accelerator=device_name,
         num_nodes=args.num_nodes,

diff --git a/neural_lam/utils.py b/neural_lam/utils.py
@@ -476,7 +476,7 @@ def init_training_logger_metrics(training_logger, val_steps):
 
 
 @rank_zero_only
-def setup_training_logger(datastore, args, run_name):
+def setup_training_logger(datastore, args, run_name, run_dir):
     """Set up the training logger (WandB or MLFlow).
 
     Parameters
@@ -520,6 +520,7 @@ def setup_training_logger(datastore, args, run_name):
             config=dict(training=vars(args), datastore=datastore._config),
             resume=wandb_resume,
             id=args.wandb_id,
+            save_dir=run_dir,
         )
     elif args.logger == "mlflow":
         if args.wandb_id is not None:
@@ -536,6 +537,7 @@ def setup_training_logger(datastore, args, run_name):
             experiment_name=args.logger_project,
             tracking_uri=url,
             run_name=run_name,
+            save_dir=run_dir,
         )
         training_logger.log_hyperparams(
             dict(training=vars(args), datastore=datastore._config)

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -83,7 +83,9 @@ def test_wandb_logger_kwargs(
     datastore = MagicMock()
     datastore._config = {}
 
-    setup_training_logger(datastore, args, run_name="my-run")
+    setup_training_logger(
+        datastore, args, run_name="my-run", run_dir="runs/my-run"
+    )
 
     _, kwargs = mock_wandb.call_args
     assert kwargs["resume"] == expected_resume
@@ -111,7 +113,9 @@ def test_wandb_id_ignored_with_mlflow_warns():
         ),
         patch("neural_lam.utils.logger") as mock_log,
     ):
-        setup_training_logger(datastore, args, run_name="my-run")
+        setup_training_logger(
+            datastore, args, run_name="my-run", run_dir="runs/my-run"
+        )
 
     mock_log.warning.assert_called_once()
     warning_msg = mock_log.warning.call_args[0][0]