
Commit aa5799d

Merge pull request #92 from diningphil/utilities_qualitative_results
Added utilities to quickly play and run analyses on automatically trained models
2 parents 0f03845 + 6ee5c85

10 files changed: 183 additions & 12 deletions


.github/workflows/interrogate-docstring.yaml

Lines changed: 2 additions & 2 deletions
@@ -12,10 +12,10 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - name: Set up Python 3.11
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v4
         with:
           python-version: 3.11

.github/workflows/python-publish.yml

Lines changed: 2 additions & 2 deletions
@@ -23,9 +23,9 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Set up Python
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v4
         with:
           python-version: '3.11'
       - name: Install dependencies

.github/workflows/python-test-and-coverage.yml

Lines changed: 2 additions & 2 deletions
@@ -20,10 +20,10 @@ jobs:
         python-version: ["3.8", "3.9", "3.10", "3.11"]
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
@@ -1,5 +1,11 @@
 # Changelog
 
+## [1.5.4] New post-processing tutorial
+
+### Added
+
+- TODO
+
 ## [1.5.3] Minor fix
 
 ### Fixed

THANKS.md

Lines changed: 2 additions & 0 deletions
@@ -13,3 +13,5 @@ Many thanks to **Alessio Gravina** ([Github](https://github.com/gravins)
 /[Homepage](http://pages.di.unipi.it/gravina/)) for his invaluable help on the temporal graph implementation.
 
 Many thanks to **Francesco Landolfi** ([Github](https://github.com/flandolfi)) for the suggestions on how to make this library more user-friendly!
+
+Many thanks to **Henrik Christiansen** ([Github](https://github.com/christiansenh)) for his help with the post-processing of results!

docs/conf.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@
 author = "Federico Errica"
 
 # The full version, including alpha/beta/rc tags
-release = "1.5.3"
+release = "1.5.4"
 
 
 # -- General configuration ---------------------------------------------------

docs/tutorial.rst

Lines changed: 43 additions & 4 deletions
@@ -448,8 +448,8 @@ And we get:
    :width: 600
 
 
-Filtering Configurations for Successive Analyses
------------------------
+Filtering Configurations for Post-processing of Results
+----------------------------------------------------------
 
 You can use some utilities we provide to focus on a specific set of configurations after your experiments are terminated.
 Assuming you run `pydgn-train --config-file examples/MODEL_CONFIGS/config_SupToyDGN.yml` inside the PyDGN repo, you can
@@ -460,10 +460,49 @@ then do something like
 
     from pydgn.evaluation.util import retrieve_experiments, filter_experiments
 
     configs = retrieve_experiments('RESULTS/supervised_grid_search_toy_NCI1/MODEL_ASSESSMENT/OUTER_FOLD_1/MODEL_SELECTION/')
-    print(len(configs)) # will return 16
+    print(len(configs)) # will return 32
 
     filtered_configs = filter_experiments(configs, logic='OR', parameters={'Multiclass Classification': 1, 'lr': 0.001})
-    print(len(filtered_configs)) # will return 12
+    print(len(filtered_configs)) # will return 24
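For intuition, the OR logic in the tutorial snippet keeps every configuration that matches at least one of the requested parameter values. Below is a minimal, self-contained sketch of that filtering idea; the function name `filter_by_params` and the toy grid are hypothetical and this is not PyDGN's actual implementation:

```python
def filter_by_params(configs, parameters, logic="OR"):
    """Keep configs whose values match the given parameters.

    With logic="OR" a config survives if at least one parameter matches;
    with logic="AND" all of them must match. Illustration only.
    """
    test = any if logic == "OR" else all
    return [
        c for c in configs
        if test(c.get(k) == v for k, v in parameters.items())
    ]

# Toy 2x2 grid of configurations
grid = [
    {"lr": 0.001, "Multiclass Classification": 1},
    {"lr": 0.01,  "Multiclass Classification": 1},
    {"lr": 0.001, "Multiclass Classification": 0},
    {"lr": 0.01,  "Multiclass Classification": 0},
]
params = {"Multiclass Classification": 1, "lr": 0.001}
kept_or = filter_by_params(grid, params)                # matches either value
kept_and = filter_by_params(grid, params, logic="AND")  # must match both
print(len(kept_or), len(kept_and))  # 3 1
```

This also explains the counts in the tutorial: with OR semantics, most of a grid tends to survive, since matching any single hyperparameter value is enough.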
+
+
+Loading Model for Inspection in a Notebook
+----------------------------------------------
+
+We provide utilities to use your model immediately after experiments end to run additional analyses. Here's how:
+
+.. code-block:: python3
+
+    from pydgn.evaluation.util import *
+
+    config = retrieve_best_configuration('RESULTS/supervised_grid_search_toy_NCI1/MODEL_ASSESSMENT/OUTER_FOLD_1/MODEL_SELECTION/')
+    splits_filepath = 'examples/DATA_SPLITS/CHEMICAL/NCI1/NCI1_outer10_inner1.splits'
+    device = 'cpu'
+
+    # instantiate dataset
+    dataset = instantiate_dataset_from_config(config)
+
+    # instantiate model
+    model = instantiate_model_from_config(config, dataset, config_type="supervised_config")
+
+    # load model's checkpoint, assuming the best configuration has been loaded
+    checkpoint_location = 'RESULTS/supervised_grid_search_toy_NCI1/MODEL_ASSESSMENT/OUTER_FOLD_1/final_run1/best_checkpoint.pth'
+    load_checkpoint(checkpoint_location, model, device=device)
+
+    # you can now call the forward method of your model
+    y, embeddings = model(dataset[0])
+
+    # ------------------------------------------------------------------ #
+    # OPTIONAL: you can also instantiate a DataProvider to load TR/VL/TE
+    # splits specific to each fold
+    data_provider = instantiate_data_provider_from_config(config, splits_filepath)
+    # select outer fold 1 (indices start from 0)
+    data_provider.set_outer_k(0)
+    # select inner fold 1 (indices start from 0)
+    data_provider.set_inner_k(0)
+
+    # Please refer to the DataProvider documentation to use it properly.
+    # ------------------------------------------------------------------ #
 
 
 Telegram Bot
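The `load_checkpoint` helper used in the tutorial (added to `pydgn/evaluation/util.py` in this commit) moves each state-dict entry to the requested device one key at a time, so a checkpoint saved on cpu can be reused on cuda. A toy sketch of that per-key remapping, using a hypothetical stand-in class instead of a real torch tensor:

```python
class FakeTensor:
    """Hypothetical stand-in for a tensor that only remembers its device."""
    def __init__(self, device="cpu"):
        self.device = device

    def to(self, device):
        # Like torch tensors, return a copy placed on the target device
        return FakeTensor(device)

def move_state_dict(state, device):
    # Same per-parameter pattern as the commit's load_checkpoint: remap
    # every entry individually so the whole state dict ends up on `device`.
    return {name: t.to(device) for name, t in state.items()}

state = {"weight": FakeTensor(), "bias": FakeTensor()}
moved = move_state_dict(state, "cuda")
print(sorted(t.device for t in moved.values()))  # ['cuda', 'cuda']
```

The original entries are untouched because `to` returns a copy, which mirrors how tensor device moves behave.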

examples/Result Analysis.ipynb

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "initial_id",
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    ""
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

pydgn/evaluation/util.py

Lines changed: 84 additions & 0 deletions
@@ -5,9 +5,12 @@
 import random
 from typing import Tuple, Callable, List
 
+import torch
 import tqdm
 
+from pydgn.data.dataset import DatasetInterface
 from pydgn.experiment.util import s2c
+from pydgn.model.interface import ModelInterface
 from pydgn.static import *
 
 
@@ -349,6 +352,9 @@ def retrieve_experiments(model_selection_folder) -> List[dict]:
     """
     config_directory = os.path.join(model_selection_folder)
 
+    if not os.path.exists(config_directory):
+        raise FileNotFoundError(f"Directory not found: {config_directory}")
+
     folder_names = []
     for _, dirs, _ in os.walk(config_directory):
         for d in dirs:
@@ -448,3 +454,81 @@ def _finditem(obj, key):
         filtered_config_list.append(config)
 
     return filtered_config_list
+
+
+def retrieve_best_configuration(model_selection_folder) -> dict:
+    """
+    Once the experiments are done, retrieves the winning configuration from
+    a specific model selection folder, and returns it as a dictionary.
+
+    :param model_selection_folder: path to the folder of a model selection,
+        that is, your_results_path/..../MODEL_SELECTION/
+    :return: a dictionary with info about the best configuration
+    """
+    config_directory = os.path.join(model_selection_folder)
+
+    if not os.path.exists(config_directory):
+        raise FileNotFoundError(f"Directory not found: {config_directory}")
+
+    with open(os.path.join(config_directory, "winner_config.json"), "rb") as f:
+        best_config = json.load(f)
+    return best_config
+
+
+def instantiate_dataset_from_config(config: dict) -> DatasetInterface:
+    """
+    Instantiate a dataset from a configuration file.
+
+    :param config (dict): the configuration file
+    :return: an instance of DatasetInterface, i.e., the dataset
+    """
+    data_root = config[CONFIG][DATA_ROOT]
+    dataset_name = config[CONFIG][DATASET]
+    dataset_class = s2c(config[CONFIG][DATASET_CLASS])
+
+    return dataset_class(data_root, dataset_name)
+
+
+def instantiate_model_from_config(
+    config: dict,
+    dataset: DatasetInterface,
+    config_type: str = "supervised_config",
+) -> ModelInterface:
+    """
+    Instantiate a model from a configuration file.
+
+    :param config (dict): the configuration file
+    :param dataset (DatasetInterface): the dataset used in the experiment
+    :param config_type (str): the type of model in ["supervised_config",
+        "unsupervised_config"], as written in the YAML experiment
+        configuration file. Defaults to "supervised_config"
+    :return: an instance of ModelInterface, i.e., the model
+    """
+    config_ = config[CONFIG][config_type]
+    readout_class = s2c(config_["readout"])
+
+    model_class = s2c(config_[MODEL])
+    model = model_class(
+        dataset.dim_node_features,
+        dataset.dim_edge_features,
+        dataset.dim_target,
+        readout_class,
+        config=config_,
+    )
+
+    return model
+
+
+def load_checkpoint(
+    checkpoint_path: str, model: ModelInterface, device: torch.device
+):
+    """
+    Load a checkpoint from a checkpoint file into a model.
+
+    :param checkpoint_path: the checkpoint file path
+    :param model (ModelInterface): the model
+    :param device (torch.device): the device, e.g., "cpu" or "cuda"
+    """
+    ckpt_dict = torch.load(
+        checkpoint_path, map_location="cpu" if device == "cpu" else None
+    )
+    model_state = ckpt_dict[MODEL_STATE]
+
+    # Needed only when moving from cpu to cuda (due to changes in the config
+    # file). Move all parameters to the chosen device.
+    for param in model_state.keys():
+        model_state[param] = model_state[param].to(device)
+
+    model.load_state_dict(model_state)
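As the diff above shows, `retrieve_best_configuration` simply reads `winner_config.json` from the model selection folder, failing loudly when the folder is missing. A self-contained sketch of that contract; the standalone function `best_config` and the nested JSON layout used here are illustrative, not PyDGN's actual key names:

```python
import json
import os
import tempfile

def best_config(model_selection_folder):
    # Mirror the helper's contract: fail loudly on a missing folder,
    # then parse the winner file it is expected to contain.
    if not os.path.exists(model_selection_folder):
        raise FileNotFoundError(f"Directory not found: {model_selection_folder}")
    with open(os.path.join(model_selection_folder, "winner_config.json")) as f:
        return json.load(f)

# Simulate a MODEL_SELECTION folder containing a winner file
with tempfile.TemporaryDirectory() as model_selection_dir:
    winner = {"config": {"supervised_config": {"lr": 0.001}}}
    with open(os.path.join(model_selection_dir, "winner_config.json"), "w") as f:
        json.dump(winner, f)
    cfg = best_config(model_selection_dir)
    print(cfg["config"]["supervised_config"]["lr"])  # 0.001
```

The early `FileNotFoundError` check is the same guard this commit also adds to `retrieve_experiments`, turning a confusing empty result into an immediate, descriptive failure.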

pyproject.toml

Lines changed: 4 additions & 1 deletion
@@ -2,9 +2,12 @@
 requires = ["setuptools"]
 build-backend = "setuptools.build_meta"
 
+[tool.setuptools.packages.find]
+where = ["pydgn"]
+
 [project]
 name = "pydgn"
-version = "1.5.3"
+version = "1.5.4"
 description = "A Python Package for Deep Graph Networks"
 authors = [ { name="Federico Errica", email="f.errica@protonmail.com" } ]
 readme = "README.md"

0 commit comments
