Skip to content

Commit c797a49

Browse files
committed
Add qualitymetrics_based curation
1 parent a2550d2 commit c797a49

File tree

10 files changed

+298
-103
lines changed

10 files changed

+298
-103
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# ---
2+
# jupyter:
3+
# jupytext:
4+
# cell_metadata_filter: -all
5+
# formats: ipynb,py:percent
6+
# text_representation:
7+
# extension: .py
8+
# format_name: percent
9+
# format_version: '1.3'
10+
# jupytext_version: 1.19.1
11+
# kernelspec:
12+
# display_name: Python 3 (ipykernel)
13+
# language: python
14+
# name: python3
15+
# ---
16+
17+
# %% [markdown]
18+
# # Automatic labeling units after spike sorting
19+
#
20+
# This example shows how to automatically label units after spike sorting, using three different approaches:
21+
#
22+
# 1. Simple filter based on quality metrics
23+
# 2. Bombcell: heuristic approach to label units based on quality and template metrics [Fabre]_
24+
# 3. UnitRefine: pre-trained classifiers to label units as noise or SUA/MUA [Jain]_
25+
26+
# %%
27+
import spikeinterface as si
28+
import spikeinterface.curation as sc
29+
import spikeinterface.widgets as sw
30+
31+
# %%
# NOTE(review): hard-coded local path — replace with the location of your own analyzer
analyzer_path = "/ssd980/working/analyzer_np2_shank1.zarr"

# %%
analyzer = si.load(analyzer_path)

# %%
# Compute quality metrics on the loaded analyzer.
# Fix: the original referenced `analyzer_zarr`, which is undefined in this script —
# the object loaded above is named `analyzer`.
qm = analyzer.compute("quality_metrics", delete_existing_metrics=True)
qm.get_data()

# %%

src/spikeinterface/curation/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
# automated curation
2323
from .model_based_curation import model_based_label_units, load_model, auto_label_units
2424
from .train_manual_curation import train_model, get_default_classifier_search_spaces
25+
from .qualitymetrics_curation import qualitymetrics_label_units
2526
from .unitrefine_curation import unitrefine_label_units
2627
from .bombcell_curation import (
2728
bombcell_get_default_thresholds,

src/spikeinterface/curation/bombcell_curation.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
import json
1515
import numpy as np
1616

17+
from .curation_tools import _is_threshold_disabled
18+
1719
NOISE_METRICS = [
1820
"num_positive_peaks",
1921
"num_negative_peaks",
@@ -75,15 +77,6 @@ def bombcell_get_default_thresholds() -> dict:
7577
}
7678

7779

78-
def _is_threshold_disabled(value):
79-
"""Check if a threshold value is disabled (None or np.nan)."""
80-
if value is None:
81-
return True
82-
if isinstance(value, float) and np.isnan(value):
83-
return True
84-
return False
85-
86-
8780
def bombcell_label_units(
8881
sorting_analyzer=None,
8982
thresholds: dict | str | Path | None = None,

src/spikeinterface/curation/curation_tools.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,15 @@
1414
_methods_numpy = ("keep_first", "random", "keep_last")
1515

1616

17+
def _is_threshold_disabled(value):
18+
"""Check if a threshold value is disabled (None or np.nan)."""
19+
if value is None:
20+
return True
21+
if isinstance(value, float) and np.isnan(value):
22+
return True
23+
return False
24+
25+
1726
def _find_duplicated_spikes_numpy(
1827
spike_train: np.ndarray,
1928
censored_period: int,
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import json
2+
from pathlib import Path
3+
4+
import numpy as np
5+
6+
from spikeinterface.core.analyzer_extension_core import SortingAnalyzer
7+
8+
from .curation_tools import _is_threshold_disabled
9+
10+
11+
def qualitymetrics_label_units(
    analyzer: SortingAnalyzer,
    thresholds: dict | str | Path,
):
    """Label units as 'good' or 'noise' based on quality-metric thresholds.

    Parameters
    ----------
    analyzer : SortingAnalyzer
        The SortingAnalyzer object containing the computed quality metrics.
    thresholds : dict | str | Path
        A dictionary (or path to a JSON file) whose keys are metric names and
        whose values are threshold specifications. Each key must correspond to
        a column of the analyzer's quality-metrics DataFrame; each value is a
        dict with optional "min" and/or "max" keys. A bound set to None or NaN
        is ignored. Units passing every enabled bound are labeled 'good'; all
        others are labeled 'noise'.

    Returns
    -------
    labels : pd.DataFrame
        A DataFrame with unit IDs as index and a column 'label' containing the
        assigned labels ('noise' or 'good').

    Raises
    ------
    ValueError
        If quality metrics are not computed on the analyzer, if `thresholds`
        is neither a dict nor a path, or if a threshold names a metric absent
        from the quality-metrics DataFrame.
    """
    import pandas as pd

    # Explicit exception instead of `assert`, so the check survives `python -O`
    if not analyzer.has_extension("quality_metrics"):
        raise ValueError(
            "The provided analyzer does not have quality metrics computed. "
            "Please compute quality metrics before labeling units."
        )
    qm = analyzer.get_extension("quality_metrics").get_data()

    # Load thresholds from file if a path is provided
    if isinstance(thresholds, (str, Path)):
        with open(thresholds, "r") as f:
            thresholds_dict = json.load(f)
    elif isinstance(thresholds, dict):
        thresholds_dict = thresholds
    else:
        raise ValueError("Thresholds must be a dictionary or a path to a JSON file containing the thresholds.")

    # Fail early on metrics that are not available in the quality metrics DataFrame
    missing_metrics = [metric for metric in thresholds_dict if metric not in qm.columns]
    if missing_metrics:
        raise ValueError(
            f"Metric(s) {missing_metrics} specified in thresholds are not present in the quality metrics DataFrame. "
            f"Available metrics are: {qm.columns.tolist()}"
        )

    # Every unit starts as 'noise'; those passing all enabled bounds become 'good'
    labels = pd.DataFrame(index=qm.index, dtype=str)
    labels["label"] = "noise"

    good_mask = np.ones(len(qm), dtype=bool)
    for metric_name, threshold in thresholds_dict.items():
        min_value = threshold.get("min", None)
        max_value = threshold.get("max", None)
        # None/NaN disables the corresponding bound
        if not _is_threshold_disabled(min_value):
            good_mask &= qm[metric_name] >= min_value
        if not _is_threshold_disabled(max_value):
            good_mask &= qm[metric_name] <= max_value

    labels.loc[good_mask, "label"] = "good"

    return labels

src/spikeinterface/curation/tests/test_model_based_curation.py

Lines changed: 0 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from spikeinterface.curation.model_based_curation import ModelBasedClassification
66
from spikeinterface.curation import model_based_label_units, load_model
77
from spikeinterface.curation.train_manual_curation import _get_computed_metrics
8-
from spikeinterface.curation import unitrefine_label_units
98

109

1110
import numpy as np
@@ -171,83 +170,3 @@ def test_exception_raised_when_metric_params_not_equal(sorting_analyzer_for_cura
171170
model, model_info = load_model(model_folder=trained_pipeline_path, trusted=["numpy.dtype"])
172171
model_based_classification = ModelBasedClassification(sorting_analyzer_for_curation, model)
173172
model_based_classification._check_params_for_classification(enforce_metric_params=True, model_info=model_info)
174-
175-
176-
def test_unitrefine_label_units_hf(sorting_analyzer_for_curation):
177-
"""Test the `unitrefine_label_units` function."""
178-
sorting_analyzer_for_curation.compute("template_metrics", include_multi_channel_metrics=True)
179-
sorting_analyzer_for_curation.compute("quality_metrics")
180-
181-
# test passing both classifiers
182-
labels = unitrefine_label_units(
183-
sorting_analyzer_for_curation,
184-
noise_neural_classifier="SpikeInterface/UnitRefine_noise_neural_classifier_lightweight",
185-
sua_mua_classifier="SpikeInterface/UnitRefine_sua_mua_classifier_lightweight",
186-
)
187-
188-
assert "label" in labels.columns
189-
assert "probability" in labels.columns
190-
assert labels.shape[0] == len(sorting_analyzer_for_curation.sorting.unit_ids)
191-
192-
# test only noise neural classifier
193-
labels = unitrefine_label_units(
194-
sorting_analyzer_for_curation,
195-
noise_neural_classifier="SpikeInterface/UnitRefine_noise_neural_classifier_lightweight",
196-
sua_mua_classifier=None,
197-
)
198-
199-
assert "label" in labels.columns
200-
assert "probability" in labels.columns
201-
assert labels.shape[0] == len(sorting_analyzer_for_curation.sorting.unit_ids)
202-
203-
# test only sua mua classifier
204-
labels = unitrefine_label_units(
205-
sorting_analyzer_for_curation,
206-
noise_neural_classifier=None,
207-
sua_mua_classifier="SpikeInterface/UnitRefine_sua_mua_classifier_lightweight",
208-
)
209-
210-
assert "label" in labels.columns
211-
assert "probability" in labels.columns
212-
assert labels.shape[0] == len(sorting_analyzer_for_curation.sorting.unit_ids)
213-
214-
# test passing none
215-
with pytest.raises(ValueError):
216-
labels = unitrefine_label_units(
217-
sorting_analyzer_for_curation,
218-
noise_neural_classifier=None,
219-
sua_mua_classifier=None,
220-
)
221-
222-
# test warnings when unexpected labels are returned
223-
with pytest.warns(UserWarning):
224-
labels = unitrefine_label_units(
225-
sorting_analyzer_for_curation,
226-
noise_neural_classifier="SpikeInterface/UnitRefine_sua_mua_classifier_lightweight",
227-
sua_mua_classifier=None,
228-
)
229-
230-
with pytest.warns(UserWarning):
231-
labels = unitrefine_label_units(
232-
sorting_analyzer_for_curation,
233-
noise_neural_classifier=None,
234-
sua_mua_classifier="SpikeInterface/UnitRefine_noise_neural_classifier_lightweight",
235-
)
236-
237-
238-
def test_unitrefine_label_units_with_local_models(sorting_analyzer_for_curation, trained_pipeline_path):
239-
# test with trained local models
240-
sorting_analyzer_for_curation.compute("template_metrics", include_multi_channel_metrics=True)
241-
sorting_analyzer_for_curation.compute("quality_metrics")
242-
243-
# test passing model folder
244-
labels = unitrefine_label_units(
245-
sorting_analyzer_for_curation,
246-
noise_neural_classifier=trained_pipeline_path,
247-
)
248-
249-
# test passing model folder
250-
labels = unitrefine_label_units(
251-
sorting_analyzer_for_curation,
252-
noise_neural_classifier=trained_pipeline_path / "best_model.skops",
253-
)
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import pytest
2+
import json
3+
4+
from spikeinterface.curation.tests.common import sorting_analyzer_for_curation
5+
from spikeinterface.curation import qualitymetrics_label_units
6+
7+
8+
def test_qualitymetrics_label_units(sorting_analyzer_for_curation):
    """Check `qualitymetrics_label_units` against manually applied thresholds."""
    sorting_analyzer_for_curation.compute("quality_metrics")

    thresholds = {
        "snr": {"min": 5.0},
        "firing_rate": {"min": 0.1, "max": 20.0},
    }

    labels = qualitymetrics_label_units(sorting_analyzer_for_curation, thresholds)

    assert "label" in labels.columns
    assert labels.shape[0] == len(sorting_analyzer_for_curation.sorting.unit_ids)

    # Re-derive the expected label for each unit straight from the metrics:
    # 'good' iff snr and firing_rate both fall within their thresholds.
    qm = sorting_analyzer_for_curation.get_extension("quality_metrics").get_data()
    for unit_id in sorting_analyzer_for_curation.sorting.unit_ids:
        snr_ok = qm.loc[unit_id, "snr"] >= thresholds["snr"]["min"]
        fr = qm.loc[unit_id, "firing_rate"]
        fr_ok = thresholds["firing_rate"]["min"] <= fr <= thresholds["firing_rate"]["max"]
        expected = "good" if (snr_ok and fr_ok) else "noise"
        assert labels.loc[unit_id, "label"] == expected
37+
38+
39+
def test_qualitymetrics_label_units_with_file(sorting_analyzer_for_curation, tmp_path):
    """Check `qualitymetrics_label_units` when thresholds come from a JSON file."""
    sorting_analyzer_for_curation.compute("quality_metrics")

    thresholds = {
        "snr": {"min": 5.0},
        "firing_rate": {"min": 0.1},
    }

    # Round-trip the thresholds through a JSON file on disk
    thresholds_file = tmp_path / "thresholds.json"
    thresholds_file.write_text(json.dumps(thresholds))

    labels = qualitymetrics_label_units(sorting_analyzer_for_curation, thresholds_file)

    assert "label" in labels.columns
    assert labels.shape[0] == len(sorting_analyzer_for_curation.sorting.unit_ids)

    # Re-derive the expected label for each unit straight from the metrics:
    # 'good' iff snr and firing_rate both pass their minimum thresholds.
    qm = sorting_analyzer_for_curation.get_extension("quality_metrics").get_data()
    for unit_id in sorting_analyzer_for_curation.sorting.unit_ids:
        snr_ok = qm.loc[unit_id, "snr"] >= thresholds["snr"]["min"]
        fr_ok = qm.loc[unit_id, "firing_rate"] >= thresholds["firing_rate"]["min"]
        expected = "good" if (snr_ok and fr_ok) else "noise"
        assert labels.loc[unit_id, "label"] == expected

0 commit comments

Comments
 (0)