diff --git a/AGENTS.md b/AGENTS.md index d753914bc..1a01b3993 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -124,8 +124,7 @@ Config class in `workflow/config.py` handles merging and validation. Core scientific stack from AlphaPept ecosystem (https://github.com/MannLabs): - **alphabase** - Base spectral library classes (`SpecLibBase`, `SpecLibFlat`) -- **alpharaw** - Raw file reading (Thermo, Sciex) -- **alphatims** - Bruker TimsTOF support +- **alpharaw** - Raw file reading - **alphapeptdeep** - Deep learning models for property prediction - **directlfq** - Label-free quantification diff --git a/alphadia/fdr/fdr.py b/alphadia/fdr/fdr.py index 769a4209d..ab3e5b872 100644 --- a/alphadia/fdr/fdr.py +++ b/alphadia/fdr/fdr.py @@ -66,7 +66,7 @@ def perform_fdr( # noqa: C901, PLR0913 # too complex, too many arguments The fragment dataframe. dia_cycle : np.ndarray, default=None - The DIA cycle as provided by alphatims. Required if df_fragments is provided. + The DIA cycle. Required if df_fragments is provided. 
fdr_heuristic : float, default=0.1 The FDR heuristic to use for the initial selection of PSMs before fragment competition diff --git a/alphadia/fragcomp/fragcomp.py b/alphadia/fragcomp/fragcomp.py index 1863c3736..dba0af28f 100644 --- a/alphadia/fragcomp/fragcomp.py +++ b/alphadia/fragcomp/fragcomp.py @@ -6,7 +6,8 @@ import numba as nb import numpy as np import pandas as pd -from alphatims import utils as timsutils +from alpharaw.utils.pjit import pjit +from alpharaw.utils.pjit import set_threads as set_pjit_threads from pandas.errors import SettingWithCopyWarning from alphadia.constants.keys import CalibCols @@ -48,7 +49,7 @@ def _get_fragment_overlap( return np.sum(ppm_delta_mz < mass_tol_ppm) -@timsutils.pjit(cache=USE_NUMBA_CACHING) +@pjit(cache=USE_NUMBA_CACHING) def _compete_for_fragments( # noqa: PLR0913 # Too many arguments thread_idx: int, # pjit decorator changes the passed argument from an iterable to single index precursor_start_idxs: np.ndarray, @@ -242,7 +243,7 @@ def __call__( The fragment dataframe. cycle: np.ndarray - DIA cycle as provided by alphatims. + DIA cycle. 
Returns ------- @@ -272,7 +273,7 @@ def __call__( valid = np.ones(len(psm_df)).astype(bool) # psm_df["valid"] = True - timsutils.set_threads(self.thread_count) + set_pjit_threads(self.thread_count) thread_plan_df = self._get_thread_plan_df(psm_df) _compete_for_fragments( diff --git a/alphadia/raw_data/bruker.py b/alphadia/raw_data/bruker.py index bff0e18ef..eb026a7e5 100644 --- a/alphadia/raw_data/bruker.py +++ b/alphadia/raw_data/bruker.py @@ -1,12 +1,11 @@ """Module providing methods to read and process raw data in the following formats: Bruker.""" import logging -import os -import alphatims.bruker -import alphatims.utils import numba as nb import numpy as np +from alpharaw.bruker.timstof import TimsTOFBase +from alpharaw.utils.pjit import pjit, set_threads from alphadia.exceptions import NotValidDiaDataError from alphadia.search.jitclasses.bruker_jit import TimsTOFTransposeJIT @@ -15,78 +14,35 @@ logger = logging.getLogger() -class TimsTOFTranspose(alphatims.bruker.TimsTOF): +class TimsTOFTranspose(TimsTOFBase): """Transposed TimsTOF data structure.""" def __init__( self, bruker_d_folder_name: str, - *, - mz_estimation_from_frame: int = 1, - mobility_estimation_from_frame: int = 1, - slice_as_dataframe: bool = True, - use_calibrated_mz_values_as_default: int = 0, - use_hdf_if_available: bool = False, - drop_polarity: bool = True, - convert_polarity_to_int: bool = True, ): - self.has_mobility = True - self.has_ms1 = True + super().__init__( + bruker_d_folder_name, + mz_estimation_from_frame=1, + mobility_estimation_from_frame=1, + drop_polarity=True, + convert_polarity_to_int=True, + ) - bruker_d_folder_name = bruker_d_folder_name.removesuffix("/") - logger.info(f"Importing data from {bruker_d_folder_name}") - if bruker_d_folder_name.endswith(".d"): - bruker_hdf_file_name = f"{bruker_d_folder_name[:-2]}.hdf" - hdf_file_exists = os.path.exists(bruker_hdf_file_name) - if use_hdf_if_available and hdf_file_exists: - self._import_data_from_hdf_file( - 
bruker_hdf_file_name, - ) - self.bruker_hdf_file_name = bruker_hdf_file_name - else: - self.bruker_d_folder_name = os.path.abspath(bruker_d_folder_name) - self._import_data_from_d_folder( - bruker_d_folder_name, - mz_estimation_from_frame, - mobility_estimation_from_frame, - drop_polarity, - convert_polarity_to_int, - mmap_detector_events=False, + try: + cycle_shape = self._cycle.shape[0] + except AttributeError as e: + raise NotValidDiaDataError("Could not find cycle shape attribute.") from e + else: + if cycle_shape != 1: + raise NotValidDiaDataError( + f"Unexpected cycle shape: {cycle_shape} (expected: 1)." ) - try: - cycle_shape = self._cycle.shape[0] - except AttributeError as e: - raise NotValidDiaDataError( - "Could not find cycle shape attribute." - ) from e - else: - if cycle_shape != 1: - raise NotValidDiaDataError( - f"Unexpected cycle shape: {cycle_shape} (expected: 1)." - ) - - self.transpose() - - elif bruker_d_folder_name.endswith(".hdf"): - self._import_data_from_hdf_file( - bruker_d_folder_name, - ) - self.bruker_hdf_file_name = bruker_d_folder_name - else: - raise NotImplementedError("ERROR: file extension not understood") - - if not hasattr(self, "version"): - self._version = "N.A." - if self.version != alphatims.__version__: - logger.info( - "WARNING: " - f"AlphaTims version {self.version} was used to initialize " - f"{bruker_d_folder_name}, while the current version of " - f"AlphaTims is {alphatims.__version__}." 
- ) - self.slice_as_dataframe = slice_as_dataframe - self.use_calibrated_mz_values_as_default(use_calibrated_mz_values_as_default) + self.has_mobility = True + self.has_ms1 = True + + self.transpose() # Precompile logger.info(f"Successfully imported data from {bruker_d_folder_name}") @@ -152,7 +108,7 @@ def to_jitclass(self) -> TimsTOFTransposeJIT: ) -@alphatims.utils.pjit(cache=USE_NUMBA_CACHING) +@pjit(cache=USE_NUMBA_CACHING) def _transpose_chunk( chunk_idx, # pjit decorator changes the passed argument from an iterable to single index chunks, @@ -200,7 +156,7 @@ def _build_chunks(number_of_elements, num_chunks): @nb.njit(cache=USE_NUMBA_CACHING) def _transpose(tof_indices, push_indptr, n_tof_indices, values): - """The default alphatims data format consists of a sparse matrix where pushes are the rows, tof indices (discrete mz values) the columns and intensities the values. + """The default alpharaw (formerly: alphatims) data format consists of a sparse matrix where pushes are the rows, tof indices (discrete mz values) the columns and intensities the values. A lookup starts with a given push index p which points to the row. The start and stop indices of the row are accessed from dia_data.push_indptr[p] and dia_data.push_indptr[p+1]. The tof indices are then accessed from dia_data.tof_indices[start:stop] and the corresponding intensities from dia_data.intensity_values[start:stop]. @@ -257,7 +213,7 @@ def _transpose(tof_indices, push_indptr, n_tof_indices, values): chunks = _build_chunks(n_tof_indices, 20) with nb.objmode: - alphatims.utils.set_threads(20) + set_threads(20) # TODO: this should set from config! 
_transpose_chunk( range(len(chunks) - 1), # type: ignore # noqa: PGH003 # function is wrapped by pjit -> will be turned into single index and passed to the method diff --git a/alphadia/reporting/logging.py b/alphadia/reporting/logging.py index b78d06345..c4cc61bf2 100644 --- a/alphadia/reporting/logging.py +++ b/alphadia/reporting/logging.py @@ -8,7 +8,6 @@ import alphabase import alpharaw -import alphatims import directlfq import peptdeep @@ -60,7 +59,6 @@ def print_environment() -> None: logger.info("================ AlphaX Environment ===============") logger.info(f"{'alphabase':<15} : {alphabase.__version__}") logger.info(f"{'alpharaw':<15} : {alpharaw.__version__}") - logger.info(f"{'alphatims':<15} : {alphatims.__version__:}") logger.info(f"{'alphapeptdeep':<15} : {peptdeep.__version__}") logger.info(f"{'directlfq':<15} : {directlfq.__version__}") logger.info("===================================================") diff --git a/alphadia/search/scoring/containers/score_group.py b/alphadia/search/scoring/containers/score_group.py index f20faaced..7afe428df 100644 --- a/alphadia/search/scoring/containers/score_group.py +++ b/alphadia/search/scoring/containers/score_group.py @@ -418,4 +418,4 @@ def collect_fragments(self): # TODO: why is this necessary? 
-ScoreGroupContainer.__module__ = "alphatims.extraction.plexscoring" +# ScoreGroupContainer.__module__ = "alphatims.extraction.plexscoring" diff --git a/alphadia/search/scoring/scoring.py b/alphadia/search/scoring/scoring.py index 095e99a0a..f2328bf03 100644 --- a/alphadia/search/scoring/scoring.py +++ b/alphadia/search/scoring/scoring.py @@ -2,9 +2,10 @@ import logging -import alphatims.utils import numpy as np import pandas as pd +from alpharaw.utils.pjit import pjit +from alpharaw.utils.pjit import set_threads as set_pjit_threads from alphadia.constants.keys import CalibCols from alphadia.raw_data import DiaData @@ -111,7 +112,7 @@ def _get_isotope_column_names(colnames): return [f"i_{i}" for i in get_isotope_columns(colnames)] -@alphatims.utils.pjit(cache=USE_NUMBA_CACHING) +@pjit(cache=USE_NUMBA_CACHING) def _process_score_groups( i, # pjit decorator changes the passed argument from an iterable to single index sg_container: ScoreGroupContainer, @@ -124,7 +125,7 @@ def _process_score_groups( ): """ Helper function. - Is decorated with alphatims.utils.pjit to enable parallel execution of HybridElutionGroup.process. + Is decorated with `pjit` to enable parallel execution of HybridElutionGroup.process. 
""" sg_container[i].process( @@ -630,7 +631,7 @@ def __call__( thread_count = 1 iterator_len = min(10, iterator_len) - alphatims.utils.set_threads(thread_count) + set_pjit_threads(thread_count) _process_score_groups( range(iterator_len), # type: ignore # noqa: PGH003 # function is wrapped by pjit -> will be turned into single index and passed to the method score_group_container, diff --git a/alphadia/search/selection/selection.py b/alphadia/search/selection/selection.py index fe01717fd..2d516e54f 100644 --- a/alphadia/search/selection/selection.py +++ b/alphadia/search/selection/selection.py @@ -2,10 +2,11 @@ import logging -import alphatims.utils import numba as nb import numpy as np import pandas as pd +from alpharaw.utils.pjit import pjit +from alpharaw.utils.pjit import set_threads as set_pjit_threads from alphadia import utils from alphadia.constants.keys import CalibCols @@ -75,7 +76,7 @@ def _is_valid( return True -@alphatims.utils.pjit(cache=USE_NUMBA_CACHING) +@pjit(cache=USE_NUMBA_CACHING) def _select_candidates_pjit( i: int, # pjit decorator changes the passed argument from an iterable to single index jit_data: DiaDataJIT, @@ -626,7 +627,7 @@ def __call__(self, thread_count: int = 10, debug: bool = False) -> pd.DataFrame: Elution groups are stored in the ElutionGroupContainer Numba JIT object. 2. Then, the elution groups are iterated over and the candidates are selected. - The candidate selection is performed in parallel using the alphatims.utils.pjit function. + The candidate selection is performed in parallel using the `pjit` decorator. 3. Finally, the candidates are collected from the ElutionGroup, assembled into a pd.DataFrame and precursor information is appended. 
@@ -653,7 +654,7 @@ def __call__(self, thread_count: int = 10, debug: bool = False) -> pd.DataFrame: iterator_len = min(10, len(self.precursors_flat)) thread_count = 1 - alphatims.utils.set_threads(thread_count) + set_pjit_threads(thread_count) _select_candidates_pjit( range(iterator_len), # type: ignore # noqa: PGH003 # function is wrapped by pjit -> will be turned into single index and passed to the method diff --git a/alphadia/workflow/base.py b/alphadia/workflow/base.py index 6b58c03aa..3e1d28881 100644 --- a/alphadia/workflow/base.py +++ b/alphadia/workflow/base.py @@ -3,12 +3,12 @@ import time from alphabase.spectral_library.flat import SpecLibFlat -from alphatims.bruker import TimsTOF from alphadia.constants.keys import ConfigKeys from alphadia.constants.settings import FIGURES_FOLDER_NAME from alphadia.exceptions import GenericUserError from alphadia.raw_data import DiaData +from alphadia.raw_data.bruker import TimsTOFTranspose from alphadia.reporting import reporting from alphadia.workflow.config import Config from alphadia.workflow.managers.calibration_manager import CalibrationManager @@ -119,7 +119,7 @@ def load( if self._config["search"]["extraction_backend"] == "rust": time_start = time.time() - if isinstance(self._dia_data, TimsTOF): + if isinstance(self._dia_data, TimsTOFTranspose): raise GenericUserError( "NOT_SUPPORTED_BY_NG", "Rust backend does not support TimsTOF data yet. Please use extraction_backend='python'.", diff --git a/docs/index.md b/docs/index.md index 055705473..ec3e63a13 100644 --- a/docs/index.md +++ b/docs/index.md @@ -11,7 +11,7 @@ ![Coverage](https://github.com/MannLabs/alphadia/raw/main/coverage.svg) ![Github](https://img.shields.io/github/stars/mannlabs/alphadia?style=social) -Open-source DIA search engine built with the alphaX ecosystem. Built with [alpharaw](https://github.com/MannLabs/alpharaw) and [alphatims](https://github.com/MannLabs/alphatims) for raw file acces. 
Spectral libraries are predicted with [peptdeep](https://github.com/MannLabs/alphapeptdeep) and managed by [alphabase](https://github.com/MannLabs/alphabase). Quantification is powered by [directLFQ](https://github.com/MannLabs/directLFQ). +Open-source DIA search engine built with the alphaX ecosystem. Built with [alpharaw](https://github.com/MannLabs/alpharaw) for raw file access. Spectral libraries are predicted with [peptdeep](https://github.com/MannLabs/alphapeptdeep) and managed by [alphabase](https://github.com/MannLabs/alphabase). Quantification is powered by [directLFQ](https://github.com/MannLabs/directLFQ). **Features** - Empirical library and fully predicted library search diff --git a/release/pyinstaller/alphadia.spec b/release/pyinstaller/alphadia.spec index 59beacf44..d8b5ffd16 100644 --- a/release/pyinstaller/alphadia.spec +++ b/release/pyinstaller/alphadia.spec @@ -30,7 +30,7 @@ datas, binaries, hidden_imports = PyInstaller.utils.hooks.collect_all( include_py_files=True ) -alpha_x = ['alphabase', 'alpharaw','alphatims','peptdeep','alphadia-search-rs'] +alpha_x = ['alphabase', 'alpharaw', 'peptdeep', 'alphadia-search-rs'] for alpha_package in alpha_x: _datas, _binaries, _hidden_imports = PyInstaller.utils.hooks.collect_all( alpha_package, diff --git a/requirements/_requirements.freeze.txt b/requirements/_requirements.freeze.txt index 8dd9f2dea..eabbebd1d 100644 --- a/requirements/_requirements.freeze.txt +++ b/requirements/_requirements.freeze.txt @@ -2,7 +2,6 @@ alphabase==1.7.1 # manually bumped alphadia-search-rs==1.1.1 alpharaw==0.4.7 -alphatims==1.0.9 argparse==1.4.0 biopython==1.85 certifi==2025.1.31 diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 6de9ac25a..81c4ff860 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -3,7 +3,6 @@ # alphaX alphadia-search-rs>=1,<2 alpharaw>=0.3.1 -alphatims>=1.0.8 alphabase>=1.5.0 peptdeep>=1.4.1 directlfq>=0.2.19