Skip to content

Support sparse pandas DataFrames #2380

@WeilerP

Description

@WeilerP

Description

Support saving sparse columns of a pandas DataFrame (e.g. `adata.obs`) to disk. At the moment, the following fails (anndata 0.12.10):

import numpy as np
import pandas as pd

import anndata as ad

adata = ad.AnnData(X=np.eye(3))
adata.obs["sparse_col"] = pd.arrays.SparseArray([0.0, 1, 0.0], fill_value=0.0)
adata.write_h5ad("dummy.h5ad")

with the following traceback:

---------------------------------------------------------------------------
IORegistryError                           Traceback (most recent call last)
Cell In[10], line 1
----> 1 adata.write_h5ad("dummy.h5ad")

    [... skipping hidden 1 frame]

File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_core/anndata.py:1934, in AnnData.write_h5ad(self, filename, convert_strings_to_categoricals, compression, compression_opts, as_dense)
   1931 if filename is None:
   1932     filename = self.filename
-> 1934 write_h5ad(
   1935     Path(filename),
   1936     self,
   1937     convert_strings_to_categoricals=convert_strings_to_categoricals,
   1938     compression=compression,
   1939     compression_opts=compression_opts,
   1940     as_dense=as_dense,
   1941 )
   1942 # Only reset the filename if the AnnData object now points to a complete new copy
   1943 if self.isbacked and not self.is_view:

File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/utils.py:349, in no_write_dataset_2d.<locals>.raise_error_if_dataset_2d_present(store, adata, *args, **kwargs)
    342     msg = (
    343         "Writing AnnData objects with a Dataset2D not supported yet. "
    344         "Please use `ds.to_memory` to bring the dataset into memory. "
    345         "Note that if you have generated this object by concatenating several `AnnData` objects"
    346         "the original types may be lost."
    347     )
    348     raise NotImplementedError(msg)
--> 349 return write(store, adata, *args, **kwargs)

File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/h5ad.py:98, in write_h5ad(filepath, adata, as_dense, convert_strings_to_categoricals, dataset_kwargs, **kwargs)
     90 _write_x(
     91     f,
     92     adata,  # accessing adata.X reopens adata.file if it’s backed
   (...)     95     dataset_kwargs=dataset_kwargs,
     96 )
     97 _write_raw(f, adata.raw, as_dense=as_dense, dataset_kwargs=dataset_kwargs)
---> 98 write_elem(f, "obs", adata.obs, dataset_kwargs=dataset_kwargs)
     99 write_elem(f, "var", adata.var, dataset_kwargs=dataset_kwargs)
    100 write_elem(f, "obsm", dict(adata.obsm), dataset_kwargs=dataset_kwargs)

File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/registry.py:518, in write_elem(store, k, elem, dataset_kwargs)
    494 def write_elem(
    495     store: GroupStorageType,
    496     k: str,
   (...)    499     dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
    500 ) -> None:
    501     """
    502     Write an element to a storage group using anndata encoding.
    503 
   (...)    516         E.g. for zarr this would be `chunks`, `compressor`.
    517     """
--> 518     Writer(_REGISTRY).write_elem(store, k, elem, dataset_kwargs=dataset_kwargs)

File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/utils.py:272, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs)
    270     raise ValueError(msg)
    271 try:
--> 272     return func(*args, **kwargs)
    273 except Exception as e:
    274     path = _get_display_path(store)

File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/registry.py:389, in Writer.write_elem(self, store, k, elem, dataset_kwargs, modifiers)
    386 write_func = self.find_write_func(dest_type, elem, modifiers)
    388 if self.callback is None:
--> 389     return write_func(store, k, elem, dataset_kwargs=dataset_kwargs)
    390 return self.callback(
    391     write_func,
    392     store,
   (...)    396     iospec=self.registry.get_spec(elem),
    397 )

File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/registry.py:77, in write_spec.<locals>.decorator.<locals>.wrapper(g, k, *args, **kwargs)
     75 @wraps(func)
     76 def wrapper(g: GroupStorageType, k: str, *args, **kwargs):
---> 77     result = func(g, k, *args, **kwargs)
     78     g[k].attrs.setdefault("encoding-type", spec.encoding_type)
     79     g[k].attrs.setdefault("encoding-version", spec.encoding_version)

File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/methods.py:1021, in write_dataframe(f, key, df, _writer, dataset_kwargs)
   1016 _writer.write_elem(
   1017     group, index_name, df.index._values, dataset_kwargs=dataset_kwargs
   1018 )
   1019 for colname, series in df.items():
   1020     # TODO: this should write the "true" representation of the series (i.e. the underlying array or ndarray depending)
-> 1021     _writer.write_elem(
   1022         group, colname, series._values, dataset_kwargs=dataset_kwargs
   1023     )

File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/utils.py:272, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs)
    270     raise ValueError(msg)
    271 try:
--> 272     return func(*args, **kwargs)
    273 except Exception as e:
    274     path = _get_display_path(store)

File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/registry.py:386, in Writer.write_elem(self, store, k, elem, dataset_kwargs, modifiers)
    383 elif k in store:
    384     del store[k]
--> 386 write_func = self.find_write_func(dest_type, elem, modifiers)
    388 if self.callback is None:
    389     return write_func(store, k, elem, dataset_kwargs=dataset_kwargs)

File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/registry.py:334, in Writer.find_write_func(self, dest_type, elem, modifiers)
    330         return self.registry.get_write(
    331             dest_type, pattern, modifiers, writer=self
    332         )
    333 # Raises IORegistryError
--> 334 return self.registry.get_write(dest_type, type(elem), modifiers, writer=self)

File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/registry.py:141, in IORegistry.get_write(self, dest_type, src_type, modifiers, writer)
    139     dest_type = h5py.Group
    140 if (dest_type, src_type, modifiers) not in self.write:
--> 141     raise IORegistryError._from_write_parts(dest_type, src_type, modifiers)
    142 internal = self.write[(dest_type, src_type, modifiers)]
    143 return partial(internal, _writer=writer)

IORegistryError: No method registered for writing <class 'pandas.core.arrays.sparse.array.SparseArray'> into <class 'h5py._hl.group.Group'>
Error raised while writing key 'sparse_col' of <class 'h5py._hl.group.Group'> to /obs

Metadata

Metadata

Assignees

No one assigned

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions