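Support saving sparse columns (pandas `SparseArray`) in the `obs`/`var` DataFrames of an AnnData object. At the moment (anndata 0.12.10), calling `adata.write_h5ad("dummy.h5ad")` on an object whose `obs` contains a sparse column (here named `sparse_col`) fails with the following error: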
---------------------------------------------------------------------------
IORegistryError Traceback (most recent call last)
Cell In[10], line 1
----> 1 adata.write_h5ad("dummy.h5ad")
[... skipping hidden 1 frame]
File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_core/anndata.py:1934, in AnnData.write_h5ad(self, filename, convert_strings_to_categoricals, compression, compression_opts, as_dense)
1931 if filename is None:
1932 filename = self.filename
-> 1934 write_h5ad(
1935 Path(filename),
1936 self,
1937 convert_strings_to_categoricals=convert_strings_to_categoricals,
1938 compression=compression,
1939 compression_opts=compression_opts,
1940 as_dense=as_dense,
1941 )
1942 # Only reset the filename if the AnnData object now points to a complete new copy
1943 if self.isbacked and not self.is_view:
File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/utils.py:349, in no_write_dataset_2d.<locals>.raise_error_if_dataset_2d_present(store, adata, *args, **kwargs)
342 msg = (
343 "Writing AnnData objects with a Dataset2D not supported yet. "
344 "Please use `ds.to_memory` to bring the dataset into memory. "
345 "Note that if you have generated this object by concatenating several `AnnData` objects"
346 "the original types may be lost."
347 )
348 raise NotImplementedError(msg)
--> 349 return write(store, adata, *args, **kwargs)
File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/h5ad.py:98, in write_h5ad(filepath, adata, as_dense, convert_strings_to_categoricals, dataset_kwargs, **kwargs)
90 _write_x(
91 f,
92 adata, # accessing adata.X reopens adata.file if it’s backed
(...) 95 dataset_kwargs=dataset_kwargs,
96 )
97 _write_raw(f, adata.raw, as_dense=as_dense, dataset_kwargs=dataset_kwargs)
---> 98 write_elem(f, "obs", adata.obs, dataset_kwargs=dataset_kwargs)
99 write_elem(f, "var", adata.var, dataset_kwargs=dataset_kwargs)
100 write_elem(f, "obsm", dict(adata.obsm), dataset_kwargs=dataset_kwargs)
File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/registry.py:518, in write_elem(store, k, elem, dataset_kwargs)
494 def write_elem(
495 store: GroupStorageType,
496 k: str,
(...) 499 dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
500 ) -> None:
501 """
502 Write an element to a storage group using anndata encoding.
503
(...) 516 E.g. for zarr this would be `chunks`, `compressor`.
517 """
--> 518 Writer(_REGISTRY).write_elem(store, k, elem, dataset_kwargs=dataset_kwargs)
File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/utils.py:272, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs)
270 raise ValueError(msg)
271 try:
--> 272 return func(*args, **kwargs)
273 except Exception as e:
274 path = _get_display_path(store)
File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/registry.py:389, in Writer.write_elem(self, store, k, elem, dataset_kwargs, modifiers)
386 write_func = self.find_write_func(dest_type, elem, modifiers)
388 if self.callback is None:
--> 389 return write_func(store, k, elem, dataset_kwargs=dataset_kwargs)
390 return self.callback(
391 write_func,
392 store,
(...) 396 iospec=self.registry.get_spec(elem),
397 )
File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/registry.py:77, in write_spec.<locals>.decorator.<locals>.wrapper(g, k, *args, **kwargs)
75 @wraps(func)
76 def wrapper(g: GroupStorageType, k: str, *args, **kwargs):
---> 77 result = func(g, k, *args, **kwargs)
78 g[k].attrs.setdefault("encoding-type", spec.encoding_type)
79 g[k].attrs.setdefault("encoding-version", spec.encoding_version)
File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/methods.py:1021, in write_dataframe(f, key, df, _writer, dataset_kwargs)
1016 _writer.write_elem(
1017 group, index_name, df.index._values, dataset_kwargs=dataset_kwargs
1018 )
1019 for colname, series in df.items():
1020 # TODO: this should write the "true" representation of the series (i.e. the underlying array or ndarray depending)
-> 1021 _writer.write_elem(
1022 group, colname, series._values, dataset_kwargs=dataset_kwargs
1023 )
File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/utils.py:272, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs)
270 raise ValueError(msg)
271 try:
--> 272 return func(*args, **kwargs)
273 except Exception as e:
274 path = _get_display_path(store)
File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/registry.py:386, in Writer.write_elem(self, store, k, elem, dataset_kwargs, modifiers)
383 elif k in store:
384 del store[k]
--> 386 write_func = self.find_write_func(dest_type, elem, modifiers)
388 if self.callback is None:
389 return write_func(store, k, elem, dataset_kwargs=dataset_kwargs)
File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/registry.py:334, in Writer.find_write_func(self, dest_type, elem, modifiers)
330 return self.registry.get_write(
331 dest_type, pattern, modifiers, writer=self
332 )
333 # Raises IORegistryError
--> 334 return self.registry.get_write(dest_type, type(elem), modifiers, writer=self)
File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/registry.py:141, in IORegistry.get_write(self, dest_type, src_type, modifiers, writer)
139 dest_type = h5py.Group
140 if (dest_type, src_type, modifiers) not in self.write:
--> 141 raise IORegistryError._from_write_parts(dest_type, src_type, modifiers)
142 internal = self.write[(dest_type, src_type, modifiers)]
143 return partial(internal, _writer=writer)
IORegistryError: No method registered for writing <class 'pandas.core.arrays.sparse.array.SparseArray'> into <class 'h5py._hl.group.Group'>
Error raised while writing key 'sparse_col' of <class 'h5py._hl.group.Group'> to /obs
Description
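Support saving sparse columns (pandas `SparseArray`) in the `obs`/`var` DataFrames of an AnnData object. At the moment (anndata 0.12.10), calling `adata.write_h5ad("dummy.h5ad")` on an object whose `obs` contains a sparse column (here named `sparse_col`) fails with: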
--------------------------------------------------------------------------- IORegistryError Traceback (most recent call last) Cell In[10], line 1 ----> 1 adata.write_h5ad("dummy.h5ad") [... skipping hidden 1 frame] File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_core/anndata.py:1934, in AnnData.write_h5ad(self, filename, convert_strings_to_categoricals, compression, compression_opts, as_dense) 1931 if filename is None: 1932 filename = self.filename -> 1934 write_h5ad( 1935 Path(filename), 1936 self, 1937 convert_strings_to_categoricals=convert_strings_to_categoricals, 1938 compression=compression, 1939 compression_opts=compression_opts, 1940 as_dense=as_dense, 1941 ) 1942 # Only reset the filename if the AnnData object now points to a complete new copy 1943 if self.isbacked and not self.is_view: File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/utils.py:349, in no_write_dataset_2d.<locals>.raise_error_if_dataset_2d_present(store, adata, *args, **kwargs) 342 msg = ( 343 "Writing AnnData objects with a Dataset2D not supported yet. " 344 "Please use `ds.to_memory` to bring the dataset into memory. " 345 "Note that if you have generated this object by concatenating several `AnnData` objects" 346 "the original types may be lost." 347 ) 348 raise NotImplementedError(msg) --> 349 return write(store, adata, *args, **kwargs) File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/h5ad.py:98, in write_h5ad(filepath, adata, as_dense, convert_strings_to_categoricals, dataset_kwargs, **kwargs) 90 _write_x( 91 f, 92 adata, # accessing adata.X reopens adata.file if it’s backed (...) 
95 dataset_kwargs=dataset_kwargs, 96 ) 97 _write_raw(f, adata.raw, as_dense=as_dense, dataset_kwargs=dataset_kwargs) ---> 98 write_elem(f, "obs", adata.obs, dataset_kwargs=dataset_kwargs) 99 write_elem(f, "var", adata.var, dataset_kwargs=dataset_kwargs) 100 write_elem(f, "obsm", dict(adata.obsm), dataset_kwargs=dataset_kwargs) File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/registry.py:518, in write_elem(store, k, elem, dataset_kwargs) 494 def write_elem( 495 store: GroupStorageType, 496 k: str, (...) 499 dataset_kwargs: Mapping[str, Any] = MappingProxyType({}), 500 ) -> None: 501 """ 502 Write an element to a storage group using anndata encoding. 503 (...) 516 E.g. for zarr this would be `chunks`, `compressor`. 517 """ --> 518 Writer(_REGISTRY).write_elem(store, k, elem, dataset_kwargs=dataset_kwargs) File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/utils.py:272, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs) 270 raise ValueError(msg) 271 try: --> 272 return func(*args, **kwargs) 273 except Exception as e: 274 path = _get_display_path(store) File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/registry.py:389, in Writer.write_elem(self, store, k, elem, dataset_kwargs, modifiers) 386 write_func = self.find_write_func(dest_type, elem, modifiers) 388 if self.callback is None: --> 389 return write_func(store, k, elem, dataset_kwargs=dataset_kwargs) 390 return self.callback( 391 write_func, 392 store, (...) 
396 iospec=self.registry.get_spec(elem), 397 ) File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/registry.py:77, in write_spec.<locals>.decorator.<locals>.wrapper(g, k, *args, **kwargs) 75 @wraps(func) 76 def wrapper(g: GroupStorageType, k: str, *args, **kwargs): ---> 77 result = func(g, k, *args, **kwargs) 78 g[k].attrs.setdefault("encoding-type", spec.encoding_type) 79 g[k].attrs.setdefault("encoding-version", spec.encoding_version) File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/methods.py:1021, in write_dataframe(f, key, df, _writer, dataset_kwargs) 1016 _writer.write_elem( 1017 group, index_name, df.index._values, dataset_kwargs=dataset_kwargs 1018 ) 1019 for colname, series in df.items(): 1020 # TODO: this should write the "true" representation of the series (i.e. the underlying array or ndarray depending) -> 1021 _writer.write_elem( 1022 group, colname, series._values, dataset_kwargs=dataset_kwargs 1023 ) File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/utils.py:272, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs) 270 raise ValueError(msg) 271 try: --> 272 return func(*args, **kwargs) 273 except Exception as e: 274 path = _get_display_path(store) File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/registry.py:386, in Writer.write_elem(self, store, k, elem, dataset_kwargs, modifiers) 383 elif k in store: 384 del store[k] --> 386 write_func = self.find_write_func(dest_type, elem, modifiers) 388 if self.callback is None: 389 return write_func(store, k, elem, dataset_kwargs=dataset_kwargs) File ~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/registry.py:334, in Writer.find_write_func(self, dest_type, elem, modifiers) 330 return self.registry.get_write( 331 dest_type, pattern, modifiers, writer=self 332 ) 333 # Raises IORegistryError --> 334 return self.registry.get_write(dest_type, type(elem), modifiers, writer=self) File 
~/envs/anndata_debug/lib/python3.12/site-packages/anndata/_io/specs/registry.py:141, in IORegistry.get_write(self, dest_type, src_type, modifiers, writer) 139 dest_type = h5py.Group 140 if (dest_type, src_type, modifiers) not in self.write: --> 141 raise IORegistryError._from_write_parts(dest_type, src_type, modifiers) 142 internal = self.write[(dest_type, src_type, modifiers)] 143 return partial(internal, _writer=writer) IORegistryError: No method registered for writing <class 'pandas.core.arrays.sparse.array.SparseArray'> into <class 'h5py._hl.group.Group'> Error raised while writing key 'sparse_col' of <class 'h5py._hl.group.Group'> to /obs