Skip to content

Commit 186e24d

Browse files
committed
FIX: replace the NumPy 2.4 deprecation warning emitted when loading old H5 files (fixes #1178)
with a more user-friendly warning
1 parent 0baf6ed commit 186e24d

6 files changed

Lines changed: 107 additions & 6 deletions

File tree

larray/inout/hdf.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from larray.core.constants import nan
1212
from larray.core.group import Group, LGroup, _translate_group_key_hdf
1313
from larray.core.metadata import Metadata
14-
from larray.util.misc import LHDFStore
14+
from larray.util.misc import LHDFStore, PickleDtypeWarningRewriter
1515
from larray.util.types import Scalar
1616
from larray.inout.session import register_file_handler
1717
from larray.inout.common import FileHandler, _supported_typenames, _supported_scalars_types
@@ -138,9 +138,14 @@ class PandasHDFHandler(FileHandler):
138138
r"""
139139
Handler for HDF5 files using Pandas.
140140
"""
141+
def __init__(self, *args, **kwargs):
    """
    Initialize the handler, forwarding all arguments to the parent class.
    """
    super().__init__(*args, **kwargs)
    # No warning rewriter is active yet: _open_for_read() creates and enters
    # one, and close() exits it.
    self.warn_catcher = None
141144

142145
def _open_for_read(self):
    """
    Open the HDF file in read-only mode and start rewriting warnings.
    """
    self.handle = HDFStore(self.fname, mode='r')
    # The rewriter is entered by hand rather than through a `with` statement:
    # it stays active after this method returns and is exited in close().
    rewriter = PickleDtypeWarningRewriter(self.fname, stacklevel=6)
    self.warn_catcher = rewriter
    rewriter.__enter__()
144149

145150
def _open_for_write(self):
146151
self.handle = HDFStore(self.fname)
@@ -197,3 +202,5 @@ def _dump_metadata(self, metadata):
197202

198203
def close(self):
    """
    Close the HDF store and stop rewriting warnings.

    The warning rewriter (if any) is exited even when closing the store
    raises, so that the global state of the warnings module is always
    restored. It is also detached from the handler, so calling this method
    again does not re-emit the warnings a second time.
    """
    try:
        self.handle.close()
    finally:
        # detach before exiting so that the rewriter is exited at most once,
        # even if re-emitting a warning raises (e.g. with an "error" filter)
        warn_catcher = self.warn_catcher
        self.warn_catcher = None
        if warn_catcher is not None:
            warn_catcher.__exit__(None, None, None)

larray/tests/common.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434

3535

3636
NUMPY2 = NumpyVersion(np.__version__) >= '2.0.0'
37+
NUMPY24 = NumpyVersion(np.__version__) >= '2.4.0'
3738
SKIP_EXCEL_TESTS = False
3839
TESTDATADIR = Path(__file__).parent
3940

larray/tests/test_array.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
assert_nparray_equal, assert_nparray_nan_equal,
1313
needs_xlwings, needs_pytables, needs_xlsxwriter, needs_openpyxl,
1414
needs_matplotlib,
15-
NUMPY2,
15+
NUMPY2, NUMPY24,
1616
must_warn, must_raise
1717
)
1818
from larray import (
@@ -3847,6 +3847,16 @@ def test_hdf_roundtrip(tmp_path, meta):
38473847
s = Session(fpath)
38483848
assert s.names == sorted(['a0', 'a1', 'a2', 'a3', 'c0,c2', 'c0::2', 'even', ':name?with*special__[characters]'])
38493849

3850+
# test reading a file created by an older version
3851+
fpath = inputpath('test_session.h5')
3852+
msg = (f"'{fpath}' was created with an old version of NumPy. Please "
3853+
"rewrite the file with a recent version of NumPy to avoid future "
3854+
"compatibility issues.")
3855+
# if we do not use numpy >= 2.4, the warning is not triggered
3856+
num_expected = int(NUMPY24)
3857+
with must_warn(FutureWarning, msg=msg, num_expected=num_expected):
3858+
_ = read_hdf(fpath, '__groups__/a01')
3859+
38503860

38513861
def test_from_string():
38523862
expected = ndtest("c=c0,c1")

larray/tests/test_checked_session.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
ndtest, full, full_like, zeros_like, ones, ones_like, isnan)
1313
from larray.tests.common import (inputpath, assert_array_nan_equal, meta, # noqa: F401
1414
needs_pytables, needs_openpyxl, needs_xlwings,
15-
must_warn, must_raise)
15+
must_warn, must_raise, NUMPY24)
1616
from larray.tests.test_session import (a, a2, a3, anonymous, a01, ano01, b, b2, b024, # noqa: F401
1717
c, d, e, f, g, h,
1818
assert_seq_equal, session, test_getitem, test_getattr,
@@ -155,7 +155,18 @@ class TestInheritance(CheckedSessionExample):
155155

156156
@needs_pytables
def test_init_checkedsession_hdf():
    """
    Load a CheckedSession from the HDF test file and check its keys.

    With numpy >= 2.4, loading the file must emit exactly one FutureWarning
    with the user-friendly message; with older versions, none.
    """
    h5_path = inputpath('test_session.h5')
    expected_msg = (f"'{h5_path}' was created with an old version of NumPy. Please "
                    "rewrite the file with a recent version of NumPy to avoid future "
                    "compatibility issues.")

    # check_file is False because the warning stack level is calibrated for
    # Session and not CheckedSession which uses a Session
    # if we do not use numpy >= 2.4, the warning is not triggered
    with must_warn(FutureWarning, msg=expected_msg, check_file=False,
                   num_expected=1 if NUMPY24 else 0):
        cs = CheckedSessionExample(h5_path)
    assert set(cs.keys()) == {'b', 'b024', 'a', 'a2', 'anonymous', 'a01', 'ano01', 'c', 'd', 'e', 'g', 'f', 'h'}
160171

161172

larray/tests/test_session.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
meta, inputpath,
1212
assert_larray_equal, assert_array_nan_equal, assert_larray_nan_equal,
1313
needs_xlwings, needs_pytables, needs_openpyxl,
14-
must_warn, must_raise
14+
must_warn, must_raise,
15+
NUMPY24
1516
)
1617
from larray.inout.common import _supported_scalars_types
1718
from larray import (Session, Axis, Array, Group, CheckedSession,
@@ -91,7 +92,14 @@ def test_init_session_xlsx():
9192

9293
@needs_pytables
def test_init_session_hdf():
    """
    Load a Session from the HDF test file and check its keys.

    With numpy >= 2.4, loading the file must emit exactly one FutureWarning
    with the user-friendly message; with older versions, none.
    """
    h5_path = inputpath('test_session.h5')
    expected_msg = (f"'{h5_path}' was created with an old version of NumPy. Please "
                    "rewrite the file with a recent version of NumPy to avoid future "
                    "compatibility issues.")
    # if we do not use numpy >= 2.4, the warning is not triggered
    with must_warn(FutureWarning, msg=expected_msg, num_expected=1 if NUMPY24 else 0):
        s = Session(h5_path)
    assert list(s.keys()) == ['e', 'f', 'g', 'h', 'a', 'a2', 'anonymous', 'b', 'a01', 'ano01', 'b024']
96104

97105

larray/util/misc.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,15 @@
1616
from collections import defaultdict
1717

1818
import numpy as np
19+
from numpy.exceptions import VisibleDeprecationWarning
1920
import pandas as pd
2021

2122
from larray.util.types import R
2223
from typing import Callable
2324

25+
PICKLE_NP_DTYPE_WARNING_PATTERN = r'dtype\(\): align .*'
26+
PICKLE_NP_DTYPE_WARNING_RE = re.compile(PICKLE_NP_DTYPE_WARNING_PATTERN)
27+
2428
try:
2529
np.set_printoptions(legacy='1.13')
2630
except TypeError:
@@ -799,6 +803,56 @@ def common_dtype(arrays) -> np.dtype:
799803
return np_array_common_dtype(arrays)
800804

801805

806+
class PickleDtypeWarningRewriter:
807+
"""
808+
Context manager to rewrite warnings about old pickled numpy dtypes.
809+
810+
When reading from an HDF file, pandas may emit warnings about old pickled
811+
numpy dtypes. This context manager catches these warnings and rewrites them
812+
as FutureWarnings with a more user-friendly message.
813+
"""
814+
def __init__(self, filepath, stacklevel):
815+
self.filepath = filepath
816+
self.stacklevel = stacklevel
817+
self.warnings_catcher = None
818+
self.caught_warnings = None
819+
820+
def __enter__(self):
821+
warnings_catcher = warnings.catch_warnings(record=True)
822+
self.warnings_catcher = warnings_catcher
823+
self.caught_warnings = warnings_catcher.__enter__()
824+
# do not use "always" to avoid showing the warning multiple times for
825+
# the same file if it contains multiple pickled dtypes
826+
warnings.filterwarnings('once',
827+
message=PICKLE_NP_DTYPE_WARNING_PATTERN,
828+
category=VisibleDeprecationWarning,
829+
module='pandas')
830+
return None
831+
832+
def __exit__(self, type_, value, traceback):
833+
# exit catch_warnings context manager (to restore normal showwarning)
834+
self.warnings_catcher.__exit__(type_, value, traceback)
835+
for warning in self.caught_warnings:
836+
msg = warning.message
837+
# We still need to check the warning message because other
838+
# types of warnings are not (and should not be) filtered
839+
if (isinstance(msg, VisibleDeprecationWarning) and
840+
PICKLE_NP_DTYPE_WARNING_RE.match(str(msg))):
841+
fpath = self.filepath
842+
warnings.warn(
843+
f"'{fpath}' was created with an old version of "
844+
"NumPy. Please rewrite the file with a recent "
845+
"version of NumPy to avoid future compatibility "
846+
"issues.",
847+
FutureWarning, stacklevel=self.stacklevel
848+
)
849+
else:
850+
# Otherwise, we should re-emit the warning as-is.
851+
# In practice, the stack frame of the warning is lost when
852+
# we catch it, but we keep the same message and category
853+
warnings.warn(warning.message, warning.category)
854+
855+
802856
class LHDFStore:
803857
"""Context manager for pandas HDFStore."""
804858

@@ -808,16 +862,26 @@ def __init__(self, filepath_or_buffer, **kwargs):
808862
raise IOError('The HDFStore must be open for reading.')
809863
self.store = filepath_or_buffer
810864
self.close_store = False
865+
# if the store is already open, the warning catcher should already
866+
# be in place
867+
warn_catcher = None
811868
else:
812869
self.store = pd.HDFStore(filepath_or_buffer, **kwargs)
813870
self.close_store = True
871+
warn_catcher = PickleDtypeWarningRewriter(str(filepath_or_buffer),
872+
stacklevel=4)
873+
self.warnings_catcher = warn_catcher
814874

815875
def __enter__(self):
876+
if self.warnings_catcher is not None:
877+
self.warnings_catcher.__enter__()
816878
return self.store
817879

818880
def __exit__(self, type_, value, traceback):
819881
if self.close_store:
820882
self.store.close()
883+
if self.warnings_catcher is not None:
884+
self.warnings_catcher.__exit__(type_, value, traceback)
821885

822886

823887
class SequenceZip:

0 commit comments

Comments
 (0)