1616from collections import defaultdict
1717
1818import numpy as np
19+ from numpy .exceptions import VisibleDeprecationWarning
1920import pandas as pd
2021
2122from larray .util .types import R
2223from typing import Callable
2324
# Pattern for the text of the warning about old pickled numpy dtypes. It is
# used both as a warnings filter and (compiled, below) to recognise caught
# warnings; in both cases it is matched from the start of the message.
PICKLE_NP_DTYPE_WARNING_PATTERN = r'dtype\(\): align .*'
# Pre-compiled version of the pattern above.
PICKLE_NP_DTYPE_WARNING_RE = re.compile(PICKLE_NP_DTYPE_WARNING_PATTERN)
27+
2428try :
2529 np .set_printoptions (legacy = '1.13' )
2630except TypeError :
@@ -799,6 +803,56 @@ def common_dtype(arrays) -> np.dtype:
799803 return np_array_common_dtype (arrays )
800804
801805
806+ class PickleDtypeWarningRewriter :
807+ """
808+ Context manager to rewrite warnings about old pickled numpy dtypes.
809+
810+ When reading from an HDF file, pandas may emit warnings about old pickled
811+ numpy dtypes. This context manager catches these warnings and rewrites them
812+ as FutureWarnings with a more user-friendly message.
813+ """
814+ def __init__ (self , filepath , stacklevel ):
815+ self .filepath = filepath
816+ self .stacklevel = stacklevel
817+ self .warnings_catcher = None
818+ self .caught_warnings = None
819+
820+ def __enter__ (self ):
821+ warnings_catcher = warnings .catch_warnings (record = True )
822+ self .warnings_catcher = warnings_catcher
823+ self .caught_warnings = warnings_catcher .__enter__ ()
824+ # do not use "always" to avoid showing the warning multiple times for
825+ # the same file if it contains multiple pickled dtypes
826+ warnings .filterwarnings ('once' ,
827+ message = PICKLE_NP_DTYPE_WARNING_PATTERN ,
828+ category = VisibleDeprecationWarning ,
829+ module = 'pandas' )
830+ return None
831+
832+ def __exit__ (self , type_ , value , traceback ):
833+ # exit catch_warnings context manager (to restore normal showwarning)
834+ self .warnings_catcher .__exit__ (type_ , value , traceback )
835+ for warning in self .caught_warnings :
836+ msg = warning .message
837+ # We still need to check the warning message because other
838+ # types of warnings are not (and should not be) filtered
839+ if (isinstance (msg , VisibleDeprecationWarning ) and
840+ PICKLE_NP_DTYPE_WARNING_RE .match (str (msg ))):
841+ fpath = self .filepath
842+ warnings .warn (
843+ f"'{ fpath } ' was created with an old version of "
844+ "NumPy. Please rewrite the file with a recent "
845+ "version of NumPy to avoid future compatibility "
846+ "issues." ,
847+ FutureWarning , stacklevel = self .stacklevel
848+ )
849+ else :
850+ # Otherwise, we should re-emit the warning as-is.
851+ # In practice, the stack frame of the warning is lost when
852+ # we catch it, but we keep the same message and category
853+ warnings .warn (warning .message , warning .category )
854+
855+
802856class LHDFStore :
803857 """Context manager for pandas HDFStore."""
804858
@@ -808,16 +862,26 @@ def __init__(self, filepath_or_buffer, **kwargs):
808862 raise IOError ('The HDFStore must be open for reading.' )
809863 self .store = filepath_or_buffer
810864 self .close_store = False
865+ # if the store is already open, the warning catcher should already
866+ # be in place
867+ warn_catcher = None
811868 else :
812869 self .store = pd .HDFStore (filepath_or_buffer , ** kwargs )
813870 self .close_store = True
871+ warn_catcher = PickleDtypeWarningRewriter (str (filepath_or_buffer ),
872+ stacklevel = 4 )
873+ self .warnings_catcher = warn_catcher
814874
815875 def __enter__ (self ):
876+ if self .warnings_catcher is not None :
877+ self .warnings_catcher .__enter__ ()
816878 return self .store
817879
818880 def __exit__ (self , type_ , value , traceback ):
819881 if self .close_store :
820882 self .store .close ()
883+ if self .warnings_catcher is not None :
884+ self .warnings_catcher .__exit__ (type_ , value , traceback )
821885
822886
823887class SequenceZip :
0 commit comments