Skip to content

Commit 941d9e3

Browse files
committed
[read] -all read zip in Azure env
1 parent d9fb3a7 commit 941d9e3

File tree

2 files changed

+12
-3
lines changed

2 files changed

+12
-3
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ dependencies = [
2121
"numpy >= 1.26.0",
2222
"universal-pathlib == 0.1.4", # extend pathlib with azure, aws...
2323
"azure-core == 1.32.0",
24+
"fsspec == 2023.12.2",
2425
"PyYAML >= 6.0",
2526
"rich >= 13.6.0",
2627
"flake8 >= 5.0.4",

src/akutils/pandas_read_files.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import pandas as pd
2-
import zipfile
2+
import fsspec # type: ignore
33
import re
4-
from io import TextIOWrapper
4+
import zipfile
5+
from io import TextIOWrapper, BytesIO
56
from upath import UPath
7+
from upath.implementations.cloud import AzurePath
68
from pathlib import Path
79
from typing import Callable
810
from pandas._typing import (
@@ -80,7 +82,7 @@ def filter_on_countries(df, countries):
8082

8183
@timeit
8284
def read_multiple_csv_from_zip(
83-
zip_path: Path | UPath,
85+
zip_path: Path | UPath | BytesIO,
8486
regex: str = r".*",
8587
case_sensitive: bool = False,
8688
allowed_extension: list = [".csv", ".txt", ".dsv", ".gz", ".zip", ".tar", "7z"],
@@ -107,6 +109,12 @@ def read_multiple_csv_from_zip(
107109
**kwargs
108110
Pass any argument allowed by pd.read_csv and/or by the custom chunk function
109111
"""
112+
113+
# Handle Azure zip
114+
if type(zip_path) is AzurePath:
115+
with fsspec.open(zip_path, 'rb') as f:
116+
zip_path = BytesIO(f.read())
117+
110118
# Get first matching zip from a directory
111119
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
112120
# Get list of file names in the archive

0 commit comments

Comments
 (0)