Skip to content

Commit 70a38ad

Browse files
committed
implement checksum validation
1 parent b90f210 commit 70a38ad

File tree

3 files changed

+31
-3
lines changed

3 files changed

+31
-3
lines changed

tests/test_10_client.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from pathlib import Path
2+
3+
from xarray_esgf import Client
4+
5+
6+
def test_missing_files(tmp_path: Path) -> None:
    """Check that ``download()`` clears the client's list of missing files.

    A client whose ``esgpull_path`` lies under ``tmp_path`` is expected to
    report exactly one missing file for the selection before downloading and
    none afterwards.
    """
    esgpull_dir = tmp_path / "esgpull"
    selection = {"query": '"tas_Amon_EC-Earth3-CC_ssp245_r1i1p1f1_gr_201901-201912.nc"'}
    client = Client(selection, esgpull_path=str(esgpull_dir))

    # Nothing has been fetched yet, so the selected file must be reported.
    missing_before = client.missing_files
    assert len(missing_before) == 1

    client.download()

    # The property is evaluated again after the download.
    missing_after = client.missing_files
    assert len(missing_after) == 0

xarray_esgf/client.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import asyncio
22
import dataclasses
3+
import hashlib
34
from collections import defaultdict
45
from collections.abc import Callable, Iterable
56
from functools import cached_property
@@ -66,18 +67,31 @@ def files(self) -> list[File]:
6667
keep_duplicates=False,
6768
)
6869

69-
def _get_local_path(self, file: File) -> Path:
70+
def get_local_path(self, file: File) -> Path:
    """Build the on-disk path for ``file`` under ``self._client.fs.paths.data``.

    The result is the data directory joined with ``file.local_path`` and
    ``file.filename``.
    """
    data_root = self._client.fs.paths.data
    return data_root / file.local_path / file.filename
7172

73+
@property
74+
def missing_files(self) -> list[File]:
75+
missing_files = []
76+
for file in self.files:
77+
path = self.get_local_path(file)
78+
if path.exists():
79+
with path.open("rb") as f:
80+
digest = hashlib.file_digest(f, file.checksum_type)
81+
if digest.hexdigest() == file.checksum:
82+
continue
83+
missing_files.append(file)
84+
return missing_files
85+
7286
@cached_property
7387
def local_paths(self) -> dict[str, list[Path]]:
7488
datasets = defaultdict(list)
7589
for file in self.files:
76-
datasets[file.dataset_id].append(self._get_local_path(file))
90+
datasets[file.dataset_id].append(self.get_local_path(file))
7791
return dict(datasets)
7892

7993
def download(self) -> None:
80-
_, errors = asyncio.run(self._client.download(self.files, use_db=False))
94+
_, errors = asyncio.run(self._client.download(self.missing_files, use_db=False))
8195
exceptions = []
8296
for error in errors:
8397
err = error.err

0 commit comments

Comments
 (0)