6060 NotAvailableError ,
6161 TimeOutError ,
6262)
63+ from eodag .utils .s3 import open_s3_zipped_object
6364
6465if TYPE_CHECKING :
6566 from boto3 .resources .collection import ResourceCollection
@@ -233,6 +234,7 @@ def __init__(self, provider: str, config: PluginConfig) -> None:
233234 super (AwsDownload , self ).__init__ (provider , config )
234235 self .requester_pays = getattr (self .config , "requester_pays" , False )
235236 self .s3_session : Optional [boto3 .session .Session ] = None
237+ self .s3_resource : Optional [boto3 .resources .base .ServiceResource ] = None
236238
237239 def download (
238240 self ,
@@ -326,19 +328,32 @@ def download(
326328 bucket_names_and_prefixes , auth
327329 )
328330
331+ # files in zip
332+ updated_bucket_names_and_prefixes = self ._download_file_in_zip (
333+ product , bucket_names_and_prefixes , product_local_path , progress_callback
334+ )
335+ # prevent nothing-to-download errors if download was performed in zip
336+ raise_error = (
337+ False
338+ if len (updated_bucket_names_and_prefixes ) != len (bucket_names_and_prefixes )
339+ else True
340+ )
341+
329342 # downloadable files
330343 unique_product_chunks = self ._get_unique_products (
331- bucket_names_and_prefixes ,
344+ updated_bucket_names_and_prefixes ,
332345 authenticated_objects ,
333346 asset_filter ,
334347 ignore_assets ,
335348 product ,
349+ raise_error = raise_error ,
336350 )
337351
338352 total_size = sum ([p .size for p in unique_product_chunks ]) or None
339353
340354 # download
341- progress_callback .reset (total = total_size )
355+ if len (unique_product_chunks ) > 0 :
356+ progress_callback .reset (total = total_size )
342357 try :
343358 for product_chunk in unique_product_chunks :
344359 try :
@@ -390,17 +405,65 @@ def download(
390405
391406 return product_local_path
392407
def _download_file_in_zip(
    self, product, bucket_names_and_prefixes, product_local_path, progress_callback
):
    """
    Download files stored inside zip archives on S3

    A prefix like `foo/bar.zip!file.txt` designates the member `file.txt` of the
    zip object `foo/bar.zip`. Each such member is extracted to
    `<product_local_path>/<member path>`; all other prefixes are left untouched.

    :param product: product being downloaded (not used by this method)
    :param bucket_names_and_prefixes: list of bucket names and corresponding path prefixes
    :param product_local_path: local directory in which extracted files are written
    :param progress_callback: progress callback, reset to the size of each extracted
                              file and called with the number of bytes written
    :return: the entries of ``bucket_names_and_prefixes`` that were not downloaded
             here, in their original order (the input itself if no s3 resource is
             available)
    """
    if self.s3_resource is None:
        logger.debug("Cannot check files in s3 zip without s3 resource")
        return bucket_names_and_prefixes

    s3_client = self.s3_resource.meta.client

    # entries that still have to be handled by the regular download
    remaining = []
    for pack in bucket_names_and_prefixes:
        bucket_name, prefix = pack
        # prefix may be empty or None: such entries can never point into a zip
        if not prefix or ".zip!" not in prefix:
            remaining.append(pack)
            continue

        path_parts = prefix.split(".zip!")
        zip_prefix = f"{path_parts[0]}.zip"
        rel_path = path_parts[-1]
        dest_file = os.path.join(product_local_path, rel_path)
        # exist_ok avoids failing if the directory appears between check and creation
        os.makedirs(os.path.dirname(dest_file), exist_ok=True)

        with open_s3_zipped_object(
            bucket_name, zip_prefix, s3_client, partial=False
        ) as zip_file:
            # uncompressed size of the member, used as progress total
            file_info = zip_file.getinfo(rel_path)
            progress_callback.reset(total=file_info.file_size)
            with zip_file.open(rel_path) as extracted, open(
                dest_file, "wb"
            ) as output_file:
                # Read in 1MB chunks to keep memory usage bounded
                while zchunk := extracted.read(1024 * 1024):
                    output_file.write(zchunk)
                    progress_callback(len(zchunk))

    return remaining
453+
393454 def _download_preparation (
394455 self ,
395456 product : EOProduct ,
396457 progress_callback : ProgressCallback ,
397458 ** kwargs : Unpack [DownloadConf ],
398459 ) -> tuple [Optional [str ], Optional [str ]]:
399460 """
400- preparation for the download:
461+ Preparation for the download:
462+
401463 - check if file was already downloaded
402464 - get file path
403465 - create directories
466+
404467 :param product: product to be downloaded
405468 :param progress_callback: progress callback to be used
406469 :param kwargs: additional arguments
@@ -424,7 +487,8 @@ def _download_preparation(
424487
425488 def _configure_safe_build (self , build_safe : bool , product : EOProduct ):
426489 """
427- updates the product properties with fetch metadata if safe build is enabled
490+ Updates the product properties with fetch metadata if safe build is enabled
491+
428492 :param build_safe: if safe build is enabled
429493 :param product: product to be updated
430494 """
@@ -514,10 +578,11 @@ def _do_authentication(
514578 auth : Optional [Union [AuthBase , S3SessionKwargs ]] = None ,
515579 ) -> tuple [dict [str , Any ], ResourceCollection ]:
516580 """
517- authenticates with s3 and retrieves the available objects
518- raises an error when authentication is not possible
581+ Authenticates with s3 and retrieves the available objects
582+
519583 :param bucket_names_and_prefixes: list of bucket names and corresponding path prefixes
520584 :param auth: authentication information
585+ :raises AuthenticationError: authentication is not possible
521586 :return: authenticated objects per bucket, list of available objects
522587 """
523588 if not isinstance (auth , (dict , type (None ))):
@@ -584,14 +649,17 @@ def _get_unique_products(
584649 asset_filter : Optional [str ],
585650 ignore_assets : bool ,
586651 product : EOProduct ,
652+ raise_error : bool = True ,
587653 ) -> set [Any ]:
588654 """
589- retrieve unique product chunks based on authenticated objects and asset filters
655+ Retrieve unique product chunks based on authenticated objects and asset filters
656+
590657 :param bucket_names_and_prefixes: list of bucket names and corresponding path prefixes
591658 :param authenticated_objects: available objects per bucket
592659 :param asset_filter: text for which assets should be filtered
593660 :param ignore_assets: if product instead of individual assets should be used
594661 :param product: product that shall be downloaded
662+ :param raise_error: raise error if there is nothing to download
595663 :return: set of product chunks that can be downloaded
596664 """
597665 product_chunks : list [Any ] = []
@@ -613,12 +681,12 @@ def _get_unique_products(
613681 unique_product_chunks ,
614682 )
615683 )
616- if not unique_product_chunks :
684+ if not unique_product_chunks and raise_error :
617685 raise NotAvailableError (
618686 rf"No file basename matching re.fullmatch(r'{ asset_filter } ') was found in { product .remote_location } "
619687 )
620688
621- if not unique_product_chunks :
689+ if not unique_product_chunks and raise_error :
622690 raise NoMatchingProductType ("No product found to download." )
623691
624692 return unique_product_chunks
@@ -702,6 +770,13 @@ def _stream_download_dict(
702770 bucket_names_and_prefixes , auth
703771 )
704772
773+ # stream not implemented for prefixes like `foo/bar.zip!file.txt`
774+ for _ , prefix in bucket_names_and_prefixes :
775+ if prefix and ".zip!" in prefix :
776+ raise NotImplementedError (
777+ "Download streaming is not implemented for files in zip on S3"
778+ )
779+
705780 # downloadable files
706781 unique_product_chunks = self ._get_unique_products (
707782 bucket_names_and_prefixes ,
@@ -936,6 +1011,7 @@ def _get_authenticated_objects_from_auth_profile(
9361011 objects = s3_resource .Bucket (bucket_name ).objects
9371012 list (objects .filter (Prefix = prefix ).limit (1 ))
9381013 self .s3_session = s3_session
1014+ self .s3_resource = s3_resource
9391015 return objects
9401016 else :
9411017 return None
@@ -966,6 +1042,7 @@ def _get_authenticated_objects_from_auth_keys(
9661042 objects = s3_resource .Bucket (bucket_name ).objects
9671043 list (objects .filter (Prefix = prefix ).limit (1 ))
9681044 self .s3_session = s3_session
1045+ self .s3_resource = s3_resource
9691046 return objects
9701047 else :
9711048 return None
@@ -987,6 +1064,7 @@ def _get_authenticated_objects_from_env(
9871064 objects = s3_resource .Bucket (bucket_name ).objects
9881065 list (objects .filter (Prefix = prefix ).limit (1 ))
9891066 self .s3_session = s3_session
1067+ self .s3_resource = s3_resource
9901068 return objects
9911069
9921070 def get_product_bucket_name_and_prefix (
0 commit comments