1- import urllib .request
1+ import datetime
2+ import requests
23import shutil
4+ import os
5+ import concurrent .futures
6+ from rfcx ._api_rfcx import guardianAudio
37
def __save_file(url, local_path, timeout=60):
    """ Download the file from `url` and save it locally under `local_path`

    Args:
        url: Address of the file to download.
        local_path: Path of the local file to write (opened in binary mode).
        timeout: (optional, default= 60) Seconds `requests` waits on the server
            before giving up with `requests.Timeout`.

    Returns:
        None. A response with a status other than 200 is reported with
        `print` and no file is written.
    """
    # Using the response as a context manager releases the streamed
    # connection on every path, including the non-200 one.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            print("Can not download {} with status {}".format(url, response.status_code))
            return
        # Ask urllib3 to undo any gzip/deflate content encoding while copying.
        response.raw.decode_content = True
        with open(local_path, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
817
def __local_audio_file_path(path, audio_name, audio_extension):
    """ Create string for the name and the path

    Args:
        path: Directory the file belongs to. An empty string gives a path
            relative to the current directory.
        audio_name: File name without extension.
        audio_extension: Extension without the leading dot.

    Returns:
        `path` joined with `<audio_name>.<audio_extension>`.
    """
    # os.path.join avoids a doubled separator when `path` already ends with
    # one and does not turn an empty `path` into a filesystem-root path.
    return os.path.join(path, audio_name + "." + audio_extension)
1221
1322def save_audio_file (destination_path , audio_id , source_audio_extension = 'opus' ):
1423 """ Prepare `url` and `local_path` and save it using function `__save_file`
1524 Args:
16- destination_path: Path to the save directory .
25+ destination_path: Audio save path .
1726 audio_id: RFCx audio id.
1827 source_audio_extension: (optional, default= '.opus') Extension for saving audio files.
1928
@@ -28,3 +37,59 @@ def save_audio_file(destination_path, audio_id, source_audio_extension='opus'):
2837 local_path = __local_audio_file_path (destination_path , audio_id , source_audio_extension )
2938 __save_file (url , local_path )
3039 print ('File {}.{} saved to {}' .format (audio_id , source_audio_extension , destination_path ))
40+
def _as_naive_utc(moment):
    """ Coerce a `date` or `datetime` to a naive `datetime` expressed in UTC """
    if not isinstance(moment, datetime.datetime):
        # Plain `datetime.date`: use midnight of that day.
        return datetime.datetime.combine(moment, datetime.time.min)
    if moment.utcoffset() is not None:
        # Aware value: convert to UTC, then drop tzinfo so that `isoformat`
        # does not emit an offset in front of the literal `Z` suffix.
        return moment.astimezone(datetime.timezone.utc).replace(tzinfo=None)
    return moment


def __generate_date_list_in_isoformat(start, end):
    """ Generate list of date in iso format ending with `Z`

    One entry is produced per day, beginning at `start` and stepping by
    24 hours while the whole-day difference to `end` is not exceeded.

    Args:
        start: First `datetime` (or `date`) of the range.
        end: Last `datetime` (or `date`) of the range.

    Returns:
        List of strings such as `2020-01-01T00:00:00Z`; empty when `end`
        lies more than a day before `start`.
    """
    start = _as_naive_utc(start)
    end = _as_naive_utc(end)
    day_count = (end - start).days + 1
    return [
        (start + datetime.timedelta(days=offset)).replace(microsecond=0).isoformat() + 'Z'
        for offset in range(day_count)
    ]
46+
def __segmentDownload(audio_path, file_ext, segment):
    """ Download one audio segment and store it under `audio_path`

    The local file name is `<guardian_guid>_<measured_at>_<guid>`, with the
    `:` and `.` characters of `measured_at` replaced by `-`.

    Args:
        audio_path: Directory handed to `__local_audio_file_path`.
        file_ext: Extension used for both the remote url and the local file.
        segment: Mapping providing the `guid`, `guardian_guid` and
            `measured_at` keys.
    """
    guid = segment['guid']
    guardian = segment['guardian_guid']
    timestamp = segment['measured_at'].replace(':', '-').replace('.', '-')
    file_name = "{}_{}_{}".format(guardian, timestamp, guid)
    remote_url = "https://assets.rfcx.org/audio/" + guid + "." + file_ext
    target = __local_audio_file_path(audio_path, file_name, file_ext)
    __save_file(remote_url, target)
53+
def _try_segment_download(audio_path, file_ext, segment):
    """ Download one segment, reporting network/file errors instead of raising """
    try:
        __segmentDownload(audio_path, file_ext, segment)
    except (requests.RequestException, OSError) as error:
        print("Can not download segment {}: {}".format(segment['guid'], error))


def downloadGuardianAudio(token, destination_path, guardian_id, min_date, max_date, file_ext='opus', parallel=True):
    """ Download RFCx audio on specific time range using `guardianAudio` to get audio segments information
        and save it using function `__save_file`
    Args:
        token: RFCx client token.
        destination_path: Audio save path. Files are written to the
            `<destination_path>/<guardian_id>` directory, created if missing.
        guardian_id: RFCx guardian id
        min_date: Download start date
        max_date: Download end date
        file_ext: (optional, default= 'opus') Extension for saving audio file.
        parallel: (optional, default= True) Enable to parallel download audio from RFCx

    Returns:
        None.

    Raises:
        TypeError: if missing required arguments.

    Network and file errors of a single segment are printed and the download
    continues with the remaining segments; any other error is raised.
    """
    audio_path = os.path.join(destination_path, guardian_id)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(audio_path, exist_ok=True)
    dates = __generate_date_list_in_isoformat(min_date, max_date)

    for date in dates:
        # `date` looks like `YYYY-MM-DDTHH:MM:SSZ`; keep the day part and close
        # the window at the end of that day whatever the start time is.
        date_end = date[:11] + '23:59:59Z'
        # NOTE(review): at most 1000 segments are requested per day; confirm
        # that a single day can never hold more than that.
        segments = guardianAudio(token, guardian_id, date, date_end, limit=1000, descending=False)

        if not segments:
            print("No data on date:", date[:-10])
            continue

        if parallel:
            with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
                futures = [
                    executor.submit(_try_segment_download, audio_path, file_ext, segment)
                    for segment in segments
                ]
                for future in concurrent.futures.as_completed(futures):
                    # Re-raise unexpected worker errors instead of dropping them.
                    future.result()
        else:
            for segment in segments:
                _try_segment_download(audio_path, file_ext, segment)
        print("Finish download on", guardian_id, date[:-10])
0 commit comments