Skip to content

Commit d55ef57

Browse files
committed
Merge remote-tracking branch 'origin/feature/CS-263-automate-the-uploading-of-philippines-greece-and-chile-audio-to-obs'
2 parents 7a9e571 + c6926ae commit d55ef57

File tree

4 files changed: 41 additions (+41) and 26 deletions (-26)

4 files changed: 41 additions (+41) and 26 deletions (-26)

package-rfcx/rfcx/_api_rfcx.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,8 @@ def guardians(token, sites):
1414
url = '{}{}?{}'.format(host, path, urllib.parse.urlencode(data, True))
1515
return _request(url, token=token)
1616

17-
def guardianAudio(token, guardianId, start, end, limit, descending):
18-
data = {'starting_after': start, 'ending_before': end, 'limit': limit, 'order': 'descending' if descending else 'ascending'}
17+
def guardianAudio(token, guardianId, start, end, limit, offset=0, descending=True):
18+
data = {'starting_after': start, 'ending_before': end, 'limit': limit, 'offset': offset, 'order': 'descending' if descending else 'ascending'}
1919
path = f'/v1/guardians/{guardianId}/audio'
2020
url = '{}{}?{}'.format(host, path, urllib.parse.urlencode(data, True))
2121
return _request(url, token=token)

package-rfcx/rfcx/audio.py

Lines changed: 36 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -38,11 +38,25 @@ def save_audio_file(destination_path, audio_id, source_audio_extension='opus'):
3838
__save_file(url, local_path)
3939
print('File {}.{} saved to {}'.format(audio_id, source_audio_extension, destination_path))
4040

41-
def __generate_date_list_in_isoformat(start, end):
42-
""" Generate list of date in iso format ending with `Z` """
43-
delta = end - start
44-
dates = [(start + datetime.timedelta(days=i)).replace(microsecond=0).isoformat() + 'Z' for i in range(delta.days + 1)]
45-
return dates
41+
def __generate_date_in_isoformat(date):
42+
""" Generate date in iso format ending with `Z` """
43+
return date.replace(microsecond=0).isoformat() + 'Z'
44+
45+
def __get_all_segments(token, guardian_id, start, end):
46+
all_segments = []
47+
empty_segment = False
48+
offset = 0
49+
50+
while not empty_segment:
51+
# No data will return `None` from server
52+
segments = guardianAudio(token, guardian_id, start, end, limit=1000, offset=offset, descending=False)
53+
if segments:
54+
all_segments.extend(segments)
55+
offset = offset + 1000
56+
else:
57+
empty_segment = True
58+
59+
return all_segments
4660

4761
def __segmentDownload(audio_path, file_ext, segment):
4862
audio_id = segment['guid']
@@ -73,23 +87,24 @@ def downloadGuardianAudio(token, destination_path, guardian_id, min_date, max_da
7387
audio_path = destination_path + '/' + guardian_id
7488
if not os.path.exists(audio_path):
7589
os.makedirs(audio_path)
76-
dates = __generate_date_list_in_isoformat(min_date, max_date)
7790

78-
for date in dates:
79-
date_end = date.replace('00:00:00', '23:59:59')
80-
segments = guardianAudio(token, guardian_id, date, date_end, limit=1000, descending=False)
91+
start = __generate_date_in_isoformat(min_date)
92+
end = __generate_date_in_isoformat(max_date)
8193

82-
if segments:
83-
if(parallel):
84-
with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
85-
futures = []
86-
for segment in segments:
87-
futures.append(executor.submit(__segmentDownload, audio_path=audio_path, file_ext=file_ext, segment=segment))
88-
89-
futures, _ = concurrent.futures.wait(futures)
90-
else:
94+
segments = __get_all_segments(token, guardian_id, start, end)
95+
96+
if segments:
97+
print("Downloading {} audio from {}".format(len(segments), guardian_id))
98+
if(parallel):
99+
with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
100+
futures = []
91101
for segment in segments:
92-
__segmentDownload(audio_path, file_ext, segment)
93-
print("Finish download on", guardian_id, date[:-10])
102+
futures.append(executor.submit(__segmentDownload, audio_path=audio_path, file_ext=file_ext, segment=segment))
103+
104+
futures, _ = concurrent.futures.wait(futures)
94105
else:
95-
print("No data on date:", date[:-10])
106+
for segment in segments:
107+
__segmentDownload(audio_path, file_ext, segment)
108+
print("Finish download on {}".format(guardian_id))
109+
else:
110+
print("No data found on {} - {} at {}".format(start[:-10], end[:-10], guardian_id))

package-rfcx/rfcx/client.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,7 @@ def guardians(self, sites=None):
134134

135135
return api_rfcx.guardians(self.credentials.id_token, sites)
136136

137-
def guardianAudio(self, guardianId=None, start=None, end=None, limit=50, descending=True):
137+
def guardianAudio(self, guardianId=None, start=None, end=None, limit=50, offset=0, descending=True):
138138
"""Retrieve audio information about a specific guardian (TO BE DEPRECATED - use streams in future)
139139
140140
Args:
@@ -157,7 +157,7 @@ def guardianAudio(self, guardianId=None, start=None, end=None, limit=50, descend
157157
if end == None:
158158
end = datetime.datetime.utcnow().replace(microsecond=0).isoformat() + 'Z'
159159

160-
return api_rfcx.guardianAudio(self.credentials.id_token, guardianId, start, end, limit, descending)
160+
return api_rfcx.guardianAudio(self.credentials.id_token, guardianId, start, end, limit, offset, descending)
161161

162162
def tags(self, type, labels, start=None, end=None, sites=None, limit=1000):
163163
"""Retrieve tags (annotations or confirmed/rejected reviews) from the RFCx API

package-rfcx/setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
REQUIRED_PACKAGES = ['httplib2', 'six']
44

55
setup(name='rfcx',
6-
version='0.0.9',
6+
version='0.0.10',
77
url='https://github.com/rfcx/rfcx-sdk-python',
88
license='None',
99
author='Rainforest Connection',

0 commit comments

Comments
 (0)