Skip to content

Commit dceb663

Browse files
authored
Merge pull request #261 from oree-xx/report_automation
Improve report sections and avoid regenerating existing files
2 parents 08075ba + 06a937a commit dceb663

File tree

8 files changed

+186
-74
lines changed

8 files changed

+186
-74
lines changed

scripts/2-process/gcs_process.py

Lines changed: 24 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -27,12 +27,24 @@
2727

2828
# Constants
2929
QUARTER = os.path.basename(PATHS["data_quarter"])
30+
FILE_PATHS = [
31+
shared.path_join(PATHS["data_phase"], "gcs_product_totals.csv"),
32+
shared.path_join(PATHS["data_phase"], "gcs_status_combined_totals.csv"),
33+
shared.path_join(PATHS["data_phase"], "gcs_status_lastest_totals.csv"),
34+
shared.path_join(PATHS["data_phase"], "gcs_status_prior_totals.csv"),
35+
shared.path_join(PATHS["data_phase"], "gcs_status_retired_totals.csv"),
36+
shared.path_join(PATHS["data_phase"], "gcs_totals_by_country.csv"),
37+
shared.path_join(PATHS["data_phase"], "gcs_totals_by_free_cultural.csv"),
38+
shared.path_join(PATHS["data_phase"], "gcs_totals_by_language.csv"),
39+
shared.path_join(PATHS["data_phase"], "gcs_totals_by_restrictions.csv"),
40+
]
3041

3142

3243
def parse_arguments():
3344
"""
3445
Parse command-line options, returns parsed argument namespace.
3546
"""
47+
global QUARTER
3648
LOGGER.info("Parsing command-line options")
3749
parser = argparse.ArgumentParser(description=__doc__)
3850
parser.add_argument(
@@ -48,15 +60,23 @@ def parse_arguments():
4860
parser.add_argument(
4961
"--enable-git",
5062
action="store_true",
51-
help="Enable git actions such as fetch, merge, add, commit, and push"
52-
" (default: False)",
63+
help="Enable git actions such as fetch, merge, add, commit, and push",
64+
)
65+
parser.add_argument(
66+
"--force",
67+
action="store_true",
68+
help="Regenerate data even if processed files already exist",
5369
)
5470
args = parser.parse_args()
5571
if not args.enable_save and args.enable_git:
5672
parser.error("--enable-git requires --enable-save")
5773
if args.quarter != QUARTER:
58-
global PATHS
74+
global FILE_PATHS, PATHS
75+
FILE_PATHS = shared.paths_list_update(
76+
LOGGER, FILE_PATHS, QUARTER, args.quarter
77+
)
5978
PATHS = shared.paths_update(LOGGER, PATHS, QUARTER, args.quarter)
79+
QUARTER = args.quarter
6080
args.logger = LOGGER
6181
args.paths = PATHS
6282
return args
@@ -308,6 +328,7 @@ def main():
308328
args = parse_arguments()
309329
shared.paths_log(LOGGER, PATHS)
310330
shared.git_fetch_and_merge(args, PATHS["repo"])
331+
shared.check_for_data_files(args, FILE_PATHS, QUARTER)
311332

312333
# Count data
313334
file1_count = shared.path_join(PATHS["data_1-fetch"], "gcs_1_count.csv")

scripts/2-process/github_process.py

Lines changed: 18 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -24,12 +24,17 @@
2424

2525
# Constants
2626
QUARTER = os.path.basename(PATHS["data_quarter"])
27+
FILE_PATHS = [
28+
shared.path_join(PATHS["data_phase"], "github_totals_by_license.csv"),
29+
shared.path_join(PATHS["data_phase"], "github_totals_by_restriction.csv"),
30+
]
2731

2832

2933
def parse_arguments():
3034
"""
3135
Parse command-line options, returns parsed argument namespace.
3236
"""
37+
global QUARTER
3338
LOGGER.info("Parsing command-line options")
3439
parser = argparse.ArgumentParser(description=__doc__)
3540
parser.add_argument(
@@ -48,24 +53,27 @@ def parse_arguments():
4853
help="Enable git actions such as fetch, merge, add, commit, and push"
4954
" (default: False)",
5055
)
56+
parser.add_argument(
57+
"--force",
58+
action="store_true",
59+
help="Regenerate data even if processed files already exist",
60+
)
61+
5162
args = parser.parse_args()
5263
if not args.enable_save and args.enable_git:
5364
parser.error("--enable-git requires --enable-save")
5465
if args.quarter != QUARTER:
55-
global PATHS
66+
global FILE_PATHS, PATHS
67+
FILE_PATHS = shared.paths_list_update(
68+
LOGGER, FILE_PATHS, QUARTER, args.quarter
69+
)
5670
PATHS = shared.paths_update(LOGGER, PATHS, QUARTER, args.quarter)
71+
QUARTER = args.quarter
5772
args.logger = LOGGER
5873
args.paths = PATHS
5974
return args
6075

6176

62-
def check_for_data_file(file_path):
63-
if os.path.exists(file_path):
64-
raise shared.QuantifyingException(
65-
f"Processed data already exists for {QUARTER}", 0
66-
)
67-
68-
6977
def data_to_csv(args, data, file_path):
7078
if not args.enable_save:
7179
return
@@ -98,7 +106,6 @@ def process_totals_by_license(args, count_data):
98106
file_path = shared.path_join(
99107
PATHS["data_phase"], "github_totals_by_license.csv"
100108
)
101-
check_for_data_file(file_path)
102109
data_to_csv(args, data, file_path)
103110

104111

@@ -133,15 +140,14 @@ def process_totals_by_restriction(args, count_data):
133140
file_path = shared.path_join(
134141
PATHS["data_phase"], "github_totals_by_restriction.csv"
135142
)
136-
check_for_data_file(file_path)
137143
data_to_csv(args, data, file_path)
138144

139145

140146
def main():
141147
args = parse_arguments()
142148
shared.paths_log(LOGGER, PATHS)
143149
shared.git_fetch_and_merge(args, PATHS["repo"])
144-
150+
shared.check_for_data_files(args, FILE_PATHS, QUARTER)
145151
file_count = shared.path_join(PATHS["data_1-fetch"], "github_1_count.csv")
146152
count_data = shared.open_data_file(
147153
LOGGER, file_count, usecols=["TOOL_IDENTIFIER", "COUNT"]
@@ -167,7 +173,7 @@ def main():
167173
LOGGER.info(e.message)
168174
else:
169175
LOGGER.error(e.message)
170-
sys.exit(e.code)
176+
sys.exit(e.exit_code)
171177
except SystemExit as e:
172178
LOGGER.error(f"System exit with code: {e.code}")
173179
sys.exit(e.code)

scripts/2-process/wikipedia_process.py

Lines changed: 24 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -28,12 +28,24 @@
2828

2929
# Constants
3030
QUARTER = os.path.basename(PATHS["data_quarter"])
31+
FILE_PATHS = [
32+
shared.path_join(
33+
PATHS["data_phase"], "wikipedia_highest_language_usage.csv"
34+
),
35+
shared.path_join(
36+
PATHS["data_phase"], "wikipedia_least_language_usage.csv"
37+
),
38+
shared.path_join(
39+
PATHS["data_phase"], "wikipedia_language_representation.csv"
40+
),
41+
]
3142

3243

3344
def parse_arguments():
3445
"""
3546
Parse command-line options, returns parsed argument namespace.
3647
"""
48+
global QUARTER
3749
LOGGER.info("Parsing command-line options")
3850
parser = argparse.ArgumentParser(description=__doc__)
3951
parser.add_argument(
@@ -52,24 +64,27 @@ def parse_arguments():
5264
help="Enable git actions such as fetch, merge, add, commit, and push"
5365
" (default: False)",
5466
)
67+
parser.add_argument(
68+
"--force",
69+
action="store_true",
70+
help="Regenerate data even if processed files already exist",
71+
)
72+
5573
args = parser.parse_args()
5674
if not args.enable_save and args.enable_git:
5775
parser.error("--enable-git requires --enable-save")
5876
if args.quarter != QUARTER:
59-
global PATHS
77+
global FILE_PATHS, PATHS
78+
FILE_PATHS = shared.paths_list_update(
79+
LOGGER, FILE_PATHS, QUARTER, args.quarter
80+
)
6081
PATHS = shared.paths_update(LOGGER, PATHS, QUARTER, args.quarter)
82+
QUARTER = args.quarter
6183
args.logger = LOGGER
6284
args.paths = PATHS
6385
return args
6486

6587

66-
def check_for_data_file(file_path):
67-
if os.path.exists(file_path):
68-
raise shared.QuantifyingException(
69-
f"Processed data already exists for {QUARTER}", 0
70-
)
71-
72-
7388
def data_to_csv(args, data, file_path):
7489
if not args.enable_save:
7590
return
@@ -98,7 +113,6 @@ def process_highest_language_usage(args, count_data):
98113
file_path = shared.path_join(
99114
PATHS["data_phase"], "wikipedia_highest_language_usage.csv"
100115
)
101-
check_for_data_file(file_path)
102116
data_to_csv(args, top_10, file_path)
103117

104118

@@ -122,7 +136,6 @@ def process_least_language_usage(args, count_data):
122136
file_path = shared.path_join(
123137
PATHS["data_phase"], "wikipedia_least_language_usage.csv"
124138
)
125-
check_for_data_file(file_path)
126139
data_to_csv(args, bottom_10, file_path)
127140

128141

@@ -149,14 +162,14 @@ def process_language_representation(args, count_data):
149162
file_path = shared.path_join(
150163
PATHS["data_phase"], "wikipedia_language_representation.csv"
151164
)
152-
check_for_data_file(file_path)
153165
data_to_csv(args, language_counts, file_path)
154166

155167

156168
def main():
157169
args = parse_arguments()
158170
shared.paths_log(LOGGER, PATHS)
159171
shared.git_fetch_and_merge(args, PATHS["repo"])
172+
shared.check_for_data_files(args, FILE_PATHS, QUARTER)
160173
file_count = shared.path_join(
161174
PATHS["data_1-fetch"], "wikipedia_count_by_languages.csv"
162175
)

scripts/3-report/gcs_report.py

Lines changed: 21 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import sys
1010
import textwrap
1111
import traceback
12+
from pathlib import Path
1213

1314
# Third-party
1415
from pygments import highlight
@@ -27,7 +28,8 @@
2728

2829
# Constants
2930
QUARTER = os.path.basename(PATHS["data_quarter"])
30-
SECTION = "Google Custom Search (GCS)"
31+
SECTION_FILE = Path(__file__).name
32+
SECTION_TITLE = "Google Custom Search (GCS)"
3133

3234

3335
def parse_arguments():
@@ -83,7 +85,8 @@ def gcs_intro(args):
8385
total_count = f"{data['Count'].sum():,d}"
8486
shared.update_readme(
8587
args,
86-
SECTION,
88+
SECTION_FILE,
89+
SECTION_TITLE,
8790
"Overview",
8891
None,
8992
None,
@@ -137,7 +140,8 @@ def plot_products(args):
137140

138141
shared.update_readme(
139142
args,
140-
SECTION,
143+
SECTION_FILE,
144+
SECTION_TITLE,
141145
title,
142146
image_path,
143147
"Plots showing Creative Commons (CC) legal tool product totals and"
@@ -180,7 +184,8 @@ def plot_tool_status(args):
180184

181185
shared.update_readme(
182186
args,
183-
SECTION,
187+
SECTION_FILE,
188+
SECTION_TITLE,
184189
title,
185190
image_path,
186191
"Plots showing Creative Commons (CC) legal tool status totals and"
@@ -223,7 +228,8 @@ def plot_latest_tools(args):
223228

224229
shared.update_readme(
225230
args,
226-
SECTION,
231+
SECTION_FILE,
232+
SECTION_TITLE,
227233
title,
228234
image_path,
229235
"Plots showing latest Creative Commons (CC) legal tool totals and"
@@ -265,7 +271,8 @@ def plot_prior_tools(args):
265271

266272
shared.update_readme(
267273
args,
268-
SECTION,
274+
SECTION_FILE,
275+
SECTION_TITLE,
269276
title,
270277
image_path,
271278
"Plots showing prior Creative Commons (CC) legal tool totals and"
@@ -311,7 +318,8 @@ def plot_retired_tools(args):
311318

312319
shared.update_readme(
313320
args,
314-
SECTION,
321+
SECTION_FILE,
322+
SECTION_TITLE,
315323
title,
316324
image_path,
317325
"Plots showing retired Creative Commons (CC) legal tools total and"
@@ -360,7 +368,8 @@ def plot_countries_highest_usage(args):
360368

361369
shared.update_readme(
362370
args,
363-
SECTION,
371+
SECTION_FILE,
372+
SECTION_TITLE,
364373
title,
365374
image_path,
366375
"Plots showing countries with the highest useage of the latest"
@@ -413,7 +422,8 @@ def plot_languages_highest_usage(args):
413422

414423
shared.update_readme(
415424
args,
416-
SECTION,
425+
SECTION_FILE,
426+
SECTION_TITLE,
417427
title,
418428
image_path,
419429
"Plots showing languages with the highest useage of the latest"
@@ -460,7 +470,8 @@ def plot_free_culture(args):
460470

461471
shared.update_readme(
462472
args,
463-
SECTION,
473+
SECTION_FILE,
474+
SECTION_TITLE,
464475
title,
465476
image_path,
466477
"Plots showing Approved for Free Cultural Works legal tool usage.",

0 commit comments

Comments
 (0)