Skip to content

Commit ff11b0a

Browse files
authored
Merge pull request #273 from oree-xx/check_completion
Consolidate checks for completion file exists
2 parents f4ff30b + d2b1d75 commit ff11b0a

File tree

7 files changed

+29
-63
lines changed

7 files changed

+29
-63
lines changed

scripts/2-process/gcs_process.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ def main():
317317
args = parse_arguments()
318318
shared.paths_log(LOGGER, PATHS)
319319
shared.git_fetch_and_merge(args, PATHS["repo"])
320-
shared.check_for_data_files(args, FILE_PATHS, QUARTER)
320+
shared.check_completion_file_exists(args, FILE_PATHS)
321321

322322
# Count data
323323
file1_count = shared.path_join(PATHS["data_1-fetch"], "gcs_1_count.csv")

scripts/2-process/github_process.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def main():
136136
args = parse_arguments()
137137
shared.paths_log(LOGGER, PATHS)
138138
shared.git_fetch_and_merge(args, PATHS["repo"])
139-
shared.check_for_data_files(args, FILE_PATHS, QUARTER)
139+
shared.check_completion_file_exists(args, FILE_PATHS)
140140
file_count = shared.path_join(PATHS["data_1-fetch"], "github_1_count.csv")
141141
count_data = shared.open_data_file(
142142
LOGGER, file_count, usecols=["TOOL_IDENTIFIER", "COUNT"]

scripts/2-process/wikipedia_process.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def main():
158158
args = parse_arguments()
159159
shared.paths_log(LOGGER, PATHS)
160160
shared.git_fetch_and_merge(args, PATHS["repo"])
161-
shared.check_for_data_files(args, FILE_PATHS, QUARTER)
161+
shared.check_completion_file_exists(args, FILE_PATHS)
162162
file_count = shared.path_join(
163163
PATHS["data_1-fetch"], "wikipedia_count_by_languages.csv"
164164
)

scripts/3-report/gcs_report.py

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ def parse_arguments():
3636
"""
3737
Parses command-line arguments, returns parsed arguments.
3838
"""
39+
global QUARTER
3940
LOGGER.info("Parsing command-line arguments")
4041
parser = argparse.ArgumentParser(description=__doc__)
4142
parser.add_argument(
@@ -70,28 +71,12 @@ def parse_arguments():
7071
if args.quarter != QUARTER:
7172
global PATHS
7273
PATHS = shared.paths_update(LOGGER, PATHS, QUARTER, args.quarter)
74+
QUARTER = args.quarter
7375
args.logger = LOGGER
7476
args.paths = PATHS
7577
return args
7678

7779

78-
def check_report_completion(args):
79-
""" "
80-
The function checks for the last plot and image
81-
caption created in this script. This helps to
82-
immediately know if all plots in the script have
83-
been created and should not be regenerated.
84-
85-
"""
86-
if args.force:
87-
return
88-
last_entry = shared.path_join(PATHS["data_phase"], "gcs_free_culture.png")
89-
if os.path.exists(last_entry):
90-
raise shared.QuantifyingException(
91-
f"{last_entry} already exists. Report script completed", 0
92-
)
93-
94-
9580
def gcs_intro(args):
9681
"""
9782
Write Google Custom Search (GCS) introduction.
@@ -513,7 +498,8 @@ def main():
513498
args = parse_arguments()
514499
shared.paths_log(LOGGER, PATHS)
515500
shared.git_fetch_and_merge(args, PATHS["repo"])
516-
check_report_completion(args)
501+
last_entry = shared.path_join(PATHS["data_phase"], "gcs_free_culture.png")
502+
shared.check_completion_file_exists(args, last_entry)
517503
gcs_intro(args)
518504
plot_products(args)
519505
plot_tool_status(args)

scripts/3-report/github_report.py

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def parse_arguments():
3434
"""
3535
Parses command-line arguments, returns parsed arguments.
3636
"""
37+
global QUARTER
3738
LOGGER.info("Parsing command-line arguments")
3839
parser = argparse.ArgumentParser(description=__doc__)
3940
parser.add_argument(
@@ -68,30 +69,12 @@ def parse_arguments():
6869
if args.quarter != QUARTER:
6970
global PATHS
7071
PATHS = shared.paths_update(LOGGER, PATHS, QUARTER, args.quarter)
72+
QUARTER = args.quarter
7173
args.logger = LOGGER
7274
args.paths = PATHS
7375
return args
7476

7577

76-
def check_report_completion(args):
77-
""" "
78-
The function checks for the last plot and image
79-
caption created in this script. This helps to
80-
immediately know if all plots in the script have
81-
been created and should not be regenerated.
82-
83-
"""
84-
if args.force:
85-
return
86-
last_entry = shared.path_join(
87-
PATHS["data_phase"], "github_restriction.png"
88-
)
89-
if os.path.exists(last_entry):
90-
raise shared.QuantifyingException(
91-
f"{last_entry} already exists. Report script completed", 0
92-
)
93-
94-
9578
def load_data(args):
9679
"""
9780
Load the collected data from the CSV file.
@@ -262,7 +245,10 @@ def main():
262245
args = parse_arguments()
263246
shared.paths_log(LOGGER, PATHS)
264247
shared.git_fetch_and_merge(args, PATHS["repo"])
265-
check_report_completion(args)
248+
last_entry = shared.path_join(
249+
PATHS["data_phase"], "github_restriction.png"
250+
)
251+
shared.check_completion_file_exists(args, last_entry)
266252
github_intro(args)
267253
plot_totals_by_license_type(args)
268254
plot_totals_by_restriction(args)

scripts/3-report/wikipedia_report.py

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def parse_arguments():
3434
"""
3535
Parses command-line arguments, returns parsed arguments.
3636
"""
37+
global QUARTER
3738
LOGGER.info("Parsing command-line arguments")
3839
parser = argparse.ArgumentParser(description=__doc__)
3940
parser.add_argument(
@@ -68,30 +69,12 @@ def parse_arguments():
6869
if args.quarter != QUARTER:
6970
global PATHS
7071
PATHS = shared.paths_update(LOGGER, PATHS, QUARTER, args.quarter)
72+
QUARTER = args.quarter
7173
args.logger = LOGGER
7274
args.paths = PATHS
7375
return args
7476

7577

76-
def check_report_completion(args):
77-
""" "
78-
The function checks for the last plot and image
79-
caption created in this script. This helps to
80-
immediately know if all plots in the script have
81-
been created and should not be regenerated.
82-
83-
"""
84-
if args.force:
85-
return
86-
last_entry = shared.path_join(
87-
PATHS["data_phase"], "wikipedia_least_language_usage.png"
88-
)
89-
if os.path.exists(last_entry):
90-
raise shared.QuantifyingException(
91-
f"{last_entry} already exists. Report script completed", 0
92-
)
93-
94-
9578
def wikipedia_intro(args):
9679
"""
9780
Write Wikipedia introduction.
@@ -285,7 +268,10 @@ def main():
285268
args = parse_arguments()
286269
shared.paths_log(LOGGER, PATHS)
287270
shared.git_fetch_and_merge(args, PATHS["repo"])
288-
check_report_completion(args)
271+
last_entry = shared.path_join(
272+
PATHS["data_phase"], "wikipedia_least_language_usage.png"
273+
)
274+
shared.check_completion_file_exists(args, last_entry)
289275
wikipedia_intro(args)
290276
plot_language_representation(args)
291277
plot_highest_language_usage(args)

scripts/shared.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,21 @@ def data_to_csv(args, data, file_path):
4747
)
4848

4949

50-
def check_for_data_files(args, file_paths, QUARTER):
50+
def check_completion_file_exists(args, file_paths):
51+
""" "
52+
This function checks if expected output files
53+
exist. If any exist and --force is not provided,
54+
the script exits early by raising a QuantifyingException.
55+
In the case of a report file, we check if the last output exists.
56+
"""
5157
if args.force:
5258
return
59+
if isinstance(file_paths, str):
60+
file_paths = [file_paths]
5361
for path in file_paths:
5462
if os.path.exists(path):
5563
raise QuantifyingException(
56-
f"Processed data already exists for {QUARTER}", 0
64+
f"Output files already exists for {args.quarter}", 0
5765
)
5866

5967

0 commit comments

Comments
 (0)