Skip to content

Commit d816892

Browse files
committed
added new run time evaluation script
1 parent 92c3520 commit d816892

File tree

1 file changed

+97
-0
lines changed

1 file changed

+97
-0
lines changed
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
#!/usr/bin/env python3
2+
"""Compare 'Time taken' entries between two ORCID worker log files."""
3+
4+
import re
5+
import sys
6+
import statistics
7+
from collections import defaultdict
8+
9+
# Matches DEBUG log lines of the form
#   "YYYY-MM-DD HH:MM:SS DEBUG   ORCID <id> Time taken: <number>"
# capturing the ORCID ID (group 1) and the number (group 2). The number is
# matched as one well-formed decimal ("3", "3.", "3.5" or ".5") so that
# trailing punctuation such as "1.234." can never end up in the capture and
# make float() raise.
PATTERN = re.compile(
    r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} DEBUG\s+ORCID (\S+) "
    r"Time taken: (\d+(?:\.\d*)?|\.\d+)"
)


def parse_log(filepath):
    """Extract 'Time taken' values from a log file, grouped by ORCID ID.

    Args:
        filepath: Path of the log file to read.

    Returns:
        A ``defaultdict(list)`` mapping each ORCID ID to the list of its
        'Time taken' values (as floats) in the order they appear in the
        file. Lines that do not match ``PATTERN`` are ignored.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    groups = defaultdict(list)
    # Decode explicitly rather than with the platform default, and replace
    # undecodable bytes so a single corrupt line cannot abort the whole run.
    with open(filepath, encoding="utf-8", errors="replace") as f:
        for line in f:
            m = PATTERN.match(line)
            if m:
                groups[m.group(1)].append(float(m.group(2)))
    return groups
25+
26+
27+
def print_stats(label, values):
    """Print summary statistics for *values* under the heading *label*.

    Writes the count, minimum, median, mean and maximum to stdout (the
    float figures with three decimals and an "s" suffix) and returns the
    same figures as a dict keyed by "count", "min", "median", "mean" and
    "max".
    """
    ordered = sorted(values)
    average = statistics.mean(ordered)
    middle = statistics.median(ordered)
    summary = {
        "count": len(ordered),
        "min": ordered[0],
        "median": middle,
        "mean": average,
        "max": ordered[-1],
    }

    print(f" {label}:")
    print(f" Count: {summary['count']}")
    # The remaining figures all share the same "<Title>: <value>s" layout.
    for title, key in (
        ("Min", "min"),
        ("Median", "median"),
        ("Mean", "mean"),
        ("Max", "max"),
    ):
        print(f" {title}: {summary[key]:.3f}s")
    return summary
40+
41+
42+
def _print_change(metric, before_stats, after_stats):
    """Print the absolute and relative change of one metric between two runs."""
    old = before_stats[metric]
    new = after_stats[metric]
    # A zero baseline has no meaningful relative change; do not divide by it.
    relative = f"{(new / old - 1) * 100:+.1f}%" if old else "n/a"
    print(f" Change in {metric}: {new - old:+.3f}s ({relative})")


def _print_comparison(before_values, after_values):
    """Print before/after statistics for one group and the change between them."""
    results = []
    for label, values in (("Before", before_values), ("After", after_values)):
        if values:
            results.append(print_stats(label, values))
        else:
            print(f" {label}: (no data)")
            results.append(None)

    before_stats, after_stats = results
    # A change can only be reported when both sides have data.
    if before_stats is not None and after_stats is not None:
        for metric in ("median", "mean"):
            _print_change(metric, before_stats, after_stats)


def main():
    """Compare 'Time taken' statistics between two log files and print a report.

    The "before" and "after" log paths are taken from the first and second
    command-line arguments and default to "orcid-before.log" and
    "orcid-after.log". The report shows statistics for every ORCID ID found
    in either file, followed by statistics over all IDs combined.
    """
    before_file = sys.argv[1] if len(sys.argv) > 1 else "orcid-before.log"
    after_file = sys.argv[2] if len(sys.argv) > 2 else "orcid-after.log"

    before = parse_log(before_file)
    after = parse_log(after_file)

    all_orcids = sorted(set(before) | set(after))
    rule = "=" * 60

    print(rule)
    print("BEFORE vs AFTER comparison (ORCID worker)")
    print(rule)

    # Per-ORCID stats
    for orcid in all_orcids:
        print(f"\n--- {orcid} ---")
        _print_comparison(before.get(orcid, []), after.get(orcid, []))

    # Overall stats
    before_all = [v for orcid in all_orcids for v in before.get(orcid, [])]
    after_all = [v for orcid in all_orcids for v in after.get(orcid, [])]

    print(f"\n{rule}")
    print("ALL ORCID IDs COMBINED")
    print(rule)
    _print_comparison(before_all, after_all)
94+
95+
96+
# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)