From d232d4713ebc4ba756db602cb354b96922cabbaa Mon Sep 17 00:00:00 2001 From: Qiong Wu Date: Mon, 15 Jun 2026 09:41:45 +0800 Subject: [PATCH] fix(sa-eval): run cleanup on SKIP_* / exception paths evaluate_model has 5 early-return paths via _skip_result that bypassed the trailing cleanup_onnx_artifacts call, so any model that failed mid-pipeline (e.g. SKIP_GRAPH_OPT for LLMs whose graph_optimize step aborts) leaked multi-GB ONNX external-data files. One Qwen2.5-1.5B SKIP_GRAPH_OPT folder alone leaked 22.9 GB. Move the cleanup invocation from evaluate_model's success path into a try/finally in the main loop so --cleanup runs regardless of which return path is taken (SKIP_BUILD / SKIP_EXPORT / SKIP_GRAPH_OPT / SKIP_SA_PRE / SKIP_SA_POST or exception). --- scripts/e2e_eval/run_sa_eval.py | 40 ++++++++++++++++----------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/scripts/e2e_eval/run_sa_eval.py b/scripts/e2e_eval/run_sa_eval.py index ccf6edbb3..fc9bd671b 100644 --- a/scripts/e2e_eval/run_sa_eval.py +++ b/scripts/e2e_eval/run_sa_eval.py @@ -673,7 +673,6 @@ def evaluate_model( run_quantize: bool = True, quantize_precision: str = "int8", run_compile: bool = True, - cleanup: bool = False, ) -> dict | None: """Run the winml build + SA analysis pipeline for a single model.""" hf_id = model_entry["hf_id"] @@ -900,9 +899,6 @@ def _fmt(p: dict | None) -> str: out_file.write_text(json.dumps(result, indent=2, ensure_ascii=False), encoding="utf-8") safe_print(f" Written: {out_file}") - if cleanup: - cleanup_onnx_artifacts(model_dir) - return result @@ -1319,22 +1315,26 @@ def main() -> None: pass # Corrupted result file — re-run safe_print(f"\n[{i}/{len(models_to_run)}]") - result = evaluate_model( - entry, - output_dir, - use_cache=args.use_cache, - ep=args.ep, - device=args.device, - run_perf=not args.no_perf, - perf_iterations=args.perf_iterations, - perf_warmup=args.perf_warmup, - run_quantize=not args.no_quantize, - quantize_precision=args.quantize_precision, - run_compile=not args.no_compile, - cleanup=args.cleanup, - ) - if result: - all_results.append(result) + try: + result = evaluate_model( + entry, + output_dir, + use_cache=args.use_cache, + ep=args.ep, + device=args.device, + run_perf=not args.no_perf, + perf_iterations=args.perf_iterations, + perf_warmup=args.perf_warmup, + run_quantize=not args.no_quantize, + quantize_precision=args.quantize_precision, + run_compile=not args.no_compile, + ) + if result: + all_results.append(result) + finally: + # Cleanup must run on SKIP_* / exception paths too, not just success. + if args.cleanup: + cleanup_onnx_artifacts(model_dir) elapsed = time.monotonic() - t_start complete = [r for r in all_results if r.get("status") == "COMPLETE"]