@@ -114,7 +114,7 @@ def compile_cuda_script( # # noqa: C901
114114 )
115115
116116
117- def run_cuda_program (args : list [str ]) -> RunResult :
117+ def run_program (args : list [str ]) -> RunResult :
118118 # set up a pipe so the tester can communicate its verdict with us
119119 env = os .environ .copy ()
120120 pipe_read , pipe_write = os .pipe ()
@@ -142,7 +142,9 @@ def run_cuda_program(args: list[str]) -> RunResult:
142142 result_dict [key .strip ()] = value .strip ()
143143
144144 return RunResult (
145- success = run_process .returncode == 0 ,
145+ # TODO should we return 0 also on test failure?
146+ # TODO check what return codes python uses, e.g. on uncaught exception
147+ success = (run_process .returncode == 0 or run_process .returncode == 1 ),
146148 command = _make_cmd (run_process .args ),
147149 stdout = run_process .stdout ,
148150 stderr = run_process .stderr ,
@@ -206,7 +208,7 @@ def run_cuda_script( # # noqa: C901
206208 result = {},
207209 )
208210
209- run_result = run_cuda_program (["./eval.out" ])
211+ run_result = run_program (["./eval.out" ])
210212 return compile_result , run_result
211213
212214 finally :
@@ -221,9 +223,9 @@ def run_pytorch_script( # noqa: C901
221223 reference_content : Optional [str ] = None ,
222224 submission_content : Optional [str ] = None ,
223225 arch : int = None ,
224- ) -> tuple [ str , float ] :
226+ ) -> RunResult :
225227 """
226- Executes the provided PyTorch GPU kernel in an isolated environment with a timeout
228+ Executes the provided PyTorch GPU kernel in an isolated environment
227229
228230 Args:
229231 script_content: The PyTorch script containing the GPU kernel to benchmark
@@ -247,33 +249,8 @@ def run_pytorch_script( # noqa: C901
247249 with open ("eval.py" , "w" ) as f :
248250 f .write (script_content )
249251
250- execution_start_time = time .perf_counter ()
251- result = subprocess .run (
252- ["python" , "eval.py" ],
253- stdout = subprocess .PIPE ,
254- stderr = subprocess .PIPE ,
255- text = True ,
256- )
257-
258- if result .returncode != 0 :
259- raise RuntimeError (
260- "Script execution failed with return code "
261- + f"{ result .returncode } :\n { result .stderr } "
262- )
263-
264- score = None
265- for line in result .stdout .splitlines ():
266- if line .startswith ("score:" ):
267- score = float (line .split (":" )[1 ].strip ())
268- return "score" , score
269-
270- if score is None :
271- execution_end_time = time .perf_counter ()
272- score = execution_end_time - execution_start_time
252+ return run_program (["python" , "eval.py" ])
273253
274- return result .stdout , score
275- except Exception as e :
276- return f"Error executing script: { str (e )} " , 0.0
277254 finally :
278255 tmp_files = ["eval.py" , "reference.py" , "train.py" ]
279256 for f in tmp_files :