Skip to content

Commit 2366d78

Browse files
committed
allow arbitrary sources and headers
1 parent 6d030d0 commit 2366d78

File tree

5 files changed

+69
-68
lines changed

(file-tree summary as above: 5 files changed, +69 −68 lines)

scripts/ci_test_cuda.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ def test_does_not_compile():
1919
output_t custom_kernel(input_tt data) { }
2020
"""
2121

22-
comp, run = run_cuda_script(cu_eval, ref.read_text(), sub, arch=None)
22+
comp, run = run_cuda_script(
23+
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
24+
)
2325
assert comp.success is False
2426
assert run.success is False
2527
assert comp.nvcc_found is True
@@ -50,7 +52,9 @@ def test_cuda_runtime_error():
5052
}
5153
5254
"""
53-
comp, run = run_cuda_script(cu_eval, ref.read_text(), sub, arch=None)
55+
comp, run = run_cuda_script(
56+
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
57+
)
5458
assert comp.success is True
5559
assert run.success is False
5660
assert run.command == "./eval.out"
@@ -78,7 +82,9 @@ def test_cuda_validation_fail():
7882
}
7983
8084
"""
81-
comp, run = run_cuda_script(cu_eval, ref.read_text(), sub, arch=None)
85+
comp, run = run_cuda_script(
86+
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
87+
)
8288
assert comp.success is True
8389
assert run.success is False
8490
assert run.command == "./eval.out"
@@ -92,7 +98,9 @@ def test_cuda_validation_fail():
9298
def test_cuda_correct():
9399
sub = Path("examples/identity_cuda/submission.cuh").read_text()
94100

95-
comp, run = run_cuda_script(cu_eval, ref.read_text(), sub, arch=None)
101+
comp, run = run_cuda_script(
102+
{"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "train.cuh": sub}, arch=None
103+
)
96104
assert comp.success is True
97105
assert run.success is True
98106
assert "warming up..." in run.stdout

scripts/ci_test_python.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ def test_does_not_import():
1919
this is a syntax error
2020
"""
2121

22-
run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
22+
run = run_pytorch_script(
23+
{"eval.py": py_eval, "reference.py": ref.read_text(), "train.py": sub}, "eval.py"
24+
)
2325
assert run.success is False
2426
assert run.exit_code == 1
2527
assert "IndentationError: unexpected indent\n" in run.stderr
@@ -32,7 +34,9 @@ def test_error():
3234
def custom_kernel(input):
3335
return [torch.zeros_like(i) for i in input]
3436
"""
35-
run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
37+
run = run_pytorch_script(
38+
{"eval.py": py_eval, "reference.py": ref.read_text(), "train.py": sub}, "eval.py"
39+
)
3640
assert run.success is False
3741
assert run.command == "python eval.py"
3842
# we never reach the benchmark part, because the test fails
@@ -45,7 +49,9 @@ def custom_kernel(input):
4549
def test_correct():
4650
sub = Path("examples/identity_py/submission.py").read_text()
4751

48-
run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
52+
run = run_pytorch_script(
53+
{"eval.py": py_eval, "reference.py": ref.read_text(), "train.py": sub}, "eval.py"
54+
)
4955
assert run.success is True
5056
assert "warming up..." in run.stdout
5157
assert run.exit_code == 0

scripts/local-test.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@
99
ref = Path("examples/identity_cuda/reference.cuh")
1010
sub = Path("examples/identity_cuda/submission.cuh")
1111

12-
cout, score = run_cuda_script(cu_eval, ref.read_text(), sub.read_text(), arch=None)
12+
cout, score = run_cuda_script(
13+
{"eval.cu": cu_eval},
14+
{"reference.cuh": ref.read_text(), "train.cuh": sub.read_text()},
15+
arch=None,
16+
)
1317
print(cout)
1418
print(score)
1519
exit(0 if score > 0 else 1)

src/discord-cluster-manager/modal_runner.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,8 @@ def modal_run_pytorch_script( # noqa: C901
8080
try:
8181
with timeout(timeout_seconds):
8282
run_result = run_pytorch_script(
83-
script_content=script_content,
84-
reference_content=reference_content,
85-
submission_content=submission_content,
86-
arch=arch,
83+
{"eval.py": script_content, "reference.py": reference_content, "train.py": submission_content},
84+
"eval.py"
8785
)
8886
if not run_result.success:
8987
# exit code 1 encodes failed tests
@@ -126,9 +124,8 @@ def modal_run_cuda_script( # # noqa: C901
126124
try:
127125
with timeout(timeout_seconds):
128126
compile_result, run_result = run_cuda_script(
129-
script_content,
130-
reference_content=reference_content,
131-
submission_content=submission_content,
127+
{"eval.cu": script_content},
128+
{"reference.cuh": reference_content, "train.cuh": submission_content},
132129
arch=arch,
133130
include_dirs=MODAL_CUDA_INCLUDE_DIRS,
134131
)

src/discord-cluster-manager/run_eval.py

Lines changed: 39 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import shlex
44
import subprocess
55
import time
6-
from typing import Optional
6+
from pathlib import Path
77

88
from consts import CUDA_FLAGS
99

@@ -155,19 +155,19 @@ def run_program(args: list[str]) -> RunResult:
155155

156156

157157
def run_cuda_script( # # noqa: C901
158-
script_content: str,
159-
reference_content: str = None,
160-
submission_content: str = None,
158+
sources: dict[str, str],
159+
headers: dict[str, str] = None,
161160
arch: int = None,
162161
include_dirs: list[str] = None,
163162
) -> tuple[CompileResult, RunResult]:
164163
"""
165164
Executes the provided CUDA kernel in an isolated environment with a timeout
166165
167166
Args:
168-
script_content: The CUDA script containing the GPU kernel
169-
reference_content: The (optional) reference code, used for leaderboards.
170-
submission_content: The (optional) submission code, used for leaderboards.
167+
sources: The source files to compile. Mapping file name to content.
168+
headers: Additional header files to create for the compile run.
169+
Mapping of file name to file contents. These files will _not_ be added to the
170+
compile command.
171171
arch: The arch code for the compute/sm versions. If None, native arch is used.
172172
include_dirs: Additional include directories, e.g., for thunderkittens/cutlass etc
173173
@@ -179,80 +179,66 @@ def run_cuda_script( # # noqa: C901
179179

180180
try:
181181
# Write submission files to directory
182-
if reference_content is not None:
183-
with open("reference.cuh", "w") as f:
184-
f.write(reference_content)
182+
for source, content in sources.items():
183+
Path(source).write_text(content)
185184

186-
if submission_content is not None:
187-
with open("train.cuh", "w") as f:
188-
f.write(submission_content)
189-
190-
with open("eval.cu", "w") as f:
191-
f.write(script_content)
185+
for header, content in headers.items():
186+
Path(header).write_text(content)
192187

193188
compile_result = compile_cuda_script(
194-
files=["eval.cu"],
189+
files=list(sources.keys()),
195190
arch=arch,
196191
include_dirs=include_dirs,
197192
verbose=True,
198193
)
199-
200-
if not compile_result.success:
201-
return compile_result, RunResult(
202-
success=False,
203-
command="",
204-
stdout="",
205-
stderr="",
206-
exit_code=-1,
207-
duration=0.0,
208-
result={},
209-
)
210-
211-
run_result = run_program(["./eval.out"])
212-
return compile_result, run_result
213-
194+
# cleaning up all source files _before_ we let the user code run, just in
195+
# case there's something in there that the user isn't supposed to snoop
214196
finally:
215-
tmp_files = ["reference.cuh", "train.cuh", "eval.cu", "eval.out"]
197+
tmp_files = list(sources.keys()) + list(headers.keys())
216198
for f in tmp_files:
217199
if os.path.exists(f):
218200
os.remove(f)
219201

202+
if not compile_result.success:
203+
return compile_result, RunResult(
204+
success=False,
205+
command="",
206+
stdout="",
207+
stderr="",
208+
exit_code=-1,
209+
duration=0.0,
210+
result={},
211+
)
212+
213+
run_result = run_program(["./eval.out"])
214+
return compile_result, run_result
215+
220216

221217
def run_pytorch_script( # noqa: C901
222-
script_content: str,
223-
reference_content: Optional[str] = None,
224-
submission_content: Optional[str] = None,
218+
sources: dict[str, str],
219+
main: str,
225220
arch: int = None,
226221
) -> RunResult:
227222
"""
228223
Executes the provided PyTorch GPU kernel in an isolated environment
229224
230225
Args:
231-
script_content: The PyTorch script containing the GPU kernel to benchmark
232-
reference_content: The (optional) reference code, used for leaderboards.
233-
submission_content: The (optional) submission code, used for leaderboards.
226+
sources: Files to generate
227+
main: Which file to run. Must be one of the keys in sources.
234228
arch: The arch code for the compute/sm versions.
235229
236230
Returns:
237231
tuple[str, float]: (Kernel output, execution time in milliseconds)
238232
"""
239233
try:
240-
# Write submission files to directory
241-
if reference_content is not None:
242-
with open("reference.py", "w") as f:
243-
f.write(reference_content)
244-
245-
if submission_content is not None:
246-
with open("train.py", "w") as f:
247-
f.write(submission_content)
234+
assert main in sources.keys()
248235

249-
with open("eval.py", "w") as f:
250-
f.write(script_content)
251-
252-
return run_program(["python", "eval.py"])
236+
# Write submission files to directory
237+
for source, content in sources.items():
238+
Path(source).write_text(content)
239+
return run_program(["python", main])
253240

254241
finally:
255-
tmp_files = ["eval.py", "reference.py", "train.py"]
256-
for f in tmp_files:
242+
for f in sources.keys():
257243
if os.path.exists(f):
258244
os.remove(f)

0 commit comments

Comments (0)