Allow arbitrary sets of files to be used in python and cuda runner code (#132)

ngc92 · alexzhang13 · web-flow · commit 598ea6db73fb · 2025-01-15T12:59:12.000-05:00
* allow arbitrary sources and headers

* fixed runners and merge multi-file with new runners

* fix runner

to squash, fix arguments for testing runner

to squash, fix arguments for testing runner

to squash, fix arguments for testing runner

for now, fix to train until we fix naming

train -> submission

---------

Co-authored-by: alexzhang13 <alex.lx.zhang@gmail.com>
diff --git a/.github/workflows/runner.py b/.github/workflows/runner.py
@@ -13,17 +13,18 @@
 
 if config["lang"] == "cu":
     comp, run = run_cuda_script(
-        config.get("eval.cu", cu_eval),
-        config.get("reference.cuh", None),
-        config.get("submission.cuh", None),
+        {"eval.cu": cu_eval},
+        {key: config[key] for key in ["reference.cuh", "submission.cuh"] if key in config},
         arch=None,
     )
     result = {"compile": asdict(comp), "run": asdict(run)}
 else:
     run = run_pytorch_script(
-        config.get("eval.py", py_eval),
-        config.get("reference.py", None),
-        config.get("submission.py", None),
+        {
+            "eval.py": py_eval,
+            **{key: config[key] for key in ["reference.py", "submission.py"] if key in config},
+        },
+        main="eval.py",
         arch=None,
     )
     result = {"run": asdict(run)}
diff --git a/docs/docs/creating-a-leaderboard/cuda-creations.md b/docs/docs/creating-a-leaderboard/cuda-creations.md
@@ -56,7 +56,7 @@ Let's break down what's going on in this relatively short file:
 #include <iostream>
 
 #include "reference.cuh"
-#include "train.cuh"
+#include "submission.cuh"
 
 #define WARMUP_RUNS 10
 #define TIMED_RUNS 100
@@ -118,7 +118,7 @@ int main() {
     return 0;
 }
 ```
-You'll notice that we include from headers named `reference.cuh` and `train.cuh`. These are the reference
+You'll notice that we include from headers named `reference.cuh` and `submission.cuh`. These are the reference
 code and submission code respectively, just renamed to a fix module so we can include them. The
 general idea is that the evaluation code can treat the leaderboard as a basic abstraction, and only
 concern itself with three things:
diff --git a/docs/docs/creating-a-leaderboard/python-creations.md b/docs/docs/creating-a-leaderboard/python-creations.md
@@ -53,7 +53,7 @@ Let's break down what's going on in this relatively short file:
 import torch
 import time
 from reference import ref_kernel, generate_input, check_implementation
-from train import custom_kernel
+from submission import custom_kernel
 
 
 def correctness() -> bool:
diff --git a/scripts/ci_test_cuda.py b/scripts/ci_test_cuda.py
@@ -20,13 +20,15 @@ def test_does_not_compile():
     output_t custom_kernel(input_tt data) {   }
     """
 
-    comp, run = run_cuda_script(cu_eval, ref.read_text(), sub, arch=None)
+    comp, run = run_cuda_script(
+        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
+    )
     assert comp.success is False
     assert run.success is False
     assert comp.nvcc_found is True
     assert comp.exit_code != ExitCode.SUCCESS
     assert comp.stdout == ""
-    assert 'train.cuh(2): error: identifier "input_tt" is undefined' in comp.stderr
+    assert 'submission.cuh(2): error: identifier "input_tt" is undefined' in comp.stderr
     assert '1 error detected in the compilation of "eval.cu".' in comp.stderr
     assert comp.command.startswith("/usr/local/cuda/bin/nvcc")
     assert "nvcc: NVIDIA (R) Cuda compiler driver" in comp.nvcc_version
@@ -52,7 +54,9 @@ def test_cuda_runtime_error():
 }
 
     """
-    comp, run = run_cuda_script(cu_eval, ref.read_text(), sub, arch=None)
+    comp, run = run_cuda_script(
+        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
+    )
     assert comp.success is True
     assert run.success is False
     assert run.command == "./eval.out"
@@ -80,7 +84,9 @@ def test_cuda_validation_fail():
     }
 
         """
-    comp, run = run_cuda_script(cu_eval, ref.read_text(), sub, arch=None)
+    comp, run = run_cuda_script(
+        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
+    )
     assert comp.success is True
     assert run.success is True
     assert run.passed is False
@@ -95,7 +101,9 @@ def test_cuda_validation_fail():
 def test_cuda_correct():
     sub = Path("examples/identity_cuda/submission.cuh").read_text()
 
-    comp, run = run_cuda_script(cu_eval, ref.read_text(), sub, arch=None)
+    comp, run = run_cuda_script(
+        {"eval.cu": cu_eval}, {"reference.cuh": ref.read_text(), "submission.cuh": sub}, arch=None
+    )
     assert comp.success is True
     assert run.success is True
     assert "warming up..." in run.stdout
diff --git a/scripts/ci_test_python.py b/scripts/ci_test_python.py
@@ -20,7 +20,9 @@ def test_does_not_import():
     this is a syntax error
     """
 
-    run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
+    run = run_pytorch_script(
+        {"eval.py": py_eval, "reference.py": ref.read_text(), "submission.py": sub}, "eval.py"
+    )
     assert run.success is False
     assert run.exit_code != ExitCode.SUCCESS
     assert "IndentationError: unexpected indent\n" in run.stderr
@@ -33,7 +35,12 @@ def test_error():
 def custom_kernel(input):
     return [torch.zeros_like(i) for i in input]
         """
-    run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
+
+    run = run_pytorch_script(
+        {"eval.py": py_eval, "reference.py": ref.read_text(), "submission.py": sub},
+        "eval.py",
+        arch=None,
+    )
     assert run.success is True
     assert run.passed is False
     assert run.command == "python eval.py"
@@ -47,7 +54,9 @@ def custom_kernel(input):
 def test_correct():
     sub = Path("examples/identity_py/submission.py").read_text()
 
-    run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
+    run = run_pytorch_script(
+        {"eval.py": py_eval, "reference.py": ref.read_text(), "submission.py": sub}, "eval.py"
+    )
     assert run.success is True
     assert "warming up..." in run.stdout
     assert run.exit_code == ExitCode.SUCCESS
diff --git a/scripts/local-test.py b/scripts/local-test.py
@@ -9,7 +9,11 @@
 ref = Path("examples/identity_cuda/reference.cuh")
 sub = Path("examples/identity_cuda/submission.cuh")
 
-cout, score = run_cuda_script(cu_eval, ref.read_text(), sub.read_text(), arch=None)
+cout, score = run_cuda_script(
+    {"eval.cu": cu_eval},
+    {"reference.cuh": ref.read_text(), "submission.cuh": sub.read_text()},
+    arch=None,
+)
 print(cout)
 print(score)
 exit(0 if score > 0 else 1)
diff --git a/src/discord-cluster-manager/cogs/modal_cog.py b/src/discord-cluster-manager/cogs/modal_cog.py
@@ -59,7 +59,7 @@ async def run_modal(
                 "**Running on Modal...**\n> ⏳ Waiting for available GPU..."
             )
 
-            filename = "train.py" if script.filename.endswith(".py") else "train.cu"
+            filename = "submission.py" if script.filename.endswith(".py") else "train.cu"
             reference_content = None
             if reference_script is not None or reference_code is not None:
                 reference_content = (
diff --git a/src/discord-cluster-manager/consts.py b/src/discord-cluster-manager/consts.py
@@ -67,7 +67,7 @@ def combine_enums(enums: list[Type[Enum]], combined_name: str) -> Enum:
 MODAL_PATH = "/tmp/dcs/"
 MODAL_EVAL_CODE_PATH = "/tmp/dcs/eval.py"
 MODAL_REFERENCE_CODE_PATH = "/tmp/dcs/reference.py"
-MODAL_SUBMISSION_CODE_PATH = "/tmp/dcs/train.py"
+MODAL_SUBMISSION_CODE_PATH = "/tmp/dcs/submission.py"
 
 
 # Compilation flags for Modal
diff --git a/src/discord-cluster-manager/eval.cu b/src/discord-cluster-manager/eval.cu
@@ -7,7 +7,7 @@
 #include <memory>
 
 #include "reference.cuh"
-#include "train.cuh"
+#include "submission.cuh"
 
 #define WARMUP_RUNS 10
 #define TIMED_RUNS 100
diff --git a/src/discord-cluster-manager/eval.py b/src/discord-cluster-manager/eval.py
@@ -5,7 +5,7 @@
 
 import torch
 from reference import check_implementation, generate_input, ref_kernel
-from train import custom_kernel
+from submission import custom_kernel
 
 
 class PopcornLogger:
diff --git a/src/discord-cluster-manager/modal_runner.py b/src/discord-cluster-manager/modal_runner.py
@@ -20,7 +20,12 @@
 
 # Move this to another file later:
 python_image = Image.debian_slim(python_version="3.10").pip_install(
-    ["torch", "triton", "jax[cuda12]", "jax2torch"]
+    [
+        "torch",
+        "triton",
+        "jax[cuda12]",
+        "jax2torch",
+    ]
 )
 
 cuda_image = (
@@ -80,10 +85,12 @@ def modal_run_pytorch_script(  # noqa: C901
     try:
         with timeout(timeout_seconds):
             run_result = run_pytorch_script(
-                script_content=script_content,
-                reference_content=reference_content,
-                submission_content=submission_content,
-                arch=arch,
+                {
+                    "eval.py": script_content,
+                    "reference.py": reference_content,
+                    "submission.py": submission_content,
+                },
+                "eval.py",
             )
             return FullResult(success=True, error="", compile=None, run=run_result)
         # TODO fixup error handling!
@@ -106,9 +113,8 @@ def modal_run_cuda_script(  # # noqa: C901
     try:
         with timeout(timeout_seconds):
             comp, run = run_cuda_script(
-                script_content,
-                reference_content=reference_content,
-                submission_content=submission_content,
+                {"eval.cu": script_content},
+                {"reference.cuh": reference_content, "submission.cuh": submission_content},
                 arch=arch,
                 include_dirs=MODAL_CUDA_INCLUDE_DIRS,
             )
diff --git a/src/discord-cluster-manager/run_eval.py b/src/discord-cluster-manager/run_eval.py
@@ -3,7 +3,7 @@
 import shlex
 import subprocess
 import time
-from typing import Optional
+from pathlib import Path
 
 from consts import CUDA_FLAGS, ExitCode
 
@@ -169,19 +169,19 @@ def run_program(args: list[str]) -> RunResult:
 
 
 def run_cuda_script(  # # noqa: C901
-    script_content: str,
-    reference_content: str = None,
-    submission_content: str = None,
+    sources: dict[str, str],
+    headers: dict[str, str] = None,
     arch: int = None,
     include_dirs: list[str] = None,
 ) -> tuple[CompileResult, RunResult]:
     """
     Executes the provided CUDA kernel in an isolated environment
 
     Args:
-        script_content: The CUDA script containing the GPU kernel
-        reference_content: The (optional) reference code, used for leaderboards.
-        submission_content: The (optional) submission code, used for leaderboards.
+        sources: The source files to compile. Mapping file name to content.
+        headers: Additional header files to create for the compile run.
+            Mapping of file name to file contents. These files will _not_ be added to the
+            compile command.
         arch: The arch code for the compute/sm versions. If None, native arch is used.
         include_dirs: Additional include directories, e.g., for thunderkittens/cutlass etc
 
@@ -193,19 +193,14 @@ def run_cuda_script(  # # noqa: C901
 
     try:
         # Write submission files to directory
-        if reference_content is not None:
-            with open("reference.cuh", "w") as f:
-                f.write(reference_content)
+        for source, content in sources.items():
+            Path(source).write_text(content)
 
-        if submission_content is not None:
-            with open("train.cuh", "w") as f:
-                f.write(submission_content)
-
-        with open("eval.cu", "w") as f:
-            f.write(script_content)
+        for header, content in headers.items():
+            Path(header).write_text(content)
 
         compile_result = compile_cuda_script(
-            files=["eval.cu"],
+            files=list(sources.keys()),
             arch=arch,
             include_dirs=include_dirs,
             verbose=True,
@@ -226,48 +221,54 @@ def run_cuda_script(  # # noqa: C901
         run_result = run_program(["./eval.out"])
         return compile_result, run_result
 
+    # cleaning up all source files _before_ we let the user code run, just in
+    # case there's something in there that the user isn't supposed to snoop
     finally:
-        tmp_files = ["reference.cuh", "train.cuh", "eval.cu", "eval.out"]
+        tmp_files = list(sources.keys()) + list(headers.keys())
         for f in tmp_files:
             if os.path.exists(f):
                 os.remove(f)
 
+    if not compile_result.success:
+        return compile_result, RunResult(
+            success=False,
+            command="",
+            stdout="",
+            stderr="",
+            exit_code=-1,
+            duration=0.0,
+            result={},
+        )
+
+    run_result = run_program(["./eval.out"])
+    return compile_result, run_result
+
 
 def run_pytorch_script(  # noqa: C901
-    script_content: str,
-    reference_content: Optional[str] = None,
-    submission_content: Optional[str] = None,
+    sources: dict[str, str],
+    main: str,
     arch: int = None,
 ) -> RunResult:
     """
     Executes the provided PyTorch GPU kernel in an isolated environment
 
     Args:
-        script_content: The PyTorch script containing the GPU kernel to benchmark
-        reference_content: The (optional) reference code, used for leaderboards.
-        submission_content: The (optional) submission code, used for leaderboards.
+        sources: Files to generate
+        main: Which file to run. Must be one of the keys in sources.
         arch: The arch code for the compute/sm versions.
 
     Returns:
         RunResult
     """
     try:
-        # Write submission files to directory
-        if reference_content is not None:
-            with open("reference.py", "w") as f:
-                f.write(reference_content)
-
-        if submission_content is not None:
-            with open("train.py", "w") as f:
-                f.write(submission_content)
-
-        with open("eval.py", "w") as f:
-            f.write(script_content)
+        assert main in sources.keys()
 
-        return run_program(["python", "eval.py"])
+        # Write submission files to directory
+        for source, content in sources.items():
+            Path(source).write_text(content)
+        return run_program(["python", main])
 
     finally:
-        tmp_files = ["eval.py", "reference.py", "train.py"]
-        for f in tmp_files:
+        for f in sources.keys():
             if os.path.exists(f):
                 os.remove(f)

Original file line number	Diff line number	Diff line change
`@@ -59,7 +59,7 @@ async def run_modal(`
`59`	`59`	`"Running on Modal...\n> ⏳ Waiting for available GPU..."`
`60`	`60`	`)`
`61`	`61`
`62`		`- filename = "train.py" if script.filename.endswith(".py") else "train.cu"`
	`62`	`+ filename = "submission.py" if script.filename.endswith(".py") else "train.cu"`
`63`	`63`	`reference_content = None`
`64`	`64`	`if reference_script is not None or reference_code is not None:`
`65`	`65`	`reference_content = (`