Skip to content

Commit 6d030d0

Browse files
committed
extended test for python implementation + adjusted exit codes for consistency
1 parent b1cedc2 commit 6d030d0

File tree

7 files changed

+136
-46
lines changed

7 files changed

+136
-46
lines changed

.github/workflows/cuda_test.yml

Lines changed: 0 additions & 32 deletions
This file was deleted.

.github/workflows/runner_ci.yml

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
name: Runner CI
2+
3+
on:
4+
push:
5+
branches: [ main ]
6+
pull_request:
7+
branches: [ main ]
8+
9+
jobs:
10+
check-cuda:
11+
runs-on: [gpumode-nvidia-arc]
12+
timeout-minutes: 10
13+
container:
14+
image: nvidia/cuda:12.4.0-devel-ubuntu22.04
15+
steps:
16+
- uses: actions/checkout@v3
17+
18+
- name: Setup Python
19+
uses: actions/setup-python@v5
20+
with:
21+
python-version: '3.10'
22+
23+
- name: Install pytest
24+
shell: bash
25+
run: pip install pytest
26+
27+
- name: Run script
28+
shell: bash
29+
run: pytest scripts/ci_test_cuda.py
30+
31+
env:
32+
CUDA_VISIBLE_DEVICES: 0
33+
34+
check-pytorch:
35+
runs-on: [gpumode-nvidia-arc]
36+
timeout-minutes: 10
37+
container:
38+
image: nvidia/cuda:12.4.0-devel-ubuntu22.04
39+
steps:
40+
- uses: actions/checkout@v3
41+
42+
- name: Setup Python
43+
uses: actions/setup-python@v5
44+
with:
45+
python-version: '3.10'
46+
47+
- name: Install uv
48+
uses: astral-sh/setup-uv@v3
49+
with:
50+
version: "latest"
51+
52+
- name: Setup Python environment
53+
run: |
54+
uv venv .venv
55+
echo "VIRTUAL_ENV=$PWD/.venv" >> $GITHUB_ENV
56+
echo "$PWD/.venv/bin" >> $GITHUB_PATH
57+
uv pip install numpy torch setuptools ninja pytest
58+
59+
- name: Run script
60+
shell: bash
61+
run: pytest scripts/ci_test_python.py
62+
63+
env:
64+
CUDA_VISIBLE_DEVICES: 0
65+

scripts/ci_test_cuda.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,9 @@ def test_cuda_runtime_error():
5555
assert run.success is False
5656
assert run.command == "./eval.out"
5757
assert "warming up..." in run.stdout
58-
assert "cudaDeviceSynchronize() at eval.cu(64) in `measure_runtime`" in run.stderr
58+
assert "cudaDeviceSynchronize() at eval.cu(63) in `measure_runtime`" in run.stderr
5959
assert "an illegal memory access was encountered" in run.stderr
60-
assert run.exit_code == 3
60+
assert run.exit_code == 110
6161
assert len(run.result) == 0
6262

6363

@@ -85,7 +85,7 @@ def test_cuda_validation_fail():
8585
# we never reach the benchmark part, because the test fails
8686
assert "warming up..." not in run.stdout
8787
assert "ERROR AT 0, 0" in run.stderr
88-
assert run.exit_code == 1
88+
assert run.exit_code == 112
8989
assert run.result["check"] == "fail"
9090

9191

scripts/ci_test_python.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import os
2+
import sys
3+
from pathlib import Path
4+
5+
if Path().resolve().name == "scripts":
6+
os.chdir("..")
7+
8+
sys.path.append("src/discord-cluster-manager")
9+
10+
from leaderboard_eval import py_eval
11+
from run_eval import run_pytorch_script
12+
13+
ref = Path("examples/identity_py/reference.py")
14+
15+
16+
def test_does_not_import():
17+
# the submission is not valid Python, so loading it fails before any test runs
18+
sub = """
19+
this is a syntax error
20+
"""
21+
22+
run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
23+
assert run.success is False
24+
assert run.exit_code == 1
25+
assert "IndentationError: unexpected indent\n" in run.stderr
26+
27+
28+
def test_error():
29+
# returns zeros instead of the input: runs fine, but doesn't match the reference
30+
sub = """
31+
import torch
32+
def custom_kernel(input):
33+
return [torch.zeros_like(i) for i in input]
34+
"""
35+
run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
36+
assert run.success is False
37+
assert run.command == "python eval.py"
38+
# we never reach the benchmark part, because the test fails
39+
assert "warming up..." not in run.stdout
40+
assert "mismatch found! custom implementation doesnt match reference." in run.stdout
41+
assert run.exit_code == 112
42+
assert run.result["check"] == "fail"
43+
44+
45+
def test_correct():
46+
sub = Path("examples/identity_py/submission.py").read_text()
47+
48+
run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
49+
assert run.success is True
50+
assert "warming up..." in run.stdout
51+
assert run.exit_code == 0
52+
assert run.result["check"] == "pass"

src/discord-cluster-manager/eval.cu

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,7 @@ static void cuda_check(cudaError_t status, const char* expr, const char* file, i
4646
<< line << ") in `"
4747
<< function << "`: "
4848
<< cudaGetErrorString(status) << std::endl;
49-
// following pytest convention, exit code 3 means internal error
50-
std::exit(3);
49+
std::exit(110);
5150
}
5251
}
5352

@@ -83,7 +82,7 @@ void measure_runtime(PopcornOutput& logger) {
8382
auto reference_output = ref_kernel(copy);
8483
if (!check_implementation(submission_output, reference_output)) {
8584
logger.log("check", "fail");
86-
std::exit(1);
85+
std::exit(112);
8786
}
8887

8988
}
@@ -122,7 +121,7 @@ int main() {
122121
int fd = std::stoi(output_fd);
123122
logger.File.reset(::fdopen(fd, "w"));
124123
} else {
125-
return 4; // pytest: usage error
124+
return 111;
126125
}
127126

128127
auto data = generate_input();
@@ -131,7 +130,7 @@ int main() {
131130

132131
if (!check_implementation(submission_output, reference_output)) {
133132
logger.log("check", "fail");
134-
return 1;
133+
return 112;
135134
}
136135

137136
measure_runtime(logger);

src/discord-cluster-manager/eval.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import math
22
import os
3+
import sys
34
import time
45

56
import torch
@@ -56,7 +57,7 @@ def metric(logger: PopcornLogger):
5657
torch.cuda.synchronize()
5758
if not check_implementation(custom_output, ref_output):
5859
logger.log("check", "fail")
59-
exit(1)
60+
exit(112)
6061

6162
total_time = sum(times)
6263
average_duration = total_time / timed_runs
@@ -75,10 +76,15 @@ def metric(logger: PopcornLogger):
7576

7677

7778
def main():
78-
logger = PopcornLogger(int(os.environ["POPCORN_FD"]))
79+
try:
80+
logger = PopcornLogger(int(os.environ["POPCORN_FD"]))
81+
except Exception as e:
82+
print(e, file=sys.stderr)
83+
exit(111)
84+
7985
if not correctness():
8086
logger.log("check", "fail")
81-
exit(1)
87+
exit(112)
8288
metric(logger)
8389

8490

src/discord-cluster-manager/run_eval.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,10 +141,10 @@ def run_program(args: list[str]) -> RunResult:
141141
key, _, value = line.partition(":")
142142
result_dict[key.strip()] = value.strip()
143143

144+
# 0 everything was fine
145+
# 112 program ran fine, but we detected a test failure (not reported as success)
144146
return RunResult(
145-
# TODO should we return 0 also on test failure?
146-
# TODO check what return codes python uses, e.g. on uncaught exception
147-
success=(run_process.returncode == 0 or run_process.returncode == 1),
147+
success=run_process.returncode == 0,
148148
command=_make_cmd(run_process.args),
149149
stdout=run_process.stdout,
150150
stderr=run_process.stderr,

0 commit comments

Comments
 (0)