Skip to content

Commit 4b69281

Browse files
authored
Merge branch 'main' into ngc92/seed
2 parents 35785a9 + 598ea6d commit 4b69281

File tree

21 files changed

+575
-497
lines changed

21 files changed

+575
-497
lines changed

.github/workflows/amd_workflow.yml

Lines changed: 20 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,102 +1,60 @@
11
name: AMD PyTorch Job
2-
32
on:
43
workflow_dispatch:
54
inputs:
6-
script_content:
7-
description: 'Content of Python script'
8-
required: true
9-
type: string
10-
filename:
11-
description: 'Name of Python script'
5+
payload:
6+
description: 'Content of the user submission, as json string'
127
required: true
138
type: string
14-
reference_content:
15-
description: 'Content of the reference code script (optional)'
16-
required: false
17-
type: string
18-
reference_filename:
19-
description: 'Name of reference script (supports .py or .cu)'
20-
required: false
21-
type: string
22-
eval_content:
23-
description: 'Content of the outer eval code script (optional)'
24-
required: false
25-
type: string
26-
eval_filename:
27-
description: 'Name of outer eval script (supports .py or .cu)'
9+
requirements:
10+
description: 'Contents for a requirements.txt file'
2811
required: false
2912
type: string
3013

3114
jobs:
32-
train:
15+
run:
3316
runs-on: [amdgpu-mi250-x86-64]
3417
timeout-minutes: 10
3518
env:
3619
VENV_DIR: /groups/aig_sharks/pytorch_venv
3720
steps:
21+
- uses: actions/checkout@v3
3822
- name: Setup Python
3923
uses: actions/setup-python@v5
4024
with:
4125
python-version: '3.10'
4226

43-
- name: Create script
44-
shell: python
45-
run: |
46-
with open('${{ github.event.inputs.filename }}', 'w') as f:
47-
f.write('''${{ github.event.inputs.script_content }}''')
48-
49-
- name: Create reference scripts if provided
50-
shell: bash
51-
run: |
52-
if [[ -n "${{ github.event.inputs.reference_filename }}" ]]; then
53-
echo "Creating reference script..."
54-
cat > "${{ github.event.inputs.reference_filename }}" <<EOL
55-
${{ github.event.inputs.reference_content }}
56-
EOL
57-
cat "${{ github.event.inputs.reference_filename }}" # Debug: Show file contents
58-
else
59-
echo "No reference content provided."
60-
fi
6127

62-
- name: Create eval scripts if provided
28+
- name: Create input files
6329
shell: bash
6430
run: |
65-
if [[ -n "${{ github.event.inputs.eval_filename }}" ]]; then
66-
echo "Creating reference script..."
67-
cat > "${{ github.event.inputs.eval_filename }}" <<EOL
68-
${{ github.event.inputs.eval_content }}
31+
cat > "payload.json" <<'EOL'
32+
${{ github.event.inputs.payload }}
6933
EOL
70-
cat "${{ github.event.inputs.eval_filename }}" # Debug: Show file contents
71-
else
72-
echo "No eval content provided."
73-
fi
7434
7535
- name: Setup Virtual Environment and Install Dependencies
36+
shell: bash
7637
run: |
7738
python -m venv ${VENV_DIR}
7839
source ${VENV_DIR}/bin/activate
7940
pip install --upgrade pip
80-
pip install --pre pytorch-triton-rocm==3.1.0+cf34004b8a torch==2.6.0.dev20241023+rocm6.2 --index-url https://download.pytorch.org/whl/nightly/rocm6.2
8141
42+
if [[ -n "${{ github.event.inputs.requirements }}" ]]; then
43+
cat > "requirements.txt" <<'EOL'
44+
${{ github.event.inputs.requirements }}
45+
EOL
46+
pip install -r "requirements.txt"
47+
fi
8248
- name: Run script
8349
shell: bash
8450
run: |
85-
if [[ -n "${{ github.event.inputs.eval_content }}" ]]; then
86-
echo "Running Python file..."
87-
python3 "${{ github.event.inputs.eval_filename }}" > training.log 2>&1
88-
cat training.log # Debug: show output
89-
else
90-
echo "Running Python file..."
91-
python3 "${{ github.event.inputs.filename }}" > training.log 2>&1
92-
cat training.log # Debug: show output
93-
fi
51+
python3 .github/workflows/runner.py
52+
cat result.json # Debug: show output
9453
9554
- name: Upload training artifacts
9655
uses: actions/upload-artifact@v4
9756
if: always()
9857
with:
99-
name: training-artifacts
58+
name: run-result
10059
path: |
101-
training.log
102-
${{ github.event.inputs.filename }}
60+
result.json

.github/workflows/nvidia_workflow.yml

Lines changed: 46 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -2,134 +2,68 @@ name: NVIDIA PyTorch/CUDA Job
22
on:
33
workflow_dispatch:
44
inputs:
5-
script_content:
6-
description: 'Content of Python/CUDA script (.py or .cu file)'
5+
payload:
6+
description: 'Content of the user submission, as json string'
77
required: true
88
type: string
9-
filename:
10-
description: 'Name of script (supports .py or .cu)'
11-
required: true
12-
type: string
13-
reference_content:
14-
description: 'Content of the reference code script (optional)'
15-
required: false
16-
type: string
17-
reference_filename:
18-
description: 'Name of reference script (supports .py or .cu)'
19-
required: false
20-
type: string
21-
eval_content:
22-
description: 'Content of the outer eval code script (optional)'
23-
required: false
24-
type: string
25-
eval_filename:
26-
description: 'Name of outer eval script (supports .py or .cu)'
9+
requirements:
10+
description: 'Contents for a requirements.txt file'
2711
required: false
2812
type: string
2913

3014
jobs:
31-
train:
15+
run:
3216
runs-on: [gpumode-nvidia-arc]
3317
timeout-minutes: 10
3418
container:
3519
image: nvidia/cuda:12.4.0-devel-ubuntu22.04
3620
steps:
37-
- name: Setup Python
38-
uses: actions/setup-python@v5
39-
with:
40-
python-version: '3.10'
41-
42-
- name: Install uv
43-
uses: astral-sh/setup-uv@v3
44-
with:
45-
version: "latest"
46-
47-
- name: Setup Python environment
48-
run: |
49-
uv venv .venv
50-
echo "VIRTUAL_ENV=$PWD/.venv" >> $GITHUB_ENV
51-
echo "$PWD/.venv/bin" >> $GITHUB_PATH
52-
53-
- name: Create script file
54-
shell: bash
55-
run: |
56-
cat << 'EOL' > ${{ github.event.inputs.filename }}
57-
${{ github.event.inputs.script_content }}
58-
EOL
59-
cat ${{ github.event.inputs.filename }} # Debug: show file contents
21+
- uses: actions/checkout@v3
22+
23+
- name: Setup Python
24+
uses: actions/setup-python@v5
25+
with:
26+
python-version: '3.10'
27+
28+
- name: Create input files
29+
shell: bash
30+
run: |
31+
cat > "payload.json" <<'EOL'
32+
${{ github.event.inputs.payload }}
33+
EOL
6034
61-
- name: Create reference scripts if provided
62-
shell: bash
63-
run: |
64-
if [[ -n "${{ github.event.inputs.reference_filename }}" ]]; then
65-
echo "Creating reference script..."
66-
cat > "${{ github.event.inputs.reference_filename }}" <<EOL
67-
${{ github.event.inputs.reference_content }}
68-
EOL
69-
cat "${{ github.event.inputs.reference_filename }}" # Debug: Show file contents
70-
else
71-
echo "No reference content provided."
72-
fi
35+
- name: Install uv
36+
uses: astral-sh/setup-uv@v3
37+
with:
38+
version: "latest"
7339

74-
- name: Create eval scripts if provided
75-
shell: bash
76-
run: |
77-
if [[ -n "${{ github.event.inputs.eval_filename }}" ]]; then
78-
echo "Creating eval script..."
79-
cat > "${{ github.event.inputs.eval_filename }}" <<EOL
80-
${{ github.event.inputs.eval_content }}
81-
EOL
82-
cat "${{ github.event.inputs.eval_filename }}" # Debug: Show file contents
83-
else
84-
echo "No eval content provided."
85-
fi
40+
- name: Setup Python environment
41+
shell: bash
42+
run: |
43+
uv venv .venv
44+
echo "VIRTUAL_ENV=$PWD/.venv" >> $GITHUB_ENV
45+
echo "$PWD/.venv/bin" >> $GITHUB_PATH
8646
87-
- name: Install dependencies
88-
run: |
89-
if grep -rE "(import torch|from torch)" "${{ github.event.inputs.filename }}"; then
90-
echo "PyTorch detected, installing torch"
91-
uv pip install numpy torch setuptools ninja
92-
fi
93-
if grep -rE "(import triton|from triton)" "${{ github.event.inputs.filename }}"; then
94-
echo "Triton detected, installing triton"
95-
uv pip install triton
96-
fi
47+
if [[ -n "${{ github.event.inputs.requirements }}" ]]; then
48+
cat > "requirements.txt" <<'EOL'
49+
${{ github.event.inputs.requirements }}
50+
EOL
51+
uv pip install -r "requirements.txt"
52+
fi
9753
98-
- name: Run script
99-
shell: bash
100-
run: |
101-
# Check if eval content exists without trying to evaluate it
102-
if [ -f "${{ github.event.inputs.eval_filename }}" ]; then
103-
if [[ "${{ github.event.inputs.eval_filename }}" == *.cu ]]; then
104-
echo "Compiling and running CUDA files..."
105-
CUDA_FILES="${{ github.event.inputs.eval_filename }}"
106-
echo "Compiling: $CUDA_FILES"
107-
nvcc $CUDA_FILES -o cuda_program
108-
./cuda_program > training.log 2>&1
109-
else
110-
echo "Running Python file..."
111-
python3 "${{ github.event.inputs.eval_filename }}" > training.log 2>&1
112-
fi
113-
else
114-
if [[ "${{ github.event.inputs.filename }}" == *.cu ]]; then
115-
echo "Compiling and running CUDA file..."
116-
nvcc "${{ github.event.inputs.filename }}" -o cuda_program
117-
./cuda_program > training.log 2>&1
118-
else
119-
echo "Running Python file..."
120-
python3 "${{ github.event.inputs.filename }}" > training.log 2>&1
121-
fi
122-
fi
123-
cat training.log # Debug: show output
54+
- name: Run script
55+
shell: bash
56+
run: |
57+
python .github/workflows/runner.py
58+
cat result.json # Debug: show output
12459
125-
- name: Upload training artifacts
126-
uses: actions/upload-artifact@v4
127-
if: always()
128-
with:
129-
name: training-artifacts
130-
path: |
131-
training.log
132-
${{ github.event.inputs.filename }}
60+
- name: Upload training artifacts
61+
uses: actions/upload-artifact@v4
62+
if: always()
63+
with:
64+
name: run-result
65+
path: |
66+
result.json
13367
13468
env:
13569
CUDA_VISIBLE_DEVICES: 0

.github/workflows/runner.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""Evaluate one leaderboard submission inside a GPU workflow job.

The script reads the job description from ``payload.json`` in the current
working directory, hands the contained sources to the CUDA or PyTorch
evaluation helper, and writes the outcome to ``result.json``.

Keys read from the payload:
    lang: ``"cu"`` selects the CUDA path; any other value selects the
        PyTorch path.
    reference.cuh / submission.cuh: optional header sources (CUDA path).
    reference.py / submission.py: optional module sources (PyTorch path).
"""

import json
import sys
from dataclasses import asdict
from pathlib import Path

# The evaluation helpers live in the repository rather than in an installed
# package, so their directory has to be on sys.path before they are imported.
sys.path.append("src/discord-cluster-manager")

from leaderboard_eval import cu_eval, py_eval
from run_eval import run_cuda_script, run_pytorch_script

payload_path = Path("payload.json")
# Decode explicitly as UTF-8: submissions may contain non-ASCII characters and
# the default encoding of read_text() depends on the runner's locale.
config = json.loads(payload_path.read_text(encoding="utf-8"))  # type: dict
# The payload file is removed as soon as it has been parsed.
payload_path.unlink()

if config["lang"] == "cu":
    # CUDA path: the evaluation driver is passed as the source file and the
    # reference/submission headers (when present) as a separate mapping.
    comp, run = run_cuda_script(
        {"eval.cu": cu_eval},
        {key: config[key] for key in ["reference.cuh", "submission.cuh"] if key in config},
        arch=None,
    )
    result = {"compile": asdict(comp), "run": asdict(run)}
else:
    # PyTorch path: every file goes into one mapping and eval.py is the entry
    # point; there is no compile step, so only the run result is reported.
    run = run_pytorch_script(
        {
            "eval.py": py_eval,
            **{key: config[key] for key in ["reference.py", "submission.py"] if key in config},
        },
        main="eval.py",
        arch=None,
    )
    result = {"run": asdict(run)}

# The helper results are converted with dataclasses.asdict before being
# serialized as JSON.
Path("result.json").write_text(json.dumps(result), encoding="utf-8")

docs/docs/creating-a-leaderboard/cuda-creations.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ Let's break down what's going on in this relatively short file:
5656
#include <iostream>
5757

5858
#include "reference.cuh"
59-
#include "train.cuh"
59+
#include "submission.cuh"
6060

6161
#define WARMUP_RUNS 10
6262
#define TIMED_RUNS 100
@@ -118,7 +118,7 @@ int main() {
118118
return 0;
119119
}
120120
```
121-
You'll notice that we include from headers named `reference.cuh` and `train.cuh`. These are the reference
121+
You'll notice that we include from headers named `reference.cuh` and `submission.cuh`. These are the reference
122122
code and submission code respectively, just renamed to fixed header names so we can include them. The
123123
general idea is that the evaluation code can treat the leaderboard as a basic abstraction, and only
124124
concern itself with three things:

docs/docs/creating-a-leaderboard/python-creations.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ Let's break down what's going on in this relatively short file:
5353
import torch
5454
import time
5555
from reference import ref_kernel, generate_input, check_implementation
56-
from train import custom_kernel
56+
from submission import custom_kernel
5757

5858

5959
def correctness() -> bool:

examples/identity_py/reference.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,6 @@ def generate_input(seed: int) -> List[torch.Tensor]:
4343

4444

4545
if __name__ == "__main__":
46-
inputs = generate_input(seed=42)
46+
inputs = generate_input()
4747
for idx, tensor in enumerate(inputs):
4848
print(f"Input Tensor {idx + 1} (Shape: {tensor.shape}):\n{tensor}")

0 commit comments

Comments
 (0)