Skip to content

Commit 5738825

Browse files
committed
generate_input receives a random seed argument
1 parent 598ea6d commit 5738825

File tree

5 files changed

+50
-39
lines changed

5 files changed

+50
-39
lines changed

examples/identity_cuda/reference.cuh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <cstdlib>
77
#include <cmath>
88
#include <array>
9+
#include <random>
910
#include <iostream>
1011

1112
#define N_SIZES 10
@@ -15,13 +16,16 @@ const int Ns[N_SIZES] = {128, 256, 512, 1024, 2048,
1516
using input_t = std::array<std::vector<float>, N_SIZES>;
1617
using output_t = input_t;
1718

18-
input_t generate_input() {
19+
input_t generate_input(int seed) {
20+
std::mt19937 rng(seed);
1921
input_t data;
2022

23+
std::uniform_real_distribution<float> dist(0, 1);
24+
2125
for (int i = 0; i < N_SIZES; ++i) {
2226
data[i].resize(Ns[i]);
2327
for (int j = 0; j < Ns[i]; ++j) {
24-
data[i][j] = static_cast<float>(rand()) / RAND_MAX;
28+
data[i][j] = dist(rng);
2529
}
2630
}
2731

examples/identity_py/reference.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def ref_kernel(xs: List[torch.Tensor]) -> List[torch.Tensor]:
1616
return xs
1717

1818

19-
def generate_input() -> List[torch.Tensor]:
19+
def generate_input(seed: int) -> List[torch.Tensor]:
2020
"""
2121
Generates random input tensor of the specified shape.
2222
Returns:
@@ -34,8 +34,10 @@ def generate_input() -> List[torch.Tensor]:
3434
device = torch.device("cpu")
3535

3636
tensors = []
37+
rng = torch.Generator(device=device)
38+
rng.manual_seed(seed)
3739
for shape in shapes:
38-
tensors.append(torch.randn(shape, device=device))
40+
tensors.append(torch.randn(shape, device=device, generator=rng))
3941

4042
return tensors
4143

src/discord-cluster-manager/eval.cu

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -52,21 +52,24 @@ static void cuda_check(cudaError_t status, const char* expr, const char* file, i
5252

5353
#define cuda_check(expr) cuda_check(expr, #expr, __FILE__, __LINE__, __FUNCTION__)
5454

55-
void measure_runtime(PopcornOutput& logger) {
55+
void measure_runtime(PopcornOutput& logger, std::mt19937& rng) {
5656
std::cout << "warming up..." << std::endl;
5757

58-
for (int i = 0; i < WARMUP_RUNS; i++) {
59-
auto data = generate_input();
60-
// discard result; this is just warmup, we don't care what it returns
61-
(void)custom_kernel(data);
58+
{
59+
auto warmup_data = generate_input(rng());
60+
for (int i = 0; i < WARMUP_RUNS; i++) {
61+
// discard result; this is just warmup, we don't care what it returns
62+
(void)custom_kernel(warmup_data);
63+
cuda_check(cudaDeviceSynchronize());
64+
}
6265
}
63-
cuda_check(cudaDeviceSynchronize());
6466

6567
std::vector<std::int64_t> durations;
6668
durations.reserve(TIMED_RUNS);
6769

6870
for (int i = 0; i < TIMED_RUNS; i++) {
69-
auto data = generate_input();
71+
auto data = generate_input(rng());
72+
7073
// make a copy of the input data to be used by the reference implementation
7174
auto copy = data;
7275

@@ -124,7 +127,15 @@ int main() {
124127
return 111;
125128
}
126129

127-
auto data = generate_input();
130+
// get the seed
131+
const char *seed_str = std::getenv("POPCORN_SEED");
132+
int seed = 42;
133+
if (seed_str) {
134+
seed = std::stoi(seed_str);
135+
}
136+
137+
std::mt19937 rng(seed);
138+
auto data = generate_input(rng());
128139
auto reference_output = ref_kernel(data);
129140
auto submission_output = custom_kernel(data);
130141

@@ -133,6 +144,6 @@ int main() {
133144
return 112;
134145
}
135146

136-
measure_runtime(logger);
147+
measure_runtime(logger, rng);
137148
return 0;
138149
}

src/discord-cluster-manager/eval.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,9 @@ def log(self, key: str, value):
1616
print(f"{key}: {value}\n", file=self.channel)
1717

1818

19-
def correctness() -> bool:
19+
def correctness(rng: torch.Generator) -> bool:
2020
for _ in range(10): # check multiple times
21-
inputs = generate_input()
22-
21+
inputs = generate_input(torch.randint(0, int(2**31), (), generator=rng).item())
2322
custom_output = custom_kernel(inputs)
2423
ref_output = ref_kernel(inputs)
2524

@@ -30,22 +29,22 @@ def correctness() -> bool:
3029
return True
3130

3231

33-
def metric(logger: PopcornLogger):
32+
def metric(logger: PopcornLogger, rng: torch.Generator):
3433
warmup_runs = 10
3534
timed_runs = 100
3635

3736
# Warmup Code
3837
print("warming up...")
3938
for _ in range(warmup_runs):
40-
inputs = generate_input()
39+
inputs = generate_input(torch.randint(0, int(2**31), (), generator=rng).item())
4140
_ = custom_kernel(inputs)
4241
torch.cuda.synchronize()
4342

4443
# Timing Code
4544
times = []
4645

4746
for _ in range(timed_runs):
48-
inputs = generate_input()
47+
inputs = generate_input(torch.randint(0, int(2**31), (), generator=rng).item())
4948

5049
start_time = time.time()
5150
custom_output = custom_kernel(inputs)
@@ -82,10 +81,14 @@ def main():
8281
print(e, file=sys.stderr)
8382
exit(111)
8483

85-
if not correctness():
84+
seed = int(os.environ.get("POPCORN_SEED", 42))
85+
rng = torch.Generator()
86+
rng.manual_seed(seed)
87+
88+
if not correctness(rng):
8689
logger.log("check", "fail")
8790
exit(112)
88-
metric(logger)
91+
metric(logger, rng)
8992

9093

9194
if __name__ == "__main__":

src/discord-cluster-manager/run_eval.py

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def compile_cuda_script( # # noqa: C901
6363
arch: Architecture to compile for. If None, uses `native`
6464
include_dirs: additional include directories to supply to nvcc
6565
verbose: whether to print progress or be silent
66-
66+
seed: Seed value to use for generating test cases
6767
Returns:
6868
A `CompileResult` that summarizes the compilation process.
6969
@@ -125,11 +125,12 @@ def compile_cuda_script( # # noqa: C901
125125
)
126126

127127

128-
def run_program(args: list[str]) -> RunResult:
128+
def run_program(args: list[str], seed: int) -> RunResult:
129129
# set up a pipe so the tester can communicate its verdict with us
130130
env = os.environ.copy()
131131
pipe_read, pipe_write = os.pipe()
132132
env["POPCORN_FD"] = str(pipe_write)
133+
env["POPCORN_SEED"] = str(seed)
133134

134135
execution_start_time = time.perf_counter()
135136
run_process = subprocess.run(
@@ -173,6 +174,7 @@ def run_cuda_script( # # noqa: C901
173174
headers: dict[str, str] = None,
174175
arch: int = None,
175176
include_dirs: list[str] = None,
177+
seed: int = 42,
176178
) -> tuple[CompileResult, RunResult]:
177179
"""
178180
Executes the provided CUDA kernel in an isolated environment
@@ -184,6 +186,7 @@ def run_cuda_script( # # noqa: C901
184186
compile command.
185187
arch: The arch code for the compute/sm versions. If None, native arch is used.
186188
include_dirs: Additional include directories, e.g., for thunderkittens/cutlass etc
189+
seed: Random seed to initialize the RNG for testing
187190
188191
Returns:
189192
tuple[CompileResult, RunResult]: CUDA compile/eval result information
@@ -218,9 +221,6 @@ def run_cuda_script( # # noqa: C901
218221
result={},
219222
)
220223

221-
run_result = run_program(["./eval.out"])
222-
return compile_result, run_result
223-
224224
# cleaning up all source files _before_ we let the user code run, just in
225225
# case there's something in there that the user isn't supposed to snoop
226226
finally:
@@ -229,25 +229,15 @@ def run_cuda_script( # # noqa: C901
229229
if os.path.exists(f):
230230
os.remove(f)
231231

232-
if not compile_result.success:
233-
return compile_result, RunResult(
234-
success=False,
235-
command="",
236-
stdout="",
237-
stderr="",
238-
exit_code=-1,
239-
duration=0.0,
240-
result={},
241-
)
242-
243-
run_result = run_program(["./eval.out"])
232+
run_result = run_program(["./eval.out"], seed=seed)
244233
return compile_result, run_result
245234

246235

247236
def run_pytorch_script( # noqa: C901
248237
sources: dict[str, str],
249238
main: str,
250239
arch: int = None,
240+
seed: int = 42,
251241
) -> RunResult:
252242
"""
253243
Executes the provided PyTorch GPU kernel in an isolated environment
@@ -256,6 +246,7 @@ def run_pytorch_script( # noqa: C901
256246
sources: Files to generate
257247
main: Which file to run. Must be one of the keys in sources.
258248
arch: The arch code for the compute/sm versions.
249+
seed: Random seed to initialize the RNG for testing
259250
260251
Returns:
261252
RunResult
@@ -266,7 +257,7 @@ def run_pytorch_script( # noqa: C901
266257
# Write submission files to directory
267258
for source, content in sources.items():
268259
Path(source).write_text(content)
269-
return run_program(["python", main])
260+
return run_program(["python", main], seed=seed)
270261

271262
finally:
272263
for f in sources.keys():

0 commit comments

Comments (0)