Skip to content

Commit 33cc2a2

Browse files
committed
fix RNG and GEMM issues
1 parent 3f16652 commit 33cc2a2

File tree

2 files changed

+20
-1
lines changed

2 files changed

+20
-1
lines changed

applications/llama_3.2_1b/inference.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,16 @@ def inference(
138138
)
139139
logging.info("Model and tokenizer loaded.")
140140

141+
# Important: Set the seed again after initialization of the model. Each
142+
# call that initializes an nn.Linear layer updates the RNG state, because
143+
# weights are initialized with random values. For different JSON
144+
# configurations, we initialize a different number of linear layers,
145+
# so different configurations result in a different RNG state here. Since
146+
# we use random numbers to sample from the token distribution during
147+
# inference, it is important to have the same RNG state between runs so we
148+
# can have reproducible results across configurations.
149+
torch.manual_seed(1608560892)
150+
141151
hook_handles = []
142152
if save_outputs:
143153
if os.path.exists(output_data_path):

applications/llama_3.2_1b/src/operator/aie_gemm.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,19 @@ def __init__(
8282

8383
def set_up(self):
8484
# Describe required artifacts (xclbin, insts.bin)
85-
file_name_tile_base = f"{self.tile_m}x{self.tile_k}x{self.tile_n}"
85+
# file_name_tile_base = f"{self.tile_m}x{self.tile_k}x{self.tile_n}"
8686
file_name_total_base = (
8787
f"{self.M}x{self.K}x{self.N}_{self.tile_m}x{self.tile_k}x{self.tile_n}"
8888
)
89+
# FIXME: We should be able to reuse the same xclbin for same tile
90+
# sizes, only swapping out the instruction sequence for different
91+
# problem sizes. However, there seem to be cases where this does
92+
# not work and the GEMM appears to be misconfigured for the wrong
93+
# size (resulting in a timeout when trying to run it). Perhaps
94+
# XRT is caching something, or something is wrong with the run-
95+
# time parameter (synchronization)? For now, create separate
96+
# xclbins for each problem size.
97+
file_name_tile_base = file_name_total_base
8998
xclbin_kernel_name = f"gemm_{file_name_tile_base}"
9099
kernel_flags = [
91100
f"-DDIM_M={self.tile_m}",

0 commit comments

Comments (0)