Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/SYCLTLA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ if(NOT __SYCLTLA_INCLUDED)
FetchContent_Declare(
repo-sycl-tla
GIT_REPOSITORY https://github.com/intel/sycl-tla.git
GIT_TAG v0.6
GIT_TAG v0.7
GIT_SHALLOW OFF
)
FetchContent_GetProperties(repo-sycl-tla)
Expand Down
3 changes: 2 additions & 1 deletion src/ATen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

file(GLOB xpu_native_cpp "native/xpu/*.cpp" "native/sparse/*.cpp" "native/sparse/xpu/*.cpp" "native/nested/*.cpp" "native/nested/xpu/*.cpp" "native/transformers/*.cpp" "native/quantized/*.cpp" "native/transformers/xpu/flash_attn/*.cpp")
file(GLOB xpu_sycl "native/xpu/sycl/*.cpp" "native/sparse/xpu/sycl/*.cpp" "native/nested/xpu/sycl/*.cpp" "native/transformers/sycl/*.cpp" "native/quantized/sycl/*.cpp")
file(GLOB xpu_sycltla "native/transformers/xpu/flash_attn/sycltla/*.cpp")
file(GLOB xpu_sycltla "native/transformers/xpu/flash_attn/sycltla/*.cpp" "native/xpu/sycltla/*.cpp")

if(USE_ONEMKL_XPU)
file(GLOB xpu_mkl "native/xpu/mkl/*.cpp")
Expand Down Expand Up @@ -44,6 +44,7 @@ install_xpu_headers("native/quantized/xpu/sycl")
install_xpu_headers("native/sparse/xpu")
install_xpu_headers("native/sparse/xpu/sycl")
install_xpu_headers("native/transformers/xpu")
install_xpu_headers("native/xpu/sycltla")
install_xpu_headers("native/transformers/xpu/flash_attn")

if(xpu_ops_generated_headers)
Expand Down
42 changes: 42 additions & 0 deletions src/ATen/native/xpu/GroupedMM.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*/

#include <ATen/native/xpu/GroupedMM.h>

#ifdef USE_SYCLTLA
#include <ATen/native/xpu/sycltla/GroupedMM.h>
#endif

namespace at::native::xpu {

// Reports whether the grouped bf16 GEMM path is compiled in, i.e. whether
// torch-xpu-ops was built with SYCLTLA support.
bool is_grouped_mm_available() {
#ifdef USE_SYCLTLA
  constexpr bool kHasSycltla = true;
#else
  constexpr bool kHasSycltla = false;
#endif
  return kHasSycltla;
}

void bf16bf16_grouped_mm(
at::Tensor mat_a,
at::Tensor mat_b,
std::optional<at::Tensor> offs,
std::optional<at::Tensor> bias,
at::Tensor& out) {
#ifdef USE_SYCLTLA
at::xpu::detail::bf16bf16_grouped_mm(mat_a, mat_b, offs, bias, out);
#else
TORCH_CHECK(
false,
"bf16bf16_grouped_mm: torch-xpu-ops was not compiled with SYCLTLA support.");
#endif
}

} // namespace at::native::xpu
26 changes: 26 additions & 0 deletions src/ATen/native/xpu/GroupedMM.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*/

#pragma once

#include <ATen/ATen.h>

namespace at::native::xpu {

/// Returns true iff torch-xpu-ops was compiled with SYCLTLA support, i.e.
/// iff bf16bf16_grouped_mm below can actually run.
bool is_grouped_mm_available();

/// Grouped bf16 x bf16 matrix multiply writing into `out`. Dispatches to the
/// SYCLTLA kernel when available; otherwise throws c10::Error (TORCH_CHECK).
/// `offs` presumably delimits the groups and `bias` is an optional additive
/// term — confirm against the SYCLTLA kernel's contract.
void bf16bf16_grouped_mm(
at::Tensor mat_a,
at::Tensor mat_b,
std::optional<at::Tensor> offs,
std::optional<at::Tensor> bias,
at::Tensor& out);

} // namespace at::native::xpu
44 changes: 44 additions & 0 deletions src/ATen/native/xpu/ScaledGroupedMM.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*/

#include <ATen/native/xpu/ScaledGroupedMM.h>

#ifdef USE_SYCLTLA
#include <ATen/native/xpu/sycltla/ScaledGroupedMM.h>
#endif

namespace at::native::xpu {

// Reports whether the scaled (fp8) grouped GEMM path is compiled in, i.e.
// whether torch-xpu-ops was built with SYCLTLA support.
bool is_scaled_grouped_mm_available() {
#ifdef USE_SYCLTLA
  constexpr bool kHasSycltla = true;
#else
  constexpr bool kHasSycltla = false;
#endif
  return kHasSycltla;
}

void f8f8bf16_scaled_grouped_mm(
at::Tensor mat_a,
at::Tensor mat_b,
at::Tensor scale_a,
at::Tensor scale_b,
std::optional<at::Tensor> offs,
at::Tensor& out) {
#ifdef USE_SYCLTLA
at::xpu::detail::f8f8bf16_scaled_grouped_mm(
mat_a, mat_b, scale_a, scale_b, offs, out);
#else
TORCH_CHECK(
false,
"f8f8bf16_scaled_grouped_mm: torch-xpu-ops was not compiled with SYCLTLA support.");
#endif
}

} // namespace at::native::xpu
27 changes: 27 additions & 0 deletions src/ATen/native/xpu/ScaledGroupedMM.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*/

#pragma once

#include <ATen/ATen.h>

namespace at::native::xpu {

/// Returns true iff torch-xpu-ops was compiled with SYCLTLA support, i.e.
/// iff f8f8bf16_scaled_grouped_mm below can actually run.
bool is_scaled_grouped_mm_available();

/// Grouped fp8 x fp8 matrix multiply with per-operand scales, writing bf16
/// results into `out`. Dispatches to the SYCLTLA kernel when available;
/// otherwise throws c10::Error (TORCH_CHECK). `offs` presumably delimits the
/// groups — confirm against the SYCLTLA kernel's contract.
void f8f8bf16_scaled_grouped_mm(
at::Tensor mat_a,
at::Tensor mat_b,
at::Tensor scale_a,
at::Tensor scale_b,
std::optional<at::Tensor> offs,
at::Tensor& out);

} // namespace at::native::xpu
Loading
Loading