Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/SYCLTLA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ if(NOT __SYCLTLA_INCLUDED)
FetchContent_Declare(
repo-sycl-tla
GIT_REPOSITORY https://github.com/intel/sycl-tla.git
GIT_TAG v0.6
GIT_TAG v0.7
GIT_SHALLOW OFF
)
FetchContent_GetProperties(repo-sycl-tla)
Expand Down
3 changes: 2 additions & 1 deletion src/ATen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

file(GLOB xpu_native_cpp "native/xpu/*.cpp" "native/sparse/*.cpp" "native/sparse/xpu/*.cpp" "native/nested/*.cpp" "native/nested/xpu/*.cpp" "native/transformers/*.cpp" "native/quantized/*.cpp" "native/transformers/xpu/flash_attn/*.cpp")
file(GLOB xpu_sycl "native/xpu/sycl/*.cpp" "native/sparse/xpu/sycl/*.cpp" "native/nested/xpu/sycl/*.cpp" "native/transformers/sycl/*.cpp" "native/quantized/sycl/*.cpp")
file(GLOB xpu_sycltla "native/transformers/xpu/flash_attn/sycltla/*.cpp")
file(GLOB xpu_sycltla "native/transformers/xpu/flash_attn/sycltla/*.cpp" "native/xpu/sycltla/*.cpp")

if(USE_ONEMKL_XPU)
file(GLOB xpu_mkl "native/xpu/mkl/*.cpp")
Expand Down Expand Up @@ -44,6 +44,7 @@ install_xpu_headers("native/quantized/xpu/sycl")
install_xpu_headers("native/sparse/xpu")
install_xpu_headers("native/sparse/xpu/sycl")
install_xpu_headers("native/transformers/xpu")
install_xpu_headers("native/xpu/sycltla")
install_xpu_headers("native/transformers/xpu/flash_attn")

if(xpu_ops_generated_headers)
Expand Down
42 changes: 42 additions & 0 deletions src/ATen/native/xpu/GroupedMM.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*/

#include <ATen/native/xpu/GroupedMM.h>

#ifdef USE_SYCLTLA
#include <ATen/native/xpu/sycltla/GroupedMM.h>
#endif

namespace at::native::xpu {

// Reports whether the grouped bf16 GEMM path is compiled in, i.e. whether
// torch-xpu-ops was built with SYCLTLA support.
bool is_grouped_mm_available() {
#ifdef USE_SYCLTLA
  constexpr bool kHasSycltla = true;
#else
  constexpr bool kHasSycltla = false;
#endif
  return kHasSycltla;
}

void bf16bf16_grouped_mm(
at::Tensor mat_a,
at::Tensor mat_b,
std::optional<at::Tensor> offs,
std::optional<at::Tensor> bias,
at::Tensor& out) {
#ifdef USE_SYCLTLA
at::xpu::detail::bf16bf16_grouped_mm(mat_a, mat_b, offs, bias, out);
#else
TORCH_CHECK(
false,
"bf16bf16_grouped_mm: torch-xpu-ops was not compiled with SYCLTLA support.");
#endif
}

} // namespace at::native::xpu
26 changes: 26 additions & 0 deletions src/ATen/native/xpu/GroupedMM.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*/

#pragma once

#include <ATen/ATen.h>

namespace at::native::xpu {

/// Returns true iff torch-xpu-ops was compiled with SYCLTLA support, i.e.
/// iff bf16bf16_grouped_mm below can actually run.
bool is_grouped_mm_available();

/// Grouped bf16 x bf16 matrix multiply writing into `out`. Dispatches to the
/// SYCLTLA kernel when available; otherwise throws c10::Error (TORCH_CHECK).
/// `offs` presumably delimits the groups and `bias` is an optional additive
/// term — confirm against the SYCLTLA kernel's contract.
void bf16bf16_grouped_mm(
at::Tensor mat_a,
at::Tensor mat_b,
std::optional<at::Tensor> offs,
std::optional<at::Tensor> bias,
at::Tensor& out);

} // namespace at::native::xpu
44 changes: 44 additions & 0 deletions src/ATen/native/xpu/ScaledGroupedMM.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*/

#include <ATen/native/xpu/ScaledGroupedMM.h>

#ifdef USE_SYCLTLA
#include <ATen/native/xpu/sycltla/ScaledGroupedMM.h>
#endif

namespace at::native::xpu {

// Reports whether the scaled (fp8) grouped GEMM path is compiled in, i.e.
// whether torch-xpu-ops was built with SYCLTLA support.
bool is_scaled_grouped_mm_available() {
#ifdef USE_SYCLTLA
  constexpr bool kHasSycltla = true;
#else
  constexpr bool kHasSycltla = false;
#endif
  return kHasSycltla;
}

void f8f8bf16_scaled_grouped_mm(
at::Tensor mat_a,
at::Tensor mat_b,
at::Tensor scale_a,
at::Tensor scale_b,
std::optional<at::Tensor> offs,
at::Tensor& out) {
#ifdef USE_SYCLTLA
at::xpu::detail::f8f8bf16_scaled_grouped_mm(
mat_a, mat_b, scale_a, scale_b, offs, out);
#else
TORCH_CHECK(
false,
"f8f8bf16_scaled_grouped_mm: torch-xpu-ops was not compiled with SYCLTLA support.");
#endif
}

} // namespace at::native::xpu
27 changes: 27 additions & 0 deletions src/ATen/native/xpu/ScaledGroupedMM.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*/

#pragma once

#include <ATen/ATen.h>

namespace at::native::xpu {

/// Returns true iff torch-xpu-ops was compiled with SYCLTLA support, i.e.
/// iff f8f8bf16_scaled_grouped_mm below can actually run.
bool is_scaled_grouped_mm_available();

/// Grouped fp8 x fp8 matrix multiply with per-operand scales, writing bf16
/// results into `out`. Dispatches to the SYCLTLA kernel when available;
/// otherwise throws c10::Error (TORCH_CHECK). `offs` presumably delimits the
/// groups — confirm against the SYCLTLA kernel's contract.
void f8f8bf16_scaled_grouped_mm(
at::Tensor mat_a,
at::Tensor mat_b,
at::Tensor scale_a,
at::Tensor scale_b,
std::optional<at::Tensor> offs,
at::Tensor& out);

} // namespace at::native::xpu
Loading
Loading