Skip to content

Commit e70a42b

Browse files
committed
Add USM (and therefore cg_streaming) support to the Kokkos backend.
1 parent e7558c6 commit e70a42b

File tree

16 files changed

+898
-113
lines changed

16 files changed

+898
-113
lines changed
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/**
2+
* @file
3+
* @author Alexander Van Craen
4+
* @author Marcel Breyer
5+
* @copyright 2018-today The PLSSVM project - All Rights Reserved
6+
* @license This file is part of the PLSSVM project which is released under the MIT license.
7+
* See the LICENSE.md file in the project root for full license information.
8+
*
9+
* @brief Function to list all available memory spaces at compile time.
10+
* @note Must be a separate file so that the Kokkos header does not have to be included in the "execution_space.hpp" file.
11+
*/
12+
13+
#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_CONSTEXPR_AVAILABLE_MEMORY_SPACES_HPP_
14+
#define PLSSVM_BACKENDS_KOKKOS_DETAIL_CONSTEXPR_AVAILABLE_MEMORY_SPACES_HPP_
15+
16+
#include "plssvm/backends/Kokkos/memory_space.hpp" // plssvm::kokkos::memory_space
17+
18+
#include <array> // std::array
19+
20+
namespace plssvm::kokkos::detail {
21+
22+
/**
23+
* @brief List all available Kokkos::MemorySpaces at compile time.
24+
* @details The `memory_space::host_space` is always available! The CUDA, HIP, and SYCL entries (each `*_space` together with its `*_usm_space` counterpart) are only part of the array if the respective `PLSSVM_KOKKOS_BACKEND_ENABLE_CUDA`, `PLSSVM_KOKKOS_BACKEND_ENABLE_HIP`, or `PLSSVM_KOKKOS_BACKEND_ENABLE_SYCL` macro is defined. The size of the returned array, therefore, depends on the enabled backends.
25+
* @return a `std::array` containing all available memory spaces (`[[nodiscard]]`)
26+
*/
27+
[[nodiscard]] inline constexpr auto constexpr_available_memory_spaces() noexcept {
28+
// Note: every entry ends with a comma so that the conditionally compiled sections can be combined freely; the resulting trailing comma after the last entry is explicitly allowed by the standard
29+
return std::array{
30+
memory_space::host_space,
31+
#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_CUDA)
32+
memory_space::cuda_space,
33+
memory_space::cuda_usm_space,
34+
#endif
35+
#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_HIP)
36+
memory_space::hip_space,
37+
memory_space::hip_usm_space,
38+
#endif
39+
#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_SYCL)
40+
memory_space::sycl_space,
41+
memory_space::sycl_usm_space,
42+
#endif
43+
};
44+
}
45+
46+
} // namespace plssvm::kokkos::detail
47+
48+
#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_CONSTEXPR_AVAILABLE_MEMORY_SPACES_HPP_

include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp

Lines changed: 51 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,14 @@
1212
#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_VIEW_WRAPPER_HPP_
1313
#define PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_VIEW_WRAPPER_HPP_
1414

15-
#include "plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp" // plssvm::kokkos::detail::constexpr_available_execution_spaces
16-
#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper
17-
#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space
18-
#include "plssvm/backends/Kokkos/execution_space_type_traits.hpp" // plssvm::kokkos::execution_space_to_kokkos_type_t
19-
#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t
15+
#include "plssvm/backends/Kokkos/detail/constexpr_available_memory_spaces.hpp" // plssvm::kokkos::detail::constexpr_available_memory_spaces
16+
#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper
17+
#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space
18+
#include "plssvm/backends/Kokkos/memory_space.hpp" // plssvm::kokkos::memory_space
19+
#include "plssvm/backends/Kokkos/memory_space_type_traits.hpp" // plssvm::kokkos::{memory_space_to_kokkos_type_t, kokkos_execution_space_to_kokkos_memory_space_t}
20+
#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t
2021

21-
#include "Kokkos_Core.hpp" // Kokkos::View, Kokkos::ExecutionSpace
22+
#include "Kokkos_Core.hpp" // Kokkos::View, Kokkos::MemorySpace
2223

2324
#include <array> // std::array
2425
#include <cstddef> // std::size_t
@@ -38,27 +39,27 @@ struct create_view_variant_type_helper;
3839

3940
/**
4041
* @brief Helper struct to create a `std::variant` containing all available Kokkos::View types by iterating over the `std::array` of
41-
* `plssvm::kokkos::execution_space` values as returned by `plssvm::kokkos::detail::constexpr_available_execution_spaces()`.
42+
* `plssvm::kokkos::memory_space` values as returned by `plssvm::kokkos::detail::constexpr_available_memory_spaces()`.
4243
* @tparam T the value type of the underlying Kokkos::View
4344
* @tparam Is the indices to index the `std::array`
4445
*/
4546
template <typename T, std::size_t... Is>
4647
struct create_view_variant_type_helper<T, std::index_sequence<Is...>> {
47-
/// The array containing all available execution spaces.
48-
constexpr static auto array = detail::constexpr_available_execution_spaces();
48+
/// The array containing all available memory spaces.
49+
constexpr static auto array = detail::constexpr_available_memory_spaces();
4950
/// The resulting variant type.
50-
using type = std::variant<Kokkos::View<T, execution_space_to_kokkos_type_t<array[Is]>>...>;
51+
using type = std::variant<Kokkos::View<T, memory_space_to_kokkos_type_t<array[Is]>>...>;
5152
};
5253

5354
/**
5455
* @brief Create a `std::variant` containing all available Kokkos::View types by iterating over the `std::array` of
55-
* `plssvm::kokkos::execution_space` values as returned by `plssvm::kokkos::detail::constexpr_available_execution_spaces()`.
56+
* `plssvm::kokkos::memory_space` values as returned by `plssvm::kokkos::detail::constexpr_available_memory_spaces()`.
5657
* @tparam T the value type of the underlying Kokkos::View
5758
*/
5859
template <typename T>
5960
struct create_view_variant_type {
6061
/// The number of types in the final variant.
61-
constexpr static std::size_t N = detail::constexpr_available_execution_spaces().size();
62+
constexpr static std::size_t N = detail::constexpr_available_memory_spaces().size();
6263
/// The final variant type.
6364
using type = typename create_view_variant_type_helper<T, std::make_index_sequence<N>>::type;
6465
};
@@ -82,37 +83,49 @@ class device_view_wrapper {
8283

8384
/**
8485
* @brief Construct the wrapper using the provided Kokkos::View instance by forwarding its value to the underlying `std::variant`.
85-
* @tparam ExecutionSpace the used Kokkos::ExecutionSpace type of the Kokkos::View
86+
* @tparam MemorySpace the used Kokkos::MemorySpace type of the Kokkos::View
8687
* @param[in] view the Kokkos::View instance
8788
*/
88-
template <typename ExecutionSpace>
89-
explicit device_view_wrapper(Kokkos::View<T, ExecutionSpace> &&view) :
90-
v_{ std::move(view) } { }
89+
template <typename MemorySpace>
90+
explicit device_view_wrapper(Kokkos::View<T, MemorySpace> &&view, const bool use_usm_allocations = false) :
91+
v_{ std::move(view) },
92+
use_usm_allocations_{ use_usm_allocations } { }
9193

9294
/**
9395
* @brief Given the provided `execution_space` enum value and the @p use_usm_allocations flag, tries to get the `std::variant` alternative holding the Kokkos::View in the corresponding Kokkos::MemorySpace.
9496
* @tparam space the `execution_space` enum value
97+
* @tparam use_usm_allocations if `true` use USM allocations
9598
* @return the Kokkos::View instance (`[[nodiscard]]`)
9699
*/
97-
template <execution_space space>
98-
[[nodiscard]] Kokkos::View<T, execution_space_to_kokkos_type_t<space>> &get() {
99-
return std::get<Kokkos::View<T, execution_space_to_kokkos_type_t<space>>>(v_);
100+
template <execution_space space, bool use_usm_allocations = false>
101+
[[nodiscard]] auto &get() {
102+
constexpr memory_space mem_space = execution_space_to_memory_space_v<space, use_usm_allocations>;
103+
return std::get<Kokkos::View<T, memory_space_to_kokkos_type_t<mem_space>>>(v_);
100104
}
101105

102106
/**
103107
* @copydoc plssvm::kokkos::detail::device_view_wrapper::get
104108
*/
105-
template <execution_space space>
106-
[[nodiscard]] const Kokkos::View<T, execution_space_to_kokkos_type_t<space>> &get() const {
107-
return std::get<Kokkos::View<T, execution_space_to_kokkos_type_t<space>>>(v_);
109+
template <execution_space space, bool use_usm_allocations = false>
110+
[[nodiscard]] const auto &get() const {
111+
constexpr memory_space mem_space = execution_space_to_memory_space_v<space, use_usm_allocations>;
112+
return std::get<Kokkos::View<T, memory_space_to_kokkos_type_t<mem_space>>>(v_);
108113
}
109114

110115
/**
111-
* @brief Return the `execution_space` enum value of the currently active `std::variant` Kokkos::View type.
112-
* @return the `execution_space` enum value (`[[nodiscard]]`)
116+
* @brief Return the `memory_space` enum value of the currently active `std::variant` Kokkos::View type.
117+
* @return the `memory_space` enum value (`[[nodiscard]]`)
113118
*/
114-
[[nodiscard]] execution_space get_execution_space() const noexcept {
115-
return detail::constexpr_available_execution_spaces()[v_.index()];
119+
[[nodiscard]] constexpr memory_space get_memory_space() const noexcept {
120+
return detail::constexpr_available_memory_spaces()[v_.index()];
121+
}
122+
123+
/**
124+
* @brief Check whether USM allocations are used.
125+
* @return `true` if USM allocations are used, `false` otherwise (`[[nodiscard]]`)
126+
*/
127+
[[nodiscard]] bool uses_usm_allocations() const noexcept {
128+
return use_usm_allocations_;
116129
}
117130

118131
/**
@@ -164,21 +177,29 @@ class device_view_wrapper {
164177
private:
165178
/// The wrapped `std::variant` type.
166179
variant_type v_;
180+
/// `true` if USM allocations and, therefore, other Kokkos::MemorySpaces, are used.
181+
bool use_usm_allocations_;
167182
};
168183

169184
/**
170185
* @brief Given a @p device and the number of elements @p size, creates a Kokkos::View in the memory space that belongs to the device's Kokkos::ExecutionSpace (optionally a USM memory space, see @p use_usm_allocations).
171186
* @tparam T the value type of the underlying Kokkos::View
172187
* @param[in] device the device for which this view should be allocated
173188
* @param[in] size the size of the Kokkos::View (number of elements, **not** bytes!)
174-
* @return a Kokkos::View wrapper where the active member of the internal `std::variant` corresponds to the Kokkos::View in the Kokkos::ExecutionSpace specified by @p space (`[[nodiscard]]`)
189+
* @param[in] use_usm_allocations decide whether a USM memory space should be used or not
190+
* @return a Kokkos::View wrapper where the active member of the internal `std::variant` corresponds to the Kokkos::View in the Kokkos::MemorySpace based on the requested Kokkos::ExecutionSpace and @p use_usm_allocations (`[[nodiscard]]`)
175191
*/
176192
template <typename T>
177-
[[nodiscard]] device_view_wrapper<T> make_device_view_wrapper(const device_wrapper &device, const std::size_t size) {
193+
[[nodiscard]] device_view_wrapper<T> make_device_view_wrapper(const device_wrapper &device, const std::size_t size, const bool use_usm_allocations) {
178194
return device.execute_and_return([&](const auto &value) {
195+
// get the Kokkos execution space
179196
using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t<decltype(value)>;
180-
181-
return device_view_wrapper{ Kokkos::View<T, kokkos_execution_space_type>{ Kokkos::view_alloc(value, "device_ptr_view"), size } };
197+
// check whether we want to use USM allocations or not
198+
if (use_usm_allocations) {
199+
return device_view_wrapper{ Kokkos::View<T, kokkos_execution_space_to_kokkos_memory_space_t<kokkos_execution_space_type, true>>{ Kokkos::view_alloc(value, "usm_device_ptr_view"), size }, use_usm_allocations };
200+
} else {
201+
return device_view_wrapper{ Kokkos::View<T, kokkos_execution_space_to_kokkos_memory_space_t<kokkos_execution_space_type, false>>{ Kokkos::view_alloc(value, "device_ptr_view"), size }, use_usm_allocations };
202+
}
182203
});
183204
}
184205

include/plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@
1313
#define PLSSVM_BACKENDS_KOKKOS_CG_EXPLICIT_BLAS_HPP_
1414
#pragma once
1515

16-
#include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, PADDING_SIZE}
17-
#include "plssvm/target_platforms.hpp" // plssvm::target_platform
16+
#include "plssvm/backends/Kokkos/memory_space_type_traits.hpp" // plssvm::kokkos::kokkos_execution_space_to_kokkos_memory_space_t
17+
#include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, PADDING_SIZE}
18+
#include "plssvm/target_platforms.hpp" // plssvm::target_platform
1819

1920
#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::View, Kokkos::TeamPolicy, Kokkos::mdspan, Kokkos::dextents
2021

@@ -25,15 +26,21 @@ namespace plssvm::kokkos::detail {
2526
/**
2627
* @brief Perform an explicit BLAS SYMM operation: `C = alpha * A * B + beta * C` where @p A is a `m x k` symmetric matrix (memory optimized), @p B is a `k x n` matrix, @p C is a `m x n` matrix, and @p alpha and @p beta are scalars.
2728
* @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel
29+
* @tparam USMEnabledMemorySpace the Kokkos::MemorySpace that may use USM allocations
2830
* @tparam target the target platform
2931
*/
30-
template <typename ExecutionSpace, target_platform target>
32+
template <typename ExecutionSpace, typename USMEnabledMemorySpace, target_platform target>
3133
class device_kernel_symm {
34+
/**
35+
* @brief The type of the used Kokkos::View that may use USM allocations.
36+
*/
37+
template <typename T>
38+
using usm_device_view_type = Kokkos::View<T *, USMEnabledMemorySpace>; // possible USM allocations
3239
/**
3340
* @brief The type of the used Kokkos::View.
3441
*/
3542
template <typename T>
36-
using device_view_type = Kokkos::View<T *, ExecutionSpace>;
43+
using device_view_type = Kokkos::View<T *, kokkos_execution_space_to_kokkos_memory_space_t<ExecutionSpace, false>>; // no USM allocations
3744

3845
public:
3946
/**
@@ -51,7 +58,7 @@ class device_kernel_symm {
5158
* @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used
5259
* @param[in] grid_size_x the size of the execution grid in x-dimension
5360
*/
54-
device_kernel_symm(const std::size_t num_rows, const std::size_t num_rhs, const std::size_t device_num_rows, const std::size_t device_row_offset, const real_type alpha, device_view_type<const real_type> A, device_view_type<const real_type> B, const real_type beta, device_view_type<real_type> C, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) :
61+
device_kernel_symm(const std::size_t num_rows, const std::size_t num_rhs, const std::size_t device_num_rows, const std::size_t device_row_offset, const real_type alpha, usm_device_view_type<const real_type> A, device_view_type<const real_type> B, const real_type beta, device_view_type<real_type> C, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) :
5562
num_rows_{ num_rows },
5663
num_rhs_{ num_rhs },
5764
device_num_rows_{ device_num_rows },
@@ -173,7 +180,7 @@ class device_kernel_symm {
173180
const std::size_t device_num_rows_;
174181
const std::size_t device_row_offset_;
175182
const real_type alpha_;
176-
device_view_type<const real_type> A_;
183+
usm_device_view_type<const real_type> A_;
177184
device_view_type<const real_type> B_;
178185
const real_type beta_;
179186
device_view_type<real_type> C_;
@@ -187,15 +194,21 @@ class device_kernel_symm {
187194
* @brief Perform an explicit BLAS SYMM operation: `C = alpha * A * B + beta * C` where @p A is a `m x k` symmetric matrix (memory optimized), @p B is a `k x n` matrix, @p C is a `m x n` matrix, and @p alpha and @p beta are scalars.
188195
* @details In a multi-GPU setting, this function is responsible for mirroring down the columns this device is responsible for!
189196
* @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel
197+
* @tparam USMEnabledMemorySpace the Kokkos::MemorySpace that may use USM allocations
190198
* @tparam target the target platform
191199
*/
192-
template <typename ExecutionSpace, target_platform target>
200+
template <typename ExecutionSpace, typename USMEnabledMemorySpace, target_platform target>
193201
class device_kernel_symm_mirror {
202+
/**
203+
* @brief The type of the used Kokkos::View that may use USM allocations.
204+
*/
205+
template <typename T>
206+
using usm_device_view_type = Kokkos::View<T *, USMEnabledMemorySpace>; // possible USM allocations
194207
/**
195208
* @brief The type of the used Kokkos::View.
196209
*/
197210
template <typename T>
198-
using device_view_type = Kokkos::View<T *, ExecutionSpace>;
211+
using device_view_type = Kokkos::View<T *, kokkos_execution_space_to_kokkos_memory_space_t<ExecutionSpace, false>>; // no USM allocations
199212

200213
public:
201214
/**
@@ -214,7 +227,7 @@ class device_kernel_symm_mirror {
214227
* @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used
215228
* @param[in] grid_size_x the size of the execution grid in x-dimension
216229
*/
217-
device_kernel_symm_mirror(const std::size_t num_rows, const std::size_t num_rhs, const std::size_t num_mirror_rows, const std::size_t device_num_rows, const std::size_t device_row_offset, const real_type alpha, device_view_type<const real_type> A, device_view_type<const real_type> B, const real_type beta, device_view_type<real_type> C, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) :
230+
device_kernel_symm_mirror(const std::size_t num_rows, const std::size_t num_rhs, const std::size_t num_mirror_rows, const std::size_t device_num_rows, const std::size_t device_row_offset, const real_type alpha, usm_device_view_type<const real_type> A, device_view_type<const real_type> B, const real_type beta, device_view_type<real_type> C, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) :
218231
num_rows_{ num_rows },
219232
num_rhs_{ num_rhs },
220233
num_mirror_rows_{ num_mirror_rows },
@@ -332,7 +345,7 @@ class device_kernel_symm_mirror {
332345
const std::size_t device_num_rows_;
333346
const std::size_t device_row_offset_;
334347
const real_type alpha_;
335-
device_view_type<const real_type> A_;
348+
usm_device_view_type<const real_type> A_;
336349
device_view_type<const real_type> B_;
337350
const real_type beta_;
338351
device_view_type<real_type> C_;
@@ -352,7 +365,7 @@ class device_kernel_inplace_matrix_add {
352365
* @brief The type of the used Kokkos::View.
353366
*/
354367
template <typename T>
355-
using device_view_type = Kokkos::View<T *, ExecutionSpace>;
368+
using device_view_type = Kokkos::View<T *, kokkos_execution_space_to_kokkos_memory_space_t<ExecutionSpace, false>>; // no USM allocations
356369

357370
public:
358371
/**
@@ -426,7 +439,7 @@ class device_kernel_inplace_matrix_scale {
426439
* @brief The type of the used Kokkos::View.
427440
*/
428441
template <typename T>
429-
using device_view_type = Kokkos::View<T *, ExecutionSpace>;
442+
using device_view_type = Kokkos::View<T *, kokkos_execution_space_to_kokkos_memory_space_t<ExecutionSpace, false>>; // no USM allocations
430443

431444
public:
432445
/**

0 commit comments

Comments
 (0)