Skip to content

Commit 8e4ad01

Browse files
yhmtsaiBenBrock
andauthored
create cuSPARSE backend and spmv (#40)
* add cusparse spmv and corresponding cmake * setup CMake to use the same test and example with thrust * add the cusparse example without thrust * Add cuSPARSE to CI. (#50) * add the cuSPARSE into README * accept list into add_device_test --------- Co-authored-by: Benjamin Brock <[email protected]>
1 parent 7f9de98 commit 8e4ad01

File tree

18 files changed

+519
-12
lines changed

18 files changed

+519
-12
lines changed

.github/workflows/ci.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,3 +132,21 @@ jobs:
132132
shell: bash -l {0}
133133
run: |
134134
./build/test/gtest/spblas-tests
135+
136+
cusparse:
137+
runs-on: 'gpu_nvidia'
138+
steps:
139+
- uses: actions/checkout@v4
140+
- name: CMake
141+
shell: bash -l {0}
142+
run: |
143+
module load cmake
144+
cmake -B build -DENABLE_CUSPARSE=ON -DCMAKE_PREFIX_PATH=/usr/local/cuda/targets/x86_64-linux/lib/cmake
145+
- name: Build
146+
shell: bash -l {0}
147+
run: |
148+
make -C build -j `nproc`
149+
- name: Test
150+
shell: bash -l {0}
151+
run: |
152+
./build/test/gtest/spblas-tests

CMakeLists.txt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ set(CMAKE_CXX_FLAGS "-O3 -march=native")
88

99
option(ENABLE_SANITIZERS "Enable Clang sanitizers" OFF)
1010
option(ENABLE_ROCSPARSE "Enable rocSPARSE" OFF)
11+
option(ENABLE_CUSPARSE "Enable cuSPARSE" OFF)
1112

1213
# Get includes, which declares the `spblas` library
1314
add_subdirectory(include)
@@ -73,6 +74,13 @@ if (ENABLE_ROCSPARSE)
7374
set(CMAKE_HIP_FLAGS "${CMAKE_CXX_FLAGS}")
7475
endif()
7576

77+
if (ENABLE_CUSPARSE)
78+
set(SPBLAS_GPU_BACKEND ON)
79+
find_package(CUDAToolkit REQUIRED)
80+
target_link_libraries(spblas INTERFACE CUDA::cudart CUDA::cusparse CUDA::cublas)
81+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSPBLAS_ENABLE_CUSPARSE")
82+
endif()
83+
7684
# turn on/off debug logging
7785
if (LOG_LEVEL)
7886
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLOG_LEVEL=${LOG_LEVEL}") # SPBLAS_DEBUG | SPBLAS_WARNING | SPBLAS_TRACE | SPBLAS_INFO
@@ -137,6 +145,13 @@ if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
137145
GIT_TAG 11.1.3)
138146
FetchContent_MakeAvailable(fmt)
139147

148+
if (ENABLE_ROCSPARSE)
149+
find_package(rocthrust REQUIRED)
150+
elseif (ENABLE_CUSPARSE)
151+
# It is required to be compiled on the node with available NVIDIA GPU
152+
find_package(Thrust REQUIRED)
153+
thrust_create_target(Thrust)
154+
endif()
140155
add_subdirectory(examples)
141156
add_subdirectory(test)
142157
endif()

README.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ brock@slothius:~/src/spblas-reference$ CXX=g++-13 cmake -B build
104104

105105
### Compiling with a Vendor Backend
106106
A vendor backend can be enabled by passing in an `-DENABLE_{BACKEND}=ON` switch
107-
to `cmake`. Currently, oneMKL, ArmPL, and rocSPARSE are the supported vendor
107+
to `cmake`. Currently, oneMKL, ArmPL, rocSPARSE and cuSPARSE are the supported vendor
108108
backends.
109109

110110
### Compiling with oneMKL
@@ -141,6 +141,17 @@ have ROCm installed in a non-standard location.
141141
brock@slothius:~/src/spblas-reference$ cmake -B build -DENABLE_ROCSPARSE=ON -DCMAKE_PREFIX_PATH=/opt/rocm-6.1.2
142142
```
143143

144+
### Compiling with cuSPARSE
145+
In order to compile with cuSPARSE, CUDA must be installed and the install
146+
location of CUDA added to `CMAKE_PREFIX_PATH`. Your package manager will likely
147+
take care of this for you, but you can also manually specify the location if you
148+
have CUDA installed in a non-standard location.
149+
150+
```bash
151+
# Explicitly set the location of CUDA using `CMAKE_PREFIX_PATH`.
152+
brock@slothius:~/src/spblas-reference$ cmake -B build -DENABLE_CUSPARSE=ON -DCMAKE_PREFIX_PATH=/usr/local/cuda-12.6
153+
```
154+
144155
#### Compiling with GCC on Mac OS
145156
There is a known linking issue when compiling with GCC on recent versions of
146157
Mac OS. This will cause a link error inside of `ld::AtomPlacement::findAtom()`.

examples/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,12 @@ if (NOT SPBLAS_GPU_BACKEND)
1111
add_example(matrix_opt_example)
1212
add_example(spmm_csc)
1313
else()
14-
find_package(rocthrust REQUIRED)
1514
add_subdirectory(device)
1615
endif()
1716

1817
if (ENABLE_ROCSPARSE)
1918
add_subdirectory(rocsparse)
2019
endif()
20+
if (ENABLE_CUSPARSE)
21+
add_subdirectory(cusparse)
22+
endif()

examples/cusparse/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
function(add_cuda_example example_name)
2+
add_executable(${example_name} ${example_name}.cpp)
3+
target_link_libraries(${example_name} spblas fmt)
4+
endfunction()
5+
6+
add_cuda_example(cusparse_simple_spmv)
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#include <iostream>
2+
#include <spblas/spblas.hpp>
3+
4+
#include <cuda_runtime.h>
5+
6+
#include "util.hpp"
7+
8+
#include <fmt/core.h>
9+
#include <fmt/ranges.h>
10+
11+
int main(int argc, char** argv) {
12+
using value_t = float;
13+
using index_t = spblas::index_t;
14+
using offset_t = spblas::offset_t;
15+
16+
index_t m = 100;
17+
index_t n = 100;
18+
index_t nnz_in = 10;
19+
20+
fmt::print("\n\t###########################################################"
21+
"######################");
22+
fmt::print("\n\t### Running SpMV Example:");
23+
fmt::print("\n\t###");
24+
fmt::print("\n\t### y = alpha * A * x");
25+
fmt::print("\n\t###");
26+
fmt::print("\n\t### with ");
27+
fmt::print("\n\t### A, in CSR format, of size ({}, {}) with nnz = {}", m, n,
28+
nnz_in);
29+
fmt::print("\n\t### x, a dense vector, of size ({}, {})", n, 1);
30+
fmt::print("\n\t### y, a dense vector, of size ({}, {})", m, 1);
31+
fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)",
32+
sizeof(spblas::index_t));
33+
fmt::print("\n\t###########################################################"
34+
"######################");
35+
fmt::print("\n");
36+
37+
auto&& [values, rowptr, colind, shape, nnz] =
38+
spblas::generate_csr<value_t, index_t, offset_t>(m, n, nnz_in);
39+
40+
value_t* d_values;
41+
offset_t* d_rowptr;
42+
index_t* d_colind;
43+
44+
CUDA_CHECK(cudaMalloc(&d_values, values.size() * sizeof(value_t)));
45+
CUDA_CHECK(cudaMalloc(&d_rowptr, rowptr.size() * sizeof(offset_t)));
46+
CUDA_CHECK(cudaMalloc(&d_colind, colind.size() * sizeof(index_t)));
47+
48+
CUDA_CHECK(cudaMemcpy(d_values, values.data(),
49+
values.size() * sizeof(value_t), cudaMemcpyDefault));
50+
CUDA_CHECK(cudaMemcpy(d_rowptr, rowptr.data(),
51+
rowptr.size() * sizeof(offset_t), cudaMemcpyDefault));
52+
CUDA_CHECK(cudaMemcpy(d_colind, colind.data(),
53+
colind.size() * sizeof(index_t), cudaMemcpyDefault));
54+
55+
spblas::csr_view<value_t, index_t, offset_t> a(d_values, d_rowptr, d_colind,
56+
shape, nnz);
57+
58+
// Scale every value of `a` by 5 in place.
59+
// scale(5.f, a);
60+
61+
std::vector<value_t> x(n, 1);
62+
std::vector<value_t> y(m, 0);
63+
64+
value_t* d_x;
65+
value_t* d_y;
66+
67+
CUDA_CHECK(cudaMalloc(&d_x, x.size() * sizeof(value_t)));
68+
CUDA_CHECK(cudaMalloc(&d_y, y.size() * sizeof(value_t)));
69+
70+
CUDA_CHECK(
71+
cudaMemcpy(d_x, x.data(), x.size() * sizeof(value_t), cudaMemcpyDefault));
72+
CUDA_CHECK(
73+
cudaMemcpy(d_y, y.data(), y.size() * sizeof(value_t), cudaMemcpyDefault));
74+
75+
std::span<value_t> x_span(d_x, n);
76+
std::span<value_t> y_span(d_y, m);
77+
78+
// y = A * x
79+
spblas::spmv_state_t state;
80+
spblas::multiply(state, a, x_span, y_span);
81+
82+
CUDA_CHECK(
83+
cudaMemcpy(y.data(), d_y, y.size() * sizeof(value_t), cudaMemcpyDefault));
84+
85+
fmt::print("\tExample is completed!\n");
86+
87+
return 0;
88+
}

examples/cusparse/util.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#pragma once
2+
3+
#include <cuda_runtime.h>
4+
5+
#define CUDA_CHECK(expression) \
6+
do { \
7+
const cudaError_t status = expression; \
8+
if (status != cudaSuccess) { \
9+
std::cerr << "CUDA error " << status << ": " \
10+
<< cudaGetErrorString(status) << " at " << __FILE__ << ":" \
11+
<< __LINE__ << std::endl; \
12+
} \
13+
} while (false)

examples/device/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
function(add_device_example example_name)
2+
add_executable(${example_name} ${example_name}.cpp)
23
if (ENABLE_ROCSPARSE)
34
set_source_files_properties(${example_name}.cpp PROPERTIES LANGUAGE HIP)
4-
# elseif (ENABLE_CUSPARSE)
5-
# cuSPARSE linking details will go here.
5+
elseif (ENABLE_CUSPARSE)
6+
target_link_libraries(${example_name} Thrust)
67
else()
78
message(FATAL_ERROR "Device backend not found.")
89
endif()
9-
add_executable(${example_name} ${example_name}.cpp)
1010
target_link_libraries(${example_name} spblas fmt)
1111
endfunction()
1212

include/spblas/backend/backend.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,7 @@
2121
#ifdef SPBLAS_ENABLE_ROCSPARSE
2222
#include <spblas/vendor/rocsparse/rocsparse.hpp>
2323
#endif
24+
25+
#ifdef SPBLAS_ENABLE_CUSPARSE
26+
#include <spblas/vendor/cusparse/cusparse.hpp>
27+
#endif

include/spblas/detail/types.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
#include <spblas/vendor/rocsparse/types.hpp>
2020
#endif
2121

22+
#ifdef SPBLAS_ENABLE_CUSPARSE
23+
#include <spblas/vendor/cusparse/types.hpp>
24+
#endif
25+
2226
namespace spblas {
2327

2428
#ifndef SPBLAS_VENDOR_BACKEND

0 commit comments

Comments
 (0)