|
| 1 | +#include <iostream> |
| 2 | +#include <spblas/spblas.hpp> |
| 3 | + |
| 4 | +#include <cuda_runtime.h> |
| 5 | + |
| 6 | +#include "util.hpp" |
| 7 | + |
| 8 | +#include <fmt/core.h> |
| 9 | +#include <fmt/ranges.h> |
| 10 | + |
| 11 | +// SpMV example with all operands resident in device memory. |
| 12 | +// |
| 13 | +// Steps: generate a CSR matrix on the host, copy its three arrays and two |
| 14 | +// dense vectors to the device, wrap the device pointers in non-owning views, |
| 15 | +// call spblas::multiply, copy the result vector back to the host, and release |
| 16 | +// every device allocation. |
| 17 | +// |
| 18 | +// argc/argv are accepted but not used. Returns 0 on completion; CUDA API |
| 19 | +// failures are routed through CUDA_CHECK (presumably provided by util.hpp; |
| 20 | +// its failure behavior is not visible here). |
| 21 | +int main(int argc, char** argv) { |
| 22 | + using value_t = float; |
| 23 | + using index_t = spblas::index_t; |
| 24 | + using offset_t = spblas::offset_t; |
| 25 | + |
| 26 | + index_t m = 100; |
| 27 | + index_t n = 100; |
| 28 | + index_t nnz_in = 10; |
| 29 | + |
| 30 | + // NOTE(review): the banner advertises `y = alpha * A * x`, but no alpha |
| 31 | + // scaling is applied below (the scale call is commented out) -- confirm |
| 32 | + // which one is intended. |
| 33 | + fmt::print("\n\t###########################################################" |
| 34 | + "######################"); |
| 35 | + fmt::print("\n\t### Running SpMV Example:"); |
| 36 | + fmt::print("\n\t###"); |
| 37 | + fmt::print("\n\t### y = alpha * A * x"); |
| 38 | + fmt::print("\n\t###"); |
| 39 | + fmt::print("\n\t### with "); |
| 40 | + fmt::print("\n\t### A, in CSR format, of size ({}, {}) with nnz = {}", m, n, |
| 41 | + nnz_in); |
| 42 | + fmt::print("\n\t### x, a dense vector, of size ({}, {})", n, 1); |
| 43 | + fmt::print("\n\t### y, a dense vector, of size ({}, {})", m, 1); |
| 44 | + fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)", |
| 45 | + sizeof(spblas::index_t)); |
| 46 | + fmt::print("\n\t###########################################################" |
| 47 | + "######################"); |
| 48 | + fmt::print("\n"); |
| 49 | + |
| 50 | + // Host-side CSR arrays plus the matrix shape and stored-entry count. |
| 51 | + auto&& [values, rowptr, colind, shape, nnz] = |
| 52 | + spblas::generate_csr<value_t, index_t, offset_t>(m, n, nnz_in); |
| 53 | + |
| 54 | + // Host-side dense vectors: x is all ones, y is all zeros. |
| 55 | + std::vector<value_t> x(n, 1); |
| 56 | + std::vector<value_t> y(m, 0); |
| 57 | + |
| 58 | + // Device buffers mirroring the host arrays above. |
| 59 | + value_t* d_values; |
| 60 | + offset_t* d_rowptr; |
| 61 | + index_t* d_colind; |
| 62 | + value_t* d_x; |
| 63 | + value_t* d_y; |
| 64 | + |
| 65 | + CUDA_CHECK(cudaMalloc(&d_values, values.size() * sizeof(value_t))); |
| 66 | + CUDA_CHECK(cudaMalloc(&d_rowptr, rowptr.size() * sizeof(offset_t))); |
| 67 | + CUDA_CHECK(cudaMalloc(&d_colind, colind.size() * sizeof(index_t))); |
| 68 | + CUDA_CHECK(cudaMalloc(&d_x, x.size() * sizeof(value_t))); |
| 69 | + CUDA_CHECK(cudaMalloc(&d_y, y.size() * sizeof(value_t))); |
| 70 | + |
| 71 | + CUDA_CHECK(cudaMemcpy(d_values, values.data(), |
| 72 | + values.size() * sizeof(value_t), cudaMemcpyDefault)); |
| 73 | + CUDA_CHECK(cudaMemcpy(d_rowptr, rowptr.data(), |
| 74 | + rowptr.size() * sizeof(offset_t), cudaMemcpyDefault)); |
| 75 | + CUDA_CHECK(cudaMemcpy(d_colind, colind.data(), |
| 76 | + colind.size() * sizeof(index_t), cudaMemcpyDefault)); |
| 77 | + CUDA_CHECK( |
| 78 | + cudaMemcpy(d_x, x.data(), x.size() * sizeof(value_t), cudaMemcpyDefault)); |
| 79 | + CUDA_CHECK( |
| 80 | + cudaMemcpy(d_y, y.data(), y.size() * sizeof(value_t), cudaMemcpyDefault)); |
| 81 | + |
| 82 | + { |
| 83 | + // The views and the operation state live in this inner scope so they are |
| 84 | + // destroyed before the device buffers they point at are freed. |
| 85 | + spblas::csr_view<value_t, index_t, offset_t> a(d_values, d_rowptr, d_colind, |
| 86 | + shape, nnz); |
| 87 | + |
| 88 | + // Scale every value of `a` by 5 in place. |
| 89 | + // scale(5.f, a); |
| 90 | + |
| 91 | + std::span<value_t> x_span(d_x, n); |
| 92 | + std::span<value_t> y_span(d_y, m); |
| 93 | + |
| 94 | + // y = A * x |
| 95 | + spblas::spmv_state_t state; |
| 96 | + spblas::multiply(state, a, x_span, y_span); |
| 97 | + |
| 98 | + // Bring the result back into the host vector y. |
| 99 | + CUDA_CHECK( |
| 100 | + cudaMemcpy(y.data(), d_y, y.size() * sizeof(value_t), cudaMemcpyDefault)); |
| 101 | + } |
| 102 | + |
| 103 | + // Release every device allocation made above. |
| 104 | + CUDA_CHECK(cudaFree(d_y)); |
| 105 | + CUDA_CHECK(cudaFree(d_x)); |
| 106 | + CUDA_CHECK(cudaFree(d_colind)); |
| 107 | + CUDA_CHECK(cudaFree(d_rowptr)); |
| 108 | + CUDA_CHECK(cudaFree(d_values)); |
| 109 | + |
| 110 | + fmt::print("\tExample is completed!\n"); |
| 111 | + |
| 112 | + return 0; |
| 113 | +} |
0 commit comments