Skip to content

Commit adad98f

Browse files
committed
still trying to debug gpu * and travis config
1 parent 73d82e3 commit adad98f

File tree

6 files changed

+34
-33
lines changed

6 files changed

+34
-33
lines changed

.travis.yml

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,7 @@ git:
77
addons:
88
apt:
99
update: true
10-
sources:
11-
- sourceline: 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main'
12-
key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
13-
- sourceline: 'ppa:ubuntu-toolchain-r/test'
1410
packages:
15-
- clang-10
16-
- gcc-10
17-
- g++-10
1811
- libomp-dev
1912
- mpic++
2013

Makefile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
U_CXX=g++-10 -std=c++0x
1+
U_CXX=$(CXX) -std=c++0x
22
U_LDLIBS=$(LDLIBS)
33
U_CCFLAGS=$(CCFLAGS)
44
CRUN=
@@ -21,10 +21,11 @@ openmp:
2121
$(eval U_LDLIBS+=-fopenmp)
2222

2323
gpu-nvidia: openmp
24-
$(eval U_CCFLAGS+=-DTARGET=1)
24+
$(eval U_CXX=g++-10 -std=c++0x)
2525
$(eval U_LDLIBS+=-fno-stack-protector -foffload=nvptx-none)
2626

2727
gpu-amd: openmp
28+
$(eval U_CXX=g++-10 -std=c++0x)
2829
$(eval U_CCFLAGS+=-DTARGET=1)
2930
$(eval U_LDLIBS+=-fno-stack-protector -foffload=amdgcn-amdhsa="-march=$(AMDGPU)")
3031

performance_testing/functions.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#include "../src/binary_arithmetic.hpp"
22

3+
#include <stdio.h> //for testing
34
void multiplication_mat_vect(Matrix mat, Vector vect) {
5+
printf("!! height : %d\n", mat.height); //for testing
46
mat * vect;
57
}
68

src/binary_arithmetic.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Matrix::Matrix(uint16_t _height, uint16_t _width) : height(_height), width(_widt
3333
MPI_Comm_size(MPI_COMM_WORLD, &size);
3434
#endif
3535
#if defined(_OPENMP) && defined(TARGET)
36-
auto *this_blocks = blocks;
36+
uint64_t *this_blocks = blocks;
3737
#pragma omp target enter data map(alloc:this_blocks[:_height * _width])
3838
#endif
3939
}
@@ -46,7 +46,7 @@ Matrix::Matrix(uint16_t _size) : height(_size), width(_size) {
4646
MPI_Comm_size(MPI_COMM_WORLD, &size);
4747
#endif
4848
#if defined(_OPENMP) && defined(TARGET)
49-
auto *this_blocks = blocks;
49+
uint64_t *this_blocks = blocks;
5050
#pragma omp target enter data map(alloc:this_blocks[:_size * _size])
5151
#endif
5252
}
@@ -59,7 +59,7 @@ Vector::Vector(uint16_t _size): height(_size) {
5959
MPI_Comm_size(MPI_COMM_WORLD, &size);
6060
#endif
6161
#if defined(_OPENMP) && defined(TARGET)
62-
auto *this_blocks = blocks;
62+
uint8_t *this_blocks = blocks;
6363
#pragma omp target enter data map(alloc:this_blocks[:_size])
6464
#endif
6565
}
@@ -82,7 +82,7 @@ Matrix::Matrix(Matrix const& other) : height(other.height), width(other.width) {
8282

8383
#if defined(_OPENMP) && defined(TARGET)
8484
uint16_t _size = height * width;
85-
auto *this_blocks = blocks;
85+
uint64_t *this_blocks = blocks;
8686
#pragma omp target enter data map(to:this_blocks[:_size])
8787
#endif
8888
}
@@ -99,7 +99,7 @@ Vector::Vector(Vector const& other) : height(other.height) {
9999

100100
#if defined(_OPENMP) && defined(TARGET)
101101
uint16_t _height = height;
102-
auto *this_blocks = blocks;
102+
uint8_t *this_blocks = blocks;
103103
#pragma omp target enter data map(to:this_blocks[:_height])
104104
#endif
105105
}
@@ -113,7 +113,7 @@ destructors
113113
Matrix::~Matrix(){
114114
#if defined(_OPENMP) && defined(TARGET)
115115
uint16_t _size = height * width;
116-
auto *this_blocks = blocks;
116+
uint64_t *this_blocks = blocks;
117117
#pragma omp target exit data map(delete:this_blocks[:_size])
118118
#endif
119119
free(blocks);
@@ -122,7 +122,7 @@ Matrix::~Matrix(){
122122
Vector::~Vector(){
123123
#if defined(_OPENMP) && defined(TARGET)
124124
uint16_t _height = height;
125-
auto *this_blocks = blocks;
125+
uint8_t *this_blocks = blocks;
126126
#pragma omp target exit data map(delete:this_blocks[:_height])
127127
#endif
128128
free(blocks);

src/binary_arithmetic.hpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,10 +104,13 @@ class Matrix {
104104
_OPENMP_GPU_PRAGMA("omp declare target")
105105
inline uint64_t transpose_block(uint64_t const& block) const;
106106
inline uint8_t multiply_block_byte(uint64_t const& block, uint8_t const& vect) const;
107-
inline uint64_t multiply_block_word(uint64_t const& block0, uint64_t const& block1, uint64_t const& block2, uint64_t const& block3, \
108-
uint64_t const& block4, uint64_t const& block5, uint64_t const& block6, uint64_t const& block7, \
109-
uint8_t const& vect) const;
110107
inline uint64_t multiply_block_block(uint64_t const& block_left, uint64_t const& block_right) const;
108+
109+
#if defined(_OPENMP) && defined(TARGET)
110+
inline uint64_t multiply_block_word(uint64_t const& block0, uint64_t const& block1, uint64_t const& block2, uint64_t const& block3, \
111+
uint64_t const& block4, uint64_t const& block5, uint64_t const& block6, uint64_t const& block7, \
112+
uint8_t const& vect) const;
113+
#endif
111114
_OPENMP_GPU_PRAGMA("omp end declare target")
112115

113116
//for comparisons

src/member/block_arithmetic.inl

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -39,20 +39,6 @@ inline uint8_t Matrix::multiply_block_byte(uint64_t const& block, uint8_t const
3939
return sum;
4040
}
4141

42-
inline uint64_t Matrix::multiply_block_word(uint64_t const& block0, uint64_t const& block1, uint64_t const& block2, uint64_t const& block3, \
43-
uint64_t const& block4, uint64_t const& block5, uint64_t const& block6, uint64_t const& block7, \
44-
uint8_t const& vect) const {
45-
uint64_t res = multiply_block_byte(block7, vect);
46-
res = (res << 8) | multiply_block_byte(block6, vect);
47-
res = (res << 8) | multiply_block_byte(block5, vect);
48-
res = (res << 8) | multiply_block_byte(block4, vect);
49-
res = (res << 8) | multiply_block_byte(block3, vect);
50-
res = (res << 8) | multiply_block_byte(block2, vect);
51-
res = (res << 8) | multiply_block_byte(block1, vect);
52-
53-
return (res << 8) | multiply_block_byte(block0, vect);
54-
}
55-
5642
inline uint64_t Matrix::multiply_block_block(uint64_t const& block_left, uint64_t const& block_right) const { //changed to accommodate the switch in block indices, check the readme
5743
uint64_t res = 0;
5844
//uint64_t block_right_t = transpose_block(block_right);
@@ -72,3 +58,19 @@ inline uint64_t Vector::multiply_byte_byte(uint8_t const& vect_left, uint8_t co
7258

7359
return res;
7460
}
61+
62+
#if defined(_OPENMP) && defined(TARGET)
63+
inline uint64_t Matrix::multiply_block_word(uint64_t const& block0, uint64_t const& block1, uint64_t const& block2, uint64_t const& block3, \
64+
uint64_t const& block4, uint64_t const& block5, uint64_t const& block6, uint64_t const& block7, \
65+
uint8_t const& vect) const {
66+
uint64_t res = multiply_block_byte(block7, vect);
67+
res = (res << 8) | multiply_block_byte(block6, vect);
68+
res = (res << 8) | multiply_block_byte(block5, vect);
69+
res = (res << 8) | multiply_block_byte(block4, vect);
70+
res = (res << 8) | multiply_block_byte(block3, vect);
71+
res = (res << 8) | multiply_block_byte(block2, vect);
72+
res = (res << 8) | multiply_block_byte(block1, vect);
73+
74+
return (res << 8) | multiply_block_byte(block0, vect);
75+
}
76+
#endif

0 commit comments

Comments
 (0)