still trying to debug gpu * and travis config

jolatechno · jolatechno · commit adad98f7071b · 2020-12-30T16:58:58.000+01:00
diff --git a/.travis.yml b/.travis.yml
@@ -7,14 +7,7 @@ git:
 addons:
   apt:
     update: true
-    sources:
-      - sourceline: 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main'
-        key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
-      - sourceline: 'ppa:ubuntu-toolchain-r/test'
     packages:
-      - clang-10
-      - gcc-10
-      - g++-10
       - libomp-dev
       - mpic++
 
diff --git a/Makefile b/Makefile
@@ -1,4 +1,4 @@
-U_CXX=g++-10 -std=c++0x
+U_CXX=$(CXX) -std=c++0x
 U_LDLIBS=$(LDLIBS)
 U_CCFLAGS=$(CCFLAGS)
 CRUN=
@@ -21,10 +21,11 @@ openmp:
 	$(eval U_LDLIBS+=-fopenmp)
 
 gpu-nvidia: openmp
-	$(eval U_CCFLAGS+=-DTARGET=1)
+	$(eval U_CXX=g++-10 -std=c++0x) 
 	$(eval U_LDLIBS+=-fno-stack-protector -foffload=nvptx-none)
 
 gpu-amd:	openmp
+	$(eval U_CXX=g++-10 -std=c++0x)
 	$(eval U_CCFLAGS+=-DTARGET=1)
 	$(eval U_LDLIBS+=-fno-stack-protector -foffload=amdgcn-amdhsa="-march=$(AMDGPU)")
 
diff --git a/performance_testing/functions.hpp b/performance_testing/functions.hpp
@@ -1,6 +1,8 @@
 #include "../src/binary_arithmetic.hpp"
 
+#include <stdio.h> //for testing
 void multiplication_mat_vect(Matrix mat, Vector vect) {
+  printf("!! height : %d\n", mat.height); //for testing
   mat * vect;
 }
 
diff --git a/src/binary_arithmetic.cpp b/src/binary_arithmetic.cpp
@@ -33,7 +33,7 @@ Matrix::Matrix(uint16_t _height, uint16_t _width) : height(_height), width(_widt
     MPI_Comm_size(MPI_COMM_WORLD, &size);
   #endif
   #if defined(_OPENMP) && defined(TARGET)
-    auto *this_blocks = blocks;
+    uint64_t *this_blocks = blocks;
     #pragma omp target enter data map(alloc:this_blocks[:_height * _width])
   #endif
 }
@@ -46,7 +46,7 @@ Matrix::Matrix(uint16_t _size) : height(_size), width(_size) {
     MPI_Comm_size(MPI_COMM_WORLD, &size);
   #endif
   #if defined(_OPENMP) && defined(TARGET)
-    auto *this_blocks = blocks;
+    uint64_t *this_blocks = blocks;
     #pragma omp target enter data map(alloc:this_blocks[:_size * _size])
   #endif
 }
@@ -59,7 +59,7 @@ Vector::Vector(uint16_t _size): height(_size) {
     MPI_Comm_size(MPI_COMM_WORLD, &size);
   #endif
   #if defined(_OPENMP) && defined(TARGET)
-    auto *this_blocks = blocks;
+    uint8_t *this_blocks = blocks;
     #pragma omp target enter data map(alloc:this_blocks[:_size])
   #endif
 }
@@ -82,7 +82,7 @@ Matrix::Matrix(Matrix const& other) : height(other.height), width(other.width) {
 
   #if defined(_OPENMP) && defined(TARGET)
     uint16_t _size = height * width;
-    auto *this_blocks = blocks;
+    uint64_t *this_blocks = blocks;
     #pragma omp target enter data map(to:this_blocks[:_size])
   #endif
 }
@@ -99,7 +99,7 @@ Vector::Vector(Vector const& other) : height(other.height) {
 
   #if defined(_OPENMP) && defined(TARGET)
     uint16_t _height = height;
-    auto *this_blocks = blocks;
+    uint8_t *this_blocks = blocks;
     #pragma omp target enter data map(to:this_blocks[:_height])
   #endif
 }
@@ -113,7 +113,7 @@ destructors
 Matrix::~Matrix(){
   #if defined(_OPENMP) && defined(TARGET)
     uint16_t _size = height * width;
-    auto *this_blocks = blocks;
+    uint64_t *this_blocks = blocks;
     #pragma omp target exit data map(delete:this_blocks[:_size])
   #endif
   free(blocks);
@@ -122,7 +122,7 @@ Matrix::~Matrix(){
 Vector::~Vector(){
   #if defined(_OPENMP) && defined(TARGET)
     uint16_t _height = height;
-    auto *this_blocks = blocks;
+    uint8_t *this_blocks = blocks;
     #pragma omp target exit data map(delete:this_blocks[:_height])
   #endif
   free(blocks);
diff --git a/src/binary_arithmetic.hpp b/src/binary_arithmetic.hpp
@@ -104,10 +104,13 @@ class Matrix {
     _OPENMP_GPU_PRAGMA("omp declare target")
     inline uint64_t transpose_block(uint64_t const& block) const;
     inline uint8_t multiply_block_byte(uint64_t const& block, uint8_t const& vect) const;
-    inline uint64_t multiply_block_word(uint64_t const& block0, uint64_t const& block1, uint64_t const& block2, uint64_t const& block3, \
-      uint64_t const& block4, uint64_t const& block5, uint64_t const& block6, uint64_t const& block7, \
-      uint8_t const& vect) const;
     inline uint64_t multiply_block_block(uint64_t const& block_left, uint64_t const& block_right) const;
+
+    #if defined(_OPENMP) && defined(TARGET)
+      inline uint64_t multiply_block_word(uint64_t const& block0, uint64_t const& block1, uint64_t const& block2, uint64_t const& block3, \
+        uint64_t const& block4, uint64_t const& block5, uint64_t const& block6, uint64_t const& block7, \
+        uint8_t const& vect) const;
+    #endif
     _OPENMP_GPU_PRAGMA("omp end declare target")
 
     //for comparisons
diff --git a/src/member/block_arithmetic.inl b/src/member/block_arithmetic.inl
@@ -39,20 +39,6 @@ inline  uint8_t Matrix::multiply_block_byte(uint64_t const& block, uint8_t const
   return sum;
 }
 
-inline uint64_t Matrix::multiply_block_word(uint64_t const& block0, uint64_t const& block1, uint64_t const& block2, uint64_t const& block3, \
-  uint64_t const& block4, uint64_t const& block5, uint64_t const& block6, uint64_t const& block7, \
-  uint8_t const& vect) const {
-  uint64_t res = multiply_block_byte(block7, vect);
-  res = (res << 8) | multiply_block_byte(block6, vect);
-  res = (res << 8) | multiply_block_byte(block5, vect);
-  res = (res << 8) | multiply_block_byte(block4, vect);
-  res = (res << 8) | multiply_block_byte(block3, vect);
-  res = (res << 8) | multiply_block_byte(block2, vect);
-  res = (res << 8) | multiply_block_byte(block1, vect);
-
-  return (res << 8) | multiply_block_byte(block0, vect);
-}
-
 inline  uint64_t Matrix::multiply_block_block(uint64_t const& block_left, uint64_t const& block_right) const { //changed to acomodate the switch in block indices, check the readme
   uint64_t res = 0;
   //uint64_t block_right_t = transpose_block(block_right);
@@ -72,3 +58,19 @@ inline  uint64_t Vector::multiply_byte_byte(uint8_t const& vect_left, uint8_t co
 
   return res;
 }
+
+#if defined(_OPENMP) && defined(TARGET)
+  inline uint64_t Matrix::multiply_block_word(uint64_t const& block0, uint64_t const& block1, uint64_t const& block2, uint64_t const& block3, \
+    uint64_t const& block4, uint64_t const& block5, uint64_t const& block6, uint64_t const& block7, \
+    uint8_t const& vect) const {
+    uint64_t res = multiply_block_byte(block7, vect);
+    res = (res << 8) | multiply_block_byte(block6, vect);
+    res = (res << 8) | multiply_block_byte(block5, vect);
+    res = (res << 8) | multiply_block_byte(block4, vect);
+    res = (res << 8) | multiply_block_byte(block3, vect);
+    res = (res << 8) | multiply_block_byte(block2, vect);
+    res = (res << 8) | multiply_block_byte(block1, vect);
+
+    return (res << 8) | multiply_block_byte(block0, vect);
+  }
+#endif

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,8 @@`
`1`	`1`	`#include "../src/binary_arithmetic.hpp"`
`2`	`2`
	`3`	`+#include <stdio.h> //for testing`
`3`	`4`	`void multiplication_mat_vect(Matrix mat, Vector vect) {`
	`5`	`+ printf("!! height : %d\n", mat.height); //for testing`
`4`	`6`	`mat * vect;`
`5`	`7`	`}`
`6`	`8`