@@ -47,20 +47,20 @@ constexpr int ELTS_PER_THREAD = 16;
4747constexpr int TILE_DIM_Y = 64 ;
4848constexpr int TILE_DIM_X = 64 ;
4949
50- static_assert (ELTS_PER_THREAD == SCALE_DIM && " Hardcoded and fixed parameter\0 " );
50+ static_assert (ELTS_PER_THREAD == SCALE_DIM, " Hardcoded and fixed parameter" );
5151
52- static_assert ((THREADS_NUM * ELTS_PER_THREAD <= TILE_DIM_Y * TILE_DIM_X) &&
53- " Unbalanced threads workload\0 " );
52+ static_assert ((THREADS_NUM * ELTS_PER_THREAD <= TILE_DIM_Y * TILE_DIM_X),
53+ " Unbalanced threads workload" );
5454
55- static_assert (( TunableConfig::CHUNK_DIM_Y % TILE_DIM_Y == 0 ) &&
56- " Chunk size Y must be evenly divisible by the tile size Y\0 " );
57- static_assert (( TunableConfig::CHUNK_DIM_X % TILE_DIM_X == 0 ) &&
58- " Chunk size X must be evenly divisible by the tile size X\0 " );
55+ static_assert (TunableConfig::CHUNK_DIM_Y % TILE_DIM_Y == 0 ,
56+ " Chunk size Y must be evenly divisible by the tile size Y" );
57+ static_assert (TunableConfig::CHUNK_DIM_X % TILE_DIM_X == 0 ,
58+ " Chunk size X must be evenly divisible by the tile size X" );
5959
60- static_assert (( TILE_DIM_Y % SCALE_DIM == 0 ) &&
61- " Tile size Y must be evenly divisible by the scale dim\0 " );
62- static_assert (( TILE_DIM_X % SCALE_DIM == 0 ) &&
63- " Tile size X must be evenly divisible by the scale dim\0 " );
60+ static_assert (TILE_DIM_Y % SCALE_DIM == 0 ,
61+ " Tile size Y must be evenly divisible by the scale dim" );
62+ static_assert (TILE_DIM_X % SCALE_DIM == 0 ,
63+ " Tile size X must be evenly divisible by the scale dim" );
6464
6565constexpr int TILES_Y = TunableConfig::CHUNK_DIM_Y / TILE_DIM_Y;
6666constexpr int TILES_X = TunableConfig::CHUNK_DIM_X / TILE_DIM_X;
@@ -114,19 +114,19 @@ constexpr int THREADS_Y_TR = THREADS_NUM / THREADS_X_TR;
114114
115115constexpr int ITERATIONS_NORMAL = BUFF_DIM_Y / THREADS_Y_ROWWISE;
116116constexpr int ITERATIONS_TR = SCALES_PER_TILE_Y / THREADS_Y_TR;
117- static_assert (ITERATIONS_TR >= 1 && " Number of transpose iterations should be >=1\0 " );
118- static_assert (( SCALES_PER_TILE_Y % THREADS_Y_TR == 0 ) &&
119- " Partial transpose iterations are not supported\0 " );
117+ static_assert (ITERATIONS_TR >= 1 , " Number of transpose iterations should be >=1" );
118+ static_assert (SCALES_PER_TILE_Y % THREADS_Y_TR == 0 ,
119+ " Partial transpose iterations are not supported" );
120120
121121constexpr int BUFF_OUT_IT_OFFSET = BUFF_OUT_TR_DIM_X / ITERATIONS_TR / STAGES;
122122
123- static_assert (BUFF_DIM_Y >= SCALE_DIM &&
123+ static_assert (BUFF_DIM_Y >= SCALE_DIM,
124124 " Number of buffer rows must be greater or equal to the size of the columwise "
125- " scaling block\0 " );
125+ " scaling block" );
126126static_assert (TunableConfig::CHUNK_DIM_Y >= BUFF_DIM_Y);
127- static_assert (BUFF_DIM_Y >= THREADS_Y_ROWWISE &&
127+ static_assert (BUFF_DIM_Y >= THREADS_Y_ROWWISE,
128128 " Number of buffer rows must be greater or equal to the number of rowwise "
129- " processing threads in Y dimension\0 " );
129+ " processing threads in Y dimension" );
130130
131131// Number of 4-bit elements that span 32 banks (4-byte each) of shared memory
132132constexpr int TOTAL_BANKS_WIDTH = (32 * 4 * 8 ) / 4 ; // 256
0 commit comments