#include <chrono>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <random>
#include <string>
#include <vector>

using namespace org::ttldtor::bits;
using namespace std::literals;
1617// NOLINTNEXTLINE
1718
18- TEST_CASE (" tutorial_fast_v2" ) {
19- uint64_t x = 1 ;
20- ankerl::nanobench::Bench ().run (" ++x" , [&]() {
21- ankerl::nanobench::doNotOptimizeAway (x += 1 );
22- });
19+ TEST_CASE (" bench_sal_sar_vs_builtin" ) {
20+ constexpr size_t N = 1u << 15 ;
21+
22+ std::mt19937_64 rng{0xB00B00 };
23+ // For comparison with builtin << we use unsigned types and non-negative shifts
24+ std::uniform_int_distribution<uint32_t > u32dist{0u , 0x7FFF'FFFFu };
25+ std::uniform_int_distribution<uint64_t > u64dist{0ull , (1ull << 50 )};
26+ std::uniform_int_distribution<unsigned > shldist{0u , 63u };
27+
28+ // For comparison with builtin >> we use signed types and non-negative shifts
29+ std::uniform_int_distribution<int32_t > s32dist{-(1 << 29 ), (1 << 29 )};
30+ std::uniform_int_distribution<int64_t > s64dist{-(1ll << 50 ), (1ll << 50 )};
31+ std::uniform_int_distribution<unsigned > shrdist{0u , 63u };
32+
33+ std::vector<uint32_t > u32 (N);
34+ std::vector<unsigned > shl32 (N);
35+ std::vector<uint64_t > u64 (N);
36+ std::vector<unsigned > shl64 (N);
37+
38+ std::vector<int32_t > s32 (N);
39+ std::vector<unsigned > shr32 (N);
40+ std::vector<int64_t > s64 (N);
41+ std::vector<unsigned > shr64 (N);
42+
43+ for (size_t i = 0 ; i < N; ++i) {
44+ u32 [i] = u32dist (rng);
45+ shl32[i] = shldist (rng) % 32 ;
46+ u64 [i] = u64dist (rng);
47+ shl64[i] = shldist (rng) % 64 ;
48+
49+ s32[i] = s32dist (rng);
50+ shr32[i] = shrdist (rng) % 32 ;
51+ s64[i] = s64dist (rng);
52+ shr64[i] = shrdist (rng) % 64 ;
53+ }
54+
55+ auto createBench = [&](auto title) {
56+ ankerl::nanobench::Bench bench;
57+
58+ bench.title (title)
59+ .unit (" op" )
60+ .batch (N)
61+ .warmup (100 )
62+ .minEpochTime (150ms)
63+ .minEpochIterations (60000 )
64+ .relative (true )
65+ .performanceCounters (true );
66+
67+ return bench;
68+ };
69+
70+ auto runSalBench = [&](auto & bench, const auto & typeName, const auto & values, const auto & shifts) {
71+ bench.run (" builtin << (" s + typeName + " )" , [&] {
72+ uint32_t acc = 0 ;
73+ for (size_t i = 0 ; i < N; ++i) {
74+ acc ^= values[i] << shifts[i];
75+ }
76+ ankerl::nanobench::doNotOptimizeAway (acc);
77+ });
78+
79+ bench.run (" sal<" s + typeName + " >" , [&] {
80+ uint32_t acc = 0 ;
81+ for (size_t i = 0 ; i < N; ++i) {
82+ acc ^= sal (values[i], shifts[i]);
83+ }
84+ ankerl::nanobench::doNotOptimizeAway (acc);
85+ });
86+ };
87+
88+
89+ auto runSarBench = [&](auto & bench, const auto & typeName, const auto & values, const auto & shifts) {
90+ bench.run (" builtin >> (" s + typeName + " )" , [&] {
91+ uint32_t acc = 0 ;
92+ for (size_t i = 0 ; i < N; ++i) {
93+ acc ^= values[i] >> shifts[i];
94+ }
95+ ankerl::nanobench::doNotOptimizeAway (acc);
96+ });
97+
98+ bench.run (" sar<" s + typeName + " >" , [&] {
99+ uint32_t acc = 0 ;
100+ for (size_t i = 0 ; i < N; ++i) {
101+ acc ^= sar (values[i], shifts[i]);
102+ }
103+ ankerl::nanobench::doNotOptimizeAway (acc);
104+ });
105+ };
106+
107+ auto salBench1 = createBench (" Compare sal vs builtin << (uint32_t)" );
108+ runSalBench (salBench1, " uint32_t" , u32 , shl32);
109+
110+ auto salBench2 = createBench (" Compare sal vs builtin << (uint64_t)" );
111+ runSalBench (salBench2, " uint64_t" , u64 , shl64);
112+
113+ auto sarBench1 = createBench (" Compare sar vs builtin >> (uint32_t)" );
114+ runSarBench (sarBench1, " uint32_t" , u32 , shl32);
115+
116+ auto sarBench2 = createBench (" Compare sar vs builtin >> (uint64_t)" );
117+ runSarBench (sarBench2, " uint64_t" , u64 , shl64);
23118}