Skip to content

Commit 2e291a9

Browse files
committed
gosthash2012: Switch Streebog implementations dynamically
Currently, this only switches between the sse2 and ref implementations. This should affect only i686, since x86_64 always has SSE2 unless compiled with `-mno-sse2`. This version of dynamic dispatch works only on GCC >= 10 / Clang >= 3; otherwise it falls back to static dispatch as before. Signed-off-by: Vitaly Chikunov <vt@altlinux.org>
1 parent a9d11b3 commit 2e291a9

File tree

3 files changed

+43
-7
lines changed

3 files changed

+43
-7
lines changed

gosthash2012.c

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,13 +114,24 @@ _internal
114114
void g(union uint512_u *h, const union uint512_u * RESTRICT N,
115115
const union uint512_u * RESTRICT m)
116116
{
117-
#if defined __GOST3411_HAS_SSE2__
117+
#ifdef __GOST3411_DISPATCH__
118+
# if defined __GOST3411_HAS_SSE2__
119+
if (__builtin_cpu_supports("sse2"))
120+
return g_sse2(h, N, m);
121+
# elif defined __GOST3411_HAS_REF__
122+
g_ref(h, N, m);
123+
# else
124+
# error "No implementation of g() is selected."
125+
# endif
126+
#else /* !__GOST3411_DISPATCH__ */
127+
# if defined __GOST3411_HAS_SSE2__ && defined __SSE2__
118128
g_sse2(h, N, m);
119-
#elif defined __GOST3411_HAS_REF__
129+
# elif defined __GOST3411_HAS_REF__
120130
g_ref(h, N, m);
121-
#else
131+
# else
122132
# error "No implementation of g() is selected."
123-
#endif
133+
# endif
134+
#endif /* !__GOST3411_DISPATCH__ */
124135
}
125136

126137
static INLINE void stage2(gost2012_hash_ctx * CTX, const union uint512_u *data)

gosthash2012.h

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,11 @@
1313
/* Can be undef'd to disable ref impl. */
1414
#define __GOST3411_HAS_REF__
1515

16-
#if defined __SSE2__
16+
#if defined __x86_64__ || defined __i386__
1717
# define __GOST3411_HAS_SSE2__
18-
# if !defined __x86_64__ && !defined __e2k__
18+
#elif defined __SSE2__
19+
# define __GOST3411_HAS_SSE2__
20+
# if !defined __e2k__
1921
/*
2022
* x86-64 bit Linux and Windows ABIs provide malloc function that returns
2123
* 16-byte alignment memory buffers required by SSE load/store instructions.
@@ -32,6 +34,14 @@
3234
# if (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 2)
3335
# undef __GOST3411_HAS_SSE2__
3436
# endif
37+
# ifdef __x86_64__
38+
/*
39+
* On x86_64 there is always SSE2, so no need to even build reference
40+
* implementation. But only if SSE2 is actually compiled, since it could
41+
* be disabled with -mno-sse2.
42+
*/
43+
# undef __GOST3411_HAS_REF__
44+
# endif
3545
#endif
3646

3747
#ifndef L_ENDIAN
@@ -58,6 +68,15 @@
5868
# define _internal
5969
#endif
6070

71+
/* __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21. */
72+
#ifndef __has_builtin
73+
# define __has_builtin(x) 0
74+
#else
75+
# if __has_builtin(__builtin_cpu_supports)
76+
# define __GOST3411_DISPATCH__
77+
# endif
78+
#endif
79+
6180
ALIGN(16)
6281
typedef union uint512_u {
6382
unsigned long long QWORD[8];

gosthash2012_sse2.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,13 @@
3131
# define _mm_cvtm64_si64(v) (long long) v
3232
#endif
3333

34-
#ifdef __SSE3__
34+
/*
35+
* We cannot just use SSE3 instructions in SSE2 implementation if dynamic
36+
* dispatch is used. SSE3 belongs to a different microarchitecture level
37+
* (x86_64-v2) than SSE2 (x86_64 baseline). If there is an x86_64-v2 CPU then
38+
* the SSE4.1 implementation should be used.
39+
*/
40+
#if defined __SSE3__ && !defined __GOST3411_DISPATCH__
3541
/*
3642
* "This intrinsic may perform better than _mm_loadu_si128 when
3743
* the data crosses a cache line boundary."

0 commit comments

Comments
 (0)