File tree Expand file tree Collapse file tree 3 files changed +43
-7
lines changed
Expand file tree Collapse file tree 3 files changed +43
-7
lines changed Original file line number Diff line number Diff line change @@ -114,13 +114,24 @@ _internal
114114void g (union uint512_u * h , const union uint512_u * RESTRICT N ,
115115 const union uint512_u * RESTRICT m )
116116{
117- #if defined __GOST3411_HAS_SSE2__
117+ #ifdef __GOST3411_DISPATCH__
118+ # if defined __GOST3411_HAS_SSE2__
119+ if (__builtin_cpu_supports ("sse2" ))
120+ return g_sse2 (h , N , m );
121+ # elif defined __GOST3411_HAS_REF__
122+ g_ref (h , N , m );
123+ # else
124+ # error "No implementation of g() is selected."
125+ # endif
126+ #else /* !__GOST3411_DISPATCH__ */
127+ # if defined __GOST3411_HAS_SSE2__ && defined __SSE2__
118128 g_sse2 (h , N , m );
119- #elif defined __GOST3411_HAS_REF__
129+ # elif defined __GOST3411_HAS_REF__
120130 g_ref (h , N , m );
121- #else
131+ # else
122132# error "No implementation of g() is selected."
123- #endif
133+ # endif
134+ #endif /* !__GOST3411_DISPATCH__ */
124135}
125136
126137static INLINE void stage2 (gost2012_hash_ctx * CTX , const union uint512_u * data )
Original file line number Diff line number Diff line change 1313/* Can be undef'd to disable ref impl. */
1414#define __GOST3411_HAS_REF__
1515
16- #if defined __SSE2__
16+ #if defined __x86_64__ || defined __i386__
1717# define __GOST3411_HAS_SSE2__
18- # if !defined __x86_64__ && !defined __e2k__
18+ #elif defined __SSE2__
19+ # define __GOST3411_HAS_SSE2__
20+ # if !defined __e2k__
1921/*
2022 * The x86-64 Linux and Windows ABIs provide a malloc function that returns
2123 * 16-byte-aligned memory buffers, as required by SSE load/store instructions.
3234# if (__GNUC__ < 4 ) || (__GNUC__ == 4 && __GNUC_MINOR__ < 2 )
3335# undef __GOST3411_HAS_SSE2__
3436# endif
37+ # ifdef __x86_64__
38+ /*
39+ * On x86_64 SSE2 is always available, so there is no need to even build
40+ * the reference implementation. This applies only when the SSE2 code is
41+ * actually compiled, since SSE2 can be disabled with -mno-sse2.
42+ */
43+ # undef __GOST3411_HAS_REF__
44+ # endif
3545#endif
3646
3747#ifndef L_ENDIAN
5868# define _internal
5969#endif
6070
71+ /* '__has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21.' */
72+ #ifndef __has_builtin
73+ # define __has_builtin (x ) 0
74+ #else
75+ # if __has_builtin (__builtin_cpu_supports )
76+ # define __GOST3411_DISPATCH__
77+ # endif
78+ #endif
79+
6180ALIGN (16 )
6281typedef union uint512_u {
6382 unsigned long long QWORD [8 ];
Original file line number Diff line number Diff line change 3131# define _mm_cvtm64_si64 (v ) (long long) v
3232#endif
3333
34- #ifdef __SSE3__
34+ /*
35+ * We cannot simply use SSE3 instructions in the SSE2 implementation when
36+ * dynamic dispatch is used: SSE3 belongs to a different microarchitecture
37+ * level (x86_64-v2) than SSE2 (x86_64 baseline). On an x86_64-v2 CPU the
38+ * SSE4.1 implementation should be used instead.
39+ */
40+ #if defined __SSE3__ && !defined __GOST3411_DISPATCH__
3541/*
3642 * "This intrinsic may perform better than _mm_loadu_si128 when
3743 * the data crosses a cache line boundary."
You can’t perform that action at this time.
0 commit comments