Skip to content

Commit a9d11b3

Browse files
committed
gosthash2012: Prepare for multi-arch Streebog dispatcher
This implementation is functionally identical to the previous code (just a rearrangement), in preparation for run-time dispatch. Signed-off-by: Vitaly Chikunov <vt@altlinux.org>
1 parent bb614dc commit a9d11b3

File tree

5 files changed

+94
-67
lines changed

5 files changed

+94
-67
lines changed

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,8 @@ set(GOST_HASH_2012_SOURCE_FILES
126126
gosthash2012.h
127127
gosthash2012_const.h
128128
gosthash2012_precalc.h
129-
gosthash2012_ref.h
130-
gosthash2012_sse2.h
129+
gosthash2012_ref.c
130+
gosthash2012_sse2.c
131131
)
132132

133133
set(GOST_GRASSHOPPER_SOURCE_FILES

gosthash2012.c

Lines changed: 8 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -110,55 +110,16 @@ static INLINE void add512(union uint512_u * RESTRICT x,
110110
#endif /* __GOST3411_BIG_ENDIAN__ */
111111
}
112112

113-
static void g(union uint512_u *h, const union uint512_u * RESTRICT N,
114-
const union uint512_u * RESTRICT m)
113+
_internal
114+
void g(union uint512_u *h, const union uint512_u * RESTRICT N,
115+
const union uint512_u * RESTRICT m)
115116
{
116-
#ifdef __GOST3411_HAS_SSE2__
117-
__m128i xmm0, xmm2, xmm4, xmm6; /* XMMR0-quadruple */
118-
__m128i xmm1, xmm3, xmm5, xmm7; /* XMMR1-quadruple */
119-
unsigned int i;
120-
121-
LOAD(N, xmm0, xmm2, xmm4, xmm6);
122-
XLPS128M(h, xmm0, xmm2, xmm4, xmm6);
123-
124-
ULOAD(m, xmm1, xmm3, xmm5, xmm7);
125-
XLPS128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
126-
127-
for (i = 0; i < 11; i++)
128-
ROUND128(i, xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
129-
130-
XLPS128M((&C[11]), xmm0, xmm2, xmm4, xmm6);
131-
X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
132-
133-
X128M(h, xmm0, xmm2, xmm4, xmm6);
134-
ULOAD(m, xmm1, xmm3, xmm5, xmm7);
135-
X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
136-
137-
STORE(h, xmm0, xmm2, xmm4, xmm6);
138-
# ifdef __i386__
139-
/* Restore the Floating-point status on the CPU */
140-
/* This is only required on MMX, but EXTRACT32 is using MMX */
141-
_mm_empty();
142-
# endif
117+
#if defined __GOST3411_HAS_SSE2__
118+
g_sse2(h, N, m);
119+
#elif defined __GOST3411_HAS_REF__
120+
g_ref(h, N, m);
143121
#else
144-
union uint512_u Ki, data;
145-
unsigned int i;
146-
147-
XLPS(h, N, (&data));
148-
149-
/* Starting E() */
150-
Ki = data;
151-
XLPS((&Ki), ((const union uint512_u *)&m[0]), (&data));
152-
153-
for (i = 0; i < 11; i++)
154-
ROUND(i, (&Ki), (&data));
155-
156-
XLPS((&Ki), (&C[11]), (&Ki));
157-
X((&Ki), (&data), (&data));
158-
/* E() done */
159-
160-
X((&data), h, (&data));
161-
X((&data), m, h);
122+
# error "No implementation of g() is selected."
162123
#endif
163124
}
164125

gosthash2012.h

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,12 @@
1010

1111
#include <string.h>
1212

13-
#ifdef __SSE2__
13+
/* Can be undef'd to disable ref impl. */
14+
#define __GOST3411_HAS_REF__
15+
16+
#if defined __SSE2__
1417
# define __GOST3411_HAS_SSE2__
15-
# if !defined(__x86_64__) && !defined(__e2k__)
18+
# if !defined __x86_64__ && !defined __e2k__
1619
/*
1720
* x86-64 bit Linux and Windows ABIs provide malloc function that returns
1821
* 16-byte alignment memory buffers required by SSE load/store instructions.
@@ -35,12 +38,6 @@
3538
# define __GOST3411_BIG_ENDIAN__
3639
#endif
3740

38-
#if defined __GOST3411_HAS_SSE2__
39-
# include "gosthash2012_sse2.h"
40-
#else
41-
# include "gosthash2012_ref.h"
42-
#endif
43-
4441
# if defined(__GNUC__) || defined(__clang__)
4542
# define RESTRICT __restrict__
4643
# else
@@ -53,6 +50,14 @@
5350
# define ALIGN(x) __attribute__ ((__aligned__(x)))
5451
#endif
5552

53+
#ifdef __GNUC__
54+
# define _target(x) __attribute__((target(x)))
55+
# define _internal __attribute__ ((visibility ("internal")))
56+
#else
57+
# define _target(x)
58+
# define _internal
59+
#endif
60+
5661
ALIGN(16)
5762
typedef union uint512_u {
5863
unsigned long long QWORD[8];
@@ -77,3 +82,14 @@ void init_gost2012_hash_ctx(gost2012_hash_ctx * CTX,
7782
void gost2012_hash_block(gost2012_hash_ctx * CTX,
7883
const unsigned char *data, size_t len);
7984
void gost2012_finish_hash(gost2012_hash_ctx * CTX, unsigned char *digest);
85+
86+
#ifdef __GOST3411_HAS_REF__
87+
_internal
88+
void g_ref(union uint512_u *h, const union uint512_u * RESTRICT N,
89+
const union uint512_u * RESTRICT m);
90+
#endif
91+
#ifdef __GOST3411_HAS_SSE2__
92+
_internal _target("sse2")
93+
void g_sse2(union uint512_u *h, const union uint512_u * RESTRICT N,
94+
const union uint512_u * RESTRICT m);
95+
#endif
Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,8 @@
88
*
99
*/
1010

11-
#ifdef __GOST3411_HAS_SSE2__
12-
# error "GOST R 34.11-2012: portable implementation disabled in config.h"
13-
#endif
14-
15-
# pragma message "Use regular implementation"
11+
#include "gosthash2012.h"
12+
#ifdef __GOST3411_HAS_REF__
1613

1714
#define X(x, y, z) { \
1815
z->QWORD[0] = x->QWORD[0] ^ y->QWORD[0]; \
@@ -70,3 +67,27 @@
7067
XLPS(Ki, (&C[i]), Ki); \
7168
XLPS(Ki, data, data); \
7269
}
70+
71+
void g_ref(union uint512_u *h, const union uint512_u * RESTRICT N,
72+
const union uint512_u * RESTRICT m)
73+
{
74+
union uint512_u Ki, data;
75+
unsigned int i;
76+
77+
XLPS(h, N, (&data));
78+
79+
/* Starting E() */
80+
Ki = data;
81+
XLPS((&Ki), ((const union uint512_u *)&m[0]), (&data));
82+
83+
for (i = 0; i < 11; i++)
84+
ROUND(i, (&Ki), (&data));
85+
86+
XLPS((&Ki), (&C[11]), (&Ki));
87+
X((&Ki), (&data), (&data));
88+
/* E() done */
89+
90+
X((&data), h, (&data));
91+
X((&data), m, h);
92+
}
93+
#endif /* __GOST3411_HAS_REF__ */
Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,8 @@
88
*
99
*/
1010

11-
#ifndef __GOST3411_HAS_SSE2__
12-
# error "GOST R 34.11-2012: SSE2 not enabled"
13-
#endif
14-
15-
# pragma message "Use SIMD implementation"
11+
#include "gosthash2012.h"
12+
#ifdef __GOST3411_HAS_SSE2__
1613

1714
#include <mmintrin.h>
1815
#include <emmintrin.h>
@@ -212,3 +209,35 @@
212209
XLPS128M((&C[i]), xmm0, xmm2, xmm4, xmm6); \
213210
XLPS128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7); \
214211
}
212+
213+
void g_sse2(union uint512_u *h, const union uint512_u * RESTRICT N,
214+
const union uint512_u * RESTRICT m)
215+
{
216+
__m128i xmm0, xmm2, xmm4, xmm6; /* XMMR0-quadruple */
217+
__m128i xmm1, xmm3, xmm5, xmm7; /* XMMR1-quadruple */
218+
unsigned int i;
219+
220+
LOAD(N, xmm0, xmm2, xmm4, xmm6);
221+
XLPS128M(h, xmm0, xmm2, xmm4, xmm6);
222+
223+
ULOAD(m, xmm1, xmm3, xmm5, xmm7);
224+
XLPS128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
225+
226+
for (i = 0; i < 11; i++)
227+
ROUND128(i, xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
228+
229+
XLPS128M((&C[11]), xmm0, xmm2, xmm4, xmm6);
230+
X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
231+
232+
X128M(h, xmm0, xmm2, xmm4, xmm6);
233+
ULOAD(m, xmm1, xmm3, xmm5, xmm7);
234+
X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
235+
236+
STORE(h, xmm0, xmm2, xmm4, xmm6);
237+
# ifdef __i386__
238+
/* Restore the Floating-point status on the CPU */
239+
/* This is only required on MMX, but EXTRACT32 is using MMX */
240+
_mm_empty();
241+
# endif
242+
}
243+
#endif /* __GOST3411_HAS_SSE2__ */

0 commit comments

Comments
 (0)