1 /**
2 * SHA intrinsics.
3 * https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#othertechs=SHA
4 * 
5 * Copyright: Guillaume Piolat 2021.
6 *            Johan Engelen 2021.
7 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
8 */
9 module inteli.shaintrin;
10 
11 // SHA instructions
12 // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=SHA
13 // Note: this header will work whether you have SHA enabled or not.
14 // With LDC, use "dflags-ldc": ["-mattr=+sha"] or equivalent to actively
15 // generate SHA instructions.
16 // With GDC, use "dflags-gdc": ["-msha"] or equivalent to generate SHA instructions.
17 
18 public import inteli.types;
19 import inteli.internals;
20 
21 
22 
23 nothrow @nogc:
24 
25 /+
/// Calculate SHA1 state variable E after four rounds of operation from the current SHA1 state variable a, add that value to the scheduled values (unsigned 32-bit integers) in b, and store the result in dst.
27 __m128i _mm_sha1nexte_epu32(__m128i a, __m128i b) @trusted
28 {
29     static if (SHA_builtins)
30     {
31         return __builtin_ia32_sha1nexte(cast(int4) a, cast(int4) b);
32     }
33     else
34     {
35         assert(0);
36     }
37 }
38 unittest
39 {
40 }
41 +/
42 
43 /+
/// Perform an intermediate calculation for the next four SHA1 message values (unsigned 32-bit integers) using previous message values from a and b, and store the result in dst.
45 __m128i _mm_sha1msg1_epu32(__m128i a, __m128i b) @trusted
46 {
47     static if (SHA_builtins)
48     {
49         return __builtin_ia32_sha1msg1(cast(int4) a, cast(int4) b);
50     }
51     else
52     {
53         assert(0);
54     }
55 }
56 unittest
57 {
58 }
59 +/
60 
61 /+
/// Perform the final calculation for the next four SHA1 message values (unsigned 32-bit integers) using the intermediate result in a and the previous message values in b, and store the result in dst.
63 __m128i _mm_sha1msg2_epu32(__m128i a, __m128i b) @trusted
64 {
65     static if (SHA_builtins)
66     {
67         return __builtin_ia32_sha1msg2(cast(int4) a, cast(int4) b);
68     }
69     else
70     {
71         assert(0);
72     }
73 }
74 unittest
75 {
76 }
77 +/
78 
79 /+
80 /// Perform four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) from a and some pre-computed sum of the next 4 round message values (unsigned 32-bit integers), and state variable E from b, and store the updated SHA1 state (A,B,C,D) in dst. func contains the logic functions and round constants.
81 __m128i _mm_sha1rnds4_epu32(__m128i a, __m128i b, const int func) @trusted
82 {
83     static if (SHA_builtins)
84     {
85         return __builtin_ia32_sha1rnds4(cast(int4) a, cast(int4) b, func);
86     }
87     else
88     {
89         assert(0);
90     }
91 
92 }
93 +/
94 
/// Perform an intermediate calculation for the next four SHA256 message values (unsigned 32-bit integers)
/// using previous message values from `a` and `b`, and return the result.
///
/// With `W0..W3` being lanes 0..3 of `a` and `W4` being lane 0 of `b`, lane `i` of the
/// result is `W[i] + sigma0(W[i+1])`. Lanes 1..3 of `b` are not read.
__m128i _mm_sha256msg1_epu32(__m128i a, __m128i b) @trusted
{
    static if (GDC_or_LDC_with_SHA)
    {
        return __builtin_ia32_sha256msg1(cast(int4) a, cast(int4) b);
    }
    else
    {
        // Software fallback.
        // Message-schedule mixing function: ror 7 ^ ror 18 ^ shr 3.
        static uint sigma0(uint x) nothrow @nogc @safe
        {
            return bitwiseRotateRight_uint(x, 7) ^ bitwiseRotateRight_uint(x, 18) ^ (x >> 3);
        }

        int4 dst;
        int4 a4 = cast(int4) a;
        int4 b4 = cast(int4) b;

        // Five consecutive message words: four from `a`, the next one from `b`.
        uint W4 = b4.array[0];
        uint W3 = a4.array[3];
        uint W2 = a4.array[2];
        uint W1 = a4.array[1];
        uint W0 = a4.array[0];

        // Additions are on uint, so they wrap modulo 2^32.
        dst.ptr[3] = W3 + sigma0(W4);
        dst.ptr[2] = W2 + sigma0(W3);
        dst.ptr[1] = W1 + sigma0(W2);
        dst.ptr[0] = W0 + sigma0(W1);
        return cast(__m128i) dst;
    }
}
unittest
{
    __m128i a = [15, 20, 130, 12345];
    __m128i b = [15, 20, 130, 12345];
    __m128i result = _mm_sha256msg1_epu32(a, b);
    assert(result.array == [671416337, 69238821, 2114864873, 503574586]);

    // Asymmetric operands: the case above uses a == b and therefore cannot
    // detect the two arguments being mixed up.
    __m128i zero = [0, 0, 0, 0];
    __m128i one  = [1, 0, 0, 0];

    // Lane 0 of `b` only feeds lane 3: 0 + sigma0(1) = (1 << 25) ^ (1 << 14).
    result = _mm_sha256msg1_epu32(zero, one);
    assert(result.array == [0, 0, 0, 0x0200_4000]);

    // Lane 0 of `a` only feeds lane 0: 1 + sigma0(0) = 1.
    result = _mm_sha256msg1_epu32(one, zero);
    assert(result.array == [1, 0, 0, 0]);
}
131 
/// Perform the final calculation for the next four SHA256 message values (unsigned 32-bit integers)
/// using previous message values from `a` and `b`, and return the result.
///
/// Lanes 2 and 3 of `b` supply `W14` and `W15`. The four lanes of `a` are the partial sums
/// to which `sigma1` of the word two positions earlier is added. Lanes 2 and 3 of the
/// result depend on lanes 0 and 1 of the result. Lanes 0..1 of `b` are not read.
__m128i _mm_sha256msg2_epu32(__m128i a, __m128i b) @trusted
{
    static if (GDC_or_LDC_with_SHA)
    {
        return __builtin_ia32_sha256msg2(cast(int4) a, cast(int4) b);
    }
    else
    {
        // Software fallback.
        // Message-schedule mixing function: ror 17 ^ ror 19 ^ shr 10.
        static uint sigma1(uint x) nothrow @nogc @safe
        {
            return bitwiseRotateRight_uint(x, 17) ^ bitwiseRotateRight_uint(x, 19) ^ (x >> 10);
        }

        int4 dst;
        int4 a4 = cast(int4) a;
        int4 b4 = cast(int4) b;

        uint W14 = b4.array[2];
        uint W15 = b4.array[3];

        // Order matters: W18 and W19 consume the freshly computed W16 and W17.
        // Additions are on uint, so they wrap modulo 2^32.
        uint W16 = a4.array[0] + sigma1(W14);
        uint W17 = a4.array[1] + sigma1(W15);
        uint W18 = a4.array[2] + sigma1(W16);
        uint W19 = a4.array[3] + sigma1(W17);

        dst.ptr[3] = W19;
        dst.ptr[2] = W18;
        dst.ptr[1] = W17;
        dst.ptr[0] = W16;
        return cast(__m128i) dst;
    }
}
unittest
{
    __m128i a = [15, 20, 130, 12345];
    __m128i b = [15, 20, 130, 12345];
    __m128i result = _mm_sha256msg2_epu32(a, b);
    assert(result.array == [5324815, 505126944, -2012842764, -1542210977]);

    // Asymmetric operands: the case above uses a == b and therefore cannot
    // detect the two arguments being mixed up.
    __m128i zero = [0, 0, 0, 0];

    // W14 = 1: W16 = sigma1(1) = 0xA000, W18 = sigma1(0xA000) = 0x4400_0028.
    __m128i w14 = [0, 0, 1, 0];
    result = _mm_sha256msg2_epu32(zero, w14);
    assert(result.array == [0xA000, 0, 0x4400_0028, 0]);

    // Lane 0 of `a` = 1: W16 = 1, W18 = sigma1(1) = 0xA000.
    __m128i a0 = [1, 0, 0, 0];
    result = _mm_sha256msg2_epu32(a0, zero);
    assert(result.array == [1, 0, 0xA000, 0]);
}
169 
/// Perform 2 rounds of SHA256 operation using an initial SHA256 state (C,D,G,H) from `a`,
/// an initial SHA256 state (A,B,E,F) from `b`, and a pre-computed sum of the next 2 round
/// message values (unsigned 32-bit integers) and the corresponding round constants from `k`,
/// and return the updated SHA256 state (A,B,E,F).
///
/// Lane layout, highest lane first: `a` = (C, D, G, H), `b` = (A, B, E, F),
/// result = (A, B, E, F). Only lanes 0 and 1 of `k` are read.
__m128i _mm_sha256rnds2_epu32(__m128i a, __m128i b, __m128i k) @trusted
{
    // TODO: the `pragma(inline, false)` on the helpers below prevents a DMD
    //       regression (originally noted as "DMD 1.100", presumably 2.100) on
    //       Linux + x86_64 + -b release-unittest; report it upstream.

    version(DigitalMars)
    {
        enum bool workaround = true;
    }
    else
    {
        enum bool workaround = false;
    }

    static if (GDC_or_LDC_with_SHA)
    {
        return __builtin_ia32_sha256rnds2(cast(int4) a, cast(int4) b, cast(int4) k);
    }
    else
    {
        // Software fallback.

        // "Choose": each bit comes from y where x is set, from z otherwise.
        static uint Ch(uint x, uint y, uint z) nothrow @nogc @safe
        {
            static if (workaround) pragma (inline, false);
            return z ^ (x & (y ^ z));
        }

        // "Majority": each bit is the majority vote of x, y and z.
        static uint Maj(uint x, uint y, uint z) nothrow @nogc @safe
        {
            static if (workaround) pragma (inline, false);
            return (x & y) | (z & (x ^ y));
        }

        // ror 2 ^ ror 13 ^ ror 22
        static uint sum0(uint x) nothrow @nogc @safe
        {
            static if (workaround) pragma (inline, false);
            return bitwiseRotateRight_uint(x, 2) ^ bitwiseRotateRight_uint(x, 13) ^ bitwiseRotateRight_uint(x, 22);
        }

        // ror 6 ^ ror 11 ^ ror 25
        static uint sum1(uint x) nothrow @nogc @safe
        {
            static if (workaround) pragma (inline, false);
            return bitwiseRotateRight_uint(x, 6) ^ bitwiseRotateRight_uint(x, 11) ^ bitwiseRotateRight_uint(x, 25);
        }

        int4 dst;
        int4 a4 = cast(int4) a;
        int4 b4 = cast(int4) b;
        int4 k4 = cast(int4) k;

        // Unpack the eight state words and the two (message + constant) sums.
        const A0 = b4.array[3];
        const B0 = b4.array[2];
        const C0 = a4.array[3];
        const D0 = a4.array[2];
        const E0 = b4.array[1];
        const F0 = b4.array[0];
        const G0 = a4.array[1];
        const H0 = a4.array[0];
        const W_K0 = k4.array[0];
        const W_K1 = k4.array[1];

        // Round 1.
        const A1 = Ch(E0, F0, G0) + sum1(E0) + W_K0 + H0 + Maj(A0, B0, C0) + sum0(A0);
        const B1 = A0;
        const C1 = B0;
        const D1 = C0;
        const E1 = Ch(E0, F0, G0) + sum1(E0) + W_K0 + H0 + D0;
        const F1 = E0;
        const G1 = F0;
        const H1 = G0;

        // Round 2. Only A, B, E and F are returned, so the C, D, G and H words
        // of this round are not materialized.
        const A2 = Ch(E1, F1, G1) + sum1(E1) + W_K1 + H1 + Maj(A1, B1, C1) + sum0(A1);
        const B2 = A1;
        const E2 = Ch(E1, F1, G1) + sum1(E1) + W_K1 + H1 + D1;
        const F2 = E1;

        dst.ptr[3] = A2;
        dst.ptr[2] = B2;
        dst.ptr[1] = E2;
        dst.ptr[0] = F2;

        return cast(__m128i) dst;
    }
}
unittest
{
    __m128i a = [15, 20, 130, 12345];
    __m128i b = [15, 20, 130, 12345];
    __m128i k = [15, 20, 130, 12345];
    __m128i result = _mm_sha256rnds2_epu32(a, b, k);
    assert(result.array == [1384123044, -2050674062, 327754346, 956342016]);

    // Asymmetric operands: the case above uses a == b == k and therefore
    // cannot detect the arguments being mixed up.
    __m128i zero = [0, 0, 0, 0];

    // Only W_K0 = 1: A1 = E1 = 1, then
    // A2 = sum1(1) + sum0(1) = 0x0420_0080 + 0x4008_0400, B2 = 1,
    // E2 = sum1(1), F2 = 1.
    __m128i k0 = [1, 0, 0, 0];
    result = _mm_sha256rnds2_epu32(zero, zero, k0);
    assert(result.array == [1, 0x0420_0080, 1, 0x4428_0480]);

    // Only D0 = 1 (lane 2 of `a`): A1 = 0, E1 = 1, then
    // A2 = E2 = sum1(1), B2 = 0, F2 = 1.
    __m128i d0 = [0, 0, 1, 0];
    result = _mm_sha256rnds2_epu32(d0, zero, zero);
    assert(result.array == [1, 0x0420_0080, 0, 0x0420_0080]);
}
263 
/// Rotate the 32-bit `value` right by `count` bit positions.
///
/// Params:
///     value = the bits to rotate.
///     count = rotation amount; must be in the range 0 .. 31 (asserted).
/// Returns: `value` with its low `count` bits moved to the top.
private uint bitwiseRotateRight_uint(const uint value, const uint count) @safe
{
    enum uint bitWidth = 8 * uint.sizeof;
    assert(count < bitWidth);

    // The complementary left shift is masked so that count == 0 shifts by 0
    // rather than by 32: shifting a 32-bit operand by its full width is not a
    // valid shift in D. For count in 1 .. 31 the mask changes nothing.
    return (value >> count) | (value << ((bitWidth - count) & (bitWidth - 1)));
}