1 /**
2 * SHA intrinsics.
3 * https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#othertechs=SHA
4 * 
5 * Copyright: Guillaume Piolat 2021.
6 *            Johan Engelen 2021.
7 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
8 */
9 module inteli.shaintrin;
10 
11 // SHA instructions
12 // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=SHA
13 // Note: this header will work whether you have SHA enabled or not.
14 // With LDC, use "dflags-ldc": ["-mattr=+sha"] or equivalent to actively
15 // generate SHA instructions.
16 // With GDC, use "dflags-gdc": ["-msha"] or equivalent to generate SHA instructions.
17 
18 public import inteli.types;
19 import inteli.internals;
20 
// `SHA_builtins` is true when the active compiler/target combination exposes
// the `__builtin_ia32_sha*` builtins (LDC or GDC with SHA enabled); otherwise
// the intrinsics below fall back to their portable scalar implementations.
// The GDC flag is only consulted when the LDC flag is false.
static if (LDC_with_SHA)
    private enum SHA_builtins = true;
else static if (GDC_with_SHA)
    private enum SHA_builtins = true;
else
    private enum SHA_builtins = false;
33 
34 nothrow @nogc:
35 
36 /+
/// Calculate SHA1 state variable E after four rounds of operation from the current SHA1 state variable a, add that value to the scheduled values (unsigned 32-bit integers) in b, and store the result in dst.
38 __m128i _mm_sha1nexte_epu32(__m128i a, __m128i b) @trusted
39 {
40     static if (SHA_builtins)
41     {
42         return __builtin_ia32_sha1nexte(cast(int4) a, cast(int4) b);
43     }
44     else
45     {
46         assert(0);
47     }
48 }
49 unittest
50 {
51 }
52 +/
53 
54 /+
/// Perform an intermediate calculation for the next four SHA1 message values (unsigned 32-bit integers) using previous message values from a and b, and store the result in dst.
56 __m128i _mm_sha1msg1_epu32(__m128i a, __m128i b) @trusted
57 {
58     static if (SHA_builtins)
59     {
60         return __builtin_ia32_sha1msg1(cast(int4) a, cast(int4) b);
61     }
62     else
63     {
64         assert(0);
65     }
66 }
67 unittest
68 {
69 }
70 +/
71 
72 /+
/// Perform the final calculation for the next four SHA1 message values (unsigned 32-bit integers) using the intermediate result in a and the previous message values in b, and store the result in dst.
74 __m128i _mm_sha1msg2_epu32(__m128i a, __m128i b) @trusted
75 {
76     static if (SHA_builtins)
77     {
78         return __builtin_ia32_sha1msg2(cast(int4) a, cast(int4) b);
79     }
80     else
81     {
82         assert(0);
83     }
84 }
85 unittest
86 {
87 }
88 +/
89 
90 /+
91 /// Perform four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) from a and some pre-computed sum of the next 4 round message values (unsigned 32-bit integers), and state variable E from b, and store the updated SHA1 state (A,B,C,D) in dst. func contains the logic functions and round constants.
92 __m128i _mm_sha1rnds4_epu32(__m128i a, __m128i b, const int func) @trusted
93 {
94     static if (SHA_builtins)
95     {
96         return __builtin_ia32_sha1rnds4(cast(int4) a, cast(int4) b, func);
97     }
98     else
99     {
100         assert(0);
101     }
102 
103 }
104 +/
105 
/// Perform an intermediate calculation for the next four SHA256 message values (unsigned 32-bit
/// integers) using previous message values from `a` and `b`, and return the result.
///
/// Params:
///     a = four message words W0..W3 (lane 0 holds W0).
///     b = only lane 0 is read; it supplies the following word W4.
///
/// Returns: lane `i` holds `W[i] + sigma0(W[i + 1])`, with wrap-around 32-bit addition.
__m128i _mm_sha256msg1_epu32(__m128i a, __m128i b) @trusted
{
    static if (SHA_builtins)
    {
        return __builtin_ia32_sha256msg1(cast(int4) a, cast(int4) b);
    }
    else
    {
        // Portable fallback.
        // sigma0(x) = ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3)
        static uint sigma0(uint x) nothrow @nogc @safe
        {
            return bitwiseRotateRight_uint(x, 7) ^ bitwiseRotateRight_uint(x, 18) ^ (x >> 3);
        }

        int4 dst;
        int4 a4 = cast(int4) a;
        int4 b4 = cast(int4) b;

        // Lanes are reinterpreted as unsigned so the shifts are logical
        // and the additions wrap modulo 2^32.
        uint W4 = b4.array[0];
        uint W3 = a4.array[3];
        uint W2 = a4.array[2];
        uint W1 = a4.array[1];
        uint W0 = a4.array[0];

        dst.ptr[3] = W3 + sigma0(W4);
        dst.ptr[2] = W2 + sigma0(W3);
        dst.ptr[1] = W1 + sigma0(W2);
        dst.ptr[0] = W0 + sigma0(W1);
        return cast(__m128i) dst;
    }
}
unittest
{
    // Fixed input vector checked against a known expected output.
    __m128i a = [15, 20, 130, 12345];
    __m128i b = [15, 20, 130, 12345];
    __m128i result = _mm_sha256msg1_epu32(a, b);
    assert(result.array == [671416337, 69238821, 2114864873, 503574586]);
}
142 
/// Perform the final calculation for the next four SHA256 message values (unsigned 32-bit
/// integers) using previous message values from `a` and `b`, and return the result.
///
/// Params:
///     a = four partially computed message words, one per lane.
///     b = only lanes 2 and 3 are read; they supply W14 and W15.
///
/// Returns: lanes 0..3 hold W16..W19, where
///          W16 = a[0] + sigma1(W14), W17 = a[1] + sigma1(W15),
///          W18 = a[2] + sigma1(W16), W19 = a[3] + sigma1(W17)
///          (wrap-around 32-bit addition).
__m128i _mm_sha256msg2_epu32(__m128i a, __m128i b) @trusted
{
    static if (SHA_builtins)
    {
        return __builtin_ia32_sha256msg2(cast(int4) a, cast(int4) b);
    }
    else
    {
        // Portable fallback.
        // sigma1(x) = ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10)
        static uint sigma1(uint x) nothrow @nogc @safe
        {
            return bitwiseRotateRight_uint(x, 17) ^ bitwiseRotateRight_uint(x, 19) ^ (x >> 10);
        }

        int4 dst;
        int4 a4 = cast(int4) a;
        int4 b4 = cast(int4) b;

        uint W14 = b4.array[2];
        uint W15 = b4.array[3];

        // W18 and W19 depend on the freshly computed W16 and W17, so the
        // four results must be produced in this order.
        uint W16 = a4.array[0] + sigma1(W14);
        uint W17 = a4.array[1] + sigma1(W15);
        uint W18 = a4.array[2] + sigma1(W16);
        uint W19 = a4.array[3] + sigma1(W17);

        dst.ptr[3] = W19;
        dst.ptr[2] = W18;
        dst.ptr[1] = W17;
        dst.ptr[0] = W16;
        return cast(__m128i) dst;
    }
}
unittest
{
    // Fixed input vector checked against a known expected output.
    __m128i a = [15, 20, 130, 12345];
    __m128i b = [15, 20, 130, 12345];
    __m128i result = _mm_sha256msg2_epu32(a, b);
    assert(result.array == [5324815, 505126944, -2012842764, -1542210977]);
}
180 
/// Perform 2 rounds of SHA256 operation using an initial SHA256 state (C,D,G,H) from `a`,
/// an initial SHA256 state (A,B,E,F) from `b`, and a pre-computed sum of the next 2 round
/// message values (unsigned 32-bit integers) and the corresponding round constants from `k`,
/// and return the updated SHA256 state (A,B,E,F).
///
/// Params:
///     a = state words, lanes 3..0 = C, D, G, H.
///     b = state words, lanes 3..0 = A, B, E, F.
///     k = only lanes 0 and 1 are read: message word + round constant for
///         the first and second round respectively.
///
/// Returns: lanes 3..0 = A, B, E, F after the two rounds.
__m128i _mm_sha256rnds2_epu32(__m128i a, __m128i b, __m128i k) @trusted
{
    static if (SHA_builtins)
    {
        return __builtin_ia32_sha256rnds2(cast(int4) a, cast(int4) b, cast(int4) k);
    }
    else
    {
        // TODO: `pragma(inline, false)` on the helpers below works around a
        //       DMD regression seen on Linux + x86_64 with `-b release-unittest`
        //       (recorded by the original author as "DMD 1.100"; presumably
        //       2.100, to be confirmed). It should be reported upstream.
        version(DigitalMars)
        {
            enum bool workaround = true;
        }
        else
        {
            enum bool workaround = false;
        }

        // Ch(x, y, z): picks bits of y where x is set, bits of z elsewhere.
        static uint Ch(uint x, uint y, uint z) nothrow @nogc @safe
        {
            static if (workaround) pragma (inline, false);
            return z ^ (x & (y ^ z));
        }

        // Maj(x, y, z): bitwise majority of the three inputs.
        static uint Maj(uint x, uint y, uint z) nothrow @nogc @safe
        {
            static if (workaround) pragma (inline, false);
            return (x & y) | (z & (x ^ y));
        }

        // sum0(x) = ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)
        static uint sum0(uint x) nothrow @nogc @safe
        {
            static if (workaround) pragma (inline, false);
            return bitwiseRotateRight_uint(x, 2) ^ bitwiseRotateRight_uint(x, 13) ^ bitwiseRotateRight_uint(x, 22);
        }

        // sum1(x) = ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)
        static uint sum1(uint x) nothrow @nogc @safe
        {
            static if (workaround) pragma (inline, false);
            return bitwiseRotateRight_uint(x, 6) ^ bitwiseRotateRight_uint(x, 11) ^ bitwiseRotateRight_uint(x, 25);
        }

        int4 dst;
        int4 a4 = cast(int4) a;
        int4 b4 = cast(int4) b;
        int4 k4 = cast(int4) k;

        // Unpack the eight working variables and the two W+K sums.
        const A0 = b4.array[3];
        const B0 = b4.array[2];
        const C0 = a4.array[3];
        const D0 = a4.array[2];
        const E0 = b4.array[1];
        const F0 = b4.array[0];
        const G0 = a4.array[1];
        const H0 = a4.array[0];
        const W_K0 = k4.array[0];
        const W_K1 = k4.array[1];

        // Round 1. The term shared by the new A and the new E is computed
        // once; all additions wrap modulo 2^32, so this yields the same
        // values as evaluating it separately for each.
        const T1_0 = Ch(E0, F0, G0) + sum1(E0) + W_K0 + H0;
        const A1 = T1_0 + Maj(A0, B0, C0) + sum0(A0);
        const B1 = A0;
        const C1 = B0;
        const D1 = C0;
        const E1 = T1_0 + D0;
        const F1 = E0;
        const G1 = F0;
        const H1 = G0;

        // Round 2. Only A, B, E and F are returned, so the remaining state
        // words of this round are not materialised.
        const T1_1 = Ch(E1, F1, G1) + sum1(E1) + W_K1 + H1;
        const A2 = T1_1 + Maj(A1, B1, C1) + sum0(A1);
        const B2 = A1;
        const E2 = T1_1 + D1;
        const F2 = E1;

        dst.ptr[3] = A2;
        dst.ptr[2] = B2;
        dst.ptr[1] = E2;
        dst.ptr[0] = F2;

        return cast(__m128i) dst;
    }
}
unittest
{
    // Fixed input vector checked against a known expected output.
    __m128i a = [15, 20, 130, 12345];
    __m128i b = [15, 20, 130, 12345];
    __m128i k = [15, 20, 130, 12345];
    __m128i result = _mm_sha256rnds2_epu32(a, b, k);
    assert(result.array == [1384123044, -2050674062, 327754346, 956342016]);
}
275 
/// Rotate the 32 bits of `value` to the right by `count` positions.
///
/// Params:
///     value = the word to rotate.
///     count = rotation amount; must be in `0 .. 32` (checked by `assert`).
///
/// Returns: `value` rotated right by `count` bits; `count == 0` returns
///          `value` unchanged.
private uint bitwiseRotateRight_uint(const uint value, const uint count) @safe
{
    enum uint bits = 8 * uint.sizeof;
    assert(count < bits);

    // The complementary left shift is masked to stay within 0 .. bits-1.
    // Without the mask, count == 0 would shift left by the full width of
    // the type, which is not a shift amount the language defines.
    return (value >> count) | (value << ((bits - count) & (bits - 1)));
}