/**
* SHA intrinsics.
*
* Copyright: Guillaume Piolat 2021.
*            Johan Engelen 2021.
* License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/
module inteli.shaintrin;

// SHA instructions
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=SHA
// Note: this header will work whether you have SHA enabled or not.
// With LDC, use "dflags-ldc": ["-mattr=+sha"] or equivalent to actively
// generate SHA instructions.

public import inteli.types;
import inteli.internals;

// `SHA_builtins` is true when the compiler exposes the `__builtin_ia32_sha*`
// builtins; otherwise the intrinsics below use their portable D fallback.
static if (LDC_with_SHA)
{
    private enum SHA_builtins = true;
}
else static if (GDC_with_SHA)
{
    private enum SHA_builtins = true;
}
else
{
    private enum SHA_builtins = false;
}

nothrow @nogc:

// The SHA1 intrinsics below are disabled: they have no software fallback yet
// (the non-builtin branch is `assert(0)`) and their unittests are empty.

/+
/// Calculate SHA1 state variable E after four rounds of operation from the current SHA1 state variable a, add that value to the scheduled values (unsigned 32-bit integers) in b, and store the result in dst.
__m128i _mm_sha1nexte_epu32(__m128i a, __m128i b) @trusted
{
    static if (SHA_builtins)
    {
        return __builtin_ia32_sha1nexte(cast(int4) a, cast(int4) b);
    }
    else
    {
        assert(0);
    }
}
unittest
{
}
+/

/+
/// Perform an intermediate calculation for the next four SHA1 message values (unsigned 32-bit integers) using previous message values from a and b, and store the result in dst.
__m128i _mm_sha1msg1_epu32(__m128i a, __m128i b) @trusted
{
    static if (SHA_builtins)
    {
        return __builtin_ia32_sha1msg1(cast(int4) a, cast(int4) b);
    }
    else
    {
        assert(0);
    }
}
unittest
{
}
+/

/+
/// Perform the final calculation for the next four SHA1 message values (unsigned 32-bit integers) using the intermediate result in a and the previous message values in b, and store the result in dst.
__m128i _mm_sha1msg2_epu32(__m128i a, __m128i b) @trusted
{
    static if (SHA_builtins)
    {
        return __builtin_ia32_sha1msg2(cast(int4) a, cast(int4) b);
    }
    else
    {
        assert(0);
    }
}
unittest
{
}
+/

/+
/// Perform four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) from a and some pre-computed sum of the next 4 round message values (unsigned 32-bit integers), and state variable E from b, and store the updated SHA1 state (A,B,C,D) in dst. func contains the logic functions and round constants.
__m128i _mm_sha1rnds4_epu32(__m128i a, __m128i b, const int func) @trusted
{
    static if (SHA_builtins)
    {
        return __builtin_ia32_sha1rnds4(cast(int4) a, cast(int4) b, func);
    }
    else
    {
        assert(0);
    }

}
+/

/// Perform an intermediate calculation for the next four SHA256 message values (unsigned 32-bit integers) using previous message values from `a` and `b`, and return the result.
///
/// Each result lane `i` is `W[i] + sigma0(W[i+1])`, where `W[0..3]` are the
/// lanes of `a` and `W[4]` is lane 0 of `b`. All additions wrap modulo 2^32.
__m128i _mm_sha256msg1_epu32(__m128i a, __m128i b) @trusted
{
    static if (SHA_builtins)
    {
        return __builtin_ia32_sha256msg1(cast(int4) a, cast(int4) b);
    }
    else
    {
        import core.bitop : ror;

        // SHA-256 message-schedule function sigma0 (rotations 7 and 18, shift 3).
        // `>>` binds tighter than `^`; the parentheses only make that explicit.
        static uint sigma0(uint x) { return ror(x, 7) ^ ror(x, 18) ^ (x >> 3); }

        int4 dst;
        int4 a4 = cast(int4) a;
        int4 b4 = cast(int4) b;
        // Only the lowest lane of `b` is read.
        uint W4 = b4.array[0];
        uint W3 = a4.array[3];
        uint W2 = a4.array[2];
        uint W1 = a4.array[1];
        uint W0 = a4.array[0];
        dst.ptr[3] = W3 + sigma0(W4);
        dst.ptr[2] = W2 + sigma0(W3);
        dst.ptr[1] = W1 + sigma0(W2);
        dst.ptr[0] = W0 + sigma0(W1);
        return cast(__m128i) dst;
    }
}
unittest
{
    __m128i a = [15, 20, 130, 12345];
    __m128i b = [15, 20, 130, 12345];
    __m128i result = _mm_sha256msg1_epu32(a, b);
    assert(result.array == [671416337, 69238821, 2114864873, 503574586]);
}

/// Perform the final calculation for the next four SHA256 message values (unsigned 32-bit integers) using previous message values from `a` and `b`, and return the result.
///
/// Lanes 0 and 1 of the result add `sigma1` of lanes 2 and 3 of `b` to the
/// matching lanes of `a`; lanes 2 and 3 then add `sigma1` of the freshly
/// computed lanes 0 and 1. All additions wrap modulo 2^32.
__m128i _mm_sha256msg2_epu32(__m128i a, __m128i b) @trusted
{
    static if (SHA_builtins)
    {
        return __builtin_ia32_sha256msg2(cast(int4) a, cast(int4) b);
    }
    else
    {
        import core.bitop : ror;

        // SHA-256 message-schedule function sigma1 (rotations 17 and 19, shift 10).
        // `>>` binds tighter than `^`; the parentheses only make that explicit.
        static uint sigma1(uint x) { return ror(x, 17) ^ ror(x, 19) ^ (x >> 10); }

        int4 dst;
        int4 a4 = cast(int4) a;
        int4 b4 = cast(int4) b;
        // Only the two upper lanes of `b` are read.
        uint W14 = b4.array[2];
        uint W15 = b4.array[3];
        uint W16 = a4.array[0] + sigma1(W14);
        uint W17 = a4.array[1] + sigma1(W15);
        // W18 and W19 depend on the values just produced for W16 and W17.
        uint W18 = a4.array[2] + sigma1(W16);
        uint W19 = a4.array[3] + sigma1(W17);
        dst.ptr[3] = W19;
        dst.ptr[2] = W18;
        dst.ptr[1] = W17;
        dst.ptr[0] = W16;
        return cast(__m128i) dst;
    }
}
unittest
{
    __m128i a = [15, 20, 130, 12345];
    __m128i b = [15, 20, 130, 12345];
    __m128i result = _mm_sha256msg2_epu32(a, b);
    assert(result.array == [5324815, 505126944, -2012842764, -1542210977]);
}

/// Perform 2 rounds of SHA256 operation using an initial SHA256 state (C,D,G,H) from `a`, an initial SHA256 state (A,B,E,F) from `b`, and a pre-computed sum of the next 2 round message values (unsigned 32-bit integers) and the corresponding round constants from `k`, and return the updated SHA256 state (A,B,E,F).
__m128i _mm_sha256rnds2_epu32(__m128i a, __m128i b, __m128i k) @trusted
{
    static if (SHA_builtins)
    {
        return __builtin_ia32_sha256rnds2(cast(int4) a, cast(int4) b, cast(int4) k);
    }
    else
    {
        // Portable fallback: run two SHA-256 compression rounds in plain D.
        import core.bitop : ror;

        static uint choose(uint e, uint f, uint g)     { return g ^ (e & (f ^ g)); }
        static uint majority(uint x, uint y, uint z)   { return (x & y) | (z & (x ^ y)); }
        static uint bigSigma0(uint x) { return ror(x, 2) ^ ror(x, 13) ^ ror(x, 22); }
        static uint bigSigma1(uint x) { return ror(x, 6) ^ ror(x, 11) ^ ror(x, 25); }

        int4 cdgh = cast(int4) a;   // lanes 3..0 hold C, D, G, H
        int4 abef = cast(int4) b;   // lanes 3..0 hold A, B, E, F
        int4 wk   = cast(int4) k;   // lanes 0 and 1 hold the two W+K sums

        // Working state, updated in place once per round.
        uint sa = abef.array[3];
        uint sb = abef.array[2];
        uint sc = cdgh.array[3];
        uint sd = cdgh.array[2];
        uint se = abef.array[1];
        uint sf = abef.array[0];
        uint sg = cdgh.array[1];
        uint sh = cdgh.array[0];

        foreach (round; 0 .. 2)
        {
            // Term shared by the new A and the new E; additions wrap modulo 2^32.
            const uint t1 = choose(se, sf, sg) + bigSigma1(se) + wk.array[round] + sh;
            const uint t2 = majority(sa, sb, sc) + bigSigma0(sa);

            // Shift the state down by one position, oldest values first.
            sh = sg;
            sg = sf;
            sf = se;
            se = t1 + sd;
            sd = sc;
            sc = sb;
            sb = sa;
            sa = t1 + t2;
        }

        // Only the updated (A, B, E, F) part of the state is returned.
        int4 r;
        r.ptr[0] = sf;
        r.ptr[1] = se;
        r.ptr[2] = sb;
        r.ptr[3] = sa;
        return cast(__m128i) r;
    }
}
unittest
{
    // The same vector is used as both state halves and as the W+K input.
    __m128i v = [15, 20, 130, 12345];
    __m128i got = _mm_sha256rnds2_epu32(v, v, v);
    assert(got.array == [1384123044, -2050674062, 327754346, 956342016]);
}