/**
* SHA intrinsics.
* https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#othertechs=SHA
*
* Copyright: Guillaume Piolat 2021.
*            Johan Engelen 2021.
* License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/
module inteli.shaintrin;

// SHA instructions
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=SHA
// Note: this header will work whether you have SHA enabled or not.
// With LDC, use "dflags-ldc": ["-mattr=+sha"] or equivalent to actively
// generate SHA instructions.
// With GDC, use "dflags-gdc": ["-msha"] or equivalent to generate SHA instructions.

public import inteli.types;
import inteli.internals;

// SHA_builtins is true when either LDC_with_SHA or GDC_with_SHA is set.
// The intrinsics in this module test it with `static if` to choose between
// the compiler builtin and the portable D fallback.
// NOTE(review): the two *_with_SHA flags are presumably defined in
// inteli.internals - confirm.
static if (LDC_with_SHA)
{
    private enum SHA_builtins = true;
}
else static if (GDC_with_SHA)
{
    private enum SHA_builtins = true;
}
else
{
    private enum SHA_builtins = false;
}

// Attribute label: applies to every declaration that follows in this module.
nothrow @nogc:

// The SHA1 intrinsics are kept disabled inside nesting comments: they have no
// portable fallback (the non-builtin branch is `assert(0)`) and no tests yet.

/+
/// Calculate SHA1 state variable E after four rounds of operation from the current SHA1 state variable a, add that value to the scheduled values (unsigned 32-bit integers) in b, and store the result in dst.
__m128i _mm_sha1nexte_epu32(__m128i a, __m128i b) @trusted
{
    static if (SHA_builtins)
    {
        return __builtin_ia32_sha1nexte(cast(int4) a, cast(int4) b);
    }
    else
    {
        assert(0);
    }
}
unittest
{
}
+/

/+
/// Perform an intermediate calculation for the next four SHA1 message values (unsigned 32-bit integers) using previous message values from a and b, and store the result in dst.
__m128i _mm_sha1msg1_epu32(__m128i a, __m128i b) @trusted
{
    static if (SHA_builtins)
    {
        return __builtin_ia32_sha1msg1(cast(int4) a, cast(int4) b);
    }
    else
    {
        assert(0);
    }
}
unittest
{
}
+/

/+
/// Perform the final calculation for the next four SHA1 message values (unsigned 32-bit integers) using the intermediate result in a and the previous message values in b, and store the result in dst.
__m128i _mm_sha1msg2_epu32(__m128i a, __m128i b) @trusted
{
    static if (SHA_builtins)
    {
        return __builtin_ia32_sha1msg2(cast(int4) a, cast(int4) b);
    }
    else
    {
        assert(0);
    }
}
unittest
{
}
+/

/+
/// Perform four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) from a and some pre-computed sum of the next 4 round message values (unsigned 32-bit integers), and state variable E from b, and store the updated SHA1 state (A,B,C,D) in dst. func contains the logic functions and round constants.
__m128i _mm_sha1rnds4_epu32(__m128i a, __m128i b, const int func) @trusted
{
    static if (SHA_builtins)
    {
        return __builtin_ia32_sha1rnds4(cast(int4) a, cast(int4) b, func);
    }
    else
    {
        assert(0);
    }

}
+/

/// Perform an intermediate calculation for the next four SHA256 message values (unsigned 32-bit integers) using previous message values from `a` and `b`, and return the result.
__m128i _mm_sha256msg1_epu32(__m128i a, __m128i b) @trusted
{
    // Message-schedule step built on sigma0: each lane of `a` receives
    // sigma0 of the following word; the word after a[3] is b[0].
    static if (!SHA_builtins)
    {
        // sigma0(w) = ROTR(w, 7) ^ ROTR(w, 18) ^ SHR(w, 3)
        static uint smallSigma0(uint w) nothrow @nogc @safe
        {
            const uint rot7  = bitwiseRotateRight_uint(w, 7);
            const uint rot18 = bitwiseRotateRight_uint(w, 18);
            return rot7 ^ rot18 ^ (w >> 3);
        }

        int4 lo = cast(int4) a;
        int4 hi = cast(int4) b;

        // Five consecutive words: the four lanes of `a`, then lane 0 of `b`.
        const uint w0 = lo.array[0];
        const uint w1 = lo.array[1];
        const uint w2 = lo.array[2];
        const uint w3 = lo.array[3];
        const uint w4 = hi.array[0];

        int4 r;
        r.ptr[0] = w0 + smallSigma0(w1);
        r.ptr[1] = w1 + smallSigma0(w2);
        r.ptr[2] = w2 + smallSigma0(w3);
        r.ptr[3] = w3 + smallSigma0(w4);
        return cast(__m128i) r;
    }
    else
    {
        // SHA support enabled at compile time: delegate to the compiler builtin.
        return __builtin_ia32_sha256msg1(cast(int4) a, cast(int4) b);
    }
}
unittest
{
    // Both operands hold the same lanes; the expected lanes pin the output.
    immutable int[4] expected = [671416337, 69238821, 2114864873, 503574586];
    __m128i a = [15, 20, 130, 12345];
    __m128i b = [15, 20, 130, 12345];
    __m128i r = _mm_sha256msg1_epu32(a, b);
    assert(r.array == expected);
}

/// Perform the final calculation for the next four SHA256 message values (unsigned 32-bit integers) using previous message values from `a` and `b`, and return the result.
__m128i _mm_sha256msg2_epu32(__m128i a, __m128i b) @trusted
{
    // Message-schedule step built on sigma1: lanes 0 and 1 of the result use
    // b[2] and b[3]; lanes 2 and 3 feed on the freshly computed lanes 0 and 1.
    static if (!SHA_builtins)
    {
        // sigma1(w) = ROTR(w, 17) ^ ROTR(w, 19) ^ SHR(w, 10)
        static uint smallSigma1(uint w) nothrow @nogc @safe
        {
            const uint rot17 = bitwiseRotateRight_uint(w, 17);
            const uint rot19 = bitwiseRotateRight_uint(w, 19);
            return rot17 ^ rot19 ^ (w >> 10);
        }

        int4 partial = cast(int4) a;
        int4 prev = cast(int4) b;

        const uint w14 = prev.array[2];
        const uint w15 = prev.array[3];

        // Order matters: w18 and w19 depend on w16 and w17.
        const uint w16 = partial.array[0] + smallSigma1(w14);
        const uint w17 = partial.array[1] + smallSigma1(w15);
        const uint w18 = partial.array[2] + smallSigma1(w16);
        const uint w19 = partial.array[3] + smallSigma1(w17);

        int4 r;
        r.ptr[0] = w16;
        r.ptr[1] = w17;
        r.ptr[2] = w18;
        r.ptr[3] = w19;
        return cast(__m128i) r;
    }
    else
    {
        // SHA support enabled at compile time: delegate to the compiler builtin.
        return __builtin_ia32_sha256msg2(cast(int4) a, cast(int4) b);
    }
}
unittest
{
    // Both operands hold the same lanes; the expected lanes pin the output.
    immutable int[4] expected = [5324815, 505126944, -2012842764, -1542210977];
    __m128i a = [15, 20, 130, 12345];
    __m128i b = [15, 20, 130, 12345];
    __m128i r = _mm_sha256msg2_epu32(a, b);
    assert(r.array == expected);
}

/// Perform 2 rounds of SHA256 operation using an initial SHA256 state (C,D,G,H) from `a`, an initial SHA256 state (A,B,E,F) from `b`, and a pre-computed sum of the next 2 round message values (unsigned 32-bit integers) and the corresponding round constants from `k`, and return the updated SHA256 state (A,B,E,F).
__m128i _mm_sha256rnds2_epu32(__m128i a, __m128i b, __m128i k) @trusted
{
    // Two SHA-256 compression rounds.
    //   a = (C, D, G, H) in lanes 3..0
    //   b = (A, B, E, F) in lanes 3..0
    //   k = message word + round constant for the two rounds, in lanes 0 and 1
    // Returns the updated (A, B, E, F) in lanes 3..0.

    // TODO: the pragma(inline) false prevent a DMD 1.100
    // regression in Linux + x86_64 + -b release-unittest, report that

    version(DigitalMars)
    {
        enum bool workaround = true;
    }
    else
    {
        enum bool workaround = false;
    }


    static if (SHA_builtins)
    {
        return __builtin_ia32_sha256rnds2(cast(int4) a, cast(int4) b, cast(int4) k);
    }
    else
    {
        // Choose: bits of y where x is set, bits of z elsewhere.
        static uint Ch(uint x, uint y, uint z) nothrow @nogc @safe
        {
            static if (workaround) pragma (inline, false);
            return z ^ (x & (y ^ z));
        }

        // Majority: each bit is the majority vote of x, y and z.
        static uint Maj(uint x, uint y, uint z) nothrow @nogc @safe
        {
            static if (workaround) pragma (inline, false);
            return (x & y) | (z & (x ^ y));
        }

        // "Big sigma 0": ROTR 2 ^ ROTR 13 ^ ROTR 22.
        static uint sum0(uint x) nothrow @nogc @safe
        {
            static if (workaround) pragma (inline, false);
            return bitwiseRotateRight_uint(x, 2) ^ bitwiseRotateRight_uint(x, 13) ^ bitwiseRotateRight_uint(x, 22);
        }

        // "Big sigma 1": ROTR 6 ^ ROTR 11 ^ ROTR 25.
        static uint sum1(uint x) nothrow @nogc @safe
        {
            static if (workaround) pragma (inline, false);
            return bitwiseRotateRight_uint(x, 6) ^ bitwiseRotateRight_uint(x, 11) ^ bitwiseRotateRight_uint(x, 25);
        }

        int4 dst;
        int4 a4 = cast(int4) a;
        int4 b4 = cast(int4) b;
        int4 k4 = cast(int4) k;

        // Unpack the eight state words and the two W+K sums.
        const A0 = b4.array[3];
        const B0 = b4.array[2];
        const C0 = a4.array[3];
        const D0 = a4.array[2];
        const E0 = b4.array[1];
        const F0 = b4.array[0];
        const G0 = a4.array[1];
        const H0 = a4.array[0];
        const W_K0 = k4.array[0];
        const W_K1 = k4.array[1];

        // Round 1. T1 is the term shared by the new A and the new E.
        const T1 = Ch(E0, F0, G0) + sum1(E0) + W_K0 + H0;
        const A1 = T1 + Maj(A0, B0, C0) + sum0(A0);
        const B1 = A0;
        const C1 = B0;
        const D1 = C0;
        const E1 = T1 + D0;
        const F1 = E0;
        const G1 = F0;
        const H1 = G0;

        // Round 2. Only A, B, E and F are returned, so the rotated
        // C, D, G and H of this round are not materialised.
        const T2 = Ch(E1, F1, G1) + sum1(E1) + W_K1 + H1;
        const A2 = T2 + Maj(A1, B1, C1) + sum0(A1);
        const B2 = A1;
        const E2 = T2 + D1;
        const F2 = E1;

        dst.ptr[3] = A2;
        dst.ptr[2] = B2;
        dst.ptr[1] = E2;
        dst.ptr[0] = F2;

        return cast(__m128i) dst;
    }
}
unittest
{
    __m128i a = [15, 20, 130, 12345];
    __m128i b = [15, 20, 130, 12345];
    __m128i k = [15, 20, 130, 12345];
    __m128i result = _mm_sha256rnds2_epu32(a, b, k);
    assert(result.array == [1384123044, -2050674062, 327754346, 956342016]);
}

// Rotate the 32-bit `value` right by `count` bits; `count` must be in 0 .. 31.
private uint bitwiseRotateRight_uint(const uint value, const uint count) @safe
{
    enum uint bits = 8 * uint.sizeof;
    assert(count < bits);
    // Mask the complementary shift: for count == 0 it would otherwise be a
    // left shift by 32, which is out of range for a 32-bit operand in D.
    return (value >> count) | (value << ((bits - count) & (bits - 1)));
}