/**
* `core.simd` emulation layer.
*
* For each supported compiler (GDC, LDC, DMD) this module decides which vector
* size classes (8-byte "MMX", 16-byte "SSE", 32-byte "AVX") are native
* `core.simd` vectors and which are emulated by plain structs wrapping a
* static array, then defines the vector types accordingly.
*
* Copyright: Copyright Guillaume Piolat 2016-2020, Stefanos Baziotis 2019.
* License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/
module inteli.types;


// These attribute labels apply to every declaration that follows in the module.
pure:
nothrow:
@nogc:

// Per-compiler configuration. Each branch defines the three flags
// `MMXSizedVectorsAreEmulated`, `SSESizedVectorsAreEmulated` and
// `AVXSizedVectorsAreEmulated`; `true` means the corresponding vector types
// are declared further down as structs using `VectorOps`.
version(GNU)
{
    // Note: for GDC support, be sure to use https://explore.dgnu.org/

    version(X86_64)
    {
        enum MMXSizedVectorsAreEmulated = false;
        enum SSESizedVectorsAreEmulated = false;

        // TODO: use D_AVX and D_AVX2 eventually to detect AVX?
        enum AVXSizedVectorsAreEmulated = true;

        import gcc.builtins;

        // Two-vector shuffle for GDC: lane `i` of the result is taken from the
        // concatenation of `a` and `b` at position `mask[i]` (indices below
        // `Count` select from `a`, the others from `b`).
        // `mask` must hold exactly one index per lane, each below `2 * Count`;
        // both conditions are checked at compile time.
        // TODO: for performance, replace that anywhere possible by a GDC intrinsic
        Vec shufflevector(Vec, mask...)(Vec a, Vec b) @trusted
        {
            enum Count = Vec.array.length;
            static assert(mask.length == Count);

            // Left uninitialized on purpose: every lane is written by the loop below.
            Vec r = void;
            foreach(int i, m; mask)
            {
                static assert (m < Count * 2);
                int ind = cast(int)m;
                if (ind < Count)
                    r.ptr[i] = a.array[ind];
                else
                    r.ptr[i] = b.array[ind - Count];
            }
            return r;
        }
    }
    else
    {
        // GDC on anything other than x86_64: everything is emulated.
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
        enum AVXSizedVectorsAreEmulated = true;
    }
}
else version(LDC)
{
    public import ldc.simd;

    // Use this alias to mention it should only be used with LDC,
    // for example when emulated shufflevector would just be wasteful.
    // (`shufflevector` itself is presumably the one made visible by the
    // `ldc.simd` import above; it is not defined in this branch.)
    alias shufflevectorLDC = shufflevector;

    // With LDC no size class is emulated.
    enum MMXSizedVectorsAreEmulated = false;
    enum SSESizedVectorsAreEmulated = false;
    enum AVXSizedVectorsAreEmulated = false;
}
else version(DigitalMars)
{
    public import core.simd;

    // Note: turning this on is desirable,
    // and leads to many bugs being discovered upstream.
    // The fact that it works relies on many workarounds.
    // In particular, intel-intrinsics with this on is a honeypot for DMD backend bugs.
    // What happens next is that contributors end up hitting a DMD bug in their PR.
    //
    // Failed attempts: xxx
    //
    // The switch is only turned on for front-end versions >= 2100.
    static if (__VERSION__ >= 2100)
    {
        enum bool tryToEnableCoreSimdWithDMD = true;
    }
    else
    {
        enum bool tryToEnableCoreSimdWithDMD = false;
    }

    version(D_SIMD)
    {
        // MMX-sized vectors are always emulated with DMD; SSE-sized vectors
        // are native only when the switch above is on.
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = !tryToEnableCoreSimdWithDMD;

        // Note: with DMD, AVX-sized vectors can't be enabled yet.
        // On linux + x86_64, this will fail since a few operands seem to be missing.
        // Both branches are therefore `true` for now; the `version(D_AVX)`
        // split only marks where native AVX would be enabled.
        version(D_AVX)
            enum AVXSizedVectorsAreEmulated = true;
        else
            enum AVXSizedVectorsAreEmulated = true;
    }
    else
    {
        // Some DMD 32-bit targets don't have D_SIMD
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
        enum AVXSizedVectorsAreEmulated = true;
    }
}

// True as soon as at least one size class is emulated.
enum CoreSimdIsEmulated = MMXSizedVectorsAreEmulated || SSESizedVectorsAreEmulated || AVXSizedVectorsAreEmulated;

// Never set with GDC; otherwise follows `CoreSimdIsEmulated`.
// In the portion of the module visible here, this flag only gates the generic
// `shufflevector` defined below.
version(GNU)
    enum bool DefineGenericLoadStoreUnaligned = false;
else
    enum bool DefineGenericLoadStoreUnaligned = CoreSimdIsEmulated;


static if (CoreSimdIsEmulated)
{
    // core.simd is emulated in some capacity: introduce `VectorOps`

    // Operations shared by all emulated vector structs.
    // The struct mixing this in must declare a field named `array` of type
    // `ArrayType`; `BaseType` and `N` are deduced from `ArrayType` by the
    // `ArrayType: BaseType[N]` specialization.
    mixin template VectorOps(VectorType, ArrayType: BaseType[N], BaseType, size_t N)
    {
        enum Count = N;          // number of lanes
        alias Base = BaseType;   // lane type

        // Pointer to the first lane of `array`.
        BaseType* ptr() return pure nothrow @nogc
        {
            return array.ptr;
        }

        // Unary operators
        // Applies `op` to every lane through an array operation.
        VectorType opUnary(string op)() pure nothrow @safe @nogc
        {
            VectorType res = void;
            mixin("res.array[] = " ~ op ~ "array[];");
            return res;
        }

        // Binary operators
        // Lane-wise `this op other`, through an array operation.
        VectorType opBinary(string op)(VectorType other) pure const nothrow @safe @nogc
        {
            VectorType res = void;
            mixin("res.array[] = array[] " ~ op ~ " other.array[];");
            return res;
        }

        // Assigning a BaseType value
        // (the value is broadcast to all lanes)
        void opAssign(BaseType e) pure nothrow @safe @nogc
        {
            array[] = e;
        }

        // Assigning a static array
        void opAssign(ArrayType v) pure nothrow @safe @nogc
        {
            array[] = v[];
        }

        // Compound assignment (`+=`, `-=`, ...), lane-wise and in place.
        void opOpAssign(string op)(VectorType other) pure nothrow @safe @nogc
        {
            mixin("array[] " ~ op ~ "= other.array[];");
        }

        // Constructing from a static array (`ArrayType` is `BaseType[N]`)
        this(ArrayType v) pure nothrow @safe @nogc
        {
            array[] = v[];
        }

        // Broadcast constructor
        this(BaseType x) pure nothrow @safe @nogc
        {
            array[] = x;
        }

        /// We can't support implicit conversion but do support explicit casting.
        /// "Vector types of the same size can be implicitly converted among each other."
        /// Casting to another vector type is always just a raw copy.
        VecDest opCast(VecDest)() pure const nothrow @trusted @nogc
            if (VecDest.sizeof == VectorType.sizeof)
        {
            VecDest dest = void;
            // Copy
            // The source lanes are viewed as raw bytes, then reinterpreted as
            // the destination's array type; no per-lane value conversion occurs.
            dest.array[] = (cast(typeof(dest.array))cast(void[VectorType.sizeof])array)[];
            return dest;
        }

        // Lane access by index, returned by reference.
        ref inout(BaseType) opIndex(size_t i) inout return pure nothrow @safe @nogc
        {
            return array[i];
        }

    }
}
else
{
    public import core.simd;

    // GDC cannot convert implicitly __vector from signed to unsigned, but LDC can
    // And LDC sometimes need those unsigned vector types for some intrinsics.
    // For internal use only.
    package alias ushort8 = Vector!(ushort[8]);
    package alias ubyte8  = Vector!(ubyte[8]);
    package alias ubyte16 = Vector!(ubyte[16]);
}

static if (DefineGenericLoadStoreUnaligned)
{
    // Generic two-vector shuffle, only for vector types smaller than 32 bytes.
    // Lane `i` of the result is taken from the concatenation of `a` and `b`
    // at position `mask[i]`. The source lane is selected with `static if`,
    // so every index is resolved at compile time.
    Vec shufflevector(Vec, mask...)(Vec a, Vec b) @safe if (Vec.sizeof < 32)
    {
        enum size_t Count = Vec.array.length;
        static assert(mask.length == Count);

        // Left uninitialized on purpose: every lane is written by the loop below.
        Vec r = void;
        foreach(int i, m; mask)
        {
            static assert (m < Count * 2);
            enum int ind = cast(int)m;
            static if (ind < Count)
                r.array[i] = a.array[ind];
            else
                r.array[i] = b.array[ind-Count];
        }
        return r;
    }
}

// Emulate ldc.simd cmpMask and other masks.
// Note: these should be deprecated on non-LDC,
// since it's slower to generate that code.
version(LDC)
{}
else
{
    // Element type of vector type `V`, i.e. the type of `V.init.array[0]`.
    private template BaseType(V)
    {
        alias typeof( ( { V v; return v; }()).array[0]) BaseType;
    }

    // Lane value with all bits set, used as the "true" result of the mask
    // functions below.
    private template TrueMask(V)
    {
        alias Elem = BaseType!V;

        static if (is(Elem == float))
        {
            // float whose bit pattern is 0xffffffff
            immutable uint m1 = 0xffffffff;
            enum Elem TrueMask = *cast(float*)(&m1);
        }
        else static if (is(Elem == double))
        {
            // double whose bit pattern is 0xffffffff_ffffffff
            immutable ulong m1 = 0xffffffff_ffffffff;
            enum Elem TrueMask = *cast(double*)(&m1);
        }
        else // integer case
        {
            enum Elem TrueMask = -1;
        }
    }

    // Lane-wise `a == b`: a lane is `TrueMask!Vec` where the comparison holds
    // and 0 otherwise.
    Vec equalMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "oeq" comparison
    {
        enum size_t Count = Vec.array.length;
        Vec result;
        foreach(int i; 0..Count)
        {
            bool cond = a.array[i] == b.array[i];
            result.ptr[i] = cond ? TrueMask!Vec : 0;
        }
        return result;
    }

    // Lane-wise `a != b`: a lane is `TrueMask!Vec` where the comparison holds
    // and 0 otherwise.
    // NOTE(review): the original comment labels this as the ordered "one"
    // comparison, but D's `!=` is true when either operand is NaN, which is
    // unordered ("une"-like) behaviour. Confirm which label is intended.
    Vec notEqualMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "one" comparison
    {
        enum size_t Count = Vec.array.length;
        Vec result;
        foreach(int i; 0..Count)
        {
            bool cond = a.array[i] != b.array[i];
            result.ptr[i] = cond ? TrueMask!Vec : 0;
        }
        return result;
    }

    // Lane-wise `a > b`: a lane is `TrueMask!Vec` where the comparison holds
    // and 0 otherwise.
    Vec greaterMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "ogt" comparison
    {
        enum size_t Count = Vec.array.length;
        Vec result;
        foreach(int i; 0..Count)
        {
            bool cond = a.array[i] > b.array[i];
            result.ptr[i] = cond ? TrueMask!Vec : 0;
        }
        return result;
    }
}

// greaterMask on float4: lanes where a > b must read as all-ones once the
// result is reinterpreted as int4, and as zero elsewhere.
unittest
{
    float4 a = [1, 3, 5, 7];
    float4 b = [2, 3, 4, 5];
    int4 c = cast(int4)(greaterMask!float4(a, b));
    static immutable int[4] correct = [0, 0, 0xffff_ffff, 0xffff_ffff];
    assert(c.array == correct);
}

static if (MMXSizedVectorsAreEmulated)
{
    /// MMX-like SIMD types
    struct float2
    {
        float[2] array;
        mixin VectorOps!(float2, float[2]);
    }

    struct byte8
    {
        byte[8] array;
        mixin VectorOps!(byte8, byte[8]);
    }

    struct short4
    {
        short[4] array;
        mixin VectorOps!(short4, short[4]);
    }

    struct int2
    {
        int[2] array;
        mixin VectorOps!(int2, int[2]);
    }

    struct long1
    {
        long[1] array;
        mixin VectorOps!(long1, long[1]);
    }
}
else
{
    // For this compiler, defining MMX-sized vectors is working.
    public import core.simd;
    alias Vector!(long [1]) long1;
    alias Vector!(float[2]) float2;
    alias Vector!(int  [2]) int2;
    alias Vector!(short[4]) short4;
    alias Vector!(byte [8]) byte8;
}

// Whether emulated or native, every MMX-sized type must occupy exactly 8 bytes.
static assert(float2.sizeof == 8);
static assert(byte8.sizeof == 8);
static assert(short4.sizeof == 8);
static assert(int2.sizeof == 8);
static assert(long1.sizeof == 8);


static if (SSESizedVectorsAreEmulated)
{
    /// SSE-like SIMD types

    struct float4
    {
        float[4] array;
        mixin VectorOps!(float4, float[4]);
    }

    struct byte16
    {
        byte[16] array;
        mixin VectorOps!(byte16, byte[16]);
    }

    struct short8
    {
        short[8] array;
        mixin VectorOps!(short8, short[8]);
    }

    struct int4
    {
        int[4] array;
        mixin VectorOps!(int4, int[4]);
    }

    struct long2
    {
        long[2] array;
        mixin VectorOps!(long2, long[2]);
    }

    struct double2
    {
        double[2] array;
        mixin VectorOps!(double2, double[2]);
    }
}

// Whether emulated or native, every SSE-sized type must occupy exactly 16 bytes.
static assert(float4.sizeof == 16);
static assert(byte16.sizeof == 16);
static assert(short8.sizeof == 16);
static assert(int4.sizeof == 16);
static assert(long2.sizeof == 16);
static assert(double2.sizeof == 16);


static if (AVXSizedVectorsAreEmulated)
{
    /// AVX-like SIMD types

    struct float8
    {
        float[8] array;
        mixin VectorOps!(float8, float[8]);
    }

    struct byte32
    {
        byte[32] array;
        mixin VectorOps!(byte32, byte[32]);
    }

    struct short16
    {
        short[16] array;
        mixin VectorOps!(short16, short[16]);
    }

    struct int8
    {
        int[8] array;
        mixin VectorOps!(int8, int[8]);
    }

    struct long4
    {
        long[4] array;
        mixin VectorOps!(long4, long[4]);
    }

    struct double4
    {
        double[4] array;
        mixin VectorOps!(double4, double[4]);
    }
}
else
{
    // AVX-sized types are taken from core.simd in this configuration.
    public import core.simd;
}
// Whether emulated or native, every AVX-sized type must occupy exactly 32 bytes.
static assert(32 == float8.sizeof);
static assert(32 == byte32.sizeof);
static assert(32 == short16.sizeof);
static assert(32 == int8.sizeof);
static assert(32 == long4.sizeof);
static assert(32 == double4.sizeof);


// Intel-style names for the vector types.
alias __m64   = long1;   // like in Clang, __m64 is a vector of 1 long
alias __m128  = float4;
alias __m128d = double2;
alias __m128i = int4;
alias __m256  = float8;
alias __m256d = double4;
alias __m256i = long4;   // long long __vector with ICC, GCC, and clang

/// Packs two 1-bit lane selectors into a shuffle immediate:
/// `x` goes to bit 1 and `y` to bit 0.
/// Both arguments are asserted to be 0 or 1.
int _MM_SHUFFLE2(int x, int y) pure @safe
{
    assert(0 <= x && x < 2);
    assert(0 <= y && y < 2);
    immutable int upperBit = x << 1;
    return upperBit | y;
}

/// Packs four 2-bit lane selectors into a shuffle immediate:
/// `z` occupies bits 7..6, `y` bits 5..4, `x` bits 3..2 and `w` bits 1..0.
/// Every argument is asserted to lie in 0..3.
int _MM_SHUFFLE(int z, int y, int x, int w) pure @safe
{
    assert(0 <= x && x < 4);
    assert(0 <= y && y < 4);
    assert(0 <= z && z < 4);
    assert(0 <= w && w < 4);
    immutable int fieldZ = z << 6;
    immutable int fieldY = y << 4;
    immutable int fieldX = x << 2;
    return fieldZ | fieldY | fieldX | w;
}

// Initializing a vector from a scalar must broadcast it to every lane.
unittest
{
    float[4] expectedFloats = 3.0f;
    float4 splatFloat = 3.0f;
    assert(splatFloat.array == expectedFloats);

    int[2] expectedInts = 42;
    int2 splatInt = 42;
    assert(splatInt.array == expectedInts);
}