/**
* `core.simd` emulation layer.
*
* Copyright: Copyright Guillaume Piolat 2016-2020, Stefanos Baziotis 2019.
* License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/
module inteli.types;


pure:
nothrow:
@nogc:

version(GNU)
{
    // Note: for GDC support, be sure to use https://explore.dgnu.org/

    version(X86_64)
    {
        enum MMXSizedVectorsAreEmulated = false;
        enum SSESizedVectorsAreEmulated = false;

        import gcc.builtins;

        /// Loads a `float4` from `pvec` through the `__builtin_ia32_loadups` builtin.
        float4 loadUnaligned(Vec)(const(float)* pvec) @trusted if (is(Vec == float4))
        {
            return __builtin_ia32_loadups(pvec);
        }

        /// Loads a `double2` from `pvec` through the `__builtin_ia32_loadupd` builtin.
        double2 loadUnaligned(Vec)(const(double)* pvec) @trusted if (is(Vec == double2))
        {
            return __builtin_ia32_loadupd(pvec);
        }

        /// Loads a `byte16` from `pvec` through the `__builtin_ia32_loaddqu` builtin.
        byte16 loadUnaligned(Vec)(const(byte)* pvec) @trusted if (is(Vec == byte16))
        {
            return cast(byte16) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
        }

        /// Loads a `short8` from `pvec` through the `__builtin_ia32_loaddqu` builtin.
        short8 loadUnaligned(Vec)(const(short)* pvec) @trusted if (is(Vec == short8))
        {
            return cast(short8) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
        }

        /// Loads an `int4` from `pvec` through the `__builtin_ia32_loaddqu` builtin.
        int4 loadUnaligned(Vec)(const(int)* pvec) @trusted if (is(Vec == int4))
        {
            return cast(int4) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
        }

        /// Loads a `long2` from `pvec` through the `__builtin_ia32_loaddqu` builtin.
        long2 loadUnaligned(Vec)(const(long)* pvec) @trusted if (is(Vec == long2))
        {
            return cast(long2) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
        }

        /// Stores the `float4` `v` to `pvec` through the `__builtin_ia32_storeups` builtin.
        void storeUnaligned(Vec)(Vec v, float* pvec) @trusted if (is(Vec == float4))
        {
            __builtin_ia32_storeups(pvec, v);
        }

        /// Stores the `double2` `v` to `pvec` through the `__builtin_ia32_storeupd` builtin.
        void storeUnaligned(Vec)(Vec v, double* pvec) @trusted if (is(Vec == double2))
        {
            __builtin_ia32_storeupd(pvec, v);
        }

        /// Stores the `byte16` `v` to `pvec` through the `__builtin_ia32_storedqu` builtin.
        void storeUnaligned(Vec)(Vec v, byte* pvec) @trusted if (is(Vec == byte16))
        {
            __builtin_ia32_storedqu(cast(char*)pvec, cast(ubyte16)v);
        }

        /// Stores the `short8` `v` to `pvec` through the `__builtin_ia32_storedqu` builtin.
        void storeUnaligned(Vec)(Vec v, short* pvec) @trusted if (is(Vec == short8))
        {
            __builtin_ia32_storedqu(cast(char*)pvec, cast(ubyte16)v);
        }

        /// Stores the `int4` `v` to `pvec` through the `__builtin_ia32_storedqu` builtin.
        void storeUnaligned(Vec)(Vec v, int* pvec) @trusted if (is(Vec == int4))
        {
            __builtin_ia32_storedqu(cast(char*)pvec, cast(ubyte16)v);
        }

        /// Stores the `long2` `v` to `pvec` through the `__builtin_ia32_storedqu` builtin.
        void storeUnaligned(Vec)(Vec v, long* pvec) @trusted if (is(Vec == long2))
        {
            __builtin_ia32_storedqu(cast(char*)pvec, cast(ubyte16)v);
        }

        // TODO: for performance, replace that anywhere possible by a GDC intrinsic
        /// Builds a vector by picking lanes from the concatenation of `a` and `b`.
        /// Mask values in `0 .. Count` select from `a`, values in
        /// `Count .. 2*Count` select from `b`. The mask must have exactly
        /// `Count` entries (checked at compile time).
        Vec shufflevector(Vec, mask...)(Vec a, Vec b) @trusted
        {
            enum Count = Vec.array.length;
            static assert(mask.length == Count);

            Vec r = void;
            foreach(int i, m; mask)
            {
                static assert (m < Count * 2);
                int ind = cast(int)m;
                if (ind < Count)
                    r.ptr[i] = a.array[ind];
                else
                    r.ptr[i] = b.array[ind - Count];
            }
            return r;
        }
    }
    else
    {
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
    }
}
else version(LDC)
{
    public import ldc.simd;

    // Use this alias to mention it should only be used with LDC,
    // for example when emulated shufflevector would just be wasteful.
    alias shufflevectorLDC = shufflevector;

    enum MMXSizedVectorsAreEmulated = false;
    enum SSESizedVectorsAreEmulated = false;
}
else version(DigitalMars)
{
    public import core.simd;

    version(D_SIMD)
    {
        enum MMXSizedVectorsAreEmulated = true;

        static if (__VERSION__ >= 2096)
        {
            enum SSESizedVectorsAreEmulated = true; // Still doesn't work well in DMD 2.096 because of DMD bugs
        }
        else
        {
            // Before DMD 2.096, blocked by https://issues.dlang.org/show_bug.cgi?id=21474
            enum SSESizedVectorsAreEmulated = true;
        }
    }
    else
    {
        // Some DMD 32-bit targets don't have D_SIMD
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
    }
}

/// True as soon as either the 8-byte or the 16-byte vector types are emulated.
enum CoreSimdIsEmulated = MMXSizedVectorsAreEmulated || SSESizedVectorsAreEmulated;

static if (CoreSimdIsEmulated)
{
    // core.simd is emulated in some capacity: introduce `VectorOps`

    /// Operators and constructors shared by every emulated vector struct.
    /// The struct mixing this in must declare a field `ArrayType array`.
    mixin template VectorOps(VectorType, ArrayType: BaseType[N], BaseType, size_t N)
    {
        enum Count = N;
        alias Base = BaseType;

        /// Pointer to the first lane.
        BaseType* ptr() return pure nothrow @nogc
        {
            return array.ptr;
        }

        // Unary operators
        VectorType opUnary(string op)() pure nothrow @safe @nogc
        {
            VectorType res = void;
            mixin("res.array[] = " ~ op ~ "array[];");
            return res;
        }

        // Binary operators
        VectorType opBinary(string op)(VectorType other) pure const nothrow @safe @nogc
        {
            VectorType res = void;
            mixin("res.array[] = array[] " ~ op ~ " other.array[];");
            return res;
        }

        // Assigning a BaseType value
        void opAssign(BaseType e) pure nothrow @safe @nogc
        {
            array[] = e;
        }

        // Assigning a static array
        void opAssign(ArrayType v) pure nothrow @safe @nogc
        {
            array[] = v[];
        }

        // Compound assignment, lane by lane
        void opOpAssign(string op)(VectorType other) pure nothrow @safe @nogc
        {
            mixin("array[] " ~ op ~ "= other.array[];");
        }

        // Constructing from a static array
        this(ArrayType v) pure nothrow @safe @nogc
        {
            array[] = v[];
        }

        // Broadcast constructor
        this(BaseType x) pure nothrow @safe @nogc
        {
            array[] = x;
        }

        /// We can't support implicit conversion but do support explicit casting.
        /// "Vector types of the same size can be implicitly converted among each other."
        /// Casting to another vector type is always just a raw copy.
        VecDest opCast(VecDest)() pure const nothrow @trusted @nogc
            if (VecDest.sizeof == VectorType.sizeof)
        {
            VecDest dest = void;
            // Copy
            dest.array[] = (cast(typeof(dest.array))cast(void[VectorType.sizeof])array)[];
            return dest;
        }

        /// Lane access by index.
        ref inout(BaseType) opIndex(size_t i) inout pure nothrow @safe @nogc
        {
            return array[i];
        }

    }

    // they just weren't interesting enough, use v.array[i] instead.
    /// Reinterprets `vec` as a `Vec` (same size required) and returns lane `index`.
    deprecated auto extractelement(Vec, int index, Vec2)(Vec2 vec) @trusted
    {
        static assert(Vec.sizeof == Vec2.sizeof);
        import core.stdc.string: memcpy;
        Vec v = void;
        memcpy(&v, &vec, Vec2.sizeof);
        return v.array[index];
    }

    // they just weren't interesting enough, use v.ptr[i] = x instead.
    /// Reinterprets `vec` as a `Vec` (same size required), sets lane `index`
    /// to `e` and returns the resulting `Vec`.
    deprecated auto insertelement(Vec, int index, Vec2)(Vec2 vec, Vec.Base e) @trusted
    {
        static assert(Vec.sizeof == Vec2.sizeof);
        import core.stdc.string: memcpy;
        Vec v = void;
        memcpy(&v, &vec, Vec2.sizeof);
        v.array[index] = e;
        return v;
    }

    template loadUnaligned(Vec)
    {
        // Note: can't be @safe with this signature
        /// Reads one `Vec` worth of elements starting at `pvec`.
        Vec loadUnaligned(const(BaseType!Vec)* pvec) @trusted
        {
            // True when `Vec` has a size for which vectors are NOT emulated.
            enum bool isVector = ( (Vec.sizeof == 8)  && (!MMXSizedVectorsAreEmulated)
                                || (Vec.sizeof == 16) && (!SSESizedVectorsAreEmulated) );

            static if (isVector)
            {
                // PERF DMD
                /* enabling this need to move loadUnaligned and storeUnaligned to internals.d
                static if (DMD_with_DSIMD && Vec.sizeof == 8)
                {
                    static if (is(Vec == double2))
                        return cast(Vec)__simd(XMM.LODUPD, *pvec);
                    else static if (is(Vec == float4))
                        return cast(Vec)__simd(XMM.LODUPS, *pvec);
                    else
                        return cast(Vec)__simd(XMM.LODDQU, *pvec);
                }
                else */
                {
                    // Element-by-element copy
                    enum size_t Count = Vec.array.length;
                    Vec result;
                    foreach(int i; 0..Count)
                    {
                        result.ptr[i] = pvec[i];
                    }
                    return result;
                }
            }
            else
            {
                // Since this vector is emulated, it doesn't have alignment constraints
                // and as such we can just cast it.
                return *cast(Vec*)(pvec);
            }
        }
    }

    template storeUnaligned(Vec)
    {
        // Note: can't be @safe with this signature
        /// Writes every lane of `v` to memory starting at `pvec`.
        void storeUnaligned(Vec v, BaseType!Vec* pvec) @trusted
        {
            // True when `Vec` has a size for which vectors are NOT emulated.
            enum bool isVector = ( (Vec.sizeof == 8)  && (!MMXSizedVectorsAreEmulated)
                                || (Vec.sizeof == 16) && (!SSESizedVectorsAreEmulated) );

            static if (isVector)
            {
                // PERF DMD
                /* enabling this need to move loadUnaligned and storeUnaligned to internals.d
                static if (DMD_with_DSIMD && Vec.sizeof == 8)
                {
                    static if (is(Vec == double2))
                        __simd_sto(XMM.STOUPD, *pvec, value);
                    else static if (is(Vec == float4))
                        __simd_sto(XMM.STOUPS, *pvec, value);
                    else
                        __simd_sto(XMM.STODQU, *pvec, value);
                }
                else*/
                {
                    // Element-by-element copy
                    enum size_t Count = Vec.array.length;
                    foreach(int i; 0..Count)
                        pvec[i] = v.array[i];
                }
            }
            else
            {
                // Emulated vector: plain struct store through the pointer.
                *cast(Vec*)(pvec) = v;
            }
        }
    }

    /// Builds a vector by picking lanes from the concatenation of `a` and `b`.
    /// Mask values in `0 .. Count` select from `a`, values in
    /// `Count .. 2*Count` select from `b`. The mask must have exactly
    /// `Count` entries (checked at compile time).
    Vec shufflevector(Vec, mask...)(Vec a, Vec b) @safe
    {
        enum size_t Count = Vec.array.length;
        static assert(mask.length == Count);

        Vec r = void;
        foreach(int i, m; mask)
        {
            static assert (m < Count * 2);
            enum int ind = cast(int)m;
            static if (ind < Count)
                r.array[i] = a.array[ind];
            else
                r.array[i] = b.array[ind-Count];
        }
        return r;
    }
}
else
{
    public import core.simd;

    // GDC cannot convert implicitly __vector from signed to unsigned, but LDC can
    // And LDC sometimes needs those unsigned vector types for some intrinsics.
    // For internal use only.
    package alias ushort8 = Vector!(ushort[8]);
    package alias ubyte8  = Vector!(ubyte[8]);
    package alias ubyte16 = Vector!(ubyte[16]);
}

// Emulate ldc.simd cmpMask
version(LDC)
{}
else
{
    /// Element type of the vector type `V` (type of `V.array[0]`).
    private template BaseType(V)
    {
        alias typeof( ( { V v; return v; }()).array[0]) BaseType;
    }

    /// Value of one lane with every bit set, expressed in the element type of `V`.
    private template TrueMask(V)
    {
        alias Elem = BaseType!V;

        static if (is(Elem == float))
        {
            immutable uint m1 = 0xffffffff;
            enum Elem TrueMask = *cast(float*)(&m1);
        }
        else static if (is(Elem == double))
        {
            immutable ulong m1 = 0xffffffff_ffffffff;
            enum Elem TrueMask = *cast(double*)(&m1);
        }
        else // integer case
        {
            enum Elem TrueMask = -1;
        }
    }

    /// Per-lane `a == b`: all bits set in lanes where it holds, 0 elsewhere.
    Vec equalMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "oeq" comparison
    {
        enum size_t Count = Vec.array.length;
        Vec result;
        foreach(int i; 0..Count)
        {
            bool cond = a.array[i] == b.array[i];
            result.ptr[i] = cond ? TrueMask!Vec : 0;
        }
        return result;
    }

    /// Per-lane `a != b`: all bits set in lanes where it holds, 0 elsewhere.
    Vec notEqualMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "one" comparison
    {
        enum size_t Count = Vec.array.length;
        Vec result;
        foreach(int i; 0..Count)
        {
            bool cond = a.array[i] != b.array[i];
            result.ptr[i] = cond ? TrueMask!Vec : 0;
        }
        return result;
    }

    /// Per-lane `a > b`: all bits set in lanes where it holds, 0 elsewhere.
    Vec greaterMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "ogt" comparison
    {
        enum size_t Count = Vec.array.length;
        Vec result;
        foreach(int i; 0..Count)
        {
            bool cond = a.array[i] > b.array[i];
            result.ptr[i] = cond ? TrueMask!Vec : 0;
        }
        return result;
    }

    /// Per-lane `a >= b`: all bits set in lanes where it holds, 0 elsewhere.
    Vec greaterOrEqualMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "oge" comparison
    {
        enum size_t Count = Vec.array.length;
        Vec result;
        foreach(int i; 0..Count)
        {
            // Must be `>=`: equal lanes have to produce a true mask,
            // otherwise this would be identical to greaterMask.
            bool cond = a.array[i] >= b.array[i];
            result.ptr[i] = cond ? TrueMask!Vec : 0;
        }
        return result;
    }
}

unittest
{
    float4 a = [1, 3, 5, 7];
    float4 b = [2, 3, 4, 5];
    int4 c = cast(int4)(greaterMask!float4(a, b));
    static immutable int[4] correct = [0, 0, 0xffff_ffff, 0xffff_ffff];
    assert(c.array == correct);
}

// greaterOrEqualMask must report equal lanes as true
unittest
{
    float4 a = [1, 3, 5, 7];
    float4 b = [2, 3, 4, 5];
    int4 c = cast(int4)(greaterOrEqualMask!float4(a, b));
    static immutable int[4] correct = [0, 0xffff_ffff, 0xffff_ffff, 0xffff_ffff];
    assert(c.array == correct);
}

static if (MMXSizedVectorsAreEmulated)
{
    /// MMX-like SIMD types
    struct float2
    {
        float[2] array;
        mixin VectorOps!(float2, float[2]);
    }

    struct byte8
    {
        byte[8] array;
        mixin VectorOps!(byte8, byte[8]);
    }

    struct short4
    {
        short[4] array;
        mixin VectorOps!(short4, short[4]);
    }

    struct int2
    {
        int[2] array;
        mixin VectorOps!(int2, int[2]);
    }

    struct long1
    {
        long[1] array;
        mixin VectorOps!(long1, long[1]);
    }
}
else
{
    // For this compiler, defining MMX-sized vectors is working.
    public import core.simd;
    alias Vector!(long [1]) long1;
    alias Vector!(float[2]) float2;
    alias Vector!(int  [2]) int2;
    alias Vector!(short[4]) short4;
    alias Vector!(byte [8]) byte8;
}

static assert(float2.sizeof == 8);
static assert(byte8.sizeof == 8);
static assert(short4.sizeof == 8);
static assert(int2.sizeof == 8);
static assert(long1.sizeof == 8);


static if (SSESizedVectorsAreEmulated)
{
    /// SSE-like SIMD types

    struct float4
    {
        float[4] array;
        mixin VectorOps!(float4, float[4]);
    }

    struct byte16
    {
        byte[16] array;
        mixin VectorOps!(byte16, byte[16]);
    }

    struct short8
    {
        short[8] array;
        mixin VectorOps!(short8, short[8]);
    }

    struct int4
    {
        int[4] array;
        mixin VectorOps!(int4, int[4]);
    }

    struct long2
    {
        long[2] array;
        mixin VectorOps!(long2, long[2]);
    }

    struct double2
    {
        double[2] array;
        mixin VectorOps!(double2, double[2]);
    }
}

static assert(float4.sizeof == 16);
static assert(byte16.sizeof == 16);
static assert(short8.sizeof == 16);
static assert(int4.sizeof == 16);
static assert(long2.sizeof == 16);
static assert(double2.sizeof == 16);





alias __m128 = float4;
alias __m128i = int4;
alias __m128d = double2;
alias __m64 = long1; // like in Clang, __m64 is a vector of 1 long

/// Packs two 1-bit selectors into a shuffle immediate: `x` in bit 1, `y` in bit 0.
/// Both arguments must be 0 or 1 (asserted).
int _MM_SHUFFLE2(int x, int y) pure @safe
{
    assert(x >= 0 && x <= 1);
    assert(y >= 0 && y <= 1);
    return (x << 1) | y;
}

/// Packs four 2-bit selectors into a shuffle immediate:
/// `z` in bits 7..6, `y` in bits 5..4, `x` in bits 3..2, `w` in bits 1..0.
/// Every argument must be in 0..3 (asserted).
int _MM_SHUFFLE(int z, int y, int x, int w) pure @safe
{
    assert(x >= 0 && x <= 3);
    assert(y >= 0 && y <= 3);
    assert(z >= 0 && z <= 3);
    assert(w >= 0 && w <= 3);
    return (z<<6) | (y<<4) | (x<<2) | w;
}

// test assignment from scalar to vector type
unittest
{
    float4 A = 3.0f;
    float[4] correctA = [3.0f, 3.0f, 3.0f, 3.0f];
    assert(A.array == correctA);

    int2 B = 42;
    int[2] correctB = [42, 42];
    assert(B.array == correctB);
}