/**
* `core.simd` emulation layer.
*
* Copyright: Copyright Guillaume Piolat 2016-2020, Stefanos Baziotis 2019.
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/
module inteli.types;


pure:
nothrow:
@nogc:

version(GNU)
{
    // Note: for GDC support, be sure to use https://explore.dgnu.org/

    version(X86_64)
    {
        enum MMXSizedVectorsAreEmulated = false;
        enum SSESizedVectorsAreEmulated = false;

        import gcc.builtins;

        float4 loadUnaligned(Vec)(const(float)* pvec) @trusted if (is(Vec == float4))
        {
            return __builtin_ia32_loadups(pvec);
        }

        double2 loadUnaligned(Vec)(const(double)* pvec) @trusted if (is(Vec == double2))
        {
            return __builtin_ia32_loadupd(pvec);
        }

        byte16 loadUnaligned(Vec)(const(byte)* pvec) @trusted if (is(Vec == byte16))
        {
            return cast(byte16) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
        }

        short8 loadUnaligned(Vec)(const(short)* pvec) @trusted if (is(Vec == short8))
        {
            return cast(short8) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
        }

        int4 loadUnaligned(Vec)(const(int)* pvec) @trusted if (is(Vec == int4))
        {
            return cast(int4) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
        }

        long2 loadUnaligned(Vec)(const(long)* pvec) @trusted if (is(Vec == long2))
        {
            return cast(long2) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
        }

        void storeUnaligned(Vec)(Vec v, float* pvec) @trusted if (is(Vec == float4))
        {
            __builtin_ia32_storeups(pvec, v);
        }

        void storeUnaligned(Vec)(Vec v, double* pvec) @trusted if (is(Vec == double2))
        {
            __builtin_ia32_storeupd(pvec, v);
        }

        void storeUnaligned(Vec)(Vec v, byte* pvec) @trusted if (is(Vec == byte16))
        {
            __builtin_ia32_storedqu(cast(char*)pvec, v);
        }

        void storeUnaligned(Vec)(Vec v, short* pvec) @trusted if (is(Vec == short8))
        {
            __builtin_ia32_storedqu(cast(char*)pvec, v);
        }

        void storeUnaligned(Vec)(Vec v, int* pvec) @trusted if (is(Vec == int4))
        {
            __builtin_ia32_storedqu(cast(char*)pvec, v);
        }

        void storeUnaligned(Vec)(Vec v, long* pvec) @trusted if (is(Vec == long2))
        {
            __builtin_ia32_storedqu(cast(char*)pvec, v);
        }

        // TODO: for performance, replace that anywhere possible by a GDC intrinsic
        Vec shufflevector(Vec, mask...)(Vec a, Vec b) @trusted
        {
            enum Count = Vec.array.length;
            static assert(mask.length == Count);

            Vec r = void;
            foreach(int i, m; mask)
            {
                static assert (m < Count * 2);
                int ind = cast(int)m;
                if (ind < Count)
                    r.ptr[i] = a.array[ind];
                else
                    r.ptr[i] = b.array[ind - Count];
            }
            return r;
        }
    }
    else
    {
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
    }
}
else version(LDC)
{
    public import ldc.simd;

    // Use this alias to mention it should only be used with LDC,
    // for example when emulated shufflevector would just be wasteful.
    alias shufflevectorLDC = shufflevector;

    enum MMXSizedVectorsAreEmulated = false;
    enum SSESizedVectorsAreEmulated = false;
}
else version(DigitalMars)
{
    public import core.simd;

    version(D_SIMD)
    {
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true; // Should be false, but it is blocked by https://issues.dlang.org/show_bug.cgi?id=21474
    }
    else
    {
        // Some DMD 32-bit targets don't have D_SIMD
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
    }
}

enum CoreSimdIsEmulated = MMXSizedVectorsAreEmulated || SSESizedVectorsAreEmulated;

static if (CoreSimdIsEmulated)
{
    // core.simd is emulated in some capacity: introduce `VectorOps`

    /// Provides the subset of `core.simd`'s `Vector` interface that the
    /// emulated vector structs below need: element access, unary/binary
    /// operators, broadcast construction, and bit-preserving casts.
    mixin template VectorOps(VectorType, ArrayType: BaseType[N], BaseType, size_t N)
    {
        enum Count = N;
        alias Base = BaseType;

        BaseType* ptr() return pure nothrow @nogc
        {
            return array.ptr;
        }

        // Unary operators
        VectorType opUnary(string op)() pure nothrow @safe @nogc
        {
            VectorType res = void;
            mixin("res.array[] = " ~ op ~ "array[];");
            return res;
        }

        // Binary operators
        VectorType opBinary(string op)(VectorType other) pure const nothrow @safe @nogc
        {
            VectorType res = void;
            mixin("res.array[] = array[] " ~ op ~ " other.array[];");
            return res;
        }

        // Assigning a static array
        void opAssign(ArrayType v) pure nothrow @safe @nogc
        {
            array[] = v[];
        }

        void opOpAssign(string op)(VectorType other) pure nothrow @safe @nogc
        {
            mixin("array[] " ~ op ~ "= other.array[];");
        }

        // Constructor from a static array (was mislabeled "dyn array")
        this(ArrayType v) pure nothrow @safe @nogc
        {
            array[] = v[];
        }

        // Broadcast constructor
        this(BaseType x) pure nothrow @safe @nogc
        {
            array[] = x;
        }

        /// We can't support implicit conversion but do support explicit casting.
        /// "Vector types of the same size can be implicitly converted among each other."
        /// Casting to another vector type is always just a raw copy.
        VecDest opCast(VecDest)() pure const nothrow @trusted @nogc
            if (VecDest.sizeof == VectorType.sizeof)
        {
            VecDest dest = void;
            // Copy
            dest.array[] = (cast(typeof(dest.array))cast(void[VectorType.sizeof])array)[];
            return dest;
        }

        ref inout(BaseType) opIndex(size_t i) inout pure nothrow @safe @nogc
        {
            return array[i];
        }

    }

    // they just weren't interesting enough, use v.array[i] instead.
    deprecated auto extractelement(Vec, int index, Vec2)(Vec2 vec) @trusted
    {
        static assert(Vec.sizeof == Vec2.sizeof);
        import core.stdc.string: memcpy;
        Vec v = void;
        memcpy(&v, &vec, Vec2.sizeof);
        return v.array[index];
    }

    // they just weren't interesting enough, use v.ptr[i] = x instead.
    deprecated auto insertelement(Vec, int index, Vec2)(Vec2 vec, Vec.Base e) @trusted
    {
        static assert(Vec.sizeof == Vec2.sizeof);
        import core.stdc.string: memcpy;
        Vec v = void;
        memcpy(&v, &vec, Vec2.sizeof);
        v.array[index] = e;
        return v;
    }

    /// Loads a vector from a possibly-unaligned pointer.
    template loadUnaligned(Vec)
    {
        // Note: can't be @safe with this signature
        Vec loadUnaligned(const(BaseType!Vec)* pvec) @trusted
        {
            enum bool isVector = ( (Vec.sizeof == 8)  && (!MMXSizedVectorsAreEmulated)
                                || (Vec.sizeof == 16) && (!SSESizedVectorsAreEmulated) );

            static if (isVector)
            {
                // PERF: there is probably something faster to do for this compiler (DMD).
                // Avoid this on DMD in the future.
                enum size_t Count = Vec.array.length;
                Vec result;
                foreach(int i; 0..Count)
                {
                    result.ptr[i] = pvec[i];
                }
                return result;
            }
            else
            {
                // Since this vector is emulated, it doesn't have alignment constraints
                // and as such we can just cast it.
                return *cast(Vec*)(pvec);
            }
        }
    }

    /// Stores a vector to a possibly-unaligned pointer.
    template storeUnaligned(Vec)
    {
        // Note: can't be @safe with this signature
        void storeUnaligned(Vec v, BaseType!Vec* pvec) @trusted
        {
            enum bool isVector = ( (Vec.sizeof == 8)  && (!MMXSizedVectorsAreEmulated)
                                || (Vec.sizeof == 16) && (!SSESizedVectorsAreEmulated) );

            static if (isVector)
            {
                // PERF: there is probably something faster to do for this compiler (DMD).
                // Avoid this on DMD in the future.
                enum size_t Count = Vec.array.length;
                foreach(int i; 0..Count)
                    pvec[i] = v.array[i];
            }
            else
            {
                *cast(Vec*)(pvec) = v;
            }
        }
    }

    /// Emulated `shufflevector`: each compile-time mask element selects a lane
    /// from `a` (index < Count) or from `b` (index - Count).
    Vec shufflevector(Vec, mask...)(Vec a, Vec b) @safe
    {
        enum size_t Count = Vec.array.length;
        static assert(mask.length == Count);

        Vec r = void;
        foreach(int i, m; mask)
        {
            static assert (m < Count * 2);
            int ind = cast(int)m;
            if (ind < Count)
                r.array[i] = a.array[ind];
            else
                r.array[i] = b.array[ind-Count];
        }
        return r;
    }
}
else
{
    public import core.simd;
}

// Emulate ldc.simd cmpMask
version(LDC)
{}
else
{
    private template BaseType(V)
    {
        alias typeof( ( { V v; return v; }()).array[0]) BaseType;
    }

    // The "all bits set" lane value for a given element type.
    // For floats/doubles this is the NaN whose payload is all-ones,
    // matching what hardware comparison instructions produce.
    private template TrueMask(V)
    {
        alias Elem = BaseType!V;

        static if (is(Elem == float))
        {
            immutable uint m1 = 0xffffffff;
            enum Elem TrueMask = *cast(float*)(&m1);
        }
        else static if (is(Elem == double))
        {
            immutable ulong m1 = 0xffffffff_ffffffff;
            enum Elem TrueMask = *cast(double*)(&m1);
        }
        else // integer case
        {
            enum Elem TrueMask = -1;
        }
    }

    Vec equalMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "oeq" comparison
    {
        enum size_t Count = Vec.array.length;
        Vec result;
        foreach(int i; 0..Count)
        {
            bool cond = a.array[i] == b.array[i];
            result.ptr[i] = cond ? TrueMask!Vec : 0;
        }
        return result;
    }

    Vec notEqualMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "one" comparison
    {
        enum size_t Count = Vec.array.length;
        Vec result;
        foreach(int i; 0..Count)
        {
            bool cond = a.array[i] != b.array[i];
            result.ptr[i] = cond ? TrueMask!Vec : 0;
        }
        return result;
    }

    Vec greaterMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "ogt" comparison
    {
        enum size_t Count = Vec.array.length;
        Vec result;
        foreach(int i; 0..Count)
        {
            bool cond = a.array[i] > b.array[i];
            result.ptr[i] = cond ? TrueMask!Vec : 0;
        }
        return result;
    }

    Vec greaterOrEqualMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "oge" comparison
    {
        enum size_t Count = Vec.array.length;
        Vec result;
        foreach(int i; 0..Count)
        {
            // BUGFIX: was `>` (copy-paste from greaterMask), which made this
            // function identical to greaterMask instead of an "oge" compare.
            bool cond = a.array[i] >= b.array[i];
            result.ptr[i] = cond ? TrueMask!Vec : 0;
        }
        return result;
    }
}

unittest
{
    float4 a = [1, 3, 5, 7];
    float4 b = [2, 3, 4, 5];
    int4 c = cast(int4)(greaterMask!float4(a, b));
    static immutable int[4] correct = [0, 0, 0xffff_ffff, 0xffff_ffff];
    assert(c.array == correct);
}

static if (MMXSizedVectorsAreEmulated)
{
    /// MMX-like SIMD types
    struct float2
    {
        float[2] array;
        mixin VectorOps!(float2, float[2]);

        private static float allOnes() pure nothrow @nogc @trusted
        {
            uint m1 = 0xffffffff;
            return *cast(float*)(&m1);
        }
    }

    struct byte8
    {
        byte[8] array;
        mixin VectorOps!(byte8, byte[8]);
    }

    struct short4
    {
        short[4] array;
        mixin VectorOps!(short4, short[4]);
    }

    struct int2
    {
        int[2] array;
        mixin VectorOps!(int2, int[2]);
    }

    struct long1
    {
        long[1] array;
        mixin VectorOps!(long1, long[1]);
    }
}
else
{
    // For this compiler, defining MMX-sized vectors is working.
    public import core.simd;
    alias Vector!(long [1]) long1;
    alias Vector!(float[2]) float2;
    alias Vector!(int  [2]) int2;
    alias Vector!(short[4]) short4;
    alias Vector!(byte [8]) byte8;
}

static assert(float2.sizeof == 8);
static assert(byte8.sizeof == 8);
static assert(short4.sizeof == 8);
static assert(int2.sizeof == 8);
static assert(long1.sizeof == 8);


static if (SSESizedVectorsAreEmulated)
{
    /// SSE-like SIMD types

    struct float4
    {
        float[4] array;
        mixin VectorOps!(float4, float[4]);
    }

    struct byte16
    {
        byte[16] array;
        mixin VectorOps!(byte16, byte[16]);
    }

    struct short8
    {
        short[8] array;
        mixin VectorOps!(short8, short[8]);
    }

    struct int4
    {
        int[4] array;
        mixin VectorOps!(int4, int[4]);
    }

    struct long2
    {
        long[2] array;
        mixin VectorOps!(long2, long[2]);
    }

    struct double2
    {
        double[2] array;
        mixin VectorOps!(double2, double[2]);
    }
}

static assert(float4.sizeof == 16);
static assert(byte16.sizeof == 16);
static assert(short8.sizeof == 16);
static assert(int4.sizeof == 16);
static assert(long2.sizeof == 16);
static assert(double2.sizeof == 16);




alias __m128 = float4;
alias __m128i = int4;
alias __m128d = double2;
alias __m64 = long1; // like in Clang, __m64 is a vector of 1 long

/// Builds the 2-bit immediate used by `_mm_shuffle_pd`-style intrinsics.
int _MM_SHUFFLE2(int x, int y) pure @safe
{
    assert(x >= 0 && x <= 1);
    assert(y >= 0 && y <= 1);
    return (x << 1) | y;
}

/// Builds the 8-bit immediate used by `_mm_shuffle_ps`/`_mm_shuffle_epi32`-style intrinsics.
int _MM_SHUFFLE(int z, int y, int x, int w) pure @safe
{
    assert(x >= 0 && x <= 3);
    assert(y >= 0 && y <= 3);
    assert(z >= 0 && z <= 3);
    assert(w >= 0 && w <= 3);
    return (z<<6) | (y<<4) | (x<<2) | w;
}

// test assignment from scalar to vector type
unittest
{
    float4 A = 3.0f;
    float[4] correctA = [3.0f, 3.0f, 3.0f, 3.0f];
    assert(A.array == correctA);

    int2 B = 42;
    int[2] correctB = [42, 42];
    assert(B.array == correctB);
}