/**
* `core.simd` emulation layer.
*
* Selects, per compiler, either the native `core.simd` vector types (GDC on
* x86_64, LDC) or a struct-based emulation (everything else handled below),
* and exposes a common set of helpers on top: `loadUnaligned`,
* `storeUnaligned`, `shufflevector`, and, for the emulated path, the
* comparison-mask functions.
*
* Copyright: Copyright Auburn Sounds 2016-2018, Stefanos Baziotis 2019.
* License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
* Authors:   Guillaume Piolat
*/
module inteli.types;

version(GNU)
{
    version(X86_64)
    {
        enum CoreSimdIsEmulated = false;

        public import core.simd;
        import gcc.builtins;

        // Declare vector types that correspond to MMX types
        // Because they are expressible in IR anyway.
        alias Vector!(long [1]) long1;
        alias Vector!(float[2]) float2;
        alias Vector!(int  [2]) int2;
        alias Vector!(short[4]) short4;
        alias Vector!(byte [8]) byte8;

        /// Loads a `float4` from `pvec` through the `loadups` builtin.
        float4 loadUnaligned(Vec)(const(float)* pvec) @trusted if (is(Vec == float4))
        {
            return __builtin_ia32_loadups(pvec);
        }

        /// Loads a `double2` from `pvec` through the `loadupd` builtin.
        double2 loadUnaligned(Vec)(const(double)* pvec) @trusted if (is(Vec == double2))
        {
            return __builtin_ia32_loadupd(pvec);
        }

        /// Loads a `byte16` from `pvec` through the `loaddqu` builtin.
        byte16 loadUnaligned(Vec)(const(byte)* pvec) @trusted if (is(Vec == byte16))
        {
            return cast(byte16) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
        }

        /// Loads a `short8` from `pvec` through the `loaddqu` builtin.
        short8 loadUnaligned(Vec)(const(short)* pvec) @trusted if (is(Vec == short8))
        {
            return cast(short8) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
        }

        /// Loads an `int4` from `pvec` through the `loaddqu` builtin.
        int4 loadUnaligned(Vec)(const(int)* pvec) @trusted if (is(Vec == int4))
        {
            return cast(int4) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
        }

        /// Loads a `long2` from `pvec` through the `loaddqu` builtin.
        long2 loadUnaligned(Vec)(const(long)* pvec) @trusted if (is(Vec == long2))
        {
            return cast(long2) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
        }

        /// Stores the `float4` `v` to `pvec` through the `storeups` builtin.
        void storeUnaligned(Vec)(Vec v, float* pvec) @trusted if (is(Vec == float4))
        {
            __builtin_ia32_storeups(pvec, v);
        }

        /// Stores the `double2` `v` to `pvec` through the `storeupd` builtin.
        void storeUnaligned(Vec)(Vec v, double* pvec) @trusted if (is(Vec == double2))
        {
            __builtin_ia32_storeupd(pvec, v);
        }

        /// Stores the `byte16` `v` to `pvec` through the `storedqu` builtin.
        void storeUnaligned(Vec)(Vec v, byte* pvec) @trusted if (is(Vec == byte16))
        {
            __builtin_ia32_storedqu(cast(char*)pvec, v);
        }

        /// Stores the `short8` `v` to `pvec` through the `storedqu` builtin.
        void storeUnaligned(Vec)(Vec v, short* pvec) @trusted if (is(Vec == short8))
        {
            __builtin_ia32_storedqu(cast(char*)pvec, v);
        }

        /// Stores the `int4` `v` to `pvec` through the `storedqu` builtin.
        void storeUnaligned(Vec)(Vec v, int* pvec) @trusted if (is(Vec == int4))
        {
            __builtin_ia32_storedqu(cast(char*)pvec, v);
        }

        /// Stores the `long2` `v` to `pvec` through the `storedqu` builtin.
        void storeUnaligned(Vec)(Vec v, long* pvec) @trusted if (is(Vec == long2))
        {
            __builtin_ia32_storedqu(cast(char*)pvec, v);
        }

        /// Builds a vector by picking elements from the concatenation of `a`
        /// and `b`: mask value `m < Count` selects `a[m]`, otherwise
        /// `b[m - Count]`. `mask` must have exactly `Count` entries, each
        /// below `2 * Count` (both checked at compile time).
        // TODO: for performance, replace that anywhere possible by a GDC intrinsic
        Vec shufflevector(Vec, mask...)(Vec a, Vec b) @trusted
        {
            enum Count = Vec.array.length;
            static assert(mask.length == Count);

            Vec r = void; // every lane is written by the loop below
            foreach(int i, m; mask)
            {
                static assert (m < Count * 2);
                int ind = cast(int)m;
                if (ind < Count)
                    r.ptr[i] = a.array[ind];
                else
                    r.ptr[i] = b.array[ind - Count];
            }
            return r;
        }
    }
    else
    {
        enum CoreSimdIsEmulated = true;
    }
}
else version(LDC)
{
    public import core.simd;
    public import ldc.simd;

    // Declare vector types that correspond to MMX types
    // Because they are expressible in IR anyway.
    alias Vector!(long [1]) long1;
    alias Vector!(float[2]) float2;
    alias Vector!(int  [2]) int2;
    alias Vector!(short[4]) short4;
    alias Vector!(byte [8]) byte8;

    enum CoreSimdIsEmulated = false;
}
else version(DigitalMars)
{
    enum CoreSimdIsEmulated = true; // TODO: use core.simd with DMD when D_SIMD is defined
}

static if (CoreSimdIsEmulated)
{
    // This is a LDC SIMD emulation layer, for use with other D compilers.
    // The goal is to be very similar in precision.
    // The biggest differences are:
    //
    // 1. `cast` everywhere. With LDC vector types, short8 is implicitly convertible to int4
    //   but this is sadly impossible in D without D_SIMD (Windows 32-bit).
    //
    // 2. `vec.array` is directly writeable.

    nothrow:
    @nogc:
    pure:


    /// MMX-like SIMD types
    struct float2
    {
        float[2] array;
        mixin VectorOps!(float2, float[2]);

        /// Lane value meaning "true": a float whose 32 bits are all set.
        enum float TrueMask = allOnes();
        /// Lane value meaning "false".
        enum float FalseMask = 0.0f;

        // Reinterprets the bit pattern 0xffffffff as a float.
        private static float allOnes()
        {
            uint m1 = 0xffffffff;
            return *cast(float*)(&m1);
        }
    }

    struct byte8
    {
        byte[8] array;
        mixin VectorOps!(byte8, byte[8]);
        enum byte TrueMask = -1;
        enum byte FalseMask = 0;
    }

    struct short4
    {
        short[4] array;
        mixin VectorOps!(short4, short[4]);
        enum short TrueMask = -1;
        enum short FalseMask = 0;
    }

    struct int2
    {
        int[2] array;
        mixin VectorOps!(int2, int[2]);
        enum int TrueMask = -1;
        enum int FalseMask = 0;
    }

    struct long1
    {
        long[1] array;
        mixin VectorOps!(long1, long[1]);
        enum long TrueMask = -1;
        enum long FalseMask = 0;
    }

    static assert(float2.sizeof == 8);
    static assert(byte8.sizeof == 8);
    static assert(short4.sizeof == 8);
    static assert(int2.sizeof == 8);
    static assert(long1.sizeof == 8);


    /// SSE-like SIMD types

    struct float4
    {
        float[4] array;
        mixin VectorOps!(float4, float[4]);

        /// Lane value meaning "true": a float whose 32 bits are all set.
        enum float TrueMask = allOnes();
        /// Lane value meaning "false".
        enum float FalseMask = 0.0f;

        // Reinterprets the bit pattern 0xffffffff as a float.
        private static float allOnes()
        {
            uint m1 = 0xffffffff;
            return *cast(float*)(&m1);
        }
    }

    struct byte16
    {
        byte[16] array;
        mixin VectorOps!(byte16, byte[16]);
        enum byte TrueMask = -1;
        enum byte FalseMask = 0;
    }

    struct short8
    {
        short[8] array;
        mixin VectorOps!(short8, short[8]);
        enum short TrueMask = -1;
        enum short FalseMask = 0;
    }

    struct int4
    {
        int[4] array;
        mixin VectorOps!(int4, int[4]);
        enum int TrueMask = -1;
        enum int FalseMask = 0;
    }

    struct long2
    {
        long[2] array;
        mixin VectorOps!(long2, long[2]);
        enum long TrueMask = -1;
        enum long FalseMask = 0;
    }

    struct double2
    {
        double[2] array;
        mixin VectorOps!(double2, double[2]);

        /// Lane value meaning "true": a double whose 64 bits are all set.
        enum double TrueMask = allOnes();
        /// Lane value meaning "false".
        enum double FalseMask = 0.0;

        // Reinterprets the bit pattern 0xffffffff_ffffffff as a double.
        private static double allOnes()
        {
            ulong m1 = 0xffffffff_ffffffff;
            return *cast(double*)(&m1);
        }
    }

    static assert(float4.sizeof == 16);
    static assert(byte16.sizeof == 16);
    static assert(short8.sizeof == 16);
    static assert(int4.sizeof == 16);
    static assert(long2.sizeof == 16);
    static assert(double2.sizeof == 16);

    /// Operations shared by every emulated vector struct.
    /// `VectorType` is the struct being defined and `ArrayType` is the type
    /// of its `array` member (`BaseType[N]`).
    mixin template VectorOps(VectorType, ArrayType: BaseType[N], BaseType, size_t N)
    {
        /// Number of lanes.
        enum Count = N;
        /// Element type of one lane.
        alias Base = BaseType;

        /// Pointer to the first lane.
        BaseType* ptr() return pure nothrow @nogc
        {
            return array.ptr;
        }

        // Unary operators, applied lane by lane
        VectorType opUnary(string op)() pure nothrow @safe @nogc
        {
            VectorType res = void;
            mixin("res.array[] = " ~ op ~ "array[];");
            return res;
        }

        // Binary operators, applied lane by lane
        VectorType opBinary(string op)(VectorType other) pure const nothrow @safe @nogc
        {
            VectorType res = void;
            mixin("res.array[] = array[] " ~ op ~ " other.array[];");
            return res;
        }

        // Assigning a static array
        void opAssign(ArrayType v) pure nothrow @safe @nogc
        {
            array[] = v[];
        }

        // Compound assignment (`+=`, `-=`, ...), applied lane by lane
        void opOpAssign(string op)(VectorType other) pure nothrow @safe @nogc
        {
            mixin("array[] " ~ op ~ "= other.array[];");
        }

        // Constructing from a static array
        this(ArrayType v) pure nothrow @safe @nogc
        {
            array[] = v[];
        }

        // Broadcast constructor
        this(BaseType x) pure nothrow @safe @nogc
        {
            array[] = x;
        }

        /// We can't support implicit conversion but do support explicit casting.
        /// "Vector types of the same size can be implicitly converted among each other."
        /// Casting to another vector type is always just a raw copy.
        VecDest opCast(VecDest)() pure const nothrow @trusted @nogc
            if (VecDest.sizeof == VectorType.sizeof)
        {
            // import core.stdc.string: memcpy;
            VecDest dest = void;
            // Copy
            dest.array[] = (cast(typeof(dest.array))cast(void[VectorType.sizeof])array)[];
            return dest;
        }

        /// Access to lane `i` by reference.
        ref inout(BaseType) opIndex(size_t i) inout pure nothrow @safe @nogc
        {
            return array[i];
        }

    }

    /// Reinterprets the bytes of `vec` as a `Vec` and returns lane `index`.
    /// `Vec` and `Vec2` must have the same size.
    auto extractelement(Vec, int index, Vec2)(Vec2 vec) @trusted
    {
        static assert(Vec.sizeof == Vec2.sizeof);
        import core.stdc.string: memcpy;
        Vec v = void;
        memcpy(&v, &vec, Vec2.sizeof);
        return v.array[index];
    }

    /// Reinterprets the bytes of `vec` as a `Vec`, replaces lane `index`
    /// with `e`, and returns the resulting `Vec`.
    /// `Vec` and `Vec2` must have the same size.
    auto insertelement(Vec, int index, Vec2)(Vec2 vec, Vec.Base e) @trusted
    {
        static assert(Vec.sizeof == Vec2.sizeof);
        import core.stdc.string: memcpy;
        Vec v = void;
        memcpy(&v, &vec, Vec2.sizeof);
        v.array[index] = e;
        return v;
    }

    /// Reads a whole `Vec` starting at `pvec`.
    // Note: can't be @safe with this signature
    Vec loadUnaligned(Vec)(const(Vec.Base)* pvec) @trusted
    {
        return *cast(Vec*)(pvec);
    }

    /// Writes the whole vector `v` starting at `pvec`.
    // Note: can't be @safe with this signature
    void storeUnaligned(Vec)(Vec v, Vec.Base* pvec) @trusted
    {
        *cast(Vec*)(pvec) = v;
    }

    /// Builds a vector by picking elements from the concatenation of `a`
    /// and `b`: mask value `m < Vec.Count` selects `a[m]`, otherwise
    /// `b[m - Vec.Count]`. `mask` must have exactly `Vec.Count` entries,
    /// each below `2 * Vec.Count` (both checked at compile time).
    Vec shufflevector(Vec, mask...)(Vec a, Vec b) @safe
    {
        static assert(mask.length == Vec.Count);

        Vec r = void; // every lane is written by the loop below
        foreach(int i, m; mask)
        {
            static assert (m < Vec.Count * 2);
            int ind = cast(int)m;
            if (ind < Vec.Count)
                r.array[i] = a.array[ind];
            else
                r.array[i] = b.array[ind-Vec.Count];
        }
        return r;
    }

    // emulate ldc.simd cmpMask

    /// Per lane: `Vec.TrueMask` where `a == b`, else `Vec.FalseMask`.
    Vec equalMask(Vec)(Vec a, Vec b) @safe // for floats, equivalent to "oeq" comparison
    {
        alias Count = Vec.Count;
        Vec result;
        foreach(int i; 0..Count)
        {
            bool cond = a.array[i] == b.array[i];
            result.array[i] = cond ? Vec.TrueMask : Vec.FalseMask;
        }
        return result;
    }

    /// Per lane: `Vec.TrueMask` where `a != b`, else `Vec.FalseMask`.
    // NOTE(review): D's `!=` is true when either float operand is NaN, which
    // is not what an ordered ("one") comparison does -- confirm which
    // behaviour is intended for NaN lanes.
    Vec notEqualMask(Vec)(Vec a, Vec b) @safe // for floats, equivalent to "one" comparison
    {
        alias Count = Vec.Count;
        Vec result;
        foreach(int i; 0..Count)
        {
            bool cond = a.array[i] != b.array[i];
            result.array[i] = cond ? Vec.TrueMask : Vec.FalseMask;
        }
        return result;
    }

    /// Per lane: `Vec.TrueMask` where `a > b`, else `Vec.FalseMask`.
    Vec greaterMask(Vec)(Vec a, Vec b) @safe // for floats, equivalent to "ogt" comparison
    {
        alias Count = Vec.Count;
        Vec result;
        foreach(int i; 0..Count)
        {
            bool cond = a.array[i] > b.array[i];
            result.array[i] = cond ? Vec.TrueMask : Vec.FalseMask;
        }
        return result;
    }

    /// Per lane: `Vec.TrueMask` where `a >= b`, else `Vec.FalseMask`.
    Vec greaterOrEqualMask(Vec)(Vec a, Vec b) @safe // for floats, equivalent to "oge" comparison
    {
        alias Count = Vec.Count;
        Vec result;
        foreach(int i; 0..Count)
        {
            // Must be `>=`: equal lanes compare true for "greater or equal".
            bool cond = a.array[i] >= b.array[i];
            result.array[i] = cond ? Vec.TrueMask : Vec.FalseMask;
        }
        return result;
    }

    unittest
    {
        float4 a = [1, 3, 5, 7];
        float4 b = [2, 3, 4, 5];
        int4 c = cast(int4)(greaterMask!float4(a, b));
        static immutable int[4] correct = [0, 0, 0xffff_ffff, 0xffff_ffff];
        assert(c.array == correct);
    }

    // Equal lanes (index 1) must yield TrueMask for greaterOrEqualMask.
    unittest
    {
        float4 a = [1, 3, 5, 7];
        float4 b = [2, 3, 4, 5];
        int4 c = cast(int4)(greaterOrEqualMask!float4(a, b));
        static immutable int[4] correct = [0, -1, -1, -1];
        assert(c.array == correct);
    }
}

nothrow:
@nogc:

alias __m128 = float4;
alias __m128i = int4;
alias __m128d = double2;
alias __m64 = long1; // like in Clang, __m64 is a vector of 1 long

/// Packs two 1-bit selectors into an int: `x` in bit 1, `y` in bit 0.
/// Both arguments must be 0 or 1.
int _MM_SHUFFLE2(int x, int y) pure @safe
{
    assert(x >= 0 && x <= 1);
    assert(y >= 0 && y <= 1);
    return (x << 1) | y;
}

/// Packs four 2-bit selectors into an int: `z` in bits 7..6, `y` in bits
/// 5..4, `x` in bits 3..2 and `w` in bits 1..0.
/// Every argument must be in the range 0..3.
int _MM_SHUFFLE(int z, int y, int x, int w) pure @safe
{
    assert(x >= 0 && x <= 3);
    assert(y >= 0 && y <= 3);
    assert(z >= 0 && z <= 3);
    assert(w >= 0 && w <= 3);
    return (z<<6) | (y<<4) | (x<<2) | w;
}

// test assignment from scalar to vector type
unittest
{
    float4 A = 3.0f;
    float[4] correctA = [3.0f, 3.0f, 3.0f, 3.0f];
    assert(A.array == correctA);

    int2 B = 42;
    int[2] correctB = [42, 42];
    assert(B.array == correctB);
}