/**
* `core.simd` emulation layer.
*
* This part of the module decides, per compiler and target, which vector
* widths (MMX = 8 bytes, SSE = 16 bytes, AVX = 32 bytes) are provided natively
* and which ones must be emulated with plain structs, and provides the
* `VectorOps` mixin template used by the emulated structs.
*
* Copyright: Copyright Guillaume Piolat 2016-2020, Stefanos Baziotis 2019.
* License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/
module inteli.types;


pure:
nothrow:
@nogc:

version(GNU)
{
    // Note: for GDC support, be sure to use https://explore.dgnu.org/

    // Future: just detect vectors, do not base upon arch.

    version(X86_64)
    {
        enum MMXSizedVectorsAreEmulated = false;
        enum SSESizedVectorsAreEmulated = false;

        // Does GDC support AVX-sized vectors?
        static if (__VERSION__ >= 2100) // Starting at GDC 12.1 only.
        {
            // Probe the compiler: emulate only if the 32-byte vector type does not exist.
            enum AVXSizedVectorsAreEmulated = !(is(__vector(double[4])));
        }
        else
        {
            enum AVXSizedVectorsAreEmulated = true;
        }

        import gcc.builtins;
    }
    else
    {
        // Any other GDC target: everything is emulated.
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
        enum AVXSizedVectorsAreEmulated = true;
    }
}
else version(LDC)
{
    public import ldc.simd;

    // Use this alias to mention it should only be used with LDC,
    // for example when emulated shufflevector would just be wasteful.
    alias shufflevectorLDC = shufflevector;

    enum MMXSizedVectorsAreEmulated = false;
    enum SSESizedVectorsAreEmulated = false;
    enum AVXSizedVectorsAreEmulated = false;
}
else version(DigitalMars)
{
    public import core.simd;

    // Note: turning this true is desirable,
    // and leads to many bugs being discovered upstream.
    // The fact that it works relies on many workarounds.
    // In particular intel-intrinsics with this on is a honeypot for DMD backend bugs.
    // What happens next is that contributors end up on a DMD bug in their PR.
    //
    // Failed attempts: xxx
    //
    static if (__VERSION__ >= 2100)
    {
        enum bool tryToEnableCoreSimdWithDMD = true;
    }
    else
    {
        enum bool tryToEnableCoreSimdWithDMD = false;
    }

    version(D_SIMD)
    {
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = !tryToEnableCoreSimdWithDMD;

        // Note: with DMD, AVX-sized vectors can't be enabled yet.
        // On linux + x86_64, this will fail since a few operands seem to be missing.
        // Both branches are deliberately `true` for now; the `version(D_AVX)`
        // split is kept so that only one line changes once it can be enabled.
        version(D_AVX)
            enum AVXSizedVectorsAreEmulated = true;
        else
            enum AVXSizedVectorsAreEmulated = true;
    }
    else
    {
        // Some DMD 32-bit targets don't have D_SIMD
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
        enum AVXSizedVectorsAreEmulated = true;
    }
}
else
{
    // Without this, an unknown compiler only reports "undefined identifier"
    // errors on the enums below, which hides the actual cause.
    static assert(false, "inteli.types: unsupported compiler, expected GDC, LDC or DMD");
}

/// `true` as soon as at least one vector width has to be emulated.
enum CoreSimdIsEmulated = MMXSizedVectorsAreEmulated || SSESizedVectorsAreEmulated || AVXSizedVectorsAreEmulated;

static if (CoreSimdIsEmulated)
{
    // core.simd is emulated in some capacity: introduce `VectorOps`

    /// Operators and constructors shared by every emulated vector struct.
    ///
    /// The struct mixing this in must declare a member `ArrayType array;`,
    /// which holds the lanes.
    ///
    /// Params:
    ///     VectorType = the struct type this template is mixed into
    ///     ArrayType  = the static array type of the `array` member, `BaseType[N]`
    ///     BaseType   = lane type (deduced from `ArrayType`)
    ///     N          = number of lanes (deduced from `ArrayType`)
    mixin template VectorOps(VectorType, ArrayType: BaseType[N], BaseType, size_t N)
    {
        enum Count = N;
        alias Base = BaseType;

        /// Returns: a pointer to the first lane.
        BaseType* ptr() return pure nothrow @nogc
        {
            return array.ptr;
        }

        /// Lane-wise unary operator.
        /// It is `const`, like `opBinary`, so that it also applies to `const` vectors;
        /// the operand is never modified.
        VectorType opUnary(string op)() pure const nothrow @safe @nogc
        {
            VectorType res = void; // every lane is written just below
            mixin("res.array[] = " ~ op ~ "array[];");
            return res;
        }

        /// Lane-wise binary operator between two vectors of the same type.
        VectorType opBinary(string op)(VectorType other) pure const nothrow @safe @nogc
        {
            VectorType res = void; // every lane is written just below
            mixin("res.array[] = array[] " ~ op ~ " other.array[];");
            return res;
        }

        /// Assigning a BaseType value: broadcasts `e` to all lanes.
        void opAssign(BaseType e) pure nothrow @safe @nogc
        {
            array[] = e;
        }

        /// Assigning a static array: copies it lane by lane.
        void opAssign(ArrayType v) pure nothrow @safe @nogc
        {
            array[] = v[];
        }

        /// Lane-wise compound assignment (`+=`, `-=`, ...) with another vector.
        void opOpAssign(string op)(VectorType other) pure nothrow @safe @nogc
        {
            mixin("array[] " ~ op ~ "= other.array[];");
        }

        /// Construction from a static array of `N` lanes.
        this(ArrayType v) pure nothrow @safe @nogc
        {
            array[] = v[];
        }

        /// Broadcast constructor: all lanes are set to `x`.
        this(BaseType x) pure nothrow @safe @nogc
        {
            array[] = x;
        }

        /// We can't support implicit conversion but do support explicit casting.
        /// "Vector types of the same size can be implicitly converted among each other."
        /// Casting to another vector type is always just a raw copy.
        /// Only destination types of the exact same size are accepted.
        VecDest opCast(VecDest)() pure const nothrow @trusted @nogc
            if (VecDest.sizeof == VectorType.sizeof)
        {
            VecDest dest = void;
            // Copy: reinterpret our storage as raw bytes, then as the destination lanes.
            dest.array[] = (cast(typeof(dest.array))cast(void[VectorType.sizeof])array)[];
            return dest;
        }

        /// Returns: a reference to lane `i`.
        ref inout(BaseType) opIndex(size_t i) inout return pure nothrow @safe @nogc
        {
            return array[i];
        }

    }
}
else
{
    public import core.simd;

    // GDC cannot convert implicitly __vector from signed to unsigned, but LDC can
    // And GDC sometimes need those unsigned vector types for some intrinsics.
    // For internal use only.
    package alias ushort8 = Vector!(ushort[8]);
    package alias ubyte8  = Vector!(ubyte[8]);
    package alias ubyte16 = Vector!(ubyte[16]);

    static if (!AVXSizedVectorsAreEmulated)
    {
        package alias ushort16 = Vector!(ushort[16]);
        package alias ubyte32  = Vector!(ubyte[32]);
    }
}

// Emulate ldc.simd cmpMask and other masks.
// Note: these should be deprecated on non-LDC,
// since it's slower to generate that code.
version(LDC)
{}
else
{
    /// Lane type of the vector type `V`, i.e. the type of `V.array[0]`.
    private template BaseType(V)
    {
        alias BaseType = typeof( ( { V v; return v; }()).array[0]);
    }

    /// The lane value with every bit set, used as "true" in comparison masks.
    private template TrueMask(V)
    {
        alias Elem = BaseType!V;

        static if (is(Elem == double))
        {
            // 64 set bits reinterpreted as a double.
            immutable ulong allBits = 0xffffffff_ffffffff;
            enum Elem TrueMask = *cast(double*)(&allBits);
        }
        else static if (is(Elem == float))
        {
            // 32 set bits reinterpreted as a float.
            immutable uint allBits = 0xffffffff;
            enum Elem TrueMask = *cast(float*)(&allBits);
        }
        else // integer case
        {
            enum Elem TrueMask = -1;
        }
    }

    /// Lane-wise `a == b`. Each lane of the result is all-ones when the
    /// comparison holds, zero otherwise.
    /// For floats, equivalent to "oeq" comparison.
    Vec equalMask(Vec)(Vec a, Vec b) @trusted
    {
        enum size_t lanes = Vec.array.length;
        Vec mask;
        foreach (int lane; 0 .. lanes)
        {
            if (a.array[lane] == b.array[lane])
                mask.ptr[lane] = TrueMask!Vec;
            else
                mask.ptr[lane] = 0;
        }
        return mask;
    }

    /// Lane-wise `a != b`. Each lane of the result is all-ones when the
    /// comparison holds, zero otherwise.
    /// For floats, equivalent to "one" comparison.
    // NOTE(review): D's `!=` is true when either operand is NaN, which looks
    // like the unordered "une" predicate rather than "one" - confirm which
    // one callers expect.
    Vec notEqualMask(Vec)(Vec a, Vec b) @trusted
    {
        enum size_t lanes = Vec.array.length;
        Vec mask;
        foreach (int lane; 0 .. lanes)
        {
            if (a.array[lane] != b.array[lane])
                mask.ptr[lane] = TrueMask!Vec;
            else
                mask.ptr[lane] = 0;
        }
        return mask;
    }

    /// Lane-wise `a > b`. Each lane of the result is all-ones when the
    /// comparison holds, zero otherwise.
    /// For floats, equivalent to "ogt" comparison.
    Vec greaterMask(Vec)(Vec a, Vec b) @trusted
    {
        enum size_t lanes = Vec.array.length;
        Vec mask;
        foreach (int lane; 0 .. lanes)
        {
            if (a.array[lane] > b.array[lane])
                mask.ptr[lane] = TrueMask!Vec;
            else
                mask.ptr[lane] = 0;
        }
        return mask;
    }
}

// greaterMask yields all-ones lanes exactly where the left operand is larger
unittest
{
    float4 lhs = [1, 3, 5, 7];
    float4 rhs = [2, 3, 4, 5];
    int4 bits = cast(int4)(greaterMask!float4(lhs, rhs));
    static immutable int[4] expected = [0, 0, 0xffff_ffff, 0xffff_ffff];
    assert(bits.array == expected);
}

static if (MMXSizedVectorsAreEmulated)
{
    // 8-byte (MMX-like) SIMD types, emulated with structs.

    /// 2 x float
    struct float2
    {
        float[2] array;
        mixin VectorOps!(float2, float[2]);
    }

    /// 8 x byte
    struct byte8
    {
        byte[8] array;
        mixin VectorOps!(byte8, byte[8]);
    }

    /// 4 x short
    struct short4
    {
        short[4] array;
        mixin VectorOps!(short4, short[4]);
    }

    /// 2 x int
    struct int2
    {
        int[2] array;
        mixin VectorOps!(int2, int[2]);
    }

    /// 1 x long
    struct long1
    {
        long[1] array;
        mixin VectorOps!(long1, long[1]);
    }
}
else
{
    // This compiler can define MMX-sized vectors natively.
    public import core.simd;
    alias long1  = Vector!(long [1]);
    alias float2 = Vector!(float[2]);
    alias int2   = Vector!(int  [2]);
    alias short4 = Vector!(short[4]);
    alias byte8  = Vector!(byte [8]);
}

// Every MMX-sized type must occupy exactly 8 bytes.
static assert(float2.sizeof == 8);
static assert(byte8.sizeof == 8);
static assert(short4.sizeof == 8);
static assert(int2.sizeof == 8);
static assert(long1.sizeof == 8);


static if (SSESizedVectorsAreEmulated)
{
    // 16-byte (SSE-like) SIMD types, emulated with structs.

    /// 4 x float
    struct float4
    {
        float[4] array;
        mixin VectorOps!(float4, float[4]);
    }

    /// 16 x byte
    struct byte16
    {
        byte[16] array;
        mixin VectorOps!(byte16, byte[16]);
    }

    /// 8 x short
    struct short8
    {
        short[8] array;
        mixin VectorOps!(short8, short[8]);
    }

    /// 4 x int
    struct int4
    {
        int[4] array;
        mixin VectorOps!(int4, int[4]);
    }

    /// 2 x long
    struct long2
    {
        long[2] array;
        mixin VectorOps!(long2, long[2]);
    }

    /// 2 x double
    struct double2
    {
        double[2] array;
        mixin VectorOps!(double2, double[2]);
    }
}

// Every SSE-sized type must occupy exactly 16 bytes.
static assert(float4.sizeof == 16);
static assert(byte16.sizeof == 16);
static assert(short8.sizeof == 16);
static assert(int4.sizeof == 16);
static assert(long2.sizeof == 16);
static assert(double2.sizeof == 16);


static if (AVXSizedVectorsAreEmulated)
{
    // 32-byte (AVX-like) SIMD types, emulated with structs.

    /// 8 x float
    struct float8
    {
        float[8] array;
        mixin VectorOps!(float8, float[8]);
    }

    /// 32 x byte
    struct byte32
    {
        byte[32] array;
        mixin VectorOps!(byte32, byte[32]);
    }

    /// 16 x short
    struct short16
    {
        short[16] array;
        mixin VectorOps!(short16, short[16]);
    }

    /// 8 x int
    struct int8
    {
        int[8] array;
        mixin VectorOps!(int8, int[8]);
    }

    /// 4 x long
    struct long4
    {
        long[4] array;
        mixin VectorOps!(long4, long[4]);
    }

    /// 4 x double
    struct double4
    {
        double[4] array;
        mixin VectorOps!(double4, double[4]);
    }
}
else
{
    public import core.simd;
}

// Every AVX-sized type must occupy exactly 32 bytes.
static assert(float8.sizeof == 32);
static assert(byte32.sizeof == 32);
static assert(short16.sizeof == 32);
static assert(int8.sizeof == 32);
static assert(long4.sizeof == 32);
static assert(double4.sizeof == 32);




// Intel-style names for the vector types.
alias __m256 = float8;
alias __m256i = long4; // long long __vector with ICC, GCC, and clang
alias __m256d = double4;
alias __m128 = float4;
alias __m128i = int4;
alias __m128d = double2;
alias __m64 = long1; // like in Clang, __m64 is a vector of 1 long

/// Packs two 1-bit selectors into one immediate: `x` in bit 1, `y` in bit 0.
/// Both arguments must be 0 or 1.
int _MM_SHUFFLE2(int x, int y) pure @safe
{
    assert(0 <= x && x <= 1);
    assert(0 <= y && y <= 1);
    return y | (x << 1);
}

/// Packs four 2-bit selectors into one immediate:
/// `z` in bits 7-6, `y` in bits 5-4, `x` in bits 3-2, `w` in bits 1-0.
/// All arguments must be in the range 0 to 3.
int _MM_SHUFFLE(int z, int y, int x, int w) pure @safe
{
    assert(0 <= x && x <= 3);
    assert(0 <= y && y <= 3);
    assert(0 <= z && z <= 3);
    assert(0 <= w && w <= 3);
    return w | (x << 2) | (y << 4) | (z << 6);
}

// Initialising a vector from a scalar broadcasts it to every lane.
unittest
{
    float4 A = 3.0f;
    float[4] expectedA = [3.0f, 3.0f, 3.0f, 3.0f];
    assert(A.array == expectedA);

    int2 B = 42;
    int[2] expectedB = [42, 42];
    assert(B.array == expectedB);
}