/**
 * `core.simd` emulation layer.
 *
 * Copyright: Copyright Guillaume Piolat 2016-2020, Stefanos Baziotis 2019.
 *            cet 2024.
 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 */
module inteli.types;


// Module-wide attributes: every declaration below is pure, nothrow and @nogc.
pure:
nothrow:
@nogc:

version(GNU)
{
    // Note: for GDC support, be sure to use https://explore.dgnu.org/

    // Future: just detect vectors, do not base upon arch.

    version(X86_64)
    {
        // GDC on x86_64 has native MMX- and SSE-sized __vector support.
        enum MMXSizedVectorsAreEmulated = false;
        enum SSESizedVectorsAreEmulated = false;

        // Does GDC support AVX-sized vectors?
        static if (__VERSION__ >= 2100) // Starting at GDC 12.1 only.
        {
            // Probe the compiler directly: emulate only if __vector(double[4]) is unavailable.
            enum AVXSizedVectorsAreEmulated = !(is(__vector(double[4])));
        }
        else
        {
            enum AVXSizedVectorsAreEmulated = true;
        }

        import gcc.builtins;
    }
    else
    {
        // Non-x86_64 GDC targets: emulate every vector size.
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
        enum AVXSizedVectorsAreEmulated = true;
    }
}
else version(LDC)
{
    public import ldc.simd;

    // Use this alias to mention it should only be used with LDC,
    // for example when emulated shufflevector would just be wasteful.
    alias shufflevectorLDC = shufflevector;

    // LDC has full native vector support at every size.
    enum MMXSizedVectorsAreEmulated = false;
    enum SSESizedVectorsAreEmulated = false;
    enum AVXSizedVectorsAreEmulated = false;
}
else version(DigitalMars)
{
    public import core.simd;

    static if (__VERSION__ >= 2100)
    {
        // Note: turning this true is very desirable for DMD performance,
        // but also leads to many bugs being discovered upstream.
        // The fact that it works at all relies on many workarounds.
        // In particular intel-intrinsics with this "on" is a honeypot for DMD backend bugs,
        // and a very strong DMD codegen test suite.
        // What happens typically is that contributors end up on a DMD bug in their PR.
        // But finally, in 2022 D_SIMD has been activated, at least for SSE and some instructions.
        enum bool tryToEnableCoreSimdWithDMD = true;
    }
    else
    {
        enum bool tryToEnableCoreSimdWithDMD = false;
    }

    version(D_SIMD)
    {
        // DMD's D_SIMD never covers MMX-sized (64-bit) vectors; always emulate those.
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = !tryToEnableCoreSimdWithDMD;

        // Note: with DMD, AVX-sized vectors can't be enabled yet.
        // On linux + x86_64, this will fail since a few operands seem to be missing.
        // FUTURE: enable AVX-sized vectors in DMD. :)
        //
        // Blockers: https://issues.dlang.org/show_bug.cgi?id=24283 and 24284
        // Probably other, unreported issues.
        // Both branches are deliberately `true` until the blockers above are fixed;
        // the version split is kept so the D_AVX case is easy to flip later.
        version(D_AVX)
            enum AVXSizedVectorsAreEmulated = true;
        else
            enum AVXSizedVectorsAreEmulated = true;
    }
    else
    {
        // Some DMD 32-bit targets don't have D_SIMD
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
        enum AVXSizedVectorsAreEmulated = true;
    }
}

/// True when at least one vector size is emulated, in which case the
/// `VectorOps` mixin below is needed to define the emulated struct types.
enum CoreSimdIsEmulated = MMXSizedVectorsAreEmulated || SSESizedVectorsAreEmulated || AVXSizedVectorsAreEmulated;

static if (CoreSimdIsEmulated)
{
    // core.simd is emulated in some capacity: introduce `VectorOps`

    /// Mixin giving an emulated vector struct (a wrapper over a static array
    /// named `array`) the operator surface of a `core.simd` `Vector`:
    /// unary/binary element-wise operators, broadcast/array assignment and
    /// construction, same-size raw casts, and indexing.
    mixin template VectorOps(VectorType, ArrayType: BaseType[N], BaseType, size_t N)
    {
        enum Count = N;          // number of lanes
        alias Base = BaseType;   // lane type

        /// Pointer to the first lane (mirrors `Vector.ptr`).
        BaseType* ptr() return pure nothrow @nogc
        {
            return array.ptr;
        }

        // Unary operators, applied lane-wise via array ops.
        VectorType opUnary(string op)() pure nothrow @safe @nogc
        {
            VectorType res = void;
            mixin("res.array[] = " ~ op ~ "array[];");
            return res;
        }

        // Binary operators, applied lane-wise via array ops.
        VectorType opBinary(string op)(VectorType other) pure const nothrow @safe @nogc
        {
            VectorType res = void;
            mixin("res.array[] = array[] " ~ op ~ " other.array[];");
            return res;
        }

        // Assigning a BaseType value (broadcast to all lanes).
        void opAssign(BaseType e) pure nothrow @safe @nogc
        {
            array[] = e;
        }

        // Assigning a static array
        void opAssign(ArrayType v) pure nothrow @safe @nogc
        {
            array[] = v[];
        }

        // Compound assignment (+=, -=, ...), applied lane-wise.
        void opOpAssign(string op)(VectorType other) pure nothrow @safe @nogc
        {
            mixin("array[] " ~ op ~ "= other.array[];");
        }

        // Construct from a static array of matching length.
        this(ArrayType v) pure nothrow @safe @nogc
        {
            array[] = v[];
        }

        // Broadcast constructor
        this(BaseType x) pure nothrow @safe @nogc
        {
            array[] = x;
        }

        /// We can't support implicit conversion but do support explicit casting.
        /// "Vector types of the same size can be implicitly converted among each other."
        /// Casting to another vector type is always just a raw copy.
        VecDest opCast(VecDest)() pure const nothrow @trusted @nogc
            if (VecDest.sizeof == VectorType.sizeof)
        {
            VecDest dest = void;
            // Copy
            dest.array[] = (cast(typeof(dest.array))cast(void[VectorType.sizeof])array)[];
            return dest;
        }

        /// Lane access, readable and writable (mirrors `Vector` indexing).
        ref inout(BaseType) opIndex(size_t i) inout return pure nothrow @safe @nogc
        {
            return array[i];
        }

    }
}
else
{
    public import core.simd;

    // GDC cannot convert implicitely __vector from signed to unsigned, but LDC can
    // And GDC sometimes need those unsigned vector types for some intrinsics.
    // For internal use only.
    package alias ushort8 = Vector!(ushort[8]);
    package alias ubyte8  = Vector!(ubyte[8]);
    package alias ubyte16 = Vector!(ubyte[16]);

    static if (!AVXSizedVectorsAreEmulated)
    {
        package alias ushort16 = Vector!(ushort[16]);
        package alias ubyte32  = Vector!(ubyte[32]);
    }
}

// Emulate ldc.simd cmpMask and other masks.
// Note: these should be deprecated on non-LDC,
// since it's slower to generate that code.
version(LDC)
{
    // ldc.simd already provides native comparison masks; nothing to emulate.
}
else
{
    // TODO: deprecated and write plain versions instead

    /// Yields the element (lane) type of a vector type `V`,
    /// e.g. `float` for `float4`.
    private template BaseType(V)
    {
        alias typeof( ( { V v; return v; }()).array[0]) BaseType;
    }

    /// The "all bits set" value for one lane of `V`.
    /// For float/double lanes this is the value whose bit pattern is all
    /// ones (a NaN), matching what SIMD comparison instructions produce.
    private template TrueMask(V)
    {
        alias Elem = BaseType!V;

        static if (is(Elem == float))
        {
            immutable uint m1 = 0xffffffff;
            enum Elem TrueMask = *cast(float*)(&m1);
        }
        else static if (is(Elem == double))
        {
            immutable ulong m1 = 0xffffffff_ffffffff;
            enum Elem TrueMask = *cast(double*)(&m1);
        }
        else // integer case
        {
            enum Elem TrueMask = -1;
        }
    }

    // Shared lane-wise comparison loop, deduplicating equalMask/greaterMask.
    // Each result lane is TrueMask!Vec when `a[i] op b[i]` holds, else 0.
    private Vec cmpMaskImpl(string op, Vec)(Vec a, Vec b) @trusted
    {
        enum size_t Count = Vec.array.length;
        Vec result;
        foreach(size_t i; 0..Count) // size_t index: no implicit narrowing from the size_t range
        {
            bool cond = mixin("a.array[i] " ~ op ~ " b.array[i]");
            result.ptr[i] = cond ? TrueMask!Vec : 0;
        }
        return result;
    }

    /// Lane-wise equality mask.
    /// For floats, equivalent to an "oeq" (ordered-equal) comparison:
    /// a NaN operand compares false and produces an all-zero lane.
    Vec equalMask(Vec)(Vec a, Vec b) @trusted
    {
        return cmpMaskImpl!("==", Vec)(a, b);
    }

    /// Lane-wise greater-than mask.
    /// For floats, equivalent to an "ogt" (ordered-greater-than) comparison.
    Vec greaterMask(Vec)(Vec a, Vec b) @trusted
    {
        return cmpMaskImpl!(">", Vec)(a, b);
    }
}

unittest
{
    float4 a = [1, 3, 5, 7];
    float4 b = [2, 3, 4, 5];
    int4 c = cast(int4)(greaterMask!float4(a, b));
    static immutable int[4] correct = [0, 0, 0xffff_ffff, 0xffff_ffff];
    assert(c.array == correct);
}

static if (MMXSizedVectorsAreEmulated)
{
    /// MMX-like SIMD types (64-bit wide), emulated as structs over static arrays.
    struct float2
    {
        float[2] array;
        mixin VectorOps!(float2, float[2]);
    }

    struct byte8
    {
        byte[8] array;
        mixin VectorOps!(byte8, byte[8]);
    }

    struct short4
    {
        short[4] array;
        mixin VectorOps!(short4, short[4]);
    }

    struct int2
    {
        int[2] array;
        mixin VectorOps!(int2, int[2]);
    }

    struct long1
    {
        long[1] array;
        mixin VectorOps!(long1, long[1]);
    }
}
else
{
    // For this compiler, defining MMX-sized vectors is working.
    public import core.simd;
    alias long1  = Vector!(long[1]);
    alias float2 = Vector!(float[2]);
    alias int2   = Vector!(int[2]);
    alias short4 = Vector!(short[4]);
    alias byte8  = Vector!(byte[8]);
}

// Emulated and native 64-bit vectors must agree in size.
static assert(float2.sizeof == 8);
static assert(byte8.sizeof  == 8);
static assert(short4.sizeof == 8);
static assert(int2.sizeof   == 8);
static assert(long1.sizeof  == 8);


static if (SSESizedVectorsAreEmulated)
{
    /// SSE-like SIMD types (128-bit wide), emulated as structs over static arrays.

    struct float4
    {
        float[4] array;
        mixin VectorOps!(float4, float[4]);
    }

    struct byte16
    {
        byte[16] array;
        mixin VectorOps!(byte16, byte[16]);
    }

    struct short8
    {
        short[8] array;
        mixin VectorOps!(short8, short[8]);
    }

    struct int4
    {
        int[4] array;
        mixin VectorOps!(int4, int[4]);
    }

    struct long2
    {
        long[2] array;
        mixin VectorOps!(long2, long[2]);
    }

    struct double2
    {
        double[2] array;
        mixin VectorOps!(double2, double[2]);
    }
}

// Emulated and native 128-bit vectors must agree in size.
static assert(float4.sizeof  == 16);
static assert(byte16.sizeof  == 16);
static assert(short8.sizeof  == 16);
static assert(int4.sizeof    == 16);
static assert(long2.sizeof   == 16);
static assert(double2.sizeof == 16);


static if (AVXSizedVectorsAreEmulated)
{
    /// AVX-like SIMD types (256-bit wide), emulated as structs over static arrays.

    struct float8
    {
        float[8] array;
        mixin VectorOps!(float8, float[8]);
    }

    struct byte32
    {
        byte[32] array;
        mixin VectorOps!(byte32, byte[32]);
    }

    struct short16
    {
        short[16] array;
        mixin VectorOps!(short16, short[16]);
    }

    struct int8
    {
        int[8] array;
        mixin VectorOps!(int8, int[8]);
    }

    struct long4
    {
        long[4] array;
        mixin VectorOps!(long4, long[4]);
    }

    struct double4
    {
        double[4] array;
        mixin VectorOps!(double4, double[4]);
    }
}
else
{
    public import core.simd;
}
// Emulated and native 256-bit vectors must agree in size.
static assert(float8.sizeof  == 32);
static assert(byte32.sizeof  == 32);
static assert(short16.sizeof == 32);
static assert(int8.sizeof    == 32);
static assert(long4.sizeof   == 32);
static assert(double4.sizeof == 32);




// Intel-style type aliases, matching the C intrinsics headers.
alias __m256  = float8;
alias __m256i = long4; // long long __vector with ICC, GCC, and clang
alias __m256d = double4;
alias __m128  = float4;
alias __m128i = int4;
alias __m128d = double2;
alias __m64   = long1; // like in Clang, __m64 is a vector of 1 long

/// Builds the 2-bit immediate for `_mm_shuffle_pd`-style intrinsics.
/// `x` selects the high lane, `y` the low lane; both must be 0 or 1.
int _MM_SHUFFLE2(int x, int y) pure @safe
{
    assert(x >= 0 && x <= 1);
    assert(y >= 0 && y <= 1);
    return (x << 1) | y;
}

/// Builds the 8-bit immediate for `_mm_shuffle_ps`/`_mm_shuffle_epi32`-style
/// intrinsics. Arguments select lanes from high (`z`) to low (`w`);
/// each must be in 0..3.
int _MM_SHUFFLE(int z, int y, int x, int w) pure @safe
{
    assert(x >= 0 && x <= 3);
    assert(y >= 0 && y <= 3);
    assert(z >= 0 && z <= 3);
    assert(w >= 0 && w <= 3);
    return (z<<6) | (y<<4) | (x<<2) | w;
}

// test assignment from scalar to vector type
unittest
{
    float4 A = 3.0f;
    float[4] correctA = [3.0f, 3.0f, 3.0f, 3.0f];
    assert(A.array == correctA);

    int2 B = 42;
    int[2] correctB = [42, 42];
    assert(B.array == correctB);
}