1 /**
2 * `core.simd` emulation layer.
3 *
4 * Copyright: Copyright Guillaume Piolat 2016-2020, Stefanos Baziotis 2019.
5 *            cet 2024.
6 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7 */
8 module inteli.types;
9 
10 
11 pure:
12 nothrow:
13 @nogc:
14 
// Compiler detection.
//
// For each supported compiler, define three booleans telling whether the
// MMX-sized (64-bit), SSE-sized (128-bit) and AVX-sized (256-bit) vector types
// are native `core.simd` vectors (`false`) or must be emulated by structs (`true`).
version(GNU)
{
    // Note: for GDC support, be sure to use https://explore.dgnu.org/

    // Future: just detect vectors, do not base upon arch.

    version(X86_64)
    {
        enum MMXSizedVectorsAreEmulated = false;
        enum SSESizedVectorsAreEmulated = false;

        // Does GDC support AVX-sized vectors?
        static if (__VERSION__ >= 2100) // Starting at GDC 12.1 only.
        {
            // Probe the vector type itself rather than trusting the front-end version.
            enum AVXSizedVectorsAreEmulated = !(is(__vector(double[4])));
        }
        else
        {
            enum AVXSizedVectorsAreEmulated = true;
        }

        import gcc.builtins;
    }
    else
    {
        // Other GDC targets: everything is emulated.
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
        enum AVXSizedVectorsAreEmulated = true;
    }
}
else version(LDC)
{
    public import ldc.simd;

    // Use this alias to mention it should only be used with LDC,
    // for example when emulated shufflevector would just be wasteful.
    alias shufflevectorLDC = shufflevector;

    enum MMXSizedVectorsAreEmulated = false;
    enum SSESizedVectorsAreEmulated = false;
    enum AVXSizedVectorsAreEmulated = false;
}
else version(DigitalMars)
{
    public import core.simd;

    static if (__VERSION__ >= 2100)
    {
        // Note: turning this true is very desirable for DMD performance,
        // but also leads to many bugs being discovered upstream.
        // The fact that it works at all relies on many workarounds.
        // In particular intel-intrinsics with this "on" is a honeypot for DMD backend bugs,
        // and a very strong DMD codegen test suite.
        // What happens typically is that contributors end up on a DMD bug in their PR.
        // But finally, in 2022 D_SIMD has been activated, at least for SSE and some instructions.
        enum bool tryToEnableCoreSimdWithDMD = true;
    }
    else
    {
        enum bool tryToEnableCoreSimdWithDMD = false;
    }

    version(D_SIMD)
    {
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = !tryToEnableCoreSimdWithDMD;

        // Note: with DMD, AVX-sized vectors can't be enabled yet, so they are
        // emulated whether or not D_AVX is defined.
        // On linux + x86_64, this will fail since a few operands seem to be missing.
        // FUTURE: enable AVX-sized vectors in DMD (under `version(D_AVX)`). :)
        //
        // Blockers: https://issues.dlang.org/show_bug.cgi?id=24283 and 24284
        //           Probably other, unreported issues.
        enum AVXSizedVectorsAreEmulated = true;
    }
    else
    {
        // Some DMD 32-bit targets don't have D_SIMD
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
        enum AVXSizedVectorsAreEmulated = true;
    }
}
else
{
    // Without this branch an unknown compiler only fails later, with an
    // "undefined identifier" error on the *SizedVectorsAreEmulated enums.
    static assert(false, "inteli.types: unsupported D compiler (expected GDC, LDC or DMD)");
}
101 
102 enum CoreSimdIsEmulated = MMXSizedVectorsAreEmulated || SSESizedVectorsAreEmulated || AVXSizedVectorsAreEmulated;
103 
104 static if (CoreSimdIsEmulated)
105 {
106     // core.simd is emulated in some capacity: introduce `VectorOps`
107 
108     mixin template VectorOps(VectorType, ArrayType: BaseType[N], BaseType, size_t N)
109     {
110         enum Count = N;
111         alias Base = BaseType;
112 
113         BaseType* ptr() return pure nothrow @nogc
114         {
115             return array.ptr;
116         }
117 
118         // Unary operators
119         VectorType opUnary(string op)() pure nothrow @safe @nogc
120         {
121             VectorType res = void;
122             mixin("res.array[] = " ~ op ~ "array[];");
123             return res;
124         }
125 
126         // Binary operators
127         VectorType opBinary(string op)(VectorType other) pure const nothrow @safe @nogc
128         {
129             VectorType res = void;
130             mixin("res.array[] = array[] " ~ op ~ " other.array[];");
131             return res;
132         }
133 
134         // Assigning a BaseType value
135         void opAssign(BaseType e) pure nothrow @safe @nogc
136         {
137             array[] = e;
138         }
139 
140         // Assigning a static array
141         void opAssign(ArrayType v) pure nothrow @safe @nogc
142         {
143             array[] = v[];
144         }
145 
146         void opOpAssign(string op)(VectorType other) pure nothrow @safe @nogc
147         {
148             mixin("array[] "  ~ op ~ "= other.array[];");
149         }
150 
151         // Assigning a dyn array
152         this(ArrayType v) pure nothrow @safe @nogc
153         {
154             array[] = v[];
155         }
156 
157         // Broadcast constructor
158         this(BaseType x) pure nothrow @safe @nogc
159         {
160             array[] = x;
161         }
162 
163         /// We can't support implicit conversion but do support explicit casting.
164         /// "Vector types of the same size can be implicitly converted among each other."
165         /// Casting to another vector type is always just a raw copy.
166         VecDest opCast(VecDest)() pure const nothrow @trusted @nogc
167             if (VecDest.sizeof == VectorType.sizeof)
168             {
169                 VecDest dest = void;
170                 // Copy
171                 dest.array[] = (cast(typeof(dest.array))cast(void[VectorType.sizeof])array)[];
172                 return dest;
173             }
174 
175         ref inout(BaseType) opIndex(size_t i) inout return pure nothrow @safe @nogc
176         {
177             return array[i];
178         }
179 
180     }
181 }
182 else
183 {
184     public import core.simd;
185 
186     // GDC cannot convert implicitely __vector from signed to unsigned, but LDC can
187     // And GDC sometimes need those unsigned vector types for some intrinsics.
188     // For internal use only.
189     package alias ushort8 = Vector!(ushort[8]);
190     package alias ubyte8  = Vector!(ubyte[8]);
191     package alias ubyte16 = Vector!(ubyte[16]);
192 
193     static if (!AVXSizedVectorsAreEmulated)
194     {
195         package alias ushort16 = Vector!(ushort[16]);
196         package alias ubyte32  = Vector!(ubyte[32]);
197     }
198 }
199 
200 // Emulate ldc.simd cmpMask and other masks.
201 // Note: these should be deprecated on non-LDC, 
202 // since it's slower to generate that code.
203 version(LDC)
204 {} 
205 else
206 {
    // TODO: deprecate these and write plain versions instead
208 
209     private template BaseType(V)
210     {
211         alias typeof( ( { V v; return v; }()).array[0]) BaseType;
212     }
213 
214     private template TrueMask(V)
215     {
216         alias Elem = BaseType!V;
217 
218         static if (is(Elem == float))
219         {
220             immutable uint m1 = 0xffffffff;
221             enum Elem TrueMask = *cast(float*)(&m1);
222         }
223         else static if (is(Elem == double))
224         {
225             immutable ulong m1 = 0xffffffff_ffffffff;
226             enum Elem TrueMask = *cast(double*)(&m1);
227         }
228         else // integer case
229         {
230             enum Elem TrueMask = -1;
231         }
232     }
233 
234     Vec equalMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "oeq" comparison
235     {
236         enum size_t Count = Vec.array.length;
237         Vec result;
238         foreach(int i; 0..Count)
239         {
240             bool cond = a.array[i] == b.array[i];
241             result.ptr[i] = cond ? TrueMask!Vec : 0;
242         }
243         return result;
244     }
245 
246     Vec greaterMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "ogt" comparison
247     {
248         enum size_t Count = Vec.array.length;
249         Vec result;
250         foreach(int i; 0..Count)
251         {
252             bool cond = a.array[i] > b.array[i];
253             result.ptr[i] = cond ? TrueMask!Vec : 0;
254         }
255         return result;
256     }
257 }
258 
// greaterMask sets every bit of the elements where lhs > rhs and clears the others
unittest
{
    float4 lhs = [1, 3, 5, 7];
    float4 rhs = [2, 3, 4, 5];
    int4 maskBits = cast(int4)(greaterMask!float4(lhs, rhs));
    // -1 is the all-bits-set int, i.e. 0xffff_ffff
    static immutable int[4] expectedMask = [0, 0, -1, -1];
    assert(maskBits.array == expectedMask);
}
267 
static if (MMXSizedVectorsAreEmulated)
{
    // MMX-like SIMD types, emulated: each one is a struct wrapping a
    // static array of 8 bytes in total.

    /// 1 x `long`
    struct long1
    {
        long[1] array;
        mixin VectorOps!(long1, typeof(array));
    }

    /// 2 x `int`
    struct int2
    {
        int[2] array;
        mixin VectorOps!(int2, typeof(array));
    }

    /// 4 x `short`
    struct short4
    {
        short[4] array;
        mixin VectorOps!(short4, typeof(array));
    }

    /// 8 x `byte`
    struct byte8
    {
        byte[8] array;
        mixin VectorOps!(byte8, typeof(array));
    }

    /// 2 x `float`
    struct float2
    {
        float[2] array;
        mixin VectorOps!(float2, typeof(array));
    }
}
else
{
    // For this compiler, defining MMX-sized vectors is working.
    public import core.simd;
    alias byte8  = Vector!(byte[8]);
    alias short4 = Vector!(short[4]);
    alias int2   = Vector!(int[2]);
    alias float2 = Vector!(float[2]);
    alias long1  = Vector!(long[1]);
}

// Emulated or native, every MMX-sized vector must occupy 8 bytes.
static assert(long1.sizeof == 8);
static assert(int2.sizeof == 8);
static assert(short4.sizeof == 8);
static assert(byte8.sizeof == 8);
static assert(float2.sizeof == 8);
317 
318 
static if (SSESizedVectorsAreEmulated)
{
    // SSE-like SIMD types, emulated: each one is a struct wrapping a
    // static array of 16 bytes in total.

    /// 2 x `long`
    struct long2
    {
        long[2] array;
        mixin VectorOps!(long2, typeof(array));
    }

    /// 4 x `int`
    struct int4
    {
        int[4] array;
        mixin VectorOps!(int4, typeof(array));
    }

    /// 8 x `short`
    struct short8
    {
        short[8] array;
        mixin VectorOps!(short8, typeof(array));
    }

    /// 16 x `byte`
    struct byte16
    {
        byte[16] array;
        mixin VectorOps!(byte16, typeof(array));
    }

    /// 4 x `float`
    struct float4
    {
        float[4] array;
        mixin VectorOps!(float4, typeof(array));
    }

    /// 2 x `double`
    struct double2
    {
        double[2] array;
        mixin VectorOps!(double2, typeof(array));
    }
}

// Emulated or native, every SSE-sized vector must occupy 16 bytes.
static assert(long2.sizeof == 16);
static assert(int4.sizeof == 16);
static assert(short8.sizeof == 16);
static assert(byte16.sizeof == 16);
static assert(float4.sizeof == 16);
static assert(double2.sizeof == 16);
366 
367 
static if (AVXSizedVectorsAreEmulated)
{
    // AVX-like SIMD types, emulated: each one is a struct wrapping a
    // static array of 32 bytes in total.

    /// 4 x `long`
    struct long4
    {
        long[4] array;
        mixin VectorOps!(long4, typeof(array));
    }

    /// 8 x `int`
    struct int8
    {
        int[8] array;
        mixin VectorOps!(int8, typeof(array));
    }

    /// 16 x `short`
    struct short16
    {
        short[16] array;
        mixin VectorOps!(short16, typeof(array));
    }

    /// 32 x `byte`
    struct byte32
    {
        byte[32] array;
        mixin VectorOps!(byte32, typeof(array));
    }

    /// 8 x `float`
    struct float8
    {
        float[8] array;
        mixin VectorOps!(float8, typeof(array));
    }

    /// 4 x `double`
    struct double4
    {
        double[4] array;
        mixin VectorOps!(double4, typeof(array));
    }
}
else
{
    // AVX-sized vectors are not emulated: the names come from core.simd.
    public import core.simd;
}

// Emulated or native, every AVX-sized vector must occupy 32 bytes.
static assert(long4.sizeof == 32);
static assert(int8.sizeof == 32);
static assert(short16.sizeof == 32);
static assert(byte32.sizeof == 32);
static assert(float8.sizeof == 32);
static assert(double4.sizeof == 32);
418 
419 
420 
421 
// Intel-style names for the vector types, from narrowest to widest.

// 64-bit
alias __m64 = long1; // like in Clang, __m64 is a vector of 1 long

// 128-bit
alias __m128 = float4;
alias __m128d = double2;
alias __m128i = int4;

// 256-bit
alias __m256 = float8;
alias __m256d = double4;
alias __m256i = long4; // long long __vector with ICC, GCC, and clang
429 
/// Packs two 1-bit selectors into one integer: `x` goes to bit 1, `y` to bit 0.
/// Both arguments are asserted to be 0 or 1.
int _MM_SHUFFLE2(int x, int y) pure @safe
{
    assert(0 <= x && x <= 1);
    assert(0 <= y && y <= 1);
    immutable int highBit = x << 1;
    return highBit | y;
}
436 
/// Packs four 2-bit selectors into one integer, from the most significant
/// field to the least: `z` (bits 7-6), `y` (bits 5-4), `x` (bits 3-2), `w` (bits 1-0).
/// Every argument is asserted to be in 0 .. 3.
int _MM_SHUFFLE(int z, int y, int x, int w) pure @safe
{
    assert(0 <= x && x <= 3);
    assert(0 <= y && y <= 3);
    assert(0 <= z && z <= 3);
    assert(0 <= w && w <= 3);
    immutable int upper = (z << 6) | (y << 4);
    immutable int lower = (x << 2) | w;
    return upper | lower;
}
445 
// Initialising a vector from a scalar must broadcast it to every element.
unittest
{
    // SSE-sized vector
    float4 wide = 3.0f;
    float[4] expectedWide = [3.0f, 3.0f, 3.0f, 3.0f];
    assert(wide.array == expectedWide);

    // MMX-sized vector
    int2 narrow = 42;
    int[2] expectedNarrow = [42, 42];
    assert(narrow.array == expectedNarrow);
}