1 /**
2 * `core.simd` emulation layer.
3 *
4 * Copyright: Copyright Guillaume Piolat 2016-2020, Stefanos Baziotis 2019.
5 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 */
7 module inteli.types;
8 
9 
10 pure:
11 nothrow:
12 @nogc:
13 
14 version(GNU)
15 {
16     // Note: for GDC support, be sure to use https://explore.dgnu.org/
17 
18     version(X86_64)
19     {
20         enum MMXSizedVectorsAreEmulated = false;
21         enum SSESizedVectorsAreEmulated = false;
22 
23         import gcc.builtins;
24 
25         float4 loadUnaligned(Vec)(const(float)* pvec) @trusted if (is(Vec == float4))
26         {
27             return __builtin_ia32_loadups(pvec);
28         }
29 
30         double2 loadUnaligned(Vec)(const(double)* pvec) @trusted if (is(Vec == double2))
31         {
32             return __builtin_ia32_loadupd(pvec);
33         }
34 
35         byte16 loadUnaligned(Vec)(const(byte)* pvec) @trusted if (is(Vec == byte16))
36         {
37             return cast(byte16) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
38         }
39 
40         short8 loadUnaligned(Vec)(const(short)* pvec) @trusted if (is(Vec == short8))
41         {
42             return cast(short8) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
43         }
44 
45         int4 loadUnaligned(Vec)(const(int)* pvec) @trusted if (is(Vec == int4))
46         {
47             return cast(int4) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
48         }
49 
50         long2 loadUnaligned(Vec)(const(long)* pvec) @trusted if (is(Vec == long2))
51         {
52             return cast(long2) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
53         }
54 
55         void storeUnaligned(Vec)(Vec v, float* pvec) @trusted if (is(Vec == float4))
56         {
57             __builtin_ia32_storeups(pvec, v);
58         }
59 
60         void storeUnaligned(Vec)(Vec v, double* pvec) @trusted if (is(Vec == double2))
61         {
62             __builtin_ia32_storeupd(pvec, v);
63         }
64 
65         void storeUnaligned(Vec)(Vec v, byte* pvec) @trusted if (is(Vec == byte16))
66         {
67             __builtin_ia32_storedqu(cast(char*)pvec, cast(ubyte16)v);
68         }
69 
70         void storeUnaligned(Vec)(Vec v, short* pvec) @trusted if (is(Vec == short8))
71         {
72             __builtin_ia32_storedqu(cast(char*)pvec, cast(ubyte16)v);
73         }
74 
75         void storeUnaligned(Vec)(Vec v, int* pvec) @trusted if (is(Vec == int4))
76         {
77             __builtin_ia32_storedqu(cast(char*)pvec, cast(ubyte16)v);
78         }
79 
80         void storeUnaligned(Vec)(Vec v, long* pvec) @trusted if (is(Vec == long2))
81         {
82             __builtin_ia32_storedqu(cast(char*)pvec, cast(ubyte16)v);
83         }
84 
        // TODO: for performance, replace this wherever possible with a GDC intrinsic
86         Vec shufflevector(Vec, mask...)(Vec a, Vec b) @trusted
87         {
88             enum Count = Vec.array.length;
89             static assert(mask.length == Count);
90 
91             Vec r = void;
92             foreach(int i, m; mask)
93             {
94                 static assert (m < Count * 2);
95                 int ind = cast(int)m;
96                 if (ind < Count)
97                     r.ptr[i] = a.array[ind];
98                 else
99                     r.ptr[i] = b.array[ind - Count];
100             }
101             return r;
102         }
103     }
104     else
105     {
106         enum MMXSizedVectorsAreEmulated = true;
107         enum SSESizedVectorsAreEmulated = true;
108     }
109 }
110 else version(LDC)
111 {
112     public import ldc.simd;
113 
    // Use this alias to make explicit that a shufflevector call is LDC-only,
    // for example when an emulated shufflevector would just be wasteful.
    alias shufflevectorLDC = shufflevector;
117 
118     enum MMXSizedVectorsAreEmulated = false;
119     enum SSESizedVectorsAreEmulated = false;
120 }
121 else version(DigitalMars)
122 {
123     public import core.simd;
124 
125     version(D_SIMD)
126     {
127         enum MMXSizedVectorsAreEmulated = true;
128 
129         static if (__VERSION__ >= 2096)
130         {
            enum SSESizedVectorsAreEmulated = true; // native SSE-sized vectors still don't work well in DMD 2.096 because of DMD bugs
132         }
133         else
134         {
135             // Before DMD 2.096, blocked by https://issues.dlang.org/show_bug.cgi?id=21474
136             enum SSESizedVectorsAreEmulated = true; 
137         }
138     }
139     else
140     {
141         // Some DMD 32-bit targets don't have D_SIMD
142         enum MMXSizedVectorsAreEmulated = true;
143         enum SSESizedVectorsAreEmulated = true;
144     }
145 }
146 
147 enum CoreSimdIsEmulated = MMXSizedVectorsAreEmulated || SSESizedVectorsAreEmulated;
148 
149 static if (CoreSimdIsEmulated)
150 {
151     // core.simd is emulated in some capacity: introduce `VectorOps`
152 
153     mixin template VectorOps(VectorType, ArrayType: BaseType[N], BaseType, size_t N)
154     {
155         enum Count = N;
156         alias Base = BaseType;
157 
158         BaseType* ptr() return pure nothrow @nogc
159         {
160             return array.ptr;
161         }
162 
163         // Unary operators
164         VectorType opUnary(string op)() pure nothrow @safe @nogc
165         {
166             VectorType res = void;
167             mixin("res.array[] = " ~ op ~ "array[];");
168             return res;
169         }
170 
171         // Binary operators
172         VectorType opBinary(string op)(VectorType other) pure const nothrow @safe @nogc
173         {
174             VectorType res = void;
175             mixin("res.array[] = array[] " ~ op ~ " other.array[];");
176             return res;
177         }
178 
179         // Assigning a BaseType value
180         void opAssign(BaseType e) pure nothrow @safe @nogc
181         {
182             array[] = e;
183         }
184 
185         // Assigning a static array
186         void opAssign(ArrayType v) pure nothrow @safe @nogc
187         {
188             array[] = v[];
189         }
190 
191         void opOpAssign(string op)(VectorType other) pure nothrow @safe @nogc
192         {
193             mixin("array[] "  ~ op ~ "= other.array[];");
194         }
195 
        // Construct from a static array
197         this(ArrayType v) pure nothrow @safe @nogc
198         {
199             array[] = v[];
200         }
201 
202         // Broadcast constructor
203         this(BaseType x) pure nothrow @safe @nogc
204         {
205             array[] = x;
206         }
207 
        /// The D spec says "Vector types of the same size can be implicitly converted
        /// among each other"; this emulation can't support implicit conversion, but it
        /// does support explicit casting.
        /// Casting to another vector type is always just a raw copy.
211         VecDest opCast(VecDest)() pure const nothrow @trusted @nogc
212             if (VecDest.sizeof == VectorType.sizeof)
213             {
214                 VecDest dest = void;
215                 // Copy
216                 dest.array[] = (cast(typeof(dest.array))cast(void[VectorType.sizeof])array)[];
217                 return dest;
218             }
219 
220         ref inout(BaseType) opIndex(size_t i) inout pure nothrow @safe @nogc
221         {
222             return array[i];
223         }
224 
225     }
226 
    // Deprecated: extractelement just wasn't interesting enough; use v.array[i] instead.
228     deprecated auto extractelement(Vec, int index, Vec2)(Vec2 vec) @trusted
229     {
230         static assert(Vec.sizeof == Vec2.sizeof);
231         import core.stdc.string: memcpy;
232         Vec v = void;
233         memcpy(&v, &vec, Vec2.sizeof);
234         return v.array[index];
235     }
236 
    // Deprecated: insertelement just wasn't interesting enough; use v.ptr[i] = x instead.
238     deprecated auto insertelement(Vec, int index, Vec2)(Vec2 vec, Vec.Base e) @trusted
239     {
240         static assert(Vec.sizeof == Vec2.sizeof);
241         import core.stdc.string: memcpy;
242         Vec v = void;
243         memcpy(&v, &vec, Vec2.sizeof);
244         v.array[index] = e;
245         return v;
246     }
247 
248     template loadUnaligned(Vec)
249     {
250         // Note: can't be @safe with this signature
251         Vec loadUnaligned(const(BaseType!Vec)* pvec) @trusted
252         {
253             enum bool isVector = ( (Vec.sizeof == 8)  && (!MMXSizedVectorsAreEmulated)
254                                 || (Vec.sizeof == 16) && (!SSESizedVectorsAreEmulated) );
255 
256             static if (isVector)
257             {
258                 // PERF DMD
                /* Enabling this requires moving loadUnaligned and storeUnaligned to internals.d
260                 static if (DMD_with_DSIMD && Vec.sizeof == 8)
261                 {
262                     static if (is(Vec == double2))
263                         return cast(Vec)__simd(XMM.LODUPD, *pvec);
264                     else static if (is(Vec == float4))
265                         return cast(Vec)__simd(XMM.LODUPS, *pvec);
266                     else
267                         return cast(Vec)__simd(XMM.LODDQU, *pvec);
268                 }
269                 else */
270                 {
271                     enum size_t Count = Vec.array.length;
272                     Vec result;
273                     foreach(int i; 0..Count)
274                     {
275                         result.ptr[i] = pvec[i];
276                     }
277                     return result;
278                 }
279             }
280             else
281             {
                // Since this vector type is emulated, it has no alignment constraints,
                // so we can simply cast the pointer and dereference it.
284                 return *cast(Vec*)(pvec);
285             }
286         }
287     }
288 
289     template storeUnaligned(Vec)
290     {
291         // Note: can't be @safe with this signature
292         void storeUnaligned(Vec v, BaseType!Vec* pvec) @trusted
293         {
294             enum bool isVector = ( (Vec.sizeof == 8)  && (!MMXSizedVectorsAreEmulated)
295                                 || (Vec.sizeof == 16) && (!SSESizedVectorsAreEmulated) );
296 
297             static if (isVector)
298             {
299                 // PERF DMD
                /* Enabling this requires moving loadUnaligned and storeUnaligned to internals.d
301                 static if (DMD_with_DSIMD && Vec.sizeof == 8)
302                 {
303                     static if (is(Vec == double2))
304                         __simd_sto(XMM.STOUPD, *pvec, value);
305                     else static if (is(Vec == float4))
306                         __simd_sto(XMM.STOUPS, *pvec, value);
307                     else
308                         __simd_sto(XMM.STODQU, *pvec, value);
309                 }
310                 else*/
311                 {
312                     enum size_t Count = Vec.array.length;
313                     foreach(int i; 0..Count)
314                         pvec[i] = v.array[i];
315                 }
316             }
317             else
318             {
319                 *cast(Vec*)(pvec) = v;
320             }
321         }
322     }
323 
324     Vec shufflevector(Vec, mask...)(Vec a, Vec b) @safe
325     {
326         enum size_t Count = Vec.array.length;
327         static assert(mask.length == Count);
328 
329         Vec r = void;
330         foreach(int i, m; mask)
331         {
332             static assert (m < Count * 2);
333             enum int ind = cast(int)m;
334             static if (ind < Count)
335                 r.array[i] = a.array[ind];
336             else
337                 r.array[i] = b.array[ind-Count];
338         }
339         return r;
340     }
341 }
342 else
343 {
344     public import core.simd;
345 
    // GDC cannot implicitly convert __vector from signed to unsigned, but LDC can,
    // and LDC sometimes needs those unsigned vector types for some intrinsics.
    // For internal use only.
349     package alias ushort8 = Vector!(ushort[8]);
350     package alias ubyte8  = Vector!(ubyte[8]);
351     package alias ubyte16 = Vector!(ubyte[16]);
352 }
353 
354 // Emulate ldc.simd cmpMask
355 version(LDC)
356 {} 
357 else
358 {
359     private template BaseType(V)
360     {
        alias BaseType = typeof(( { V v; return v; }()).array[0]);
362     }
363 
364     private template TrueMask(V)
365     {
366         alias Elem = BaseType!V;
367 
368         static if (is(Elem == float))
369         {
370             immutable uint m1 = 0xffffffff;
371             enum Elem TrueMask = *cast(float*)(&m1);
372         }
373         else static if (is(Elem == double))
374         {
375             immutable ulong m1 = 0xffffffff_ffffffff;
376             enum Elem TrueMask = *cast(double*)(&m1);
377         }
378         else // integer case
379         {
380             enum Elem TrueMask = -1;
381         }
382     }
383 
384     Vec equalMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "oeq" comparison
385     {
386         enum size_t Count = Vec.array.length;
387         Vec result;
388         foreach(int i; 0..Count)
389         {
390             bool cond = a.array[i] == b.array[i];
391             result.ptr[i] = cond ? TrueMask!Vec : 0;
392         }
393         return result;
394     }
395 
396     Vec notEqualMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "one" comparison
397     {
398         enum size_t Count = Vec.array.length;
399         Vec result;
400         foreach(int i; 0..Count)
401         {
402             bool cond = a.array[i] != b.array[i];
403             result.ptr[i] = cond ? TrueMask!Vec : 0;
404         }
405         return result;
406     }
407 
408     Vec greaterMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "ogt" comparison
409     {
410         enum size_t Count = Vec.array.length;
411         Vec result;
412         foreach(int i; 0..Count)
413         {
414             bool cond = a.array[i] > b.array[i];
415             result.ptr[i] = cond ? TrueMask!Vec : 0;
416         }
417         return result;
418     }
419 
420     Vec greaterOrEqualMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "oge" comparison
421     {
422         enum size_t Count = Vec.array.length;
423         Vec result;
424         foreach(int i; 0..Count)
425         {
            bool cond = a.array[i] >= b.array[i];
427             result.ptr[i] = cond ? TrueMask!Vec : 0;
428         }
429         return result;
430     }
431 }
432 
433 unittest
434 {
435     float4 a = [1, 3, 5, 7];
436     float4 b = [2, 3, 4, 5];
437     int4 c = cast(int4)(greaterMask!float4(a, b));
438     static immutable int[4] correct = [0, 0, 0xffff_ffff, 0xffff_ffff];
439     assert(c.array == correct);
440 }
441 
442 static if (MMXSizedVectorsAreEmulated)
443 {
444     /// MMX-like SIMD types
445     struct float2
446     {
447         float[2] array;
448         mixin VectorOps!(float2, float[2]);
449     }
450 
451     struct byte8
452     {
453         byte[8] array;
454         mixin VectorOps!(byte8, byte[8]);
455     }
456 
457     struct short4
458     {
459         short[4] array;
460         mixin VectorOps!(short4, short[4]);
461     }
462 
463     struct int2
464     {
465         int[2] array;
466         mixin VectorOps!(int2, int[2]);
467     }
468 
469     struct long1
470     {
471         long[1] array;
472         mixin VectorOps!(long1, long[1]);
473     }
474 }
475 else
476 {
    // On this compiler, MMX-sized core.simd vectors work natively.
478     public import core.simd;
479     alias Vector!(long [1]) long1;
480     alias Vector!(float[2]) float2;
481     alias Vector!(int  [2]) int2;
482     alias Vector!(short[4]) short4;
483     alias Vector!(byte [8]) byte8;
484 }
485 
486 static assert(float2.sizeof == 8);
487 static assert(byte8.sizeof == 8);
488 static assert(short4.sizeof == 8);
489 static assert(int2.sizeof == 8);
490 static assert(long1.sizeof == 8);
491 
492 
493 static if (SSESizedVectorsAreEmulated)
494 {
495     /// SSE-like SIMD types
496 
497     struct float4
498     {
499         float[4] array;
500         mixin VectorOps!(float4, float[4]);
501     }
502 
503     struct byte16
504     {
505         byte[16] array;
506         mixin VectorOps!(byte16, byte[16]);
507     }
508 
509     struct short8
510     {
511         short[8] array;
512         mixin VectorOps!(short8, short[8]);
513     }
514 
515     struct int4
516     {
517         int[4] array;
518         mixin VectorOps!(int4, int[4]);
519     }
520 
521     struct long2
522     {
523         long[2] array;
524         mixin VectorOps!(long2, long[2]);
525     }
526 
527     struct double2
528     {
529         double[2] array;
530         mixin VectorOps!(double2, double[2]);
531     }
532 }
533 
534 static assert(float4.sizeof == 16);
535 static assert(byte16.sizeof == 16);
536 static assert(short8.sizeof == 16);
537 static assert(int4.sizeof == 16);
538 static assert(long2.sizeof == 16);
539 static assert(double2.sizeof == 16);
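
// Illustrative addition: lane-wise arithmetic and same-size casts behave the same
// on the emulated structs (through VectorOps) and on native core.simd vectors.
unittest
{
    int4 a = [1, 2, 3, 4];
    int4 b = [10, 20, 30, 40];
    int4 s = a + b;
    static immutable int[4] correct = [11, 22, 33, 44];
    assert(s.array == correct);

    // A cast between vector types of the same size is a raw bit copy.
    float4 f = cast(float4) s;
    int4 back = cast(int4) f;
    assert(back.array == correct);
}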
540 
541 
542 
543 
544 
545 alias __m128 = float4;
546 alias __m128i = int4;
547 alias __m128d = double2;
548 alias __m64 = long1; // like in Clang, __m64 is a vector of 1 long
549 
550 int _MM_SHUFFLE2(int x, int y) pure @safe
551 {
552     assert(x >= 0 && x <= 1);
553     assert(y >= 0 && y <= 1);
554     return (x << 1) | y;
555 }
556 
557 int _MM_SHUFFLE(int z, int y, int x, int w) pure @safe
558 {
559     assert(x >= 0 && x <= 3);
560     assert(y >= 0 && y <= 3);
561     assert(z >= 0 && z <= 3);
562     assert(w >= 0 && w <= 3);
563     return (z<<6) | (y<<4) | (x<<2) | w;
564 }
565 
566 // test assignment from scalar to vector type
567 unittest
568 {
569     float4 A = 3.0f;
570     float[4] correctA = [3.0f, 3.0f, 3.0f, 3.0f];
571     assert(A.array == correctA);
572 
573     int2 B = 42;
574     int[2] correctB = [42, 42];
575     assert(B.array == correctB);
576 }