1 /**
2 * `core.simd` emulation layer.
3 *
4 * Copyright: Copyright Guillaume Piolat 2016-2020, Stefanos Baziotis 2019.
5 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 */
7 module inteli.types;
8 
9 
10 pure:
11 nothrow:
12 @nogc:
13 
14 version(GNU)
15 {
16     // Note: for GDC support, be sure to use https://explore.dgnu.org/
17 
18     version(X86_64)
19     {
20         enum MMXSizedVectorsAreEmulated = false;
21         enum SSESizedVectorsAreEmulated = false;
22 
23         import gcc.builtins;
24 
25         float4 loadUnaligned(Vec)(const(float)* pvec) @trusted if (is(Vec == float4))
26         {
27             return __builtin_ia32_loadups(pvec);
28         }
29 
30         double2 loadUnaligned(Vec)(const(double)* pvec) @trusted if (is(Vec == double2))
31         {
32             return __builtin_ia32_loadupd(pvec);
33         }
34 
35         byte16 loadUnaligned(Vec)(const(byte)* pvec) @trusted if (is(Vec == byte16))
36         {
37             return cast(byte16) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
38         }
39 
40         short8 loadUnaligned(Vec)(const(short)* pvec) @trusted if (is(Vec == short8))
41         {
42             return cast(short8) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
43         }
44 
45         int4 loadUnaligned(Vec)(const(int)* pvec) @trusted if (is(Vec == int4))
46         {
47             return cast(int4) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
48         }
49 
50         long2 loadUnaligned(Vec)(const(long)* pvec) @trusted if (is(Vec == long2))
51         {
52             return cast(long2) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
53         }
54 
55         void storeUnaligned(Vec)(Vec v, float* pvec) @trusted if (is(Vec == float4))
56         {
57             __builtin_ia32_storeups(pvec, v);
58         }
59 
60         void storeUnaligned(Vec)(Vec v, double* pvec) @trusted if (is(Vec == double2))
61         {
62             __builtin_ia32_storeupd(pvec, v);
63         }
64 
65         void storeUnaligned(Vec)(Vec v, byte* pvec) @trusted if (is(Vec == byte16))
66         {
67             __builtin_ia32_storedqu(cast(char*)pvec, v);
68         }
69 
70         void storeUnaligned(Vec)(Vec v, short* pvec) @trusted if (is(Vec == short8))
71         {
72             __builtin_ia32_storedqu(cast(char*)pvec, v);
73         }
74 
75         void storeUnaligned(Vec)(Vec v, int* pvec) @trusted if (is(Vec == int4))
76         {
77             __builtin_ia32_storedqu(cast(char*)pvec, v);
78         }
79 
80         void storeUnaligned(Vec)(Vec v, long* pvec) @trusted if (is(Vec == long2))
81         {
82             __builtin_ia32_storedqu(cast(char*)pvec, v);
83         }
84 
85         // TODO: for performance, replace that anywhere possible by a GDC intrinsic
86         Vec shufflevector(Vec, mask...)(Vec a, Vec b) @trusted
87         {
88             enum Count = Vec.array.length;
89             static assert(mask.length == Count);
90 
91             Vec r = void;
92             foreach(int i, m; mask)
93             {
94                 static assert (m < Count * 2);
95                 int ind = cast(int)m;
96                 if (ind < Count)
97                     r.ptr[i] = a.array[ind];
98                 else
99                     r.ptr[i] = b.array[ind - Count];
100             }
101             return r;
102         }
103     }
104     else
105     {
106         enum MMXSizedVectorsAreEmulated = true;
107         enum SSESizedVectorsAreEmulated = true;
108     }
109 }
else version(LDC)
{
    // LDC: re-export `ldc.simd`; neither vector size is emulated.
    public import ldc.simd;

    // Use this alias to mention it should only be used with LDC,
    // for example when emulated shufflevector would just be wasteful.
    alias shufflevectorLDC = shufflevector; 

    enum MMXSizedVectorsAreEmulated = false;
    enum SSESizedVectorsAreEmulated = false;
}
else version(DigitalMars)
{
    // DMD: re-export `core.simd`, but both vector sizes are currently emulated
    // whether or not D_SIMD is available.
    public import core.simd;

    version(D_SIMD)
    {
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true; // Should be false, but it is blocked by https://issues.dlang.org/show_bug.cgi?id=21474
    }
    else
    {
        // Some DMD 32-bit targets don't have D_SIMD
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
    }
}

// True as soon as at least one of the two vector sizes (8-byte or 16-byte)
// is emulated; gates the definition of `VectorOps` and the emulated helpers.
enum CoreSimdIsEmulated = MMXSizedVectorsAreEmulated || SSESizedVectorsAreEmulated;
139 
140 static if (CoreSimdIsEmulated)
141 {
142     // core.simd is emulated in some capacity: introduce `VectorOps`
143 
144     mixin template VectorOps(VectorType, ArrayType: BaseType[N], BaseType, size_t N)
145     {
146         enum Count = N;
147         alias Base = BaseType;
148 
149         BaseType* ptr() return pure nothrow @nogc
150         {
151             return array.ptr;
152         }
153 
154         // Unary operators
155         VectorType opUnary(string op)() pure nothrow @safe @nogc
156         {
157             VectorType res = void;
158             mixin("res.array[] = " ~ op ~ "array[];");
159             return res;
160         }
161 
162         // Binary operators
163         VectorType opBinary(string op)(VectorType other) pure const nothrow @safe @nogc
164         {
165             VectorType res = void;
166             mixin("res.array[] = array[] " ~ op ~ " other.array[];");
167             return res;
168         }
169 
170         // Assigning a static array
171         void opAssign(ArrayType v) pure nothrow @safe @nogc
172         {
173             array[] = v[];
174         }
175 
176         void opOpAssign(string op)(VectorType other) pure nothrow @safe @nogc
177         {
178             mixin("array[] "  ~ op ~ "= other.array[];");
179         }
180 
181         // Assigning a dyn array
182         this(ArrayType v) pure nothrow @safe @nogc
183         {
184             array[] = v[];
185         }
186 
187         // Broadcast constructor
188         this(BaseType x) pure nothrow @safe @nogc
189         {
190             array[] = x;
191         }
192 
193         /// We can't support implicit conversion but do support explicit casting.
194         /// "Vector types of the same size can be implicitly converted among each other."
195         /// Casting to another vector type is always just a raw copy.
196         VecDest opCast(VecDest)() pure const nothrow @trusted @nogc
197             if (VecDest.sizeof == VectorType.sizeof)
198             {
199                 VecDest dest = void;
200                 // Copy
201                 dest.array[] = (cast(typeof(dest.array))cast(void[VectorType.sizeof])array)[];
202                 return dest;
203             }
204 
205         ref inout(BaseType) opIndex(size_t i) inout pure nothrow @safe @nogc
206         {
207             return array[i];
208         }
209 
210     }
211 
212     // they just weren't interesting enough, use v.array[i] instead.
213     deprecated auto extractelement(Vec, int index, Vec2)(Vec2 vec) @trusted
214     {
215         static assert(Vec.sizeof == Vec2.sizeof);
216         import core.stdc.string: memcpy;
217         Vec v = void;
218         memcpy(&v, &vec, Vec2.sizeof);
219         return v.array[index];
220     }
221 
222     // they just weren't interesting enough, use v.ptr[i] = x instead.
223     deprecated auto insertelement(Vec, int index, Vec2)(Vec2 vec, Vec.Base e) @trusted
224     {
225         static assert(Vec.sizeof == Vec2.sizeof);
226         import core.stdc.string: memcpy;
227         Vec v = void;
228         memcpy(&v, &vec, Vec2.sizeof);
229         v.array[index] = e;
230         return v;
231     }
232 
233     template loadUnaligned(Vec)
234     {
235         // Note: can't be @safe with this signature
236         Vec loadUnaligned(const(BaseType!Vec)* pvec) @trusted
237         {
238             enum bool isVector = ( (Vec.sizeof == 8)  && (!MMXSizedVectorsAreEmulated)
239                                 || (Vec.sizeof == 16) && (!SSESizedVectorsAreEmulated) );
240 
241             static if (isVector)
242             {
243                 // PERF: there is probably something faster to do for this compiler (DMD).
244                 //       Avoid this on DMD in the future.
245                 enum size_t Count = Vec.array.length;
246                 Vec result;
247                 foreach(int i; 0..Count)
248                 {
249                     result.ptr[i] = pvec[i];
250                 }
251                 return result;
252             }
253             else
254             {
255                 // Since this vector is emulated, it doesn't have alignement constraints
256                 // and as such we can just cast it.
257                 return *cast(Vec*)(pvec);
258             }
259         }
260     }
261 
262     template storeUnaligned(Vec)
263     {
264         // Note: can't be @safe with this signature
265         void storeUnaligned(Vec v, BaseType!Vec* pvec) @trusted
266         {
267             enum bool isVector = ( (Vec.sizeof == 8)  && (!MMXSizedVectorsAreEmulated)
268                                 || (Vec.sizeof == 16) && (!SSESizedVectorsAreEmulated) );
269 
270             static if (isVector)
271             {
272                 // PERF: there is probably something faster to do for this compiler (DMD).
273                 //       Avoid this on DMD in the future.
274                 enum size_t Count = Vec.array.length;
275                 foreach(int i; 0..Count)
276                     pvec[i] = v.array[i];
277             }
278             else
279             {
280                 *cast(Vec*)(pvec) = v;
281             }
282         }
283     }
284 
285     Vec shufflevector(Vec, mask...)(Vec a, Vec b) @safe
286     {
287         enum size_t Count = Vec.array.length;
288         static assert(mask.length == Count);
289 
290         Vec r = void;
291         foreach(int i, m; mask)
292         {
293             static assert (m < Count * 2);
294             int ind = cast(int)m;
295             if (ind < Count)
296                 r.array[i] = a.array[ind];
297             else
298                 r.array[i] = b.array[ind-Count];
299         }
300         return r;
301     }
302 }
303 else
304 {
305     public import core.simd;
306 }
307 
308 // Emulate ldc.simd cmpMask
309 version(LDC)
310 {} 
311 else
312 {
313     private template BaseType(V)
314     {
315         alias typeof( ( { V v; return v; }()).array[0]) BaseType;
316     }
317 
318     private template TrueMask(V)
319     {
320         alias Elem = BaseType!V;
321 
322         static if (is(Elem == float))
323         {
324             immutable uint m1 = 0xffffffff;
325             enum Elem TrueMask = *cast(float*)(&m1);
326         }
327         else static if (is(Elem == double))
328         {
329             immutable ulong m1 = 0xffffffff_ffffffff;
330             enum Elem TrueMask = *cast(double*)(&m1);
331         }
332         else // integer case
333         {
334             enum Elem TrueMask = -1;
335         }
336     }
337 
338     Vec equalMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "oeq" comparison
339     {
340         enum size_t Count = Vec.array.length;
341         Vec result;
342         foreach(int i; 0..Count)
343         {
344             bool cond = a.array[i] == b.array[i];
345             result.ptr[i] = cond ? TrueMask!Vec : 0;
346         }
347         return result;
348     }
349 
350     Vec notEqualMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "one" comparison
351     {
352         enum size_t Count = Vec.array.length;
353         Vec result;
354         foreach(int i; 0..Count)
355         {
356             bool cond = a.array[i] != b.array[i];
357             result.ptr[i] = cond ? TrueMask!Vec : 0;
358         }
359         return result;
360     }
361 
362     Vec greaterMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "ogt" comparison
363     {
364         enum size_t Count = Vec.array.length;
365         Vec result;
366         foreach(int i; 0..Count)
367         {
368             bool cond = a.array[i] > b.array[i];
369             result.ptr[i] = cond ? TrueMask!Vec : 0;
370         }
371         return result;
372     }
373 
374     Vec greaterOrEqualMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "oge" comparison
375     {
376         enum size_t Count = Vec.array.length;
377         Vec result;
378         foreach(int i; 0..Count)
379         {
380             bool cond = a.array[i] > b.array[i];
381             result.ptr[i] = cond ? TrueMask!Vec : 0;
382         }
383         return result;
384     }
385 }
386 
// greaterMask: lanes where a > b become all-ones, the others zero.
unittest
{
    float4 lhs = [1, 3, 5, 7];
    float4 rhs = [2, 3, 4, 5];
    static immutable int[4] expected = [0, 0, 0xffff_ffff, 0xffff_ffff];

    // Reinterpret the float mask as integers to compare exact bit patterns.
    int4 maskBits = cast(int4)(greaterMask!float4(lhs, rhs));
    assert(maskBits.array == expected);
}
395 
static if (MMXSizedVectorsAreEmulated)
{
    /// MMX-like SIMD types

    /// Emulated 8-byte vector of two `float` lanes.
    struct float2
    {
        float[2] array;
        mixin VectorOps!(float2, float[2]);

        /// Returns the `float` whose bit pattern is all ones.
        private static float allOnes() pure nothrow @nogc @trusted
        {
            uint allBits = 0xffffffff;
            return *cast(float*)(&allBits);
        }
    }

    /// Emulated 8-byte vector of eight `byte` lanes.
    struct byte8
    {
        byte[8] array;
        mixin VectorOps!(byte8, byte[8]);
    }

    /// Emulated 8-byte vector of four `short` lanes.
    struct short4
    {
        short[4] array;
        mixin VectorOps!(short4, short[4]);
    }

    /// Emulated 8-byte vector of two `int` lanes.
    struct int2
    {
        int[2] array;
        mixin VectorOps!(int2, int[2]);
    }

    /// Emulated 8-byte vector of a single `long` lane.
    struct long1
    {
        long[1] array;
        mixin VectorOps!(long1, long[1]);
    }
}
else
{
    // For this compiler, defining MMX-sized vectors is working.
    public import core.simd;
    alias long1  = Vector!(long [1]);
    alias float2 = Vector!(float[2]);
    alias int2   = Vector!(int  [2]);
    alias short4 = Vector!(short[4]);
    alias byte8  = Vector!(byte [8]);
}

// Whichever definition was selected, every MMX-sized type must be 8 bytes.
static assert(float2.sizeof == 8);
static assert(byte8.sizeof == 8);
static assert(short4.sizeof == 8);
static assert(int2.sizeof == 8);
static assert(long1.sizeof == 8);
451 
452 
static if (SSESizedVectorsAreEmulated)
{
    /// SSE-like SIMD types

    // Emulated 16-byte vector of four `float` lanes.
    struct float4
    {
        float[4] array;
        mixin VectorOps!(float4, float[4]);
    }

    // Emulated 16-byte vector of sixteen `byte` lanes.
    struct byte16
    {
        byte[16] array;
        mixin VectorOps!(byte16, byte[16]);
    }

    // Emulated 16-byte vector of eight `short` lanes.
    struct short8
    {
        short[8] array;
        mixin VectorOps!(short8, short[8]);
    }

    // Emulated 16-byte vector of four `int` lanes.
    struct int4
    {
        int[4] array;
        mixin VectorOps!(int4, int[4]);
    }

    // Emulated 16-byte vector of two `long` lanes.
    struct long2
    {
        long[2] array;
        mixin VectorOps!(long2, long[2]);
    }

    // Emulated 16-byte vector of two `double` lanes.
    struct double2
    {
        double[2] array;
        mixin VectorOps!(double2, double[2]);
    }
}

// Whether emulated above or supplied by the compiler's SIMD module,
// every SSE-sized type must be 16 bytes.
static assert(float4.sizeof == 16);
static assert(byte16.sizeof == 16);
static assert(short8.sizeof == 16);
static assert(int4.sizeof == 16);
static assert(long2.sizeof == 16);
static assert(double2.sizeof == 16);
500 
501 
502 
503 
504 
// Intel-style type names mapped onto the vector types defined in this module.
alias __m128 = float4;
alias __m128i = int4;
alias __m128d = double2;
alias __m64 = long1; // like in Clang, __m64 is a vector of 1 long
509 
/// Packs two 1-bit selectors into a 2-bit immediate: `x` becomes bit 1
/// and `y` becomes bit 0. Both arguments must be 0 or 1.
int _MM_SHUFFLE2(int x, int y) pure @safe
{
    assert(0 <= x && x <= 1);
    assert(0 <= y && y <= 1);

    immutable int highBit = x << 1;
    return highBit | y;
}
516 
/// Packs four 2-bit selectors into an 8-bit immediate, most significant
/// first: `z` in bits 7..6, `y` in bits 5..4, `x` in bits 3..2 and `w` in
/// bits 1..0. Every argument must be in the range 0..3.
int _MM_SHUFFLE(int z, int y, int x, int w) pure @safe
{
    // Ordered from the most significant field to the least significant one.
    int[4] fields = [z, y, x, w];

    foreach (field; fields)
        assert(field >= 0 && field <= 3);

    // Shift the accumulator by one field width and append the next field;
    // this is bit-for-bit (z<<6) | (y<<4) | (x<<2) | w.
    int imm = 0;
    foreach (field; fields)
        imm = (imm << 2) | field;
    return imm;
}
525 
// test assignment from scalar to vector type
unittest
{
    // A scalar initializer must be broadcast to every lane,
    // for an SSE-sized type ...
    static immutable float[4] expectedFloats = [3.0f, 3.0f, 3.0f, 3.0f];
    float4 broadcastFloats = 3.0f;
    assert(broadcastFloats.array == expectedFloats);

    // ... and for an MMX-sized type.
    static immutable int[2] expectedInts = [42, 42];
    int2 broadcastInts = 42;
    assert(broadcastInts.array == expectedInts);
}