1 /**
2 * `core.simd` emulation layer.
3 *
4 * Copyright: Copyright Auburn Sounds 2016-2018, Stefanos Baziotis 2019.
5 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 * Authors:   Guillaume Piolat
7 */
8 module inteli.types;
9 
10 version(GNU)
11 {
12     version(X86_64)
13     {
14         enum CoreSimdIsEmulated = false;
15 
16         public import core.simd;
17         import gcc.builtins;
18 
19         // Declare vector types that correspond to MMX types
20         // Because they are expressible in IR anyway.
21         alias Vector!(long [1]) long1;
22         alias Vector!(float[2]) float2;
23         alias Vector!(int  [2]) int2;
24         alias Vector!(short[4]) short4;
25         alias Vector!(byte [8]) byte8;
26 
27         float4 loadUnaligned(Vec)(const(float)* pvec) @trusted if (is(Vec == float4))
28         {
29             return __builtin_ia32_loadups(pvec);
30         }
31 
32         double2 loadUnaligned(Vec)(const(double)* pvec) @trusted if (is(Vec == double2))
33         {
34             return __builtin_ia32_loadupd(pvec);
35         }
36 
37         byte16 loadUnaligned(Vec)(const(byte)* pvec) @trusted if (is(Vec == byte16))
38         {
39             return cast(byte16) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
40         }
41 
42         short8 loadUnaligned(Vec)(const(short)* pvec) @trusted if (is(Vec == short8))
43         {
44             return cast(short8) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
45         }
46 
47         int4 loadUnaligned(Vec)(const(int)* pvec) @trusted if (is(Vec == int4))
48         {
49             return cast(int4) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
50         }
51 
52         long2 loadUnaligned(Vec)(const(long)* pvec) @trusted if (is(Vec == long2))
53         {
54             return cast(long2) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
55         }
56 
57         void storeUnaligned(Vec)(Vec v, float* pvec) @trusted if (is(Vec == float4))
58         {
59             __builtin_ia32_storeups(pvec, v);
60         }
61 
62         void storeUnaligned(Vec)(Vec v, double* pvec) @trusted if (is(Vec == double2))
63         {
64             __builtin_ia32_storeupd(pvec, v);
65         }
66 
67         void storeUnaligned(Vec)(Vec v, byte* pvec) @trusted if (is(Vec == byte16))
68         {
69             __builtin_ia32_storedqu(cast(char*)pvec, v);
70         }
71 
72         void storeUnaligned(Vec)(Vec v, short* pvec) @trusted if (is(Vec == short8))
73         {
74             __builtin_ia32_storedqu(cast(char*)pvec, v);
75         }
76 
77         void storeUnaligned(Vec)(Vec v, int* pvec) @trusted if (is(Vec == int4))
78         {
79             __builtin_ia32_storedqu(cast(char*)pvec, v);
80         }
81 
82         void storeUnaligned(Vec)(Vec v, long* pvec) @trusted if (is(Vec == long2))
83         {
84             __builtin_ia32_storedqu(cast(char*)pvec, v);
85         }
86 
87         // TODO: for performance, replace that anywhere possible by a GDC intrinsic
88         Vec shufflevector(Vec, mask...)(Vec a, Vec b) @trusted
89         {
90             enum Count = Vec.array.length;
91             static assert(mask.length == Count);
92 
93             Vec r = void;
94             foreach(int i, m; mask)
95             {
96                 static assert (m < Count * 2);
97                 int ind = cast(int)m;
98                 if (ind < Count)
99                     r.ptr[i] = a.array[ind];
100                 else
101                     r.ptr[i] = b.array[ind - Count];
102             }
103             return r;
104         }
105     }
106     else
107     {
108         enum CoreSimdIsEmulated = true;
109     }
110 }
111 else version(LDC)
112 {
113     public import core.simd;
114     public import ldc.simd;
115 
116     // Declare vector types that correspond to MMX types
117     // Because they are expressible in IR anyway.
118     alias Vector!(long [1]) long1;
119     alias Vector!(float[2]) float2;
120     alias Vector!(int  [2]) int2;
121     alias Vector!(short[4]) short4;
122     alias Vector!(byte [8]) byte8;
123 
124     enum CoreSimdIsEmulated = false;
125 }
126 else version(DigitalMars)
127 {
128     enum CoreSimdIsEmulated = true; // TODO: use core.simd with DMD when D_SIMD is defined
129 }
130 
131 static if (CoreSimdIsEmulated)
132 {
    // This is an LDC SIMD emulation layer, for use with other D compilers.
    // The goal is to be very similar in precision.
    // The biggest differences are:
    //
    // 1. `cast` everywhere. With LDC vector types, short8 is implicitly convertible to int4
    //   but this is sadly impossible in D without D_SIMD (Windows 32-bit).
    //
    // 2. `vec.array` is directly writeable.
141 
142     nothrow:
143     @nogc:
144     pure:
145 
146 
147     /// MMX-like SIMD types
148     struct float2
149     {
150         float[2] array;
151         mixin VectorOps!(float2, float[2]);
152 
153         enum float TrueMask = allOnes();
154         enum float FalseMask = 0.0f;
155 
156         private static float allOnes()
157         {
158             uint m1 = 0xffffffff;
159             return *cast(float*)(&m1);
160         }
161     }
162 
163     struct byte8
164     {
165         byte[8] array;
166         mixin VectorOps!(byte8, byte[8]);
167         enum byte TrueMask = -1;
168         enum byte FalseMask = 0;
169     }
170 
171     struct short4
172     {
173         short[4] array;
174         mixin VectorOps!(short4, short[4]);
175         enum short TrueMask = -1;
176         enum short FalseMask = 0;
177     }
178 
179     struct int2
180     {
181         int[2] array;
182         mixin VectorOps!(int2, int[2]);
183         enum int TrueMask = -1;
184         enum int FalseMask = 0;
185     }
186 
187     struct long1
188     {
189         long[1] array;
190         mixin VectorOps!(long1, long[1]);
191         enum long TrueMask = -1;
192         enum long FalseMask = 0;
193     }
194 
195     static assert(float2.sizeof == 8);
196     static assert(byte8.sizeof == 8);
197     static assert(short4.sizeof == 8);
198     static assert(int2.sizeof == 8);
199     static assert(long1.sizeof == 8);
200 
201 
202     /// SSE-like SIMD types
203 
204     struct float4
205     {
206         float[4] array;
207         mixin VectorOps!(float4, float[4]);
208 
209         enum float TrueMask = allOnes();
210         enum float FalseMask = 0.0f;
211 
212         private static float allOnes()
213         {
214             uint m1 = 0xffffffff;
215             return *cast(float*)(&m1);
216         }
217     }
218 
219     struct byte16
220     {
221         byte[16] array;
222         mixin VectorOps!(byte16, byte[16]);
223         enum byte TrueMask = -1;
224         enum byte FalseMask = 0;
225     }
226 
227     struct short8
228     {
229         short[8] array;
230         mixin VectorOps!(short8, short[8]);
231         enum short TrueMask = -1;
232         enum short FalseMask = 0;
233     }
234 
235     struct int4
236     {
237         int[4] array;
238         mixin VectorOps!(int4, int[4]);
239         enum int TrueMask = -1;
240         enum int FalseMask = 0;
241     }
242 
243     struct long2
244     {
245         long[2] array;
246         mixin VectorOps!(long2, long[2]);
247         enum long TrueMask = -1;
248         enum long FalseMask = 0;
249     }
250 
251     struct double2
252     {
253         double[2] array;
254         mixin VectorOps!(double2, double[2]);
255 
256         enum double TrueMask = allOnes();
257         enum double FalseMask = 0.0f;
258 
259         private static double allOnes()
260         {
261             ulong m1 = 0xffffffff_ffffffff;
262             return *cast(double*)(&m1);
263         }
264     }
265 
266     static assert(float4.sizeof == 16);
267     static assert(byte16.sizeof == 16);
268     static assert(short8.sizeof == 16);
269     static assert(int4.sizeof == 16);
270     static assert(long2.sizeof == 16);
271     static assert(double2.sizeof == 16);
272 
273     mixin template VectorOps(VectorType, ArrayType: BaseType[N], BaseType, size_t N)
274     {
275         enum Count = N;
276         alias Base = BaseType;
277 
278         BaseType* ptr() return pure nothrow @nogc
279         {
280             return array.ptr;
281         }
282 
283         // Unary operators
284         VectorType opUnary(string op)() pure nothrow @safe @nogc
285         {
286             VectorType res = void;
287             mixin("res.array[] = " ~ op ~ "array[];");
288             return res;
289         }
290 
291         // Binary operators
292         VectorType opBinary(string op)(VectorType other) pure const nothrow @safe @nogc
293         {
294             VectorType res = void;
295             mixin("res.array[] = array[] " ~ op ~ " other.array[];");
296             return res;
297         }
298 
299         // Assigning a static array
300         void opAssign(ArrayType v) pure nothrow @safe @nogc
301         {
302             array[] = v[];
303         }
304 
305         void opOpAssign(string op)(VectorType other) pure nothrow @safe @nogc
306         {
307             mixin("array[] "  ~ op ~ "= other.array[];");
308         }
309 
310         // Assigning a dyn array
311         this(ArrayType v) pure nothrow @safe @nogc
312         {
313             array[] = v[];
314         }
315 
316         // Broadcast constructor
317         this(BaseType x) pure nothrow @safe @nogc
318         {
319             array[] = x;
320         }
321 
322         /// We can't support implicit conversion but do support explicit casting.
323         /// "Vector types of the same size can be implicitly converted among each other."
324         /// Casting to another vector type is always just a raw copy.
325         VecDest opCast(VecDest)() pure const nothrow @trusted @nogc
326             if (VecDest.sizeof == VectorType.sizeof)
327         {
328             // import core.stdc.string: memcpy;
329             VecDest dest = void;
330             // Copy
331             dest.array[] = (cast(typeof(dest.array))cast(void[VectorType.sizeof])array)[];
332             return dest;
333         }
334 
335         ref inout(BaseType) opIndex(size_t i) inout pure nothrow @safe @nogc
336         {
337             return array[i];
338         }
339 
340     }
341 
342     auto extractelement(Vec, int index, Vec2)(Vec2 vec) @trusted
343     {
344         static assert(Vec.sizeof == Vec2.sizeof);
345         import core.stdc.string: memcpy;
346         Vec v = void;
347         memcpy(&v, &vec, Vec2.sizeof);
348         return v.array[index];
349     }
350 
351     auto insertelement(Vec, int index, Vec2)(Vec2 vec, Vec.Base e) @trusted
352     {
353         static assert(Vec.sizeof == Vec2.sizeof);
354         import core.stdc.string: memcpy;
355         Vec v = void;
356         memcpy(&v, &vec, Vec2.sizeof);
357         v.array[index] = e;
358         return v;
359     }
360 
361     // Note: can't be @safe with this signature
362     Vec loadUnaligned(Vec)(const(Vec.Base)* pvec) @trusted
363     {
364         return *cast(Vec*)(pvec);
365     }
366 
367     // Note: can't be @safe with this signature
368     void storeUnaligned(Vec)(Vec v, Vec.Base* pvec) @trusted
369     {
370         *cast(Vec*)(pvec) = v;
371     }
372 
373     Vec shufflevector(Vec, mask...)(Vec a, Vec b) @safe
374     {
375         static assert(mask.length == Vec.Count);
376 
377         Vec r = void;
378         foreach(int i, m; mask)
379         {
380             static assert (m < Vec.Count * 2);
381             int ind = cast(int)m;
382             if (ind < Vec.Count)
383                 r.array[i] = a.array[ind];
384             else
385                 r.array[i] = b.array[ind-Vec.Count];
386         }
387         return r;
388     }
389 
390     // emulate ldc.simd cmpMask
391 
392     Vec equalMask(Vec)(Vec a, Vec b) @safe // for floats, equivalent to "oeq" comparison
393     {
394         alias BaseType = Vec.Base;
395         alias Count = Vec.Count;
396         Vec result;
397         foreach(int i; 0..Count)
398         {
399             bool cond = a.array[i] == b.array[i];
400             result.array[i] = cond ? Vec.TrueMask : Vec.FalseMask;
401         }
402         return result;
403     }
404 
405     Vec notEqualMask(Vec)(Vec a, Vec b) @safe // for floats, equivalent to "one" comparison
406     {
407         alias BaseType = Vec.Base;
408         alias Count = Vec.Count;
409         Vec result;
410         foreach(int i; 0..Count)
411         {
412             bool cond = a.array[i] != b.array[i];
413             result.array[i] = cond ? Vec.TrueMask : Vec.FalseMask;
414         }
415         return result;
416     }
417 
418     Vec greaterMask(Vec)(Vec a, Vec b) @safe // for floats, equivalent to "ogt" comparison
419     {
420         alias BaseType = Vec.Base;
421         alias Count = Vec.Count;
422         Vec result;
423         foreach(int i; 0..Count)
424         {
425             bool cond = a.array[i] > b.array[i];
426             result.array[i] = cond ? Vec.TrueMask : Vec.FalseMask;
427         }
428         return result;
429     }
430 
431     Vec greaterOrEqualMask(Vec)(Vec a, Vec b) @safe // for floats, equivalent to "oge" comparison
432     {
433         alias BaseType = Vec.Base;
434         alias Count = Vec.Count;
435         Vec result;
436         foreach(int i; 0..Count)
437         {
438             bool cond = a.array[i] > b.array[i];
439             result.array[i] = cond ? Vec.TrueMask : Vec.FalseMask;
440         }
441         return result;
442     }
443 
444     unittest
445     {
446         float4 a = [1, 3, 5, 7];
447         float4 b = [2, 3, 4, 5];
448         int4 c = cast(int4)(greaterMask!float4(a, b));
449         static immutable int[4] correct = [0, 0, 0xffff_ffff, 0xffff_ffff];
450         assert(c.array == correct);
451     }
452 }
453 
454 nothrow:
455 @nogc:
456 
/// Intel-style name for the vector of four `float` lanes.
alias __m128 = float4;
/// Intel-style name for the vector of four `int` lanes.
alias __m128i = int4;
/// Intel-style name for the vector of two `double` lanes.
alias __m128d = double2;
/// Like in Clang, `__m64` is a vector of 1 long.
alias __m64 = long1;
461 
/// Packs two 1-bit selectors into one immediate: `x` goes to bit 1, `y` to bit 0.
/// Both arguments are asserted to be 0 or 1.
int _MM_SHUFFLE2(int x, int y) pure @safe
{
    assert(x >= 0 && x <= 1);
    assert(y >= 0 && y <= 1);
    int imm = y;
    imm |= x << 1;
    return imm;
}
468 
/// Packs four 2-bit selectors into one immediate: `z` goes to bits 7-6,
/// `y` to bits 5-4, `x` to bits 3-2 and `w` to bits 1-0.
/// All arguments are asserted to be in the range 0 to 3.
int _MM_SHUFFLE(int z, int y, int x, int w) pure @safe
{
    assert(x >= 0 && x <= 3);
    assert(y >= 0 && y <= 3);
    assert(z >= 0 && z <= 3);
    assert(w >= 0 && w <= 3);
    int imm = w;
    imm |= x << 2;
    imm |= y << 4;
    imm |= z << 6;
    return imm;
}
477 
// Checks that initializing a vector from a scalar sets every lane to that scalar.
unittest
{
    float[4] expectedFloats = [3.0f, 3.0f, 3.0f, 3.0f];
    float4 A = 3.0f;
    assert(A.array == expectedFloats);

    int[2] expectedInts = [42, 42];
    int2 B = 42;
    assert(B.array == expectedInts);
}