inteli.types source code

1 /**
2 * Copyright: Copyright Auburn Sounds 2016-2018, Stefanos Baziotis 2019.
3 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
4 * Authors:   Guillaume Piolat
5 */
6 module inteli.types;
7 
8 version(GNU)
9 {
10     version(X86_64)
11     {
12         enum CoreSimdIsEmulated = false;
13 
14         public import core.simd;
15         import gcc.builtins;
16 
17         // Declare vector types that correspond to MMX types
18         // Because they are expressible in IR anyway.
19         alias Vector!(long [1]) long1;
20         alias Vector!(float[2]) float2;
21         alias Vector!(int  [2]) int2;
22         alias Vector!(short[4]) short4;
23         alias Vector!(byte [8]) byte8;
24 
25         float4 loadUnaligned(Vec)(const(float)* pvec) @trusted if (is(Vec == float4))
26         {
27             return __builtin_ia32_loadups(pvec);
28         }
29 
30         double2 loadUnaligned(Vec)(const(double)* pvec) @trusted if (is(Vec == double2))
31         {
32             return __builtin_ia32_loadupd(pvec);
33         }
34 
35         byte16 loadUnaligned(Vec)(const(byte)* pvec) @trusted if (is(Vec == byte16))
36         {
37             return cast(byte16) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
38         }
39 
40         short8 loadUnaligned(Vec)(const(short)* pvec) @trusted if (is(Vec == short8))
41         {
42             return cast(short8) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
43         }
44 
45         int4 loadUnaligned(Vec)(const(int)* pvec) @trusted if (is(Vec == int4))
46         {
47             return cast(int4) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
48         }
49 
50         long2 loadUnaligned(Vec)(const(long)* pvec) @trusted if (is(Vec == long2))
51         {
52             return cast(long2) __builtin_ia32_loaddqu(cast(const(char)*) pvec);
53         }
54 
55         void storeUnaligned(Vec)(Vec v, float* pvec) @trusted if (is(Vec == float4))
56         {
57             __builtin_ia32_storeups(pvec, v);
58         }
59 
60         void storeUnaligned(Vec)(Vec v, double* pvec) @trusted if (is(Vec == double2))
61         {
62             __builtin_ia32_storeupd(pvec, v);
63         }
64 
65         void storeUnaligned(Vec)(Vec v, byte* pvec) @trusted if (is(Vec == byte16))
66         {
67             __builtin_ia32_storedqu(cast(char*)pvec, v);
68         }
69 
70         void storeUnaligned(Vec)(Vec v, short* pvec) @trusted if (is(Vec == short8))
71         {
72             __builtin_ia32_storedqu(cast(char*)pvec, v);
73         }
74 
75         void storeUnaligned(Vec)(Vec v, int* pvec) @trusted if (is(Vec == int4))
76         {
77             __builtin_ia32_storedqu(cast(char*)pvec, v);
78         }
79 
80         void storeUnaligned(Vec)(Vec v, long* pvec) @trusted if (is(Vec == long2))
81         {
82             __builtin_ia32_storedqu(cast(char*)pvec, v);
83         }
84 
85         // TODO: for performance, replace that anywhere possible by a GDC intrinsic
86         Vec shufflevector(Vec, mask...)(Vec a, Vec b) @trusted
87         {
88             enum Count = Vec.array.length;
89             static assert(mask.length == Count);
90 
91             Vec r = void;
92             foreach(int i, m; mask)
93             {
94                 static assert (m < Count * 2);
95                 int ind = cast(int)m;
96                 if (ind < Count)
97                     r.ptr[i] = a.array[ind];
98                 else
99                     r.ptr[i] = b.array[ind - Count];
100             }
101             return r;
102         }
103     }
104     else
105     {
106         enum CoreSimdIsEmulated = true;
107     }
108 }
109 else version(LDC)
110 {
111     public import core.simd;
112     public import ldc.simd;
113 
114     // Declare vector types that correspond to MMX types
115     // Because they are expressible in IR anyway.
116     alias Vector!(long [1]) long1;
117     alias Vector!(float[2]) float2;
118     alias Vector!(int  [2]) int2;
119     alias Vector!(short[4]) short4;
120     alias Vector!(byte [8]) byte8;
121 
122     enum CoreSimdIsEmulated = false;
123 }
124 else version(DigitalMars)
125 {
126     enum CoreSimdIsEmulated = true; // TODO: use core.simd with DMD when D_SIMD is defined
127 }
128 
129 static if (CoreSimdIsEmulated)
130 {
131     // This is a LDC SIMD emulation layer, for use with other D compilers.
132     // The goal is to be very similar in precision.
133     // The biggest differences are:
134     //
135     // 1. `cast` everywhere. With LDC vector types, short8 is implicitly convertible to int4
136     //   but this is sadly impossible in D without D_SIMD (Windows 32-bit).
137     //
138     // 2. `vec.array` is directly writeable.
139 
140     nothrow:
141     @nogc:
142     pure:
143 
144 
145     /// MMX-like SIMD types
146     struct float2
147     {
148         float[2] array;
149         mixin VectorOps!(float2, float[2]);
150 
151         enum float TrueMask = allOnes();
152         enum float FalseMask = 0.0f;
153 
154         private static float allOnes()
155         {
156             uint m1 = 0xffffffff;
157             return *cast(float*)(&m1);
158         }
159     }
160 
161     struct byte8
162     {
163         byte[8] array;
164         mixin VectorOps!(byte8, byte[8]);
165         enum byte TrueMask = -1;
166         enum byte FalseMask = 0;
167     }
168 
169     struct short4
170     {
171         short[4] array;
172         mixin VectorOps!(short4, short[4]);
173         enum short TrueMask = -1;
174         enum short FalseMask = 0;
175     }
176 
177     struct int2
178     {
179         int[2] array;
180         mixin VectorOps!(int2, int[2]);
181         enum int TrueMask = -1;
182         enum int FalseMask = 0;
183     }
184 
185     struct long1
186     {
187         long[1] array;
188         mixin VectorOps!(long1, long[1]);
189         enum long TrueMask = -1;
190         enum long FalseMask = 0;
191     }
192 
193     static assert(float2.sizeof == 8);
194     static assert(byte8.sizeof == 8);
195     static assert(short4.sizeof == 8);
196     static assert(int2.sizeof == 8);
197     static assert(long1.sizeof == 8);
198 
199 
200     /// SSE-like SIMD types
201 
202     struct float4
203     {
204         float[4] array;
205         mixin VectorOps!(float4, float[4]);
206 
207         enum float TrueMask = allOnes();
208         enum float FalseMask = 0.0f;
209 
210         private static float allOnes()
211         {
212             uint m1 = 0xffffffff;
213             return *cast(float*)(&m1);
214         }
215     }
216 
217     struct byte16
218     {
219         byte[16] array;
220         mixin VectorOps!(byte16, byte[16]);
221         enum byte TrueMask = -1;
222         enum byte FalseMask = 0;
223     }
224 
225     struct short8
226     {
227         short[8] array;
228         mixin VectorOps!(short8, short[8]);
229         enum short TrueMask = -1;
230         enum short FalseMask = 0;
231     }
232 
233     struct int4
234     {
235         int[4] array;
236         mixin VectorOps!(int4, int[4]);
237         enum int TrueMask = -1;
238         enum int FalseMask = 0;
239     }
240 
241     struct long2
242     {
243         long[2] array;
244         mixin VectorOps!(long2, long[2]);
245         enum long TrueMask = -1;
246         enum long FalseMask = 0;
247     }
248 
249     struct double2
250     {
251         double[2] array;
252         mixin VectorOps!(double2, double[2]);
253 
254         enum double TrueMask = allOnes();
255         enum double FalseMask = 0.0f;
256 
257         private static double allOnes()
258         {
259             ulong m1 = 0xffffffff_ffffffff;
260             return *cast(double*)(&m1);
261         }
262     }
263 
264     static assert(float4.sizeof == 16);
265     static assert(byte16.sizeof == 16);
266     static assert(short8.sizeof == 16);
267     static assert(int4.sizeof == 16);
268     static assert(long2.sizeof == 16);
269     static assert(double2.sizeof == 16);
270 
271     mixin template VectorOps(VectorType, ArrayType: BaseType[N], BaseType, size_t N)
272     {
273         enum Count = N;
274         alias Base = BaseType;
275 
276         BaseType* ptr() pure nothrow @nogc
277         {
278             return array.ptr;
279         }
280 
281         // Unary operators
282         VectorType opUnary(string op)() pure nothrow @safe @nogc
283         {
284             VectorType res = void;
285             mixin("res.array[] = " ~ op ~ "array[];");
286             return res;
287         }
288 
289         // Binary operators
290         VectorType opBinary(string op)(VectorType other) pure const nothrow @safe @nogc
291         {
292             VectorType res = void;
293             mixin("res.array[] = array[] " ~ op ~ " other.array[];");
294             return res;
295         }
296 
297         // Assigning a static array
298         void opAssign(ArrayType v) pure nothrow @safe @nogc
299         {
300             array[] = v[];
301         }
302 
303         void opOpAssign(string op)(VectorType other) pure nothrow @safe @nogc
304         {
305             mixin("array[] "  ~ op ~ "= other.array[];");
306         }
307 
308         // Construct from a static array
309         this(ArrayType v) pure nothrow @safe @nogc
310         {
311             array[] = v[];
312         }
313 
314         // Broadcast constructor
315         this(BaseType x) pure nothrow @safe @nogc
316         {
317             array[] = x;
318         }
319 
320         /// We can't support implicit conversion but do support explicit casting.
321         /// "Vector types of the same size can be implicitly converted among each other."
322         /// Casting to another vector type is always just a raw copy.
323         VecDest opCast(VecDest)() pure const nothrow @trusted @nogc
324             if (VecDest.sizeof == VectorType.sizeof)
325         {
326             // import core.stdc.string: memcpy;
327             VecDest dest = void;
328             // Copy
329             dest.array[] = (cast(typeof(dest.array))cast(void[VectorType.sizeof])array)[];
330             return dest;
331         }
332 
333         ref inout(BaseType) opIndex(size_t i) inout pure nothrow @safe @nogc
334         {
335             return array[i];
336         }
337 
338     }
339 
340     auto extractelement(Vec, int index, Vec2)(Vec2 vec) @trusted
341     {
342         static assert(Vec.sizeof == Vec2.sizeof);
343         import core.stdc.string: memcpy;
344         Vec v = void;
345         memcpy(&v, &vec, Vec2.sizeof);
346         return v.array[index];
347     }
348 
349     auto insertelement(Vec, int index, Vec2)(Vec2 vec, Vec.Base e) @trusted
350     {
351         static assert(Vec.sizeof == Vec2.sizeof);
352         import core.stdc.string: memcpy;
353         Vec v = void;
354         memcpy(&v, &vec, Vec2.sizeof);
355         v.array[index] = e;
356         return v;
357     }
358 
359     // Note: can't be @safe with this signature
360     Vec loadUnaligned(Vec)(const(Vec.Base)* pvec) @trusted
361     {
362         return *cast(Vec*)(pvec);
363     }
364 
365     // Note: can't be @safe with this signature
366     void storeUnaligned(Vec)(Vec v, Vec.Base* pvec) @trusted
367     {
368         *cast(Vec*)(pvec) = v;
369     }
370 
371     Vec shufflevector(Vec, mask...)(Vec a, Vec b) @safe
372     {
373         static assert(mask.length == Vec.Count);
374 
375         Vec r = void;
376         foreach(int i, m; mask)
377         {
378             static assert (m < Vec.Count * 2);
379             int ind = cast(int)m;
380             if (ind < Vec.Count)
381                 r.array[i] = a.array[ind];
382             else
383                 r.array[i] = b.array[ind-Vec.Count];
384         }
385         return r;
386     }
387 
388     // emulate ldc.simd cmpMask
389 
390     Vec equalMask(Vec)(Vec a, Vec b) @safe // for floats, equivalent to "oeq" comparison
391     {
392         alias BaseType = Vec.Base;
393         alias Count = Vec.Count;
394         Vec result;
395         foreach(int i; 0..Count)
396         {
397             bool cond = a.array[i] == b.array[i];
398             result.array[i] = cond ? Vec.TrueMask : Vec.FalseMask;
399         }
400         return result;
401     }
402 
403     Vec notEqualMask(Vec)(Vec a, Vec b) @safe // for floats, equivalent to "one" comparison
404     {
405         alias BaseType = Vec.Base;
406         alias Count = Vec.Count;
407         Vec result;
408         foreach(int i; 0..Count)
409         {
410             bool cond = a.array[i] != b.array[i];
411             result.array[i] = cond ? Vec.TrueMask : Vec.FalseMask;
412         }
413         return result;
414     }
415 
416     Vec greaterMask(Vec)(Vec a, Vec b) @safe // for floats, equivalent to "ogt" comparison
417     {
418         alias BaseType = Vec.Base;
419         alias Count = Vec.Count;
420         Vec result;
421         foreach(int i; 0..Count)
422         {
423             bool cond = a.array[i] > b.array[i];
424             result.array[i] = cond ? Vec.TrueMask : Vec.FalseMask;
425         }
426         return result;
427     }
428 
429     Vec greaterOrEqualMask(Vec)(Vec a, Vec b) @safe // lane-wise `a >= b` mask; for floats, equivalent to "oge" comparison
430     {
431         alias BaseType = Vec.Base;
432         alias Count = Vec.Count;
433         Vec result; // every lane is overwritten in the loop below
434         foreach(int i; 0..Count)
435         {
436             bool cond = a.array[i] >= b.array[i]; // must be `>=`: plain `>` would duplicate greaterMask and fail on equal lanes
437             result.array[i] = cond ? Vec.TrueMask : Vec.FalseMask; // TrueMask when the lane compares true, FalseMask otherwise
438         }
439         return result;
440     }
441 
442     unittest
443     {
444         float4 a = [1, 3, 5, 7];
445         float4 b = [2, 3, 4, 5];
446         int4 c = cast(int4)(greaterMask!float4(a, b));
447         static immutable int[4] correct = [0, 0, 0xffff_ffff, 0xffff_ffff];
448         assert(c.array == correct);
449     }
450 }
451 
452 nothrow:
453 @nogc:
454 
455 alias __m128 = float4;
456 alias __m128i = int4;
457 alias __m128d = double2;
458 alias __m64 = long1; // like in Clang, __m64 is a vector of 1 long
459 
460 int _MM_SHUFFLE2(int x, int y) pure @safe // packs two selectors into a 2-bit value: x in bit 1, y in bit 0
461 {
462     assert(x >= 0 && x <= 1); // each selector must be 0 or 1
463     assert(y >= 0 && y <= 1);
464     return (x << 1) | y;
465 }
466 
467 int _MM_SHUFFLE(int z, int y, int x, int w) pure @safe // packs four 2-bit selectors into one byte-sized value
468 {
469     assert(x >= 0 && x <= 3); // each selector must be in 0..3
470     assert(y >= 0 && y <= 3);
471     assert(z >= 0 && z <= 3);
472     assert(w >= 0 && w <= 3);
473     return (z<<6) | (y<<4) | (x<<2) | w; // z -> bits 7..6, y -> bits 5..4, x -> bits 3..2, w -> bits 1..0
474 }
475 
476 // test assignment from scalar to vector type
477 unittest
478 {
479     float4 A = 3.0f;
480     float[4] correctA = [3.0f, 3.0f, 3.0f, 3.0f];
481     assert(A.array == correctA);
482 
483     int2 B = 42;
484     int[2] correctB = [42, 42];
485     assert(B.array == correctB);
486 }