inteli.types source code

1 /**
2 * `core.simd` emulation layer.
3 *
4 * Copyright: Copyright Guillaume Piolat 2016-2020, Stefanos Baziotis 2019.
5 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 */
7 module inteli.types;
8 
9 
10 pure:
11 nothrow:
12 @nogc:
13 
// Compiler capability detection.
//
// Every supported compiler branch below declares the same three booleans,
// telling the rest of the module whether 8-byte (MMX-sized), 16-byte
// (SSE-sized) and 32-byte (AVX-sized) vector types have to be emulated
// with plain structs instead of native `__vector` types.
version(GNU)
{
    // Note: for GDC support, be sure to use https://explore.dgnu.org/

    // Future: just detect vectors, do not base upon arch.

    version(X86_64)
    {
        import gcc.builtins;

        enum MMXSizedVectorsAreEmulated = false;
        enum SSESizedVectorsAreEmulated = false;

        // Does GDC support AVX-sized vectors?
        // The probe is only performed from front-end 2.100 on
        // (starting at GDC 12.1 only); older versions always emulate.
        static if (__VERSION__ >= 2100)
            enum AVXSizedVectorsAreEmulated = !is(__vector(double[4]));
        else
            enum AVXSizedVectorsAreEmulated = true;
    }
    else
    {
        // Any other GDC target: everything is emulated.
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
        enum AVXSizedVectorsAreEmulated = true;
    }
}
else version(LDC)
{
    public import ldc.simd;

    // Use this alias to mention it should only be used with LDC,
    // for example when emulated shufflevector would just be wasteful.
    alias shufflevectorLDC = shufflevector;

    // With LDC no vector size is emulated.
    enum MMXSizedVectorsAreEmulated = false;
    enum SSESizedVectorsAreEmulated = false;
    enum AVXSizedVectorsAreEmulated = false;
}
else version(DigitalMars)
{
    public import core.simd;

    // Note: turning this true is desirable,
    // and leads to many bugs being discovered upstream.
    // The fact that it works relies on many workarounds.
    // In particular intel-intrinsics with this on is a honeypot for DMD backend bugs.
    // What happens next is that contributors end up on a DMD bug in their PR.
    //
    // Failed attempts: xxx
    //
    // Only attempted from front-end 2.100 on.
    enum bool tryToEnableCoreSimdWithDMD = (__VERSION__ >= 2100);

    version(D_SIMD)
    {
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = !tryToEnableCoreSimdWithDMD;

        // Note: with DMD, AVX-sized vectors can't be enabled yet,
        // so they are emulated whether or not D_AVX is defined.
        // On linux + x86_64, this will fail since a few operands seem to be missing.
        enum AVXSizedVectorsAreEmulated = true;
    }
    else
    {
        // Some DMD 32-bit targets don't have D_SIMD
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
        enum AVXSizedVectorsAreEmulated = true;
    }
}

// True as soon as at least one vector size is emulated.
enum CoreSimdIsEmulated = MMXSizedVectorsAreEmulated
                       || SSESizedVectorsAreEmulated
                       || AVXSizedVectorsAreEmulated;
99 
100 static if (CoreSimdIsEmulated)
101 {
102     // core.simd is emulated in some capacity: introduce `VectorOps`
103 
104     mixin template VectorOps(VectorType, ArrayType: BaseType[N], BaseType, size_t N)
105     {
106         enum Count = N;
107         alias Base = BaseType;
108 
109         BaseType* ptr() return pure nothrow @nogc
110         {
111             return array.ptr;
112         }
113 
114         // Unary operators
115         VectorType opUnary(string op)() pure nothrow @safe @nogc
116         {
117             VectorType res = void;
118             mixin("res.array[] = " ~ op ~ "array[];");
119             return res;
120         }
121 
122         // Binary operators
123         VectorType opBinary(string op)(VectorType other) pure const nothrow @safe @nogc
124         {
125             VectorType res = void;
126             mixin("res.array[] = array[] " ~ op ~ " other.array[];");
127             return res;
128         }
129 
130         // Assigning a BaseType value
131         void opAssign(BaseType e) pure nothrow @safe @nogc
132         {
133             array[] = e;
134         }
135 
136         // Assigning a static array
137         void opAssign(ArrayType v) pure nothrow @safe @nogc
138         {
139             array[] = v[];
140         }
141 
142         void opOpAssign(string op)(VectorType other) pure nothrow @safe @nogc
143         {
144             mixin("array[] "  ~ op ~ "= other.array[];");
145         }
146 
147         // Assigning a dyn array
148         this(ArrayType v) pure nothrow @safe @nogc
149         {
150             array[] = v[];
151         }
152 
153         // Broadcast constructor
154         this(BaseType x) pure nothrow @safe @nogc
155         {
156             array[] = x;
157         }
158 
159         /// We can't support implicit conversion but do support explicit casting.
160         /// "Vector types of the same size can be implicitly converted among each other."
161         /// Casting to another vector type is always just a raw copy.
162         VecDest opCast(VecDest)() pure const nothrow @trusted @nogc
163             if (VecDest.sizeof == VectorType.sizeof)
164             {
165                 VecDest dest = void;
166                 // Copy
167                 dest.array[] = (cast(typeof(dest.array))cast(void[VectorType.sizeof])array)[];
168                 return dest;
169             }
170 
171         ref inout(BaseType) opIndex(size_t i) inout return pure nothrow @safe @nogc
172         {
173             return array[i];
174         }
175 
176     }
177 }
178 else
179 {
180     public import core.simd;
181 
182     // GDC cannot convert implicitely __vector from signed to unsigned, but LDC can
183     // And GDC sometimes need those unsigned vector types for some intrinsics.
184     // For internal use only.
185     package alias ushort8 = Vector!(ushort[8]);
186     package alias ubyte8  = Vector!(ubyte[8]);
187     package alias ubyte16 = Vector!(ubyte[16]);
188 
189     static if (!AVXSizedVectorsAreEmulated)
190     {
191         package alias ushort16 = Vector!(ushort[16]);
192         package alias ubyte32  = Vector!(ubyte[32]);
193     }
194 }
195 
// Emulation of the ldc.simd comparison masks (equalMask and friends).
// Note: these should be deprecated on non-LDC,
// since it's slower to generate that code.
version(LDC)
{
    // Nothing to declare in this branch.
}
else
{
    /// Element type of the vector type `V`, taken from `V.array[0]`.
    private template BaseType(V)
    {
        alias BaseType = typeof( ( { V v; return v; }()).array[0]);
    }

    /// Value of one element of `V` with every bit set, used as the "true" lane.
    private template TrueMask(V)
    {
        alias Elem = BaseType!V;

        static if (is(Elem == float))
        {
            // Bit pattern 0xffffffff reinterpreted as a float.
            immutable uint allOnes = 0xffffffff;
            enum Elem TrueMask = *cast(float*)(&allOnes);
        }
        else static if (is(Elem == double))
        {
            // Bit pattern 0xffffffff_ffffffff reinterpreted as a double.
            immutable ulong allOnes = 0xffffffff_ffffffff;
            enum Elem TrueMask = *cast(double*)(&allOnes);
        }
        else
        {
            // Integer case: -1 has every bit set.
            enum Elem TrueMask = -1;
        }
    }

    /// Per-element `a == b`: a lane is `TrueMask!Vec` where the test holds, else 0.
    /// For floats, equivalent to "oeq" comparison.
    Vec equalMask(Vec)(Vec a, Vec b) @trusted
    {
        enum size_t lanes = Vec.array.length;
        Vec mask;
        foreach(int lane; 0..lanes)
        {
            if (a.array[lane] == b.array[lane])
                mask.ptr[lane] = TrueMask!Vec;
            else
                mask.ptr[lane] = 0;
        }
        return mask;
    }

    /// Per-element `a != b`: a lane is `TrueMask!Vec` where the test holds, else 0.
    /// For floats, described upstream as equivalent to "one" comparison.
    /// NOTE(review): D's `!=` is true when an operand is NaN; confirm that this
    /// matches the NaN behaviour expected from the LDC counterpart.
    Vec notEqualMask(Vec)(Vec a, Vec b) @trusted
    {
        enum size_t lanes = Vec.array.length;
        Vec mask;
        foreach(int lane; 0..lanes)
        {
            if (a.array[lane] != b.array[lane])
                mask.ptr[lane] = TrueMask!Vec;
            else
                mask.ptr[lane] = 0;
        }
        return mask;
    }

    /// Per-element `a > b`: a lane is `TrueMask!Vec` where the test holds, else 0.
    /// For floats, equivalent to "ogt" comparison.
    Vec greaterMask(Vec)(Vec a, Vec b) @trusted
    {
        enum size_t lanes = Vec.array.length;
        Vec mask;
        foreach(int lane; 0..lanes)
        {
            if (a.array[lane] > b.array[lane])
                mask.ptr[lane] = TrueMask!Vec;
            else
                mask.ptr[lane] = 0;
        }
        return mask;
    }
}
264 
// greaterMask on floats: lanes where a > b have all bits set, other lanes are zero.
unittest
{
    float4 lhs = [1, 3, 5, 7];
    float4 rhs = [2, 3, 4, 5];

    // Only the last two lanes satisfy lhs > rhs; -1 is the all-ones bit pattern.
    static immutable int[4] expected = [0, 0, -1, -1];

    int4 mask = cast(int4)(greaterMask!float4(lhs, rhs));
    assert(mask.array == expected);
}
273 
static if (MMXSizedVectorsAreEmulated)
{
    // MMX-like SIMD types (8 bytes), emulated with structs wrapping a static array.

    /// 1 x 64-bit integer.
    struct long1
    {
        long[1] array;
        mixin VectorOps!(long1, long[1]);
    }

    /// 2 x 32-bit integers.
    struct int2
    {
        int[2] array;
        mixin VectorOps!(int2, int[2]);
    }

    /// 4 x 16-bit integers.
    struct short4
    {
        short[4] array;
        mixin VectorOps!(short4, short[4]);
    }

    /// 8 x 8-bit integers.
    struct byte8
    {
        byte[8] array;
        mixin VectorOps!(byte8, byte[8]);
    }

    /// 2 x single-precision floats.
    struct float2
    {
        float[2] array;
        mixin VectorOps!(float2, float[2]);
    }
}
else
{
    // For this compiler, defining MMX-sized vectors is working.
    public import core.simd;
    alias long1  = Vector!(long [1]);
    alias int2   = Vector!(int  [2]);
    alias short4 = Vector!(short[4]);
    alias byte8  = Vector!(byte [8]);
    alias float2 = Vector!(float[2]);
}

// Emulated or not, every MMX-sized type must occupy exactly 8 bytes.
static assert(long1.sizeof == 8);
static assert(int2.sizeof == 8);
static assert(short4.sizeof == 8);
static assert(byte8.sizeof == 8);
static assert(float2.sizeof == 8);
323 
324 
static if (SSESizedVectorsAreEmulated)
{
    // SSE-like SIMD types (16 bytes), emulated with structs wrapping a static array.

    /// 2 x 64-bit integers.
    struct long2
    {
        long[2] array;
        mixin VectorOps!(long2, long[2]);
    }

    /// 4 x 32-bit integers.
    struct int4
    {
        int[4] array;
        mixin VectorOps!(int4, int[4]);
    }

    /// 8 x 16-bit integers.
    struct short8
    {
        short[8] array;
        mixin VectorOps!(short8, short[8]);
    }

    /// 16 x 8-bit integers.
    struct byte16
    {
        byte[16] array;
        mixin VectorOps!(byte16, byte[16]);
    }

    /// 4 x single-precision floats.
    struct float4
    {
        float[4] array;
        mixin VectorOps!(float4, float[4]);
    }

    /// 2 x double-precision floats.
    struct double2
    {
        double[2] array;
        mixin VectorOps!(double2, double[2]);
    }
}

// Emulated or not, every SSE-sized type must occupy exactly 16 bytes.
static assert(long2.sizeof == 16);
static assert(int4.sizeof == 16);
static assert(short8.sizeof == 16);
static assert(byte16.sizeof == 16);
static assert(float4.sizeof == 16);
static assert(double2.sizeof == 16);
372 
373 
static if (AVXSizedVectorsAreEmulated)
{
    // AVX-like SIMD types (32 bytes), emulated with structs wrapping a static array.

    /// 4 x 64-bit integers.
    struct long4
    {
        long[4] array;
        mixin VectorOps!(long4, long[4]);
    }

    /// 8 x 32-bit integers.
    struct int8
    {
        int[8] array;
        mixin VectorOps!(int8, int[8]);
    }

    /// 16 x 16-bit integers.
    struct short16
    {
        short[16] array;
        mixin VectorOps!(short16, short[16]);
    }

    /// 32 x 8-bit integers.
    struct byte32
    {
        byte[32] array;
        mixin VectorOps!(byte32, byte[32]);
    }

    /// 8 x single-precision floats.
    struct float8
    {
        float[8] array;
        mixin VectorOps!(float8, float[8]);
    }

    /// 4 x double-precision floats.
    struct double4
    {
        double[4] array;
        mixin VectorOps!(double4, double[4]);
    }
}
else
{
    // AVX-sized vectors are not emulated: take the types from core.simd.
    public import core.simd;
}

// Emulated or not, every AVX-sized type must occupy exactly 32 bytes.
static assert(long4.sizeof == 32);
static assert(int8.sizeof == 32);
static assert(short16.sizeof == 32);
static assert(byte32.sizeof == 32);
static assert(float8.sizeof == 32);
static assert(double4.sizeof == 32);
424 
425 
426 
427 
// Intel-style type names, from the narrowest to the widest.
alias __m64 = long1; // like in Clang, __m64 is a vector of 1 long

alias __m128 = float4;
alias __m128d = double2;
alias __m128i = int4;

alias __m256 = float8;
alias __m256d = double4;
alias __m256i = long4; // long long __vector with ICC, GCC, and clang
435 
/// Packs two 1-bit selectors into a 2-bit immediate:
/// `x` goes to bit 1 and `y` to bit 0.
/// Both arguments are asserted to be 0 or 1.
int _MM_SHUFFLE2(int x, int y) pure @safe
{
    assert(0 <= x && x < 2);
    assert(0 <= y && y < 2);
    return y | (x << 1);
}
442 
/// Packs four 2-bit selectors into an 8-bit immediate:
/// `z` goes to bits 7-6, `y` to bits 5-4, `x` to bits 3-2 and `w` to bits 1-0.
/// Every argument is asserted to be in the 0 .. 3 range.
int _MM_SHUFFLE(int z, int y, int x, int w) pure @safe
{
    assert(0 <= x && x < 4);
    assert(0 <= y && y < 4);
    assert(0 <= z && z < 4);
    assert(0 <= w && w < 4);

    // Shift the selectors in one after the other, most significant first.
    int imm = z;
    imm = (imm << 2) | y;
    imm = (imm << 2) | x;
    imm = (imm << 2) | w;
    return imm;
}
451 
// Initialising a vector from a scalar must broadcast it to every element.
unittest
{
    // 8-byte integer vector
    int2 fromInt = 42;
    int[2] expectedInt = 42; // static array filled with 42
    assert(fromInt.array == expectedInt);

    // 16-byte float vector
    float4 fromFloat = 3.0f;
    float[4] expectedFloat = 3.0f; // static array filled with 3.0f
    assert(fromFloat.array == expectedFloat);
}