1 /**
2 * `core.simd` emulation layer.
3 *
4 * Copyright: Copyright Guillaume Piolat 2016-2020, Stefanos Baziotis 2019.
5 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 */
7 module inteli.types;
8 
9 
10 pure:
11 nothrow:
12 @nogc:
13 
version(GNU)
{
    // Note: for GDC support, be sure to use https://explore.dgnu.org/

    version(X86_64)
    {
        enum MMXSizedVectorsAreEmulated = false;
        enum SSESizedVectorsAreEmulated = false;

        // TODO: use D_AVX and D_AVX2 eventually to detect AVX?
        enum AVXSizedVectorsAreEmulated = true;

        import gcc.builtins;

        // TODO: for performance, replace that anywhere possible by a GDC intrinsic
        /// Emulated `shufflevector`: lane i of the result is lane mask[i] of the
        /// concatenation (a, b); indices 0..Count-1 pick from `a`,
        /// Count..2*Count-1 pick from `b`. Mask entries are compile-time values.
        Vec shufflevector(Vec, mask...)(Vec a, Vec b) @trusted
        {
            enum Count = Vec.array.length;
            static assert(mask.length == Count, "shufflevector mask must have one index per lane");

            Vec r = void;
            foreach(int i, m; mask)
            {
                // Mask entries are template arguments, hence compile-time
                // constants: validate both bounds and select the source lane
                // with `static if` so no runtime branch is emitted
                // (consistent with the generic emulated shufflevector below).
                static assert(m >= 0 && m < Count * 2, "shufflevector index out of range");
                enum int ind = cast(int)m;
                static if (ind < Count)
                    r.ptr[i] = a.array[ind];
                else
                    r.ptr[i] = b.array[ind - Count];
            }
            return r;
        }
    }
    else
    {
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
        enum AVXSizedVectorsAreEmulated = true;
    }
}
else version(LDC)
{
    public import ldc.simd;

    // Use this alias to mention it should only be used with LDC,
    // for example when emulated shufflevector would just be wasteful.
    alias shufflevectorLDC = shufflevector; 

    // LDC handles MMX-, SSE- and AVX-sized vectors natively via ldc.simd,
    // so nothing is emulated on this compiler.
    enum MMXSizedVectorsAreEmulated = false;
    enum SSESizedVectorsAreEmulated = false;
    enum AVXSizedVectorsAreEmulated = false;
}
else version(DigitalMars)
{
    public import core.simd;

    // Note: turning this true is desirable,
    // and leads to many bugs being discovered upstream.
    // The fact that it works relies on many workarounds.
    // In particular intel-intrinsics with this on is a honeypot for DMD backend bugs.
    // What happens next is that contributors end up on a DMD bug in their PR.
    //
    // Failed attempts: xxx
    //
    // Only attempt native core.simd with DMD frontends >= 2.100.
    static if (__VERSION__ >= 2100)
    {
        enum bool tryToEnableCoreSimdWithDMD = true;
    }
    else
    {
        enum bool tryToEnableCoreSimdWithDMD = false;
    }

    version(D_SIMD)
    {
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = !tryToEnableCoreSimdWithDMD;

        // Note: with DMD, AVX-sized vectors can't be enabled yet.
        // On linux + x86_64, this will fail since a few operands seem to be missing.
        // NOTE(review): both branches are deliberately `true` for now; the
        // version(D_AVX) split is kept only so native AVX can be flipped on later.
        version(D_AVX)
            enum AVXSizedVectorsAreEmulated = true;
        else
            enum AVXSizedVectorsAreEmulated = true;
    }
    else
    {
        // Some DMD 32-bit targets don't have D_SIMD
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
        enum AVXSizedVectorsAreEmulated = true;
    }
}
107 
/// True when at least one vector size is emulated with plain structs
/// instead of core.simd `__vector` types.
enum CoreSimdIsEmulated = MMXSizedVectorsAreEmulated || SSESizedVectorsAreEmulated || AVXSizedVectorsAreEmulated;

// GDC is excluded here, presumably because its X86_64 branch above already
// defines its own emulated `shufflevector` — TODO confirm.
version(GNU)
    enum bool DefineGenericLoadStoreUnaligned = false;
else
    enum bool DefineGenericLoadStoreUnaligned = CoreSimdIsEmulated;
114 
115 
static if (CoreSimdIsEmulated)
{
    // core.simd is emulated in some capacity: introduce `VectorOps`,
    // a mixin giving a plain struct-of-static-array the surface API of a
    // __vector type (.ptr, lane-wise operators, broadcast, raw casts).

    mixin template VectorOps(VectorType, ArrayType: BaseType[N], BaseType, size_t N)
    {
        enum Count = N;        // number of lanes
        alias Base = BaseType; // lane (element) type

        // Pointer to the first lane; mirrors __vector's .ptr property.
        BaseType* ptr() return pure nothrow @nogc
        {
            return array.ptr;
        }

        // Unary operators, applied lane-wise (e.g. -v negates every lane).
        VectorType opUnary(string op)() pure nothrow @safe @nogc
        {
            VectorType res = void;
            mixin("res.array[] = " ~ op ~ "array[];");
            return res;
        }

        // Binary operators, applied lane-wise against another vector.
        VectorType opBinary(string op)(VectorType other) pure const nothrow @safe @nogc
        {
            VectorType res = void;
            mixin("res.array[] = array[] " ~ op ~ " other.array[];");
            return res;
        }

        // Assigning a BaseType value broadcasts it into every lane.
        void opAssign(BaseType e) pure nothrow @safe @nogc
        {
            array[] = e;
        }

        // Assigning a static array copies it lane by lane.
        void opAssign(ArrayType v) pure nothrow @safe @nogc
        {
            array[] = v[];
        }

        // Compound assignment (+=, -=, ...), applied lane-wise.
        void opOpAssign(string op)(VectorType other) pure nothrow @safe @nogc
        {
            mixin("array[] "  ~ op ~ "= other.array[];");
        }

        // Construction from a static array (ArrayType is BaseType[N]).
        this(ArrayType v) pure nothrow @safe @nogc
        {
            array[] = v[];
        }

        // Broadcast constructor: fills every lane with the same scalar.
        this(BaseType x) pure nothrow @safe @nogc
        {
            array[] = x;
        }

        /// We can't support implicit conversion but do support explicit casting.
        /// "Vector types of the same size can be implicitly converted among each other."
        /// Casting to another vector type is always just a raw copy.
        VecDest opCast(VecDest)() pure const nothrow @trusted @nogc
            if (VecDest.sizeof == VectorType.sizeof)
            {
                VecDest dest = void;
                // Reinterpret the raw bytes as the destination's lane layout.
                dest.array[] = (cast(typeof(dest.array))cast(void[VectorType.sizeof])array)[];
                return dest;
            }

        // Lane access by index, readable and writable (v[i]).
        ref inout(BaseType) opIndex(size_t i) inout return pure nothrow @safe @nogc
        {
            return array[i];
        }

    }
}
else
{
    public import core.simd;

    // GDC cannot convert implicitly __vector from signed to unsigned, but LDC can.
    // And LDC sometimes needs those unsigned vector types for some intrinsics.
    // For internal use only.
    package alias ushort8 = Vector!(ushort[8]);
    package alias ubyte8  = Vector!(ubyte[8]);
    package alias ubyte16 = Vector!(ubyte[16]);
}
205 
static if (DefineGenericLoadStoreUnaligned)
{
    /// Generic emulated `shufflevector` for MMX/SSE-sized vectors: lane i of
    /// the result is lane mask[i] of the concatenation (a, b); indices
    /// 0..Count-1 pick from `a`, Count..2*Count-1 pick from `b`.
    Vec shufflevector(Vec, mask...)(Vec a, Vec b) @safe if (Vec.sizeof < 32)
    {
        enum size_t Count = Vec.array.length;
        static assert(mask.length == Count, "shufflevector mask must have one index per lane");

        Vec r = void;
        foreach(int i, m; mask)
        {
            // Reject out-of-range mask entries — including negative ones,
            // which previously fell through to a confusing error on the
            // array indexing below — with a clear compile-time message.
            static assert(m >= 0 && m < Count * 2, "shufflevector index out of range");
            enum int ind = cast(int)m;
            static if (ind < Count)
                r.array[i] = a.array[ind];
            else
                r.array[i] = b.array[ind-Count];
        }
        return r;
    }
}
226 
227 // Emulate ldc.simd cmpMask and other masks.
228 // Note: these should be deprecated on non-LDC, 
229 // since it's slower to generate that code.
230 version(LDC)
231 {} 
232 else
233 {
    // Extracts the lane (element) type of a vector-like type V — emulated
    // struct or __vector — as the type of `V.array[0]`.
    // The immediately-invoked lambda yields a value of type V so that
    // `.array` can be queried on an instance rather than on the type.
    private template BaseType(V)
    {
        alias typeof( ( { V v; return v; }()).array[0]) BaseType;
    }
238 
    // The "all bits set" lane value for V's element type, used by the mask
    // functions below to mimic LDC's comparison masks.
    // For float/double this is the value whose bit pattern is all-ones
    // (a NaN), obtained by reinterpreting an all-ones integer of equal width.
    private template TrueMask(V)
    {
        alias Elem = BaseType!V;

        static if (is(Elem == float))
        {
            // 0xffffffff reinterpreted as a 32-bit float.
            immutable uint m1 = 0xffffffff;
            enum Elem TrueMask = *cast(float*)(&m1);
        }
        else static if (is(Elem == double))
        {
            // All 64 bits set, reinterpreted as a double.
            immutable ulong m1 = 0xffffffff_ffffffff;
            enum Elem TrueMask = *cast(double*)(&m1);
        }
        else // integer case
        {
            enum Elem TrueMask = -1; // two's complement: all bits set
        }
    }
258 
259     Vec equalMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "oeq" comparison
260     {
261         enum size_t Count = Vec.array.length;
262         Vec result;
263         foreach(int i; 0..Count)
264         {
265             bool cond = a.array[i] == b.array[i];
266             result.ptr[i] = cond ? TrueMask!Vec : 0;
267         }
268         return result;
269     }
270 
271     Vec notEqualMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "one" comparison
272     {
273         enum size_t Count = Vec.array.length;
274         Vec result;
275         foreach(int i; 0..Count)
276         {
277             bool cond = a.array[i] != b.array[i];
278             result.ptr[i] = cond ? TrueMask!Vec : 0;
279         }
280         return result;
281     }
282 
283     Vec greaterMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "ogt" comparison
284     {
285         enum size_t Count = Vec.array.length;
286         Vec result;
287         foreach(int i; 0..Count)
288         {
289             bool cond = a.array[i] > b.array[i];
290             result.ptr[i] = cond ? TrueMask!Vec : 0;
291         }
292         return result;
293     }
294 }
295 
// greaterMask: lanes where a > b must be all-ones, others zero.
unittest
{
    float4 x = [1, 3, 5, 7];
    float4 y = [2, 3, 4, 5];
    int4 bits = cast(int4)(greaterMask!float4(x, y));
    static immutable int[4] expected = [0, 0, 0xffff_ffff, 0xffff_ffff];
    assert(bits.array == expected);
}
304 
static if (MMXSizedVectorsAreEmulated)
{
    /// MMX-like SIMD types (64-bit wide), emulated with structs + VectorOps.
    struct float2
    {
        float[2] array;
        mixin VectorOps!(float2, float[2]);
    }

    struct byte8
    {
        byte[8] array;
        mixin VectorOps!(byte8, byte[8]);
    }

    struct short4
    {
        short[4] array;
        mixin VectorOps!(short4, short[4]);
    }

    struct int2
    {
        int[2] array;
        mixin VectorOps!(int2, int[2]);
    }

    struct long1
    {
        long[1] array;
        mixin VectorOps!(long1, long[1]);
    }
}
else
{
    // For this compiler, defining MMX-sized vectors is working.
    public import core.simd;
    alias Vector!(long [1]) long1;
    alias Vector!(float[2]) float2;
    alias Vector!(int  [2]) int2;
    alias Vector!(short[4]) short4;
    alias Vector!(byte [8]) byte8;
}

// Every MMX-sized type must be exactly 8 bytes, emulated or not.
static assert(float2.sizeof == 8);
static assert(byte8.sizeof == 8);
static assert(short4.sizeof == 8);
static assert(int2.sizeof == 8);
static assert(long1.sizeof == 8);
354 
355 
static if (SSESizedVectorsAreEmulated)
{
    /// SSE-like SIMD types (128-bit wide), emulated with structs + VectorOps.
    // NOTE(review): unlike the MMX case there is no `else` branch here;
    // when not emulated these names presumably come from the compiler's
    // core.simd / ldc.simd import above — confirm per compiler.

    struct float4
    {
        float[4] array;
        mixin VectorOps!(float4, float[4]);
    }

    struct byte16
    {
        byte[16] array;
        mixin VectorOps!(byte16, byte[16]);
    }

    struct short8
    {
        short[8] array;
        mixin VectorOps!(short8, short[8]);
    }

    struct int4
    {
        int[4] array;
        mixin VectorOps!(int4, int[4]);
    }

    struct long2
    {
        long[2] array;
        mixin VectorOps!(long2, long[2]);
    }

    struct double2
    {
        double[2] array;
        mixin VectorOps!(double2, double[2]);
    }
}

// Every SSE-sized type must be exactly 16 bytes, emulated or not.
static assert(float4.sizeof == 16);
static assert(byte16.sizeof == 16);
static assert(short8.sizeof == 16);
static assert(int4.sizeof == 16);
static assert(long2.sizeof == 16);
static assert(double2.sizeof == 16);
403 
404 
static if (AVXSizedVectorsAreEmulated)
{
    /// AVX-like SIMD types (256-bit wide), emulated with structs + VectorOps.

    struct float8
    {
        float[8] array;
        mixin VectorOps!(float8, float[8]);
    }

    struct byte32
    {
        byte[32] array;
        mixin VectorOps!(byte32, byte[32]);
    }

    struct short16
    {
        short[16] array;
        mixin VectorOps!(short16, short[16]);
    }

    struct int8
    {
        int[8] array;
        mixin VectorOps!(int8, int[8]);
    }

    struct long4
    {
        long[4] array;
        mixin VectorOps!(long4, long[4]);
    }

    struct double4
    {
        double[4] array;
        mixin VectorOps!(double4, double[4]);
    }
}
else
{
    // Native AVX-sized vectors from core.simd.
    public import core.simd;    
}
// Every AVX-sized type must be exactly 32 bytes, emulated or not.
static assert(float8.sizeof == 32);
static assert(byte32.sizeof == 32);
static assert(short16.sizeof == 32);
static assert(int8.sizeof == 32);
static assert(long4.sizeof == 32);
static assert(double4.sizeof == 32);
455 
456 
457 
458 
// Standard Intel intrinsic type names, mapped onto the (possibly emulated) vectors.
alias __m256 = float8;
alias __m256i = long4; // long long __vector with ICC, GCC, and clang
alias __m256d = double4;
alias __m128 = float4;
alias __m128i = int4;
alias __m128d = double2;
alias __m64 = long1; // like in Clang, __m64 is a vector of 1 long
466 
/// Encodes a 2-lane shuffle immediate: selector `x` occupies the high bit,
/// `y` the low bit. Both selectors must be 0 or 1.
int _MM_SHUFFLE2(int x, int y) pure @safe
{
    assert(0 <= x && x < 2);
    assert(0 <= y && y < 2);
    int imm8 = y;
    imm8 |= x << 1;
    return imm8;
}
473 
/// Encodes a 4-lane shuffle immediate: two bits per selector, `z` in the
/// highest pair down to `w` in the lowest. Each selector must be in 0..3.
int _MM_SHUFFLE(int z, int y, int x, int w) pure @safe
{
    assert(0 <= w && w < 4);
    assert(0 <= x && x < 4);
    assert(0 <= y && y < 4);
    assert(0 <= z && z < 4);
    int imm8 = w;
    imm8 |= x << 2;
    imm8 |= y << 4;
    imm8 |= z << 6;
    return imm8;
}
482 
// Scalar-to-vector assignment must broadcast the value into every lane.
unittest
{
    float4 vA = 3.0f;
    float[4] expectedA = [3.0f, 3.0f, 3.0f, 3.0f];
    assert(vA.array == expectedA);

    int2 vB = 42;
    int[2] expectedB = [42, 42];
    assert(vB.array == expectedB);
}