/**
* Copyright: Copyright Auburn Sounds 2016-2018.
* License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
* Authors:   Guillaume Piolat
*/
module inteli.internals;

import inteli.types;

// The only math functions needed for intel-intrinsics
public import core.math: fabs, sqrt; // since they are intrinsics

version(LDC)
{
    public import core.simd;
    public import ldc.simd;
    public import ldc.gccbuiltins_x86;
    public import ldc.intrinsics;
    public import ldc.llvmasm: __asm;

    // Since LDC 1.13, using the new ldc.llvmasm.__ir variants instead of inlineIR
    static if (__VERSION__ >= 2083)
    {
        import ldc.llvmasm;
        alias LDCInlineIR = __ir_pure;

        // A version of inline IR with prefix/suffix didn't exist before LDC 1.13
        alias LDCInlineIREx = __irEx_pure;
    }
    else
    {
        // Note: no `LDCInlineIREx` alias is available on this path.
        alias LDCInlineIR = inlineIR;
    }
}



package:
nothrow @nogc:


//
//  <ROUNDING>
//
//  Why is that there? For DMD, we cannot use rint because _MM_SET_ROUNDING_MODE
//  doesn't change the FPU rounding mode, and isn't expected to do so.
//  So we devised these rounding functions to help having consistent rounding between
//  LDC and DMD. It's important that DMD uses what is in MXCSR to round.
//


/// Converts a `float` to a 32-bit integer with the `cvtss2si` instruction,
/// i.e. using the rounding mode currently selected in MXCSR.
int convertFloatToInt32UsingMXCSR(float value) pure @safe
{
    int result;
    asm pure nothrow @nogc @trusted
    {
        cvtss2si EAX, value;
        mov result, EAX;
    }
    return result;
}

/// Converts a `double` to a 32-bit integer with the `cvtsd2si` instruction,
/// i.e. using the rounding mode currently selected in MXCSR.
int convertDoubleToInt32UsingMXCSR(double value) pure @safe
{
    int result;
    asm pure nothrow @nogc @trusted
    {
        cvtsd2si EAX, value;
        mov result, EAX;
    }
    return result;
}

/// Converts a `float` to a 64-bit integer, rounding as selected in MXCSR.
/// On x86_64 this is a single `cvtss2si`; on 32-bit x86 the conversion goes through
/// the x87 FPU after copying the MXCSR rounding bits into the FPU control word.
long convertFloatToInt64UsingMXCSR(float value) pure @safe
{
    // 64-bit can use an SSE instruction
    version(D_InlineAsm_X86_64)
    {
        long result;
        version(LDC) // work-around for " Data definition directives inside inline asm are not supported yet."
        {
            asm pure nothrow @nogc @trusted
            {
                movss XMM0, value;
                cvtss2si RAX, XMM0;
                mov result, RAX;
            }
        }
        else
        {
            asm pure nothrow @nogc @trusted
            {
                movss XMM0, value;
                db 0xf3; db 0x48; db 0x0f; db 0x2d; db 0xc0; // cvtss2si RAX, XMM0 (DMD refuses to emit)
                mov result, RAX;
            }
        }
        return result;
    }
    else version(D_InlineAsm_X86)
    {
        // In the case of 32-bit x86 there is no SSE2 way to convert FP to 64-bit int
        // This leads to an unfortunate FPU sequence in every C++ compiler.
        // See: https://godbolt.org/z/vZym77

        // Get current MXCSR rounding
        uint sseRounding;
        ushort savedFPUCW;
        ushort newFPUCW;
        long result;
        asm pure nothrow @nogc @trusted
        {
            stmxcsr sseRounding;
            fld value;
            fnstcw savedFPUCW;
            mov AX, savedFPUCW;
            and AX, 0xf3ff;          // clear FPU rounding bits
            movzx ECX, word ptr sseRounding;
            and ECX, 0x6000;         // only keep SSE rounding bits
            shr ECX, 3;              // move them to the position of the FPU rounding bits
            or AX, CX;               // make a new control word for FPU with SSE bits
            mov newFPUCW, AX;
            fldcw newFPUCW;
            fistp qword ptr result;  // convert, respecting MXCSR (but not other control word things)
            fldcw savedFPUCW;        // restore the caller's FPU control word
        }
        return result;
    }
    else
        static assert(false);
}

/// Converts a `double` to a 64-bit integer, rounding as selected in MXCSR.
/// On x86_64 this is a single `cvtsd2si`; on 32-bit x86 the conversion goes through
/// the x87 FPU after copying the MXCSR rounding bits into the FPU control word.
long convertDoubleToInt64UsingMXCSR(double value) pure @safe
{
    // 64-bit can use an SSE instruction
    version(D_InlineAsm_X86_64)
    {
        long result;
        version(LDC) // work-around for "Data definition directives inside inline asm are not supported yet."
        {
            asm pure nothrow @nogc @trusted
            {
                movsd XMM0, value;
                cvtsd2si RAX, XMM0;
                mov result, RAX;
            }
        }
        else
        {
            asm pure nothrow @nogc @trusted
            {
                movsd XMM0, value;
                db 0xf2; db 0x48; db 0x0f; db 0x2d; db 0xc0; // cvtsd2si RAX, XMM0 (DMD refuses to emit)
                mov result, RAX;
            }
        }
        return result;
    }
    else version(D_InlineAsm_X86)
    {
        // In the case of 32-bit x86 there is no SSE2 way to convert FP to 64-bit int
        // This leads to an unfortunate FPU sequence in every C++ compiler.
        // See: https://godbolt.org/z/vZym77

        // Get current MXCSR rounding
        uint sseRounding;
        ushort savedFPUCW;
        ushort newFPUCW;
        long result;
        asm pure nothrow @nogc @trusted
        {
            stmxcsr sseRounding;
            fld value;
            fnstcw savedFPUCW;
            mov AX, savedFPUCW;
            and AX, 0xf3ff;          // clear FPU rounding bits
            movzx ECX, word ptr sseRounding;
            and ECX, 0x6000;         // only keep SSE rounding bits
            shr ECX, 3;              // move them to the position of the FPU rounding bits
            or AX, CX;               // make a new control word for FPU with SSE bits
            mov newFPUCW, AX;
            fldcw newFPUCW;
            fistp qword ptr result;  // explicit 64-bit store, same form as the float variant
            fldcw savedFPUCW;        // restore the caller's FPU control word
        }
        return result;
    }
    else
        static assert(false);
}


//
//  </ROUNDING>
//


// using the Intel terminology here

/// Clamps a signed 16-bit value to the range of a signed byte, [-128, 127].
byte saturateSignedWordToSignedByte(short value) pure @safe
{
    if (value > 127) value = 127;
    if (value < -128) value = -128;
    return cast(byte) value;
}

/// Clamps a signed 16-bit value to the range of an unsigned byte, [0, 255].
ubyte saturateSignedWordToUnsignedByte(short value) pure @safe
{
    if (value > 255) value = 255;
    if (value < 0) value = 0;
    return cast(ubyte) value;
}

/// Clamps a signed 32-bit value to the range of a signed 16-bit integer, [-32768, 32767].
short saturateSignedIntToSignedShort(int value) pure @safe
{
    if (value > 32767) value = 32767;
    if (value < -32768) value = -32768;
    return cast(short) value;
}

/// Clamps a signed 32-bit value to the range of an unsigned 16-bit integer, [0, 65535].
ushort saturateSignedIntToUnsignedShort(int value) pure @safe
{
    if (value > 65535) value = 65535;
    if (value < 0) value = 0;
    return cast(ushort) value;
}

unittest // test saturate operations
{
    assert( saturateSignedWordToSignedByte(32000) == 127);
    assert( saturateSignedWordToUnsignedByte(32000) == 255);
    assert( saturateSignedWordToSignedByte(-4000) == -128);
    assert( saturateSignedWordToUnsignedByte(-4000) == 0);
    assert( saturateSignedIntToSignedShort(32768) == 32767);
    assert( saturateSignedIntToUnsignedShort(32768) == 32768);
    assert( saturateSignedIntToSignedShort(-32769) == -32768);
    assert( saturateSignedIntToUnsignedShort(-32769) == 0);
}

version(unittest)
{
    // This is just for debugging tests
    import core.stdc.stdio: printf;

    // printing vectors for implementation
    // Note: you can override `pure` within a `debug` clause

    /// Prints the two 32-bit integer lanes of `v`.
    void _mm_print_pi32(__m64 v) @trusted
    {
        int2 C = cast(int2)v;
        printf("%d %d\n", C[0], C[1]);
    }

    /// Prints the four 16-bit integer lanes of `v`.
    void _mm_print_pi16(__m64 v) @trusted
    {
        short4 C = cast(short4)v;
        printf("%d %d %d %d\n", C[0], C[1], C[2], C[3]);
    }

    /// Prints the eight 8-bit integer lanes of `v`.
    void _mm_print_pi8(__m64 v) @trusted
    {
        byte8 C = cast(byte8)v;
        printf("%d %d %d %d %d %d %d %d\n",
        C[0], C[1], C[2], C[3], C[4], C[5], C[6], C[7]);
    }

    /// Prints the four 32-bit integer lanes of `v`.
    void _mm_print_epi32(__m128i v) @trusted
    {
        printf("%d %d %d %d\n",
              v[0], v[1], v[2], v[3]);
    }

    /// Prints the eight 16-bit integer lanes of `v`.
    void _mm_print_epi16(__m128i v) @trusted
    {
        short8 C = cast(short8)v;
        printf("%d %d %d %d %d %d %d %d\n",
              C[0], C[1], C[2], C[3], C[4], C[5], C[6], C[7]);
    }

    /// Prints the sixteen 8-bit integer lanes of `v`.
    void _mm_print_epi8(__m128i v) @trusted
    {
        byte16 C = cast(byte16)v;
        printf("%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n",
        C[0], C[1], C[2], C[3], C[4], C[5], C[6], C[7], C[8], C[9], C[10], C[11], C[12], C[13], C[14], C[15]);
    }

    /// Prints the four single-precision lanes of `v`.
    void _mm_print_ps(__m128 v) @trusted
    {
        float4 C = cast(float4)v;
        printf("%f %f %f %f\n", C[0], C[1], C[2], C[3]);
    }

    /// Prints the two double-precision lanes of `v`.
    void _mm_print_pd(__m128d v) @trusted
    {
        double2 C = cast(double2)v;
        printf("%f %f\n", C[0], C[1]);
    }
}


//
// <FLOATING-POINT COMPARISONS>
//
// Note: `ldc.simd` cannot express all nuances of FP comparisons, so we
//       need different IR generation.

/// Floating-point comparison predicates.
/// "Ordered" predicates are false as soon as one operand is NaN,
/// "unordered" predicates are true in that case.
/// The declaration order must match the entries of `FPComparisonToString`.
enum FPComparison
{
    /// ordered and equal
    oeq,
    /// ordered and greater than
    ogt,
    /// ordered and greater than or equal
    oge,
    /// ordered and less than
    olt,
    /// ordered and less than or equal
    ole,
    /// ordered and not equal
    one,
    /// ordered (no nans)
    ord,
    /// unordered or equal
    ueq,
    /// unordered or greater than ("nle")
    ugt,
    /// unordered or greater than or equal ("nlt")
    uge,
    /// unordered or less than ("nge")
    ult,
    /// unordered or less than or equal ("ngt")
    ule,
    /// unordered or not equal ("neq")
    une,
    /// unordered (either nans)
    uno,
}

// Textual name of each predicate, indexed by its `FPComparison` value.
// These strings are spliced into the `fcmp` instruction of the inline IR.
private static immutable string[FPComparison.max+1] FPComparisonToString =
[
    "oeq", "ogt", "oge", "olt", "ole", "one", "ord", // ordered predicates
    "ueq", "ugt", "uge", "ult", "ule", "une", "uno", // unordered predicates
];

// Individual float comparison: yields `true` or `false`; callers turn it into a mask or a flag.
// Useful for DMD and testing
/// Scalar reference implementation of the `FPComparison` predicates.
/// Params:
///     comparison = predicate to evaluate
///     a = left operand
///     b = right operand
/// Returns: `true` if the predicate holds. The pair is "unordered" when either operand is NaN.
private bool compareFloat(T)(FPComparison comparison, T a, T b) pure @safe
{
    import std.math : isNaN; // only `isNaN` is needed here
    bool unordered = isNaN(a) || isNaN(b);
    final switch(comparison) with(FPComparison)
    {
        case oeq: return a == b;
        case ogt: return a > b;
        case oge: return a >= b;
        case olt: return a < b;
        case ole: return a <= b;
        case one: return !unordered && (a != b); // NaN with != always yields true
        case ord: return !unordered;
        case ueq: return unordered || (a == b);
        case ugt: return unordered || (a > b);
        case uge: return unordered || (a >= b);
        case ult: return unordered || (a < b);
        case ule: return unordered || (a <= b);
        case une: return (a != b); // NaN with != always yields true
        case uno: return unordered;
    }
}

version(LDC)
{
    /// Provides packed float comparisons.
    /// Each lane is -1 (all bits set) when the predicate holds, else 0.
    package int4 cmpps(FPComparison comparison)(float4 a, float4 b) pure @safe
    {
        enum ir = `
            %cmp = fcmp `~ FPComparisonToString[comparison] ~` <4 x float> %0, %1
            %r = sext <4 x i1> %cmp to <4 x i32>
            ret <4 x i32> %r`;

        return LDCInlineIR!(ir, int4, float4, float4)(a, b);
    }

    /// Provides packed double comparisons.
    /// Each lane is -1 (all bits set) when the predicate holds, else 0.
    package long2 cmppd(FPComparison comparison)(double2 a, double2 b) pure @safe
    {
        enum ir = `
            %cmp = fcmp `~ FPComparisonToString[comparison] ~` <2 x double> %0, %1
            %r = sext <2 x i1> %cmp to <2 x i64>
            ret <2 x i64> %r`;

        return LDCInlineIR!(ir, long2, double2, double2)(a, b);
    }

    /// CMPSS-style comparisons: only lane 0 is compared, the other lanes are copied from `a`.
    /// clang implement it through x86 intrinsics, it is possible with IR alone
    /// but leads to less optimal code.
    /// PERF: try to implement it with __builtin_ia32_cmpss and immediate 0 to 7.
    /// Not that simple.
    package float4 cmpss(FPComparison comparison)(float4 a, float4 b) pure @safe
    {
        /*
        Sketch of the builtin-based version (`FPComparisonToX86Predicate` does not exist in this file):

        enum ubyte predicateNumber = FPComparisonToX86Predicate[comparison];
        enum bool invertOp = (predicateNumber & 0x80) != 0;
        static if(invertOp)
            return __builtin_ia32_cmpss(b, a, predicateNumber & 0x7f);
        else
            return __builtin_ia32_cmpss(a, b, predicateNumber & 0x7f);
        */
        enum ir = `
            %cmp = fcmp `~ FPComparisonToString[comparison] ~` float %0, %1
            %r = sext i1 %cmp to i32
            %r2 = bitcast i32 %r to float
            ret float %r2`;

        float4 r = a;
        r[0] = LDCInlineIR!(ir, float, float, float)(a[0], b[0]);
        return r;
    }

    /// CMPSD-style comparisons: only lane 0 is compared, the other lane is copied from `a`.
    /// clang implement it through x86 intrinsics, it is possible with IR alone
    /// but leads to less optimal code.
    /// PERF: try to implement it with __builtin_ia32_cmpsd and immediate 0 to 7.
    /// Not that simple.
    package double2 cmpsd(FPComparison comparison)(double2 a, double2 b) pure @safe
    {
        enum ir = `
            %cmp = fcmp `~ FPComparisonToString[comparison] ~` double %0, %1
            %r = sext i1 %cmp to i64
            %r2 = bitcast i64 %r to double
            ret double %r2`;

        double2 r = a;
        r[0] = LDCInlineIR!(ir, double, double, double)(a[0], b[0]);
        return r;
    }

    /// Compares lane 0 of `a` and `b`; returns 1 when the predicate holds, else 0.
    // Note: ucomss and ucomsd are left unimplemented
    package int comss(FPComparison comparison)(float4 a, float4 b) pure @safe
    {
        enum ir = `
            %cmp = fcmp `~ FPComparisonToString[comparison] ~` float %0, %1
            %r = zext i1 %cmp to i32
            ret i32 %r`;

        return LDCInlineIR!(ir, int, float, float)(a[0], b[0]);
    }

    /// Compares lane 0 of `a` and `b`; returns 1 when the predicate holds, else 0.
    // Note: ucomss and ucomsd are left unimplemented
    package int comsd(FPComparison comparison)(double2 a, double2 b) pure @safe
    {
        enum ir = `
            %cmp = fcmp `~ FPComparisonToString[comparison] ~` double %0, %1
            %r = zext i1 %cmp to i32
            ret i32 %r`;

        return LDCInlineIR!(ir, int, double, double)(a[0], b[0]);
    }
}
else
{
    /// Provides packed float comparisons.
    /// Each lane is -1 (all bits set) when the predicate holds, else 0.
    package int4 cmpps(FPComparison comparison)(float4 a, float4 b) pure @safe
    {
        int4 result;
        foreach(i; 0..4)
        {
            result[i] = compareFloat!float(comparison, a[i], b[i]) ? -1 : 0;
        }
        return result;
    }

    /// Provides packed double comparisons.
    /// Each lane is -1 (all bits set) when the predicate holds, else 0.
    package long2 cmppd(FPComparison comparison)(double2 a, double2 b) pure @safe
    {
        long2 result;
        foreach(i; 0..2)
        {
            result[i] = compareFloat!double(comparison, a[i], b[i]) ? -1 : 0;
        }
        return result;
    }

    /// Provides CMPSS-style comparison: only lane 0 is compared,
    /// the other lanes keep the bits of `a`.
    package float4 cmpss(FPComparison comparison)(float4 a, float4 b) pure @safe
    {
        int4 result = cast(int4)a;
        result[0] = compareFloat!float(comparison, a[0], b[0]) ? -1 : 0;
        return cast(float4)result;
    }

    /// Provides CMPSD-style comparison: only lane 0 is compared,
    /// the other lane keeps the bits of `a`.
    package double2 cmpsd(FPComparison comparison)(double2 a, double2 b) pure @safe
    {
        long2 result = cast(long2)a;
        result[0] = compareFloat!double(comparison, a[0], b[0]) ? -1 : 0;
        return cast(double2)result;
    }

    /// Compares lane 0 of `a` and `b`; returns 1 when the predicate holds, else 0.
    // Note: ucomss and ucomsd are left unimplemented
    package int comss(FPComparison comparison)(float4 a, float4 b) pure @safe
    {
        return compareFloat!float(comparison, a[0], b[0]) ? 1 : 0;
    }

    /// Compares lane 0 of `a` and `b`; returns 1 when the predicate holds, else 0.
    // Note: ucomss and ucomsd are left unimplemented
    package int comsd(FPComparison comparison)(double2 a, double2 b) pure @safe
    {
        return compareFloat!double(comparison, a[0], b[0]) ? 1 : 0;
    }
}
unittest // cmpps
{
    // Check all comparison type is working
    float4 A = [1, 3, 5, float.nan];
    float4 B = [2, 3, 4, 5];

    int4 result_oeq = cmpps!(FPComparison.oeq)(A, B);
    int4 result_ogt = cmpps!(FPComparison.ogt)(A, B);
    int4 result_oge = cmpps!(FPComparison.oge)(A, B);
    int4 result_olt = cmpps!(FPComparison.olt)(A, B);
    int4 result_ole = cmpps!(FPComparison.ole)(A, B);
    int4 result_one = cmpps!(FPComparison.one)(A, B);
    int4 result_ord = cmpps!(FPComparison.ord)(A, B);
    int4 result_ueq = cmpps!(FPComparison.ueq)(A, B);
    int4 result_ugt = cmpps!(FPComparison.ugt)(A, B);
    int4 result_uge = cmpps!(FPComparison.uge)(A, B);
    int4 result_ult = cmpps!(FPComparison.ult)(A, B);
    int4 result_ule = cmpps!(FPComparison.ule)(A, B);
    int4 result_une = cmpps!(FPComparison.une)(A, B);
    int4 result_uno = cmpps!(FPComparison.uno)(A, B);

    static immutable int[4] correct_oeq    = [ 0,-1, 0, 0];
    static immutable int[4] correct_ogt    = [ 0, 0,-1, 0];
    static immutable int[4] correct_oge    = [ 0,-1,-1, 0];
    static immutable int[4] correct_olt    = [-1, 0, 0, 0];
    static immutable int[4] correct_ole    = [-1,-1, 0, 0];
    static immutable int[4] correct_one    = [-1, 0,-1, 0];
    static immutable int[4] correct_ord    = [-1,-1,-1, 0];
    static immutable int[4] correct_ueq    = [ 0,-1, 0,-1];
    static immutable int[4] correct_ugt    = [ 0, 0,-1,-1];
    static immutable int[4] correct_uge    = [ 0,-1,-1,-1];
    static immutable int[4] correct_ult    = [-1, 0, 0,-1];
    static immutable int[4] correct_ule    = [-1,-1, 0,-1];
    static immutable int[4] correct_une    = [-1, 0,-1,-1];
    static immutable int[4] correct_uno    = [ 0, 0, 0,-1];

    assert(result_oeq.array == correct_oeq);
    assert(result_ogt.array == correct_ogt);
    assert(result_oge.array == correct_oge);
    assert(result_olt.array == correct_olt);
    assert(result_ole.array == correct_ole);
    assert(result_one.array == correct_one);
    assert(result_ord.array == correct_ord);
    assert(result_ueq.array == correct_ueq);
    assert(result_ugt.array == correct_ugt);
    assert(result_uge.array == correct_uge);
    assert(result_ult.array == correct_ult);
    assert(result_ule.array == correct_ule);
    assert(result_une.array == correct_une);
    assert(result_uno.array == correct_uno);
}
unittest
{
    double2 a = [1, 3];
    double2 b = [2, 3];
    long2 c = cmppd!(FPComparison.ult)(a, b);
    static immutable long[2] correct = [cast(long)(-1), 0];
    assert(c.array == correct);
}
unittest // cmpss and comss
{
    void testComparison(FPComparison comparison)(float4 A, float4 B)
    {
        float4 result = cmpss!comparison(A, B);
        int4 iresult = cast(int4)result;
        int expected = compareFloat!float(comparison, A[0], B[0]) ? -1 : 0;
        assert(iresult[0] == expected);
        assert(result[1] == A[1]);
        assert(result[2] == A[2]);
        assert(result[3] == A[3]);

        // check comss
        int comResult = comss!comparison(A, B);
        assert( (expected != 0) == (comResult != 0) );
    }

    // Check all comparison type is working
    float4 A = [1, 3, 5, 6];
    float4 B = [2, 3, 4, 5];
    float4 C = [float.nan, 3, 4, 5];

    // Compile-time loop over every predicate, in declaration order:
    // each one is checked against an ordered pair (A, B) and a pair with a NaN (A, C).
    foreach (name; __traits(allMembers, FPComparison))
    {
        enum FPComparison predicate = __traits(getMember, FPComparison, name);
        testComparison!predicate(A, B);
        testComparison!predicate(A, C);
    }
}
unittest // cmpsd and comsd
{
    void testComparison(FPComparison comparison)(double2 A, double2 B)
    {
        double2 result = cmpsd!comparison(A, B);
        long2 iresult = cast(long2)result;
        long expected = compareFloat!double(comparison, A[0], B[0]) ? -1 : 0;
        assert(iresult[0] == expected);
        assert(result[1] == A[1]);

        // check comsd
        int comResult = comsd!comparison(A, B);
        assert( (expected != 0) == (comResult != 0) );
    }

    // Check all comparison type is working
    double2 A = [1, 3];
    double2 B = [2, 4];
    double2 C = [double.nan, 5];

    // Compile-time loop over every predicate, in declaration order:
    // each one is checked against an ordered pair (A, B) and a pair with a NaN (A, C).
    foreach (name; __traits(allMembers, FPComparison))
    {
        enum FPComparison predicate = __traits(getMember, FPComparison, name);
        testComparison!predicate(A, B);
        testComparison!predicate(A, C);
    }
}

//
//  </FLOATING-POINT COMPARISONS>
//


/// Returns the low 64 bits of `a` as a `__m64`.
__m64 to_m64(__m128i a) pure @safe
{
    long2 la = cast(long2)a;
    long1 r;
    r[0] = la[0];
    return r;
}

/// Widens `a` to a `__m128i`: the low 64 bits are `a`, the high 64 bits are zero.
__m128i to_m128i(__m64 a) pure @safe
{
    long2 r = [0, 0];
    r[0] = a[0];
    return cast(__m128i)r;
}