/**
* Copyright: Copyright Auburn Sounds 2019.
* License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
* Authors:   Guillaume Piolat
*/
module inteli.mmx;

public import inteli.types;
import inteli.internals;

nothrow @nogc:

// Important: you don't need to call _mm_empty when using "MMX" capabilities of intel-intrinsics,
// since it just generates the right IR and cleaning-up FPU registers is up to the codegen.
// intel-intrinsics is just semantics.


/// Add packed 16-bit integers in `a` and `b`.
__m64 _mm_add_pi16 (__m64 a, __m64 b) pure @safe
{
    return cast(__m64)(cast(short4)a + cast(short4)b);
}
unittest
{
    short4 R = cast(short4) _mm_add_pi16(_mm_set1_pi16(4), _mm_set1_pi16(3));
    short[4] correct = [7, 7, 7, 7];
    assert(R.array == correct);
}

/// Add packed 32-bit integers in `a` and `b`.
__m64 _mm_add_pi32 (__m64 a, __m64 b) pure @safe
{
    return cast(__m64)(cast(int2)a + cast(int2)b);
}
unittest
{
    int2 R = cast(int2) _mm_add_pi32(_mm_set1_pi32(4), _mm_set1_pi32(3));
    int[2] correct = [7, 7];
    assert(R.array == correct);
}

/// Add packed 8-bit integers in `a` and `b`.
__m64 _mm_add_pi8 (__m64 a, __m64 b) pure @safe
{
    return cast(__m64)(cast(byte8)a + cast(byte8)b);
}
unittest
{
    // Wrap-around (not saturating) addition: 127 + (-128) == -1 in each lane.
    byte8 R = cast(byte8) _mm_add_pi8(_mm_set1_pi8(127), _mm_set1_pi8(-128));
    byte[8] correct = [-1, -1, -1, -1, -1, -1, -1, -1];
    assert(R.array == correct);
}

/// Add packed 16-bit integers in `a` and `b` using signed saturation.
// PERF: PADDSW not generated
__m64 _mm_adds_pi16(__m64 a, __m64 b) pure @trusted
{
    short[4] res;
    short4 sa = cast(short4)a;
    short4 sb = cast(short4)b;
    foreach(i; 0..4)
    {
        // The lane sums are computed as `int` and clamped to [-32768, 32767].
        res[i] = saturateSignedIntToSignedShort(sa.array[i] + sb.array[i]);
    }
    return *cast(__m64*)(res.ptr);
}
unittest
{
    short4 res = cast(short4) _mm_adds_pi16(_mm_set_pi16(3, 2, 1, 0),
                                            _mm_set_pi16(3, 2, 1, 0));
    static immutable short[4] correctResult = [0, 2, 4, 6];
    assert(res.array == correctResult);
}

/// Add packed 8-bit integers in `a` and `b` using signed saturation.
// PERF: PADDSB not generated
__m64 _mm_adds_pi8(__m64 a, __m64 b) pure @trusted
{
    byte[8] res;
    byte8 sa = cast(byte8)a;
    byte8 sb = cast(byte8)b;
    foreach(i; 0..8)
    {
        // The lane sums are computed as `int` and clamped to [-128, 127].
        res[i] = saturateSignedWordToSignedByte(sa.array[i] + sb.array[i]);
    }
    return *cast(__m64*)(res.ptr);
}
unittest
{
    byte8 res = cast(byte8) _mm_adds_pi8(_mm_set_pi8(7, 6, 5, 4, 3, 2, 1, 0),
                                         _mm_set_pi8(7, 6, 5, 4, 3, 2, 1, 0));
    static immutable byte[8] correctResult = [0, 2, 4, 6, 8, 10, 12, 14];
    assert(res.array == correctResult);
}

/// Add packed 16-bit integers in `a` and `b` using unsigned saturation.
// PERF: PADDUSW not generated
__m64 _mm_adds_pu16(__m64 a, __m64 b) pure @trusted
{
    ushort[4] res;
    short4 sa = cast(short4)a;
    short4 sb = cast(short4)b;
    foreach(i; 0..4)
    {
        // Lanes hold unsigned data: reinterpret each short as ushort before
        // adding, then clamp the 32-bit sum to [0, 65535].
        res[i] = saturateSignedIntToUnsignedShort(cast(ushort)(sa.array[i]) + cast(ushort)(sb.array[i]));
    }
    return *cast(__m64*)(res.ptr);
}
unittest
{
    short4 res = cast(short4) _mm_adds_pu16(_mm_set_pi16(3, 2, cast(short)65535, 0),
                                            _mm_set_pi16(3, 2, 1, 0));
    static immutable short[4] correctResult = [0, cast(short)65535, 4, 6];
    assert(res.array == correctResult);
}

/// Add packed 8-bit integers in `a` and `b` using unsigned saturation.
// PERF: PADDUSB not generated
__m64 _mm_adds_pu8(__m64 a, __m64 b) pure @trusted
{
    byte[8] res;
    byte8 sa = cast(byte8)a;
    byte8 sb = cast(byte8)b;
    foreach(i; 0..8)
    {
        // Lanes hold unsigned data: reinterpret each byte as ubyte before
        // adding, then clamp the sum to [0, 255].
        res[i] = saturateSignedWordToUnsignedByte(cast(ubyte)(sa.array[i]) + cast(ubyte)(sb.array[i]));
    }
    return *cast(__m64*)(res.ptr);
}
unittest
{
    byte8 res = cast(byte8) _mm_adds_pu8(_mm_set_pi8(7, 6, 5, 4, 3, 2, cast(byte)255, 0),
                                         _mm_set_pi8(7, 6, 5, 4, 3, 2, 1, 0));
    static immutable byte[8] correctResult = [0, cast(byte)255, 4, 6, 8, 10, 12, 14];
    assert(res.array == correctResult);
}

/// Compute the bitwise AND of 64 bits (representing integer data) in `a` and `b`.
__m64 _mm_and_si64 (__m64 a, __m64 b) pure @safe
{
    return a & b;
}
unittest
{
    __m64 A = [7];
    __m64 B = [14];
    __m64 R = _mm_and_si64(A, B);
    assert(R[0] == 6);
}

/// Compute the bitwise NOT of 64 bits (representing integer data) in `a` and then AND with `b`.
__m64 _mm_andnot_si64 (__m64 a, __m64 b) pure @safe
{
    return (~a) & b;
}
unittest
{
    __m64 A = [7];
    __m64 B = [14];
    __m64 R = _mm_andnot_si64(A, B);
    assert(R[0] == 8);
}

/// Compare packed 16-bit integers in `a` and `b` for equality.
/// Each 16-bit lane of the result is all-ones on equality, else all-zeroes.
__m64 _mm_cmpeq_pi16 (__m64 a, __m64 b) pure @safe
{
    return cast(__m64) equalMask!short4(cast(short4)a, cast(short4)b);
}
unittest
{
    short4 A = [-3, -2, -1, 0];
    short4 B = [ 4, 3, 2, 1];
    short[4] E = [ 0, 0, 0, 0];
    short4 R = cast(short4)(_mm_cmpeq_pi16(cast(__m64)A, cast(__m64)B));
    assert(R.array == E);
}

/// Compare packed 32-bit integers in `a` and `b` for equality.
/// Each 32-bit lane of the result is all-ones on equality, else all-zeroes.
__m64 _mm_cmpeq_pi32 (__m64 a, __m64 b) pure @safe
{
    return cast(__m64) equalMask!int2(cast(int2)a, cast(int2)b);
}
unittest
{
    int2 A = [-3, -2];
    int2 B = [ 4, -2];
    int[2] E = [ 0, -1];
    int2 R = cast(int2)(_mm_cmpeq_pi32(cast(__m64)A, cast(__m64)B));
    assert(R.array == E);
}

/// Compare packed 8-bit integers in `a` and `b` for equality.
/// Each 8-bit lane of the result is all-ones on equality, else all-zeroes.
__m64 _mm_cmpeq_pi8 (__m64 a, __m64 b) pure @safe
{
    return cast(__m64) equalMask!byte8(cast(byte8)a, cast(byte8)b);
}
unittest
{
    __m64 A = _mm_setr_pi8(1, 2, 3, 1, 2, 1, 1, 2);
    __m64 B = _mm_setr_pi8(2, 2, 1, 2, 3, 1, 2, 3);
    byte8 C = cast(byte8) _mm_cmpeq_pi8(A, B);
    byte[8] correct = [0,-1, 0, 0, 0,-1, 0, 0];
    assert(C.array == correct);
}

/// Compare packed signed 16-bit integers in `a` and `b` for greater-than.
/// Each 16-bit lane of the result is all-ones when `a > b`, else all-zeroes.
__m64 _mm_cmpgt_pi16 (__m64 a, __m64 b) pure @safe
{
    return cast(__m64) greaterMask!short4(cast(short4)a, cast(short4)b);
}
unittest
{
    short4 A = [-3, -2, -1, 0];
    short4 B = [ 4, 3, 2, 1];
    short[4] E = [ 0, 0, 0, 0];
    short4 R = cast(short4)(_mm_cmpgt_pi16(cast(__m64)A, cast(__m64)B));
    assert(R.array == E);
}

/// Compare packed signed 32-bit integers in `a` and `b` for greater-than.
/// Each 32-bit lane of the result is all-ones when `a > b`, else all-zeroes.
__m64 _mm_cmpgt_pi32 (__m64 a, __m64 b) pure @safe
{
    return cast(__m64) greaterMask!int2(cast(int2)a, cast(int2)b);
}
unittest
{
    int2 A = [-3, 2];
    int2 B = [ 4, -2];
    int[2] E = [ 0, -1];
    int2 R = cast(int2)(_mm_cmpgt_pi32(cast(__m64)A, cast(__m64)B));
    assert(R.array == E);
}

/// Compare packed signed 8-bit integers in `a` and `b` for greater-than.
/// Each 8-bit lane of the result is all-ones when `a > b`, else all-zeroes.
__m64 _mm_cmpgt_pi8 (__m64 a, __m64 b) pure @safe
{
    return cast(__m64) greaterMask!byte8(cast(byte8)a, cast(byte8)b);
}
unittest
{
    __m64 A = _mm_setr_pi8(1, 2, 3, 1, 2, 1, 1, 2);
    __m64 B = _mm_setr_pi8(2, 2, 1, 2, 3, 1, 2, 3);
    byte8 C = cast(byte8) _mm_cmpgt_pi8(A, B);
    byte[8] correct = [0, 0,-1, 0, 0, 0, 0, 0];
    assert(C.array == correct);
}

/// Copy 64-bit integer `a` to `dst`.
long _mm_cvtm64_si64 (__m64 a) pure @safe
{
    return a[0];
}

/// Copy 32-bit integer `a` to the lower elements of `dst`, and zero the upper element of `dst`.
/// NOTE(review): the assignment below stores `a` into a 64-bit lane, which sign-extends;
/// for negative `a` the upper 32 bits are all-ones rather than zero as Intel's pseudo-code
/// states. The unittest below pins the sign-extending behavior — confirm no caller relies
/// on zeroed upper bits before changing it.
__m64 _mm_cvtsi32_si64 (int a) pure @safe
{
    __m64 r = void;
    r[0] = a;
    return r;
}
unittest
{
    __m64 R = _mm_cvtsi32_si64(-1);
    assert(R[0] == -1);
}

/// Copy 64-bit integer `a` to `dst`.
__m64 _mm_cvtsi64_m64 (long a) pure @safe
{
    __m64 r = void;
    r[0] = a;
    return r;
}
unittest
{
    __m64 R = _mm_cvtsi64_m64(-1);
    assert(R[0] == -1);
}

/// Copy the lower 32-bit integer in `a` to `dst`.
int _mm_cvtsi64_si32 (__m64 a) pure @safe
{
    return cast(int)a[0]; // truncates to the low 32 bits
}

alias _m_empty = _mm_empty;

/// Empty the MMX state. A no-op in intel-intrinsics; see the note at the top of this module.
void _mm_empty() pure @safe
{
    // do nothing, see comment on top of file
}

alias _m_from_int =  _mm_cvtsi32_si64;
alias _m_from_int64 = _mm_cvtsi64_m64;

/+
pmaddwd
__m64 _mm_madd_pi16 (__m64 a, __m64 b) TODO
pmulhw
__m64 _mm_mulhi_pi16 (__m64 a, __m64 b) TODO
pmullw
__m64 _mm_mullo_pi16 (__m64 a, __m64 b) TODO
por
__m64 _mm_or_si64 (__m64 a, __m64 b) TODO
packsswb
__m64 _mm_packs_pi16 (__m64 a, __m64 b) TODO
packssdw
__m64 _mm_packs_pi32 (__m64 a, __m64 b) TODO
packuswb
__m64 _mm_packs_pu16 (__m64 a, __m64 b) TODO
packssdw
__m64 _m_packssdw (__m64 a, __m64 b) TODO
packsswb
__m64 _m_packsswb (__m64 a, __m64 b) TODO
packuswb
__m64 _m_packuswb (__m64 a, __m64 b) TODO
+/


deprecated alias _m_paddb = _mm_add_pi8;
deprecated alias _m_paddd = _mm_add_pi32;

/+
paddsb
__m64 _m_paddsb (__m64 a, __m64 b) TODO
paddsw
__m64 _m_paddsw (__m64 a, __m64 b) TODO
paddusb
__m64 _m_paddusb (__m64 a, __m64 b) TODO
paddusw
__m64 _m_paddusw (__m64 a, __m64 b) TODO
+/

deprecated alias _m_paddw = _mm_add_pi16;
deprecated alias _m_pand = _mm_and_si64;
deprecated alias _m_pandn = _mm_andnot_si64;

/+
pcmpeqb
__m64 _m_pcmpeqb (__m64 a, __m64 b) TODO
pcmpeqd
__m64 _m_pcmpeqd (__m64 a, __m64 b) TODO
pcmpeqw
__m64 _m_pcmpeqw (__m64 a, __m64 b) TODO
pcmpgtb
__m64 _m_pcmpgtb (__m64 a, __m64 b) TODO
pcmpgtd
__m64 _m_pcmpgtd (__m64 a, __m64 b) TODO
pcmpgtw
__m64 _m_pcmpgtw (__m64 a, __m64 b) TODO
pmaddwd
__m64 _m_pmaddwd (__m64 a, __m64 b) TODO
pmulhw
__m64 _m_pmulhw (__m64 a, __m64 b) TODO
pmullw
__m64 _m_pmullw (__m64 a, __m64 b) TODO
por
__m64 _m_por (__m64 a, __m64 b) TODO
pslld
__m64 _m_pslld (__m64 a, __m64 count) TODO
pslld
__m64 _m_pslldi (__m64 a, int imm8) TODO
psllq
__m64 _m_psllq (__m64 a, __m64 count) TODO
psllq
__m64 _m_psllqi (__m64 a, int imm8) TODO
psllw
__m64 _m_psllw (__m64 a, __m64 count) TODO
psllw
__m64 _m_psllwi (__m64 a, int imm8) TODO
psrad
__m64 _m_psrad (__m64 a, __m64 count) TODO
psrad
__m64 _m_psradi (__m64 a, int imm8) TODO
psraw
__m64 _m_psraw (__m64 a, __m64 count) TODO
psraw
__m64 _m_psrawi (__m64 a, int imm8) TODO
psrld
__m64 _m_psrld (__m64 a, __m64 count) TODO
psrld
__m64 _m_psrldi (__m64 a, int imm8) TODO
psrlq
__m64 _m_psrlq (__m64 a, __m64 count) TODO
psrlq
__m64 _m_psrlqi (__m64 a, int imm8) TODO
psrlw
__m64 _m_psrlw (__m64 a, __m64 count) TODO
psrlw
__m64 _m_psrlwi (__m64 a, int imm8) TODO
psubb
__m64 _m_psubb (__m64 a, __m64 b) TODO
psubd
__m64 _m_psubd (__m64 a, __m64 b) TODO
psubsb
__m64 _m_psubsb (__m64 a, __m64 b) TODO
psubsw
__m64 _m_psubsw (__m64 a, __m64 b) TODO
psubusb
__m64 _m_psubusb (__m64 a, __m64 b) TODO
psubusw
__m64 _m_psubusw (__m64 a, __m64 b) TODO
psubw
__m64 _m_psubw (__m64 a, __m64 b) TODO
punpckhbw
__m64 _m_punpckhbw (__m64 a, __m64 b) TODO
punpckhdq
__m64 _m_punpckhdq (__m64 a, __m64 b) TODO
punpckhwd
__m64 _m_punpckhwd (__m64 a, __m64 b) TODO
punpcklbw
__m64 _m_punpcklbw (__m64 a, __m64 b) TODO
punpckldq
__m64 _m_punpckldq (__m64 a, __m64 b) TODO
punpcklwd
__m64 _m_punpcklwd (__m64 a, __m64 b) TODO
pxor
__m64 _m_pxor (__m64 a, __m64 b) TODO
+/

/// Set packed 16-bit integers with the supplied values, `e3` being the most significant.
__m64 _mm_set_pi16 (short e3, short e2, short e1, short e0) pure @trusted
{
    short[4] arr = [e0, e1, e2, e3];
    return *cast(__m64*)(arr.ptr);
}
unittest
{
    short4 R = cast(short4) _mm_set_pi16(3, 2, 1, 0);
    short[4] correct = [0, 1, 2, 3];
    assert(R.array == correct);
}

/// Set packed 32-bit integers with the supplied values, `e1` being the most significant.
__m64 _mm_set_pi32 (int e1, int e0) pure @trusted
{
    int[2] arr = [e0, e1];
    return *cast(__m64*)(arr.ptr);
}
unittest
{
    int2 R = cast(int2) _mm_set_pi32(1, 0);
    int[2] correct = [0, 1];
    assert(R.array == correct);
}

/// Set packed 8-bit integers with the supplied values, `e7` being the most significant.
__m64 _mm_set_pi8 (char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0) pure @trusted
{
    byte[8] arr = [e0, e1, e2, e3, e4, e5, e6, e7];
    return *cast(__m64*)(arr.ptr);
}
unittest
{
    byte8 R = cast(byte8) _mm_set_pi8(7, 6, 5, 4, 3, 2, 1, 0);
    byte[8] correct = [0, 1, 2, 3, 4, 5, 6, 7];
    assert(R.array == correct);
}

/// Broadcast 16-bit integer `a` to all elements.
__m64 _mm_set1_pi16 (short a) pure @trusted
{
    short[4] arr = [a, a, a, a];
    return *cast(__m64*)(arr.ptr);
}
unittest
{
    short4 R = cast(short4) _mm_set1_pi16(44);
    short[4] correct = [44, 44, 44, 44];
    assert(R.array == correct);
}

/// Broadcast 32-bit integer `a` to all elements.
__m64 _mm_set1_pi32 (int a) pure @trusted
{
    int[2] arr = [a, a];
    return *cast(__m64*)(arr.ptr);
}
unittest
{
    int2 R = cast(int2) _mm_set1_pi32(43);
    int[2] correct = [43, 43];
    assert(R.array == correct);
}

/// Broadcast 8-bit integer `a` to all elements.
__m64 _mm_set1_pi8 (byte a) pure @trusted
{
    byte[8] arr = [a, a, a, a, a, a, a, a];
    return *cast(__m64*)(arr.ptr);
}
unittest
{
    byte8 R = cast(byte8) _mm_set1_pi8(42);
    byte[8] correct = [42, 42, 42, 42, 42, 42, 42, 42];
    assert(R.array == correct);
}

/// Set packed 16-bit integers with the supplied values in reverse order.
__m64 _mm_setr_pi16 (short e3, short e2, short e1, short e0) pure @trusted
{
    short[4] arr = [e3, e2, e1, e0];
    return *cast(__m64*)(arr.ptr);
}
unittest
{
    short4 R = cast(short4) _mm_setr_pi16(0, 1, 2, 3);
    short[4] correct = [0, 1, 2, 3];
    assert(R.array == correct);
}

/// Set packed 32-bit integers with the supplied values in reverse order.
__m64 _mm_setr_pi32 (int e1, int e0) pure @trusted
{
    int[2] arr = [e1, e0];
    return *cast(__m64*)(arr.ptr);
}
unittest
{
    int2 R = cast(int2) _mm_setr_pi32(0, 1);
    int[2] correct = [0, 1];
    assert(R.array == correct);
}

/// Set packed 8-bit integers with the supplied values in reverse order.
__m64 _mm_setr_pi8 (char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0) pure @trusted
{
    byte[8] arr = [e7, e6, e5, e4, e3, e2, e1, e0];
    return *cast(__m64*)(arr.ptr);
}
unittest
{
    byte8 R = cast(byte8) _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7);
    byte[8] correct = [0, 1, 2, 3, 4, 5, 6, 7];
    assert(R.array == correct);
}

/// Return vector of type `__m64` with all elements set to zero.
__m64 _mm_setzero_si64 () pure @trusted
{
    __m64 r;
    r[0] = 0;
    return r;
}
unittest
{
    __m64 R = _mm_setzero_si64();
    assert(R[0] == 0);
}


/+
psllw
__m64 _mm_sll_pi16 (__m64 a, __m64 count) TODO
pslld
__m64 _mm_sll_pi32 (__m64 a, __m64 count) TODO
psllq
__m64 _mm_sll_si64 (__m64 a, __m64 count) TODO
psllw
__m64 _mm_slli_pi16 (__m64 a, int imm8) TODO
pslld
__m64 _mm_slli_pi32 (__m64 a, int imm8) TODO
psllq
__m64 _mm_slli_si64 (__m64 a, int imm8) TODO
psraw
__m64 _mm_sra_pi16 (__m64 a, __m64 count) TODO
psrad
__m64 _mm_sra_pi32 (__m64 a, __m64 count) TODO
psraw
__m64 _mm_srai_pi16 (__m64 a, int imm8) TODO
psrad
__m64 _mm_srai_pi32 (__m64 a, int imm8) TODO
psrlw
__m64 _mm_srl_pi16 (__m64 a, __m64 count) TODO
psrld
__m64 _mm_srl_pi32 (__m64 a, __m64 count) TODO
psrlq
__m64 _mm_srl_si64 (__m64 a, __m64 count) TODO
psrlw
__m64 _mm_srli_pi16 (__m64 a, int imm8) TODO
psrld
__m64 _mm_srli_pi32 (__m64 a, int imm8) TODO
psrlq
__m64 _mm_srli_si64 (__m64 a, int imm8) TODO
psubw
__m64 _mm_sub_pi16 (__m64 a, __m64 b) TODO
psubd
__m64 _mm_sub_pi32 (__m64 a, __m64 b) TODO
psubb
__m64 _mm_sub_pi8 (__m64 a, __m64 b) TODO
psubsw
__m64 _mm_subs_pi16 (__m64 a, __m64 b) TODO
psubsb
__m64 _mm_subs_pi8 (__m64 a, __m64 b) TODO
psubusw
__m64 _mm_subs_pu16 (__m64 a, __m64 b) TODO
psubusb
__m64 _mm_subs_pu8 (__m64 a, __m64 b) TODO
+/

deprecated alias _m_to_int = _mm_cvtsi64_si32;
deprecated alias _m_to_int64 = _mm_cvtm64_si64;

/+
punpckhwd
__m64 _mm_unpackhi_pi16 (__m64 a, __m64 b) TODO
punpckhdq
__m64 _mm_unpackhi_pi32 (__m64 a, __m64 b) TODO
punpckhbw
__m64 _mm_unpackhi_pi8 (__m64 a, __m64 b) TODO
punpcklwd
__m64 _mm_unpacklo_pi16 (__m64 a, __m64 b) TODO
punpckldq
__m64 _mm_unpacklo_pi32 (__m64 a, __m64 b) TODO
punpcklbw
__m64 _mm_unpacklo_pi8 (__m64 a, __m64 b) TODO
pxor
__m64 _mm_xor_si64 (__m64 a, __m64 b) TODO

+/


/+
#define _m_packsswb _mm_packs_pi16
#define _m_packssdw _mm_packs_pi32
#define _m_packuswb _mm_packs_pu16
#define _m_punpckhbw _mm_unpackhi_pi8
#define _m_punpckhwd _mm_unpackhi_pi16
#define _m_punpckhdq _mm_unpackhi_pi32
#define _m_punpcklbw _mm_unpacklo_pi8
#define _m_punpcklwd _mm_unpacklo_pi16
#define _m_punpckldq _mm_unpacklo_pi32

#define _m_paddsb _mm_adds_pi8
#define _m_paddsw _mm_adds_pi16
#define _m_paddusb _mm_adds_pu8
#define _m_paddusw _mm_adds_pu16
#define _m_psubb _mm_sub_pi8
#define _m_psubw _mm_sub_pi16
#define _m_psubd _mm_sub_pi32
#define _m_psubsb _mm_subs_pi8
#define _m_psubsw _mm_subs_pi16
#define _m_psubusb _mm_subs_pu8
#define _m_psubusw _mm_subs_pu16
#define _m_pmaddwd _mm_madd_pi16
#define _m_pmulhw _mm_mulhi_pi16
#define _m_pmullw _mm_mullo_pi16
#define _m_psllw _mm_sll_pi16
#define _m_psllwi _mm_slli_pi16
#define _m_pslld _mm_sll_pi32
#define _m_pslldi _mm_slli_pi32
#define _m_psllq _mm_sll_si64
#define _m_psllqi _mm_slli_si64
#define _m_psraw _mm_sra_pi16
#define _m_psrawi _mm_srai_pi16
#define _m_psrad _mm_sra_pi32
#define _m_psradi _mm_srai_pi32
#define _m_psrlw _mm_srl_pi16
#define _m_psrlwi _mm_srli_pi16
#define _m_psrld _mm_srl_pi32
#define _m_psrldi _mm_srli_pi32
#define _m_psrlq _mm_srl_si64
#define _m_psrlqi _mm_srli_si64
#define _m_por _mm_or_si64
#define _m_pxor _mm_xor_si64
#define _m_pcmpeqb _mm_cmpeq_pi8
#define _m_pcmpeqw _mm_cmpeq_pi16
#define _m_pcmpeqd _mm_cmpeq_pi32
#define _m_pcmpgtb _mm_cmpgt_pi8
#define _m_pcmpgtw _mm_cmpgt_pi16
#define _m_pcmpgtd _mm_cmpgt_pi32
+/