This repository has been archived by the owner on May 31, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 31
/
Copy pathintrin.h
1135 lines (1112 loc) · 66.1 KB
/
intrin.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/***
* intrin.h - declarations/definitions for platform specific intrinsic functions.
*
* Copyright (c) Microsoft Corporation. All rights reserved.
*Purpose:
* This include file contains the declarations for platform specific intrinsic
* functions, or will include other files that have declaration of intrinsic
* functions. Also there will be some platform specific macros to be used with
* intrinsic functions.
*
****/
#pragma once
#define __INTRIN_H_
#ifndef RC_INVOKED
#ifndef __midl
#include <vcruntime.h>
#include <setjmp.h>
#ifndef _M_CEE_PURE
#if defined (_M_IX86) || defined (_M_X64)
#include <immintrin.h>
#include <ammintrin.h>
#endif
#if defined (_M_IX86)
#include <mm3dnow.h>
#endif
#if defined (_M_ARM)
#include <armintr.h>
#include <arm_neon.h>
#endif
#if defined (_M_ARM64)
#include <arm64intr.h>
#include <arm64_neon.h>
#endif
#endif
#if defined (__cplusplus)
extern "C" {
#endif /* defined (__cplusplus) */
/*
** __MACHINE : everything
** __MACHINEX86 : x86 only
** __MACHINEX64 : x64 only
** __MACHINEX86_X64 : x86 and x64 only
** __MACHINEARM : ARM only
** __MACHINEARM64 : ARM64 only
** __MACHINEARM_ARM64 : ARM and ARM64 only
** __MACHINEARM_ARM64_X64 : ARM and 64-bit Arch only
** __MACHINEARM64_X64 : ARM64 and x64 only
** __MACHINEWVMPURE : /clr:pure only
** __MACHINEZ : nothing
*/
#define __MACHINEX86 __MACHINE
#define __MACHINEX64 __MACHINE
#define __MACHINEX86_X64 __MACHINE
#define __MACHINEARM __MACHINE
#define __MACHINEARM64 __MACHINE
#define __MACHINEARM_ARM64 __MACHINE
#define __MACHINEARM_ARM64_X64 __MACHINE
#define __MACHINEARM64_X64 __MACHINE
/* Most intrinsics not available to pure managed code */
#if defined (_M_CEE_PURE)
#define __MACHINE(X) __MACHINEZ(X)
#define __MACHINEWVMPURE(X) X;
#else /* defined (_M_CEE_PURE) */
#define __MACHINE(X) X;
#define __MACHINEWVMPURE(X) __MACHINEZ(X)
#endif /* defined (_M_CEE_PURE) */
#define __MACHINEZ(X) /* NOTHING */
#if !defined (_M_IX86)
#undef __MACHINEX86
#define __MACHINEX86 __MACHINEZ
#endif /* !defined (_M_IX86) */
#if !defined (_M_X64)
#undef __MACHINEX64
#define __MACHINEX64 __MACHINEZ
#endif /* !defined (_M_X64) */
#if !(defined (_M_IX86) || defined (_M_X64))
#undef __MACHINEX86_X64
#define __MACHINEX86_X64 __MACHINEZ
#endif /* !(defined (_M_IX86) || defined (_M_X64)) */
#if !defined (_M_ARM)
#undef __MACHINEARM
#define __MACHINEARM __MACHINEZ
#endif /* !defined (_M_ARM) */
/* For compatibility with <winnt.h>, some intrinsics are __cdecl except on x64 */
#if defined (_M_X64)
#define __MACHINECALL_CDECL_OR_DEFAULT
#else
#define __MACHINECALL_CDECL_OR_DEFAULT __cdecl
#endif
#if !defined(_M_ARM64)
#undef __MACHINEARM64
#define __MACHINEARM64 __MACHINEZ
#endif
#if !(defined(_M_ARM) || defined(_M_ARM64))
#undef __MACHINEARM_ARM64
#define __MACHINEARM_ARM64 __MACHINEZ
#endif
#if !(defined(_M_ARM) || defined(_M_X64) || defined(_M_ARM64))
#undef __MACHINEARM_ARM64_X64
#define __MACHINEARM_ARM64_X64 __MACHINEZ
#endif
#if !(defined(_M_X64) || defined(_M_ARM64))
#undef __MACHINEARM64_X64
#define __MACHINEARM64_X64 __MACHINEZ
#endif
__MACHINEARM(int _AddSatInt(int, int))
__MACHINE(void * _AddressOfReturnAddress(void))
__MACHINE(unsigned char _BitScanForward(unsigned long * _Index, unsigned long _Mask))
__MACHINEX64(unsigned char _BitScanForward64(unsigned long * _Index, unsigned __int64 _Mask))
__MACHINEARM64(unsigned char _BitScanForward64(unsigned long * _Index, unsigned __int64 _Mask))
__MACHINE(unsigned char _BitScanReverse(unsigned long * _Index, unsigned long _Mask))
__MACHINEX64(unsigned char _BitScanReverse64(unsigned long * _Index, unsigned __int64 _Mask))
__MACHINEARM64(unsigned char _BitScanReverse64(unsigned long * _Index, unsigned __int64 _Mask))
__MACHINEARM_ARM64(double _CopyDoubleFromInt64(__int64))
__MACHINEARM_ARM64(float _CopyFloatFromInt32(__int32))
__MACHINEARM_ARM64(__int32 _CopyInt32FromFloat(float))
__MACHINEARM_ARM64(__int64 _CopyInt64FromDouble(double))
__MACHINEARM_ARM64(unsigned int _CountLeadingOnes(unsigned long))
__MACHINEARM_ARM64(unsigned int _CountLeadingOnes64(unsigned __int64))
__MACHINEARM_ARM64(unsigned int _CountLeadingSigns(long))
__MACHINEARM_ARM64(unsigned int _CountLeadingSigns64(__int64))
__MACHINEARM_ARM64(unsigned int _CountLeadingZeros(unsigned long))
__MACHINEARM_ARM64(unsigned int _CountLeadingZeros64(unsigned __int64))
__MACHINEARM(unsigned int _CountOneBits(unsigned long))
__MACHINEARM(unsigned int _CountOneBits64(unsigned __int64))
__MACHINEARM(int _DAddSatInt(int, int))
__MACHINEARM(int _DSubSatInt(int, int))
__MACHINEARM_ARM64(long _InterlockedAdd(long volatile * _Addend, long _Value))
__MACHINEARM_ARM64(__int64 _InterlockedAdd64(__int64 volatile * _Addend, __int64 _Value))
__MACHINEARM_ARM64(__int64 _InterlockedAdd64_acq(__int64 volatile * _Addend, __int64 _Value))
__MACHINEARM_ARM64(__int64 _InterlockedAdd64_nf(__int64 volatile * _Addend, __int64 _Value))
__MACHINEARM_ARM64(__int64 _InterlockedAdd64_rel(__int64 volatile * _Addend, __int64 _Value))
__MACHINEX86(long _InterlockedAddLargeStatistic(__int64 volatile * _Addend, long _Value))
__MACHINEARM_ARM64(long _InterlockedAdd_acq(long volatile * _Addend, long _Value))
__MACHINEARM_ARM64(long _InterlockedAdd_nf(long volatile * _Addend, long _Value))
__MACHINEARM_ARM64(long _InterlockedAdd_rel(long volatile * _Addend, long _Value))
__MACHINE(long _InterlockedAnd(long volatile * _Value, long _Mask))
__MACHINE(short _InterlockedAnd16(short volatile * _Value, short _Mask))
__MACHINEARM_ARM64(short _InterlockedAnd16_acq(short volatile * _Value, short _Mask))
__MACHINEARM_ARM64(short _InterlockedAnd16_nf(short volatile * _Value, short _Mask))
__MACHINEX64(short _InterlockedAnd16_np(short volatile * _Value, short _Mask))
__MACHINEARM_ARM64(short _InterlockedAnd16_rel(short volatile * _Value, short _Mask))
__MACHINEARM_ARM64_X64(__int64 _InterlockedAnd64(__int64 volatile * _Value, __int64 _Mask))
__MACHINEARM_ARM64(__int64 _InterlockedAnd64_acq(__int64 volatile * _Value, __int64 _Mask))
__MACHINEARM_ARM64(__int64 _InterlockedAnd64_nf(__int64 volatile * _Value, __int64 _Mask))
__MACHINEX64(__int64 _InterlockedAnd64_np(__int64 volatile * _Value, __int64 _Mask))
__MACHINEARM_ARM64(__int64 _InterlockedAnd64_rel(__int64 volatile * _Value, __int64 _Mask))
__MACHINE(char _InterlockedAnd8(char volatile * _Value, char _Mask))
__MACHINEARM_ARM64(char _InterlockedAnd8_acq(char volatile * _Value, char _Mask))
__MACHINEARM_ARM64(char _InterlockedAnd8_nf(char volatile * _Value, char _Mask))
__MACHINEX64(char _InterlockedAnd8_np(char volatile * _Value, char _Mask))
__MACHINEARM_ARM64(char _InterlockedAnd8_rel(char volatile * _Value, char _Mask))
__MACHINEARM_ARM64(long _InterlockedAnd_acq(long volatile * _Value, long _Mask))
__MACHINEARM_ARM64(long _InterlockedAnd_nf(long volatile * _Value, long _Mask))
__MACHINEX64(long _InterlockedAnd_np(long volatile * _Value, long _Mask))
__MACHINEARM_ARM64(long _InterlockedAnd_rel(long volatile * _Value, long _Mask))
__MACHINE(long __MACHINECALL_CDECL_OR_DEFAULT _InterlockedCompareExchange(long volatile * _Destination, long _Exchange, long _Comparand))
__MACHINEWVMPURE(long _InterlockedCompareExchange(long volatile * _Destination, long _Exchange, long _Comparand))
__MACHINEARM64_X64(unsigned char _InterlockedCompareExchange128(__int64 volatile * _Destination, __int64 _ExchangeHigh, __int64 _ExchangeLow, __int64 * _ComparandResult))
__MACHINEARM64(unsigned char _InterlockedCompareExchange128_acq(__int64 volatile * _Destination, __int64 _ExchangeHigh, __int64 _ExchangeLow, __int64 * _ComparandResult))
__MACHINEARM64(unsigned char _InterlockedCompareExchange128_nf(__int64 volatile * _Destination, __int64 _ExchangeHigh, __int64 _ExchangeLow, __int64 * _ComparandResult))
__MACHINEX64(unsigned char _InterlockedCompareExchange128_np(__int64 volatile * _Destination, __int64 _ExchangeHigh, __int64 _ExchangeLow, __int64 * _ComparandResult))
__MACHINEARM64(unsigned char _InterlockedCompareExchange128_rel(__int64 volatile * _Destination, __int64 _ExchangeHigh, __int64 _ExchangeLow, __int64 * _ComparandResult))
__MACHINE(short _InterlockedCompareExchange16(short volatile * _Destination, short _Exchange, short _Comparand))
__MACHINEARM_ARM64(short _InterlockedCompareExchange16_acq(short volatile * _Destination, short _Exchange, short _Comparand))
__MACHINEARM_ARM64(short _InterlockedCompareExchange16_nf(short volatile * _Destination, short _Exchange, short _Comparand))
__MACHINEX64(short _InterlockedCompareExchange16_np(short volatile * _Destination, short _Exchange, short _Comparand))
__MACHINEARM_ARM64(short _InterlockedCompareExchange16_rel(short volatile * _Destination, short _Exchange, short _Comparand))
__MACHINE(__int64 _InterlockedCompareExchange64(__int64 volatile * _Destination, __int64 _Exchange, __int64 _Comparand))
__MACHINEARM_ARM64(__int64 _InterlockedCompareExchange64_acq(__int64 volatile * _Destination, __int64 _Exchange, __int64 _Comparand))
__MACHINEARM_ARM64(__int64 _InterlockedCompareExchange64_nf(__int64 volatile * _Destination, __int64 _Exchange, __int64 _Comparand))
__MACHINEX64(__int64 _InterlockedCompareExchange64_np(__int64 volatile * _Destination, __int64 _Exchange, __int64 _Comparand))
__MACHINEARM_ARM64(__int64 _InterlockedCompareExchange64_rel(__int64 volatile * _Destination, __int64 _Exchange, __int64 _Comparand))
__MACHINE(char _InterlockedCompareExchange8(char volatile * _Destination, char _Exchange, char _Comparand))
__MACHINEARM_ARM64(char _InterlockedCompareExchange8_acq(char volatile * _Destination, char _Exchange, char _Comparand))
__MACHINEARM_ARM64(char _InterlockedCompareExchange8_nf(char volatile * _Destination, char _Exchange, char _Comparand))
__MACHINEARM_ARM64(char _InterlockedCompareExchange8_rel(char volatile * _Destination, char _Exchange, char _Comparand))
__MACHINE(void * _InterlockedCompareExchangePointer(void * volatile * _Destination, void * _Exchange, void * _Comparand))
__MACHINEARM_ARM64(void * _InterlockedCompareExchangePointer_acq(void * volatile * _Destination, void * _Exchange, void * _Comparand))
__MACHINEARM_ARM64(void * _InterlockedCompareExchangePointer_nf(void * volatile * _Destination, void * _Exchange, void * _Comparand))
__MACHINEX64(void * _InterlockedCompareExchangePointer_np(void * volatile * _Destination, void * _Exchange, void * _Comparand))
__MACHINEARM_ARM64(void * _InterlockedCompareExchangePointer_rel(void * volatile * _Destination, void * _Exchange, void * _Comparand))
__MACHINEARM_ARM64(long _InterlockedCompareExchange_acq(long volatile * _Destination, long _Exchange, long _Comparand))
__MACHINEARM_ARM64(long _InterlockedCompareExchange_nf(long volatile * _Destination, long _Exchange, long _Comparand))
__MACHINEX64(long _InterlockedCompareExchange_np(long volatile * _Destination, long _Exchange, long _Comparand))
__MACHINEARM_ARM64(long _InterlockedCompareExchange_rel(long volatile * _Destination, long _Exchange, long _Comparand))
__MACHINE(long __MACHINECALL_CDECL_OR_DEFAULT _InterlockedDecrement(long volatile * _Addend))
__MACHINEWVMPURE(long _InterlockedDecrement(long volatile * _Addend))
__MACHINE(short _InterlockedDecrement16(short volatile * _Addend))
__MACHINEARM_ARM64(short _InterlockedDecrement16_acq(short volatile * _Addend))
__MACHINEARM_ARM64(short _InterlockedDecrement16_nf(short volatile * _Addend))
__MACHINEARM_ARM64(short _InterlockedDecrement16_rel(short volatile * _Addend))
__MACHINEARM_ARM64_X64(__int64 _InterlockedDecrement64(__int64 volatile * _Addend))
__MACHINEARM_ARM64(__int64 _InterlockedDecrement64_acq(__int64 volatile * _Addend))
__MACHINEARM_ARM64(__int64 _InterlockedDecrement64_nf(__int64 volatile * _Addend))
__MACHINEARM_ARM64(__int64 _InterlockedDecrement64_rel(__int64 volatile * _Addend))
__MACHINEARM_ARM64(long _InterlockedDecrement_acq(long volatile * _Addend))
__MACHINEARM_ARM64(long _InterlockedDecrement_nf(long volatile * _Addend))
__MACHINEARM_ARM64(long _InterlockedDecrement_rel(long volatile * _Addend))
__MACHINE(long __MACHINECALL_CDECL_OR_DEFAULT _InterlockedExchange(long volatile * _Target, long _Value))
__MACHINEWVMPURE(long __MACHINECALL_CDECL_OR_DEFAULT _InterlockedExchange(long volatile * _Target, long _Value))
__MACHINE(short _InterlockedExchange16(short volatile * _Target, short _Value))
__MACHINEARM_ARM64(short _InterlockedExchange16_acq(short volatile * _Target, short _Value))
__MACHINEARM_ARM64(short _InterlockedExchange16_nf(short volatile * _Target, short _Value))
__MACHINEARM_ARM64(short _InterlockedExchange16_rel(short volatile * _Target, short _Value))
__MACHINEARM_ARM64_X64(__int64 _InterlockedExchange64(__int64 volatile * _Target, __int64 _Value))
__MACHINEARM_ARM64(__int64 _InterlockedExchange64_acq(__int64 volatile * _Target, __int64 _Value))
__MACHINEARM_ARM64(__int64 _InterlockedExchange64_nf(__int64 volatile * _Target, __int64 _Value))
__MACHINEARM_ARM64(__int64 _InterlockedExchange64_rel(__int64 volatile * _Target, __int64 _Value))
__MACHINE(char _InterlockedExchange8(char volatile * _Target, char _Value))
__MACHINEARM_ARM64(char _InterlockedExchange8_acq(char volatile * _Target, char _Value))
__MACHINEARM_ARM64(char _InterlockedExchange8_nf(char volatile * _Target, char _Value))
__MACHINEARM_ARM64(char _InterlockedExchange8_rel(char volatile * _Target, char _Value))
__MACHINE(long __MACHINECALL_CDECL_OR_DEFAULT _InterlockedExchangeAdd(long volatile * _Addend, long _Value))
__MACHINE(short _InterlockedExchangeAdd16(short volatile * _Addend, short _Value))
__MACHINEARM_ARM64(short _InterlockedExchangeAdd16_acq(short volatile * _Addend, short _Value))
__MACHINEARM_ARM64(short _InterlockedExchangeAdd16_nf(short volatile * _Addend, short _Value))
__MACHINEARM_ARM64(short _InterlockedExchangeAdd16_rel(short volatile * _Addend, short _Value))
__MACHINEARM_ARM64_X64(__int64 _InterlockedExchangeAdd64(__int64 volatile * _Addend, __int64 _Value))
__MACHINEARM_ARM64(__int64 _InterlockedExchangeAdd64_acq(__int64 volatile * _Addend, __int64 _Value))
__MACHINEARM_ARM64(__int64 _InterlockedExchangeAdd64_nf(__int64 volatile * _Addend, __int64 _Value))
__MACHINEARM_ARM64(__int64 _InterlockedExchangeAdd64_rel(__int64 volatile * _Addend, __int64 _Value))
__MACHINE(char _InterlockedExchangeAdd8(char volatile * _Addend, char _Value))
__MACHINEARM_ARM64(char _InterlockedExchangeAdd8_acq(char volatile * _Addend, char _Value))
__MACHINEARM_ARM64(char _InterlockedExchangeAdd8_nf(char volatile * _Addend, char _Value))
__MACHINEARM_ARM64(char _InterlockedExchangeAdd8_rel(char volatile * _Addend, char _Value))
__MACHINEARM_ARM64(long _InterlockedExchangeAdd_acq(long volatile * _Addend, long _Value))
__MACHINEARM_ARM64(long _InterlockedExchangeAdd_nf(long volatile * _Addend, long _Value))
__MACHINEARM_ARM64(long _InterlockedExchangeAdd_rel(long volatile * _Addend, long _Value))
__MACHINE(void * _InterlockedExchangePointer(void * volatile * _Target, void * _Value))
__MACHINEARM_ARM64(void * _InterlockedExchangePointer_acq(void * volatile * _Target, void * _Value))
__MACHINEARM_ARM64(void * _InterlockedExchangePointer_nf(void * volatile * _Target, void * _Value))
__MACHINEARM_ARM64(void * _InterlockedExchangePointer_rel(void * volatile * _Target, void * _Value))
__MACHINEARM_ARM64(long _InterlockedExchange_acq(long volatile * _Target, long _Value))
__MACHINEARM_ARM64(long _InterlockedExchange_nf(long volatile * _Target, long _Value))
__MACHINEARM_ARM64(long _InterlockedExchange_rel(long volatile * _Target, long _Value))
__MACHINE(long __MACHINECALL_CDECL_OR_DEFAULT _InterlockedIncrement(long volatile * _Addend))
__MACHINEWVMPURE(long _InterlockedIncrement(long volatile * _Addend))
__MACHINE(short _InterlockedIncrement16(short volatile * _Addend))
__MACHINEARM_ARM64(short _InterlockedIncrement16_acq(short volatile * _Addend))
__MACHINEARM_ARM64(short _InterlockedIncrement16_nf(short volatile * _Addend))
__MACHINEARM_ARM64(short _InterlockedIncrement16_rel(short volatile * _Addend))
__MACHINEARM_ARM64_X64(__int64 _InterlockedIncrement64(__int64 volatile * _Addend))
__MACHINEARM_ARM64(__int64 _InterlockedIncrement64_acq(__int64 volatile * _Addend))
__MACHINEARM_ARM64(__int64 _InterlockedIncrement64_nf(__int64 volatile * _Addend))
__MACHINEARM_ARM64(__int64 _InterlockedIncrement64_rel(__int64 volatile * _Addend))
__MACHINEARM_ARM64(long _InterlockedIncrement_acq(long volatile * _Addend))
__MACHINEARM_ARM64(long _InterlockedIncrement_nf(long volatile * _Addend))
__MACHINEARM_ARM64(long _InterlockedIncrement_rel(long volatile * _Addend))
__MACHINE(long _InterlockedOr(long volatile * _Value, long _Mask))
__MACHINE(short _InterlockedOr16(short volatile * _Value, short _Mask))
__MACHINEARM_ARM64(short _InterlockedOr16_acq(short volatile * _Value, short _Mask))
__MACHINEARM_ARM64(short _InterlockedOr16_nf(short volatile * _Value, short _Mask))
__MACHINEX64(short _InterlockedOr16_np(short volatile * _Value, short _Mask))
__MACHINEARM_ARM64(short _InterlockedOr16_rel(short volatile * _Value, short _Mask))
__MACHINEARM_ARM64_X64(__int64 _InterlockedOr64(__int64 volatile * _Value, __int64 _Mask))
__MACHINEARM_ARM64(__int64 _InterlockedOr64_acq(__int64 volatile * _Value, __int64 _Mask))
__MACHINEARM_ARM64(__int64 _InterlockedOr64_nf(__int64 volatile * _Value, __int64 _Mask))
__MACHINEX64(__int64 _InterlockedOr64_np(__int64 volatile * _Value, __int64 _Mask))
__MACHINEARM_ARM64(__int64 _InterlockedOr64_rel(__int64 volatile * _Value, __int64 _Mask))
__MACHINE(char _InterlockedOr8(char volatile * _Value, char _Mask))
__MACHINEARM_ARM64(char _InterlockedOr8_acq(char volatile * _Value, char _Mask))
__MACHINEARM_ARM64(char _InterlockedOr8_nf(char volatile * _Value, char _Mask))
__MACHINEX64(char _InterlockedOr8_np(char volatile * _Value, char _Mask))
__MACHINEARM_ARM64(char _InterlockedOr8_rel(char volatile * _Value, char _Mask))
__MACHINEARM_ARM64(long _InterlockedOr_acq(long volatile * _Value, long _Mask))
__MACHINEARM_ARM64(long _InterlockedOr_nf(long volatile * _Value, long _Mask))
__MACHINEX64(long _InterlockedOr_np(long volatile * _Value, long _Mask))
__MACHINEARM_ARM64(long _InterlockedOr_rel(long volatile * _Value, long _Mask))
__MACHINE(long _InterlockedXor(long volatile * _Value, long _Mask))
__MACHINE(short _InterlockedXor16(short volatile * _Value, short _Mask))
__MACHINEARM_ARM64(short _InterlockedXor16_acq(short volatile * _Value, short _Mask))
__MACHINEARM_ARM64(short _InterlockedXor16_nf(short volatile * _Value, short _Mask))
__MACHINEX64(short _InterlockedXor16_np(short volatile * _Value, short _Mask))
__MACHINEARM_ARM64(short _InterlockedXor16_rel(short volatile * _Value, short _Mask))
__MACHINEARM_ARM64_X64(__int64 _InterlockedXor64(__int64 volatile * _Value, __int64 _Mask))
__MACHINEARM_ARM64(__int64 _InterlockedXor64_acq(__int64 volatile * _Value, __int64 _Mask))
__MACHINEARM_ARM64(__int64 _InterlockedXor64_nf(__int64 volatile * _Value, __int64 _Mask))
__MACHINEX64(__int64 _InterlockedXor64_np(__int64 volatile * _Value, __int64 _Mask))
__MACHINEARM_ARM64(__int64 _InterlockedXor64_rel(__int64 volatile * _Value, __int64 _Mask))
__MACHINE(char _InterlockedXor8(char volatile * _Value, char _Mask))
__MACHINEARM_ARM64(char _InterlockedXor8_acq(char volatile * _Value, char _Mask))
__MACHINEARM_ARM64(char _InterlockedXor8_nf(char volatile * _Value, char _Mask))
__MACHINEX64(char _InterlockedXor8_np(char volatile * _Value, char _Mask))
__MACHINEARM_ARM64(char _InterlockedXor8_rel(char volatile * _Value, char _Mask))
__MACHINEARM_ARM64(long _InterlockedXor_acq(long volatile * _Value, long _Mask))
__MACHINEARM_ARM64(long _InterlockedXor_nf(long volatile * _Value, long _Mask))
__MACHINEX64(long _InterlockedXor_np(long volatile * _Value, long _Mask))
__MACHINEARM_ARM64(long _InterlockedXor_rel(long volatile * _Value, long _Mask))
__MACHINEARM(unsigned int _MoveFromCoprocessor(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int))
__MACHINEARM(unsigned int _MoveFromCoprocessor2(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int))
__MACHINEARM(unsigned __int64 _MoveFromCoprocessor64(unsigned int, unsigned int, unsigned int))
__MACHINEARM(void _MoveToCoprocessor(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int))
__MACHINEARM(void _MoveToCoprocessor2(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int))
__MACHINEARM(void _MoveToCoprocessor64(unsigned __int64, unsigned int, unsigned int, unsigned int))
__MACHINEARM(long _MulHigh(long, long))
__MACHINEARM(unsigned long _MulUnsignedHigh(unsigned long, unsigned long))
__MACHINE(void _ReadBarrier(void))
__MACHINEARM(int _ReadStatusReg(int))
__MACHINEARM64(__int64 _ReadStatusReg(int))
__MACHINEARM64(unsigned __int64 __getReg(int))
__MACHINEARM64(unsigned __int64 __getCallerReg(int))
__MACHINEARM64(double __getRegFp(int))
__MACHINEARM64(double __getCallerRegFp(int))
__MACHINE(void _ReadWriteBarrier(void))
__MACHINE(void * _ReturnAddress(void))
__MACHINEARM(int _SubSatInt(int, int))
__MACHINE(void _WriteBarrier(void))
__MACHINEARM(void _WriteStatusReg(int, int, int))
__MACHINEARM64(void _WriteStatusReg(int, __int64))
__MACHINEARM64(void __setReg(int, unsigned __int64))
__MACHINEARM64(void __setCallerReg(int, unsigned __int64))
__MACHINEARM64(void __setRegFp(int, double))
__MACHINEARM64(void __setCallerRegFp(int, double))
__MACHINEX86(void __addfsbyte(unsigned long, unsigned char))
__MACHINEX86(void __addfsdword(unsigned long, unsigned long))
__MACHINEX86(void __addfsword(unsigned long, unsigned short))
__MACHINEX64(void __addgsbyte(unsigned long, unsigned char))
__MACHINEX64(void __addgsdword(unsigned long, unsigned long))
__MACHINEX64(void __addgsqword(unsigned long, unsigned __int64))
__MACHINEX64(void __addgsword(unsigned long, unsigned short))
__MACHINEARM64(void __addx18byte(unsigned long, unsigned char))
__MACHINEARM64(void __addx18dword(unsigned long, unsigned long))
__MACHINEARM64(void __addx18qword(unsigned long, unsigned __int64))
__MACHINEARM64(void __addx18word(unsigned long, unsigned short))
__MACHINE(void __code_seg(const char *))
__MACHINEX86_X64(void __cpuid(int[4], int))
__MACHINEX86_X64(void __cpuidex(int[4], int, int))
__MACHINE(void __cdecl __debugbreak(void))
__MACHINEARM_ARM64(void __emit(unsigned __int32))
__MACHINEX86_X64(__int64 __emul(int, int))
__MACHINEX86_X64(unsigned __int64 __emulu(unsigned int, unsigned int))
__MACHINE(__declspec(noreturn) void __fastfail(unsigned int))
__MACHINEX64(void __faststorefence(void))
__MACHINEX86_X64(unsigned int __getcallerseflags(void))
__MACHINEX86_X64(void __halt(void))
__MACHINEARM_ARM64(unsigned int __hvc(unsigned int, ...))
__MACHINEARM64(void __break(int))
__MACHINEX86_X64(unsigned char __inbyte(unsigned short))
__MACHINEX86_X64(void __inbytestring(unsigned short, unsigned char *, unsigned long))
__MACHINEX86(void __incfsbyte(unsigned long))
__MACHINEX86(void __incfsdword(unsigned long))
__MACHINEX86(void __incfsword(unsigned long))
__MACHINEX64(void __incgsbyte(unsigned long))
__MACHINEX64(void __incgsdword(unsigned long))
__MACHINEX64(void __incgsqword(unsigned long))
__MACHINEX64(void __incgsword(unsigned long))
__MACHINEARM64(void __incx18byte(unsigned long))
__MACHINEARM64(void __incx18dword(unsigned long))
__MACHINEARM64(void __incx18qword(unsigned long))
__MACHINEARM64(void __incx18word(unsigned long))
__MACHINEX86_X64(unsigned long __indword(unsigned short))
__MACHINEX86_X64(void __indwordstring(unsigned short, unsigned long *, unsigned long))
__MACHINEX86_X64(void __int2c(void))
__MACHINEX86_X64(void __invlpg(void *))
__MACHINEX86_X64(unsigned short __inword(unsigned short))
__MACHINEX86_X64(void __inwordstring(unsigned short, unsigned short *, unsigned long))
__MACHINEARM_ARM64(__int16 __iso_volatile_load16(const volatile __int16 *))
__MACHINEARM_ARM64(__int32 __iso_volatile_load32(const volatile __int32 *))
__MACHINEARM_ARM64(__int64 __iso_volatile_load64(const volatile __int64 *))
__MACHINEARM_ARM64(__int8 __iso_volatile_load8(const volatile __int8 *))
__MACHINEARM_ARM64(void __iso_volatile_store16(volatile __int16 *, __int16))
__MACHINEARM_ARM64(void __iso_volatile_store32(volatile __int32 *, __int32))
__MACHINEARM_ARM64(void __iso_volatile_store64(volatile __int64 *, __int64))
__MACHINEARM_ARM64(void __iso_volatile_store8(volatile __int8 *, __int8))
__MACHINEARM(__int64 __ldrexd(const volatile __int64 *))
__MACHINEX86_X64(void __lidt(void *))
__MACHINEX86_X64(unsigned __int64 __ll_lshift(unsigned __int64, int))
__MACHINEX86_X64(__int64 __ll_rshift(__int64, int))
__MACHINEX86_X64(unsigned int __lzcnt(unsigned int))
__MACHINEX86_X64(unsigned short __lzcnt16(unsigned short))
__MACHINEX64(unsigned __int64 __lzcnt64(unsigned __int64))
__MACHINEX86_X64(void __movsb(unsigned char *, unsigned char const *, size_t))
__MACHINEX86_X64(void __movsd(unsigned long *, unsigned long const *, size_t))
__MACHINEX64(void __movsq(unsigned long long *, unsigned long long const *, size_t))
__MACHINEX86_X64(void __movsw(unsigned short *, unsigned short const *, size_t))
__MACHINEARM64_X64(__int64 __mulh(__int64, __int64))
__MACHINE(void __nop(void))
__MACHINEX86_X64(void __nvreg_restore_fence(void))
__MACHINEX86_X64(void __nvreg_save_fence(void))
__MACHINEX86_X64(void __outbyte(unsigned short, unsigned char))
__MACHINEX86_X64(void __outbytestring(unsigned short, unsigned char *, unsigned long))
__MACHINEX86_X64(void __outdword(unsigned short, unsigned long))
__MACHINEX86_X64(void __outdwordstring(unsigned short, unsigned long *, unsigned long))
__MACHINEX86_X64(void __outword(unsigned short, unsigned short))
__MACHINEX86_X64(void __outwordstring(unsigned short, unsigned short *, unsigned long))
__MACHINEX86_X64(unsigned int __popcnt(unsigned int))
__MACHINEX86_X64(unsigned short __popcnt16(unsigned short))
__MACHINEX64(unsigned __int64 __popcnt64(unsigned __int64))
__MACHINEARM_ARM64(void __cdecl __prefetch(const void *))
__MACHINEARM(void __cdecl __prefetchw(const void *))
__MACHINEARM(unsigned __int64 __rdpmccntr64(void))
__MACHINEX86_X64(unsigned __int64 __rdtsc(void))
__MACHINEX86_X64(unsigned __int64 __rdtscp(unsigned int *))
__MACHINEX64(unsigned __int64 __readcr0(void))
__MACHINEX86(unsigned long __readcr0(void))
__MACHINEX64(unsigned __int64 __readcr2(void))
__MACHINEX86(unsigned long __readcr2(void))
__MACHINEX64(unsigned __int64 __readcr3(void))
__MACHINEX86(unsigned long __readcr3(void))
__MACHINEX64(unsigned __int64 __readcr4(void))
__MACHINEX86(unsigned long __readcr4(void))
__MACHINEX64(unsigned __int64 __readcr8(void))
__MACHINEX86(unsigned long __readcr8(void))
__MACHINEX64(unsigned __int64 __readdr(unsigned int))
__MACHINEX86(unsigned int __readdr(unsigned int))
__MACHINEX64(unsigned __int64 __readeflags(void))
__MACHINEX86(unsigned int __readeflags(void))
__MACHINEX86(unsigned char __readfsbyte(unsigned long))
__MACHINEX86(unsigned long __readfsdword(unsigned long))
__MACHINEX86(unsigned __int64 __readfsqword(unsigned long))
__MACHINEX86(unsigned short __readfsword(unsigned long))
__MACHINEX64(unsigned char __readgsbyte(unsigned long))
__MACHINEX64(unsigned long __readgsdword(unsigned long))
__MACHINEX64(unsigned __int64 __readgsqword(unsigned long))
__MACHINEX64(unsigned short __readgsword(unsigned long))
__MACHINEX86_X64(unsigned __int64 __readmsr(unsigned long))
__MACHINEX86_X64(unsigned __int64 __readpmc(unsigned long))
__MACHINEARM64(unsigned char __readx18byte(unsigned long))
__MACHINEARM64(unsigned long __readx18dword(unsigned long))
__MACHINEARM64(unsigned __int64 __readx18qword(unsigned long))
__MACHINEARM64(unsigned short __readx18word(unsigned long))
__MACHINEX86_X64(unsigned long __segmentlimit(unsigned long))
__MACHINEARM_ARM64(void __sev(void))
__MACHINEX64(unsigned __int64 __shiftleft128(unsigned __int64 _LowPart, unsigned __int64 _HighPart, unsigned char _Shift))
__MACHINEX64(unsigned __int64 __shiftright128(unsigned __int64 _LowPart, unsigned __int64 _HighPart, unsigned char _Shift))
__MACHINEX86_X64(void __sidt(void *))
__MACHINEARM_ARM64(void __static_assert(int, const char *))
__MACHINEX86_X64(void __stosb(unsigned char *, unsigned char, size_t))
__MACHINEX86_X64(void __stosd(unsigned long *, unsigned long, size_t))
__MACHINEX64(void __stosq(unsigned __int64 *, unsigned __int64, size_t))
__MACHINEX86_X64(void __stosw(unsigned short *, unsigned short, size_t))
__MACHINEX86_X64(void __svm_clgi(void))
__MACHINEX86_X64(void __svm_invlpga(void *, int))
__MACHINEX86_X64(void __svm_skinit(int))
__MACHINEX86_X64(void __svm_stgi(void))
__MACHINEX86_X64(void __svm_vmload(size_t))
__MACHINEX86_X64(void __svm_vmrun(size_t))
__MACHINEX86_X64(void __svm_vmsave(size_t))
__MACHINEARM(unsigned int __swi(unsigned int, ...))
__MACHINEARM_ARM64(unsigned int __svc(unsigned int, ...))
__MACHINEARM64(unsigned int __hlt(unsigned int, ...))
__MACHINEARM64(unsigned int __sys(int, __int64))
__MACHINEARM(int __trap(int, ...))
__MACHINEX86_X64(void __ud2(void))
__MACHINEX86_X64(unsigned __int64 __ull_rshift(unsigned __int64, int))
__MACHINEARM64_X64(unsigned __int64 __umulh(unsigned __int64, unsigned __int64))
__MACHINEX86_X64(void __vmx_off(void))
__MACHINEX64(unsigned char __vmx_on(unsigned __int64 *))
__MACHINEX64(unsigned char __vmx_vmclear(unsigned __int64 *))
__MACHINEX64(unsigned char __vmx_vmlaunch(void))
__MACHINEX64(unsigned char __vmx_vmptrld(unsigned __int64 *))
__MACHINEX86_X64(void __vmx_vmptrst(unsigned __int64 *))
__MACHINEX64(unsigned char __vmx_vmread(size_t, size_t *))
__MACHINEX64(unsigned char __vmx_vmresume(void))
__MACHINEX64(unsigned char __vmx_vmwrite(size_t, size_t))
__MACHINEX86_X64(void __wbinvd(void))
__MACHINEARM_ARM64(void __wfe(void))
__MACHINEARM_ARM64(void __wfi(void))
__MACHINEX64(void __writecr0(unsigned __int64))
__MACHINEX86(void __writecr0(unsigned int))
__MACHINEX64(void __writecr3(unsigned __int64))
__MACHINEX86(void __writecr3(unsigned int))
__MACHINEX64(void __writecr4(unsigned __int64))
__MACHINEX86(void __writecr4(unsigned int))
__MACHINEX64(void __writecr8(unsigned __int64))
__MACHINEX86(void __writecr8(unsigned int))
__MACHINEX64(void __writedr(unsigned int, unsigned __int64))
__MACHINEX86(void __writedr(unsigned int, unsigned int))
__MACHINEX64(void __writeeflags(unsigned __int64))
__MACHINEX86(void __writeeflags(unsigned int))
__MACHINEX86(void __writefsbyte(unsigned long, unsigned char))
__MACHINEX86(void __writefsdword(unsigned long, unsigned long))
__MACHINEX86(void __writefsqword(unsigned long, unsigned __int64))
__MACHINEX86(void __writefsword(unsigned long, unsigned short))
__MACHINEX64(void __writegsbyte(unsigned long, unsigned char))
__MACHINEX64(void __writegsdword(unsigned long, unsigned long))
__MACHINEX64(void __writegsqword(unsigned long, unsigned __int64))
__MACHINEX64(void __writegsword(unsigned long, unsigned short))
__MACHINEX86_X64(void __writemsr(unsigned long, unsigned __int64))
__MACHINEARM64(void __writex18byte(unsigned long, unsigned char))
__MACHINEARM64(void __writex18dword(unsigned long, unsigned long))
__MACHINEARM64(void __writex18qword(unsigned long, unsigned __int64))
__MACHINEARM64(void __writex18word(unsigned long, unsigned short))
__MACHINEARM_ARM64(void __yield(void))
__MACHINE(unsigned char _bittest(long const *, long))
__MACHINEARM64_X64(unsigned char _bittest64(__int64 const *, __int64))
__MACHINE(unsigned char _bittestandcomplement(long *, long))
__MACHINEARM64_X64(unsigned char _bittestandcomplement64(__int64 *, __int64))
__MACHINE(unsigned char _bittestandreset(long *, long))
__MACHINEARM64_X64(unsigned char _bittestandreset64(__int64 *, __int64))
__MACHINE(unsigned char _bittestandset(long *, long))
__MACHINEARM64_X64(unsigned char _bittestandset64(__int64 *, __int64))
__MACHINE(_Check_return_ unsigned __int64 __cdecl _byteswap_uint64(_In_ unsigned __int64))
__MACHINE(_Check_return_ unsigned long __cdecl _byteswap_ulong(_In_ unsigned long))
__MACHINE(_Check_return_ unsigned short __cdecl _byteswap_ushort(_In_ unsigned short))
__MACHINE(void __cdecl _disable(void))
__MACHINE(void __cdecl _enable(void))
__MACHINE(unsigned char _interlockedbittestandreset(long volatile *, long))
__MACHINEARM64_X64(unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64))
__MACHINEARM_ARM64(unsigned char _interlockedbittestandreset_acq(long volatile *, long))
__MACHINEARM_ARM64(unsigned char _interlockedbittestandreset_nf(long volatile *, long))
__MACHINEARM_ARM64(unsigned char _interlockedbittestandreset_rel(long volatile *, long))
__MACHINEARM64(unsigned char _interlockedbittestandreset64_acq(__int64 volatile *, __int64))
__MACHINEARM64(unsigned char _interlockedbittestandreset64_rel(__int64 volatile *, __int64))
__MACHINEARM64(unsigned char _interlockedbittestandreset64_nf(__int64 volatile *, __int64))
__MACHINE(unsigned char _interlockedbittestandset(long volatile *, long))
__MACHINEARM64_X64(unsigned char _interlockedbittestandset64(__int64 volatile *, __int64))
__MACHINEARM_ARM64(unsigned char _interlockedbittestandset_acq(long volatile *, long))
__MACHINEARM_ARM64(unsigned char _interlockedbittestandset_nf(long volatile *, long))
__MACHINEARM_ARM64(unsigned char _interlockedbittestandset_rel(long volatile *, long))
__MACHINEARM64(unsigned char _interlockedbittestandset64_acq(__int64 volatile *, __int64))
__MACHINEARM64(unsigned char _interlockedbittestandset64_rel(__int64 volatile *, __int64))
__MACHINEARM64(unsigned char _interlockedbittestandset64_nf(__int64 volatile *, __int64))
__MACHINEARM64(unsigned __int32 __crc32b(unsigned __int32, unsigned __int32))
__MACHINEARM64(unsigned __int32 __crc32h(unsigned __int32, unsigned __int32))
__MACHINEARM64(unsigned __int32 __crc32w(unsigned __int32, unsigned __int32))
__MACHINEARM64(unsigned __int32 __crc32d(unsigned __int32, unsigned __int64))
__MACHINEARM64(unsigned __int32 __crc32cb(unsigned __int32, unsigned __int32))
__MACHINEARM64(unsigned __int32 __crc32ch(unsigned __int32, unsigned __int32))
__MACHINEARM64(unsigned __int32 __crc32cw(unsigned __int32, unsigned __int32))
__MACHINEARM64(unsigned __int32 __crc32cd(unsigned __int32, unsigned __int64))
__MACHINEARM(int _isunordered(double, double))
__MACHINEARM(int _isunorderedf(float, float))
__MACHINE(_Check_return_ unsigned long __cdecl _lrotl(_In_ unsigned long, _In_ int))
__MACHINE(_Check_return_ unsigned long __cdecl _lrotr(_In_ unsigned long, _In_ int))
__MACHINEX86(void _m_empty(void))
__MACHINEX86(void _m_femms(void))
__MACHINEX86(__m64 _m_from_float(float))
__MACHINEX86(__m64 _m_from_int(int))
__MACHINEX86(void _m_maskmovq(__m64, __m64, char *))
__MACHINEX86(__m64 _m_packssdw(__m64, __m64))
__MACHINEX86(__m64 _m_packsswb(__m64, __m64))
__MACHINEX86(__m64 _m_packuswb(__m64, __m64))
__MACHINEX86(__m64 _m_paddb(__m64, __m64))
__MACHINEX86(__m64 _m_paddd(__m64, __m64))
__MACHINEX86(__m64 _m_paddsb(__m64, __m64))
__MACHINEX86(__m64 _m_paddsw(__m64, __m64))
__MACHINEX86(__m64 _m_paddusb(__m64, __m64))
__MACHINEX86(__m64 _m_paddusw(__m64, __m64))
__MACHINEX86(__m64 _m_paddw(__m64, __m64))
__MACHINEX86(__m64 _m_pand(__m64, __m64))
__MACHINEX86(__m64 _m_pandn(__m64, __m64))
__MACHINEX86(__m64 _m_pavgb(__m64, __m64))
__MACHINEX86(__m64 _m_pavgusb(__m64, __m64))
__MACHINEX86(__m64 _m_pavgw(__m64, __m64))
__MACHINEX86(__m64 _m_pcmpeqb(__m64, __m64))
__MACHINEX86(__m64 _m_pcmpeqd(__m64, __m64))
__MACHINEX86(__m64 _m_pcmpeqw(__m64, __m64))
__MACHINEX86(__m64 _m_pcmpgtb(__m64, __m64))
__MACHINEX86(__m64 _m_pcmpgtd(__m64, __m64))
__MACHINEX86(__m64 _m_pcmpgtw(__m64, __m64))
__MACHINEX86(int _m_pextrw(__m64, int))
__MACHINEX86(__m64 _m_pf2id(__m64))
__MACHINEX86(__m64 _m_pf2iw(__m64))
__MACHINEX86(__m64 _m_pfacc(__m64, __m64))
__MACHINEX86(__m64 _m_pfadd(__m64, __m64))
__MACHINEX86(__m64 _m_pfcmpeq(__m64, __m64))
__MACHINEX86(__m64 _m_pfcmpge(__m64, __m64))
__MACHINEX86(__m64 _m_pfcmpgt(__m64, __m64))
__MACHINEX86(__m64 _m_pfmax(__m64, __m64))
__MACHINEX86(__m64 _m_pfmin(__m64, __m64))
__MACHINEX86(__m64 _m_pfmul(__m64, __m64))
__MACHINEX86(__m64 _m_pfnacc(__m64, __m64))
__MACHINEX86(__m64 _m_pfpnacc(__m64, __m64))
__MACHINEX86(__m64 _m_pfrcp(__m64))
__MACHINEX86(__m64 _m_pfrcpit1(__m64, __m64))
__MACHINEX86(__m64 _m_pfrcpit2(__m64, __m64))
__MACHINEX86(__m64 _m_pfrsqit1(__m64, __m64))
__MACHINEX86(__m64 _m_pfrsqrt(__m64))
__MACHINEX86(__m64 _m_pfsub(__m64, __m64))
__MACHINEX86(__m64 _m_pfsubr(__m64, __m64))
__MACHINEX86(__m64 _m_pi2fd(__m64))
__MACHINEX86(__m64 _m_pi2fw(__m64))
__MACHINEX86(__m64 _m_pinsrw(__m64, int, int))
__MACHINEX86(__m64 _m_pmaddwd(__m64, __m64))
__MACHINEX86(__m64 _m_pmaxsw(__m64, __m64))
__MACHINEX86(__m64 _m_pmaxub(__m64, __m64))
__MACHINEX86(__m64 _m_pminsw(__m64, __m64))
__MACHINEX86(__m64 _m_pminub(__m64, __m64))
__MACHINEX86(int _m_pmovmskb(__m64))
__MACHINEX86(__m64 _m_pmulhrw(__m64, __m64))
__MACHINEX86(__m64 _m_pmulhuw(__m64, __m64))
__MACHINEX86(__m64 _m_pmulhw(__m64, __m64))
__MACHINEX86(__m64 _m_pmullw(__m64, __m64))
__MACHINEX86(__m64 _m_por(__m64, __m64))
__MACHINEX86_X64(void _m_prefetch(void *))
__MACHINEX86_X64(void _m_prefetchw(volatile const void *))
__MACHINEX86(__m64 _m_psadbw(__m64, __m64))
__MACHINEX86(__m64 _m_pshufw(__m64, int))
__MACHINEX86(__m64 _m_pslld(__m64, __m64))
__MACHINEX86(__m64 _m_pslldi(__m64, int))
__MACHINEX86(__m64 _m_psllq(__m64, __m64))
__MACHINEX86(__m64 _m_psllqi(__m64, int))
__MACHINEX86(__m64 _m_psllw(__m64, __m64))
__MACHINEX86(__m64 _m_psllwi(__m64, int))
__MACHINEX86(__m64 _m_psrad(__m64, __m64))
__MACHINEX86(__m64 _m_psradi(__m64, int))
__MACHINEX86(__m64 _m_psraw(__m64, __m64))
__MACHINEX86(__m64 _m_psrawi(__m64, int))
__MACHINEX86(__m64 _m_psrld(__m64, __m64))
__MACHINEX86(__m64 _m_psrldi(__m64, int))
__MACHINEX86(__m64 _m_psrlq(__m64, __m64))
__MACHINEX86(__m64 _m_psrlqi(__m64, int))
__MACHINEX86(__m64 _m_psrlw(__m64, __m64))
__MACHINEX86(__m64 _m_psrlwi(__m64, int))
__MACHINEX86(__m64 _m_psubb(__m64, __m64))
__MACHINEX86(__m64 _m_psubd(__m64, __m64))
__MACHINEX86(__m64 _m_psubsb(__m64, __m64))
__MACHINEX86(__m64 _m_psubsw(__m64, __m64))
__MACHINEX86(__m64 _m_psubusb(__m64, __m64))
__MACHINEX86(__m64 _m_psubusw(__m64, __m64))
__MACHINEX86(__m64 _m_psubw(__m64, __m64))
__MACHINEX86(__m64 _m_pswapd(__m64))
__MACHINEX86(__m64 _m_punpckhbw(__m64, __m64))
__MACHINEX86(__m64 _m_punpckhdq(__m64, __m64))
__MACHINEX86(__m64 _m_punpckhwd(__m64, __m64))
__MACHINEX86(__m64 _m_punpcklbw(__m64, __m64))
__MACHINEX86(__m64 _m_punpckldq(__m64, __m64))
__MACHINEX86(__m64 _m_punpcklwd(__m64, __m64))
__MACHINEX86(__m64 _m_pxor(__m64, __m64))
__MACHINEX86(float _m_to_float(__m64))
__MACHINEX86(int _m_to_int(__m64))
__MACHINEX86_X64(__m128i _mm_abs_epi16(__m128i))
__MACHINEX86_X64(__m128i _mm_abs_epi32(__m128i))
__MACHINEX86_X64(__m128i _mm_abs_epi8(__m128i))
__MACHINEX86_X64(__m64 _mm_abs_pi16(__m64))
__MACHINEX86_X64(__m64 _mm_abs_pi32(__m64))
__MACHINEX86_X64(__m64 _mm_abs_pi8(__m64))
__MACHINEX86_X64(__m128i _mm_add_epi16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_add_epi32(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_add_epi64(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_add_epi8(__m128i, __m128i))
__MACHINEX86_X64(__m128d _mm_add_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_add_ps(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_add_sd(__m128d, __m128d))
__MACHINEX86(__m64 _mm_add_si64(__m64, __m64))
__MACHINEX86_X64(__m128 _mm_add_ss(__m128, __m128))
__MACHINEX86_X64(__m128i _mm_adds_epi16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_adds_epi8(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_adds_epu16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_adds_epu8(__m128i, __m128i))
__MACHINEX86_X64(__m128d _mm_addsub_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_addsub_ps(__m128, __m128))
__MACHINEX86_X64(__m128i _mm_alignr_epi8(__m128i, __m128i, int))
__MACHINEX86_X64(__m64 _mm_alignr_pi8(__m64, __m64, int))
__MACHINEX86_X64(__m128d _mm_and_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_and_ps(__m128, __m128))
__MACHINEX86_X64(__m128i _mm_and_si128(__m128i, __m128i))
__MACHINEX86_X64(__m128d _mm_andnot_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_andnot_ps(__m128, __m128))
__MACHINEX86_X64(__m128i _mm_andnot_si128(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_avg_epu16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_avg_epu8(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_blend_epi16(__m128i, __m128i, int))
__MACHINEX86_X64(__m128d _mm_blend_pd(__m128d, __m128d, int))
__MACHINEX86_X64(__m128 _mm_blend_ps(__m128, __m128, int))
__MACHINEX86_X64(__m128i _mm_blendv_epi8(__m128i, __m128i, __m128i))
__MACHINEX86_X64(__m128d _mm_blendv_pd(__m128d, __m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_blendv_ps(__m128, __m128, __m128))
__MACHINEX86_X64(void _mm_clflush(void const *))
__MACHINEX86_X64(void _mm_clflushopt(void const *))
__MACHINEX86_X64(void _mm_clwb(void const *))
__MACHINEX86_X64(__m128i _mm_cmpeq_epi16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_cmpeq_epi32(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_cmpeq_epi64(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_cmpeq_epi8(__m128i, __m128i))
__MACHINEX86_X64(__m128d _mm_cmpeq_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpeq_ps(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmpeq_sd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpeq_ss(__m128, __m128))
__MACHINEX86_X64(int _mm_cmpestra(__m128i, int, __m128i, int, int))
__MACHINEX86_X64(int _mm_cmpestrc(__m128i, int, __m128i, int, int))
__MACHINEX86_X64(int _mm_cmpestri(__m128i, int, __m128i, int, int))
__MACHINEX86_X64(__m128i _mm_cmpestrm(__m128i, int, __m128i, int, int))
__MACHINEX86_X64(int _mm_cmpestro(__m128i, int, __m128i, int, int))
__MACHINEX86_X64(int _mm_cmpestrs(__m128i, int, __m128i, int, int))
__MACHINEX86_X64(int _mm_cmpestrz(__m128i, int, __m128i, int, int))
__MACHINEX86_X64(__m128d _mm_cmpge_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpge_ps(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmpge_sd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpge_ss(__m128, __m128))
__MACHINEX86_X64(__m128i _mm_cmpgt_epi16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_cmpgt_epi32(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_cmpgt_epi64(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_cmpgt_epi8(__m128i, __m128i))
__MACHINEX86_X64(__m128d _mm_cmpgt_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpgt_ps(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmpgt_sd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpgt_ss(__m128, __m128))
__MACHINEX86_X64(int _mm_cmpistra(__m128i, __m128i, int))
__MACHINEX86_X64(int _mm_cmpistrc(__m128i, __m128i, int))
__MACHINEX86_X64(int _mm_cmpistri(__m128i, __m128i, int))
__MACHINEX86_X64(__m128i _mm_cmpistrm(__m128i, __m128i, int))
__MACHINEX86_X64(int _mm_cmpistro(__m128i, __m128i, int))
__MACHINEX86_X64(int _mm_cmpistrs(__m128i, __m128i, int))
__MACHINEX86_X64(int _mm_cmpistrz(__m128i, __m128i, int))
__MACHINEX86_X64(__m128d _mm_cmple_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmple_ps(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmple_sd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmple_ss(__m128, __m128))
__MACHINEX86_X64(__m128i _mm_cmplt_epi16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_cmplt_epi32(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_cmplt_epi8(__m128i, __m128i))
__MACHINEX86_X64(__m128d _mm_cmplt_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmplt_ps(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmplt_sd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmplt_ss(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmpneq_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpneq_ps(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmpneq_sd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpneq_ss(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmpnge_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpnge_ps(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmpnge_sd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpnge_ss(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmpngt_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpngt_ps(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmpngt_sd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpngt_ss(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmpnle_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpnle_ps(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmpnle_sd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpnle_ss(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmpnlt_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpnlt_ps(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmpnlt_sd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpnlt_ss(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmpord_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpord_ps(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmpord_sd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpord_ss(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmpunord_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpunord_ps(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_cmpunord_sd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_cmpunord_ss(__m128, __m128))
__MACHINEX86_X64(int _mm_comieq_sd(__m128d, __m128d))
__MACHINEX86_X64(int _mm_comieq_ss(__m128, __m128))
__MACHINEX86_X64(int _mm_comige_sd(__m128d, __m128d))
__MACHINEX86_X64(int _mm_comige_ss(__m128, __m128))
__MACHINEX86_X64(int _mm_comigt_sd(__m128d, __m128d))
__MACHINEX86_X64(int _mm_comigt_ss(__m128, __m128))
__MACHINEX86_X64(int _mm_comile_sd(__m128d, __m128d))
__MACHINEX86_X64(int _mm_comile_ss(__m128, __m128))
__MACHINEX86_X64(int _mm_comilt_sd(__m128d, __m128d))
__MACHINEX86_X64(int _mm_comilt_ss(__m128, __m128))
__MACHINEX86_X64(int _mm_comineq_sd(__m128d, __m128d))
__MACHINEX86_X64(int _mm_comineq_ss(__m128, __m128))
__MACHINEX86_X64(unsigned int _mm_crc32_u16(unsigned int, unsigned short))
__MACHINEX86_X64(unsigned int _mm_crc32_u32(unsigned int, unsigned int))
__MACHINEX64(unsigned __int64 _mm_crc32_u64(unsigned __int64, unsigned __int64))
__MACHINEX86_X64(unsigned int _mm_crc32_u8(unsigned int, unsigned char))
__MACHINEX86(__m128 _mm_cvt_pi2ps(__m128, __m64))
__MACHINEX86(__m64 _mm_cvt_ps2pi(__m128))
__MACHINEX86_X64(__m128 _mm_cvt_si2ss(__m128, int))
__MACHINEX86_X64(int _mm_cvt_ss2si(__m128))
__MACHINEX86_X64(__m128i _mm_cvtepi16_epi32(__m128i))
__MACHINEX86_X64(__m128i _mm_cvtepi16_epi64(__m128i))
__MACHINEX86_X64(__m128i _mm_cvtepi32_epi64(__m128i))
__MACHINEX86_X64(__m128d _mm_cvtepi32_pd(__m128i))
__MACHINEX86_X64(__m128 _mm_cvtepi32_ps(__m128i))
__MACHINEX86_X64(__m128i _mm_cvtepi8_epi16(__m128i))
__MACHINEX86_X64(__m128i _mm_cvtepi8_epi32(__m128i))
__MACHINEX86_X64(__m128i _mm_cvtepi8_epi64(__m128i))
__MACHINEX86_X64(__m128i _mm_cvtepu16_epi32(__m128i))
__MACHINEX86_X64(__m128i _mm_cvtepu16_epi64(__m128i))
__MACHINEX86_X64(__m128i _mm_cvtepu32_epi64(__m128i))
__MACHINEX86_X64(__m128i _mm_cvtepu8_epi16(__m128i))
__MACHINEX86_X64(__m128i _mm_cvtepu8_epi32(__m128i))
__MACHINEX86_X64(__m128i _mm_cvtepu8_epi64(__m128i))
__MACHINEX86_X64(__m128i _mm_cvtpd_epi32(__m128d))
__MACHINEX86(__m64 _mm_cvtpd_pi32(__m128d))
__MACHINEX86_X64(__m128 _mm_cvtpd_ps(__m128d))
__MACHINEX86(__m128d _mm_cvtpi32_pd(__m64))
__MACHINEX86_X64(__m128i _mm_cvtps_epi32(__m128))
__MACHINEX86_X64(__m128d _mm_cvtps_pd(__m128))
__MACHINEX86_X64(int _mm_cvtsd_si32(__m128d))
__MACHINEX64(__int64 _mm_cvtsd_si64(__m128d))
__MACHINEX64(__int64 _mm_cvtsd_si64x(__m128d))
__MACHINEX86_X64(__m128 _mm_cvtsd_ss(__m128, __m128d))
__MACHINEX86_X64(int _mm_cvtsi128_si32(__m128i))
__MACHINEX64(__int64 _mm_cvtsi128_si64(__m128i))
__MACHINEX64(__int64 _mm_cvtsi128_si64x(__m128i))
__MACHINEX86_X64(__m128d _mm_cvtsi32_sd(__m128d, int))
__MACHINEX86_X64(__m128i _mm_cvtsi32_si128(int))
__MACHINEX64(__m128d _mm_cvtsi64_sd(__m128d, __int64))
__MACHINEX64(__m128i _mm_cvtsi64_si128(__int64))
__MACHINEX64(__m128 _mm_cvtsi64_ss(__m128, __int64))
__MACHINEX64(__m128d _mm_cvtsi64x_sd(__m128d, __int64))
__MACHINEX64(__m128i _mm_cvtsi64x_si128(__int64))
__MACHINEX64(__m128 _mm_cvtsi64x_ss(__m128, __int64))
__MACHINEX86_X64(__m128d _mm_cvtss_sd(__m128d, __m128))
__MACHINEX64(__int64 _mm_cvtss_si64(__m128))
__MACHINEX64(__int64 _mm_cvtss_si64x(__m128))
__MACHINEX86(__m64 _mm_cvtt_ps2pi(__m128))
__MACHINEX86_X64(int _mm_cvtt_ss2si(__m128))
__MACHINEX86_X64(__m128i _mm_cvttpd_epi32(__m128d))
__MACHINEX86(__m64 _mm_cvttpd_pi32(__m128d))
__MACHINEX86_X64(__m128i _mm_cvttps_epi32(__m128))
__MACHINEX86_X64(int _mm_cvttsd_si32(__m128d))
__MACHINEX64(__int64 _mm_cvttsd_si64(__m128d))
__MACHINEX64(__int64 _mm_cvttsd_si64x(__m128d))
__MACHINEX64(__int64 _mm_cvttss_si64(__m128))
__MACHINEX64(__int64 _mm_cvttss_si64x(__m128))
__MACHINEX86_X64(__m128d _mm_div_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_div_ps(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_div_sd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_div_ss(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_dp_pd(__m128d, __m128d, int))
__MACHINEX86_X64(__m128 _mm_dp_ps(__m128, __m128, int))
__MACHINEX86_X64(int _mm_extract_epi16(__m128i, int))
__MACHINEX86_X64(int _mm_extract_epi32(__m128i, int))
__MACHINEX64(__int64 _mm_extract_epi64(__m128i, int))
__MACHINEX86_X64(int _mm_extract_epi8(__m128i, int))
__MACHINEX86_X64(int _mm_extract_ps(__m128, int))
__MACHINEX86_X64(__m128i _mm_extract_si64(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_extracti_si64(__m128i, int, int))
__MACHINEX86_X64(unsigned int _mm_getcsr(void))
__MACHINEX86_X64(__m128i _mm_hadd_epi16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_hadd_epi32(__m128i, __m128i))
__MACHINEX86_X64(__m128d _mm_hadd_pd(__m128d, __m128d))
__MACHINEX86_X64(__m64 _mm_hadd_pi16(__m64, __m64))
__MACHINEX86_X64(__m64 _mm_hadd_pi32(__m64, __m64))
__MACHINEX86_X64(__m128 _mm_hadd_ps(__m128, __m128))
__MACHINEX86_X64(__m128i _mm_hadds_epi16(__m128i, __m128i))
__MACHINEX86_X64(__m64 _mm_hadds_pi16(__m64, __m64))
__MACHINEX86_X64(__m128i _mm_hsub_epi16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_hsub_epi32(__m128i, __m128i))
__MACHINEX86_X64(__m128d _mm_hsub_pd(__m128d, __m128d))
__MACHINEX86_X64(__m64 _mm_hsub_pi16(__m64, __m64))
__MACHINEX86_X64(__m64 _mm_hsub_pi32(__m64, __m64))
__MACHINEX86_X64(__m128 _mm_hsub_ps(__m128, __m128))
__MACHINEX86_X64(__m128i _mm_hsubs_epi16(__m128i, __m128i))
__MACHINEX86_X64(__m64 _mm_hsubs_pi16(__m64, __m64))
__MACHINEX86_X64(__m128i _mm_insert_epi16(__m128i, int, int))
__MACHINEX86_X64(__m128i _mm_insert_epi32(__m128i, int, int))
__MACHINEX64(__m128i _mm_insert_epi64(__m128i, __int64, int))
__MACHINEX86_X64(__m128i _mm_insert_epi8(__m128i, int, int))
__MACHINEX86_X64(__m128 _mm_insert_ps(__m128, __m128, int))
__MACHINEX86_X64(__m128i _mm_insert_si64(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_inserti_si64(__m128i, __m128i, int, int))
__MACHINEX86_X64(__m128i _mm_lddqu_si128(__m128i const *))
__MACHINEX86_X64(void _mm_lfence(void))
__MACHINEX86_X64(__m128d _mm_load1_pd(double const *))
__MACHINEX86_X64(__m128d _mm_load_pd(double const *))
__MACHINEX86_X64(__m128 _mm_load_ps(float const *))
__MACHINEX86_X64(__m128 _mm_load_ps1(float const *))
__MACHINEX86_X64(__m128d _mm_load_sd(double const *))
__MACHINEX86_X64(__m128i _mm_load_si128(__m128i const *))
__MACHINEX86_X64(__m128 _mm_load_ss(float const *))
__MACHINEX86_X64(__m128d _mm_loaddup_pd(double const *))
__MACHINEX86_X64(__m128d _mm_loadh_pd(__m128d, double const *))
__MACHINEX86_X64(__m128 _mm_loadh_pi(__m128, __m64 const *))
__MACHINEX86_X64(__m128i _mm_loadl_epi64(__m128i const *))
__MACHINEX86_X64(__m128d _mm_loadl_pd(__m128d, double const *))
__MACHINEX86_X64(__m128 _mm_loadl_pi(__m128, __m64 const *))
__MACHINEX86_X64(__m128d _mm_loadr_pd(double const *))
__MACHINEX86_X64(__m128 _mm_loadr_ps(float const *))
__MACHINEX86_X64(__m128d _mm_loadu_pd(double const *))
__MACHINEX86_X64(__m128 _mm_loadu_ps(float const *))
__MACHINEX86_X64(__m128i _mm_loadu_si128(__m128i const *))
__MACHINEX86_X64(__m128i _mm_madd_epi16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_maddubs_epi16(__m128i, __m128i))
__MACHINEX86_X64(__m64 _mm_maddubs_pi16(__m64, __m64))
__MACHINEX86_X64(void _mm_maskmoveu_si128(__m128i, __m128i, char *))
__MACHINEX86_X64(__m128i _mm_max_epi16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_max_epi32(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_max_epi8(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_max_epu16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_max_epu32(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_max_epu8(__m128i, __m128i))
__MACHINEX86_X64(__m128d _mm_max_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_max_ps(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_max_sd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_max_ss(__m128, __m128))
__MACHINEX86_X64(void _mm_mfence(void))
__MACHINEX86_X64(__m128i _mm_min_epi16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_min_epi32(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_min_epi8(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_min_epu16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_min_epu32(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_min_epu8(__m128i, __m128i))
__MACHINEX86_X64(__m128d _mm_min_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_min_ps(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_min_sd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_min_ss(__m128, __m128))
__MACHINEX86_X64(__m128i _mm_minpos_epu16(__m128i))
__MACHINEX86_X64(void _mm_monitor(void const *, unsigned int, unsigned int))
__MACHINEX86_X64(__m128i _mm_move_epi64(__m128i))
__MACHINEX86_X64(__m128d _mm_move_sd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_move_ss(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_movedup_pd(__m128d))
__MACHINEX86_X64(__m128 _mm_movehdup_ps(__m128))
__MACHINEX86_X64(__m128 _mm_movehl_ps(__m128, __m128))
__MACHINEX86_X64(__m128 _mm_moveldup_ps(__m128))
__MACHINEX86_X64(__m128 _mm_movelh_ps(__m128, __m128))
__MACHINEX86_X64(int _mm_movemask_epi8(__m128i))
__MACHINEX86_X64(int _mm_movemask_pd(__m128d))
__MACHINEX86_X64(int _mm_movemask_ps(__m128))
__MACHINEX86(__m64 _mm_movepi64_pi64(__m128i))
__MACHINEX86(__m128i _mm_movpi64_epi64(__m64))
__MACHINEX86_X64(__m128i _mm_mpsadbw_epu8(__m128i, __m128i, int))
__MACHINEX86_X64(__m128i _mm_mul_epi32(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_mul_epu32(__m128i, __m128i))
__MACHINEX86_X64(__m128d _mm_mul_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_mul_ps(__m128, __m128))
__MACHINEX86_X64(__m128d _mm_mul_sd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_mul_ss(__m128, __m128))
__MACHINEX86(__m64 _mm_mul_su32(__m64, __m64))
__MACHINEX86_X64(__m128i _mm_mulhi_epi16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_mulhi_epu16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_mulhrs_epi16(__m128i, __m128i))
__MACHINEX86_X64(__m64 _mm_mulhrs_pi16(__m64, __m64))
__MACHINEX86_X64(__m128i _mm_mullo_epi16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_mullo_epi32(__m128i, __m128i))
__MACHINEX86_X64(void _mm_mwait(unsigned int, unsigned int))
__MACHINEX86_X64(__m128d _mm_or_pd(__m128d, __m128d))
__MACHINEX86_X64(__m128 _mm_or_ps(__m128, __m128))
__MACHINEX86_X64(__m128i _mm_or_si128(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_packs_epi16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_packs_epi32(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_packus_epi16(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_packus_epi32(__m128i, __m128i))
__MACHINEX86_X64(void _mm_pause(void))
__MACHINEX86_X64(void _mm_pcommit(void))
__MACHINEX86_X64(int _mm_popcnt_u32(unsigned int))
__MACHINEX64(__int64 _mm_popcnt_u64(unsigned __int64))
__MACHINEX86_X64(void _mm_prefetch(char const *, int))
__MACHINEX86_X64(__m128 _mm_rcp_ps(__m128))
__MACHINEX86_X64(__m128 _mm_rcp_ss(__m128))
__MACHINEX86_X64(__m128d _mm_round_pd(__m128d, int))
__MACHINEX86_X64(__m128 _mm_round_ps(__m128, int))
__MACHINEX86_X64(__m128d _mm_round_sd(__m128d, __m128d, int))
__MACHINEX86_X64(__m128 _mm_round_ss(__m128, __m128, int))
__MACHINEX86_X64(__m128 _mm_rsqrt_ps(__m128))
__MACHINEX86_X64(__m128 _mm_rsqrt_ss(__m128))
__MACHINEX86_X64(__m128i _mm_sad_epu8(__m128i, __m128i))
__MACHINEX86_X64(__m128i _mm_set1_epi16(short))
__MACHINEX86_X64(__m128i _mm_set1_epi32(int))
__MACHINEX86(__m128i _mm_set1_epi64(__m64))
__MACHINEX86_X64(__m128i _mm_set1_epi64x(__int64))
__MACHINEX86_X64(__m128i _mm_set1_epi8(char))
__MACHINEX86_X64(__m128d _mm_set1_pd(double))
__MACHINEX86(__m64 _mm_set1_pi16(short))
__MACHINEX86(__m64 _mm_set1_pi32(int))
__MACHINEX86(__m64 _mm_set1_pi8(char))
__MACHINEX86_X64(__m128i _mm_set_epi16(short, short, short, short, short, short, short, short))
__MACHINEX86_X64(__m128i _mm_set_epi32(int, int, int, int))
__MACHINEX86(__m128i _mm_set_epi64(__m64, __m64))
__MACHINEX86_X64(__m128i _mm_set_epi64x(__int64, __int64))
__MACHINEX86_X64(__m128i _mm_set_epi8(char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char))
__MACHINEX86_X64(__m128d _mm_set_pd(double, double))
__MACHINEX86(__m64 _mm_set_pi16(short, short, short, short))
__MACHINEX86(__m64 _mm_set_pi32(int, int))
__MACHINEX86(__m64 _mm_set_pi8(char, char, char, char, char, char, char, char))
__MACHINEX86_X64(__m128 _mm_set_ps(float, float, float, float))
__MACHINEX86_X64(__m128 _mm_set_ps1(float))
__MACHINEX86_X64(__m128d _mm_set_sd(double))
__MACHINEX86_X64(__m128 _mm_set_ss(float))
__MACHINEX86_X64(void _mm_setcsr(unsigned int))
__MACHINEX86_X64(__m128i _mm_setl_epi64(__m128i))
__MACHINEX86_X64(__m128i _mm_setr_epi16(short, short, short, short, short, short, short, short))
__MACHINEX86_X64(__m128i _mm_setr_epi32(int, int, int, int))
__MACHINEX86(__m128i _mm_setr_epi64(__m64, __m64))
__MACHINEX86_X64(__m128i _mm_setr_epi64x(__int64, __int64))
__MACHINEX86_X64(__m128i _mm_setr_epi8(char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char))
__MACHINEX86_X64(__m128d _mm_setr_pd(double, double))
__MACHINEX86(__m64 _mm_setr_pi16(short, short, short, short))
__MACHINEX86(__m64 _mm_setr_pi32(int, int))
__MACHINEX86(__m64 _mm_setr_pi8(char, char, char, char, char, char, char, char))
__MACHINEX86_X64(__m128 _mm_setr_ps(float, float, float, float))
__MACHINEX86_X64(__m128d _mm_setzero_pd(void))
__MACHINEX86_X64(__m128 _mm_setzero_ps(void))
__MACHINEX86_X64(__m128i _mm_setzero_si128(void))
__MACHINEX86(__m64 _mm_setzero_si64(void))
__MACHINEX86_X64(void _mm_sfence(void))
__MACHINEX86_X64(__m128i _mm_shuffle_epi32(__m128i, int))
__MACHINEX86_X64(__m128i _mm_shuffle_epi8(__m128i, __m128i))