diff --git a/suite/synctools/arm64_gen_vreg.c b/suite/synctools/arm64_gen_vreg.c deleted file mode 100644 index 9b7a938351..0000000000 --- a/suite/synctools/arm64_gen_vreg.c +++ /dev/null @@ -1,38 +0,0 @@ -// $ make arm64_gen_vreg -// $ ./arm64_gen_vreg > AArch64GenRegisterV.inc - -#include -#include -#include -#include - -#undef CAPSTONE_DIET -#define GET_REGINFO_ENUM - -#include "AArch64GenRegisterInfo.inc" -#include "AArch64GenRegisterName.inc" - -int main() -{ - unsigned int i; - size_t size = (size_t)getRegisterName(i, 100); - - printf("// size = %zu\n", size); - - for(i = 1; i < size; i++) { - unsigned int j; - const char *name = getRegisterName(i, AArch64_vreg); - //printf("%u: ARM64_REG_%s, ", i, getRegisterName(i, AArch64_vreg)); - if (strlen(name) == 0) { - printf("0,\n"); - } else { - printf("ARM64_REG_"); - for(j = 0; j < strlen(name); j++) { - printf("%c", toupper(name[j])); - } - printf(",\n"); - } - } - - return 0; -} diff --git a/suite/synctools/tablegen/AArch64/AArch64.td b/suite/synctools/tablegen/AArch64/AArch64.td deleted file mode 100644 index 80e574b7b8..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64.td +++ /dev/null @@ -1,1257 +0,0 @@ -//=- AArch64.td - Describe the AArch64 Target Machine --------*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Target-independent interfaces which we are implementing. 
-//===----------------------------------------------------------------------===// - -include "llvm/Target/Target.td" - -//===----------------------------------------------------------------------===// -// AArch64 Subtarget features. -// - -def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", - "Enable ARMv8 FP">; - -def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", - "Enable Advanced SIMD instructions", [FeatureFPARMv8]>; - -def FeatureSM4 : SubtargetFeature< - "sm4", "HasSM4", "true", - "Enable SM3 and SM4 support", [FeatureNEON]>; - -def FeatureSHA2 : SubtargetFeature< - "sha2", "HasSHA2", "true", - "Enable SHA1 and SHA256 support", [FeatureNEON]>; - -def FeatureSHA3 : SubtargetFeature< - "sha3", "HasSHA3", "true", - "Enable SHA512 and SHA3 support", [FeatureNEON, FeatureSHA2]>; - -def FeatureAES : SubtargetFeature< - "aes", "HasAES", "true", - "Enable AES support", [FeatureNEON]>; - -// Crypto has been split up and any combination is now valid (see the -// crypto definitions above). Also, crypto is now context sensitive: -// it has a different meaning for e.g. Armv8.4 than it has for Armv8.2. -// Therefore, we rely on Clang, the user interacing tool, to pass on the -// appropriate crypto options. But here in the backend, crypto has very little -// meaning anymore. We kept the Crypto definition here for backward -// compatibility, and now imply features SHA2 and AES, which was the -// "traditional" meaning of Crypto. 
-def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", - "Enable cryptographic instructions", [FeatureNEON, FeatureSHA2, FeatureAES]>; - -def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", - "Enable ARMv8 CRC-32 checksum instructions">; - -def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", - "Enable ARMv8 Reliability, Availability and Serviceability Extensions">; - -def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true", - "Enable ARMv8.1 Large System Extension (LSE) atomic instructions">; - -def FeatureLSE2 : SubtargetFeature<"lse2", "HasLSE2", "true", - "Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules">; - -def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true", - "Enable out of line atomics to support LSE instructions">; - -def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true", - "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">; - -def FeaturePAN : SubtargetFeature< - "pan", "HasPAN", "true", - "Enables ARM v8.1 Privileged Access-Never extension">; - -def FeatureLOR : SubtargetFeature< - "lor", "HasLOR", "true", - "Enables ARM v8.1 Limited Ordering Regions extension">; - -def FeatureCONTEXTIDREL2 : SubtargetFeature<"CONTEXTIDREL2", "HasCONTEXTIDREL2", - "true", "Enable RW operand CONTEXTIDR_EL2" >; - -def FeatureVH : SubtargetFeature<"vh", "HasVH", "true", - "Enables ARM v8.1 Virtual Host extension", [FeatureCONTEXTIDREL2] >; - -def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", - "Enable ARMv8 PMUv3 Performance Monitors extension">; - -def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", - "Full FP16", [FeatureFPARMv8]>; - -def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true", - "Enable FP16 FML instructions", [FeatureFullFP16]>; - -def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true", - "Enable Statistical Profiling extension">; - -def FeaturePAN_RWV : SubtargetFeature< - "pan-rwv", 
"HasPAN_RWV", "true", - "Enable v8.2 PAN s1e1R and s1e1W Variants", - [FeaturePAN]>; - -// UAO PState -def FeaturePsUAO : SubtargetFeature< "uaops", "HasPsUAO", "true", - "Enable v8.2 UAO PState">; - -def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP", - "true", "Enable v8.2 data Cache Clean to Point of Persistence" >; - -def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true", - "Enable Scalable Vector Extension (SVE) instructions", [FeatureFullFP16]>; - -// This flag is currently still labeled as Experimental, but when fully -// implemented this should tell the compiler to use the zeroing pseudos to -// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive -// lanes are known to be zero. The pseudos will then be expanded using the -// MOVPRFX instruction to zero the inactive lanes. This feature should only be -// enabled if MOVPRFX instructions are known to merge with the destructive -// operations they prefix. -// -// This feature could similarly be extended to support cheap merging of _any_ -// value into the inactive lanes using the MOVPRFX instruction that uses -// merging-predication. 
-def FeatureExperimentalZeroingPseudos - : SubtargetFeature<"use-experimental-zeroing-pseudos", - "UseExperimentalZeroingPseudos", "true", - "Hint to the compiler that the MOVPRFX instruction is " - "merged with destructive operations", - []>; - -def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl", - "UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">; - -def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true", - "Enable Scalable Vector Extension 2 (SVE2) instructions", - [FeatureSVE, FeatureUseScalarIncVL]>; - -def FeatureSVE2AES : SubtargetFeature<"sve2-aes", "HasSVE2AES", "true", - "Enable AES SVE2 instructions", [FeatureSVE2, FeatureAES]>; - -def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true", - "Enable SM4 SVE2 instructions", [FeatureSVE2, FeatureSM4]>; - -def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true", - "Enable SHA3 SVE2 instructions", [FeatureSVE2, FeatureSHA3]>; - -def FeatureSVE2BitPerm : SubtargetFeature<"sve2-bitperm", "HasSVE2BitPerm", "true", - "Enable bit permutation SVE2 instructions", [FeatureSVE2]>; - -def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", - "Has zero-cycle register moves">; - -def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true", - "Has zero-cycle zeroing instructions for generic registers">; - -def FeatureNoZCZeroingFP : SubtargetFeature<"no-zcz-fp", "HasZeroCycleZeroingFP", "false", - "Has no zero-cycle zeroing instructions for FP registers">; - -def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", - "Has zero-cycle zeroing instructions", - [FeatureZCZeroingGP]>; - -/// ... but the floating-point version doesn't quite work in rare cases on older -/// CPUs. 
-def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround", - "HasZeroCycleZeroingFPWorkaround", "true", - "The zero-cycle floating-point zeroing instruction has a bug">; - -def FeatureStrictAlign : SubtargetFeature<"strict-align", - "StrictAlign", "true", - "Disallow all unaligned memory " - "access">; - -foreach i = {1-7,9-15,18,20-28,30} in - def FeatureReserveX#i : SubtargetFeature<"reserve-x"#i, "ReserveXRegister["#i#"]", "true", - "Reserve X"#i#", making it unavailable " - "as a GPR">; - -foreach i = {8-15,18} in - def FeatureCallSavedX#i : SubtargetFeature<"call-saved-x"#i, - "CustomCallSavedXRegs["#i#"]", "true", "Make X"#i#" callee saved.">; - -def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps", - "true", - "balance mix of odd and even D-registers for fp multiply(-accumulate) ops">; - -def FeaturePredictableSelectIsExpensive : SubtargetFeature< - "predictable-select-expensive", "PredictableSelectIsExpensive", "true", - "Prefer likely predicted branches over selects">; - -def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move", - "CustomAsCheapAsMove", "true", - "Use custom handling of cheap instructions">; - -def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move", - "ExynosAsCheapAsMove", "true", - "Use Exynos specific handling of cheap instructions", - [FeatureCustomCheapAsMoveHandling]>; - -def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler", - "UsePostRAScheduler", "true", "Schedule again after register allocation">; - -def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store", - "Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">; - -def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128", - "Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">; - -def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "STRQroIsSlow", - "true", "STR of Q register with register offset is slow">; 
- -def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature< - "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern", - "true", "Use alternative pattern for sextload convert to f32">; - -def FeatureArithmeticBccFusion : SubtargetFeature< - "arith-bcc-fusion", "HasArithmeticBccFusion", "true", - "CPU fuses arithmetic+bcc operations">; - -def FeatureArithmeticCbzFusion : SubtargetFeature< - "arith-cbz-fusion", "HasArithmeticCbzFusion", "true", - "CPU fuses arithmetic + cbz/cbnz operations">; - -def FeatureCmpBccFusion : SubtargetFeature< - "cmp-bcc-fusion", "HasCmpBccFusion", "true", - "CPU fuses cmp+bcc operations">; - -def FeatureFuseAddress : SubtargetFeature< - "fuse-address", "HasFuseAddress", "true", - "CPU fuses address generation and memory operations">; - -def FeatureFuseAES : SubtargetFeature< - "fuse-aes", "HasFuseAES", "true", - "CPU fuses AES crypto operations">; - -def FeatureFuseArithmeticLogic : SubtargetFeature< - "fuse-arith-logic", "HasFuseArithmeticLogic", "true", - "CPU fuses arithmetic and logic operations">; - -def FeatureFuseCCSelect : SubtargetFeature< - "fuse-csel", "HasFuseCCSelect", "true", - "CPU fuses conditional select operations">; - -def FeatureFuseCryptoEOR : SubtargetFeature< - "fuse-crypto-eor", "HasFuseCryptoEOR", "true", - "CPU fuses AES/PMULL and EOR operations">; - -def FeatureFuseLiterals : SubtargetFeature< - "fuse-literals", "HasFuseLiterals", "true", - "CPU fuses literal generation operations">; - -def FeatureDisableLatencySchedHeuristic : SubtargetFeature< - "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", - "Disable latency scheduling heuristic">; - -def FeatureForce32BitJumpTables - : SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true", - "Force jump table entries to be 32-bits wide except at MinSize">; - -def FeatureRCPC : SubtargetFeature<"rcpc", "HasRCPC", "true", - "Enable support for RCPC extension">; - -def FeatureUseRSqrt : SubtargetFeature< 
- "use-reciprocal-square-root", "UseRSqrt", "true", - "Use the reciprocal square root approximation">; - -def FeatureDotProd : SubtargetFeature< - "dotprod", "HasDotProd", "true", - "Enable dot product support">; - -def FeaturePAuth : SubtargetFeature< - "pauth", "HasPAuth", "true", - "Enable v8.3-A Pointer Authentication extension">; - -def FeatureJS : SubtargetFeature< - "jsconv", "HasJS", "true", - "Enable v8.3-A JavaScript FP conversion instructions", - [FeatureFPARMv8]>; - -def FeatureCCIDX : SubtargetFeature< - "ccidx", "HasCCIDX", "true", - "Enable v8.3-A Extend of the CCSIDR number of sets">; - -def FeatureComplxNum : SubtargetFeature< - "complxnum", "HasComplxNum", "true", - "Enable v8.3-A Floating-point complex number support", - [FeatureNEON]>; - -def FeatureNV : SubtargetFeature< - "nv", "HasNV", "true", - "Enable v8.4-A Nested Virtualization Enchancement">; - -def FeatureMPAM : SubtargetFeature< - "mpam", "HasMPAM", "true", - "Enable v8.4-A Memory system Partitioning and Monitoring extension">; - -def FeatureDIT : SubtargetFeature< - "dit", "HasDIT", "true", - "Enable v8.4-A Data Independent Timing instructions">; - -def FeatureTRACEV8_4 : SubtargetFeature< - "tracev8.4", "HasTRACEV8_4", "true", - "Enable v8.4-A Trace extension">; - -def FeatureAM : SubtargetFeature< - "am", "HasAM", "true", - "Enable v8.4-A Activity Monitors extension">; - -def FeatureAMVS : SubtargetFeature< - "amvs", "HasAMVS", "true", - "Enable v8.6-A Activity Monitors Virtualization support", - [FeatureAM]>; - -def FeatureSEL2 : SubtargetFeature< - "sel2", "HasSEL2", "true", - "Enable v8.4-A Secure Exception Level 2 extension">; - -def FeatureTLB_RMI : SubtargetFeature< - "tlb-rmi", "HasTLB_RMI", "true", - "Enable v8.4-A TLB Range and Maintenance Instructions">; - -def FeatureFlagM : SubtargetFeature< - "flagm", "HasFlagM", "true", - "Enable v8.4-A Flag Manipulation Instructions">; - -// 8.4 RCPC enchancements: LDAPR & STLR instructions with Immediate Offset -def FeatureRCPC_IMMO 
: SubtargetFeature<"rcpc-immo", "HasRCPC_IMMO", "true", - "Enable v8.4-A RCPC instructions with Immediate Offsets", - [FeatureRCPC]>; - -def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", - "NegativeImmediates", "false", - "Convert immediates and instructions " - "to their negated or complemented " - "equivalent when the immediate does " - "not fit in the encoding.">; - -def FeatureLSLFast : SubtargetFeature< - "lsl-fast", "HasLSLFast", "true", - "CPU has a fastpath logical shift of up to 3 places">; - -def FeatureAggressiveFMA : - SubtargetFeature<"aggressive-fma", - "HasAggressiveFMA", - "true", - "Enable Aggressive FMA for floating-point.">; - -def FeatureAltFPCmp : SubtargetFeature<"altnzcv", "HasAlternativeNZCV", "true", - "Enable alternative NZCV format for floating point comparisons">; - -def FeatureFRInt3264 : SubtargetFeature<"fptoint", "HasFRInt3264", "true", - "Enable FRInt[32|64][Z|X] instructions that round a floating-point number to " - "an integer (in FP format) forcing it to fit into a 32- or 64-bit int" >; - -def FeatureSpecRestrict : SubtargetFeature<"specrestrict", "HasSpecRestrict", - "true", "Enable architectural speculation restriction" >; - -def FeatureSB : SubtargetFeature<"sb", "HasSB", - "true", "Enable v8.5 Speculation Barrier" >; - -def FeatureSSBS : SubtargetFeature<"ssbs", "HasSSBS", - "true", "Enable Speculative Store Bypass Safe bit" >; - -def FeaturePredRes : SubtargetFeature<"predres", "HasPredRes", "true", - "Enable v8.5a execution and data prediction invalidation instructions" >; - -def FeatureCacheDeepPersist : SubtargetFeature<"ccdp", "HasCCDP", - "true", "Enable v8.5 Cache Clean to Point of Deep Persistence" >; - -def FeatureBranchTargetId : SubtargetFeature<"bti", "HasBTI", - "true", "Enable Branch Target Identification" >; - -def FeatureRandGen : SubtargetFeature<"rand", "HasRandGen", - "true", "Enable Random Number generation instructions" >; - -def FeatureMTE : SubtargetFeature<"mte", "HasMTE", - 
"true", "Enable Memory Tagging Extension" >; - -def FeatureTRBE : SubtargetFeature<"trbe", "HasTRBE", - "true", "Enable Trace Buffer Extension">; - -def FeatureETE : SubtargetFeature<"ete", "HasETE", - "true", "Enable Embedded Trace Extension", - [FeatureTRBE]>; - -def FeatureTME : SubtargetFeature<"tme", "HasTME", - "true", "Enable Transactional Memory Extension" >; - -def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals", - "AllowTaggedGlobals", - "true", "Use an instruction sequence for taking the address of a global " - "that allows a memory tag in the upper address bits">; - -def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", - "true", "Enable BFloat16 Extension" >; - -def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8", - "true", "Enable Matrix Multiply Int8 Extension">; - -def FeatureMatMulFP32 : SubtargetFeature<"f32mm", "HasMatMulFP32", - "true", "Enable Matrix Multiply FP32 Extension", [FeatureSVE]>; - -def FeatureMatMulFP64 : SubtargetFeature<"f64mm", "HasMatMulFP64", - "true", "Enable Matrix Multiply FP64 Extension", [FeatureSVE]>; - -def FeatureXS : SubtargetFeature<"xs", "HasXS", - "true", "Enable Armv8.7-A limited-TLB-maintenance instruction">; - -def FeatureWFxT : SubtargetFeature<"wfxt", "HasWFxT", - "true", "Enable Armv8.7-A WFET and WFIT instruction">; - -def FeatureHCX : SubtargetFeature< - "hcx", "HasHCX", "true", "Enable Armv8.7-A HCRX_EL2 system register">; - -def FeatureLS64 : SubtargetFeature<"ls64", "HasLS64", - "true", "Enable Armv8.7-A LD64B/ST64B Accelerator Extension">; - -def FeatureHBC : SubtargetFeature<"hbc", "HasHBC", - "true", "Enable Armv8.8-A Hinted Conditional Branches Extension">; - -def FeatureMOPS : SubtargetFeature<"mops", "HasMOPS", - "true", "Enable Armv8.8-A memcpy and memset acceleration instructions">; - -def FeatureBRBE : SubtargetFeature<"brbe", "HasBRBE", - "true", "Enable Branch Record Buffer Extension">; - -def FeatureSPE_EEF : SubtargetFeature<"spe-eef", "HasSPE_EEF", - "true", "Enable 
extra register in the Statistical Profiling Extension">; - -def FeatureFineGrainedTraps : SubtargetFeature<"fgt", "HasFineGrainedTraps", - "true", "Enable fine grained virtualization traps extension">; - -def FeatureEnhancedCounterVirtualization : - SubtargetFeature<"ecv", "HasEnhancedCounterVirtualization", - "true", "Enable enhanced counter virtualization extension">; - -def FeatureRME : SubtargetFeature<"rme", "HasRME", - "true", "Enable Realm Management Extension">; - -// A subset of SVE(2) instructions are legal in Streaming SVE execution mode -// defined by SME. -def FeatureStreamingSVE : SubtargetFeature<"streaming-sve", - "HasStreamingSVE", "true", - "Enable subset of SVE(2) instructions for Streaming SVE execution mode">; -def FeatureSME : SubtargetFeature<"sme", "HasSME", "true", - "Enable Scalable Matrix Extension (SME)", [FeatureStreamingSVE, FeatureBF16]>; - -def FeatureSMEF64 : SubtargetFeature<"sme-f64", "HasSMEF64", "true", - "Enable Scalable Matrix Extension (SME) F64F64 instructions", [FeatureSME]>; - -def FeatureSMEI64 : SubtargetFeature<"sme-i64", "HasSMEI64", "true", - "Enable Scalable Matrix Extension (SME) I16I64 instructions", [FeatureSME]>; - -def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true", - "Apple A7 (the CPU formerly known as Cyclone)">; - -def FeatureEL2VMSA : SubtargetFeature<"el2vmsa", "HasEL2VMSA", "true", - "Enable Exception Level 2 Virtual Memory System Architecture">; - -def FeatureEL3 : SubtargetFeature<"el3", "HasEL3", "true", - "Enable Exception Level 3">; - -def FeatureFixCortexA53_835769 : SubtargetFeature<"fix-cortex-a53-835769", - "FixCortexA53_835769", "true", "Mitigate Cortex-A53 Erratum 835769">; - -def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice", - "NoBTIAtReturnTwice", "true", - "Don't place a BTI instruction " - "after a return-twice">; - -//===----------------------------------------------------------------------===// -// Architectures. 
-// -def HasV8_0aOps : SubtargetFeature<"v8a", "HasV8_0aOps", "true", - "Support ARM v8.0a instructions", [FeatureEL2VMSA, FeatureEL3]>; - -def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", - "Support ARM v8.1a instructions", [HasV8_0aOps, FeatureCRC, FeatureLSE, - FeatureRDM, FeaturePAN, FeatureLOR, FeatureVH]>; - -def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", - "Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO, - FeaturePAN_RWV, FeatureRAS, FeatureCCPP]>; - -def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", - "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC, FeaturePAuth, - FeatureJS, FeatureCCIDX, FeatureComplxNum]>; - -def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true", - "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd, - FeatureNV, FeatureMPAM, FeatureDIT, - FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI, - FeatureFlagM, FeatureRCPC_IMMO, FeatureLSE2]>; - -def HasV8_5aOps : SubtargetFeature< - "v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions", - [HasV8_4aOps, FeatureAltFPCmp, FeatureFRInt3264, FeatureSpecRestrict, - FeatureSSBS, FeatureSB, FeaturePredRes, FeatureCacheDeepPersist, - FeatureBranchTargetId]>; - -def HasV8_6aOps : SubtargetFeature< - "v8.6a", "HasV8_6aOps", "true", "Support ARM v8.6a instructions", - [HasV8_5aOps, FeatureAMVS, FeatureBF16, FeatureFineGrainedTraps, - FeatureEnhancedCounterVirtualization, FeatureMatMulInt8]>; - -def HasV8_7aOps : SubtargetFeature< - "v8.7a", "HasV8_7aOps", "true", "Support ARM v8.7a instructions", - [HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX]>; - -def HasV8_8aOps : SubtargetFeature< - "v8.8a", "HasV8_8aOps", "true", "Support ARM v8.8a instructions", - [HasV8_7aOps, FeatureHBC, FeatureMOPS]>; - -def HasV9_0aOps : SubtargetFeature< - "v9a", "HasV9_0aOps", "true", "Support ARM v9a instructions", - [HasV8_5aOps, FeatureSVE2]>; - -def HasV9_1aOps : SubtargetFeature< - "v9.1a", 
"HasV9_1aOps", "true", "Support ARM v9.1a instructions", - [HasV8_6aOps, HasV9_0aOps]>; - -def HasV9_2aOps : SubtargetFeature< - "v9.2a", "HasV9_2aOps", "true", "Support ARM v9.2a instructions", - [HasV8_7aOps, HasV9_1aOps]>; - -def HasV9_3aOps : SubtargetFeature< - "v9.3a", "HasV9_3aOps", "true", "Support ARM v9.3a instructions", - [HasV8_8aOps, HasV9_2aOps]>; - -def HasV8_0rOps : SubtargetFeature< - "v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions", - [//v8.1 - FeatureCRC, FeaturePAN, FeatureRDM, FeatureLSE, FeatureCONTEXTIDREL2, - //v8.2 - FeatureRAS, FeaturePsUAO, FeatureCCPP, FeaturePAN_RWV, - //v8.3 - FeatureComplxNum, FeatureCCIDX, FeatureJS, - FeaturePAuth, FeatureRCPC, - //v8.4 - FeatureDotProd, FeatureTRACEV8_4, FeatureTLB_RMI, - FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO]>; - -//===----------------------------------------------------------------------===// -// Register File Description -//===----------------------------------------------------------------------===// - -include "AArch64RegisterInfo.td" -include "AArch64RegisterBanks.td" -include "AArch64CallingConvention.td" - -//===----------------------------------------------------------------------===// -// Instruction Descriptions -//===----------------------------------------------------------------------===// - -include "AArch64Schedule.td" -include "AArch64InstrInfo.td" -include "AArch64SchedPredicates.td" -include "AArch64SchedPredExynos.td" -include "AArch64SchedPredAmpere.td" -include "AArch64Combine.td" - -def AArch64InstrInfo : InstrInfo; - -//===----------------------------------------------------------------------===// -// Named operands for MRS/MSR/TLBI/... 
-//===----------------------------------------------------------------------===// - -include "AArch64SystemOperands.td" - -//===----------------------------------------------------------------------===// -// Access to privileged registers -//===----------------------------------------------------------------------===// - -foreach i = 1-3 in -def FeatureUseEL#i#ForTP : SubtargetFeature<"tpidr-el"#i, "UseEL"#i#"ForTP", - "true", "Permit use of TPIDR_EL"#i#" for the TLS base">; - -//===----------------------------------------------------------------------===// -// Control codegen mitigation against Straight Line Speculation vulnerability. -//===----------------------------------------------------------------------===// - -def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr", - "HardenSlsRetBr", "true", - "Harden against straight line speculation across RET and BR instructions">; -def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr", - "HardenSlsBlr", "true", - "Harden against straight line speculation across BLR instructions">; -def FeatureHardenSlsNoComdat : SubtargetFeature<"harden-sls-nocomdat", - "HardenSlsNoComdat", "true", - "Generate thunk code for SLS mitigation in the normal text section">; - -//===----------------------------------------------------------------------===// -// AArch64 Processors supported. 
-// - -//===----------------------------------------------------------------------===// -// Unsupported features to disable for scheduling models -//===----------------------------------------------------------------------===// - -class AArch64Unsupported { list F; } - -def SVEUnsupported : AArch64Unsupported { - let F = [HasSVE, HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, - HasSVE2BitPerm, HasSVEorStreamingSVE, HasSVE2orStreamingSVE]; -} - -def PAUnsupported : AArch64Unsupported { - let F = [HasPAuth]; -} - -def SMEUnsupported : AArch64Unsupported { - let F = [HasSME, HasSMEF64, HasSMEI64]; -} - -include "AArch64SchedA53.td" -include "AArch64SchedA55.td" -include "AArch64SchedA57.td" -include "AArch64SchedCyclone.td" -include "AArch64SchedFalkor.td" -include "AArch64SchedKryo.td" -include "AArch64SchedExynosM3.td" -include "AArch64SchedExynosM4.td" -include "AArch64SchedExynosM5.td" -include "AArch64SchedThunderX.td" -include "AArch64SchedThunderX2T99.td" -include "AArch64SchedA64FX.td" -include "AArch64SchedThunderX3T110.td" -include "AArch64SchedTSV110.td" -include "AArch64SchedAmpere1.td" - -def TuneA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", - "Cortex-A35 ARM processors">; - -def TuneA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", - "Cortex-A53 ARM processors", [ - FeatureFuseAES, - FeatureBalanceFPOps, - FeatureCustomCheapAsMoveHandling, - FeaturePostRAScheduler]>; - -def TuneA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55", - "Cortex-A55 ARM processors", [ - FeatureFuseAES, - FeaturePostRAScheduler, - FeatureFuseAddress]>; - -def TuneA510 : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510", - "Cortex-A510 ARM processors", [ - FeatureFuseAES, - FeaturePostRAScheduler - ]>; - -def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", - "Cortex-A57 ARM processors", [ - FeatureFuseAES, - FeatureBalanceFPOps, - FeatureCustomCheapAsMoveHandling, - FeatureFuseLiterals, - FeaturePostRAScheduler, - 
FeaturePredictableSelectIsExpensive]>; - -def TuneA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65", - "Cortex-A65 ARM processors", [ - FeatureFuseAES, - FeatureFuseAddress, - FeatureFuseLiterals]>; - -def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", - "Cortex-A72 ARM processors", [ - FeatureFuseAES, - FeatureFuseLiterals]>; - -def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", - "Cortex-A73 ARM processors", [ - FeatureFuseAES]>; - -def TuneA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", - "Cortex-A75 ARM processors", [ - FeatureFuseAES]>; - -def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", - "Cortex-A76 ARM processors", [ - FeatureFuseAES]>; - -def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77", - "Cortex-A77 ARM processors", [ - FeatureCmpBccFusion, - FeatureFuseAES]>; - -def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78", - "Cortex-A78 ARM processors", [ - FeatureCmpBccFusion, - FeatureFuseAES, - FeaturePostRAScheduler]>; - -def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily", - "CortexA78C", - "Cortex-A78C ARM processors", [ - FeatureCmpBccFusion, - FeatureFuseAES, - FeaturePostRAScheduler]>; - -def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710", - "Cortex-A710 ARM processors", [ - FeatureFuseAES, - FeaturePostRAScheduler, - FeatureCmpBccFusion]>; - -def TuneR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily", - "CortexR82", - "Cortex-R82 ARM processors", [ - FeaturePostRAScheduler]>; - -def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", - "Cortex-X1 ARM processors", [ - FeatureCmpBccFusion, - FeatureFuseAES, - FeaturePostRAScheduler]>; - -def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2", - "Cortex-X2 ARM processors", [ - FeatureFuseAES, - FeaturePostRAScheduler, - FeatureCmpBccFusion]>; - -def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX", - "Fujitsu A64FX processors", [ 
- FeaturePostRAScheduler, - FeatureAggressiveFMA, - FeatureArithmeticBccFusion, - FeaturePredictableSelectIsExpensive - ]>; - -def TuneCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel", - "Nvidia Carmel processors">; - -// Note that cyclone does not fuse AES instructions, but newer apple chips do -// perform the fusion and cyclone is used by default when targetting apple OSes. -def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7", - "Apple A7 (the CPU formerly known as Cyclone)", [ - FeatureAlternateSExtLoadCVTF32Pattern, - FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, - FeatureDisableLatencySchedHeuristic, - FeatureFuseAES, FeatureFuseCryptoEOR, - FeatureZCRegMove, - FeatureZCZeroing, - FeatureZCZeroingFPWorkaround] - >; - -def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10", - "Apple A10", [ - FeatureAlternateSExtLoadCVTF32Pattern, - FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, - FeatureDisableLatencySchedHeuristic, - FeatureFuseAES, - FeatureFuseCryptoEOR, - FeatureZCRegMove, - FeatureZCZeroing] - >; - -def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11", - "Apple A11", [ - FeatureAlternateSExtLoadCVTF32Pattern, - FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, - FeatureDisableLatencySchedHeuristic, - FeatureFuseAES, - FeatureFuseCryptoEOR, - FeatureZCRegMove, - FeatureZCZeroing] - >; - -def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12", - "Apple A12", [ - FeatureAlternateSExtLoadCVTF32Pattern, - FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, - FeatureDisableLatencySchedHeuristic, - FeatureFuseAES, - FeatureFuseCryptoEOR, - FeatureZCRegMove, - FeatureZCZeroing] - >; - -def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13", - "Apple A13", [ - FeatureAlternateSExtLoadCVTF32Pattern, - FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, - FeatureDisableLatencySchedHeuristic, - 
FeatureFuseAES, - FeatureFuseCryptoEOR, - FeatureZCRegMove, - FeatureZCZeroing] - >; - -def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14", - "Apple A14", [ - FeatureAggressiveFMA, - FeatureAlternateSExtLoadCVTF32Pattern, - FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, - FeatureDisableLatencySchedHeuristic, - FeatureFuseAddress, - FeatureFuseAES, - FeatureFuseArithmeticLogic, - FeatureFuseCCSelect, - FeatureFuseCryptoEOR, - FeatureFuseLiterals, - FeatureZCRegMove, - FeatureZCZeroing]>; - -def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", - "Samsung Exynos-M3 processors", - [FeatureExynosCheapAsMoveHandling, - FeatureForce32BitJumpTables, - FeatureFuseAddress, - FeatureFuseAES, - FeatureFuseCCSelect, - FeatureFuseLiterals, - FeatureLSLFast, - FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive]>; - -def TuneExynosM4 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", - "Samsung Exynos-M3 processors", - [FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, - FeatureExynosCheapAsMoveHandling, - FeatureForce32BitJumpTables, - FeatureFuseAddress, - FeatureFuseAES, - FeatureFuseArithmeticLogic, - FeatureFuseCCSelect, - FeatureFuseLiterals, - FeatureLSLFast, - FeaturePostRAScheduler, - FeatureZCZeroing]>; - -def TuneKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", - "Qualcomm Kryo processors", [ - FeatureCustomCheapAsMoveHandling, - FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive, - FeatureZCZeroing, - FeatureLSLFast] - >; - -def TuneFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", - "Qualcomm Falkor processors", [ - FeatureCustomCheapAsMoveHandling, - FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive, - FeatureZCZeroing, - FeatureLSLFast, - FeatureSlowSTRQro - ]>; - -def TuneNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily", "NeoverseE1", - "Neoverse E1 ARM processors", [ - FeaturePostRAScheduler, - FeatureFuseAES - ]>; - -def 
TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1", - "Neoverse N1 ARM processors", [ - FeaturePostRAScheduler, - FeatureFuseAES - ]>; - -def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2", - "Neoverse N2 ARM processors", [ - FeaturePostRAScheduler, - FeatureFuseAES - ]>; -def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Neoverse512TVB", - "Neoverse 512-TVB ARM processors", [ - FeaturePostRAScheduler, - FeatureFuseAES - ]>; - -def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1", - "Neoverse V1 ARM processors", [ - FeatureFuseAES, - FeaturePostRAScheduler]>; - -def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira", - "Qualcomm Saphira processors", [ - FeatureCustomCheapAsMoveHandling, - FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive, - FeatureZCZeroing, - FeatureLSLFast]>; - -def TuneThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", "ThunderX2T99", - "Cavium ThunderX2 processors", [ - FeatureAggressiveFMA, - FeatureArithmeticBccFusion, - FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive]>; - -def TuneThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily", - "ThunderX3T110", - "Marvell ThunderX3 processors", [ - FeatureAggressiveFMA, - FeatureArithmeticBccFusion, - FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive, - FeatureBalanceFPOps, - FeatureStrictAlign]>; - -def TuneThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX", - "Cavium ThunderX processors", [ - FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive]>; - -def TuneThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily", - "ThunderXT88", - "Cavium ThunderX processors", [ - FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive]>; - -def TuneThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily", - "ThunderXT81", - "Cavium ThunderX processors", [ - 
FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive]>; - -def TuneThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily", - "ThunderXT83", - "Cavium ThunderX processors", [ - FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive]>; - -def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110", - "HiSilicon TS-V110 processors", [ - FeatureCustomCheapAsMoveHandling, - FeatureFuseAES, - FeaturePostRAScheduler]>; - -def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1", - "Ampere Computing Ampere-1 processors", [ - FeaturePostRAScheduler, - FeatureFuseAES, - FeatureLSLFast, - FeatureAggressiveFMA, - FeatureArithmeticBccFusion, - FeatureCmpBccFusion, - FeatureFuseAddress, - FeatureFuseLiterals]>; - -def ProcessorFeatures { - list A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto, - FeatureFPARMv8, FeatureNEON, FeaturePerfMon]; - list A55 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureFullFP16, FeatureDotProd, - FeatureRCPC, FeaturePerfMon]; - list A510 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, - FeatureMatMulInt8, FeatureBF16, FeatureAM, - FeatureMTE, FeatureETE, FeatureSVE2BitPerm, - FeatureFP16FML]; - list A65 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureFullFP16, FeatureDotProd, - FeatureRCPC, FeatureSSBS, FeatureRAS]; - list A76 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureFullFP16, FeatureDotProd, - FeatureRCPC, FeatureSSBS]; - list A77 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureFullFP16, FeatureDotProd, - FeatureRCPC, FeatureSSBS]; - list A78 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureFullFP16, FeatureDotProd, - FeatureRCPC, FeaturePerfMon, FeatureSPE, - FeatureSSBS]; - list A78C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureFullFP16, FeatureDotProd, - FeatureFlagM, FeatureFP16FML, FeaturePAuth, - FeaturePerfMon, FeatureRCPC, FeatureSPE, - FeatureSSBS]; - list A710 = 
[HasV9_0aOps, FeatureNEON, FeaturePerfMon, - FeatureETE, FeatureMTE, FeatureFP16FML, - FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8]; - list R82 = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16, - FeatureFP16FML, FeatureSSBS, FeaturePredRes, - FeatureSB, FeatureSpecRestrict]; - list X1 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureRCPC, FeaturePerfMon, - FeatureSPE, FeatureFullFP16, FeatureDotProd, - FeatureSSBS]; - list X1C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureRCPC, FeaturePerfMon, - FeatureSPE, FeatureFullFP16, FeatureDotProd, - FeaturePAuth, FeatureSSBS]; - list X2 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, - FeatureMatMulInt8, FeatureBF16, FeatureAM, - FeatureMTE, FeatureETE, FeatureSVE2BitPerm, - FeatureFP16FML]; - list A64FX = [HasV8_2aOps, FeatureFPARMv8, FeatureNEON, - FeatureSHA2, FeaturePerfMon, FeatureFullFP16, - FeatureSVE, FeatureComplxNum]; - list Carmel = [HasV8_2aOps, FeatureNEON, FeatureCrypto, - FeatureFullFP16]; - list AppleA7 = [HasV8_0aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON,FeaturePerfMon, FeatureAppleA7SysReg]; - list AppleA10 = [HasV8_0aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeaturePerfMon, FeatureCRC, - FeatureRDM, FeaturePAN, FeatureLOR, FeatureVH]; - list AppleA11 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeaturePerfMon, FeatureFullFP16]; - list AppleA12 = [HasV8_3aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeaturePerfMon, FeatureFullFP16]; - list AppleA13 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeaturePerfMon, FeatureFullFP16, - FeatureFP16FML, FeatureSHA3]; - list AppleA14 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeaturePerfMon, FeatureFRInt3264, - FeatureSpecRestrict, FeatureSSBS, FeatureSB, - FeaturePredRes, FeatureCacheDeepPersist, - FeatureFullFP16, FeatureFP16FML, FeatureSHA3, - FeatureAltFPCmp]; - list ExynosM3 = [HasV8_0aOps, FeatureCRC, FeatureCrypto, - FeaturePerfMon]; - list 
ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, - FeatureFullFP16, FeaturePerfMon]; - list Falkor = [HasV8_0aOps, FeatureCRC, FeatureCrypto, - FeatureFPARMv8, FeatureNEON, FeaturePerfMon, - FeatureRDM]; - list NeoverseE1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, - FeatureFPARMv8, FeatureFullFP16, FeatureNEON, - FeatureRCPC, FeatureSSBS]; - list NeoverseN1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, - FeatureFPARMv8, FeatureFullFP16, FeatureNEON, - FeatureRCPC, FeatureSPE, FeatureSSBS]; - list NeoverseN2 = [HasV8_5aOps, FeatureBF16, FeatureETE, - FeatureMatMulInt8, FeatureMTE, FeatureSVE2, - FeatureSVE2BitPerm, FeatureTRBE, FeatureCrypto]; - list Neoverse512TVB = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist, - FeatureCrypto, FeatureFPARMv8, FeatureFP16FML, - FeatureFullFP16, FeatureMatMulInt8, FeatureNEON, - FeaturePerfMon, FeatureRandGen, FeatureSPE, - FeatureSSBS, FeatureSVE]; - list NeoverseV1 = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist, - FeatureCrypto, FeatureFPARMv8, FeatureFP16FML, - FeatureFullFP16, FeatureMatMulInt8, FeatureNEON, - FeaturePerfMon, FeatureRandGen, FeatureSPE, - FeatureSSBS, FeatureSVE]; - list Saphira = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeatureSPE, FeaturePerfMon]; - list ThunderX = [HasV8_0aOps, FeatureCRC, FeatureCrypto, - FeatureFPARMv8, FeaturePerfMon, FeatureNEON]; - list ThunderX2T99 = [HasV8_1aOps, FeatureCRC, FeatureCrypto, - FeatureFPARMv8, FeatureNEON, FeatureLSE]; - list ThunderX3T110 = [HasV8_3aOps, FeatureCRC, FeatureCrypto, - FeatureFPARMv8, FeatureNEON, FeatureLSE, - FeaturePAuth, FeaturePerfMon]; - list TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeaturePerfMon, FeatureSPE, - FeatureFullFP16, FeatureFP16FML, FeatureDotProd]; - list Ampere1 = [HasV8_6aOps, FeatureNEON, FeaturePerfMon, - FeatureMTE, FeatureSSBS]; - - // ETE and TRBE are future architecture extensions. We temporarily enable them - // by default for users targeting generic AArch64. 
The extensions do not - // affect code generated by the compiler and can be used only by explicitly - // mentioning the new system register names in assembly. - list Generic = [FeatureFPARMv8, FeatureNEON, FeaturePerfMon, FeatureETE]; -} - - -def : ProcessorModel<"generic", CortexA55Model, ProcessorFeatures.Generic, - [FeatureFuseAES, FeaturePostRAScheduler]>; -def : ProcessorModel<"cortex-a35", CortexA53Model, ProcessorFeatures.A53, - [TuneA35]>; -def : ProcessorModel<"cortex-a34", CortexA53Model, ProcessorFeatures.A53, - [TuneA35]>; -def : ProcessorModel<"cortex-a53", CortexA53Model, ProcessorFeatures.A53, - [TuneA53]>; -def : ProcessorModel<"cortex-a55", CortexA55Model, ProcessorFeatures.A55, - [TuneA55]>; -def : ProcessorModel<"cortex-a510", CortexA55Model, ProcessorFeatures.A510, - [TuneA510]>; -def : ProcessorModel<"cortex-a57", CortexA57Model, ProcessorFeatures.A53, - [TuneA57]>; -def : ProcessorModel<"cortex-a65", CortexA53Model, ProcessorFeatures.A65, - [TuneA65]>; -def : ProcessorModel<"cortex-a65ae", CortexA53Model, ProcessorFeatures.A65, - [TuneA65]>; -def : ProcessorModel<"cortex-a72", CortexA57Model, ProcessorFeatures.A53, - [TuneA72]>; -def : ProcessorModel<"cortex-a73", CortexA57Model, ProcessorFeatures.A53, - [TuneA73]>; -def : ProcessorModel<"cortex-a75", CortexA57Model, ProcessorFeatures.A55, - [TuneA75]>; -def : ProcessorModel<"cortex-a76", CortexA57Model, ProcessorFeatures.A76, - [TuneA76]>; -def : ProcessorModel<"cortex-a76ae", CortexA57Model, ProcessorFeatures.A76, - [TuneA76]>; -def : ProcessorModel<"cortex-a77", CortexA57Model, ProcessorFeatures.A77, - [TuneA77]>; -def : ProcessorModel<"cortex-a78", CortexA57Model, ProcessorFeatures.A78, - [TuneA78]>; -def : ProcessorModel<"cortex-a78c", CortexA57Model, ProcessorFeatures.A78C, - [TuneA78C]>; -def : ProcessorModel<"cortex-a710", CortexA57Model, ProcessorFeatures.A710, - [TuneA710]>; -def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82, - [TuneR82]>; -def : 
ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1, - [TuneX1]>; -def : ProcessorModel<"cortex-x1c", CortexA57Model, ProcessorFeatures.X1C, - [TuneX1]>; -def : ProcessorModel<"cortex-x2", CortexA57Model, ProcessorFeatures.X2, - [TuneX2]>; -def : ProcessorModel<"neoverse-e1", CortexA53Model, - ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>; -def : ProcessorModel<"neoverse-n1", CortexA57Model, - ProcessorFeatures.NeoverseN1, [TuneNeoverseN1]>; -def : ProcessorModel<"neoverse-n2", CortexA57Model, - ProcessorFeatures.NeoverseN2, [TuneNeoverseN2]>; -def : ProcessorModel<"neoverse-512tvb", CortexA57Model, - ProcessorFeatures.Neoverse512TVB, [TuneNeoverse512TVB]>; -def : ProcessorModel<"neoverse-v1", CortexA57Model, - ProcessorFeatures.NeoverseV1, [TuneNeoverseV1]>; -def : ProcessorModel<"exynos-m3", ExynosM3Model, ProcessorFeatures.ExynosM3, - [TuneExynosM3]>; -def : ProcessorModel<"exynos-m4", ExynosM4Model, ProcessorFeatures.ExynosM4, - [TuneExynosM4]>; -def : ProcessorModel<"exynos-m5", ExynosM5Model, ProcessorFeatures.ExynosM4, - [TuneExynosM4]>; -def : ProcessorModel<"falkor", FalkorModel, ProcessorFeatures.Falkor, - [TuneFalkor]>; -def : ProcessorModel<"saphira", FalkorModel, ProcessorFeatures.Saphira, - [TuneSaphira]>; -def : ProcessorModel<"kryo", KryoModel, ProcessorFeatures.A53, [TuneKryo]>; - -// Cavium ThunderX/ThunderX T8X Processors -def : ProcessorModel<"thunderx", ThunderXT8XModel, ProcessorFeatures.ThunderX, - [TuneThunderX]>; -def : ProcessorModel<"thunderxt88", ThunderXT8XModel, - ProcessorFeatures.ThunderX, [TuneThunderXT88]>; -def : ProcessorModel<"thunderxt81", ThunderXT8XModel, - ProcessorFeatures.ThunderX, [TuneThunderXT81]>; -def : ProcessorModel<"thunderxt83", ThunderXT8XModel, - ProcessorFeatures.ThunderX, [TuneThunderXT83]>; -// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan. 
-def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, - ProcessorFeatures.ThunderX2T99, [TuneThunderX2T99]>; -// Marvell ThunderX3T110 Processors. -def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, - ProcessorFeatures.ThunderX3T110, [TuneThunderX3T110]>; -def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110, - [TuneTSV110]>; - -// Support cyclone as an alias for apple-a7 so we can still LTO old bitcode. -def : ProcessorModel<"cyclone", CycloneModel, ProcessorFeatures.AppleA7, - [TuneAppleA7]>; - -// iPhone and iPad CPUs -def : ProcessorModel<"apple-a7", CycloneModel, ProcessorFeatures.AppleA7, - [TuneAppleA7]>; -def : ProcessorModel<"apple-a8", CycloneModel, ProcessorFeatures.AppleA7, - [TuneAppleA7]>; -def : ProcessorModel<"apple-a9", CycloneModel, ProcessorFeatures.AppleA7, - [TuneAppleA7]>; -def : ProcessorModel<"apple-a10", CycloneModel, ProcessorFeatures.AppleA10, - [TuneAppleA10]>; -def : ProcessorModel<"apple-a11", CycloneModel, ProcessorFeatures.AppleA11, - [TuneAppleA11]>; -def : ProcessorModel<"apple-a12", CycloneModel, ProcessorFeatures.AppleA12, - [TuneAppleA12]>; -def : ProcessorModel<"apple-a13", CycloneModel, ProcessorFeatures.AppleA13, - [TuneAppleA13]>; -def : ProcessorModel<"apple-a14", CycloneModel, ProcessorFeatures.AppleA14, - [TuneAppleA14]>; - -// Mac CPUs -def : ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14, - [TuneAppleA14]>; - -// watch CPUs. -def : ProcessorModel<"apple-s4", CycloneModel, ProcessorFeatures.AppleA12, - [TuneAppleA12]>; -def : ProcessorModel<"apple-s5", CycloneModel, ProcessorFeatures.AppleA12, - [TuneAppleA12]>; - -// Alias for the latest Apple processor model supported by LLVM. 
-def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleA14, - [TuneAppleA14]>; - -// Fujitsu A64FX -def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX, - [TuneA64FX]>; - -// Nvidia Carmel -def : ProcessorModel<"carmel", NoSchedModel, ProcessorFeatures.Carmel, - [TuneCarmel]>; - -// Ampere Computing -def : ProcessorModel<"ampere1", Ampere1Model, ProcessorFeatures.Ampere1, - [TuneAmpere1]>; - -//===----------------------------------------------------------------------===// -// Assembly parser -//===----------------------------------------------------------------------===// - -def GenericAsmParserVariant : AsmParserVariant { - int Variant = 0; - string Name = "generic"; - string BreakCharacters = "."; - string TokenizingCharacters = "[]*!/"; -} - -def AppleAsmParserVariant : AsmParserVariant { - int Variant = 1; - string Name = "apple-neon"; - string BreakCharacters = "."; - string TokenizingCharacters = "[]*!/"; -} - -//===----------------------------------------------------------------------===// -// Assembly printer -//===----------------------------------------------------------------------===// -// AArch64 Uses the MC printer for asm output, so make sure the TableGen -// AsmWriter bits get associated with the correct class. 
-def GenericAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - int PassSubtarget = 1; - int Variant = 0; - bit isMCAsmWriter = 1; -} - -def AppleAsmWriter : AsmWriter { - let AsmWriterClassName = "AppleInstPrinter"; - int PassSubtarget = 1; - int Variant = 1; - int isMCAsmWriter = 1; -} - -//===----------------------------------------------------------------------===// -// Target Declaration -//===----------------------------------------------------------------------===// - -def AArch64 : Target { - let InstructionSet = AArch64InstrInfo; - let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant]; - let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter]; - let AllowRegisterRenaming = 1; -} - -//===----------------------------------------------------------------------===// -// Pfm Counters -//===----------------------------------------------------------------------===// - -include "AArch64PfmCounters.td" diff --git a/suite/synctools/tablegen/AArch64/AArch64CallingConvention.td b/suite/synctools/tablegen/AArch64/AArch64CallingConvention.td deleted file mode 100644 index f26151536a..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64CallingConvention.td +++ /dev/null @@ -1,500 +0,0 @@ -//=- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This describes the calling conventions for AArch64 architecture. -// -//===----------------------------------------------------------------------===// - -/// CCIfBigEndian - Match only if we're in big endian mode. 
-class CCIfBigEndian : - CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>; - -class CCIfILP32 : - CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>; - - -//===----------------------------------------------------------------------===// -// ARM AAPCS64 Calling Convention -//===----------------------------------------------------------------------===// - -let Entry = 1 in -def CC_AArch64_AAPCS : CallingConv<[ - CCIfType<[iPTR], CCBitConvertToType>, - CCIfType<[v2f32], CCBitConvertToType>, - CCIfType<[v2f64, v4f32], CCBitConvertToType>, - - // Big endian vectors must be passed as if they were 1-element vectors so that - // their lanes are in a consistent order. - CCIfBigEndian>>, - CCIfBigEndian>>, - - // In AAPCS, an SRet is passed in X8, not X0 like a normal pointer parameter. - // However, on windows, in some circumstances, the SRet is passed in X0 or X1 - // instead. The presence of the inreg attribute indicates that SRet is - // passed in the alternative register (X0 or X1), not X8: - // - X0 for non-instance methods. - // - X1 for instance methods. - - // The "sret" attribute identifies indirect returns. - // The "inreg" attribute identifies non-aggregate types. - // The position of the "sret" attribute identifies instance/non-instance - // methods. - // "sret" on argument 0 means non-instance methods. - // "sret" on argument 1 means instance methods. - - CCIfInReg>>>>, - - CCIfSRet>>, - - // Put ByVal arguments directly on the stack. Minimum size and alignment of a - // slot is 64-bit. - CCIfByVal>, - - // The 'nest' parameter, if any, is passed in X18. - // Darwin uses X18 as the platform register and hence 'nest' isn't currently - // supported there. - CCIfNest>, - - // Pass SwiftSelf in a callee saved register. - CCIfSwiftSelf>>, - - // A SwiftError is passed in X21. - CCIfSwiftError>>, - - // Pass SwiftAsync in an otherwise callee saved register so that it will be - // preserved for normal function calls. 
- CCIfSwiftAsync>>, - - CCIfConsecutiveRegs>, - - CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, - nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64], - CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>, - CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, - nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64], - CCPassIndirect>, - - CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1], - CCAssignToReg<[P0, P1, P2, P3]>>, - CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1], - CCPassIndirect>, - - // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, - // up to eight each of GPR and FPR. - CCIfType<[i1, i8, i16], CCPromoteToType>, - CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>, - // i128 is split to two i64s, we can't fit half to register X7. - CCIfType<[i64], CCIfSplit>>, - - // i128 is split to two i64s, and its stack alignment is 16 bytes. - CCIfType<[i64], CCIfSplit>>, - - CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>, - CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, - CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, - CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, - CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], - CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, - CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], - CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - - // If more than will fit in registers, pass them on the stack instead. 
- CCIfType<[i1, i8, i16, f16, bf16], CCAssignToStack<8, 8>>, - CCIfType<[i32, f32], CCAssignToStack<8, 8>>, - CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16, v4bf16], - CCAssignToStack<8, 8>>, - CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], - CCAssignToStack<16, 16>> -]>; - -let Entry = 1 in -def RetCC_AArch64_AAPCS : CallingConv<[ - CCIfType<[iPTR], CCBitConvertToType>, - CCIfType<[v2f32], CCBitConvertToType>, - CCIfType<[v2f64, v4f32], CCBitConvertToType>, - - CCIfConsecutiveRegs>, - CCIfSwiftError>>, - - // Big endian vectors must be passed as if they were 1-element vectors so that - // their lanes are in a consistent order. - CCIfBigEndian>>, - CCIfBigEndian>>, - - CCIfType<[i1, i8, i16], CCPromoteToType>, - CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>, - CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>, - CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, - CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, - CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, - CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], - CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, - CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], - CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - - CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, - nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64], - CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>, - - CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1], - CCAssignToReg<[P0, P1, P2, P3]>> -]>; - -// Vararg functions on windows pass floats in integer registers -let Entry = 1 in -def CC_AArch64_Win64_VarArg : CallingConv<[ - CCIfType<[f16, bf16], CCBitConvertToType>, - CCIfType<[f32], CCBitConvertToType>, - CCIfType<[f64], CCBitConvertToType>, - CCDelegateTo -]>; - -// Windows Control 
Flow Guard checks take a single argument (the target function -// address) and have no return value. -let Entry = 1 in -def CC_AArch64_Win64_CFGuard_Check : CallingConv<[ - CCIfType<[i64], CCAssignToReg<[X15]>> -]>; - - -// Darwin uses a calling convention which differs in only two ways -// from the standard one at this level: -// + i128s (i.e. split i64s) don't need even registers. -// + Stack slots are sized as needed rather than being at least 64-bit. -let Entry = 1 in -def CC_AArch64_DarwinPCS : CallingConv<[ - CCIfType<[iPTR], CCBitConvertToType>, - CCIfType<[v2f32], CCBitConvertToType>, - CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, - - // An SRet is passed in X8, not X0 like a normal pointer parameter. - CCIfSRet>>, - - // Put ByVal arguments directly on the stack. Minimum size and alignment of a - // slot is 64-bit. - CCIfByVal>, - - // Pass SwiftSelf in a callee saved register. - CCIfSwiftSelf>>, - - // A SwiftError is passed in X21. - CCIfSwiftError>>, - - // Pass SwiftAsync in an otherwise callee saved register so that it will be - // preserved for normal function calls. - CCIfSwiftAsync>>, - - CCIfConsecutiveRegs>, - - // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, - // up to eight each of GPR and FPR. - CCIfType<[i1, i8, i16], CCPromoteToType>, - CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>, - // i128 is split to two i64s, we can't fit half to register X7. - CCIfType<[i64], - CCIfSplit>>, - // i128 is split to two i64s, and its stack alignment is 16 bytes. 
- CCIfType<[i64], CCIfSplit>>, - - CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>, - CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, - CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, - CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, - CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], - CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], - CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - - // If more than will fit in registers, pass them on the stack instead. - CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>, - CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16 || ValVT == MVT::bf16", - CCAssignToStack<2, 2>>, - CCIfType<[i32, f32], CCAssignToStack<4, 4>>, - - // Re-demote pointers to 32-bits so we don't end up storing 64-bit - // values and clobbering neighbouring stack locations. Not very pretty. - CCIfPtr>>, - CCIfPtr>>, - - CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16, v4bf16], - CCAssignToStack<8, 8>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], - CCAssignToStack<16, 16>> -]>; - -let Entry = 1 in -def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ - CCIfType<[iPTR], CCBitConvertToType>, - CCIfType<[v2f32], CCBitConvertToType>, - CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, - - CCIfConsecutiveRegs>, - - // Handle all scalar types as either i64 or f64. - CCIfType<[i8, i16, i32], CCPromoteToType>, - CCIfType<[f16, bf16, f32], CCPromoteToType>, - - // Everything is on the stack. - // i128 is split to two i64s, and its stack alignment is 16 bytes. 
- CCIfType<[i64], CCIfSplit>>, - CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], - CCAssignToStack<8, 8>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], - CCAssignToStack<16, 16>> -]>; - -// In the ILP32 world, the minimum stack slot size is 4 bytes. Otherwise the -// same as the normal Darwin VarArgs handling. -let Entry = 1 in -def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[ - CCIfType<[v2f32], CCBitConvertToType>, - CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, - - // Handle all scalar types as either i32 or f32. - CCIfType<[i8, i16], CCPromoteToType>, - CCIfType<[f16, bf16], CCPromoteToType>, - - // Everything is on the stack. - // i128 is split to two i64s, and its stack alignment is 16 bytes. - CCIfPtr>>, - CCIfType<[i32, f32], CCAssignToStack<4, 4>>, - CCIfType<[i64], CCIfSplit>>, - CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], - CCAssignToStack<8, 8>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], - CCAssignToStack<16, 16>> -]>; - - -// The WebKit_JS calling convention only passes the first argument (the callee) -// in register and the remaining arguments on stack. We allow 32bit stack slots, -// so that WebKit can write partial values in the stack and define the other -// 32bit quantity as undef. -let Entry = 1 in -def CC_AArch64_WebKit_JS : CallingConv<[ - // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0). - CCIfType<[i1, i8, i16], CCPromoteToType>, - CCIfType<[i32], CCAssignToReg<[W0]>>, - CCIfType<[i64], CCAssignToReg<[X0]>>, - - // Pass the remaining arguments on the stack instead. 
- CCIfType<[i32, f32], CCAssignToStack<4, 4>>, - CCIfType<[i64, f64], CCAssignToStack<8, 8>> -]>; - -let Entry = 1 in -def RetCC_AArch64_WebKit_JS : CallingConv<[ - CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>, - CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>, - CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>> -]>; - -//===----------------------------------------------------------------------===// -// ARM64 Calling Convention for GHC -//===----------------------------------------------------------------------===// - -// This calling convention is specific to the Glasgow Haskell Compiler. -// The only documentation is the GHC source code, specifically the C header -// file: -// -// https://github.com/ghc/ghc/blob/master/includes/stg/MachRegs.h -// -// which defines the registers for the Spineless Tagless G-Machine (STG) that -// GHC uses to implement lazy evaluation. The generic STG machine has a set of -// registers which are mapped to appropriate set of architecture specific -// registers for each CPU architecture. -// -// The STG Machine is documented here: -// -// https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/GeneratedCode -// -// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI -// register mapping". - -let Entry = 1 in -def CC_AArch64_GHC : CallingConv<[ - CCIfType<[iPTR], CCBitConvertToType>, - - // Handle all vector types as either f64 or v2f64. - CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType>, - - CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, - CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>, - CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>, - - // Promote i8/i16/i32 arguments to i64. 
- CCIfType<[i8, i16, i32], CCPromoteToType>, - - // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim - CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>> -]>; - -// The order of the callee-saves in this file is important, because the -// FrameLowering code will use this order to determine the layout the -// callee-save area in the stack frame. As can be observed below, Darwin -// requires the frame-record (LR, FP) to be at the top the callee-save area, -// whereas for other platforms they are at the bottom. - -// FIXME: LR is only callee-saved in the sense that *we* preserve it and are -// presumably a callee to someone. External functions may not do so, but this -// is currently safe since BL has LR as an implicit-def and what happens after a -// tail call doesn't matter. -// -// It would be better to model its preservation semantics properly (create a -// vreg on entry, use it in RET & tail call generation; make that vreg def if we -// end up saving LR as part of a call frame). Watch this space... -def CSR_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, - X25, X26, X27, X28, LR, FP, - D8, D9, D10, D11, - D12, D13, D14, D15)>; - -// A variant for treating X18 as callee saved, when interfacing with -// code that needs X18 to be preserved. -def CSR_AArch64_AAPCS_X18 : CalleeSavedRegs<(add X18, CSR_AArch64_AAPCS)>; - -// Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x. -// We put FP before LR, so that frame lowering logic generates (FP,LR) pairs, -// and not (LR,FP) pairs. -def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, - X25, X26, X27, X28, FP, LR, - D8, D9, D10, D11, - D12, D13, D14, D15)>; - -// The Control Flow Guard check call uses a custom calling convention that also -// preserves X0-X8 and Q0-Q7. 
-def CSR_Win_AArch64_CFGuard_Check : CalleeSavedRegs<(add CSR_Win_AArch64_AAPCS, - (sequence "X%u", 0, 8), - (sequence "Q%u", 0, 7))>; - -// AArch64 PCS for vector functions (VPCS) -// must (additionally) preserve full Q8-Q23 registers -def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, - X25, X26, X27, X28, LR, FP, - (sequence "Q%u", 8, 23))>; - -// Functions taking SVE arguments or returning an SVE type -// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15 -def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add (sequence "Z%u", 8, 23), - (sequence "P%u", 4, 15), - X19, X20, X21, X22, X23, X24, - X25, X26, X27, X28, LR, FP)>; - -def CSR_AArch64_AAPCS_SwiftTail - : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X20, X22)>; - -// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since -// 'this' and the pointer return value are both passed in X0 in these cases, -// this can be partially modelled by treating X0 as a callee-saved register; -// only the resulting RegMask is used; the SaveList is ignored -// -// (For generic ARM 64-bit ABI code, clang will not generate constructors or -// destructors with 'this' returns, so this RegMask will not be used in that -// case) -def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>; - -def CSR_AArch64_AAPCS_SwiftError - : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>; - -// The ELF stub used for TLS-descriptor access saves every feasible -// register. Only X0 and LR are clobbered. 
-def CSR_AArch64_TLS_ELF - : CalleeSavedRegs<(add (sequence "X%u", 1, 28), FP, - (sequence "Q%u", 0, 31))>; - -def CSR_AArch64_AllRegs - : CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP, - (sequence "X%u", 0, 28), FP, LR, SP, - (sequence "B%u", 0, 31), (sequence "H%u", 0, 31), - (sequence "S%u", 0, 31), (sequence "D%u", 0, 31), - (sequence "Q%u", 0, 31))>; - -def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>; - -def CSR_AArch64_RT_MostRegs : CalleeSavedRegs<(add CSR_AArch64_AAPCS, - (sequence "X%u", 9, 15))>; - -def CSR_AArch64_StackProbe_Windows - : CalleeSavedRegs<(add (sequence "X%u", 0, 15), - (sequence "X%u", 18, 28), FP, SP, - (sequence "Q%u", 0, 31))>; - -// Darwin variants of AAPCS. -// Darwin puts the frame-record at the top of the callee-save area. -def CSR_Darwin_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, - X23, X24, X25, X26, X27, X28, - D8, D9, D10, D11, - D12, D13, D14, D15)>; - -def CSR_Darwin_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, - X22, X23, X24, X25, X26, X27, - X28, (sequence "Q%u", 8, 23))>; -def CSR_Darwin_AArch64_AAPCS_ThisReturn - : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, X0)>; - -def CSR_Darwin_AArch64_AAPCS_SwiftError - : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X21)>; - -def CSR_Darwin_AArch64_AAPCS_SwiftTail - : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X20, X22)>; - -// The function used by Darwin to obtain the address of a thread-local variable -// guarantees more than a normal AAPCS function. x16 and x17 are used on the -// fast path for calculation, but other registers except X0 (argument/return) -// and LR (it is a call, after all) are preserved. -def CSR_Darwin_AArch64_TLS - : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17), - FP, - (sequence "Q%u", 0, 31))>; - -// We can only handle a register pair with adjacent registers, the register pair -// should belong to the same class as well. 
Since the access function on the -// fast path calls a function that follows CSR_Darwin_AArch64_TLS, -// CSR_Darwin_AArch64_CXX_TLS should be a subset of CSR_Darwin_AArch64_TLS. -def CSR_Darwin_AArch64_CXX_TLS - : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, - (sub (sequence "X%u", 1, 28), X9, X15, X16, X17, X18, X19), - (sequence "D%u", 0, 31))>; - -// CSRs that are handled by prologue, epilogue. -def CSR_Darwin_AArch64_CXX_TLS_PE - : CalleeSavedRegs<(add LR, FP)>; - -// CSRs that are handled explicitly via copies. -def CSR_Darwin_AArch64_CXX_TLS_ViaCopy - : CalleeSavedRegs<(sub CSR_Darwin_AArch64_CXX_TLS, LR, FP)>; - -def CSR_Darwin_AArch64_RT_MostRegs - : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, (sequence "X%u", 9, 15))>; - -// Variants of the standard calling conventions for shadow call stack. -// These all preserve x18 in addition to any other registers. -def CSR_AArch64_NoRegs_SCS - : CalleeSavedRegs<(add CSR_AArch64_NoRegs, X18)>; -def CSR_AArch64_AllRegs_SCS - : CalleeSavedRegs<(add CSR_AArch64_AllRegs, X18)>; -def CSR_AArch64_AAPCS_SwiftError_SCS - : CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>; -def CSR_AArch64_RT_MostRegs_SCS - : CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>; -def CSR_AArch64_AAVPCS_SCS - : CalleeSavedRegs<(add CSR_AArch64_AAVPCS, X18)>; -def CSR_AArch64_SVE_AAPCS_SCS - : CalleeSavedRegs<(add CSR_AArch64_SVE_AAPCS, X18)>; -def CSR_AArch64_AAPCS_SCS - : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>; diff --git a/suite/synctools/tablegen/AArch64/AArch64Combine.td b/suite/synctools/tablegen/AArch64/AArch64Combine.td deleted file mode 100644 index 1994e0eb7f..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64Combine.td +++ /dev/null @@ -1,233 +0,0 @@ -//=- AArch64.td - Define AArch64 Combine Rules ---------------*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -include "llvm/Target/GlobalISel/Combine.td" - -def fconstant_to_constant : GICombineRule< - (defs root:$root), - (match (wip_match_opcode G_FCONSTANT):$root, - [{ return matchFConstantToConstant(*${root}, MRI); }]), - (apply [{ applyFConstantToConstant(*${root}); }])>; - -def icmp_redundant_trunc_matchdata : GIDefMatchData<"Register">; -def icmp_redundant_trunc : GICombineRule< - (defs root:$root, icmp_redundant_trunc_matchdata:$matchinfo), - (match (wip_match_opcode G_ICMP):$root, - [{ return matchICmpRedundantTrunc(*${root}, MRI, Helper.getKnownBits(), ${matchinfo}); }]), - (apply [{ applyICmpRedundantTrunc(*${root}, MRI, B, Observer, ${matchinfo}); }])>; - -// AArch64-specific offset folding for G_GLOBAL_VALUE. -def fold_global_offset_matchdata : GIDefMatchData<"std::pair">; -def fold_global_offset : GICombineRule< - (defs root:$root, fold_global_offset_matchdata:$matchinfo), - (match (wip_match_opcode G_GLOBAL_VALUE):$root, - [{ return matchFoldGlobalOffset(*${root}, MRI, ${matchinfo}); }]), - (apply [{ return applyFoldGlobalOffset(*${root}, MRI, B, Observer, ${matchinfo});}]) ->; - -def AArch64PreLegalizerCombinerHelper: GICombinerHelper< - "AArch64GenPreLegalizerCombinerHelper", [all_combines, - fconstant_to_constant, - icmp_redundant_trunc, - fold_global_offset]> { - let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule"; - let StateClass = "AArch64PreLegalizerCombinerHelperState"; - let AdditionalArguments = []; -} - -def AArch64O0PreLegalizerCombinerHelper: GICombinerHelper< - "AArch64GenO0PreLegalizerCombinerHelper", [optnone_combines]> { - let DisableRuleOption = "aarch64O0prelegalizercombiner-disable-rule"; - let StateClass = "AArch64O0PreLegalizerCombinerHelperState"; - let AdditionalArguments = []; -} - 
-// Matchdata for combines which replace a G_SHUFFLE_VECTOR with a -// target-specific opcode. -def shuffle_matchdata : GIDefMatchData<"ShuffleVectorPseudo">; - -def rev : GICombineRule< - (defs root:$root, shuffle_matchdata:$matchinfo), - (match (wip_match_opcode G_SHUFFLE_VECTOR):$root, - [{ return matchREV(*${root}, MRI, ${matchinfo}); }]), - (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }]) ->; - -def zip : GICombineRule< - (defs root:$root, shuffle_matchdata:$matchinfo), - (match (wip_match_opcode G_SHUFFLE_VECTOR):$root, - [{ return matchZip(*${root}, MRI, ${matchinfo}); }]), - (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }]) ->; - -def uzp : GICombineRule< - (defs root:$root, shuffle_matchdata:$matchinfo), - (match (wip_match_opcode G_SHUFFLE_VECTOR):$root, - [{ return matchUZP(*${root}, MRI, ${matchinfo}); }]), - (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }]) ->; - -def dup: GICombineRule < - (defs root:$root, shuffle_matchdata:$matchinfo), - (match (wip_match_opcode G_SHUFFLE_VECTOR):$root, - [{ return matchDup(*${root}, MRI, ${matchinfo}); }]), - (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }]) ->; - -def trn : GICombineRule< - (defs root:$root, shuffle_matchdata:$matchinfo), - (match (wip_match_opcode G_SHUFFLE_VECTOR):$root, - [{ return matchTRN(*${root}, MRI, ${matchinfo}); }]), - (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }]) ->; - -def ext: GICombineRule < - (defs root:$root, shuffle_matchdata:$matchinfo), - (match (wip_match_opcode G_SHUFFLE_VECTOR):$root, - [{ return matchEXT(*${root}, MRI, ${matchinfo}); }]), - (apply [{ applyEXT(*${root}, ${matchinfo}); }]) ->; - -def shuf_to_ins_matchdata : GIDefMatchData<"std::tuple">; -def shuf_to_ins: GICombineRule < - (defs root:$root, shuf_to_ins_matchdata:$matchinfo), - (match (wip_match_opcode G_SHUFFLE_VECTOR):$root, - [{ return matchINS(*${root}, MRI, ${matchinfo}); }]), - (apply [{ return applyINS(*${root}, MRI, B, 
${matchinfo}); }]) ->; - -def vashr_vlshr_imm_matchdata : GIDefMatchData<"int64_t">; -def vashr_vlshr_imm : GICombineRule< - (defs root:$root, vashr_vlshr_imm_matchdata:$matchinfo), - (match (wip_match_opcode G_ASHR, G_LSHR):$root, - [{ return matchVAshrLshrImm(*${root}, MRI, ${matchinfo}); }]), - (apply [{ applyVAshrLshrImm(*${root}, MRI, ${matchinfo}); }]) ->; - -def form_duplane_matchdata : - GIDefMatchData<"std::pair">; -def form_duplane : GICombineRule < - (defs root:$root, form_duplane_matchdata:$matchinfo), - (match (wip_match_opcode G_SHUFFLE_VECTOR):$root, - [{ return matchDupLane(*${root}, MRI, ${matchinfo}); }]), - (apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }]) ->; - -def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, - form_duplane, - shuf_to_ins]>; - -def adjust_icmp_imm_matchdata : - GIDefMatchData<"std::pair">; -def adjust_icmp_imm : GICombineRule < - (defs root:$root, adjust_icmp_imm_matchdata:$matchinfo), - (match (wip_match_opcode G_ICMP):$root, - [{ return matchAdjustICmpImmAndPred(*${root}, MRI, ${matchinfo}); }]), - (apply [{ applyAdjustICmpImmAndPred(*${root}, ${matchinfo}, B, Observer); }]) ->; - -def swap_icmp_operands : GICombineRule < - (defs root:$root), - (match (wip_match_opcode G_ICMP):$root, - [{ return trySwapICmpOperands(*${root}, MRI); }]), - (apply [{ applySwapICmpOperands(*${root}, Observer); }]) ->; - -def icmp_lowering : GICombineGroup<[adjust_icmp_imm, swap_icmp_operands]>; - -def extractvecelt_pairwise_add_matchdata : GIDefMatchData<"std::tuple">; -def extractvecelt_pairwise_add : GICombineRule< - (defs root:$root, extractvecelt_pairwise_add_matchdata:$matchinfo), - (match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root, - [{ return matchExtractVecEltPairwiseAdd(*${root}, MRI, ${matchinfo}); }]), - (apply [{ applyExtractVecEltPairwiseAdd(*${root}, MRI, B, ${matchinfo}); }]) ->; - -def mul_const_matchdata : GIDefMatchData<"std::function">; -def mul_const : GICombineRule< - (defs root:$root, 
mul_const_matchdata:$matchinfo), - (match (wip_match_opcode G_MUL):$root, - [{ return matchAArch64MulConstCombine(*${root}, MRI, ${matchinfo}); }]), - (apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }]) ->; - -def build_vector_to_dup : GICombineRule< - (defs root:$root), - (match (wip_match_opcode G_BUILD_VECTOR):$root, - [{ return matchBuildVectorToDup(*${root}, MRI); }]), - (apply [{ return applyBuildVectorToDup(*${root}, MRI, B); }]) ->; - -def build_vector_lowering : GICombineGroup<[build_vector_to_dup]>; - -def lower_vector_fcmp : GICombineRule< - (defs root:$root), - (match (wip_match_opcode G_FCMP):$root, - [{ return lowerVectorFCMP(*${root}, MRI, B); }]), - (apply [{}])>; - -def form_truncstore_matchdata : GIDefMatchData<"Register">; -def form_truncstore : GICombineRule< - (defs root:$root, form_truncstore_matchdata:$matchinfo), - (match (wip_match_opcode G_STORE):$root, - [{ return matchFormTruncstore(*${root}, MRI, ${matchinfo}); }]), - (apply [{ applyFormTruncstore(*${root}, MRI, B, Observer, ${matchinfo}); }]) ->; - -def fold_merge_to_zext : GICombineRule< - (defs root:$d), - (match (wip_match_opcode G_MERGE_VALUES):$d, - [{ return matchFoldMergeToZext(*${d}, MRI); }]), - (apply [{ applyFoldMergeToZext(*${d}, MRI, B, Observer); }]) ->; - -def mutate_anyext_to_zext : GICombineRule< - (defs root:$d), - (match (wip_match_opcode G_ANYEXT):$d, - [{ return matchMutateAnyExtToZExt(*${d}, MRI); }]), - (apply [{ applyMutateAnyExtToZExt(*${d}, MRI, B, Observer); }]) ->; - -def split_store_zero_128 : GICombineRule< - (defs root:$d), - (match (wip_match_opcode G_STORE):$d, - [{ return matchSplitStoreZero128(*${d}, MRI); }]), - (apply [{ applySplitStoreZero128(*${d}, MRI, B, Observer); }]) ->; - -// Post-legalization combines which should happen at all optimization levels. -// (E.g. ones that facilitate matching for the selector) For example, matching -// pseudos. 
-def AArch64PostLegalizerLoweringHelper - : GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper", - [shuffle_vector_lowering, vashr_vlshr_imm, - icmp_lowering, build_vector_lowering, - lower_vector_fcmp, form_truncstore]> { - let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule"; -} - -// Post-legalization combines which are primarily optimizations. -def AArch64PostLegalizerCombinerHelper - : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper", - [copy_prop, erase_undef_store, combines_for_extload, - sext_trunc_sextload, mutate_anyext_to_zext, - hoist_logic_op_with_same_opcode_hands, - redundant_and, xor_of_and_with_same_reg, - extractvecelt_pairwise_add, redundant_or, - mul_const, redundant_sext_inreg, - form_bitfield_extract, rotate_out_of_range, - icmp_to_true_false_known_bits, merge_unmerge, - select_combines, fold_merge_to_zext, - constant_fold, identity_combines, - ptr_add_immed_chain, overlapping_and, - split_store_zero_128]> { - let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule"; -} diff --git a/suite/synctools/tablegen/AArch64/AArch64GenRegisterBankInfo.def b/suite/synctools/tablegen/AArch64/AArch64GenRegisterBankInfo.def deleted file mode 100644 index 87aef1dfe8..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64GenRegisterBankInfo.def +++ /dev/null @@ -1,275 +0,0 @@ -//===- AArch64GenRegisterBankInfo.def ----------------------------*- C++ -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// This file defines all the static objects used by AArch64RegisterBankInfo. -/// \todo This should be generated by TableGen. 
-//===----------------------------------------------------------------------===// - -namespace llvm { -RegisterBankInfo::PartialMapping AArch64GenRegisterBankInfo::PartMappings[]{ - /* StartIdx, Length, RegBank */ - // 0: FPR 16-bit value. - {0, 16, AArch64::FPRRegBank}, - // 1: FPR 32-bit value. - {0, 32, AArch64::FPRRegBank}, - // 2: FPR 64-bit value. - {0, 64, AArch64::FPRRegBank}, - // 3: FPR 128-bit value. - {0, 128, AArch64::FPRRegBank}, - // 4: FPR 256-bit value. - {0, 256, AArch64::FPRRegBank}, - // 5: FPR 512-bit value. - {0, 512, AArch64::FPRRegBank}, - // 6: GPR 32-bit value. - {0, 32, AArch64::GPRRegBank}, - // 7: GPR 64-bit value. - {0, 64, AArch64::GPRRegBank}, - // 8: GPR 128-bit value. - {0, 128, AArch64::GPRRegBank}, -}; - -// ValueMappings. -RegisterBankInfo::ValueMapping AArch64GenRegisterBankInfo::ValMappings[]{ - /* BreakDown, NumBreakDowns */ - // 0: invalid - {nullptr, 0}, - // 3-operands instructions (all binary operations should end up with one of - // those mapping). - // 1: FPR 16-bit value. <-- This must match First3OpsIdx. - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, - // 4: FPR 32-bit value. <-- This must match First3OpsIdx. - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, - // 7: FPR 64-bit value. - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, - // 10: FPR 128-bit value. 
- {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1}, - // 13: FPR 256-bit value. - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1}, - // 16: FPR 512-bit value. - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1}, - // 19: GPR 32-bit value. - {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - // 22: GPR 64-bit value. - {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, - // 25: GPR 128-bit value. <-- This must match Last3OpsIdx. - {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1}, - // Cross register bank copies. - // 28: FPR 16-bit value to GPR 16-bit. <-- This must match - // FirstCrossRegCpyIdx. - // Note: This is the kind of copy we see with physical registers. - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - // 30: FPR 32-bit value to GPR 32-bit value. 
- {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - // 32: FPR 64-bit value to GPR 64-bit value. - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, - // 34: FPR 128-bit value to GPR 128-bit value (invalid) - {nullptr, 1}, - {nullptr, 1}, - // 36: FPR 256-bit value to GPR 256-bit value (invalid) - {nullptr, 1}, - {nullptr, 1}, - // 38: FPR 512-bit value to GPR 512-bit value (invalid) - {nullptr, 1}, - {nullptr, 1}, - // 40: GPR 32-bit value to FPR 32-bit value. - {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, - // 42: GPR 64-bit value to FPR 64-bit value. <-- This must match - // LastCrossRegCpyIdx. - {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, - // 44: FPExt: 16 to 32. <-- This must match FPExt16To32Idx. - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, - // 46: FPExt: 16 to 32. <-- This must match FPExt16To64Idx. - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, - // 48: FPExt: 32 to 64. <-- This must match FPExt32To64Idx. - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, - // 50: FPExt vector: 64 to 128. <-- This must match FPExt64To128Idx. 
- {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, - // 52: Shift scalar with 64 bit shift imm - {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, -}; - -bool AArch64GenRegisterBankInfo::checkPartialMap(unsigned Idx, - unsigned ValStartIdx, - unsigned ValLength, - const RegisterBank &RB) { - const PartialMapping &Map = PartMappings[Idx - PartialMappingIdx::PMI_Min]; - return Map.StartIdx == ValStartIdx && Map.Length == ValLength && - Map.RegBank == &RB; -} - -bool AArch64GenRegisterBankInfo::checkValueMapImpl(unsigned Idx, - unsigned FirstInBank, - unsigned Size, - unsigned Offset) { - unsigned PartialMapBaseIdx = Idx - PartialMappingIdx::PMI_Min; - const ValueMapping &Map = - AArch64GenRegisterBankInfo::getValueMapping((PartialMappingIdx)FirstInBank, Size)[Offset]; - return Map.BreakDown == &PartMappings[PartialMapBaseIdx] && - Map.NumBreakDowns == 1; -} - -bool AArch64GenRegisterBankInfo::checkPartialMappingIdx( - PartialMappingIdx FirstAlias, PartialMappingIdx LastAlias, - ArrayRef Order) { - if (Order.front() != FirstAlias) - return false; - if (Order.back() != LastAlias) - return false; - if (Order.front() > Order.back()) - return false; - - PartialMappingIdx Previous = Order.front(); - bool First = true; - for (const auto &Current : Order) { - if (First) { - First = false; - continue; - } - if (Previous + 1 != Current) - return false; - Previous = Current; - } - return true; -} - -unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx, - unsigned Size) { - if (RBIdx == PMI_FirstGPR) { - if (Size <= 32) - return 0; - if (Size <= 64) - return 1; - if (Size <= 128) - return 2; - return -1; - } - if (RBIdx == PMI_FirstFPR) { - if (Size <= 16) - return 0; - if (Size <= 32) - return 1; - if (Size 
<= 64) - return 2; - if (Size <= 128) - return 3; - if (Size <= 256) - return 4; - if (Size <= 512) - return 5; - return -1; - } - return -1; -} - -const RegisterBankInfo::ValueMapping * -AArch64GenRegisterBankInfo::getValueMapping(PartialMappingIdx RBIdx, - unsigned Size) { - assert(RBIdx != PartialMappingIdx::PMI_None && "No mapping needed for that"); - unsigned BaseIdxOffset = getRegBankBaseIdxOffset(RBIdx, Size); - if (BaseIdxOffset == -1u) - return &ValMappings[InvalidIdx]; - - unsigned ValMappingIdx = - First3OpsIdx + (RBIdx - PartialMappingIdx::PMI_Min + BaseIdxOffset) * - ValueMappingIdx::DistanceBetweenRegBanks; - assert(ValMappingIdx >= First3OpsIdx && ValMappingIdx <= Last3OpsIdx && - "Mapping out of bound"); - - return &ValMappings[ValMappingIdx]; -} - -AArch64GenRegisterBankInfo::PartialMappingIdx - AArch64GenRegisterBankInfo::BankIDToCopyMapIdx[]{ - PMI_None, // CCR - PMI_FirstFPR, // FPR - PMI_FirstGPR, // GPR - }; - -const RegisterBankInfo::ValueMapping * -AArch64GenRegisterBankInfo::getCopyMapping(unsigned DstBankID, - unsigned SrcBankID, unsigned Size) { - assert(DstBankID < AArch64::NumRegisterBanks && "Invalid bank ID"); - assert(SrcBankID < AArch64::NumRegisterBanks && "Invalid bank ID"); - PartialMappingIdx DstRBIdx = BankIDToCopyMapIdx[DstBankID]; - PartialMappingIdx SrcRBIdx = BankIDToCopyMapIdx[SrcBankID]; - assert(DstRBIdx != PMI_None && "No such mapping"); - assert(SrcRBIdx != PMI_None && "No such mapping"); - - if (DstRBIdx == SrcRBIdx) - return getValueMapping(DstRBIdx, Size); - - assert(Size <= 64 && "GPR cannot handle that size"); - unsigned ValMappingIdx = - FirstCrossRegCpyIdx + - (DstRBIdx - PMI_Min + getRegBankBaseIdxOffset(DstRBIdx, Size)) * - ValueMappingIdx::DistanceBetweenCrossRegCpy; - assert(ValMappingIdx >= FirstCrossRegCpyIdx && - ValMappingIdx <= LastCrossRegCpyIdx && "Mapping out of bound"); - return &ValMappings[ValMappingIdx]; -} - -const RegisterBankInfo::ValueMapping * 
-AArch64GenRegisterBankInfo::getFPExtMapping(unsigned DstSize, - unsigned SrcSize) { - // We support: - // - For Scalar: - // - 16 to 32. - // - 16 to 64. - // - 32 to 64. - // => FPR 16 to FPR 32|64 - // => FPR 32 to FPR 64 - // - For vectors: - // - v4f16 to v4f32 - // - v2f32 to v2f64 - // => FPR 64 to FPR 128 - - // Check that we have been asked sensible sizes. - if (SrcSize == 16) { - assert((DstSize == 32 || DstSize == 64) && "Unexpected half extension"); - if (DstSize == 32) - return &ValMappings[FPExt16To32Idx]; - return &ValMappings[FPExt16To64Idx]; - } - - if (SrcSize == 32) { - assert(DstSize == 64 && "Unexpected float extension"); - return &ValMappings[FPExt32To64Idx]; - } - assert((SrcSize == 64 || DstSize == 128) && "Unexpected vector extension"); - return &ValMappings[FPExt64To128Idx]; -} -} // End llvm namespace. diff --git a/suite/synctools/tablegen/AArch64/AArch64InstrAtomics.td b/suite/synctools/tablegen/AArch64/AArch64InstrAtomics.td deleted file mode 100644 index b220929514..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64InstrAtomics.td +++ /dev/null @@ -1,521 +0,0 @@ -//=- AArch64InstrAtomics.td - AArch64 Atomic codegen support -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// AArch64 Atomic operand code-gen constructs. 
-// -//===----------------------------------------------------------------------===// - -//===---------------------------------- -// Atomic fences -//===---------------------------------- -let AddedComplexity = 15, Size = 0 in -def CompilerBarrier : Pseudo<(outs), (ins i32imm:$ordering), - [(atomic_fence timm:$ordering, 0)]>, Sched<[]>; -def : Pat<(atomic_fence (i64 4), (timm)), (DMB (i32 0x9))>; -def : Pat<(atomic_fence (timm), (timm)), (DMB (i32 0xb))>; - -//===---------------------------------- -// Atomic loads -//===---------------------------------- - -// When they're actually atomic, only one addressing mode (GPR64sp) is -// supported, but when they're relaxed and anything can be used, all the -// standard modes would be valid and may give efficiency gains. - -// A atomic load operation that actually needs acquire semantics. -class acquiring_load - : PatFrag<(ops node:$ptr), (base node:$ptr)> { - let IsAtomic = 1; - let IsAtomicOrderingAcquireOrStronger = 1; -} - -// An atomic load operation that does not need either acquire or release -// semantics. 
-class relaxed_load - : PatFrag<(ops node:$ptr), (base node:$ptr)> { - let IsAtomic = 1; - let IsAtomicOrderingAcquireOrStronger = 0; -} - -// 8-bit loads -def : Pat<(acquiring_load GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>; -def : Pat<(relaxed_load (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, - ro_Wextend8:$offset)), - (LDRBBroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$offset)>; -def : Pat<(relaxed_load (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend8:$offset)), - (LDRBBroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$offset)>; -def : Pat<(relaxed_load (am_indexed8 GPR64sp:$Rn, - uimm12s1:$offset)), - (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; -def : Pat<(relaxed_load - (am_unscaled8 GPR64sp:$Rn, simm9:$offset)), - (LDURBBi GPR64sp:$Rn, simm9:$offset)>; - -// 16-bit loads -def : Pat<(acquiring_load GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>; -def : Pat<(relaxed_load (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, - ro_Wextend16:$extend)), - (LDRHHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>; -def : Pat<(relaxed_load (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend16:$extend)), - (LDRHHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>; -def : Pat<(relaxed_load (am_indexed16 GPR64sp:$Rn, - uimm12s2:$offset)), - (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>; -def : Pat<(relaxed_load - (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), - (LDURHHi GPR64sp:$Rn, simm9:$offset)>; - -// 32-bit loads -def : Pat<(acquiring_load GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>; -def : Pat<(relaxed_load (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm, - ro_Wextend32:$extend)), - (LDRWroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>; -def : Pat<(relaxed_load (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend32:$extend)), - (LDRWroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>; -def : Pat<(relaxed_load (am_indexed32 GPR64sp:$Rn, - uimm12s4:$offset)), - (LDRWui GPR64sp:$Rn, uimm12s4:$offset)>; -def : Pat<(relaxed_load - (am_unscaled32 GPR64sp:$Rn, simm9:$offset)), - (LDURWi GPR64sp:$Rn, simm9:$offset)>; - -// 64-bit loads -def : 
Pat<(acquiring_load GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>; -def : Pat<(relaxed_load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, - ro_Wextend64:$extend)), - (LDRXroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; -def : Pat<(relaxed_load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend64:$extend)), - (LDRXroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; -def : Pat<(relaxed_load (am_indexed64 GPR64sp:$Rn, - uimm12s8:$offset)), - (LDRXui GPR64sp:$Rn, uimm12s8:$offset)>; -def : Pat<(relaxed_load - (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), - (LDURXi GPR64sp:$Rn, simm9:$offset)>; - -// FP 32-bit loads -def : Pat<(f32 (bitconvert (i32 (relaxed_load (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm, - ro_Wextend32:$extend))))), - (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>; -def : Pat<(f32 (bitconvert (i32 (relaxed_load (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend32:$extend))))), - (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>; -def : Pat<(f32 (bitconvert (i32 (relaxed_load (am_indexed32 GPR64sp:$Rn, - uimm12s8:$offset))))), - (LDRSui GPR64sp:$Rn, uimm12s8:$offset)>; -def : Pat<(f32 (bitconvert (i32 (relaxed_load - (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))), - (LDURSi GPR64sp:$Rn, simm9:$offset)>; - -// FP 64-bit loads -def : Pat<(f64 (bitconvert (i64 (relaxed_load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, - ro_Wextend64:$extend))))), - (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; -def : Pat<(f64 (bitconvert (i64 (relaxed_load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend64:$extend))))), - (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; -def : Pat<(f64 (bitconvert (i64 (relaxed_load (am_indexed64 GPR64sp:$Rn, - uimm12s8:$offset))))), - (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; -def : Pat<(f64 (bitconvert (i64 (relaxed_load - (am_unscaled64 GPR64sp:$Rn, simm9:$offset))))), - (LDURDi GPR64sp:$Rn, simm9:$offset)>; - -//===---------------------------------- -// Atomic stores -//===---------------------------------- - -// When 
they're actually atomic, only one addressing mode (GPR64sp) is -// supported, but when they're relaxed and anything can be used, all the -// standard modes would be valid and may give efficiency gains. - -// A store operation that actually needs release semantics. -class releasing_store - : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> { - let IsAtomic = 1; - let IsAtomicOrderingReleaseOrStronger = 1; -} - -// An atomic store operation that doesn't actually need to be atomic on AArch64. -class relaxed_store - : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> { - let IsAtomic = 1; - let IsAtomicOrderingReleaseOrStronger = 0; -} - -// 8-bit stores -def : Pat<(releasing_store GPR64sp:$ptr, GPR32:$val), - (STLRB GPR32:$val, GPR64sp:$ptr)>; -def : Pat<(relaxed_store - (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend), - GPR32:$val), - (STRBBroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend)>; -def : Pat<(relaxed_store - (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend), - GPR32:$val), - (STRBBroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend)>; -def : Pat<(relaxed_store - (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset), GPR32:$val), - (STRBBui GPR32:$val, GPR64sp:$Rn, uimm12s1:$offset)>; -def : Pat<(relaxed_store - (am_unscaled8 GPR64sp:$Rn, simm9:$offset), GPR32:$val), - (STURBBi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>; - -// 16-bit stores -def : Pat<(releasing_store GPR64sp:$ptr, GPR32:$val), - (STLRH GPR32:$val, GPR64sp:$ptr)>; -def : Pat<(relaxed_store (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, - ro_Wextend16:$extend), - GPR32:$val), - (STRHHroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>; -def : Pat<(relaxed_store (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend16:$extend), - GPR32:$val), - (STRHHroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>; -def : Pat<(relaxed_store - (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), GPR32:$val), - (STRHHui GPR32:$val, GPR64sp:$Rn, 
uimm12s2:$offset)>; -def : Pat<(relaxed_store - (am_unscaled16 GPR64sp:$Rn, simm9:$offset), GPR32:$val), - (STURHHi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>; - -// 32-bit stores -def : Pat<(releasing_store GPR64sp:$ptr, GPR32:$val), - (STLRW GPR32:$val, GPR64sp:$ptr)>; -def : Pat<(relaxed_store (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm, - ro_Wextend32:$extend), - GPR32:$val), - (STRWroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>; -def : Pat<(relaxed_store (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend32:$extend), - GPR32:$val), - (STRWroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>; -def : Pat<(relaxed_store - (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), GPR32:$val), - (STRWui GPR32:$val, GPR64sp:$Rn, uimm12s4:$offset)>; -def : Pat<(relaxed_store - (am_unscaled32 GPR64sp:$Rn, simm9:$offset), GPR32:$val), - (STURWi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>; - -// 64-bit stores -def : Pat<(releasing_store GPR64sp:$ptr, GPR64:$val), - (STLRX GPR64:$val, GPR64sp:$ptr)>; -def : Pat<(relaxed_store (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, - ro_Wextend16:$extend), - GPR64:$val), - (STRXroW GPR64:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; -def : Pat<(relaxed_store (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend16:$extend), - GPR64:$val), - (STRXroX GPR64:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; -def : Pat<(relaxed_store - (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset), GPR64:$val), - (STRXui GPR64:$val, GPR64sp:$Rn, uimm12s8:$offset)>; -def : Pat<(relaxed_store - (am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val), - (STURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>; - -// FP 32-bit stores -def : Pat<(relaxed_store (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm, - ro_Wextend32:$extend), - (i32 (bitconvert (f32 FPR32Op:$val)))), - (STRSroW FPR32Op:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>; -def : Pat<(relaxed_store (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend32:$extend), - (i32 (bitconvert (f32 
FPR32Op:$val)))), - (STRSroX FPR32Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>; -def : Pat<(relaxed_store - (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), (i32 (bitconvert (f32 FPR32Op:$val)))), - (STRSui FPR32Op:$val, GPR64sp:$Rn, uimm12s4:$offset)>; -def : Pat<(relaxed_store - (am_unscaled32 GPR64sp:$Rn, simm9:$offset), (i32 (bitconvert (f32 FPR32Op:$val)))), - (STURSi FPR32Op:$val, GPR64sp:$Rn, simm9:$offset)>; - -// FP 64-bit stores -def : Pat<(relaxed_store (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, - ro_Wextend64:$extend), - (i64 (bitconvert (f64 FPR64Op:$val)))), - (STRDroW FPR64Op:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; -def : Pat<(relaxed_store (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend64:$extend), - (i64 (bitconvert (f64 FPR64Op:$val)))), - (STRDroX FPR64Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; -def : Pat<(relaxed_store - (am_indexed64 GPR64sp:$Rn, uimm12s4:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))), - (STRDui FPR64Op:$val, GPR64sp:$Rn, uimm12s4:$offset)>; -def : Pat<(relaxed_store - (am_unscaled64 GPR64sp:$Rn, simm9:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))), - (STURDi FPR64Op:$val, GPR64sp:$Rn, simm9:$offset)>; - -//===---------------------------------- -// Low-level exclusive operations -//===---------------------------------- - -// Load-exclusives. 
- -def ldxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i8; -}]> { - let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }]; -} - -def ldxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i16; -}]> { - let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }]; -} - -def ldxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i32; -}]> { - let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }]; -} - -def ldxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i64; -}]> { - let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }]; -} - -def : Pat<(ldxr_1 GPR64sp:$addr), - (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>; -def : Pat<(ldxr_2 GPR64sp:$addr), - (SUBREG_TO_REG (i64 0), (LDXRH GPR64sp:$addr), sub_32)>; -def : Pat<(ldxr_4 GPR64sp:$addr), - (SUBREG_TO_REG (i64 0), (LDXRW GPR64sp:$addr), sub_32)>; -def : Pat<(ldxr_8 GPR64sp:$addr), (LDXRX GPR64sp:$addr)>; - -def : Pat<(and (ldxr_1 GPR64sp:$addr), 0xff), - (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>; -def : Pat<(and (ldxr_2 GPR64sp:$addr), 0xffff), - (SUBREG_TO_REG (i64 0), (LDXRH GPR64sp:$addr), sub_32)>; -def : Pat<(and (ldxr_4 GPR64sp:$addr), 0xffffffff), - (SUBREG_TO_REG (i64 0), (LDXRW GPR64sp:$addr), sub_32)>; - -// Load-exclusives. 
- -def ldaxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i8; -}]> { - let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }]; -} - -def ldaxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i16; -}]> { - let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }]; -} - -def ldaxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i32; -}]> { - let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }]; -} - -def ldaxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i64; -}]> { - let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }]; -} - -def : Pat<(ldaxr_1 GPR64sp:$addr), - (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>; -def : Pat<(ldaxr_2 GPR64sp:$addr), - (SUBREG_TO_REG (i64 0), (LDAXRH GPR64sp:$addr), sub_32)>; -def : Pat<(ldaxr_4 GPR64sp:$addr), - (SUBREG_TO_REG (i64 0), (LDAXRW GPR64sp:$addr), sub_32)>; -def : Pat<(ldaxr_8 GPR64sp:$addr), (LDAXRX GPR64sp:$addr)>; - -def : Pat<(and (ldaxr_1 GPR64sp:$addr), 0xff), - (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>; -def : Pat<(and (ldaxr_2 GPR64sp:$addr), 0xffff), - (SUBREG_TO_REG (i64 0), (LDAXRH GPR64sp:$addr), sub_32)>; -def : Pat<(and (ldaxr_4 GPR64sp:$addr), 0xffffffff), - (SUBREG_TO_REG (i64 0), (LDAXRW GPR64sp:$addr), sub_32)>; - -// Store-exclusives. 
- -def stxr_1 : PatFrag<(ops node:$val, node:$ptr), - (int_aarch64_stxr node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i8; -}]> { - let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }]; -} - -def stxr_2 : PatFrag<(ops node:$val, node:$ptr), - (int_aarch64_stxr node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i16; -}]> { - let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }]; -} - -def stxr_4 : PatFrag<(ops node:$val, node:$ptr), - (int_aarch64_stxr node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i32; -}]> { - let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }]; -} - -def stxr_8 : PatFrag<(ops node:$val, node:$ptr), - (int_aarch64_stxr node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i64; -}]> { - let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }]; -} - - -def : Pat<(stxr_1 GPR64:$val, GPR64sp:$addr), - (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; -def : Pat<(stxr_2 GPR64:$val, GPR64sp:$addr), - (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; -def : Pat<(stxr_4 GPR64:$val, GPR64sp:$addr), - (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; -def : Pat<(stxr_8 GPR64:$val, GPR64sp:$addr), - (STXRX GPR64:$val, GPR64sp:$addr)>; - -def : Pat<(stxr_1 (zext (and GPR32:$val, 0xff)), GPR64sp:$addr), - (STXRB GPR32:$val, GPR64sp:$addr)>; -def : Pat<(stxr_2 (zext (and GPR32:$val, 0xffff)), GPR64sp:$addr), - (STXRH GPR32:$val, GPR64sp:$addr)>; -def : Pat<(stxr_4 (zext GPR32:$val), GPR64sp:$addr), - (STXRW GPR32:$val, GPR64sp:$addr)>; - -def : Pat<(stxr_1 (and GPR64:$val, 0xff), GPR64sp:$addr), - (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; -def : Pat<(stxr_2 (and GPR64:$val, 0xffff), GPR64sp:$addr), - (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; -def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr), - (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; - -// 
Store-release-exclusives. - -def stlxr_1 : PatFrag<(ops node:$val, node:$ptr), - (int_aarch64_stlxr node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i8; -}]> { - let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }]; -} - -def stlxr_2 : PatFrag<(ops node:$val, node:$ptr), - (int_aarch64_stlxr node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i16; -}]> { - let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }]; -} - -def stlxr_4 : PatFrag<(ops node:$val, node:$ptr), - (int_aarch64_stlxr node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i32; -}]> { - let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }]; -} - -def stlxr_8 : PatFrag<(ops node:$val, node:$ptr), - (int_aarch64_stlxr node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i64; -}]> { - let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }]; -} - - -def : Pat<(stlxr_1 GPR64:$val, GPR64sp:$addr), - (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; -def : Pat<(stlxr_2 GPR64:$val, GPR64sp:$addr), - (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; -def : Pat<(stlxr_4 GPR64:$val, GPR64sp:$addr), - (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; -def : Pat<(stlxr_8 GPR64:$val, GPR64sp:$addr), - (STLXRX GPR64:$val, GPR64sp:$addr)>; - -def : Pat<(stlxr_1 (zext (and GPR32:$val, 0xff)), GPR64sp:$addr), - (STLXRB GPR32:$val, GPR64sp:$addr)>; -def : Pat<(stlxr_2 (zext (and GPR32:$val, 0xffff)), GPR64sp:$addr), - (STLXRH GPR32:$val, GPR64sp:$addr)>; -def : Pat<(stlxr_4 (zext GPR32:$val), GPR64sp:$addr), - (STLXRW GPR32:$val, GPR64sp:$addr)>; - -def : Pat<(stlxr_1 (and GPR64:$val, 0xff), GPR64sp:$addr), - (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; -def : Pat<(stlxr_2 (and GPR64:$val, 0xffff), GPR64sp:$addr), - (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; -def : Pat<(stlxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr), - (STLXRW 
(EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; - - -// And clear exclusive. - -def : Pat<(int_aarch64_clrex), (CLREX 0xf)>; - -//===---------------------------------- -// Atomic cmpxchg for -O0 -//===---------------------------------- - -// The fast register allocator used during -O0 inserts spills to cover any VRegs -// live across basic block boundaries. When this happens between an LDXR and an -// STXR it can clear the exclusive monitor, causing all cmpxchg attempts to -// fail. - -// Unfortunately, this means we have to have an alternative (expanded -// post-regalloc) path for -O0 compilations. Fortunately this path can be -// significantly more naive than the standard expansion: we conservatively -// assume seq_cst, strong cmpxchg and omit clrex on failure. - -let Constraints = "@earlyclobber $Rd,@earlyclobber $scratch", - mayLoad = 1, mayStore = 1 in { -def CMP_SWAP_8 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch), - (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>, - Sched<[WriteAtomic]>; - -def CMP_SWAP_16 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch), - (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>, - Sched<[WriteAtomic]>; - -def CMP_SWAP_32 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch), - (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>, - Sched<[WriteAtomic]>; - -def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$scratch), - (ins GPR64:$addr, GPR64:$desired, GPR64:$new), []>, - Sched<[WriteAtomic]>; -} - -let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $scratch", - mayLoad = 1, mayStore = 1 in { -class cmp_swap_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32common:$scratch), - (ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi, - GPR64:$newLo, GPR64:$newHi), []>, - Sched<[WriteAtomic]>; -def CMP_SWAP_128 : cmp_swap_128; -def CMP_SWAP_128_RELEASE : cmp_swap_128; -def CMP_SWAP_128_ACQUIRE : cmp_swap_128; -def CMP_SWAP_128_MONOTONIC : cmp_swap_128; -} - -// v8.1 Atomic instructions: -let Predicates = [HasLSE] in { - defm : 
LDOPregister_patterns<"LDADD", "atomic_load_add">; - defm : LDOPregister_patterns<"LDSET", "atomic_load_or">; - defm : LDOPregister_patterns<"LDEOR", "atomic_load_xor">; - defm : LDOPregister_patterns<"LDCLR", "atomic_load_clr">; - defm : LDOPregister_patterns<"LDSMAX", "atomic_load_max">; - defm : LDOPregister_patterns<"LDSMIN", "atomic_load_min">; - defm : LDOPregister_patterns<"LDUMAX", "atomic_load_umax">; - defm : LDOPregister_patterns<"LDUMIN", "atomic_load_umin">; - defm : LDOPregister_patterns<"SWP", "atomic_swap">; - defm : CASregister_patterns<"CAS", "atomic_cmp_swap">; - - // These two patterns are only needed for global isel, selection dag isel - // converts atomic load-sub into a sub and atomic load-add, and likewise for - // and -> clr. - defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">; - defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">; -} diff --git a/suite/synctools/tablegen/AArch64/AArch64InstrFormats.td b/suite/synctools/tablegen/AArch64/AArch64InstrFormats.td deleted file mode 100644 index 4c1e41b7ef..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64InstrFormats.td +++ /dev/null @@ -1,11504 +0,0 @@ -//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tblgen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Describe AArch64 instructions format here -// - -// Format specifies the encoding used by the instruction. This is part of the -// ad-hoc solution used to emit machine instruction encodings by our machine -// code emitter. 
-class Format val> { - bits<2> Value = val; -} - -def PseudoFrm : Format<0>; -def NormalFrm : Format<1>; // Do we need any others? - -// Enum describing whether an instruction is -// destructive in its first source operand. -class DestructiveInstTypeEnum val> { - bits<4> Value = val; -} -def NotDestructive : DestructiveInstTypeEnum<0>; -// Destructive in its first operand and can be MOVPRFX'd, but has no other -// special properties. -def DestructiveOther : DestructiveInstTypeEnum<1>; -def DestructiveUnary : DestructiveInstTypeEnum<2>; -def DestructiveBinaryImm : DestructiveInstTypeEnum<3>; -def DestructiveBinaryShImmUnpred : DestructiveInstTypeEnum<4>; -def DestructiveBinary : DestructiveInstTypeEnum<5>; -def DestructiveBinaryComm : DestructiveInstTypeEnum<6>; -def DestructiveBinaryCommWithRev : DestructiveInstTypeEnum<7>; -def DestructiveTernaryCommWithRev : DestructiveInstTypeEnum<8>; -def DestructiveUnaryPassthru : DestructiveInstTypeEnum<9>; - -class FalseLanesEnum val> { - bits<2> Value = val; -} -def FalseLanesNone : FalseLanesEnum<0>; -def FalseLanesZero : FalseLanesEnum<1>; -def FalseLanesUndef : FalseLanesEnum<2>; - -// AArch64 Instruction Format -class AArch64Inst : Instruction { - field bits<32> Inst; // Instruction encoding. - // Mask of bits that cause an encoding to be UNPREDICTABLE. - // If a bit is set, then if the corresponding bit in the - // target encoding differs from its value in the "Inst" field, - // the instruction is UNPREDICTABLE (SoftFail in abstract parlance). - field bits<32> Unpredictable = 0; - // SoftFail is the generic name for this field, but we alias it so - // as to make it more obvious what it means in ARM-land. 
- field bits<32> SoftFail = Unpredictable; - let Namespace = "AArch64"; - Format F = f; - bits<2> Form = F.Value; - - // Defaults - bit isWhile = 0; - bit isPTestLike = 0; - FalseLanesEnum FalseLanes = FalseLanesNone; - DestructiveInstTypeEnum DestructiveInstType = NotDestructive; - ElementSizeEnum ElementSize = ElementSizeNone; - - let TSFlags{10} = isPTestLike; - let TSFlags{9} = isWhile; - let TSFlags{8-7} = FalseLanes.Value; - let TSFlags{6-3} = DestructiveInstType.Value; - let TSFlags{2-0} = ElementSize.Value; - - let Pattern = []; - let Constraints = cstr; -} - -class InstSubst - : InstAlias, Requires<[UseNegativeImmediates]>; - -// Pseudo instructions (don't have encoding information) -class Pseudo pattern, string cstr = ""> - : AArch64Inst { - dag OutOperandList = oops; - dag InOperandList = iops; - let Pattern = pattern; - let isCodeGenOnly = 1; - let isPseudo = 1; -} - -// Real instructions (have encoding information) -class EncodedI pattern> : AArch64Inst { - let Pattern = pattern; - let Size = 4; -} - -// Normal instructions -class I pattern> - : EncodedI { - dag OutOperandList = oops; - dag InOperandList = iops; - let AsmString = !strconcat(asm, operands); -} - -class TriOpFrag : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>; -class BinOpFrag : PatFrag<(ops node:$LHS, node:$RHS), res>; -class UnOpFrag : PatFrag<(ops node:$LHS), res>; - -// Helper fragment for an extract of the high portion of a 128-bit vector. -def extract_high_v16i8 : - UnOpFrag<(extract_subvector (v16i8 node:$LHS), (i64 8))>; -def extract_high_v8i16 : - UnOpFrag<(extract_subvector (v8i16 node:$LHS), (i64 4))>; -def extract_high_v4i32 : - UnOpFrag<(extract_subvector (v4i32 node:$LHS), (i64 2))>; -def extract_high_v2i64 : - UnOpFrag<(extract_subvector (v2i64 node:$LHS), (i64 1))>; - -//===----------------------------------------------------------------------===// -// Asm Operand Classes. -// - -// Shifter operand for arithmetic shifted encodings. 
-def ShifterOperand : AsmOperandClass { - let Name = "Shifter"; -} - -// Shifter operand for mov immediate encodings. -def MovImm32ShifterOperand : AsmOperandClass { - let SuperClasses = [ShifterOperand]; - let Name = "MovImm32Shifter"; - let RenderMethod = "addShifterOperands"; - let DiagnosticType = "InvalidMovImm32Shift"; -} -def MovImm64ShifterOperand : AsmOperandClass { - let SuperClasses = [ShifterOperand]; - let Name = "MovImm64Shifter"; - let RenderMethod = "addShifterOperands"; - let DiagnosticType = "InvalidMovImm64Shift"; -} - -// Shifter operand for arithmetic register shifted encodings. -class ArithmeticShifterOperand : AsmOperandClass { - let SuperClasses = [ShifterOperand]; - let Name = "ArithmeticShifter" # width; - let PredicateMethod = "isArithmeticShifter<" # width # ">"; - let RenderMethod = "addShifterOperands"; - let DiagnosticType = "AddSubRegShift" # width; -} - -def ArithmeticShifterOperand32 : ArithmeticShifterOperand<32>; -def ArithmeticShifterOperand64 : ArithmeticShifterOperand<64>; - -// Shifter operand for logical register shifted encodings. -class LogicalShifterOperand : AsmOperandClass { - let SuperClasses = [ShifterOperand]; - let Name = "LogicalShifter" # width; - let PredicateMethod = "isLogicalShifter<" # width # ">"; - let RenderMethod = "addShifterOperands"; - let DiagnosticType = "AddSubRegShift" # width; -} - -def LogicalShifterOperand32 : LogicalShifterOperand<32>; -def LogicalShifterOperand64 : LogicalShifterOperand<64>; - -// Shifter operand for logical vector 128/64-bit shifted encodings. -def LogicalVecShifterOperand : AsmOperandClass { - let SuperClasses = [ShifterOperand]; - let Name = "LogicalVecShifter"; - let RenderMethod = "addShifterOperands"; -} -def LogicalVecHalfWordShifterOperand : AsmOperandClass { - let SuperClasses = [LogicalVecShifterOperand]; - let Name = "LogicalVecHalfWordShifter"; - let RenderMethod = "addShifterOperands"; -} - -// The "MSL" shifter on the vector MOVI instruction. 
-def MoveVecShifterOperand : AsmOperandClass { - let SuperClasses = [ShifterOperand]; - let Name = "MoveVecShifter"; - let RenderMethod = "addShifterOperands"; -} - -// Extend operand for arithmetic encodings. -def ExtendOperand : AsmOperandClass { - let Name = "Extend"; - let DiagnosticType = "AddSubRegExtendLarge"; -} -def ExtendOperand64 : AsmOperandClass { - let SuperClasses = [ExtendOperand]; - let Name = "Extend64"; - let DiagnosticType = "AddSubRegExtendSmall"; -} -// 'extend' that's a lsl of a 64-bit register. -def ExtendOperandLSL64 : AsmOperandClass { - let SuperClasses = [ExtendOperand]; - let Name = "ExtendLSL64"; - let RenderMethod = "addExtend64Operands"; - let DiagnosticType = "AddSubRegExtendLarge"; -} - -// 8-bit floating-point immediate encodings. -def FPImmOperand : AsmOperandClass { - let Name = "FPImm"; - let ParserMethod = "tryParseFPImm"; - let DiagnosticType = "InvalidFPImm"; -} - -def CondCode : AsmOperandClass { - let Name = "CondCode"; - let DiagnosticType = "InvalidCondCode"; -} - -// A 32-bit register pasrsed as 64-bit -def GPR32as64Operand : AsmOperandClass { - let Name = "GPR32as64"; - let ParserMethod = - "tryParseGPROperand"; -} -def GPR32as64 : RegisterOperand { - let ParserMatchClass = GPR32as64Operand; -} - -// A 64-bit register pasrsed as 32-bit -def GPR64as32Operand : AsmOperandClass { - let Name = "GPR64as32"; - let ParserMethod = - "tryParseGPROperand"; -} -def GPR64as32 : RegisterOperand { - let ParserMatchClass = GPR64as32Operand; -} - -// 8-bit immediate for AdvSIMD where 64-bit values of the form: -// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh -// are encoded as the eight bit value 'abcdefgh'. 
-def SIMDImmType10Operand : AsmOperandClass { let Name = "SIMDImmType10"; } - -class UImmScaledMemoryIndexed : AsmOperandClass { - let Name = "UImm" # Width # "s" # Scale; - let DiagnosticType = "InvalidMemoryIndexed" # Scale # "UImm" # Width; - let RenderMethod = "addImmScaledOperands<" # Scale # ">"; - let PredicateMethod = "isUImmScaled<" # Width # ", " # Scale # ">"; -} - -class SImmScaledMemoryIndexed : AsmOperandClass { - let Name = "SImm" # Width # "s" # Scale; - let DiagnosticType = "InvalidMemoryIndexed" # Scale # "SImm" # Width; - let RenderMethod = "addImmScaledOperands<" # Scale # ">"; - let PredicateMethod = "isSImmScaled<" # Width # ", " # Scale # ">"; -} - -//===----------------------------------------------------------------------===// -// Operand Definitions. -// - -// ADR[P] instruction labels. -def AdrpOperand : AsmOperandClass { - let Name = "AdrpLabel"; - let ParserMethod = "tryParseAdrpLabel"; - let DiagnosticType = "InvalidLabel"; -} -def adrplabel : Operand { - let EncoderMethod = "getAdrLabelOpValue"; - let PrintMethod = "printAdrpLabel"; - let ParserMatchClass = AdrpOperand; - let OperandType = "OPERAND_PCREL"; -} - -def AdrOperand : AsmOperandClass { - let Name = "AdrLabel"; - let ParserMethod = "tryParseAdrLabel"; - let DiagnosticType = "InvalidLabel"; -} -def adrlabel : Operand { - let EncoderMethod = "getAdrLabelOpValue"; - let ParserMatchClass = AdrOperand; -} - -class SImmOperand : AsmOperandClass { - let Name = "SImm" # width; - let DiagnosticType = "InvalidMemoryIndexedSImm" # width; - let RenderMethod = "addImmOperands"; - let PredicateMethod = "isSImm<" # width # ">"; -} - - -class AsmImmRange : AsmOperandClass { - let Name = "Imm" # Low # "_" # High; - let DiagnosticType = "InvalidImm" # Low # "_" # High; - let RenderMethod = "addImmOperands"; - let PredicateMethod = "isImmInRange<" # Low # "," # High # ">"; -} - -// Authenticated loads for v8.3 can have scaled 10-bit immediate offsets. 
-def SImm10s8Operand : SImmScaledMemoryIndexed<10, 8>; -def simm10Scaled : Operand { - let ParserMatchClass = SImm10s8Operand; - let DecoderMethod = "DecodeSImm<10>"; - let PrintMethod = "printImmScale<8>"; -} - -def simm9s16 : Operand { - let ParserMatchClass = SImmScaledMemoryIndexed<9, 16>; - let DecoderMethod = "DecodeSImm<9>"; - let PrintMethod = "printImmScale<16>"; -} - -// uimm6 predicate - True if the immediate is in the range [0, 63]. -def UImm6Operand : AsmOperandClass { - let Name = "UImm6"; - let DiagnosticType = "InvalidImm0_63"; -} - -def uimm6 : Operand, ImmLeaf= 0 && Imm < 64; }]> { - let ParserMatchClass = UImm6Operand; -} - -def uimm16 : Operand, ImmLeaf= 0 && Imm < 65536;}]>{ - let ParserMatchClass = AsmImmRange<0, 65535>; -} - -def SImm9Operand : SImmOperand<9>; -def simm9 : Operand, ImmLeaf= -256 && Imm < 256; }]> { - let ParserMatchClass = SImm9Operand; - let DecoderMethod = "DecodeSImm<9>"; -} - -def SImm8Operand : SImmOperand<8>; -def simm8 : Operand, ImmLeaf= -128 && Imm < 128; }]> { - let ParserMatchClass = SImm8Operand; - let DecoderMethod = "DecodeSImm<8>"; -} - -def SImm6Operand : SImmOperand<6>; -def simm6_32b : Operand, ImmLeaf= -32 && Imm < 32; }]> { - let ParserMatchClass = SImm6Operand; - let DecoderMethod = "DecodeSImm<6>"; -} - -def SImm5Operand : SImmOperand<5>; -def simm5_64b : Operand, ImmLeaf= -16 && Imm < 16; }]> { - let ParserMatchClass = SImm5Operand; - let DecoderMethod = "DecodeSImm<5>"; -} - -def simm5_32b : Operand, ImmLeaf= -16 && Imm < 16; }]> { - let ParserMatchClass = SImm5Operand; - let DecoderMethod = "DecodeSImm<5>"; -} - -def simm5_8b : Operand, ImmLeaf= -16 && (int8_t)Imm < 16; }]> { - let ParserMatchClass = SImm5Operand; - let DecoderMethod = "DecodeSImm<5>"; - let PrintMethod = "printSImm<8>"; -} - -def simm5_16b : Operand, ImmLeaf= -16 && (int16_t)Imm < 16; }]> { - let ParserMatchClass = SImm5Operand; - let DecoderMethod = "DecodeSImm<5>"; - let PrintMethod = "printSImm<16>"; -} - -// simm7sN predicate - 
True if the immediate is a multiple of N in the range -// [-64 * N, 63 * N]. - -def SImm7s4Operand : SImmScaledMemoryIndexed<7, 4>; -def SImm7s8Operand : SImmScaledMemoryIndexed<7, 8>; -def SImm7s16Operand : SImmScaledMemoryIndexed<7, 16>; - -def simm7s4 : Operand { - let ParserMatchClass = SImm7s4Operand; - let PrintMethod = "printImmScale<4>"; -} - -def simm7s8 : Operand { - let ParserMatchClass = SImm7s8Operand; - let PrintMethod = "printImmScale<8>"; -} - -def simm7s16 : Operand { - let ParserMatchClass = SImm7s16Operand; - let PrintMethod = "printImmScale<16>"; -} - -def am_sve_fi : ComplexPattern; - -def am_indexed7s8 : ComplexPattern; -def am_indexed7s16 : ComplexPattern; -def am_indexed7s32 : ComplexPattern; -def am_indexed7s64 : ComplexPattern; -def am_indexed7s128 : ComplexPattern; - -def am_indexedu6s128 : ComplexPattern; -def am_indexeds9s128 : ComplexPattern; - -def UImmS1XForm : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i64); -}]>; -def UImmS2XForm : SDNodeXFormgetTargetConstant(N->getZExtValue() / 2, SDLoc(N), MVT::i64); -}]>; -def UImmS4XForm : SDNodeXFormgetTargetConstant(N->getZExtValue() / 4, SDLoc(N), MVT::i64); -}]>; -def UImmS8XForm : SDNodeXFormgetTargetConstant(N->getZExtValue() / 8, SDLoc(N), MVT::i64); -}]>; - -// uimm5sN predicate - True if the immediate is a multiple of N in the range -// [0 * N, 32 * N]. 
-def UImm5s2Operand : UImmScaledMemoryIndexed<5, 2>; -def UImm5s4Operand : UImmScaledMemoryIndexed<5, 4>; -def UImm5s8Operand : UImmScaledMemoryIndexed<5, 8>; - -def uimm5s2 : Operand, ImmLeaf= 0 && Imm < (32*2) && ((Imm % 2) == 0); }], - UImmS2XForm> { - let ParserMatchClass = UImm5s2Operand; - let PrintMethod = "printImmScale<2>"; -} -def uimm5s4 : Operand, ImmLeaf= 0 && Imm < (32*4) && ((Imm % 4) == 0); }], - UImmS4XForm> { - let ParserMatchClass = UImm5s4Operand; - let PrintMethod = "printImmScale<4>"; -} -def uimm5s8 : Operand, ImmLeaf= 0 && Imm < (32*8) && ((Imm % 8) == 0); }], - UImmS8XForm> { - let ParserMatchClass = UImm5s8Operand; - let PrintMethod = "printImmScale<8>"; -} - -// tuimm5sN predicate - similiar to uimm5sN, but use TImmLeaf (TargetConstant) -// instead of ImmLeaf (Constant) -def tuimm5s2 : Operand, TImmLeaf= 0 && Imm < (32*2) && ((Imm % 2) == 0); }], - UImmS2XForm> { - let ParserMatchClass = UImm5s2Operand; - let PrintMethod = "printImmScale<2>"; -} -def tuimm5s4 : Operand, TImmLeaf= 0 && Imm < (32*4) && ((Imm % 4) == 0); }], - UImmS4XForm> { - let ParserMatchClass = UImm5s4Operand; - let PrintMethod = "printImmScale<4>"; -} -def tuimm5s8 : Operand, TImmLeaf= 0 && Imm < (32*8) && ((Imm % 8) == 0); }], - UImmS8XForm> { - let ParserMatchClass = UImm5s8Operand; - let PrintMethod = "printImmScale<8>"; -} - -// uimm6sN predicate - True if the immediate is a multiple of N in the range -// [0 * N, 64 * N]. 
-def UImm6s1Operand : UImmScaledMemoryIndexed<6, 1>; -def UImm6s2Operand : UImmScaledMemoryIndexed<6, 2>; -def UImm6s4Operand : UImmScaledMemoryIndexed<6, 4>; -def UImm6s8Operand : UImmScaledMemoryIndexed<6, 8>; -def UImm6s16Operand : UImmScaledMemoryIndexed<6, 16>; - -def uimm6s1 : Operand, ImmLeaf= 0 && Imm < 64; }]> { - let ParserMatchClass = UImm6s1Operand; -} -def uimm6s2 : Operand, ImmLeaf= 0 && Imm < (64*2) && ((Imm % 2) == 0); }]> { - let PrintMethod = "printImmScale<2>"; - let ParserMatchClass = UImm6s2Operand; -} -def uimm6s4 : Operand, ImmLeaf= 0 && Imm < (64*4) && ((Imm % 4) == 0); }]> { - let PrintMethod = "printImmScale<4>"; - let ParserMatchClass = UImm6s4Operand; -} -def uimm6s8 : Operand, ImmLeaf= 0 && Imm < (64*8) && ((Imm % 8) == 0); }]> { - let PrintMethod = "printImmScale<8>"; - let ParserMatchClass = UImm6s8Operand; -} -def uimm6s16 : Operand, ImmLeaf= 0 && Imm < (64*16) && ((Imm % 16) == 0); }]> { - let PrintMethod = "printImmScale<16>"; - let ParserMatchClass = UImm6s16Operand; -} - -def SImmS2XForm : SDNodeXFormgetTargetConstant(N->getSExtValue() / 2, SDLoc(N), MVT::i64); -}]>; -def SImmS3XForm : SDNodeXFormgetTargetConstant(N->getSExtValue() / 3, SDLoc(N), MVT::i64); -}]>; -def SImmS4XForm : SDNodeXFormgetTargetConstant(N->getSExtValue() / 4, SDLoc(N), MVT::i64); -}]>; -def SImmS16XForm : SDNodeXFormgetTargetConstant(N->getSExtValue() / 16, SDLoc(N), MVT::i64); -}]>; -def SImmS32XForm : SDNodeXFormgetTargetConstant(N->getSExtValue() / 32, SDLoc(N), MVT::i64); -}]>; - -// simm6sN predicate - True if the immediate is a multiple of N in the range -// [-32 * N, 31 * N]. -def SImm6s1Operand : SImmScaledMemoryIndexed<6, 1>; -def simm6s1 : Operand, ImmLeaf= -32 && Imm < 32; }]> { - let ParserMatchClass = SImm6s1Operand; - let DecoderMethod = "DecodeSImm<6>"; -} - -// simm4sN predicate - True if the immediate is a multiple of N in the range -// [ -8* N, 7 * N]. 
-def SImm4s1Operand : SImmScaledMemoryIndexed<4, 1>; -def SImm4s2Operand : SImmScaledMemoryIndexed<4, 2>; -def SImm4s3Operand : SImmScaledMemoryIndexed<4, 3>; -def SImm4s4Operand : SImmScaledMemoryIndexed<4, 4>; -def SImm4s16Operand : SImmScaledMemoryIndexed<4, 16>; -def SImm4s32Operand : SImmScaledMemoryIndexed<4, 32>; - -def simm4s1 : Operand, ImmLeaf=-8 && Imm <= 7; }]> { - let ParserMatchClass = SImm4s1Operand; - let DecoderMethod = "DecodeSImm<4>"; -} - -def simm4s2 : Operand, ImmLeaf=-16 && Imm <= 14 && (Imm % 2) == 0x0; }], SImmS2XForm> { - let PrintMethod = "printImmScale<2>"; - let ParserMatchClass = SImm4s2Operand; - let DecoderMethod = "DecodeSImm<4>"; -} - -def simm4s3 : Operand, ImmLeaf=-24 && Imm <= 21 && (Imm % 3) == 0x0; }], SImmS3XForm> { - let PrintMethod = "printImmScale<3>"; - let ParserMatchClass = SImm4s3Operand; - let DecoderMethod = "DecodeSImm<4>"; -} - -def simm4s4 : Operand, ImmLeaf=-32 && Imm <= 28 && (Imm % 4) == 0x0; }], SImmS4XForm> { - let PrintMethod = "printImmScale<4>"; - let ParserMatchClass = SImm4s4Operand; - let DecoderMethod = "DecodeSImm<4>"; -} -def simm4s16 : Operand, ImmLeaf=-128 && Imm <= 112 && (Imm % 16) == 0x0; }], SImmS16XForm> { - let PrintMethod = "printImmScale<16>"; - let ParserMatchClass = SImm4s16Operand; - let DecoderMethod = "DecodeSImm<4>"; -} -def simm4s32 : Operand, ImmLeaf=-256 && Imm <= 224 && (Imm % 32) == 0x0; }], SImmS32XForm> { - let PrintMethod = "printImmScale<32>"; - let ParserMatchClass = SImm4s32Operand; - let DecoderMethod = "DecodeSImm<4>"; -} - -def Imm1_8Operand : AsmImmRange<1, 8>; -def Imm1_16Operand : AsmImmRange<1, 16>; -def Imm1_32Operand : AsmImmRange<1, 32>; -def Imm1_64Operand : AsmImmRange<1, 64>; - -class BranchTarget : AsmOperandClass { - let Name = "BranchTarget" # N; - let DiagnosticType = "InvalidLabel"; - let PredicateMethod = "isBranchTarget<" # N # ">"; -} - -class PCRelLabel : BranchTarget { - let Name = "PCRelLabel" # N; -} - -def BranchTarget14Operand : BranchTarget<14>; 
-def BranchTarget26Operand : BranchTarget<26>; -def PCRelLabel19Operand : PCRelLabel<19>; - -def MovWSymbolG3AsmOperand : AsmOperandClass { - let Name = "MovWSymbolG3"; - let RenderMethod = "addImmOperands"; -} - -def movw_symbol_g3 : Operand { - let ParserMatchClass = MovWSymbolG3AsmOperand; -} - -def MovWSymbolG2AsmOperand : AsmOperandClass { - let Name = "MovWSymbolG2"; - let RenderMethod = "addImmOperands"; -} - -def movw_symbol_g2 : Operand { - let ParserMatchClass = MovWSymbolG2AsmOperand; -} - -def MovWSymbolG1AsmOperand : AsmOperandClass { - let Name = "MovWSymbolG1"; - let RenderMethod = "addImmOperands"; -} - -def movw_symbol_g1 : Operand { - let ParserMatchClass = MovWSymbolG1AsmOperand; -} - -def MovWSymbolG0AsmOperand : AsmOperandClass { - let Name = "MovWSymbolG0"; - let RenderMethod = "addImmOperands"; -} - -def movw_symbol_g0 : Operand { - let ParserMatchClass = MovWSymbolG0AsmOperand; -} - -class fixedpoint_i32 - : Operand, - ComplexPattern", [fpimm, ld]> { - let EncoderMethod = "getFixedPointScaleOpValue"; - let DecoderMethod = "DecodeFixedPointScaleImm32"; - let ParserMatchClass = Imm1_32Operand; -} - -class fixedpoint_i64 - : Operand, - ComplexPattern", [fpimm, ld]> { - let EncoderMethod = "getFixedPointScaleOpValue"; - let DecoderMethod = "DecodeFixedPointScaleImm64"; - let ParserMatchClass = Imm1_64Operand; -} - -def fixedpoint_f16_i32 : fixedpoint_i32; -def fixedpoint_f32_i32 : fixedpoint_i32; -def fixedpoint_f64_i32 : fixedpoint_i32; - -def fixedpoint_f16_i64 : fixedpoint_i64; -def fixedpoint_f32_i64 : fixedpoint_i64; -def fixedpoint_f64_i64 : fixedpoint_i64; - -def vecshiftR8 : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 9); -}]> { - let EncoderMethod = "getVecShiftR8OpValue"; - let DecoderMethod = "DecodeVecShiftR8Imm"; - let ParserMatchClass = Imm1_8Operand; -} -def vecshiftR16 : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 17); -}]> { - let EncoderMethod = "getVecShiftR16OpValue"; - let DecoderMethod = "DecodeVecShiftR16Imm"; - let 
ParserMatchClass = Imm1_16Operand; -} -def vecshiftR16Narrow : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 9); -}]> { - let EncoderMethod = "getVecShiftR16OpValue"; - let DecoderMethod = "DecodeVecShiftR16ImmNarrow"; - let ParserMatchClass = Imm1_8Operand; -} -def vecshiftR32 : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 33); -}]> { - let EncoderMethod = "getVecShiftR32OpValue"; - let DecoderMethod = "DecodeVecShiftR32Imm"; - let ParserMatchClass = Imm1_32Operand; -} -def vecshiftR32Narrow : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 17); -}]> { - let EncoderMethod = "getVecShiftR32OpValue"; - let DecoderMethod = "DecodeVecShiftR32ImmNarrow"; - let ParserMatchClass = Imm1_16Operand; -} -def vecshiftR64 : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 65); -}]> { - let EncoderMethod = "getVecShiftR64OpValue"; - let DecoderMethod = "DecodeVecShiftR64Imm"; - let ParserMatchClass = Imm1_64Operand; -} -def vecshiftR64Narrow : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 33); -}]> { - let EncoderMethod = "getVecShiftR64OpValue"; - let DecoderMethod = "DecodeVecShiftR64ImmNarrow"; - let ParserMatchClass = Imm1_32Operand; -} - -// Same as vecshiftR#N, but use TargetConstant (TimmLeaf) instead of Constant -// (ImmLeaf) -def tvecshiftR8 : Operand, TImmLeaf 0) && (((uint32_t)Imm) < 9); -}]> { - let EncoderMethod = "getVecShiftR8OpValue"; - let DecoderMethod = "DecodeVecShiftR8Imm"; - let ParserMatchClass = Imm1_8Operand; -} -def tvecshiftR16 : Operand, TImmLeaf 0) && (((uint32_t)Imm) < 17); -}]> { - let EncoderMethod = "getVecShiftR16OpValue"; - let DecoderMethod = "DecodeVecShiftR16Imm"; - let ParserMatchClass = Imm1_16Operand; -} -def tvecshiftR32 : Operand, TImmLeaf 0) && (((uint32_t)Imm) < 33); -}]> { - let EncoderMethod = "getVecShiftR32OpValue"; - let DecoderMethod = "DecodeVecShiftR32Imm"; - let ParserMatchClass = Imm1_32Operand; -} -def tvecshiftR64 : Operand, TImmLeaf 0) && (((uint32_t)Imm) < 65); -}]> { - let EncoderMethod = "getVecShiftR64OpValue"; - let DecoderMethod = 
"DecodeVecShiftR64Imm"; - let ParserMatchClass = Imm1_64Operand; -} - -def Imm0_0Operand : AsmImmRange<0, 0>; -def Imm0_1Operand : AsmImmRange<0, 1>; -def Imm0_3Operand : AsmImmRange<0, 3>; -def Imm0_7Operand : AsmImmRange<0, 7>; -def Imm0_15Operand : AsmImmRange<0, 15>; -def Imm0_31Operand : AsmImmRange<0, 31>; -def Imm0_63Operand : AsmImmRange<0, 63>; - -def vecshiftL8 : Operand, ImmLeaf { - let EncoderMethod = "getVecShiftL8OpValue"; - let DecoderMethod = "DecodeVecShiftL8Imm"; - let ParserMatchClass = Imm0_7Operand; -} -def vecshiftL16 : Operand, ImmLeaf { - let EncoderMethod = "getVecShiftL16OpValue"; - let DecoderMethod = "DecodeVecShiftL16Imm"; - let ParserMatchClass = Imm0_15Operand; -} -def vecshiftL32 : Operand, ImmLeaf { - let EncoderMethod = "getVecShiftL32OpValue"; - let DecoderMethod = "DecodeVecShiftL32Imm"; - let ParserMatchClass = Imm0_31Operand; -} -def vecshiftL64 : Operand, ImmLeaf { - let EncoderMethod = "getVecShiftL64OpValue"; - let DecoderMethod = "DecodeVecShiftL64Imm"; - let ParserMatchClass = Imm0_63Operand; -} - -// Same as vecshiftL#N, but use TargetConstant (TimmLeaf) instead of Constant -// (ImmLeaf) -def tvecshiftL8 : Operand, TImmLeaf { - let EncoderMethod = "getVecShiftL8OpValue"; - let DecoderMethod = "DecodeVecShiftL8Imm"; - let ParserMatchClass = Imm0_7Operand; -} -def tvecshiftL16 : Operand, TImmLeaf { - let EncoderMethod = "getVecShiftL16OpValue"; - let DecoderMethod = "DecodeVecShiftL16Imm"; - let ParserMatchClass = Imm0_15Operand; -} -def tvecshiftL32 : Operand, TImmLeaf { - let EncoderMethod = "getVecShiftL32OpValue"; - let DecoderMethod = "DecodeVecShiftL32Imm"; - let ParserMatchClass = Imm0_31Operand; -} -def tvecshiftL64 : Operand, TImmLeaf { - let EncoderMethod = "getVecShiftL64OpValue"; - let DecoderMethod = "DecodeVecShiftL64Imm"; - let ParserMatchClass = Imm0_63Operand; -} - -// Crazy immediate formats used by 32-bit and 64-bit logical immediate -// instructions for splatting repeating bit patterns across the 
immediate. -def logical_imm32_XFORM : SDNodeXFormgetZExtValue(), 32); - return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); -}]>; -def logical_imm64_XFORM : SDNodeXFormgetZExtValue(), 64); - return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); -}]>; - -def gi_logical_imm32_XFORM : GICustomOperandRenderer<"renderLogicalImm32">, - GISDNodeXFormEquiv; -def gi_logical_imm64_XFORM : GICustomOperandRenderer<"renderLogicalImm64">, - GISDNodeXFormEquiv; - -let DiagnosticType = "LogicalSecondSource" in { - def LogicalImm32Operand : AsmOperandClass { - let Name = "LogicalImm32"; - let PredicateMethod = "isLogicalImm"; - let RenderMethod = "addLogicalImmOperands"; - } - def LogicalImm64Operand : AsmOperandClass { - let Name = "LogicalImm64"; - let PredicateMethod = "isLogicalImm"; - let RenderMethod = "addLogicalImmOperands"; - } - def LogicalImm32NotOperand : AsmOperandClass { - let Name = "LogicalImm32Not"; - let PredicateMethod = "isLogicalImm"; - let RenderMethod = "addLogicalImmNotOperands"; - } - def LogicalImm64NotOperand : AsmOperandClass { - let Name = "LogicalImm64Not"; - let PredicateMethod = "isLogicalImm"; - let RenderMethod = "addLogicalImmNotOperands"; - } -} -def logical_imm32 : Operand, IntImmLeaf { - let PrintMethod = "printLogicalImm"; - let ParserMatchClass = LogicalImm32Operand; -} -def logical_imm64 : Operand, IntImmLeaf { - let PrintMethod = "printLogicalImm"; - let ParserMatchClass = LogicalImm64Operand; -} -def logical_imm32_not : Operand { - let ParserMatchClass = LogicalImm32NotOperand; -} -def logical_imm64_not : Operand { - let ParserMatchClass = LogicalImm64NotOperand; -} - -// immXX_0_65535 predicates - True if the immediate is in the range [0,65535]. -let ParserMatchClass = AsmImmRange<0, 65535>, PrintMethod = "printImmHex" in { -def timm32_0_65535 : Operand, TImmLeaf; - -def timm64_0_65535 : Operand, TImmLeaf; -} - -// imm0_255 predicate - True if the immediate is in the range [0,255]. 
-def Imm0_255Operand : AsmImmRange<0,255>; - -def imm0_255 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_255Operand; - let PrintMethod = "printImm"; -} - -// imm0_127 predicate - True if the immediate is in the range [0,127] -def Imm0_127Operand : AsmImmRange<0, 127>; -def imm0_127 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_127Operand; - let PrintMethod = "printImm"; -} - -def imm0_127_64b : Operand, ImmLeaf { - let ParserMatchClass = Imm0_127Operand; - let PrintMethod = "printImm"; -} - -// NOTE: These imm0_N operands have to be of type i64 because i64 is the size -// for all shift-amounts. - -// imm0_63 predicate - True if the immediate is in the range [0,63] -def imm0_63 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_63Operand; -} - -def timm0_63 : Operand, TImmLeaf { - let ParserMatchClass = Imm0_63Operand; -} - -// imm0_31 predicate - True if the immediate is in the range [0,31] -def imm0_31 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_31Operand; -} - -// timm0_31 predicate - same ass imm0_31, but use TargetConstant (TimmLeaf) -// instead of Constant (ImmLeaf) -def timm0_31 : Operand, TImmLeaf { - let ParserMatchClass = Imm0_31Operand; -} - -// True if the 32-bit immediate is in the range [0,31] -def imm32_0_31 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_31Operand; -} - -// imm0_1 predicate - True if the immediate is in the range [0,1] -def imm0_1 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_1Operand; -} - -// timm0_1 - as above, but use TargetConstant (TImmLeaf) -def timm0_1 : Operand, TImmLeaf { - let ParserMatchClass = Imm0_1Operand; -} - -// imm0_15 predicate - True if the immediate is in the range [0,15] -def imm0_15 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_15Operand; -} - -// imm0_7 predicate - True if the immediate is in the range [0,7] -def imm0_7 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_7Operand; -} - -// imm0_3 predicate - True if the immediate is in the range [0,3] -def imm0_3 : Operand, 
ImmLeaf { - let ParserMatchClass = Imm0_3Operand; -} - -// timm32_0_7 predicate - True if the 32-bit immediate is in the range [0,7] -def timm32_0_7 : Operand, TImmLeaf { - let ParserMatchClass = Imm0_7Operand; -} - -// imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15] -def imm32_0_15 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_15Operand; -} - -// An arithmetic shifter operand: -// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr -// {5-0} - imm6 -class arith_shift : Operand { - let PrintMethod = "printShifter"; - let ParserMatchClass = !cast( - "ArithmeticShifterOperand" # width); -} - -def arith_shift32 : arith_shift; -def arith_shift64 : arith_shift; - -class arith_shifted_reg - : Operand, - ComplexPattern { - let PrintMethod = "printShiftedRegister"; - let MIOperandInfo = (ops regclass, !cast("arith_shift" # width)); -} - -def arith_shifted_reg32 : arith_shifted_reg; -def arith_shifted_reg64 : arith_shifted_reg; - -def gi_arith_shifted_reg32 : - GIComplexOperandMatcher, - GIComplexPatternEquiv; - -def gi_arith_shifted_reg64 : - GIComplexOperandMatcher, - GIComplexPatternEquiv; - -// An arithmetic shifter operand: -// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr, 11 = ror -// {5-0} - imm6 -class logical_shift : Operand { - let PrintMethod = "printShifter"; - let ParserMatchClass = !cast( - "LogicalShifterOperand" # width); -} - -def logical_shift32 : logical_shift<32>; -def logical_shift64 : logical_shift<64>; - -class logical_shifted_reg - : Operand, - ComplexPattern { - let PrintMethod = "printShiftedRegister"; - let MIOperandInfo = (ops regclass, shiftop); -} - -def logical_shifted_reg32 : logical_shifted_reg; -def logical_shifted_reg64 : logical_shifted_reg; - -def gi_logical_shifted_reg32 : - GIComplexOperandMatcher, - GIComplexPatternEquiv; - -def gi_logical_shifted_reg64 : - GIComplexOperandMatcher, - GIComplexPatternEquiv; - -// A logical vector shifter operand: -// {7-6} - shift type: 00 = lsl -// {5-0} - imm6: #0, 
#8, #16, or #24 -def logical_vec_shift : Operand { - let PrintMethod = "printShifter"; - let EncoderMethod = "getVecShifterOpValue"; - let ParserMatchClass = LogicalVecShifterOperand; -} - -// A logical vector half-word shifter operand: -// {7-6} - shift type: 00 = lsl -// {5-0} - imm6: #0 or #8 -def logical_vec_hw_shift : Operand { - let PrintMethod = "printShifter"; - let EncoderMethod = "getVecShifterOpValue"; - let ParserMatchClass = LogicalVecHalfWordShifterOperand; -} - -// A vector move shifter operand: -// {0} - imm1: #8 or #16 -def move_vec_shift : Operand { - let PrintMethod = "printShifter"; - let EncoderMethod = "getMoveVecShifterOpValue"; - let ParserMatchClass = MoveVecShifterOperand; -} - -let DiagnosticType = "AddSubSecondSource" in { - def AddSubImmOperand : AsmOperandClass { - let Name = "AddSubImm"; - let ParserMethod = "tryParseImmWithOptionalShift"; - let RenderMethod = "addImmWithOptionalShiftOperands<12>"; - } - def AddSubImmNegOperand : AsmOperandClass { - let Name = "AddSubImmNeg"; - let ParserMethod = "tryParseImmWithOptionalShift"; - let RenderMethod = "addImmNegWithOptionalShiftOperands<12>"; - } -} -// An ADD/SUB immediate shifter operand: -// second operand: -// {7-6} - shift type: 00 = lsl -// {5-0} - imm6: #0 or #12 -class addsub_shifted_imm - : Operand, ComplexPattern { - let PrintMethod = "printAddSubImm"; - let EncoderMethod = "getAddSubImmOpValue"; - let ParserMatchClass = AddSubImmOperand; - let MIOperandInfo = (ops i32imm, i32imm); -} - -class addsub_shifted_imm_neg - : Operand { - let EncoderMethod = "getAddSubImmOpValue"; - let ParserMatchClass = AddSubImmNegOperand; - let MIOperandInfo = (ops i32imm, i32imm); -} - -def addsub_shifted_imm32 : addsub_shifted_imm; -def addsub_shifted_imm64 : addsub_shifted_imm; -def addsub_shifted_imm32_neg : addsub_shifted_imm_neg; -def addsub_shifted_imm64_neg : addsub_shifted_imm_neg; - -def gi_addsub_shifted_imm32 : - GIComplexOperandMatcher, - GIComplexPatternEquiv; - -def 
gi_addsub_shifted_imm64 : - GIComplexOperandMatcher, - GIComplexPatternEquiv; - -class neg_addsub_shifted_imm - : Operand, ComplexPattern { - let PrintMethod = "printAddSubImm"; - let EncoderMethod = "getAddSubImmOpValue"; - let ParserMatchClass = AddSubImmOperand; - let MIOperandInfo = (ops i32imm, i32imm); -} - -def neg_addsub_shifted_imm32 : neg_addsub_shifted_imm; -def neg_addsub_shifted_imm64 : neg_addsub_shifted_imm; - -def gi_neg_addsub_shifted_imm32 : - GIComplexOperandMatcher, - GIComplexPatternEquiv; - -def gi_neg_addsub_shifted_imm64 : - GIComplexOperandMatcher, - GIComplexPatternEquiv; - -// An extend operand: -// {5-3} - extend type -// {2-0} - imm3 -def arith_extend : Operand { - let PrintMethod = "printArithExtend"; - let ParserMatchClass = ExtendOperand; -} -def arith_extend64 : Operand { - let PrintMethod = "printArithExtend"; - let ParserMatchClass = ExtendOperand64; -} - -// 'extend' that's a lsl of a 64-bit register. -def arith_extendlsl64 : Operand { - let PrintMethod = "printArithExtend"; - let ParserMatchClass = ExtendOperandLSL64; -} - -class arith_extended_reg32 : Operand, - ComplexPattern { - let PrintMethod = "printExtendedRegister"; - let MIOperandInfo = (ops GPR32, arith_extend); -} - -class arith_extended_reg32to64 : Operand, - ComplexPattern { - let PrintMethod = "printExtendedRegister"; - let MIOperandInfo = (ops GPR32, arith_extend64); -} - -def arith_extended_reg32_i32 : arith_extended_reg32; -def gi_arith_extended_reg32_i32 : - GIComplexOperandMatcher, - GIComplexPatternEquiv; - -def arith_extended_reg32_i64 : arith_extended_reg32; -def gi_arith_extended_reg32_i64 : - GIComplexOperandMatcher, - GIComplexPatternEquiv; - -def arith_extended_reg32to64_i64 : arith_extended_reg32to64; -def gi_arith_extended_reg32to64_i64 : - GIComplexOperandMatcher, - GIComplexPatternEquiv; - -// Floating-point immediate. 
- -def fpimm16XForm : SDNodeXFormgetValueAPF(); - uint32_t enc = AArch64_AM::getFP16Imm(InVal); - return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); - }]>; - -def fpimm32XForm : SDNodeXFormgetValueAPF(); - uint32_t enc = AArch64_AM::getFP32Imm(InVal); - return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); - }]>; - -def fpimm64XForm : SDNodeXFormgetValueAPF(); - uint32_t enc = AArch64_AM::getFP64Imm(InVal); - return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); - }]>; - -def fpimm16 : Operand, - FPImmLeaf { - let ParserMatchClass = FPImmOperand; - let PrintMethod = "printFPImmOperand"; -} - -def fpimm32 : Operand, - FPImmLeaf { - let ParserMatchClass = FPImmOperand; - let PrintMethod = "printFPImmOperand"; -} -def fpimm64 : Operand, - FPImmLeaf { - let ParserMatchClass = FPImmOperand; - let PrintMethod = "printFPImmOperand"; -} - -def fpimm8 : Operand { - let ParserMatchClass = FPImmOperand; - let PrintMethod = "printFPImmOperand"; -} - -def fpimm0 : FPImmLeaf; - -def fpimm_half : FPImmLeaf; - -def fpimm_one : FPImmLeaf; - -def fpimm_two : FPImmLeaf; - -def gi_fpimm16 : GICustomOperandRenderer<"renderFPImm16">, - GISDNodeXFormEquiv; -def gi_fpimm32 : GICustomOperandRenderer<"renderFPImm32">, - GISDNodeXFormEquiv; -def gi_fpimm64 : GICustomOperandRenderer<"renderFPImm64">, - GISDNodeXFormEquiv; - -// Vector lane operands -class AsmVectorIndex : AsmOperandClass { - let Name = NamePrefix # "IndexRange" # Min # "_" # Max; - let DiagnosticType = "Invalid" # Name; - let PredicateMethod = "isVectorIndex<" # Min # ", " # Max # ">"; - let RenderMethod = "addVectorIndexOperands"; -} - -class AsmVectorIndexOpnd - : Operand { - let ParserMatchClass = mc; - let PrintMethod = "printVectorIndex"; -} - -multiclass VectorIndex { - def "" : AsmVectorIndexOpnd, ImmLeaf; - def _timm : AsmVectorIndexOpnd, TImmLeaf; -} - -def VectorIndex0Operand : AsmVectorIndex<0, 0>; -def VectorIndex1Operand : AsmVectorIndex<1, 1>; -def VectorIndexBOperand : AsmVectorIndex<0, 15>; 
-def VectorIndexHOperand : AsmVectorIndex<0, 7>; -def VectorIndexSOperand : AsmVectorIndex<0, 3>; -def VectorIndexDOperand : AsmVectorIndex<0, 1>; - -defm VectorIndex0 : VectorIndex; -defm VectorIndex1 : VectorIndex; -defm VectorIndexB : VectorIndex; -defm VectorIndexH : VectorIndex; -defm VectorIndexS : VectorIndex; -defm VectorIndexD : VectorIndex; - -defm VectorIndex132b : VectorIndex; -defm VectorIndexB32b : VectorIndex; -defm VectorIndexH32b : VectorIndex; -defm VectorIndexS32b : VectorIndex; -defm VectorIndexD32b : VectorIndex; - -def SVEVectorIndexExtDupBOperand : AsmVectorIndex<0, 63, "SVE">; -def SVEVectorIndexExtDupHOperand : AsmVectorIndex<0, 31, "SVE">; -def SVEVectorIndexExtDupSOperand : AsmVectorIndex<0, 15, "SVE">; -def SVEVectorIndexExtDupDOperand : AsmVectorIndex<0, 7, "SVE">; -def SVEVectorIndexExtDupQOperand : AsmVectorIndex<0, 3, "SVE">; - -defm sve_elm_idx_extdup_b - : VectorIndex; -defm sve_elm_idx_extdup_h - : VectorIndex; -defm sve_elm_idx_extdup_s - : VectorIndex; -defm sve_elm_idx_extdup_d - : VectorIndex; -defm sve_elm_idx_extdup_q - : VectorIndex; - -def sme_elm_idx0_0 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_0Operand; - let PrintMethod = "printMatrixIndex"; -} -def sme_elm_idx0_1 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_1Operand; - let PrintMethod = "printMatrixIndex"; -} -def sme_elm_idx0_3 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_3Operand; - let PrintMethod = "printMatrixIndex"; -} -def sme_elm_idx0_7 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_7Operand; - let PrintMethod = "printMatrixIndex"; -} -def sme_elm_idx0_15 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_15Operand; - let PrintMethod = "printMatrixIndex"; -} - -// 8-bit immediate for AdvSIMD where 64-bit values of the form: -// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh -// are encoded as the eight bit value 'abcdefgh'. 
-def simdimmtype10 : Operand, - FPImmLeafgetValueAPF(); - uint32_t enc = AArch64_AM::encodeAdvSIMDModImmType10(N->getValueAPF() - .bitcastToAPInt() - .getZExtValue()); - return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); - }]>> { - let ParserMatchClass = SIMDImmType10Operand; - let PrintMethod = "printSIMDType10Operand"; -} - - -//--- -// System management -//--- - -// Base encoding for system instruction operands. -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class BaseSystemI pattern = []> - : I { - let Inst{31-22} = 0b1101010100; - let Inst{21} = L; -} - -// System instructions which do not have an Rt register. -class SimpleSystemI pattern = []> - : BaseSystemI { - let Inst{4-0} = 0b11111; -} - -// System instructions which have an Rt register. -class RtSystemI pattern = []> - : BaseSystemI, - Sched<[WriteSys]> { - bits<5> Rt; - let Inst{4-0} = Rt; -} - -// System instructions for transactional memory extension -class TMBaseSystemI CRm, bits<3> op2, dag oops, dag iops, - string asm, string operands, list pattern> - : BaseSystemI, - Sched<[WriteSys]> { - let Inst{20-12} = 0b000110011; - let Inst{11-8} = CRm; - let Inst{7-5} = op2; - let DecoderMethod = ""; - - let mayLoad = 1; - let mayStore = 1; -} - -// System instructions for transactional memory - single input operand -class TMSystemI CRm, string asm, list pattern> - : TMBaseSystemI<0b1, CRm, 0b011, - (outs GPR64:$Rt), (ins), asm, "\t$Rt", pattern> { - bits<5> Rt; - let Inst{4-0} = Rt; -} - -// System instructions that pass a register argument -// This class assumes the register is for input rather than output. 
-class RegInputSystemI CRm, bits<3> Op2, string asm, - list pattern = []> - : RtSystemI<0, (outs), (ins GPR64:$Rt), asm, "\t$Rt", pattern> { - let Inst{20-12} = 0b000110001; - let Inst{11-8} = CRm; - let Inst{7-5} = Op2; -} - -// System instructions for transactional memory - no operand -class TMSystemINoOperand CRm, string asm, list pattern> - : TMBaseSystemI<0b0, CRm, 0b011, (outs), (ins), asm, "", pattern> { - let Inst{4-0} = 0b11111; -} - -// System instructions for exit from transactions -class TMSystemException op1, string asm, list pattern> - : I<(outs), (ins timm64_0_65535:$imm), asm, "\t$imm", "", pattern>, - Sched<[WriteSys]> { - bits<16> imm; - let Inst{31-24} = 0b11010100; - let Inst{23-21} = op1; - let Inst{20-5} = imm; - let Inst{4-0} = 0b00000; -} - -// Hint instructions that take both a CRm and a 3-bit immediate. -// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot -// model patterns with sufficiently fine granularity -let mayStore = 1, mayLoad = 1, hasSideEffects = 1 in - class HintI - : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#"\t$imm", "", - [(int_aarch64_hint imm0_127:$imm)]>, - Sched<[WriteHint]> { - bits <7> imm; - let Inst{20-12} = 0b000110010; - let Inst{11-5} = imm; - } - -// System instructions taking a single literal operand which encodes into -// CRm. op2 differentiates the opcodes. 
-def BarrierAsmOperand : AsmOperandClass { - let Name = "Barrier"; - let ParserMethod = "tryParseBarrierOperand"; -} -def barrier_op : Operand { - let PrintMethod = "printBarrierOption"; - let ParserMatchClass = BarrierAsmOperand; -} -def BarriernXSAsmOperand : AsmOperandClass { - let Name = "BarriernXS"; - let ParserMethod = "tryParseBarriernXSOperand"; -} -def barrier_nxs_op : Operand { - let PrintMethod = "printBarriernXSOption"; - let ParserMatchClass = BarriernXSAsmOperand; -} -class CRmSystemI opc, string asm, - list pattern = []> - : SimpleSystemI<0, (ins crmtype:$CRm), asm, "\t$CRm", pattern>, - Sched<[WriteBarrier]> { - bits<4> CRm; - let Inst{20-12} = 0b000110011; - let Inst{11-8} = CRm; - let Inst{7-5} = opc; -} - -class SystemNoOperands op2, string asm, list pattern = []> - : SimpleSystemI<0, (ins), asm, "", pattern>, - Sched<[]> { - bits<4> CRm; - let CRm = 0b0011; - let Inst{31-12} = 0b11010101000000110010; - let Inst{11-8} = CRm; - let Inst{7-5} = op2; - let Inst{4-0} = 0b11111; -} - -// MRS/MSR system instructions. These have different operand classes because -// a different subset of registers can be accessed through each instruction. -def MRSSystemRegisterOperand : AsmOperandClass { - let Name = "MRSSystemRegister"; - let ParserMethod = "tryParseSysReg"; - let DiagnosticType = "MRS"; -} -// concatenation of op0, op1, CRn, CRm, op2. 16-bit immediate. 
-def mrs_sysreg_op : Operand { - let ParserMatchClass = MRSSystemRegisterOperand; - let DecoderMethod = "DecodeMRSSystemRegister"; - let PrintMethod = "printMRSSystemRegister"; -} - -def MSRSystemRegisterOperand : AsmOperandClass { - let Name = "MSRSystemRegister"; - let ParserMethod = "tryParseSysReg"; - let DiagnosticType = "MSR"; -} -def msr_sysreg_op : Operand { - let ParserMatchClass = MSRSystemRegisterOperand; - let DecoderMethod = "DecodeMSRSystemRegister"; - let PrintMethod = "printMSRSystemRegister"; -} - -def PSBHintOperand : AsmOperandClass { - let Name = "PSBHint"; - let ParserMethod = "tryParsePSBHint"; -} -def psbhint_op : Operand { - let ParserMatchClass = PSBHintOperand; - let PrintMethod = "printPSBHintOp"; - let MCOperandPredicate = [{ - // Check, if operand is valid, to fix exhaustive aliasing in disassembly. - // "psb" is an alias to "hint" only for certain values of CRm:Op2 fields. - if (!MCOp.isImm()) - return false; - return AArch64PSBHint::lookupPSBByEncoding(MCOp.getImm()) != nullptr; - }]; -} - -def BTIHintOperand : AsmOperandClass { - let Name = "BTIHint"; - let ParserMethod = "tryParseBTIHint"; -} -def btihint_op : Operand { - let ParserMatchClass = BTIHintOperand; - let PrintMethod = "printBTIHintOp"; - let MCOperandPredicate = [{ - // "bti" is an alias to "hint" only for certain values of CRm:Op2 fields. - if (!MCOp.isImm()) - return false; - return AArch64BTIHint::lookupBTIByEncoding(MCOp.getImm() ^ 32) != nullptr; - }]; -} - -class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg), - "mrs", "\t$Rt, $systemreg"> { - bits<16> systemreg; - let Inst{20-5} = systemreg; - let DecoderNamespace = "Fallback"; - // The MRS is set as a NZCV setting instruction. Not all MRS instructions - // require doing this. The alternative was to explicitly model each one, but - // it feels like it is unnecessary because it seems there are no negative - // consequences setting these flags for all. 
- let Defs = [NZCV]; -} - -// FIXME: Some of these def NZCV, others don't. Best way to model that? -// Explicitly modeling each of the system register as a register class -// would do it, but feels like overkill at this point. -class MSRI : RtSystemI<0, (outs), (ins msr_sysreg_op:$systemreg, GPR64:$Rt), - "msr", "\t$systemreg, $Rt"> { - bits<16> systemreg; - let Inst{20-5} = systemreg; - let DecoderNamespace = "Fallback"; -} - -def SystemPStateFieldWithImm0_15Operand : AsmOperandClass { - let Name = "SystemPStateFieldWithImm0_15"; - let ParserMethod = "tryParseSysReg"; -} -def pstatefield4_op : Operand { - let ParserMatchClass = SystemPStateFieldWithImm0_15Operand; - let PrintMethod = "printSystemPStateField"; -} - -// Instructions to modify PSTATE, no input reg -let Defs = [NZCV] in -class PstateWriteSimple - : SimpleSystemI<0, iops, asm, operands> { - - let Inst{20-19} = 0b00; - let Inst{15-12} = 0b0100; -} - -class MSRpstateImm0_15 - : PstateWriteSimple<(ins pstatefield4_op:$pstatefield, imm0_15:$imm), "msr", - "\t$pstatefield, $imm">, - Sched<[WriteSys]> { - - bits<6> pstatefield; - bits<4> imm; - let Inst{18-16} = pstatefield{5-3}; - let Inst{11-8} = imm; - let Inst{7-5} = pstatefield{2-0}; - - let DecoderMethod = "DecodeSystemPStateInstruction"; - // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns - // Fail the decoder should attempt to decode the instruction as MSRI. 
- let hasCompleteDecoder = 0; -} - -def SystemPStateFieldWithImm0_1Operand : AsmOperandClass { - let Name = "SystemPStateFieldWithImm0_1"; - let ParserMethod = "tryParseSysReg"; -} -def pstatefield1_op : Operand { - let ParserMatchClass = SystemPStateFieldWithImm0_1Operand; - let PrintMethod = "printSystemPStateField"; -} - -class MSRpstateImm0_1 - : PstateWriteSimple<(ins pstatefield1_op:$pstatefield, imm0_1:$imm), "msr", - "\t$pstatefield, $imm">, - Sched<[WriteSys]> { - - bits<6> pstatefield; - bit imm; - let Inst{18-16} = pstatefield{5-3}; - let Inst{11-9} = 0b000; - let Inst{8} = imm; - let Inst{7-5} = pstatefield{2-0}; - - let DecoderMethod = "DecodeSystemPStateInstruction"; - // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns - // Fail the decoder should attempt to decode the instruction as MSRI. - let hasCompleteDecoder = 0; -} - -// SYS and SYSL generic system instructions. -def SysCRAsmOperand : AsmOperandClass { - let Name = "SysCR"; - let ParserMethod = "tryParseSysCROperand"; -} - -def sys_cr_op : Operand { - let PrintMethod = "printSysCROperand"; - let ParserMatchClass = SysCRAsmOperand; -} - -class SystemXtI - : RtSystemI { - bits<3> op1; - bits<4> Cn; - bits<4> Cm; - bits<3> op2; - let Inst{20-19} = 0b01; - let Inst{18-16} = op1; - let Inst{15-12} = Cn; - let Inst{11-8} = Cm; - let Inst{7-5} = op2; -} - -class SystemLXtI - : RtSystemI { - bits<3> op1; - bits<4> Cn; - bits<4> Cm; - bits<3> op2; - let Inst{20-19} = 0b01; - let Inst{18-16} = op1; - let Inst{15-12} = Cn; - let Inst{11-8} = Cm; - let Inst{7-5} = op2; -} - - -// Branch (register) instructions: -// -// case opc of -// 0001 blr -// 0000 br -// 0101 dret -// 0100 eret -// 0010 ret -// otherwise UNDEFINED -class BaseBranchReg opc, dag oops, dag iops, string asm, - string operands, list pattern> - : I, Sched<[WriteBrReg]> { - let Inst{31-25} = 0b1101011; - let Inst{24-21} = opc; - let Inst{20-16} = 0b11111; - let Inst{15-10} = 0b000000; - let Inst{4-0} = 0b00000; -} - 
-class BranchReg opc, string asm, list pattern> - : BaseBranchReg { - bits<5> Rn; - let Inst{9-5} = Rn; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1, isReturn = 1 in -class SpecialReturn opc, string asm> - : BaseBranchReg { - let Inst{9-5} = 0b11111; -} - -let mayLoad = 1 in -class RCPCLoad sz, string asm, RegisterClass RC> - : I<(outs RC:$Rt), (ins GPR64sp0:$Rn), asm, "\t$Rt, [$Rn]", "", []>, - Sched<[]> { - bits<5> Rn; - bits<5> Rt; - let Inst{31-30} = sz; - let Inst{29-10} = 0b11100010111111110000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; -} - -class AuthBase M, dag oops, dag iops, string asm, string operands, - list pattern> - : I, Sched<[]> { - let isAuthenticated = 1; - let Inst{31-25} = 0b1101011; - let Inst{20-11} = 0b1111100001; - let Inst{10} = M; - let Inst{4-0} = 0b11111; -} - -class AuthBranchTwoOperands op, bits<1> M, string asm> - : AuthBase { - bits<5> Rn; - bits<5> Rm; - let Inst{24-22} = 0b100; - let Inst{21} = op; - let Inst{9-5} = Rn; - let Inst{4-0} = Rm; -} - -class AuthOneOperand opc, bits<1> M, string asm> - : AuthBase { - bits<5> Rn; - let Inst{24} = 0; - let Inst{23-21} = opc; - let Inst{9-5} = Rn; -} - -let Uses = [LR,SP] in -class AuthReturn op, bits<1> M, string asm> - : AuthBase { - let Inst{24} = 0; - let Inst{23-21} = op; - let Inst{9-0} = 0b1111111111; -} - -let mayLoad = 1 in -class BaseAuthLoad - : I, Sched<[]> { - bits<10> offset; - bits<5> Rn; - bits<5> Rt; - let isAuthenticated = 1; - let Inst{31-24} = 0b11111000; - let Inst{23} = M; - let Inst{22} = offset{9}; - let Inst{21} = 1; - let Inst{20-12} = offset{8-0}; - let Inst{11} = W; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; - - let DecoderMethod = "DecodeAuthLoadInstruction"; -} - -multiclass AuthLoad { - def indexed : BaseAuthLoad; - def writeback : BaseAuthLoad; - - def : InstAlias(NAME # "indexed") GPR64:$Rt, GPR64sp:$Rn, 0)>; - - def : InstAlias(NAME # "writeback") GPR64sp:$wback, GPR64:$Rt, 0), 0>; -} - -//--- -// Conditional branch 
instruction. -//--- - -// Condition code. -// 4-bit immediate. Pretty-printed as -def ccode : Operand { - let PrintMethod = "printCondCode"; - let ParserMatchClass = CondCode; -} -def inv_ccode : Operand { - // AL and NV are invalid in the aliases which use inv_ccode - let PrintMethod = "printInverseCondCode"; - let ParserMatchClass = CondCode; - let MCOperandPredicate = [{ - return MCOp.isImm() && - MCOp.getImm() != AArch64CC::AL && - MCOp.getImm() != AArch64CC::NV; - }]; -} - -// Conditional branch target. 19-bit immediate. The low two bits of the target -// offset are implied zero and so are not part of the immediate. -def am_brcond : Operand { - let EncoderMethod = "getCondBranchTargetOpValue"; - let DecoderMethod = "DecodePCRelLabel19"; - let PrintMethod = "printAlignedLabel"; - let ParserMatchClass = PCRelLabel19Operand; - let OperandType = "OPERAND_PCREL"; -} - -class BranchCond - : I<(outs), (ins ccode:$cond, am_brcond:$target), - mnemonic, ".$cond\t$target", "", - [(AArch64brcond bb:$target, imm:$cond, NZCV)]>, Sched<[WriteBr]> { - let isBranch = 1; - let isTerminator = 1; - let Uses = [NZCV]; - - bits<4> cond; - bits<19> target; - let Inst{31-24} = 0b01010100; - let Inst{23-5} = target; - let Inst{4} = bit4; - let Inst{3-0} = cond; -} - -//--- -// Compare-and-branch instructions. -//--- -class BaseCmpBranch - : I<(outs), (ins regtype:$Rt, am_brcond:$target), - asm, "\t$Rt, $target", "", - [(node regtype:$Rt, bb:$target)]>, - Sched<[WriteBr]> { - let isBranch = 1; - let isTerminator = 1; - - bits<5> Rt; - bits<19> target; - let Inst{30-25} = 0b011010; - let Inst{24} = op; - let Inst{23-5} = target; - let Inst{4-0} = Rt; -} - -multiclass CmpBranch { - def W : BaseCmpBranch { - let Inst{31} = 0; - } - def X : BaseCmpBranch { - let Inst{31} = 1; - } -} - -//--- -// Test-bit-and-branch instructions. -//--- -// Test-and-branch target. 14-bit sign-extended immediate. The low two bits of -// the target offset are implied zero and so are not part of the immediate. 
-def am_tbrcond : Operand { - let EncoderMethod = "getTestBranchTargetOpValue"; - let PrintMethod = "printAlignedLabel"; - let ParserMatchClass = BranchTarget14Operand; - let OperandType = "OPERAND_PCREL"; -} - -// AsmOperand classes to emit (or not) special diagnostics -def TBZImm0_31Operand : AsmOperandClass { - let Name = "TBZImm0_31"; - let PredicateMethod = "isImmInRange<0,31>"; - let RenderMethod = "addImmOperands"; -} -def TBZImm32_63Operand : AsmOperandClass { - let Name = "Imm32_63"; - let PredicateMethod = "isImmInRange<32,63>"; - let DiagnosticType = "InvalidImm0_63"; - let RenderMethod = "addImmOperands"; -} - -class tbz_imm0_31 : Operand, ImmLeaf { - let ParserMatchClass = matcher; -} - -def tbz_imm0_31_diag : tbz_imm0_31; -def tbz_imm0_31_nodiag : tbz_imm0_31; - -def tbz_imm32_63 : Operand, ImmLeaf 31) && (((uint32_t)Imm) < 64); -}]> { - let ParserMatchClass = TBZImm32_63Operand; -} - -class BaseTestBranch - : I<(outs), (ins regtype:$Rt, immtype:$bit_off, am_tbrcond:$target), - asm, "\t$Rt, $bit_off, $target", "", - [(node regtype:$Rt, immtype:$bit_off, bb:$target)]>, - Sched<[WriteBr]> { - let isBranch = 1; - let isTerminator = 1; - - bits<5> Rt; - bits<6> bit_off; - bits<14> target; - - let Inst{30-25} = 0b011011; - let Inst{24} = op; - let Inst{23-19} = bit_off{4-0}; - let Inst{18-5} = target; - let Inst{4-0} = Rt; - - let DecoderMethod = "DecodeTestAndBranch"; -} - -multiclass TestBranch { - def W : BaseTestBranch { - let Inst{31} = 0; - } - - def X : BaseTestBranch { - let Inst{31} = 1; - } - - // Alias X-reg with 0-31 imm to W-Reg. - def : InstAlias(NAME#"W") GPR32as64:$Rd, - tbz_imm0_31_nodiag:$imm, am_tbrcond:$target), 0>; - def : Pat<(node GPR64:$Rn, tbz_imm0_31_diag:$imm, bb:$target), - (!cast(NAME#"W") (EXTRACT_SUBREG GPR64:$Rn, sub_32), - tbz_imm0_31_diag:$imm, bb:$target)>; -} - -//--- -// Unconditional branch (immediate) instructions. 
-//--- -def am_b_target : Operand { - let EncoderMethod = "getBranchTargetOpValue"; - let PrintMethod = "printAlignedLabel"; - let ParserMatchClass = BranchTarget26Operand; - let OperandType = "OPERAND_PCREL"; -} -def am_bl_target : Operand { - let EncoderMethod = "getBranchTargetOpValue"; - let PrintMethod = "printAlignedLabel"; - let ParserMatchClass = BranchTarget26Operand; - let OperandType = "OPERAND_PCREL"; -} - -class BImm pattern> - : I<(outs), iops, asm, "\t$addr", "", pattern>, Sched<[WriteBr]> { - bits<26> addr; - let Inst{31} = op; - let Inst{30-26} = 0b00101; - let Inst{25-0} = addr; - - let DecoderMethod = "DecodeUnconditionalBranch"; -} - -class BranchImm pattern> - : BImm; -class CallImm pattern> - : BImm; - -//--- -// Basic one-operand data processing instructions. -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseOneOperandData opc, RegisterClass regtype, string asm, - SDPatternOperator node> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "", - [(set regtype:$Rd, (node regtype:$Rn))]>, - Sched<[WriteI, ReadI]> { - bits<5> Rd; - bits<5> Rn; - - let Inst{30-13} = 0b101101011000000000; - let Inst{12-10} = opc; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass OneOperandData opc, string asm, - SDPatternOperator node = null_frag> { - def Wr : BaseOneOperandData { - let Inst{31} = 0; - } - - def Xr : BaseOneOperandData { - let Inst{31} = 1; - } -} - -class OneWRegData opc, string asm, SDPatternOperator node> - : BaseOneOperandData { - let Inst{31} = 0; -} - -class OneXRegData opc, string asm, SDPatternOperator node> - : BaseOneOperandData { - let Inst{31} = 1; -} - -class SignAuthOneData opcode_prefix, bits<2> opcode, string asm, - SDPatternOperator op> - : I<(outs GPR64:$dst), (ins GPR64:$Rd, GPR64sp:$Rn), asm, "\t$Rd, $Rn", - "$dst = $Rd", [(set GPR64:$dst, (op GPR64:$Rd, opcode, GPR64sp:$Rn))]>, - Sched<[WriteI, ReadI]> { - bits<5> Rd; - bits<5> 
Rn; - let Inst{31-15} = 0b11011010110000010; - let Inst{14-12} = opcode_prefix; - let Inst{11-10} = opcode; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class SignAuthZero opcode_prefix, bits<2> opcode, string asm, - SDPatternOperator op> - : I<(outs GPR64:$dst), (ins GPR64:$Rd), asm, "\t$Rd", "$dst = $Rd", - [(set GPR64:$dst, (op GPR64:$Rd, opcode, (i64 0)))]>, - Sched<[]> { - bits<5> Rd; - let Inst{31-15} = 0b11011010110000010; - let Inst{14-12} = opcode_prefix; - let Inst{11-10} = opcode; - let Inst{9-5} = 0b11111; - let Inst{4-0} = Rd; -} - -class SignAuthTwoOperand opc, string asm, - SDPatternOperator OpNode> - : I<(outs GPR64:$Rd), (ins GPR64:$Rn, GPR64sp:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", - [(set GPR64:$Rd, (OpNode GPR64:$Rn, GPR64sp:$Rm))]>, - Sched<[WriteI, ReadI, ReadI]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-21} = 0b10011010110; - let Inst{20-16} = Rm; - let Inst{15-14} = 0b00; - let Inst{13-10} = opc; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class ClearAuth data, string asm> - : I<(outs GPR64:$Rd), (ins GPR64:$Rn), asm, "\t$Rd", "$Rd = $Rn", []>, Sched<[]> { - bits<5> Rd; - let Inst{31-11} = 0b110110101100000101000; - let Inst{10} = data; - let Inst{9-5} = 0b11111; - let Inst{4-0} = Rd; -} - -// Base class for the Armv8.4-A 8 and 16-bit flag manipulation instructions -class BaseFlagManipulation - : I<(outs), iops, asm, ops, "", []>, - Sched<[WriteI, ReadI, ReadI]> { - let Uses = [NZCV]; - let Defs = [NZCV]; - bits<5> Rn; - let Inst{31} = sf; - let Inst{30-15} = 0b0111010000000000; - let Inst{14} = sz; - let Inst{13-10} = 0b0010; - let Inst{9-5} = Rn; - let Inst{4-0} = 0b01101; -} - -class FlagRotate - : BaseFlagManipulation<0b1, 0b0, iops, asm, ops> { - bits<6> imm; - bits<4> mask; - let Inst{20-15} = imm; - let Inst{13-10} = 0b0001; - let Inst{4} = 0b0; - let Inst{3-0} = mask; -} - -//--- -// Basic two-operand data processing instructions. 
-//--- -class BaseBaseAddSubCarry pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", pattern>, - Sched<[WriteI, ReadI, ReadI]> { - let Uses = [NZCV]; - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{30} = isSub; - let Inst{28-21} = 0b11010000; - let Inst{20-16} = Rm; - let Inst{15-10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class BaseAddSubCarry - : BaseBaseAddSubCarry; - -class BaseAddSubCarrySetFlags - : BaseBaseAddSubCarry { - let Defs = [NZCV]; -} - -multiclass AddSubCarry { - def Wr : BaseAddSubCarry { - let Inst{31} = 0; - let Inst{29} = 0; - } - def Xr : BaseAddSubCarry { - let Inst{31} = 1; - let Inst{29} = 0; - } - - // Sets flags. - def SWr : BaseAddSubCarrySetFlags { - let Inst{31} = 0; - let Inst{29} = 1; - } - def SXr : BaseAddSubCarrySetFlags { - let Inst{31} = 1; - let Inst{29} = 1; - } -} - -class BaseTwoOperand opc, RegisterClass regtype, string asm, - SDPatternOperator OpNode, - RegisterClass in1regtype = regtype, - RegisterClass in2regtype = regtype> - : I<(outs regtype:$Rd), (ins in1regtype:$Rn, in2regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", - [(set regtype:$Rd, (OpNode in1regtype:$Rn, in2regtype:$Rm))]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{30-21} = 0b0011010110; - let Inst{20-16} = Rm; - let Inst{15-14} = 0b00; - let Inst{13-10} = opc; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class BaseDiv - : BaseTwoOperand<{0,0,1,?}, regtype, asm, OpNode> { - let Inst{10} = isSigned; -} - -multiclass Div { - def Wr : BaseDiv, - Sched<[WriteID32, ReadID, ReadID]> { - let Inst{31} = 0; - } - def Xr : BaseDiv, - Sched<[WriteID64, ReadID, ReadID]> { - let Inst{31} = 1; - } -} - -class BaseShift shift_type, RegisterClass regtype, string asm, - SDPatternOperator OpNode = null_frag> - : BaseTwoOperand<{1,0,?,?}, regtype, asm, OpNode>, - Sched<[WriteIS, ReadI]> { - let Inst{11-10} = shift_type; -} - -multiclass Shift shift_type, string asm, SDNode OpNode> { - def Wr : 
BaseShift { - let Inst{31} = 0; - } - - def Xr : BaseShift { - let Inst{31} = 1; - } - - def : Pat<(i32 (OpNode GPR32:$Rn, i64:$Rm)), - (!cast(NAME # "Wr") GPR32:$Rn, - (EXTRACT_SUBREG i64:$Rm, sub_32))>; - - def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (zext GPR32:$Rm)))), - (!cast(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; - - def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (anyext GPR32:$Rm)))), - (!cast(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; - - def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (sext GPR32:$Rm)))), - (!cast(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; - - def : Pat<(i64 (OpNode GPR64:$Rn, (i64 (sext GPR32:$Rm)))), - (!cast(NAME # "Xr") GPR64:$Rn, - (SUBREG_TO_REG (i32 0), GPR32:$Rm, sub_32))>; - - def : Pat<(i64 (OpNode GPR64:$Rn, (i64 (zext GPR32:$Rm)))), - (!cast(NAME # "Xr") GPR64:$Rn, - (SUBREG_TO_REG (i32 0), GPR32:$Rm, sub_32))>; -} - -class ShiftAlias - : InstAlias; - -class BaseMulAccum opc, RegisterClass multype, - RegisterClass addtype, string asm, - list pattern> - : I<(outs addtype:$Rd), (ins multype:$Rn, multype:$Rm, addtype:$Ra), - asm, "\t$Rd, $Rn, $Rm, $Ra", "", pattern> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<5> Ra; - let Inst{30-24} = 0b0011011; - let Inst{23-21} = opc; - let Inst{20-16} = Rm; - let Inst{15} = isSub; - let Inst{14-10} = Ra; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass MulAccum { - // MADD/MSUB generation is decided by MachineCombiner.cpp - def Wrrr : BaseMulAccum, - Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> { - let Inst{31} = 0; - } - - def Xrrr : BaseMulAccum, - Sched<[WriteIM64, ReadIM, ReadIM, ReadIMA]> { - let Inst{31} = 1; - } -} - -class WideMulAccum opc, string asm, - SDNode AccNode, SDNode ExtNode> - : BaseMulAccum, - Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> { - let Inst{31} = 1; -} - -class MulHi opc, string asm, SDNode OpNode> - : I<(outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", - [(set GPR64:$Rd, (OpNode GPR64:$Rn, GPR64:$Rm))]>, - Sched<[WriteIM64, ReadIM, ReadIM]> { - bits<5> 
Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-24} = 0b10011011; - let Inst{23-21} = opc; - let Inst{20-16} = Rm; - let Inst{15} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - - // The Ra field of SMULH and UMULH is unused: it should be assembled as 31 - // (i.e. all bits 1) but is ignored by the processor. - let PostEncoderMethod = "fixMulHigh"; -} - -class MulAccumWAlias - : InstAlias; -class MulAccumXAlias - : InstAlias; -class WideMulAccumAlias - : InstAlias; - -class BaseCRC32 sz, bit C, RegisterClass StreamReg, - SDPatternOperator OpNode, string asm> - : I<(outs GPR32:$Rd), (ins GPR32:$Rn, StreamReg:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", - [(set GPR32:$Rd, (OpNode GPR32:$Rn, StreamReg:$Rm))]>, - Sched<[WriteISReg, ReadI, ReadISReg]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - - let Inst{31} = sf; - let Inst{30-21} = 0b0011010110; - let Inst{20-16} = Rm; - let Inst{15-13} = 0b010; - let Inst{12} = C; - let Inst{11-10} = sz; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - let Predicates = [HasCRC]; -} - -//--- -// Address generation. -//--- - -class ADRI pattern> - : I<(outs GPR64:$Xd), (ins adr:$label), asm, "\t$Xd, $label", "", - pattern>, - Sched<[WriteI]> { - bits<5> Xd; - bits<21> label; - let Inst{31} = page; - let Inst{30-29} = label{1-0}; - let Inst{28-24} = 0b10000; - let Inst{23-5} = label{20-2}; - let Inst{4-0} = Xd; - - let DecoderMethod = "DecodeAdrInstruction"; -} - -//--- -// Move immediate. 
-//--- - -def movimm32_imm : Operand { - let ParserMatchClass = AsmImmRange<0, 65535>; - let EncoderMethod = "getMoveWideImmOpValue"; - let PrintMethod = "printImm"; -} -def movimm32_shift : Operand { - let PrintMethod = "printShifter"; - let ParserMatchClass = MovImm32ShifterOperand; -} -def movimm64_shift : Operand { - let PrintMethod = "printShifter"; - let ParserMatchClass = MovImm64ShifterOperand; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseMoveImmediate opc, RegisterClass regtype, Operand shifter, - string asm> - : I<(outs regtype:$Rd), (ins movimm32_imm:$imm, shifter:$shift), - asm, "\t$Rd, $imm$shift", "", []>, - Sched<[WriteImm]> { - bits<5> Rd; - bits<16> imm; - bits<6> shift; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100101; - let Inst{22-21} = shift{5-4}; - let Inst{20-5} = imm; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeMoveImmInstruction"; -} - -multiclass MoveImmediate opc, string asm> { - def Wi : BaseMoveImmediate { - let Inst{31} = 0; - } - - def Xi : BaseMoveImmediate { - let Inst{31} = 1; - } -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseInsertImmediate opc, RegisterClass regtype, Operand shifter, - string asm> - : I<(outs regtype:$Rd), - (ins regtype:$src, movimm32_imm:$imm, shifter:$shift), - asm, "\t$Rd, $imm$shift", "$src = $Rd", []>, - Sched<[WriteI, ReadI]> { - bits<5> Rd; - bits<16> imm; - bits<6> shift; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100101; - let Inst{22-21} = shift{5-4}; - let Inst{20-5} = imm; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeMoveImmInstruction"; -} - -multiclass InsertImmediate opc, string asm> { - def Wi : BaseInsertImmediate { - let Inst{31} = 0; - } - - def Xi : BaseInsertImmediate { - let Inst{31} = 1; - } -} - -//--- -// Add/Subtract -//--- - -class BaseAddSubImm - : I<(outs dstRegtype:$Rd), inputs, asm_inst, asm_ops, "", [pattern]>, - Sched<[WriteI, ReadI]> { - bits<5> Rd; - bits<5> Rn; - let Inst{30} = isSub; - let Inst{29} = 
setFlags; - let Inst{28-24} = 0b10001; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class AddSubImmShift - : BaseAddSubImm { - bits<14> imm; - let Inst{23-22} = imm{13-12}; // '00' => lsl #0, '01' => lsl #12 - let Inst{21-10} = imm{11-0}; - let DecoderMethod = "DecodeAddSubImmShift"; -} - -class BaseAddSubRegPseudo - : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>, - Sched<[WriteI, ReadI, ReadI]>; - -class BaseAddSubSReg - : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", - [(set regtype:$Rd, (OpNode regtype:$Rn, shifted_regtype:$Rm))]>, - Sched<[WriteISReg, ReadI, ReadISReg]> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> src1; - bits<5> src2; - bits<8> shift; - let Inst{30} = isSub; - let Inst{29} = setFlags; - let Inst{28-24} = 0b01011; - let Inst{23-22} = shift{7-6}; - let Inst{21} = 0; - let Inst{20-16} = src2; - let Inst{15-10} = shift{5-0}; - let Inst{9-5} = src1; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeThreeAddrSRegInstruction"; -} - -class BaseAddSubEReg - : I<(outs dstRegtype:$R1), - (ins src1Regtype:$R2, src2Regtype:$R3), - asm, "\t$R1, $R2, $R3", "", - [(set dstRegtype:$R1, (OpNode src1Regtype:$R2, src2Regtype:$R3))]>, - Sched<[WriteIEReg, ReadI, ReadIEReg]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<6> ext; - let Inst{30} = isSub; - let Inst{29} = setFlags; - let Inst{28-24} = 0b01011; - let Inst{23-21} = 0b001; - let Inst{20-16} = Rm; - let Inst{15-13} = ext{5-3}; - let Inst{12-10} = ext{2-0}; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeAddSubERegInstruction"; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseAddSubEReg64 - : I<(outs 
dstRegtype:$Rd), - (ins src1Regtype:$Rn, src2Regtype:$Rm, ext_op:$ext), - asm, "\t$Rd, $Rn, $Rm$ext", "", []>, - Sched<[WriteIEReg, ReadI, ReadIEReg]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<6> ext; - let Inst{30} = isSub; - let Inst{29} = setFlags; - let Inst{28-24} = 0b01011; - let Inst{23-21} = 0b001; - let Inst{20-16} = Rm; - let Inst{15} = ext{5}; - let Inst{12-10} = ext{2-0}; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeAddSubERegInstruction"; -} - -// Aliases for register+register add/subtract. -class AddSubRegAlias - : InstAlias; - -multiclass AddSub { - let hasSideEffects = 0, isReMaterializable = 1, isAsCheapAsAMove = 1 in { - // Add/Subtract immediate - // Increase the weight of the immediate variant to try to match it before - // the extended register variant. - // We used to match the register variant before the immediate when the - // register argument could be implicitly zero-extended. - let AddedComplexity = 6 in - def Wri : AddSubImmShift { - let Inst{31} = 0; - } - let AddedComplexity = 6 in - def Xri : AddSubImmShift { - let Inst{31} = 1; - } - - // Add/Subtract register - Only used for CodeGen - def Wrr : BaseAddSubRegPseudo; - def Xrr : BaseAddSubRegPseudo; - - // Add/Subtract shifted register - def Wrs : BaseAddSubSReg { - let Inst{31} = 0; - } - def Xrs : BaseAddSubSReg { - let Inst{31} = 1; - } - } - - // Add/Subtract extended register - let AddedComplexity = 1, hasSideEffects = 0 in { - def Wrx : BaseAddSubEReg { - let Inst{31} = 0; - } - def Xrx : BaseAddSubEReg { - let Inst{31} = 1; - } - } - - def Xrx64 : BaseAddSubEReg64 { - // UXTX and SXTX only. - let Inst{14-13} = 0b11; - let Inst{31} = 1; - } - - // add Rd, Rb, -imm -> sub Rd, Rn, imm - def : InstSubst(NAME # "Wri") GPR32sp:$Rd, GPR32sp:$Rn, - addsub_shifted_imm32_neg:$imm), 0>; - def : InstSubst(NAME # "Xri") GPR64sp:$Rd, GPR64sp:$Rn, - addsub_shifted_imm64_neg:$imm), 0>; - - // Register/register aliases with no shift when SP is not used. 
- def : AddSubRegAlias(NAME#"Wrs"), - GPR32, GPR32, GPR32, 0>; - def : AddSubRegAlias(NAME#"Xrs"), - GPR64, GPR64, GPR64, 0>; - - // Register/register aliases with no shift when either the destination or - // first source register is SP. - def : AddSubRegAlias(NAME#"Wrx"), - GPR32sponly, GPR32sp, GPR32, 16>; // UXTW #0 - def : AddSubRegAlias(NAME#"Wrx"), - GPR32sp, GPR32sponly, GPR32, 16>; // UXTW #0 - def : AddSubRegAlias(NAME#"Xrx64"), - GPR64sponly, GPR64sp, GPR64, 24>; // UXTX #0 - def : AddSubRegAlias(NAME#"Xrx64"), - GPR64sp, GPR64sponly, GPR64, 24>; // UXTX #0 -} - -multiclass AddSubS { - let isCompare = 1, Defs = [NZCV] in { - // Add/Subtract immediate - def Wri : AddSubImmShift { - let Inst{31} = 0; - } - def Xri : AddSubImmShift { - let Inst{31} = 1; - } - - // Add/Subtract register - def Wrr : BaseAddSubRegPseudo; - def Xrr : BaseAddSubRegPseudo; - - // Add/Subtract shifted register - def Wrs : BaseAddSubSReg { - let Inst{31} = 0; - } - def Xrs : BaseAddSubSReg { - let Inst{31} = 1; - } - - // Add/Subtract extended register - let AddedComplexity = 1 in { - def Wrx : BaseAddSubEReg { - let Inst{31} = 0; - } - def Xrx : BaseAddSubEReg { - let Inst{31} = 1; - } - } - - def Xrx64 : BaseAddSubEReg64 { - // UXTX and SXTX only. - let Inst{14-13} = 0b11; - let Inst{31} = 1; - } - } // Defs = [NZCV] - - // Support negative immediates, e.g. 
adds Rd, Rn, -imm -> subs Rd, Rn, imm - def : InstSubst(NAME # "Wri") GPR32:$Rd, GPR32sp:$Rn, - addsub_shifted_imm32_neg:$imm), 0>; - def : InstSubst(NAME # "Xri") GPR64:$Rd, GPR64sp:$Rn, - addsub_shifted_imm64_neg:$imm), 0>; - - // Compare aliases - def : InstAlias(NAME#"Wri") - WZR, GPR32sp:$src, addsub_shifted_imm32:$imm), 5>; - def : InstAlias(NAME#"Xri") - XZR, GPR64sp:$src, addsub_shifted_imm64:$imm), 5>; - def : InstAlias(NAME#"Wrx") - WZR, GPR32sp:$src1, GPR32:$src2, arith_extend:$sh), 4>; - def : InstAlias(NAME#"Xrx") - XZR, GPR64sp:$src1, GPR32:$src2, arith_extend:$sh), 4>; - def : InstAlias(NAME#"Xrx64") - XZR, GPR64sp:$src1, GPR64:$src2, arith_extendlsl64:$sh), 4>; - def : InstAlias(NAME#"Wrs") - WZR, GPR32:$src1, GPR32:$src2, arith_shift32:$sh), 4>; - def : InstAlias(NAME#"Xrs") - XZR, GPR64:$src1, GPR64:$src2, arith_shift64:$sh), 4>; - - // Support negative immediates, e.g. cmp Rn, -imm -> cmn Rn, imm - def : InstSubst(NAME#"Wri") - WZR, GPR32sp:$src, addsub_shifted_imm32_neg:$imm), 0>; - def : InstSubst(NAME#"Xri") - XZR, GPR64sp:$src, addsub_shifted_imm64_neg:$imm), 0>; - - // Compare shorthands - def : InstAlias(NAME#"Wrs") - WZR, GPR32:$src1, GPR32:$src2, 0), 5>; - def : InstAlias(NAME#"Xrs") - XZR, GPR64:$src1, GPR64:$src2, 0), 5>; - def : InstAlias(NAME#"Wrx") - WZR, GPR32sponly:$src1, GPR32:$src2, 16), 5>; - def : InstAlias(NAME#"Xrx64") - XZR, GPR64sponly:$src1, GPR64:$src2, 24), 5>; - - // Register/register aliases with no shift when SP is not used. - def : AddSubRegAlias(NAME#"Wrs"), - GPR32, GPR32, GPR32, 0>; - def : AddSubRegAlias(NAME#"Xrs"), - GPR64, GPR64, GPR64, 0>; - - // Register/register aliases with no shift when the first source register - // is SP. 
- def : AddSubRegAlias(NAME#"Wrx"), - GPR32, GPR32sponly, GPR32, 16>; // UXTW #0 - def : AddSubRegAlias(NAME#"Xrx64"), - GPR64, GPR64sponly, GPR64, 24>; // UXTX #0 -} - -class AddSubG - : BaseAddSubImm< - isSub, 0, GPR64sp, asm_inst, "\t$Rd, $Rn, $imm6, $imm4", - (ins GPR64sp:$Rn, uimm6s16:$imm6, imm0_15:$imm4), - (set GPR64sp:$Rd, (OpNode GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4))> { - bits<6> imm6; - bits<4> imm4; - let Inst{31} = 1; - let Inst{23-22} = 0b10; - let Inst{21-16} = imm6; - let Inst{15-14} = 0b00; - let Inst{13-10} = imm4; - let Unpredictable{15-14} = 0b11; -} - -class SUBP - : BaseTwoOperand<0b0000, GPR64, asm_instr, OpNode, GPR64sp, GPR64sp> { - let Inst{31} = 1; - let Inst{29} = setsFlags; -} - -//--- -// Extract -//--- -def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisPtrTy<3>]>; -def AArch64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>; - -class BaseExtractImm patterns> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, imm_type:$imm), - asm, "\t$Rd, $Rn, $Rm, $imm", "", patterns>, - Sched<[WriteExtr, ReadExtrHi]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<6> imm; - - let Inst{30-23} = 0b00100111; - let Inst{21} = 0; - let Inst{20-16} = Rm; - let Inst{15-10} = imm; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass ExtractImm { - def Wrri : BaseExtractImm { - let Inst{31} = 0; - let Inst{22} = 0; - // imm<5> must be zero. 
- let imm{5} = 0; - } - def Xrri : BaseExtractImm { - - let Inst{31} = 1; - let Inst{22} = 1; - } -} - -//--- -// Bitfield -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseBitfieldImm opc, - RegisterClass regtype, Operand imm_type, string asm> - : I<(outs regtype:$Rd), (ins regtype:$Rn, imm_type:$immr, imm_type:$imms), - asm, "\t$Rd, $Rn, $immr, $imms", "", []>, - Sched<[WriteIS, ReadI]> { - bits<5> Rd; - bits<5> Rn; - bits<6> immr; - bits<6> imms; - - let Inst{30-29} = opc; - let Inst{28-23} = 0b100110; - let Inst{21-16} = immr; - let Inst{15-10} = imms; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass BitfieldImm opc, string asm> { - def Wri : BaseBitfieldImm { - let Inst{31} = 0; - let Inst{22} = 0; - // imms<5> and immr<5> must be zero, else ReservedValue(). - let Inst{21} = 0; - let Inst{15} = 0; - } - def Xri : BaseBitfieldImm { - let Inst{31} = 1; - let Inst{22} = 1; - } -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseBitfieldImmWith2RegArgs opc, - RegisterClass regtype, Operand imm_type, string asm> - : I<(outs regtype:$Rd), (ins regtype:$src, regtype:$Rn, imm_type:$immr, - imm_type:$imms), - asm, "\t$Rd, $Rn, $immr, $imms", "$src = $Rd", []>, - Sched<[WriteIS, ReadI]> { - bits<5> Rd; - bits<5> Rn; - bits<6> immr; - bits<6> imms; - - let Inst{30-29} = opc; - let Inst{28-23} = 0b100110; - let Inst{21-16} = immr; - let Inst{15-10} = imms; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass BitfieldImmWith2RegArgs opc, string asm> { - def Wri : BaseBitfieldImmWith2RegArgs { - let Inst{31} = 0; - let Inst{22} = 0; - // imms<5> and immr<5> must be zero, else ReservedValue(). 
- let Inst{21} = 0; - let Inst{15} = 0; - } - def Xri : BaseBitfieldImmWith2RegArgs { - let Inst{31} = 1; - let Inst{22} = 1; - } -} - -//--- -// Logical -//--- - -// Logical (immediate) -class BaseLogicalImm opc, RegisterClass dregtype, - RegisterClass sregtype, Operand imm_type, string asm, - list pattern> - : I<(outs dregtype:$Rd), (ins sregtype:$Rn, imm_type:$imm), - asm, "\t$Rd, $Rn, $imm", "", pattern>, - Sched<[WriteI, ReadI]> { - bits<5> Rd; - bits<5> Rn; - bits<13> imm; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100100; - let Inst{22} = imm{12}; - let Inst{21-16} = imm{11-6}; - let Inst{15-10} = imm{5-0}; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeLogicalImmInstruction"; -} - -// Logical (shifted register) -class BaseLogicalSReg opc, bit N, RegisterClass regtype, - logical_shifted_reg shifted_regtype, string asm, - list pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", pattern>, - Sched<[WriteISReg, ReadI, ReadISReg]> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> src1; - bits<5> src2; - bits<8> shift; - let Inst{30-29} = opc; - let Inst{28-24} = 0b01010; - let Inst{23-22} = shift{7-6}; - let Inst{21} = N; - let Inst{20-16} = src2; - let Inst{15-10} = shift{5-0}; - let Inst{9-5} = src1; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeThreeAddrSRegInstruction"; -} - -// Aliases for register+register logical instructions. -class LogicalRegAlias - : InstAlias; - -multiclass LogicalImm opc, string mnemonic, SDNode OpNode, - string Alias> { - let AddedComplexity = 6, isReMaterializable = 1, isAsCheapAsAMove = 1 in - def Wri : BaseLogicalImm { - let Inst{31} = 0; - let Inst{22} = 0; // 64-bit version has an additional bit of immediate. 
- } - let AddedComplexity = 6, isReMaterializable = 1, isAsCheapAsAMove = 1 in - def Xri : BaseLogicalImm { - let Inst{31} = 1; - } - - def : InstSubst(NAME # "Wri") GPR32sp:$Rd, GPR32:$Rn, - logical_imm32_not:$imm), 0>; - def : InstSubst(NAME # "Xri") GPR64sp:$Rd, GPR64:$Rn, - logical_imm64_not:$imm), 0>; -} - -multiclass LogicalImmS opc, string mnemonic, SDNode OpNode, - string Alias> { - let isCompare = 1, Defs = [NZCV] in { - def Wri : BaseLogicalImm { - let Inst{31} = 0; - let Inst{22} = 0; // 64-bit version has an additional bit of immediate. - } - def Xri : BaseLogicalImm { - let Inst{31} = 1; - } - } // end Defs = [NZCV] - - def : InstSubst(NAME # "Wri") GPR32:$Rd, GPR32:$Rn, - logical_imm32_not:$imm), 0>; - def : InstSubst(NAME # "Xri") GPR64:$Rd, GPR64:$Rn, - logical_imm64_not:$imm), 0>; -} - -class BaseLogicalRegPseudo - : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>, - Sched<[WriteI, ReadI, ReadI]>; - -// Split from LogicalImm as not all instructions have both. 
-multiclass LogicalReg opc, bit N, string mnemonic, - SDPatternOperator OpNode> { - let isReMaterializable = 1, isAsCheapAsAMove = 1 in { - def Wrr : BaseLogicalRegPseudo; - def Xrr : BaseLogicalRegPseudo; - } - - def Wrs : BaseLogicalSReg { - let Inst{31} = 0; - } - def Xrs : BaseLogicalSReg { - let Inst{31} = 1; - } - - def : LogicalRegAlias(NAME#"Wrs"), GPR32>; - def : LogicalRegAlias(NAME#"Xrs"), GPR64>; -} - -// Split from LogicalReg to allow setting NZCV Defs -multiclass LogicalRegS opc, bit N, string mnemonic, - SDPatternOperator OpNode = null_frag> { - let Defs = [NZCV], mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def Wrr : BaseLogicalRegPseudo; - def Xrr : BaseLogicalRegPseudo; - - def Wrs : BaseLogicalSReg { - let Inst{31} = 0; - } - def Xrs : BaseLogicalSReg { - let Inst{31} = 1; - } - } // Defs = [NZCV] - - def : LogicalRegAlias(NAME#"Wrs"), GPR32>; - def : LogicalRegAlias(NAME#"Xrs"), GPR64>; -} - -//--- -// Conditionally set flags -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondComparisonImm - : I<(outs), (ins regtype:$Rn, immtype:$imm, imm32_0_15:$nzcv, ccode:$cond), - mnemonic, "\t$Rn, $imm, $nzcv, $cond", "", - [(set NZCV, (OpNode regtype:$Rn, immtype:$imm, (i32 imm:$nzcv), - (i32 imm:$cond), NZCV))]>, - Sched<[WriteI, ReadI]> { - let Uses = [NZCV]; - let Defs = [NZCV]; - - bits<5> Rn; - bits<5> imm; - bits<4> nzcv; - bits<4> cond; - - let Inst{30} = op; - let Inst{29-21} = 0b111010010; - let Inst{20-16} = imm; - let Inst{15-12} = cond; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4} = 0b0; - let Inst{3-0} = nzcv; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondComparisonReg - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), - mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", - [(set NZCV, (OpNode regtype:$Rn, regtype:$Rm, (i32 imm:$nzcv), - (i32 imm:$cond), NZCV))]>, - Sched<[WriteI, ReadI, ReadI]> { - let Uses = [NZCV]; - let Defs = [NZCV]; - - 
bits<5> Rn; - bits<5> Rm; - bits<4> nzcv; - bits<4> cond; - - let Inst{30} = op; - let Inst{29-21} = 0b111010010; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4} = 0b0; - let Inst{3-0} = nzcv; -} - -multiclass CondComparison { - // immediate operand variants - def Wi : BaseCondComparisonImm { - let Inst{31} = 0; - } - def Xi : BaseCondComparisonImm { - let Inst{31} = 1; - } - // register operand variants - def Wr : BaseCondComparisonReg { - let Inst{31} = 0; - } - def Xr : BaseCondComparisonReg { - let Inst{31} = 1; - } -} - -//--- -// Conditional select -//--- - -class BaseCondSelect op2, RegisterClass regtype, string asm> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), - asm, "\t$Rd, $Rn, $Rm, $cond", "", - [(set regtype:$Rd, - (AArch64csel regtype:$Rn, regtype:$Rm, (i32 imm:$cond), NZCV))]>, - Sched<[WriteI, ReadI, ReadI]> { - let Uses = [NZCV]; - - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<4> cond; - - let Inst{30} = op; - let Inst{29-21} = 0b011010100; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = op2; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass CondSelect op2, string asm> { - def Wr : BaseCondSelect { - let Inst{31} = 0; - } - def Xr : BaseCondSelect { - let Inst{31} = 1; - } -} - -class BaseCondSelectOp op2, RegisterClass regtype, string asm, - PatFrag frag> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), - asm, "\t$Rd, $Rn, $Rm, $cond", "", - [(set regtype:$Rd, - (AArch64csel regtype:$Rn, (frag regtype:$Rm), - (i32 imm:$cond), NZCV))]>, - Sched<[WriteI, ReadI, ReadI]> { - let Uses = [NZCV]; - - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<4> cond; - - let Inst{30} = op; - let Inst{29-21} = 0b011010100; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = op2; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -def inv_cond_XFORM : SDNodeXForm(N->getZExtValue()); - return 
CurDAG->getTargetConstant(AArch64CC::getInvertedCondCode(CC), SDLoc(N), - MVT::i32); -}]>; - -multiclass CondSelectOp op2, string asm, PatFrag frag> { - def Wr : BaseCondSelectOp { - let Inst{31} = 0; - } - def Xr : BaseCondSelectOp { - let Inst{31} = 1; - } - - def : Pat<(AArch64csel (frag GPR32:$Rm), GPR32:$Rn, (i32 imm:$cond), NZCV), - (!cast(NAME # Wr) GPR32:$Rn, GPR32:$Rm, - (inv_cond_XFORM imm:$cond))>; - - def : Pat<(AArch64csel (frag GPR64:$Rm), GPR64:$Rn, (i32 imm:$cond), NZCV), - (!cast(NAME # Xr) GPR64:$Rn, GPR64:$Rm, - (inv_cond_XFORM imm:$cond))>; -} - -//--- -// Special Mask Value -//--- -def maski8_or_more : Operand, - ImmLeaf { -} -def maski16_or_more : Operand, - ImmLeaf { -} - - -//--- -// Load/store -//--- - -// (unsigned immediate) -// Indexed for 8-bit registers. offset is in range [0,4095]. -def am_indexed8 : ComplexPattern; -def am_indexed16 : ComplexPattern; -def am_indexed32 : ComplexPattern; -def am_indexed64 : ComplexPattern; -def am_indexed128 : ComplexPattern; - -// (unsigned immediate) -// Indexed for 8-bit registers. offset is in range [0,63]. 
-def am_indexed8_6b : ComplexPattern", []>; -def am_indexed16_6b : ComplexPattern", []>; -def am_indexed32_6b : ComplexPattern", []>; -def am_indexed64_6b : ComplexPattern", []>; - -def gi_am_indexed8 : - GIComplexOperandMatcher">, - GIComplexPatternEquiv; -def gi_am_indexed16 : - GIComplexOperandMatcher">, - GIComplexPatternEquiv; -def gi_am_indexed32 : - GIComplexOperandMatcher">, - GIComplexPatternEquiv; -def gi_am_indexed64 : - GIComplexOperandMatcher">, - GIComplexPatternEquiv; -def gi_am_indexed128 : - GIComplexOperandMatcher">, - GIComplexPatternEquiv; - -class UImm12OffsetOperand : AsmOperandClass { - let Name = "UImm12Offset" # Scale; - let RenderMethod = "addUImm12OffsetOperands<" # Scale # ">"; - let PredicateMethod = "isUImm12Offset<" # Scale # ">"; - let DiagnosticType = "InvalidMemoryIndexed" # Scale; -} - -def UImm12OffsetScale1Operand : UImm12OffsetOperand<1>; -def UImm12OffsetScale2Operand : UImm12OffsetOperand<2>; -def UImm12OffsetScale4Operand : UImm12OffsetOperand<4>; -def UImm12OffsetScale8Operand : UImm12OffsetOperand<8>; -def UImm12OffsetScale16Operand : UImm12OffsetOperand<16>; - -class uimm12_scaled : Operand { - let ParserMatchClass - = !cast("UImm12OffsetScale" # Scale # "Operand"); - let EncoderMethod - = "getLdStUImm12OpValue"; - let PrintMethod = "printUImm12Offset<" # Scale # ">"; -} - -def uimm12s1 : uimm12_scaled<1>; -def uimm12s2 : uimm12_scaled<2>; -def uimm12s4 : uimm12_scaled<4>; -def uimm12s8 : uimm12_scaled<8>; -def uimm12s16 : uimm12_scaled<16>; - -class BaseLoadStoreUI sz, bit V, bits<2> opc, dag oops, dag iops, - string asm, list pattern> - : I { - bits<5> Rt; - - bits<5> Rn; - bits<12> offset; - - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b01; - let Inst{23-22} = opc; - let Inst{21-10} = offset; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; - - let DecoderMethod = "DecodeUnsignedLdStInstruction"; -} - -multiclass LoadUI sz, bit V, bits<2> opc, DAGOperand regtype, - Operand 
indextype, string asm, list pattern> { - let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in - def ui : BaseLoadStoreUI, - Sched<[WriteLD]>; - - def : InstAlias(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>; -} - -multiclass StoreUI sz, bit V, bits<2> opc, DAGOperand regtype, - Operand indextype, string asm, list pattern> { - let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in - def ui : BaseLoadStoreUI, - Sched<[WriteST]>; - - def : InstAlias(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>; -} - -// Same as StoreUI, but take a RegisterOperand. This is used by GlobalISel to -// substitute zero-registers automatically. -// -// TODO: Roll out zero-register subtitution to GPR32/GPR64 and fold this back -// into StoreUI. -multiclass StoreUIz sz, bit V, bits<2> opc, RegisterOperand regtype, - Operand indextype, string asm, list pattern> { - let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in - def ui : BaseLoadStoreUI, - Sched<[WriteST]>; - - def : InstAlias(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>; -} - -def PrefetchOperand : AsmOperandClass { - let Name = "Prefetch"; - let ParserMethod = "tryParsePrefetch"; -} -def prfop : Operand { - let PrintMethod = "printPrefetchOp"; - let ParserMatchClass = PrefetchOperand; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class PrefetchUI sz, bit V, bits<2> opc, string asm, list pat> - : BaseLoadStoreUI, - Sched<[WriteLD]>; - -//--- -// Load literal -//--- - -// Load literal address: 19-bit immediate. The low two bits of the target -// offset are implied zero and so are not part of the immediate. 
-def am_ldrlit : Operand { - let EncoderMethod = "getLoadLiteralOpValue"; - let DecoderMethod = "DecodePCRelLabel19"; - let PrintMethod = "printAlignedLabel"; - let ParserMatchClass = PCRelLabel19Operand; - let OperandType = "OPERAND_PCREL"; -} - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0, AddedComplexity = 20 in -class LoadLiteral opc, bit V, RegisterOperand regtype, string asm, list pat> - : I<(outs regtype:$Rt), (ins am_ldrlit:$label), - asm, "\t$Rt, $label", "", pat>, - Sched<[WriteLD]> { - bits<5> Rt; - bits<19> label; - let Inst{31-30} = opc; - let Inst{29-27} = 0b011; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-5} = label; - let Inst{4-0} = Rt; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class PrefetchLiteral opc, bit V, string asm, list pat> - : I<(outs), (ins prfop:$Rt, am_ldrlit:$label), - asm, "\t$Rt, $label", "", pat>, - Sched<[WriteLD]> { - bits<5> Rt; - bits<19> label; - let Inst{31-30} = opc; - let Inst{29-27} = 0b011; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-5} = label; - let Inst{4-0} = Rt; -} - -//--- -// Load/store register offset -//--- - -def ro_Xindexed8 : ComplexPattern", []>; -def ro_Xindexed16 : ComplexPattern", []>; -def ro_Xindexed32 : ComplexPattern", []>; -def ro_Xindexed64 : ComplexPattern", []>; -def ro_Xindexed128 : ComplexPattern", []>; - -def gi_ro_Xindexed8 : - GIComplexOperandMatcher">, - GIComplexPatternEquiv; -def gi_ro_Xindexed16 : - GIComplexOperandMatcher">, - GIComplexPatternEquiv; -def gi_ro_Xindexed32 : - GIComplexOperandMatcher">, - GIComplexPatternEquiv; -def gi_ro_Xindexed64 : - GIComplexOperandMatcher">, - GIComplexPatternEquiv; -def gi_ro_Xindexed128 : - GIComplexOperandMatcher">, - GIComplexPatternEquiv; - -def ro_Windexed8 : ComplexPattern", []>; -def ro_Windexed16 : ComplexPattern", []>; -def ro_Windexed32 : ComplexPattern", []>; -def ro_Windexed64 : ComplexPattern", []>; -def ro_Windexed128 : ComplexPattern", []>; - -def gi_ro_Windexed8 : - 
GIComplexOperandMatcher">, - GIComplexPatternEquiv; -def gi_ro_Windexed16 : - GIComplexOperandMatcher">, - GIComplexPatternEquiv; -def gi_ro_Windexed32 : - GIComplexOperandMatcher">, - GIComplexPatternEquiv; -def gi_ro_Windexed64 : - GIComplexOperandMatcher">, - GIComplexPatternEquiv; -def gi_ro_Windexed128 : - GIComplexOperandMatcher">, - GIComplexPatternEquiv; - -class MemExtendOperand : AsmOperandClass { - let Name = "Mem" # Reg # "Extend" # Width; - let PredicateMethod = "isMem" # Reg # "Extend<" # Width # ">"; - let RenderMethod = "addMemExtendOperands"; - let DiagnosticType = "InvalidMemory" # Reg # "Extend" # Width; -} - -def MemWExtend8Operand : MemExtendOperand<"W", 8> { - // The address "[x0, x1, lsl #0]" actually maps to the variant which performs - // the trivial shift. - let RenderMethod = "addMemExtend8Operands"; -} -def MemWExtend16Operand : MemExtendOperand<"W", 16>; -def MemWExtend32Operand : MemExtendOperand<"W", 32>; -def MemWExtend64Operand : MemExtendOperand<"W", 64>; -def MemWExtend128Operand : MemExtendOperand<"W", 128>; - -def MemXExtend8Operand : MemExtendOperand<"X", 8> { - // The address "[x0, x1, lsl #0]" actually maps to the variant which performs - // the trivial shift. 
- let RenderMethod = "addMemExtend8Operands"; -} -def MemXExtend16Operand : MemExtendOperand<"X", 16>; -def MemXExtend32Operand : MemExtendOperand<"X", 32>; -def MemXExtend64Operand : MemExtendOperand<"X", 64>; -def MemXExtend128Operand : MemExtendOperand<"X", 128>; - -class ro_extend - : Operand { - let ParserMatchClass = ParserClass; - let PrintMethod = "printMemExtend<'" # Reg # "', " # Width # ">"; - let DecoderMethod = "DecodeMemExtend"; - let EncoderMethod = "getMemExtendOpValue"; - let MIOperandInfo = (ops i32imm:$signed, i32imm:$doshift); -} - -def ro_Wextend8 : ro_extend; -def ro_Wextend16 : ro_extend; -def ro_Wextend32 : ro_extend; -def ro_Wextend64 : ro_extend; -def ro_Wextend128 : ro_extend; - -def ro_Xextend8 : ro_extend; -def ro_Xextend16 : ro_extend; -def ro_Xextend32 : ro_extend; -def ro_Xextend64 : ro_extend; -def ro_Xextend128 : ro_extend; - -class ROAddrMode { - // CodeGen-level pattern covering the entire addressing mode. - ComplexPattern Wpat = windex; - ComplexPattern Xpat = xindex; - - // Asm-level Operand covering the valid "uxtw #3" style syntax. - Operand Wext = wextend; - Operand Xext = xextend; -} - -def ro8 : ROAddrMode; -def ro16 : ROAddrMode; -def ro32 : ROAddrMode; -def ro64 : ROAddrMode; -def ro128 : ROAddrMode; - -class LoadStore8RO sz, bit V, bits<2> opc, string asm, dag ins, - dag outs, list pat> - : I { - bits<5> Rt; - bits<5> Rn; - bits<5> Rm; - bits<2> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15} = extend{1}; // sign extend Rm? - let Inst{14} = 1; - let Inst{12} = extend{0}; // do shift? 
- let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; -} - -class ROInstAlias - : InstAlias; - -multiclass Load8RO sz, bit V, bits<2> opc, DAGOperand regtype, - string asm, ValueType Ty, SDPatternOperator loadop> { - let AddedComplexity = 10 in - def roW : LoadStore8RO, - Sched<[WriteLDIdx, ReadAdrBase]> { - let Inst{13} = 0b0; - } - - let AddedComplexity = 10 in - def roX : LoadStore8RO, - Sched<[WriteLDIdx, ReadAdrBase]> { - let Inst{13} = 0b1; - } - - def : ROInstAlias(NAME # "roX")>; -} - -multiclass Store8RO sz, bit V, bits<2> opc, DAGOperand regtype, - string asm, ValueType Ty, SDPatternOperator storeop> { - let AddedComplexity = 10 in - def roW : LoadStore8RO, - Sched<[WriteSTIdx, ReadST, ReadAdrBase]> { - let Inst{13} = 0b0; - } - - let AddedComplexity = 10 in - def roX : LoadStore8RO, - Sched<[WriteSTIdx, ReadST, ReadAdrBase]> { - let Inst{13} = 0b1; - } - - def : ROInstAlias(NAME # "roX")>; -} - -class LoadStore16RO sz, bit V, bits<2> opc, string asm, dag ins, - dag outs, list pat> - : I { - bits<5> Rt; - bits<5> Rn; - bits<5> Rm; - bits<2> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15} = extend{1}; // sign extend Rm? - let Inst{14} = 1; - let Inst{12} = extend{0}; // do shift? 
- let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; -} - -multiclass Load16RO sz, bit V, bits<2> opc, DAGOperand regtype, - string asm, ValueType Ty, SDPatternOperator loadop> { - let AddedComplexity = 10 in - def roW : LoadStore16RO, - Sched<[WriteLDIdx, ReadAdrBase]> { - let Inst{13} = 0b0; - } - - let AddedComplexity = 10 in - def roX : LoadStore16RO, - Sched<[WriteLDIdx, ReadAdrBase]> { - let Inst{13} = 0b1; - } - - def : ROInstAlias(NAME # "roX")>; -} - -multiclass Store16RO sz, bit V, bits<2> opc, DAGOperand regtype, - string asm, ValueType Ty, SDPatternOperator storeop> { - let AddedComplexity = 10 in - def roW : LoadStore16RO, - Sched<[WriteSTIdx, ReadST, ReadAdrBase]> { - let Inst{13} = 0b0; - } - - let AddedComplexity = 10 in - def roX : LoadStore16RO, - Sched<[WriteSTIdx, ReadST, ReadAdrBase]> { - let Inst{13} = 0b1; - } - - def : ROInstAlias(NAME # "roX")>; -} - -class LoadStore32RO sz, bit V, bits<2> opc, string asm, dag ins, - dag outs, list pat> - : I { - bits<5> Rt; - bits<5> Rn; - bits<5> Rm; - bits<2> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15} = extend{1}; // sign extend Rm? - let Inst{14} = 1; - let Inst{12} = extend{0}; // do shift? 
- let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; -} - -multiclass Load32RO sz, bit V, bits<2> opc, DAGOperand regtype, - string asm, ValueType Ty, SDPatternOperator loadop> { - let AddedComplexity = 10 in - def roW : LoadStore32RO, - Sched<[WriteLDIdx, ReadAdrBase]> { - let Inst{13} = 0b0; - } - - let AddedComplexity = 10 in - def roX : LoadStore32RO, - Sched<[WriteLDIdx, ReadAdrBase]> { - let Inst{13} = 0b1; - } - - def : ROInstAlias(NAME # "roX")>; -} - -multiclass Store32RO sz, bit V, bits<2> opc, DAGOperand regtype, - string asm, ValueType Ty, SDPatternOperator storeop> { - let AddedComplexity = 10 in - def roW : LoadStore32RO, - Sched<[WriteSTIdx, ReadST, ReadAdrBase]> { - let Inst{13} = 0b0; - } - - let AddedComplexity = 10 in - def roX : LoadStore32RO, - Sched<[WriteSTIdx, ReadST, ReadAdrBase]> { - let Inst{13} = 0b1; - } - - def : ROInstAlias(NAME # "roX")>; -} - -class LoadStore64RO sz, bit V, bits<2> opc, string asm, dag ins, - dag outs, list pat> - : I { - bits<5> Rt; - bits<5> Rn; - bits<5> Rm; - bits<2> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15} = extend{1}; // sign extend Rm? - let Inst{14} = 1; - let Inst{12} = extend{0}; // do shift? 
- let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; -} - -multiclass Load64RO sz, bit V, bits<2> opc, DAGOperand regtype, - string asm, ValueType Ty, SDPatternOperator loadop> { - let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in - def roW : LoadStore64RO, - Sched<[WriteLDIdx, ReadAdrBase]> { - let Inst{13} = 0b0; - } - - let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in - def roX : LoadStore64RO, - Sched<[WriteLDIdx, ReadAdrBase]> { - let Inst{13} = 0b1; - } - - def : ROInstAlias(NAME # "roX")>; -} - -multiclass Store64RO sz, bit V, bits<2> opc, DAGOperand regtype, - string asm, ValueType Ty, SDPatternOperator storeop> { - let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in - def roW : LoadStore64RO, - Sched<[WriteSTIdx, ReadST, ReadAdrBase]> { - let Inst{13} = 0b0; - } - - let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in - def roX : LoadStore64RO, - Sched<[WriteSTIdx, ReadST, ReadAdrBase]> { - let Inst{13} = 0b1; - } - - def : ROInstAlias(NAME # "roX")>; -} - -class LoadStore128RO sz, bit V, bits<2> opc, string asm, dag ins, - dag outs, list pat> - : I { - bits<5> Rt; - bits<5> Rn; - bits<5> Rm; - bits<2> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15} = extend{1}; // sign extend Rm? - let Inst{14} = 1; - let Inst{12} = extend{0}; // do shift? 
- let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; -} - -multiclass Load128RO sz, bit V, bits<2> opc, DAGOperand regtype, - string asm, ValueType Ty, SDPatternOperator loadop> { - let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in - def roW : LoadStore128RO, - Sched<[WriteLDIdx, ReadAdrBase]> { - let Inst{13} = 0b0; - } - - let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in - def roX : LoadStore128RO, - Sched<[WriteLDIdx, ReadAdrBase]> { - let Inst{13} = 0b1; - } - - def : ROInstAlias(NAME # "roX")>; -} - -multiclass Store128RO sz, bit V, bits<2> opc, DAGOperand regtype, - string asm> { - let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in - def roW : LoadStore128RO, - Sched<[WriteSTIdx, ReadST, ReadAdrBase]> { - let Inst{13} = 0b0; - } - - let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in - def roX : LoadStore128RO, - Sched<[WriteSTIdx, ReadST, ReadAdrBase]> { - let Inst{13} = 0b1; - } - - def : ROInstAlias(NAME # "roX")>; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class BasePrefetchRO sz, bit V, bits<2> opc, dag outs, dag ins, - string asm, list pat> - : I, - Sched<[WriteLD]> { - bits<5> Rt; - bits<5> Rn; - bits<5> Rm; - bits<2> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15} = extend{1}; // sign extend Rm? - let Inst{14} = 1; - let Inst{12} = extend{0}; // do shift? 
- let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; -} - -multiclass PrefetchRO sz, bit V, bits<2> opc, string asm> { - def roW : BasePrefetchRO { - let Inst{13} = 0b0; - } - - def roX : BasePrefetchRO { - let Inst{13} = 0b1; - } - - def : InstAlias<"prfm $Rt, [$Rn, $Rm]", - (!cast(NAME # "roX") prfop:$Rt, - GPR64sp:$Rn, GPR64:$Rm, 0, 0)>; -} - -//--- -// Load/store unscaled immediate -//--- - -def am_unscaled8 : ComplexPattern; -def am_unscaled16 : ComplexPattern; -def am_unscaled32 : ComplexPattern; -def am_unscaled64 : ComplexPattern; -def am_unscaled128 :ComplexPattern; - -def gi_am_unscaled8 : - GIComplexOperandMatcher, - GIComplexPatternEquiv; -def gi_am_unscaled16 : - GIComplexOperandMatcher, - GIComplexPatternEquiv; -def gi_am_unscaled32 : - GIComplexOperandMatcher, - GIComplexPatternEquiv; -def gi_am_unscaled64 : - GIComplexOperandMatcher, - GIComplexPatternEquiv; -def gi_am_unscaled128 : - GIComplexOperandMatcher, - GIComplexPatternEquiv; - - -class BaseLoadStoreUnscale sz, bit V, bits<2> opc, dag oops, dag iops, - string asm, list pattern> - : I { - bits<5> Rt; - bits<5> Rn; - bits<9> offset; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0; - let Inst{20-12} = offset; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; - - let DecoderMethod = "DecodeSignedLdStInstruction"; -} - -// Armv8.4 LDAPR & STLR with Immediate Offset instruction -multiclass BaseLoadUnscaleV84 sz, bits<2> opc, - DAGOperand regtype > { - def i : BaseLoadStoreUnscale, - Sched<[WriteST]> { - let Inst{29} = 0; - let Inst{24} = 1; - } - def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; -} - -multiclass BaseStoreUnscaleV84 sz, bits<2> opc, - DAGOperand regtype > { - def i : BaseLoadStoreUnscale, - Sched<[WriteST]> { - let Inst{29} = 0; - let Inst{24} = 1; - } - def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; -} - -multiclass LoadUnscaled sz, 
bit V, bits<2> opc, DAGOperand regtype, - string asm, list pattern> { - let AddedComplexity = 1 in // try this before LoadUI - def i : BaseLoadStoreUnscale, - Sched<[WriteLD]>; - - def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; -} - -multiclass StoreUnscaled sz, bit V, bits<2> opc, DAGOperand regtype, - string asm, list pattern> { - let AddedComplexity = 1 in // try this before StoreUI - def i : BaseLoadStoreUnscale, - Sched<[WriteST]>; - - def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; -} - -multiclass PrefetchUnscaled sz, bit V, bits<2> opc, string asm, - list pat> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in - def i : BaseLoadStoreUnscale, - Sched<[WriteLD]>; - - def : InstAlias(NAME # "i") prfop:$Rt, GPR64sp:$Rn, 0)>; -} - -//--- -// Load/store unscaled immediate, unprivileged -//--- - -class BaseLoadStoreUnprivileged sz, bit V, bits<2> opc, - dag oops, dag iops, string asm> - : I { - bits<5> Rt; - bits<5> Rn; - bits<9> offset; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0; - let Inst{20-12} = offset; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; - - let DecoderMethod = "DecodeSignedLdStInstruction"; -} - -multiclass LoadUnprivileged sz, bit V, bits<2> opc, - RegisterClass regtype, string asm> { - let mayStore = 0, mayLoad = 1, hasSideEffects = 0 in - def i : BaseLoadStoreUnprivileged, - Sched<[WriteLD]>; - - def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; -} - -multiclass StoreUnprivileged sz, bit V, bits<2> opc, - RegisterClass regtype, string asm> { - let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in - def i : BaseLoadStoreUnprivileged, - Sched<[WriteST]>; - - def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; -} - -//--- -// Load/store pre-indexed -//--- - -class BaseLoadStorePreIdx sz, bit V, bits<2> opc, dag oops, dag iops, - string asm, string cstr, list pat> - : I { - bits<5> Rt; 
- bits<5> Rn; - bits<9> offset; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0; - let Inst{23-22} = opc; - let Inst{21} = 0; - let Inst{20-12} = offset; - let Inst{11-10} = 0b11; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; - - let DecoderMethod = "DecodeSignedLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -class LoadPreIdx sz, bit V, bits<2> opc, RegisterOperand regtype, - string asm> - : BaseLoadStorePreIdx, - Sched<[WriteAdr, WriteLD]>; - -let mayStore = 1, mayLoad = 0 in -class StorePreIdx sz, bit V, bits<2> opc, RegisterOperand regtype, - string asm, SDPatternOperator storeop, ValueType Ty> - : BaseLoadStorePreIdx, - Sched<[WriteAdr, WriteST]>; -} // hasSideEffects = 0 - -//--- -// Load/store post-indexed -//--- - -class BaseLoadStorePostIdx sz, bit V, bits<2> opc, dag oops, dag iops, - string asm, string cstr, list pat> - : I { - bits<5> Rt; - bits<5> Rn; - bits<9> offset; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0b0; - let Inst{20-12} = offset; - let Inst{11-10} = 0b01; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; - - let DecoderMethod = "DecodeSignedLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -class LoadPostIdx sz, bit V, bits<2> opc, RegisterOperand regtype, - string asm> - : BaseLoadStorePostIdx, - Sched<[WriteAdr, WriteLD]>; - -let mayStore = 1, mayLoad = 0 in -class StorePostIdx sz, bit V, bits<2> opc, RegisterOperand regtype, - string asm, SDPatternOperator storeop, ValueType Ty> - : BaseLoadStorePostIdx, - Sched<[WriteAdr, WriteST]>; -} // hasSideEffects = 0 - - -//--- -// Load/store pair -//--- - -// (indexed, offset) - -class BaseLoadStorePairOffset opc, bit V, bit L, dag oops, dag iops, - string asm> - : I { - bits<5> Rt; - bits<5> Rt2; - bits<5> Rn; - bits<7> offset; - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let 
Inst{26} = V; - let Inst{25-23} = 0b010; - let Inst{22} = L; - let Inst{21-15} = offset; - let Inst{14-10} = Rt2; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; - - let DecoderMethod = "DecodePairLdStInstruction"; -} - -multiclass LoadPairOffset opc, bit V, RegisterOperand regtype, - Operand indextype, string asm> { - let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in - def i : BaseLoadStorePairOffset, - Sched<[WriteLD, WriteLDHi]>; - - def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, - GPR64sp:$Rn, 0)>; -} - - -multiclass StorePairOffset opc, bit V, RegisterOperand regtype, - Operand indextype, string asm> { - let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in - def i : BaseLoadStorePairOffset, - Sched<[WriteSTP]>; - - def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, - GPR64sp:$Rn, 0)>; -} - -// (pre-indexed) -class BaseLoadStorePairPreIdx opc, bit V, bit L, dag oops, dag iops, - string asm> - : I { - bits<5> Rt; - bits<5> Rt2; - bits<5> Rn; - bits<7> offset; - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = V; - let Inst{25-23} = 0b011; - let Inst{22} = L; - let Inst{21-15} = offset; - let Inst{14-10} = Rt2; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; - - let DecoderMethod = "DecodePairLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -class LoadPairPreIdx opc, bit V, RegisterOperand regtype, - Operand indextype, string asm> - : BaseLoadStorePairPreIdx, - Sched<[WriteAdr, WriteLD, WriteLDHi]>; - -let mayStore = 1, mayLoad = 0 in -class StorePairPreIdx opc, bit V, RegisterOperand regtype, - Operand indextype, string asm> - : BaseLoadStorePairPreIdx, - Sched<[WriteAdr, WriteSTP]>; -} // hasSideEffects = 0 - -// (post-indexed) - -class BaseLoadStorePairPostIdx opc, bit V, bit L, dag oops, dag iops, - string asm> - : I { - bits<5> Rt; - bits<5> Rt2; - bits<5> Rn; - bits<7> offset; - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = V; - let Inst{25-23} = 0b001; - let Inst{22} = L; - 
let Inst{21-15} = offset; - let Inst{14-10} = Rt2; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; - - let DecoderMethod = "DecodePairLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -class LoadPairPostIdx opc, bit V, RegisterOperand regtype, - Operand idxtype, string asm> - : BaseLoadStorePairPostIdx, - Sched<[WriteAdr, WriteLD, WriteLDHi]>; - -let mayStore = 1, mayLoad = 0 in -class StorePairPostIdx opc, bit V, RegisterOperand regtype, - Operand idxtype, string asm> - : BaseLoadStorePairPostIdx, - Sched<[WriteAdr, WriteSTP]>; -} // hasSideEffects = 0 - -// (no-allocate) - -class BaseLoadStorePairNoAlloc opc, bit V, bit L, dag oops, dag iops, - string asm> - : I { - bits<5> Rt; - bits<5> Rt2; - bits<5> Rn; - bits<7> offset; - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = V; - let Inst{25-23} = 0b000; - let Inst{22} = L; - let Inst{21-15} = offset; - let Inst{14-10} = Rt2; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; - - let DecoderMethod = "DecodePairLdStInstruction"; -} - -multiclass LoadPairNoAlloc opc, bit V, DAGOperand regtype, - Operand indextype, string asm> { - let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in - def i : BaseLoadStorePairNoAlloc, - Sched<[WriteLD, WriteLDHi]>; - - - def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, - GPR64sp:$Rn, 0)>; -} - -multiclass StorePairNoAlloc opc, bit V, DAGOperand regtype, - Operand indextype, string asm> { - let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in - def i : BaseLoadStorePairNoAlloc, - Sched<[WriteSTP]>; - - def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, - GPR64sp:$Rn, 0)>; -} - -//--- -// Load/store exclusive -//--- - -// True exclusive operations write to and/or read from the system's exclusive -// monitors, which as far as a compiler is concerned can be modelled as a -// random shared memory address. Hence LoadExclusive mayStore. 
-// -// Since these instructions have the undefined register bits set to 1 in -// their canonical form, we need a post encoder method to set those bits -// to 1 when encoding these instructions. We do this using the -// fixLoadStoreExclusive function. This function has template parameters: -// -// fixLoadStoreExclusive -// -// hasRs indicates that the instruction uses the Rs field, so we won't set -// it to 1 (and the same for Rt2). We don't need template parameters for -// the other register fields since Rt and Rn are always used. -// -let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in -class BaseLoadStoreExclusive sz, bit o2, bit L, bit o1, bit o0, - dag oops, dag iops, string asm, string operands> - : I { - let Inst{31-30} = sz; - let Inst{29-24} = 0b001000; - let Inst{23} = o2; - let Inst{22} = L; - let Inst{21} = o1; - let Inst{15} = o0; - - let DecoderMethod = "DecodeExclusiveLdStInstruction"; -} - -// Neither Rs nor Rt2 operands. -class LoadStoreExclusiveSimple sz, bit o2, bit L, bit o1, bit o0, - dag oops, dag iops, string asm, string operands> - : BaseLoadStoreExclusive { - bits<5> Rt; - bits<5> Rn; - let Inst{20-16} = 0b11111; - let Unpredictable{20-16} = 0b11111; - let Inst{14-10} = 0b11111; - let Unpredictable{14-10} = 0b11111; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; - - let PostEncoderMethod = "fixLoadStoreExclusive<0,0>"; -} - -// Simple load acquires don't set the exclusive monitor -let mayLoad = 1, mayStore = 0 in -class LoadAcquire sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : LoadStoreExclusiveSimple, - Sched<[WriteLD]>; - -class LoadExclusive sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : LoadStoreExclusiveSimple, - Sched<[WriteLD]>; - -class LoadExclusivePair sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : BaseLoadStoreExclusive, - Sched<[WriteLD, WriteLDHi]> { - bits<5> Rt; - bits<5> Rt2; - bits<5> Rn; - let Inst{14-10} = Rt2; - let Inst{9-5} = Rn; - 
let Inst{4-0} = Rt; - - let PostEncoderMethod = "fixLoadStoreExclusive<0,1>"; -} - -// Simple store release operations do not check the exclusive monitor. -let mayLoad = 0, mayStore = 1 in -class StoreRelease sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : LoadStoreExclusiveSimple, - Sched<[WriteST]>; - -let mayLoad = 1, mayStore = 1 in -class StoreExclusive sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : BaseLoadStoreExclusive, - Sched<[WriteSTX]> { - bits<5> Ws; - bits<5> Rt; - bits<5> Rn; - let Inst{20-16} = Ws; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; - - let Constraints = "@earlyclobber $Ws"; - let PostEncoderMethod = "fixLoadStoreExclusive<1,0>"; -} - -class StoreExclusivePair sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : BaseLoadStoreExclusive, - Sched<[WriteSTX]> { - bits<5> Ws; - bits<5> Rt; - bits<5> Rt2; - bits<5> Rn; - let Inst{20-16} = Ws; - let Inst{14-10} = Rt2; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; - - let Constraints = "@earlyclobber $Ws"; -} - -// Armv8.5-A Memory Tagging Extension -class BaseMemTag opc1, bits<2> opc2, string asm_insn, - string asm_opnds, string cstr, dag oops, dag iops> - : I, - Sched<[]> { - bits<5> Rn; - - let Inst{31-24} = 0b11011001; - let Inst{23-22} = opc1; - let Inst{21} = 1; - // Inst{20-12} defined by subclass - let Inst{11-10} = opc2; - let Inst{9-5} = Rn; - // Inst{4-0} defined by subclass -} - -class MemTagVector - : BaseMemTag<{0b1, Load}, 0b00, asm_insn, asm_opnds, - "", oops, iops> { - bits<5> Rt; - - let Inst{20-12} = 0b000000000; - let Inst{4-0} = Rt; - - let mayLoad = Load; -} - -class MemTagLoad - : BaseMemTag<0b01, 0b00, asm_insn, asm_opnds, "$Rt = $wback", - (outs GPR64:$wback), - (ins GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)> { - bits<5> Rt; - bits<9> offset; - - let Inst{20-12} = offset; - let Inst{4-0} = Rt; - - let mayLoad = 1; -} - -class BaseMemTagStore opc1, bits<2> opc2, string asm_insn, - string asm_opnds, 
string cstr, dag oops, dag iops> - : BaseMemTag { - bits<5> Rt; - bits<9> offset; - - let Inst{20-12} = offset; - let Inst{4-0} = Rt; - - let mayStore = 1; -} - -multiclass MemTagStore opc1, string insn> { - def Offset : - BaseMemTagStore; - def PreIndex : - BaseMemTagStore; - def PostIndex : - BaseMemTagStore; - - def : InstAlias(NAME # "Offset") GPR64sp:$Rt, GPR64sp:$Rn, 0)>; -} - -//--- -// Exception generation -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class ExceptionGeneration op1, bits<2> ll, string asm> - : I<(outs), (ins timm32_0_65535:$imm), asm, "\t$imm", "", []>, - Sched<[WriteSys]> { - bits<16> imm; - let Inst{31-24} = 0b11010100; - let Inst{23-21} = op1; - let Inst{20-5} = imm; - let Inst{4-2} = 0b000; - let Inst{1-0} = ll; -} - -//--- -// UDF : Permanently UNDEFINED instructions. Format: Opc = 0x0000, 16 bit imm. -//-- -let hasSideEffects = 1, isTrap = 1, mayLoad = 0, mayStore = 0 in { -class UDFType opc, string asm> - : I<(outs), (ins uimm16:$imm), - asm, "\t$imm", "", []>, - Sched<[]> { - bits<16> imm; - let Inst{31-16} = opc; - let Inst{15-0} = imm; -} -} -let Predicates = [HasFPARMv8] in { - -//--- -// Floating point to integer conversion -//--- - -class BaseFPToIntegerUnscaled type, bits<2> rmode, bits<3> opcode, - RegisterClass srcType, RegisterClass dstType, - string asm, list pattern> - : I<(outs dstType:$Rd), (ins srcType:$Rn), - asm, "\t$Rd, $Rn", "", pattern>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - let Inst{30-29} = 0b00; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseFPToInteger type, bits<2> rmode, bits<3> opcode, - RegisterClass srcType, RegisterClass dstType, - Operand immType, string asm, list pattern> - : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale), - asm, "\t$Rd, $Rn, 
$scale", "", pattern>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - bits<6> scale; - let Inst{30-29} = 0b00; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = scale; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass FPToIntegerUnscaled rmode, bits<3> opcode, string asm, - SDPatternOperator OpN> { - // Unscaled half-precision to 32-bit - def UWHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR32, asm, - [(set GPR32:$Rd, (OpN (f16 FPR16:$Rn)))]> { - let Inst{31} = 0; // 32-bit GPR flag - let Predicates = [HasFullFP16]; - } - - // Unscaled half-precision to 64-bit - def UXHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR64, asm, - [(set GPR64:$Rd, (OpN (f16 FPR16:$Rn)))]> { - let Inst{31} = 1; // 64-bit GPR flag - let Predicates = [HasFullFP16]; - } - - // Unscaled single-precision to 32-bit - def UWSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR32, asm, - [(set GPR32:$Rd, (OpN FPR32:$Rn))]> { - let Inst{31} = 0; // 32-bit GPR flag - } - - // Unscaled single-precision to 64-bit - def UXSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR64, asm, - [(set GPR64:$Rd, (OpN FPR32:$Rn))]> { - let Inst{31} = 1; // 64-bit GPR flag - } - - // Unscaled double-precision to 32-bit - def UWDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR32, asm, - [(set GPR32:$Rd, (OpN (f64 FPR64:$Rn)))]> { - let Inst{31} = 0; // 32-bit GPR flag - } - - // Unscaled double-precision to 64-bit - def UXDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR64, asm, - [(set GPR64:$Rd, (OpN (f64 FPR64:$Rn)))]> { - let Inst{31} = 1; // 64-bit GPR flag - } -} - -multiclass FPToIntegerScaled rmode, bits<3> opcode, string asm, - SDPatternOperator OpN> { - // Scaled half-precision to 32-bit - def SWHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR32, - fixedpoint_f16_i32, asm, - [(set GPR32:$Rd, (OpN (fmul (f16 FPR16:$Rn), - 
fixedpoint_f16_i32:$scale)))]> { - let Inst{31} = 0; // 32-bit GPR flag - let scale{5} = 1; - let Predicates = [HasFullFP16]; - } - - // Scaled half-precision to 64-bit - def SXHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR64, - fixedpoint_f16_i64, asm, - [(set GPR64:$Rd, (OpN (fmul (f16 FPR16:$Rn), - fixedpoint_f16_i64:$scale)))]> { - let Inst{31} = 1; // 64-bit GPR flag - let Predicates = [HasFullFP16]; - } - - // Scaled single-precision to 32-bit - def SWSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR32, - fixedpoint_f32_i32, asm, - [(set GPR32:$Rd, (OpN (fmul FPR32:$Rn, - fixedpoint_f32_i32:$scale)))]> { - let Inst{31} = 0; // 32-bit GPR flag - let scale{5} = 1; - } - - // Scaled single-precision to 64-bit - def SXSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR64, - fixedpoint_f32_i64, asm, - [(set GPR64:$Rd, (OpN (fmul FPR32:$Rn, - fixedpoint_f32_i64:$scale)))]> { - let Inst{31} = 1; // 64-bit GPR flag - } - - // Scaled double-precision to 32-bit - def SWDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR32, - fixedpoint_f64_i32, asm, - [(set GPR32:$Rd, (OpN (fmul FPR64:$Rn, - fixedpoint_f64_i32:$scale)))]> { - let Inst{31} = 0; // 32-bit GPR flag - let scale{5} = 1; - } - - // Scaled double-precision to 64-bit - def SXDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR64, - fixedpoint_f64_i64, asm, - [(set GPR64:$Rd, (OpN (fmul FPR64:$Rn, - fixedpoint_f64_i64:$scale)))]> { - let Inst{31} = 1; // 64-bit GPR flag - } -} - -//--- -// Integer to floating point conversion -//--- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseIntegerToFP pattern> - : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale), - asm, "\t$Rd, $Rn, $scale", "", pattern>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - bits<6> scale; - let Inst{30-24} = 0b0011110; - let Inst{21-17} = 0b00001; - let Inst{16} = isUnsigned; - let Inst{15-10} = scale; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class BaseIntegerToFPUnscaled - : I<(outs 
dstType:$Rd), (ins srcType:$Rn), - asm, "\t$Rd, $Rn", "", [(set (dvt dstType:$Rd), (node srcType:$Rn))]>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - bits<6> scale; - let Inst{30-24} = 0b0011110; - let Inst{21-17} = 0b10001; - let Inst{16} = isUnsigned; - let Inst{15-10} = 0b000000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass IntegerToFP { - // Unscaled - def UWHri: BaseIntegerToFPUnscaled { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{23-22} = 0b11; // 16-bit FPR flag - let Predicates = [HasFullFP16]; - } - - def UWSri: BaseIntegerToFPUnscaled { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{23-22} = 0b00; // 32-bit FPR flag - } - - def UWDri: BaseIntegerToFPUnscaled { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{23-22} = 0b01; // 64-bit FPR flag - } - - def UXHri: BaseIntegerToFPUnscaled { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{23-22} = 0b11; // 16-bit FPR flag - let Predicates = [HasFullFP16]; - } - - def UXSri: BaseIntegerToFPUnscaled { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{23-22} = 0b00; // 32-bit FPR flag - } - - def UXDri: BaseIntegerToFPUnscaled { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{23-22} = 0b01; // 64-bit FPR flag - } - - // Scaled - def SWHri: BaseIntegerToFP { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{23-22} = 0b11; // 16-bit FPR flag - let scale{5} = 1; - let Predicates = [HasFullFP16]; - } - - def SWSri: BaseIntegerToFP { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{23-22} = 0b00; // 32-bit FPR flag - let scale{5} = 1; - } - - def SWDri: BaseIntegerToFP { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{23-22} = 0b01; // 64-bit FPR flag - let scale{5} = 1; - } - - def SXHri: BaseIntegerToFP { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{23-22} = 0b11; // 16-bit FPR flag - let Predicates = [HasFullFP16]; - } - - def SXSri: BaseIntegerToFP { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{23-22} = 0b00; // 32-bit FPR flag - } - - def SXDri: BaseIntegerToFP { 
- let Inst{31} = 1; // 64-bit GPR flag - let Inst{23-22} = 0b01; // 64-bit FPR flag - } -} - -//--- -// Unscaled integer <-> floating point conversion (i.e. FMOV) -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseUnscaledConversion rmode, bits<3> opcode, - RegisterClass srcType, RegisterClass dstType, - string asm> - : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", - // We use COPY_TO_REGCLASS for these bitconvert operations. - // copyPhysReg() expands the resultant COPY instructions after - // regalloc is done. This gives greater freedom for the allocator - // and related passes (coalescing, copy propagation, et. al.) to - // be more effective. - [/*(set (dvt dstType:$Rd), (bitconvert (svt srcType:$Rn)))*/]>, - Sched<[WriteFCopy]> { - bits<5> Rd; - bits<5> Rn; - let Inst{30-24} = 0b0011110; - let Inst{21} = 1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0b000000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseUnscaledConversionToHigh rmode, bits<3> opcode, - RegisterClass srcType, RegisterOperand dstType, string asm, - string kind> - : I<(outs dstType:$Rd), (ins srcType:$Rn, VectorIndex1:$idx), asm, - "{\t$Rd"#kind#"$idx, $Rn|"#kind#"\t$Rd$idx, $Rn}", "", []>, - Sched<[WriteFCopy]> { - bits<5> Rd; - bits<5> Rn; - let Inst{30-23} = 0b00111101; - let Inst{21} = 1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0b000000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeFMOVLaneInstruction"; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseUnscaledConversionFromHigh rmode, bits<3> opcode, - RegisterOperand srcType, RegisterClass dstType, string asm, - string kind> - : I<(outs dstType:$Rd), (ins srcType:$Rn, VectorIndex1:$idx), asm, - "{\t$Rd, $Rn"#kind#"$idx|"#kind#"\t$Rd, $Rn$idx}", "", []>, - Sched<[WriteFCopy]> { - bits<5> Rd; - bits<5> Rn; - let 
Inst{30-23} = 0b00111101; - let Inst{21} = 1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0b000000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeFMOVLaneInstruction"; -} - - -multiclass UnscaledConversion { - def WHr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR16, asm> { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{23-22} = 0b11; // 16-bit FPR flag - let Predicates = [HasFullFP16]; - } - - def XHr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR16, asm> { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{23-22} = 0b11; // 16-bit FPR flag - let Predicates = [HasFullFP16]; - } - - def WSr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{23-22} = 0b00; // 32-bit FPR flag - } - - def XDr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{23-22} = 0b01; // 64-bit FPR flag - } - - def HWr : BaseUnscaledConversion<0b00, 0b110, FPR16, GPR32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{23-22} = 0b11; // 16-bit FPR flag - let Predicates = [HasFullFP16]; - } - - def HXr : BaseUnscaledConversion<0b00, 0b110, FPR16, GPR64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{23-22} = 0b11; // 16-bit FPR flag - let Predicates = [HasFullFP16]; - } - - def SWr : BaseUnscaledConversion<0b00, 0b110, FPR32, GPR32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{23-22} = 0b00; // 32-bit FPR flag - } - - def DXr : BaseUnscaledConversion<0b00, 0b110, FPR64, GPR64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{23-22} = 0b01; // 64-bit FPR flag - } - - def XDHighr : BaseUnscaledConversionToHigh<0b01, 0b111, GPR64, V128, - asm, ".d"> { - let Inst{31} = 1; - let Inst{22} = 0; - } - - def DXHighr : BaseUnscaledConversionFromHigh<0b01, 0b110, V128, GPR64, - asm, ".d"> { - let Inst{31} = 1; - let Inst{22} = 0; - } -} - -//--- -// Floating point conversion -//--- - -class 
BaseFPConversion type, bits<2> opcode, RegisterClass dstType, - RegisterClass srcType, string asm, list pattern> - : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", pattern>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-24} = 0b00011110; - let Inst{23-22} = type; - let Inst{21-17} = 0b10001; - let Inst{16-15} = opcode; - let Inst{14-10} = 0b10000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass FPConversion { - // Double-precision to Half-precision - def HDr : BaseFPConversion<0b01, 0b11, FPR16, FPR64, asm, - [(set (f16 FPR16:$Rd), (any_fpround FPR64:$Rn))]>; - - // Double-precision to Single-precision - def SDr : BaseFPConversion<0b01, 0b00, FPR32, FPR64, asm, - [(set FPR32:$Rd, (any_fpround FPR64:$Rn))]>; - - // Half-precision to Double-precision - def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm, - [(set FPR64:$Rd, (fpextend (f16 FPR16:$Rn)))]>; - - // Half-precision to Single-precision - def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm, - [(set FPR32:$Rd, (fpextend (f16 FPR16:$Rn)))]>; - - // Single-precision to Double-precision - def DSr : BaseFPConversion<0b00, 0b01, FPR64, FPR32, asm, - [(set FPR64:$Rd, (fpextend FPR32:$Rn))]>; - - // Single-precision to Half-precision - def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm, - [(set (f16 FPR16:$Rd), (any_fpround FPR32:$Rn))]>; -} - -//--- -// Single operand floating point data processing -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSingleOperandFPData opcode, RegisterClass regtype, - ValueType vt, string asm, SDPatternOperator node> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "", - [(set (vt regtype:$Rd), (node (vt regtype:$Rn)))]>, - Sched<[WriteF]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-24} = 0b00011110; - let Inst{21} = 0b1; - let Inst{20-15} = opcode; - let Inst{14-10} = 0b10000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SingleOperandFPData opcode, string asm, - 
SDPatternOperator node = null_frag> { - - def Hr : BaseSingleOperandFPData<{0b00,opcode}, FPR16, f16, asm, node> { - let Inst{23-22} = 0b11; // 16-bit size flag - let Predicates = [HasFullFP16]; - } - - def Sr : BaseSingleOperandFPData<{0b00,opcode}, FPR32, f32, asm, node> { - let Inst{23-22} = 0b00; // 32-bit size flag - } - - def Dr : BaseSingleOperandFPData<{0b00,opcode}, FPR64, f64, asm, node> { - let Inst{23-22} = 0b01; // 64-bit size flag - } -} - -multiclass SingleOperandFPNo16 opcode, string asm, - SDPatternOperator node = null_frag>{ - - def Sr : BaseSingleOperandFPData { - let Inst{23-22} = 0b00; // 32-bit registers - } - - def Dr : BaseSingleOperandFPData { - let Inst{23-22} = 0b01; // 64-bit registers - } -} - -// FRInt[32|64][Z|N] instructions -multiclass FRIntNNT opcode, string asm, SDPatternOperator node = null_frag> : - SingleOperandFPNo16<{0b0100,opcode}, asm, node>; - -//--- -// Two operand floating point data processing -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseTwoOperandFPData opcode, RegisterClass regtype, - string asm, list pat> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", pat>, - Sched<[WriteF]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-24} = 0b00011110; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass TwoOperandFPData opcode, string asm, - SDPatternOperator node = null_frag> { - def Hrr : BaseTwoOperandFPData { - let Inst{23-22} = 0b11; // 16-bit size flag - let Predicates = [HasFullFP16]; - } - - def Srr : BaseTwoOperandFPData { - let Inst{23-22} = 0b00; // 32-bit size flag - } - - def Drr : BaseTwoOperandFPData { - let Inst{23-22} = 0b01; // 64-bit size flag - } -} - -multiclass TwoOperandFPDataNeg opcode, string asm, SDNode node> { - def Hrr : BaseTwoOperandFPData { - let Inst{23-22} = 0b11; // 16-bit size flag - let Predicates = 
[HasFullFP16]; - } - - def Srr : BaseTwoOperandFPData { - let Inst{23-22} = 0b00; // 32-bit size flag - } - - def Drr : BaseTwoOperandFPData { - let Inst{23-22} = 0b01; // 64-bit size flag - } -} - - -//--- -// Three operand floating point data processing -//--- - -class BaseThreeOperandFPData pat> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, regtype: $Ra), - asm, "\t$Rd, $Rn, $Rm, $Ra", "", pat>, - Sched<[WriteFMul]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<5> Ra; - let Inst{31-24} = 0b00011111; - let Inst{21} = isNegated; - let Inst{20-16} = Rm; - let Inst{15} = isSub; - let Inst{14-10} = Ra; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass ThreeOperandFPData { - def Hrrr : BaseThreeOperandFPData { - let Inst{23-22} = 0b11; // 16-bit size flag - let Predicates = [HasFullFP16]; - } - - def Srrr : BaseThreeOperandFPData { - let Inst{23-22} = 0b00; // 32-bit size flag - } - - def Drrr : BaseThreeOperandFPData { - let Inst{23-22} = 0b01; // 64-bit size flag - } -} - -//--- -// Floating point data comparisons -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseOneOperandFPComparison pat> - : I<(outs), (ins regtype:$Rn), asm, "\t$Rn, #0.0", "", pat>, - Sched<[WriteFCmp]> { - bits<5> Rn; - let Inst{31-24} = 0b00011110; - let Inst{21} = 1; - - let Inst{15-10} = 0b001000; - let Inst{9-5} = Rn; - let Inst{4} = signalAllNans; - let Inst{3-0} = 0b1000; - - // Rm should be 0b00000 canonically, but we need to accept any value. 
- let PostEncoderMethod = "fixOneOperandFPComparison"; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseTwoOperandFPComparison pat> - : I<(outs), (ins regtype:$Rn, regtype:$Rm), asm, "\t$Rn, $Rm", "", pat>, - Sched<[WriteFCmp]> { - bits<5> Rm; - bits<5> Rn; - let Inst{31-24} = 0b00011110; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-10} = 0b001000; - let Inst{9-5} = Rn; - let Inst{4} = signalAllNans; - let Inst{3-0} = 0b0000; -} - -multiclass FPComparison { - let Defs = [NZCV] in { - def Hrr : BaseTwoOperandFPComparison { - let Inst{23-22} = 0b11; - let Predicates = [HasFullFP16]; - } - - def Hri : BaseOneOperandFPComparison { - let Inst{23-22} = 0b11; - let Predicates = [HasFullFP16]; - } - - def Srr : BaseTwoOperandFPComparison { - let Inst{23-22} = 0b00; - } - - def Sri : BaseOneOperandFPComparison { - let Inst{23-22} = 0b00; - } - - def Drr : BaseTwoOperandFPComparison { - let Inst{23-22} = 0b01; - } - - def Dri : BaseOneOperandFPComparison { - let Inst{23-22} = 0b01; - } - } // Defs = [NZCV] -} - -//--- -// Floating point conditional comparisons -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseFPCondComparison pat> - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), - mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", pat>, - Sched<[WriteFCmp]> { - let Uses = [NZCV]; - let Defs = [NZCV]; - - bits<5> Rn; - bits<5> Rm; - bits<4> nzcv; - bits<4> cond; - - let Inst{31-24} = 0b00011110; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = 0b01; - let Inst{9-5} = Rn; - let Inst{4} = signalAllNans; - let Inst{3-0} = nzcv; -} - -multiclass FPCondComparison { - def Hrr : BaseFPCondComparison { - let Inst{23-22} = 0b11; - let Predicates = [HasFullFP16]; - } - - def Srr : BaseFPCondComparison { - let Inst{23-22} = 0b00; - } - - def Drr : BaseFPCondComparison { - let Inst{23-22} = 0b01; - } -} - -//--- -// Floating point conditional select -//--- - -class 
BaseFPCondSelect - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), - asm, "\t$Rd, $Rn, $Rm, $cond", "", - [(set regtype:$Rd, - (AArch64csel (vt regtype:$Rn), regtype:$Rm, - (i32 imm:$cond), NZCV))]>, - Sched<[WriteF]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<4> cond; - - let Inst{31-24} = 0b00011110; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = 0b11; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass FPCondSelect { - let Uses = [NZCV] in { - def Hrrr : BaseFPCondSelect { - let Inst{23-22} = 0b11; - let Predicates = [HasFullFP16]; - } - - def Srrr : BaseFPCondSelect { - let Inst{23-22} = 0b00; - } - - def Drrr : BaseFPCondSelect { - let Inst{23-22} = 0b01; - } - } // Uses = [NZCV] -} - -//--- -// Floating move immediate -//--- - -class BaseFPMoveImmediate - : I<(outs regtype:$Rd), (ins fpimmtype:$imm), asm, "\t$Rd, $imm", "", - [(set regtype:$Rd, fpimmtype:$imm)]>, - Sched<[WriteFImm]> { - bits<5> Rd; - bits<8> imm; - let Inst{31-24} = 0b00011110; - let Inst{21} = 1; - let Inst{20-13} = imm; - let Inst{12-5} = 0b10000000; - let Inst{4-0} = Rd; -} - -multiclass FPMoveImmediate { - def Hi : BaseFPMoveImmediate { - let Inst{23-22} = 0b11; - let Predicates = [HasFullFP16]; - } - - def Si : BaseFPMoveImmediate { - let Inst{23-22} = 0b00; - } - - def Di : BaseFPMoveImmediate { - let Inst{23-22} = 0b01; - } -} -} // end of 'let Predicates = [HasFPARMv8]' - -//---------------------------------------------------------------------------- -// AdvSIMD -//---------------------------------------------------------------------------- - -let Predicates = [HasNEON] in { - -//---------------------------------------------------------------------------- -// AdvSIMD three register vector instructions -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDThreeSameVector size, bits<5> opcode, - RegisterOperand 
regtype, string asm, string kind, - list pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # - "|" # kind # "\t$Rd, $Rn, $Rm|}", "", pattern>, - Sched<[!if(Q, WriteVq, WriteVd)]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-21} = size; - let Inst{20-16} = Rm; - let Inst{15-11} = opcode; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDThreeSameVectorTied size, bits<5> opcode, - RegisterOperand regtype, string asm, string kind, - list pattern> - : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # - "|" # kind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>, - Sched<[!if(Q, WriteVq, WriteVd)]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-21} = size; - let Inst{20-16} = Rm; - let Inst{15-11} = opcode; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDThreeSameVectorPseudo pattern> - : Pseudo<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), pattern>, - Sched<[!if(!eq(regtype, V128), WriteVq, WriteVd)]>; - -multiclass SIMDLogicalThreeVectorPseudo { - def v8i8 : BaseSIMDThreeSameVectorPseudo; - def v16i8 : BaseSIMDThreeSameVectorPseudo; - - def : Pat<(v4i16 (OpNode (v4i16 V64:$LHS), (v4i16 V64:$MHS), - (v4i16 V64:$RHS))), - (!cast(NAME#"v8i8") - V64:$LHS, V64:$MHS, V64:$RHS)>; - def : Pat<(v2i32 (OpNode (v2i32 V64:$LHS), (v2i32 V64:$MHS), - (v2i32 V64:$RHS))), - (!cast(NAME#"v8i8") - V64:$LHS, V64:$MHS, V64:$RHS)>; - def : Pat<(v1i64 (OpNode (v1i64 V64:$LHS), (v1i64 V64:$MHS), - (v1i64 V64:$RHS))), - (!cast(NAME#"v8i8") - V64:$LHS, 
V64:$MHS, V64:$RHS)>; - - def : Pat<(v8i16 (OpNode (v8i16 V128:$LHS), (v8i16 V128:$MHS), - (v8i16 V128:$RHS))), - (!cast(NAME#"v16i8") - V128:$LHS, V128:$MHS, V128:$RHS)>; - def : Pat<(v4i32 (OpNode (v4i32 V128:$LHS), (v4i32 V128:$MHS), - (v4i32 V128:$RHS))), - (!cast(NAME#"v16i8") - V128:$LHS, V128:$MHS, V128:$RHS)>; - def : Pat<(v2i64 (OpNode (v2i64 V128:$LHS), (v2i64 V128:$MHS), - (v2i64 V128:$RHS))), - (!cast(NAME#"v16i8") - V128:$LHS, V128:$MHS, V128:$RHS)>; -} - -// All operand sizes distinguished in the encoding. -multiclass SIMDThreeSameVector opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, - asm, ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, - asm, ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, - asm, ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, - asm, ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, - asm, ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, - asm, ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; - def v2i64 : BaseSIMDThreeSameVector<1, U, 0b111, opc, V128, - asm, ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>; -} - -multiclass SIMDThreeSameVectorExtraPatterns { - def : Pat<(v8i8 (OpNode V64:$LHS, V64:$RHS)), - (!cast(inst#"v8i8") V64:$LHS, V64:$RHS)>; - def : Pat<(v4i16 (OpNode V64:$LHS, V64:$RHS)), - (!cast(inst#"v4i16") V64:$LHS, V64:$RHS)>; - def : Pat<(v2i32 (OpNode V64:$LHS, V64:$RHS)), - (!cast(inst#"v2i32") V64:$LHS, V64:$RHS)>; - - def : Pat<(v16i8 
(OpNode V128:$LHS, V128:$RHS)), - (!cast(inst#"v16i8") V128:$LHS, V128:$RHS)>; - def : Pat<(v8i16 (OpNode V128:$LHS, V128:$RHS)), - (!cast(inst#"v8i16") V128:$LHS, V128:$RHS)>; - def : Pat<(v4i32 (OpNode V128:$LHS, V128:$RHS)), - (!cast(inst#"v4i32") V128:$LHS, V128:$RHS)>; - def : Pat<(v2i64 (OpNode V128:$LHS, V128:$RHS)), - (!cast(inst#"v2i64") V128:$LHS, V128:$RHS)>; -} - -// As above, but D sized elements unsupported. -multiclass SIMDThreeSameVectorBHS opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, - asm, ".8b", - [(set V64:$Rd, (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, - asm, ".16b", - [(set V128:$Rd, (v16i8 (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm))))]>; - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, - asm, ".4h", - [(set V64:$Rd, (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, - asm, ".8h", - [(set V128:$Rd, (v8i16 (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, - asm, ".2s", - [(set V64:$Rd, (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, - asm, ".4s", - [(set V128:$Rd, (v4i32 (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>; -} - -multiclass SIMDThreeSameVectorBHSTied opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b001, opc, V64, - asm, ".8b", - [(set (v8i8 V64:$dst), - (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b001, opc, V128, - asm, ".16b", - [(set (v16i8 V128:$dst), - (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; - def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b011, opc, V64, - asm, ".4h", - [(set (v4i16 V64:$dst), - (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - 
def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b011, opc, V128, - asm, ".8h", - [(set (v8i16 V128:$dst), - (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b101, opc, V64, - asm, ".2s", - [(set (v2i32 V64:$dst), - (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b101, opc, V128, - asm, ".4s", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; -} - -// As above, but only B sized elements supported. -multiclass SIMDThreeSameVectorB opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, - asm, ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, - asm, ".16b", - [(set (v16i8 V128:$Rd), - (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; -} - -// As above, but only floating point elements supported. 
-multiclass SIMDThreeSameVectorFP opc, - string asm, SDPatternOperator OpNode> { - let Predicates = [HasNEON, HasFullFP16] in { - def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64, - asm, ".4h", - [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; - def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128, - asm, ".8h", - [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; - } // Predicates = [HasNEON, HasFullFP16] - def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64, - asm, ".2s", - [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128, - asm, ".4s", - [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128, - asm, ".2d", - [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; -} - -multiclass SIMDThreeSameVectorFPCmp opc, - string asm, - SDPatternOperator OpNode> { - let Predicates = [HasNEON, HasFullFP16] in { - def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64, - asm, ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; - def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128, - asm, ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; - } // Predicates = [HasNEON, HasFullFP16] - def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64, - asm, ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128, - asm, ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128, - asm, ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; -} - -multiclass SIMDThreeSameVectorFPTied opc, - string asm, 
SDPatternOperator OpNode> { - let Predicates = [HasNEON, HasFullFP16] in { - def v4f16 : BaseSIMDThreeSameVectorTied<0, U, {S,0b10}, {0b00,opc}, V64, - asm, ".4h", - [(set (v4f16 V64:$dst), - (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; - def v8f16 : BaseSIMDThreeSameVectorTied<1, U, {S,0b10}, {0b00,opc}, V128, - asm, ".8h", - [(set (v8f16 V128:$dst), - (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; - } // Predicates = [HasNEON, HasFullFP16] - def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0b01}, {0b11,opc}, V64, - asm, ".2s", - [(set (v2f32 V64:$dst), - (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0b01}, {0b11,opc}, V128, - asm, ".4s", - [(set (v4f32 V128:$dst), - (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,0b11}, {0b11,opc}, V128, - asm, ".2d", - [(set (v2f64 V128:$dst), - (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; -} - -// As above, but D and B sized elements unsupported. -multiclass SIMDThreeSameVectorHS opc, string asm, - SDPatternOperator OpNode> { - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, - asm, ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, - asm, ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, - asm, ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, - asm, ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; -} - -// Logical three vector ops share opcode bits, and only use B sized elements. 
-multiclass SIMDLogicalThreeVector size, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8 : BaseSIMDThreeSameVector<0, U, {size,1}, 0b00011, V64, - asm, ".8b", - [(set (v8i8 V64:$Rd), (OpNode V64:$Rn, V64:$Rm))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, {size,1}, 0b00011, V128, - asm, ".16b", - [(set (v16i8 V128:$Rd), (OpNode V128:$Rn, V128:$Rm))]>; - - def : Pat<(v4i16 (OpNode V64:$LHS, V64:$RHS)), - (!cast(NAME#"v8i8") V64:$LHS, V64:$RHS)>; - def : Pat<(v2i32 (OpNode V64:$LHS, V64:$RHS)), - (!cast(NAME#"v8i8") V64:$LHS, V64:$RHS)>; - def : Pat<(v1i64 (OpNode V64:$LHS, V64:$RHS)), - (!cast(NAME#"v8i8") V64:$LHS, V64:$RHS)>; - - def : Pat<(v8i16 (OpNode V128:$LHS, V128:$RHS)), - (!cast(NAME#"v16i8") V128:$LHS, V128:$RHS)>; - def : Pat<(v4i32 (OpNode V128:$LHS, V128:$RHS)), - (!cast(NAME#"v16i8") V128:$LHS, V128:$RHS)>; - def : Pat<(v2i64 (OpNode V128:$LHS, V128:$RHS)), - (!cast(NAME#"v16i8") V128:$LHS, V128:$RHS)>; -} - -multiclass SIMDLogicalThreeVectorTied size, - string asm, SDPatternOperator OpNode = null_frag> { - def v8i8 : BaseSIMDThreeSameVectorTied<0, U, {size,1}, 0b00011, V64, - asm, ".8b", - [(set (v8i8 V64:$dst), - (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVectorTied<1, U, {size,1}, 0b00011, V128, - asm, ".16b", - [(set (v16i8 V128:$dst), - (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), - (v16i8 V128:$Rm)))]>; - - def : Pat<(v4i16 (OpNode (v4i16 V64:$LHS), (v4i16 V64:$MHS), - (v4i16 V64:$RHS))), - (!cast(NAME#"v8i8") - V64:$LHS, V64:$MHS, V64:$RHS)>; - def : Pat<(v2i32 (OpNode (v2i32 V64:$LHS), (v2i32 V64:$MHS), - (v2i32 V64:$RHS))), - (!cast(NAME#"v8i8") - V64:$LHS, V64:$MHS, V64:$RHS)>; - def : Pat<(v1i64 (OpNode (v1i64 V64:$LHS), (v1i64 V64:$MHS), - (v1i64 V64:$RHS))), - (!cast(NAME#"v8i8") - V64:$LHS, V64:$MHS, V64:$RHS)>; - - def : Pat<(v8i16 (OpNode (v8i16 V128:$LHS), (v8i16 V128:$MHS), - (v8i16 V128:$RHS))), - (!cast(NAME#"v16i8") - V128:$LHS, V128:$MHS, V128:$RHS)>; - def : 
Pat<(v4i32 (OpNode (v4i32 V128:$LHS), (v4i32 V128:$MHS), - (v4i32 V128:$RHS))), - (!cast(NAME#"v16i8") - V128:$LHS, V128:$MHS, V128:$RHS)>; - def : Pat<(v2i64 (OpNode (v2i64 V128:$LHS), (v2i64 V128:$MHS), - (v2i64 V128:$RHS))), - (!cast(NAME#"v16i8") - V128:$LHS, V128:$MHS, V128:$RHS)>; -} - -// ARMv8.2-A Dot Product Instructions (Vector): These instructions extract -// bytes from S-sized elements. -class BaseSIMDThreeSameVectorDot : - BaseSIMDThreeSameVectorTied { - let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}"); -} - -multiclass SIMDThreeSameVectorDot { - def v8i8 : BaseSIMDThreeSameVectorDot<0, U, Mixed, asm, ".2s", ".8b", V64, - v2i32, v8i8, OpNode>; - def v16i8 : BaseSIMDThreeSameVectorDot<1, U, Mixed, asm, ".4s", ".16b", V128, - v4i32, v16i8, OpNode>; -} - -// ARMv8.2-A Fused Multiply Add-Long Instructions (Vector): These instructions -// select inputs from 4H vectors and accumulate outputs to a 2S vector (or from -// 8H to 4S, when Q=1). -class BaseSIMDThreeSameVectorFML size, string asm, string kind1, - string kind2, RegisterOperand RegType, - ValueType AccumType, ValueType InputType, - SDPatternOperator OpNode> : - BaseSIMDThreeSameVectorTied { - let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}"); - let Inst{13} = b13; -} - -multiclass SIMDThreeSameVectorFML size, string asm, - SDPatternOperator OpNode> { - def v4f16 : BaseSIMDThreeSameVectorFML<0, U, b13, size, asm, ".2s", ".2h", V64, - v2f32, v4f16, OpNode>; - def v8f16 : BaseSIMDThreeSameVectorFML<1, U, b13, size, asm, ".4s", ".4h", V128, - v4f32, v8f16, OpNode>; -} - - -//---------------------------------------------------------------------------- -// AdvSIMD two register vector instructions. 
-//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDTwoSameVector size, bits<5> opcode, - bits<2> size2, RegisterOperand regtype, string asm, - string dstkind, string srckind, list pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, - "{\t$Rd" # dstkind # ", $Rn" # srckind # - "|" # dstkind # "\t$Rd, $Rn}", "", pattern>, - Sched<[!if(Q, WriteVq, WriteVd)]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 0b1; - let Inst{20-19} = size2; - let Inst{18-17} = 0b00; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDTwoSameVectorTied size, bits<5> opcode, - bits<2> size2, RegisterOperand regtype, - string asm, string dstkind, string srckind, - list pattern> - : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm, - "{\t$Rd" # dstkind # ", $Rn" # srckind # - "|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>, - Sched<[!if(Q, WriteVq, WriteVd)]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 0b1; - let Inst{20-19} = size2; - let Inst{18-17} = 0b00; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -// Supports B, H, and S element sizes. 
-multiclass SIMDTwoVectorBHS opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; -} - -class BaseSIMDVectorLShiftLongBySize size, - RegisterOperand regtype, string asm, string dstkind, - string srckind, string amount> - : I<(outs V128:$Rd), (ins regtype:$Rn), asm, - "{\t$Rd" # dstkind # ", $Rn" # srckind # ", #" # amount # - "|" # dstkind # "\t$Rd, $Rn, #" # amount # "}", "", []>, - Sched<[WriteVq]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-24} = 0b101110; - let Inst{23-22} = size; - let Inst{21-10} = 0b100001001110; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDVectorLShiftLongBySizeBHS { - let hasSideEffects = 0 in { - def v8i8 : BaseSIMDVectorLShiftLongBySize<0, 0b00, V64, - "shll", ".8h", ".8b", "8">; - def v16i8 : BaseSIMDVectorLShiftLongBySize<1, 0b00, V128, - "shll2", ".8h", ".16b", "8">; - def v4i16 : BaseSIMDVectorLShiftLongBySize<0, 0b01, V64, - "shll", ".4s", ".4h", "16">; - def v8i16 : BaseSIMDVectorLShiftLongBySize<1, 0b01, V128, - "shll2", ".4s", ".8h", "16">; - def v2i32 : BaseSIMDVectorLShiftLongBySize<0, 0b10, V64, - "shll", ".2d", ".2s", "32">; - def v4i32 : 
BaseSIMDVectorLShiftLongBySize<1, 0b10, V128, - "shll2", ".2d", ".4s", "32">; - } -} - -// Supports all element sizes. -multiclass SIMDLongTwoVector opc, string asm, - SDPatternOperator OpNode> { - def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, - asm, ".4h", ".8b", - [(set (v4i16 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, - asm, ".8h", ".16b", - [(set (v8i16 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, - asm, ".2s", ".4h", - [(set (v2i32 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, - asm, ".4s", ".8h", - [(set (v4i32 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, - asm, ".1d", ".2s", - [(set (v1i64 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, - asm, ".2d", ".4s", - [(set (v2i64 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; -} - -multiclass SIMDLongTwoVectorTied opc, string asm, - SDPatternOperator OpNode> { - def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64, - asm, ".4h", ".8b", - [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), - (v8i8 V64:$Rn)))]>; - def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128, - asm, ".8h", ".16b", - [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), - (v16i8 V128:$Rn)))]>; - def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64, - asm, ".2s", ".4h", - [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), - (v4i16 V64:$Rn)))]>; - def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128, - asm, ".4s", ".8h", - [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), - (v8i16 V128:$Rn)))]>; - def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64, - asm, ".1d", ".2s", - [(set (v1i64 V64:$dst), (OpNode (v1i64 V64:$Rd), - (v2i32 
V64:$Rn)))]>; - def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128, - asm, ".2d", ".4s", - [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), - (v4i32 V128:$Rn)))]>; -} - -// Supports all element sizes, except 1xD. -multiclass SIMDTwoVectorBHSDTied opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64, - asm, ".8b", ".8b", - [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128, - asm, ".16b", ".16b", - [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64, - asm, ".4h", ".4h", - [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128, - asm, ".8h", ".8h", - [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64, - asm, ".2s", ".2s", - [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128, - asm, ".4s", ".4s", - [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>; - def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, 0b00, V128, - asm, ".2d", ".2d", - [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn)))]>; -} - -multiclass SIMDTwoVectorBHSD opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16 : 
BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; - def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, 0b00, V128, - asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; -} - - -// Supports only B element sizes. -multiclass SIMDTwoVectorB size, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, 0b00, V64, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, 0b00, V128, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - -} - -// Supports only B and H element sizes. -multiclass SIMDTwoVectorBH opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode V64:$Rn))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode V128:$Rn))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode V64:$Rn))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>; -} - -// Supports H, S and D element sizes, uses high bit of the size field -// as an extra opcode bit. 
-multiclass SIMDTwoVectorFP opc, string asm, - SDPatternOperator OpNode> { - let Predicates = [HasNEON, HasFullFP16] in { - def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, - asm, ".4h", ".4h", - [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>; - def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, - asm, ".8h", ".8h", - [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>; - } // Predicates = [HasNEON, HasFullFP16] - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, - asm, ".2s", ".2s", - [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, - asm, ".4s", ".4s", - [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, - asm, ".2d", ".2d", - [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; -} - -// Supports only S and D element sizes -multiclass SIMDTwoVectorSD opc, string asm, - SDPatternOperator OpNode = null_frag> { - - def v2f32 : BaseSIMDTwoSameVector<0, U, 00, opc, 0b00, V64, - asm, ".2s", ".2s", - [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, 00, opc, 0b00, V128, - asm, ".4s", ".4s", - [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, 01, opc, 0b00, V128, - asm, ".2d", ".2d", - [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; -} - -multiclass FRIntNNTVector : - SIMDTwoVectorSD; - -// Supports only S element size. 
-multiclass SIMDTwoVectorS opc, string asm, - SDPatternOperator OpNode> { - def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; -} - - -multiclass SIMDTwoVectorFPToInt opc, string asm, - SDPatternOperator OpNode> { - let Predicates = [HasNEON, HasFullFP16] in { - def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>; - def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>; - } // Predicates = [HasNEON, HasFullFP16] - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, - asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; -} - -multiclass SIMDTwoVectorIntToFP opc, string asm, - SDPatternOperator OpNode> { - let Predicates = [HasNEON, HasFullFP16] in { - def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, - asm, ".4h", ".4h", - [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, - asm, ".8h", ".8h", - [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - } // Predicates = [HasNEON, HasFullFP16] - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, - asm, ".2s", ".2s", - [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, - asm, ".4s", ".4s", - [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; - def v2f64 : 
BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, - asm, ".2d", ".2d", - [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDMixedTwoVector size, bits<5> opcode, - RegisterOperand inreg, RegisterOperand outreg, - string asm, string outkind, string inkind, - list pattern> - : I<(outs outreg:$Rd), (ins inreg:$Rn), asm, - "{\t$Rd" # outkind # ", $Rn" # inkind # - "|" # outkind # "\t$Rd, $Rn}", "", pattern>, - Sched<[WriteVq]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDMixedTwoVectorTied size, bits<5> opcode, - RegisterOperand inreg, RegisterOperand outreg, - string asm, string outkind, string inkind, - list pattern> - : I<(outs outreg:$dst), (ins outreg:$Rd, inreg:$Rn), asm, - "{\t$Rd" # outkind # ", $Rn" # inkind # - "|" # outkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>, - Sched<[WriteVq]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDMixedTwoVector opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDMixedTwoVector<0, U, 0b00, opc, V128, V64, - asm, ".8b", ".8h", - [(set (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v16i8 : BaseSIMDMixedTwoVectorTied<1, U, 0b00, opc, V128, V128, - asm#"2", ".16b", ".8h", []>; - def v4i16 : BaseSIMDMixedTwoVector<0, U, 0b01, opc, V128, V64, - asm, ".4h", ".4s", - [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn)))]>; - def v8i16 : BaseSIMDMixedTwoVectorTied<1, U, 0b01, opc, V128, V128, - asm#"2", 
".8h", ".4s", []>; - def v2i32 : BaseSIMDMixedTwoVector<0, U, 0b10, opc, V128, V64, - asm, ".2s", ".2d", - [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn)))]>; - def v4i32 : BaseSIMDMixedTwoVectorTied<1, U, 0b10, opc, V128, V128, - asm#"2", ".4s", ".2d", []>; - - def : Pat<(concat_vectors (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn))), - (!cast(NAME # "v16i8") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; - def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn))), - (!cast(NAME # "v8i16") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; - def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn))), - (!cast(NAME # "v4i32") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -} - -class BaseSIMDCmpTwoVector size, bits<2> size2, - bits<5> opcode, RegisterOperand regtype, string asm, - string kind, string zero, ValueType dty, - ValueType sty, SDNode OpNode> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero # - "|" # kind # "\t$Rd, $Rn, #" # zero # "}", "", - [(set (dty regtype:$Rd), (OpNode (sty regtype:$Rn)))]>, - Sched<[!if(Q, WriteVq, WriteVd)]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 0b1; - let Inst{20-19} = size2; - let Inst{18-17} = 0b00; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -// Comparisons support all element sizes, except 1xD. 
-multiclass SIMDCmpTwoVector opc, string asm, - SDNode OpNode> { - def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, 0b00, opc, V64, - asm, ".8b", "0", - v8i8, v8i8, OpNode>; - def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, 0b00, opc, V128, - asm, ".16b", "0", - v16i8, v16i8, OpNode>; - def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, 0b00, opc, V64, - asm, ".4h", "0", - v4i16, v4i16, OpNode>; - def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, 0b00, opc, V128, - asm, ".8h", "0", - v8i16, v8i16, OpNode>; - def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, 0b00, opc, V64, - asm, ".2s", "0", - v2i32, v2i32, OpNode>; - def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, 0b00, opc, V128, - asm, ".4s", "0", - v4i32, v4i32, OpNode>; - def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, 0b00, opc, V128, - asm, ".2d", "0", - v2i64, v2i64, OpNode>; -} - -// FP Comparisons support only S and D element sizes (and H for v8.2a). -multiclass SIMDFPCmpTwoVector opc, - string asm, SDNode OpNode> { - - let Predicates = [HasNEON, HasFullFP16] in { - def v4i16rz : BaseSIMDCmpTwoVector<0, U, {S,1}, 0b11, opc, V64, - asm, ".4h", "0.0", - v4i16, v4f16, OpNode>; - def v8i16rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b11, opc, V128, - asm, ".8h", "0.0", - v8i16, v8f16, OpNode>; - } // Predicates = [HasNEON, HasFullFP16] - def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, 0b00, opc, V64, - asm, ".2s", "0.0", - v2i32, v2f32, OpNode>; - def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, 0b00, opc, V128, - asm, ".4s", "0.0", - v4i32, v4f32, OpNode>; - def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b00, opc, V128, - asm, ".2d", "0.0", - v2i64, v2f64, OpNode>; - - let Predicates = [HasNEON, HasFullFP16] in { - def : InstAlias(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>; - def : InstAlias(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>; - } - def : InstAlias(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>; - def : InstAlias(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>; - def : InstAlias(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>; - let Predicates = 
[HasNEON, HasFullFP16] in { - def : InstAlias(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>; - def : InstAlias(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>; - } - def : InstAlias(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>; - def : InstAlias(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>; - def : InstAlias(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDFPCvtTwoVector size, bits<5> opcode, - RegisterOperand outtype, RegisterOperand intype, - string asm, string VdTy, string VnTy, - list pattern> - : I<(outs outtype:$Rd), (ins intype:$Rn), asm, - !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "", pattern>, - Sched<[WriteVq]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDFPCvtTwoVectorTied size, bits<5> opcode, - RegisterOperand outtype, RegisterOperand intype, - string asm, string VdTy, string VnTy, - list pattern> - : I<(outs outtype:$dst), (ins outtype:$Rd, intype:$Rn), asm, - !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "$Rd = $dst", pattern>, - Sched<[WriteVq]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDFPWidenTwoVector opc, string asm> { - def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V128, V64, - asm, ".4s", ".4h", []>; - def v8i16 : BaseSIMDFPCvtTwoVector<1, U, {S,0}, opc, V128, V128, - asm#"2", ".4s", ".8h", []>; - def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V128, V64, - asm, ".2d", ".2s", []>; - def v4i32 : BaseSIMDFPCvtTwoVector<1, U, {S,1}, opc, V128, V128, - asm#"2", 
".2d", ".4s", []>; -} - -multiclass SIMDFPNarrowTwoVector opc, string asm> { - def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V64, V128, - asm, ".4h", ".4s", []>; - def v8i16 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,0}, opc, V128, V128, - asm#"2", ".8h", ".4s", []>; - def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128, - asm, ".2s", ".2d", []>; - def v4i32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128, - asm#"2", ".4s", ".2d", []>; -} - -multiclass SIMDFPInexactCvtTwoVector opc, string asm, - Intrinsic OpNode> { - def v2f32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128, - asm, ".2s", ".2d", - [(set (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn)))]>; - def v4f32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128, - asm#"2", ".4s", ".2d", []>; - - def : Pat<(concat_vectors (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn))), - (!cast(NAME # "v4f32") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD three register different-size vector instructions. 
-//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDDifferentThreeVector size, bits<4> opcode, - RegisterOperand outtype, RegisterOperand intype1, - RegisterOperand intype2, string asm, - string outkind, string inkind1, string inkind2, - list pattern> - : I<(outs outtype:$Rd), (ins intype1:$Rn, intype2:$Rm), asm, - "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 # - "|" # outkind # "\t$Rd, $Rn, $Rm}", "", pattern>, - Sched<[WriteVq]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = size{0}; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size{2-1}; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = opcode; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDDifferentThreeVectorTied size, bits<4> opcode, - RegisterOperand outtype, RegisterOperand intype1, - RegisterOperand intype2, string asm, - string outkind, string inkind1, string inkind2, - list pattern> - : I<(outs outtype:$dst), (ins outtype:$Rd, intype1:$Rn, intype2:$Rm), asm, - "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 # - "|" # outkind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>, - Sched<[WriteVq]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = size{0}; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size{2-1}; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = opcode; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -// FIXME: TableGen doesn't know how to deal with expanded types that also -// change the element count (in this case, placing the results in -// the high elements of the result register rather than the low -// elements). Until that's fixed, we can't code-gen those. 
-multiclass SIMDNarrowThreeVectorBHS opc, string asm, - Intrinsic IntOp> { - def v8i16_v8i8 : BaseSIMDDifferentThreeVector; - def v8i16_v16i8 : BaseSIMDDifferentThreeVectorTied; - def v4i32_v4i16 : BaseSIMDDifferentThreeVector; - def v4i32_v8i16 : BaseSIMDDifferentThreeVectorTied; - def v2i64_v2i32 : BaseSIMDDifferentThreeVector; - def v2i64_v4i32 : BaseSIMDDifferentThreeVectorTied; - - - // Patterns for the '2' variants involve INSERT_SUBREG, which you can't put in - // a version attached to an instruction. - def : Pat<(concat_vectors (v8i8 V64:$Rd), (IntOp (v8i16 V128:$Rn), - (v8i16 V128:$Rm))), - (!cast(NAME # "v8i16_v16i8") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; - def : Pat<(concat_vectors (v4i16 V64:$Rd), (IntOp (v4i32 V128:$Rn), - (v4i32 V128:$Rm))), - (!cast(NAME # "v4i32_v8i16") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; - def : Pat<(concat_vectors (v2i32 V64:$Rd), (IntOp (v2i64 V128:$Rn), - (v2i64 V128:$Rm))), - (!cast(NAME # "v2i64_v4i32") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -} - -multiclass SIMDDifferentThreeVectorBD opc, string asm, - Intrinsic IntOp> { - def v8i8 : BaseSIMDDifferentThreeVector; - def v16i8 : BaseSIMDDifferentThreeVector; - let Predicates = [HasAES] in { - def v1i64 : BaseSIMDDifferentThreeVector; - def v2i64 : BaseSIMDDifferentThreeVector; - } - - def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 V128:$Rn)), - (v8i8 (extract_high_v16i8 V128:$Rm)))), - (!cast(NAME#"v16i8") V128:$Rn, V128:$Rm)>; -} - -multiclass SIMDLongThreeVectorHS opc, string asm, - SDPatternOperator OpNode> { - def v4i16_v4i32 : BaseSIMDDifferentThreeVector; - def v8i16_v4i32 : BaseSIMDDifferentThreeVector; - def v2i32_v2i64 : BaseSIMDDifferentThreeVector; - def v4i32_v2i64 : BaseSIMDDifferentThreeVector; -} - -multiclass SIMDLongThreeVectorBHSabdl opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVector; - def 
v16i8_v8i16 : BaseSIMDDifferentThreeVector; - def v4i16_v4i32 : BaseSIMDDifferentThreeVector; - def v8i16_v4i32 : BaseSIMDDifferentThreeVector; - def v2i32_v2i64 : BaseSIMDDifferentThreeVector; - def v4i32_v2i64 : BaseSIMDDifferentThreeVector; -} - -multiclass SIMDLongThreeVectorTiedBHSabal opc, - string asm, - SDPatternOperator OpNode> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied; - def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied; - def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied; - def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied; - def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied; - def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied; -} - -multiclass SIMDLongThreeVectorBHS opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVector; - def v16i8_v8i16 : BaseSIMDDifferentThreeVector; - def v4i16_v4i32 : BaseSIMDDifferentThreeVector; - def v8i16_v4i32 : BaseSIMDDifferentThreeVector; - def v2i32_v2i64 : BaseSIMDDifferentThreeVector; - def v4i32_v2i64 : BaseSIMDDifferentThreeVector; -} - -multiclass SIMDLongThreeVectorTiedBHS opc, - string asm, - SDPatternOperator OpNode> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied; - def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied; - def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied; - def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied; - def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied; - def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied; -} - -multiclass SIMDLongThreeVectorSQDMLXTiedHS opc, string asm, - SDPatternOperator Accum> { - def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied; - def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied; - def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied; - def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied; -} - -multiclass SIMDWideThreeVectorBHS opc, string asm, - SDPatternOperator OpNode> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVector; - def v16i8_v8i16 : BaseSIMDDifferentThreeVector; - def v4i16_v4i32 : 
BaseSIMDDifferentThreeVector; - def v8i16_v4i32 : BaseSIMDDifferentThreeVector; - def v2i32_v2i64 : BaseSIMDDifferentThreeVector; - def v4i32_v2i64 : BaseSIMDDifferentThreeVector; -} - -//---------------------------------------------------------------------------- -// AdvSIMD bitwise extract from vector -//---------------------------------------------------------------------------- - -class BaseSIMDBitwiseExtract - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, i32imm:$imm), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $imm" # - "|" # kind # "\t$Rd, $Rn, $Rm, $imm}", "", - [(set (vty regtype:$Rd), - (AArch64ext regtype:$Rn, regtype:$Rm, (i32 imm:$imm)))]>, - Sched<[!if(size, WriteVq, WriteVd)]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<4> imm; - let Inst{31} = 0; - let Inst{30} = size; - let Inst{29-21} = 0b101110000; - let Inst{20-16} = Rm; - let Inst{15} = 0; - let Inst{14-11} = imm; - let Inst{10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - - -multiclass SIMDBitwiseExtract { - def v8i8 : BaseSIMDBitwiseExtract<0, V64, v8i8, asm, ".8b"> { - let imm{3} = 0; - } - def v16i8 : BaseSIMDBitwiseExtract<1, V128, v16i8, asm, ".16b">; -} - -//---------------------------------------------------------------------------- -// AdvSIMD zip vector -//---------------------------------------------------------------------------- - -class BaseSIMDZipVector size, bits<3> opc, RegisterOperand regtype, - string asm, string kind, SDNode OpNode, ValueType valty> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # - "|" # kind # "\t$Rd, $Rn, $Rm}", "", - [(set (valty regtype:$Rd), (OpNode regtype:$Rn, regtype:$Rm))]>, - Sched<[!if(!eq(regtype, V128), WriteVq, WriteVd)]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = size{0}; - let Inst{29-24} = 0b001110; - let Inst{23-22} = size{2-1}; - let Inst{21} = 0; - let Inst{20-16} = Rm; - let Inst{15} = 0; 
- let Inst{14-12} = opc; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDZipVectoropc, string asm, - SDNode OpNode> { - def v8i8 : BaseSIMDZipVector<0b000, opc, V64, - asm, ".8b", OpNode, v8i8>; - def v16i8 : BaseSIMDZipVector<0b001, opc, V128, - asm, ".16b", OpNode, v16i8>; - def v4i16 : BaseSIMDZipVector<0b010, opc, V64, - asm, ".4h", OpNode, v4i16>; - def v8i16 : BaseSIMDZipVector<0b011, opc, V128, - asm, ".8h", OpNode, v8i16>; - def v2i32 : BaseSIMDZipVector<0b100, opc, V64, - asm, ".2s", OpNode, v2i32>; - def v4i32 : BaseSIMDZipVector<0b101, opc, V128, - asm, ".4s", OpNode, v4i32>; - def v2i64 : BaseSIMDZipVector<0b111, opc, V128, - asm, ".2d", OpNode, v2i64>; - - def : Pat<(v4f16 (OpNode V64:$Rn, V64:$Rm)), - (!cast(NAME#"v4i16") V64:$Rn, V64:$Rm)>; - def : Pat<(v8f16 (OpNode V128:$Rn, V128:$Rm)), - (!cast(NAME#"v8i16") V128:$Rn, V128:$Rm)>; - def : Pat<(v2f32 (OpNode V64:$Rn, V64:$Rm)), - (!cast(NAME#"v2i32") V64:$Rn, V64:$Rm)>; - def : Pat<(v4f32 (OpNode V128:$Rn, V128:$Rm)), - (!cast(NAME#"v4i32") V128:$Rn, V128:$Rm)>; - def : Pat<(v2f64 (OpNode V128:$Rn, V128:$Rm)), - (!cast(NAME#"v2i64") V128:$Rn, V128:$Rm)>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD three register scalar instructions -//---------------------------------------------------------------------------- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDThreeScalar size, bits<5> opcode, - RegisterClass regtype, string asm, - list pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, - "\t$Rd, $Rn, $Rm", "", pattern>, - Sched<[WriteVd]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-21} = size; - let Inst{20-16} = Rm; - let Inst{15-11} = opcode; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class 
BaseSIMDThreeScalarTied size, bit R, bits<5> opcode, - dag oops, dag iops, string asm, - list pattern> - : I, - Sched<[WriteVd]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21} = R; - let Inst{20-16} = Rm; - let Inst{15-11} = opcode; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDThreeScalarD opc, string asm, - SDPatternOperator OpNode> { - def v1i64 : BaseSIMDThreeScalar; -} - -multiclass SIMDThreeScalarBHSD opc, string asm, - SDPatternOperator OpNode> { - def v1i64 : BaseSIMDThreeScalar; - def v1i32 : BaseSIMDThreeScalar; - def v1i16 : BaseSIMDThreeScalar; - def v1i8 : BaseSIMDThreeScalar; - - def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (!cast(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>; - def : Pat<(i32 (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))), - (!cast(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>; -} - -multiclass SIMDThreeScalarHS opc, string asm, - SDPatternOperator OpNode> { - def v1i32 : BaseSIMDThreeScalar; - def v1i16 : BaseSIMDThreeScalar; -} - -multiclass SIMDThreeScalarHSTied opc, string asm> { - def v1i32: BaseSIMDThreeScalarTied; - def v1i16: BaseSIMDThreeScalarTied; -} - -multiclass SIMDFPThreeScalar opc, string asm, - SDPatternOperator OpNode = null_frag, - Predicate pred = HasNEON> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - let Predicates = [pred] in { - def NAME#64 : BaseSIMDThreeScalar; - def NAME#32 : BaseSIMDThreeScalar; - } - let Predicates = [pred, HasFullFP16] in { - def NAME#16 : BaseSIMDThreeScalar; - } - } - - def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (!cast(NAME # "64") FPR64:$Rn, FPR64:$Rm)>; -} - -multiclass SIMDThreeScalarFPCmp opc, string asm, - SDPatternOperator OpNode = null_frag> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def NAME#64 : BaseSIMDThreeScalar; - def NAME#32 : BaseSIMDThreeScalar; - let Predicates = 
[HasNEON, HasFullFP16] in { - def NAME#16 : BaseSIMDThreeScalar; - } // Predicates = [HasNEON, HasFullFP16] - } - - def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (!cast(NAME # "64") FPR64:$Rn, FPR64:$Rm)>; -} - -class BaseSIMDThreeScalarMixed size, bits<5> opcode, - dag oops, dag iops, string asm, string cstr, list pat> - : I, - Sched<[WriteVd]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-11} = opcode; - let Inst{10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDThreeScalarMixedHS opc, string asm, - SDPatternOperator OpNode = null_frag> { - def i16 : BaseSIMDThreeScalarMixed; - def i32 : BaseSIMDThreeScalarMixed; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDThreeScalarMixedTiedHS opc, string asm, - SDPatternOperator OpNode = null_frag> { - def i16 : BaseSIMDThreeScalarMixed; - def i32 : BaseSIMDThreeScalarMixed; -} - -//---------------------------------------------------------------------------- -// AdvSIMD two register scalar instructions -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDTwoScalar size, bits<2> size2, bits<5> opcode, - RegisterClass regtype, RegisterClass regtype2, - string asm, list pat> - : I<(outs regtype:$Rd), (ins regtype2:$Rn), asm, - "\t$Rd, $Rn", "", pat>, - Sched<[WriteVd]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21} = 0b1; - let Inst{20-19} = size2; - let Inst{18-17} = 0b00; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class 
BaseSIMDTwoScalarTied size, bits<5> opcode, - RegisterClass regtype, RegisterClass regtype2, - string asm, list pat> - : I<(outs regtype:$dst), (ins regtype:$Rd, regtype2:$Rn), asm, - "\t$Rd, $Rn", "$Rd = $dst", pat>, - Sched<[WriteVd]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDCmpTwoScalar size, bits<2> size2, bits<5> opcode, - RegisterClass regtype, string asm, string zero> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, - "\t$Rd, $Rn, #" # zero, "", []>, - Sched<[WriteVd]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21} = 0b1; - let Inst{20-19} = size2; - let Inst{18-17} = 0b00; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class SIMDInexactCvtTwoScalar opcode, string asm> - : I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "", - [(set (f32 FPR32:$Rd), (int_aarch64_sisd_fcvtxn (f64 FPR64:$Rn)))]>, - Sched<[WriteVd]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-17} = 0b011111100110000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDCmpTwoScalarD opc, string asm, - SDPatternOperator OpNode> { - def v1i64rz : BaseSIMDCmpTwoScalar; - - def : Pat<(v1i64 (OpNode FPR64:$Rn)), - (!cast(NAME # v1i64rz) FPR64:$Rn)>; -} - -multiclass SIMDFPCmpTwoScalar opc, string asm, - SDPatternOperator OpNode> { - def v1i64rz : BaseSIMDCmpTwoScalar; - def v1i32rz : BaseSIMDCmpTwoScalar; - let Predicates = [HasNEON, HasFullFP16] in { - def v1i16rz : BaseSIMDCmpTwoScalar; - } - - def : InstAlias(NAME # v1i64rz) FPR64:$Rd, FPR64:$Rn), 0>; - def : InstAlias(NAME # 
v1i32rz) FPR32:$Rd, FPR32:$Rn), 0>; - let Predicates = [HasNEON, HasFullFP16] in { - def : InstAlias(NAME # v1i16rz) FPR16:$Rd, FPR16:$Rn), 0>; - } - - def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn))), - (!cast(NAME # v1i64rz) FPR64:$Rn)>; -} - -multiclass SIMDTwoScalarD opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v1i64 : BaseSIMDTwoScalar; - - def : Pat<(i64 (OpNode (i64 FPR64:$Rn))), - (!cast(NAME # "v1i64") FPR64:$Rn)>; -} - -multiclass SIMDFPTwoScalar opc, string asm, - Predicate pred = HasNEON> { - let Predicates = [pred] in { - def v1i64 : BaseSIMDTwoScalar; - def v1i32 : BaseSIMDTwoScalar; - } - let Predicates = [pred, HasFullFP16] in { - def v1f16 : BaseSIMDTwoScalar; - } -} - -multiclass SIMDFPTwoScalarCVT opc, string asm, - SDPatternOperator OpNode> { - def v1i64 : BaseSIMDTwoScalar; - def v1i32 : BaseSIMDTwoScalar; - let Predicates = [HasNEON, HasFullFP16] in { - def v1i16 : BaseSIMDTwoScalar; - } -} - -multiclass SIMDTwoScalarBHSD opc, string asm, - SDPatternOperator OpNode = null_frag> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v1i64 : BaseSIMDTwoScalar; - def v1i32 : BaseSIMDTwoScalar; - def v1i16 : BaseSIMDTwoScalar; - def v1i8 : BaseSIMDTwoScalar; - } - - def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn))), - (!cast(NAME # v1i64) FPR64:$Rn)>; -} - -multiclass SIMDTwoScalarBHSDTied opc, string asm, - Intrinsic OpNode> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v1i64 : BaseSIMDTwoScalarTied; - def v1i32 : BaseSIMDTwoScalarTied; - def v1i16 : BaseSIMDTwoScalarTied; - def v1i8 : BaseSIMDTwoScalarTied; - } - - def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn))), - (!cast(NAME # v1i64) FPR64:$Rd, FPR64:$Rn)>; -} - - - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDTwoScalarMixedBHS opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v1i32 : BaseSIMDTwoScalar; - def v1i16 : BaseSIMDTwoScalar; - def v1i8 : BaseSIMDTwoScalar; -} - 
-//---------------------------------------------------------------------------- -// AdvSIMD scalar pairwise instructions -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDPairwiseScalar size, bits<5> opcode, - RegisterOperand regtype, RegisterOperand vectype, - string asm, string kind> - : I<(outs regtype:$Rd), (ins vectype:$Rn), asm, - "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", []>, - Sched<[WriteVd]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21-17} = 0b11000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDPairwiseScalarD opc, string asm> { - def v2i64p : BaseSIMDPairwiseScalar; -} - -multiclass SIMDFPPairwiseScalar opc, string asm> { - let Predicates = [HasNEON, HasFullFP16] in { - def v2i16p : BaseSIMDPairwiseScalar<0, {S,0}, opc, FPR16Op, V64, - asm, ".2h">; - } - def v2i32p : BaseSIMDPairwiseScalar<1, {S,0}, opc, FPR32Op, V64, - asm, ".2s">; - def v2i64p : BaseSIMDPairwiseScalar<1, {S,1}, opc, FPR64Op, V128, - asm, ".2d">; -} - -//---------------------------------------------------------------------------- -// AdvSIMD across lanes instructions -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDAcrossLanes size, bits<5> opcode, - RegisterClass regtype, RegisterOperand vectype, - string asm, string kind, list pattern> - : I<(outs regtype:$Rd), (ins vectype:$Rn), asm, - "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", pattern>, - Sched<[!if(Q, WriteVq, WriteVd)]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b11000; - let Inst{16-12} = opcode; - let Inst{11-10} 
= 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDAcrossLanesBHS opcode, - string asm> { - def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR8, V64, - asm, ".8b", []>; - def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR8, V128, - asm, ".16b", []>; - def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR16, V64, - asm, ".4h", []>; - def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR16, V128, - asm, ".8h", []>; - def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR32, V128, - asm, ".4s", []>; -} - -multiclass SIMDAcrossLanesHSD opcode, string asm> { - def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR16, V64, - asm, ".8b", []>; - def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR16, V128, - asm, ".16b", []>; - def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR32, V64, - asm, ".4h", []>; - def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR32, V128, - asm, ".8h", []>; - def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR64, V128, - asm, ".4s", []>; -} - -multiclass SIMDFPAcrossLanes opcode, bit sz1, string asm, - Intrinsic intOp> { - let Predicates = [HasNEON, HasFullFP16] in { - def v4i16v : BaseSIMDAcrossLanes<0, 0, {sz1, 0}, opcode, FPR16, V64, - asm, ".4h", - [(set (f16 FPR16:$Rd), (intOp (v4f16 V64:$Rn)))]>; - def v8i16v : BaseSIMDAcrossLanes<1, 0, {sz1, 0}, opcode, FPR16, V128, - asm, ".8h", - [(set (f16 FPR16:$Rd), (intOp (v8f16 V128:$Rn)))]>; - } // Predicates = [HasNEON, HasFullFP16] - def v4i32v : BaseSIMDAcrossLanes<1, 1, {sz1, 0}, opcode, FPR32, V128, - asm, ".4s", - [(set FPR32:$Rd, (intOp (v4f32 V128:$Rn)))]>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD INS/DUP instructions -//---------------------------------------------------------------------------- - -// FIXME: There has got to be a better way to factor these. ugh. 
- -class BaseSIMDInsDup pattern> - : I, - Sched<[!if(Q, WriteVq, WriteVd)]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = op; - let Inst{28-21} = 0b01110000; - let Inst{15} = 0; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class SIMDDupFromMain imm5, string size, ValueType vectype, - RegisterOperand vecreg, RegisterClass regtype> - : BaseSIMDInsDup { - let Inst{20-16} = imm5; - let Inst{14-11} = 0b0001; -} - -class SIMDDupFromElement - : BaseSIMDInsDup { - let Inst{14-11} = 0b0000; -} - -class SIMDDup64FromElement - : SIMDDupFromElement<1, ".2d", ".d", v2i64, v2i64, V128, - VectorIndexD, AArch64duplane64> { - bits<1> idx; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; -} - -class SIMDDup32FromElement - : SIMDDupFromElement { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; -} - -class SIMDDup16FromElement - : SIMDDupFromElement { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; -} - -class SIMDDup8FromElement - : SIMDDupFromElement { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; -} - -class BaseSIMDMov imm4, RegisterClass regtype, - Operand idxtype, string asm, list pattern> - : BaseSIMDInsDup { - let Inst{14-11} = imm4; -} - -class SIMDSMov - : BaseSIMDMov; -class SIMDUMov - : BaseSIMDMov; - -class SIMDMovAlias - : InstAlias; - -multiclass SMov { - // SMOV with vector index of 0 are legal in Scalable Matrix Extension (SME) - // streaming mode. 
- let Predicates = [HasNEONorStreamingSVE] in { - def vi8to32_idx0 : SIMDSMov<0, ".b", GPR32, VectorIndex0> { - let Inst{20-16} = 0b00001; - } - def vi8to64_idx0 : SIMDSMov<1, ".b", GPR64, VectorIndex0> { - let Inst{20-16} = 0b00001; - } - def vi16to32_idx0 : SIMDSMov<0, ".h", GPR32, VectorIndex0> { - let Inst{20-16} = 0b00010; - } - def vi16to64_idx0 : SIMDSMov<1, ".h", GPR64, VectorIndex0> { - let Inst{20-16} = 0b00010; - } - def vi32to64_idx0 : SIMDSMov<1, ".s", GPR64, VectorIndex0> { - let Inst{20-16} = 0b00100; - } - } - def vi8to32 : SIMDSMov<0, ".b", GPR32, VectorIndexB> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def vi8to64 : SIMDSMov<1, ".b", GPR64, VectorIndexB> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def vi16to32 : SIMDSMov<0, ".h", GPR32, VectorIndexH> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - } - def vi16to64 : SIMDSMov<1, ".h", GPR64, VectorIndexH> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - } - def vi32to64 : SIMDSMov<1, ".s", GPR64, VectorIndexS> { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - } -} - -multiclass UMov { - // UMOV with vector index of 0 are legal in Scalable Matrix Extension (SME) - // streaming mode. 
- let Predicates = [HasNEONorStreamingSVE] in { - def vi8_idx0 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndex0> { - let Inst{20-16} = 0b00001; - } - def vi16_idx0 : SIMDUMov<0, ".h", v8i16, GPR32, VectorIndex0> { - let Inst{20-16} = 0b00010; - } - def vi32_idx0 : SIMDUMov<0, ".s", v4i32, GPR32, VectorIndex0> { - let Inst{20-16} = 0b00100; - } - def vi64_idx0 : SIMDUMov<1, ".d", v2i64, GPR64, VectorIndex0> { - let Inst{20-16} = 0b01000; - } - def : SIMDMovAlias<"mov", ".s", - !cast(NAME # vi32_idx0), - GPR32, VectorIndex0>; - def : SIMDMovAlias<"mov", ".d", - !cast(NAME # vi64_idx0), - GPR64, VectorIndex0>; - } - def vi8 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndexB> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def vi16 : SIMDUMov<0, ".h", v8i16, GPR32, VectorIndexH> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - } - def vi32 : SIMDUMov<0, ".s", v4i32, GPR32, VectorIndexS> { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - } - def vi64 : SIMDUMov<1, ".d", v2i64, GPR64, VectorIndexD> { - bits<1> idx; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; - } - def : SIMDMovAlias<"mov", ".s", - !cast(NAME#"vi32"), - GPR32, VectorIndexS>; - def : SIMDMovAlias<"mov", ".d", - !cast(NAME#"vi64"), - GPR64, VectorIndexD>; -} - -class SIMDInsFromMain - : BaseSIMDInsDup<1, 0, (outs V128:$dst), - (ins V128:$Rd, idxtype:$idx, regtype:$Rn), "ins", - "{\t$Rd" # size # "$idx, $Rn" # - "|" # size # "\t$Rd$idx, $Rn}", - "$Rd = $dst", - [(set V128:$dst, - (vector_insert (vectype V128:$Rd), regtype:$Rn, idxtype:$idx))]> { - let Inst{14-11} = 0b0011; -} - -class SIMDInsFromElement - : BaseSIMDInsDup<1, 1, (outs V128:$dst), - (ins V128:$Rd, idxtype:$idx, V128:$Rn, idxtype:$idx2), "ins", - "{\t$Rd" # size # "$idx, $Rn" # size # "$idx2" # - "|" # size # "\t$Rd$idx, $Rn$idx2}", - "$Rd = $dst", - [(set V128:$dst, - (vector_insert - (vectype V128:$Rd), - (elttype (vector_extract (vectype V128:$Rn), idxtype:$idx2)), - 
idxtype:$idx))]>; - -class SIMDInsMainMovAlias - : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # - "|" # size #"\t$dst$idx, $src}", - (inst V128:$dst, idxtype:$idx, regtype:$src)>; -class SIMDInsElementMovAlias - : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # size # "$idx2" - # "|" # size #"\t$dst$idx, $src$idx2}", - (inst V128:$dst, idxtype:$idx, V128:$src, idxtype:$idx2)>; - - -multiclass SIMDIns { - def vi8gpr : SIMDInsFromMain<".b", v16i8, GPR32, VectorIndexB> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def vi16gpr : SIMDInsFromMain<".h", v8i16, GPR32, VectorIndexH> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - } - def vi32gpr : SIMDInsFromMain<".s", v4i32, GPR32, VectorIndexS> { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - } - def vi64gpr : SIMDInsFromMain<".d", v2i64, GPR64, VectorIndexD> { - bits<1> idx; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; - } - - def vi8lane : SIMDInsFromElement<".b", v16i8, i32, VectorIndexB> { - bits<4> idx; - bits<4> idx2; - let Inst{20-17} = idx; - let Inst{16} = 1; - let Inst{14-11} = idx2; - } - def vi16lane : SIMDInsFromElement<".h", v8i16, i32, VectorIndexH> { - bits<3> idx; - bits<3> idx2; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - let Inst{14-12} = idx2; - let Inst{11} = {?}; - } - def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> { - bits<2> idx; - bits<2> idx2; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - let Inst{14-13} = idx2; - let Inst{12-11} = {?,?}; - } - def vi64lane : SIMDInsFromElement<".d", v2i64, i64, VectorIndexD> { - bits<1> idx; - bits<1> idx2; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; - let Inst{14} = idx2; - let Inst{13-11} = {?,?,?}; - } - - // For all forms of the INS instruction, the "mov" mnemonic is the - // preferred alias. Why they didn't just call the instruction "mov" in - // the first place is a very good question indeed... 
- def : SIMDInsMainMovAlias<".b", !cast(NAME#"vi8gpr"), - GPR32, VectorIndexB>; - def : SIMDInsMainMovAlias<".h", !cast(NAME#"vi16gpr"), - GPR32, VectorIndexH>; - def : SIMDInsMainMovAlias<".s", !cast(NAME#"vi32gpr"), - GPR32, VectorIndexS>; - def : SIMDInsMainMovAlias<".d", !cast(NAME#"vi64gpr"), - GPR64, VectorIndexD>; - - def : SIMDInsElementMovAlias<".b", !cast(NAME#"vi8lane"), - VectorIndexB>; - def : SIMDInsElementMovAlias<".h", !cast(NAME#"vi16lane"), - VectorIndexH>; - def : SIMDInsElementMovAlias<".s", !cast(NAME#"vi32lane"), - VectorIndexS>; - def : SIMDInsElementMovAlias<".d", !cast(NAME#"vi64lane"), - VectorIndexD>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD TBL/TBX -//---------------------------------------------------------------------------- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDTableLookup len, bit op, RegisterOperand vectype, - RegisterOperand listtype, string asm, string kind> - : I<(outs vectype:$Vd), (ins listtype:$Vn, vectype:$Vm), asm, - "\t$Vd" # kind # ", $Vn, $Vm" # kind, "", []>, - Sched<[!if(Q, WriteVq, WriteVd)]> { - bits<5> Vd; - bits<5> Vn; - bits<5> Vm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-21} = 0b001110000; - let Inst{20-16} = Vm; - let Inst{15} = 0; - let Inst{14-13} = len; - let Inst{12} = op; - let Inst{11-10} = 0b00; - let Inst{9-5} = Vn; - let Inst{4-0} = Vd; -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDTableLookupTied len, bit op, RegisterOperand vectype, - RegisterOperand listtype, string asm, string kind> - : I<(outs vectype:$dst), (ins vectype:$Vd, listtype:$Vn, vectype:$Vm), asm, - "\t$Vd" # kind # ", $Vn, $Vm" # kind, "$Vd = $dst", []>, - Sched<[!if(Q, WriteVq, WriteVd)]> { - bits<5> Vd; - bits<5> Vn; - bits<5> Vm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-21} = 0b001110000; - let Inst{20-16} = Vm; - let Inst{15} = 0; - let Inst{14-13} = len; - let Inst{12} = op; - let Inst{11-10} 
= 0b00; - let Inst{9-5} = Vn; - let Inst{4-0} = Vd; -} - -class SIMDTableLookupAlias - : InstAlias; - -multiclass SIMDTableLookup { - def v8i8One : BaseSIMDTableLookup<0, 0b00, op, V64, VecListOne16b, - asm, ".8b">; - def v8i8Two : BaseSIMDTableLookup<0, 0b01, op, V64, VecListTwo16b, - asm, ".8b">; - def v8i8Three : BaseSIMDTableLookup<0, 0b10, op, V64, VecListThree16b, - asm, ".8b">; - def v8i8Four : BaseSIMDTableLookup<0, 0b11, op, V64, VecListFour16b, - asm, ".8b">; - def v16i8One : BaseSIMDTableLookup<1, 0b00, op, V128, VecListOne16b, - asm, ".16b">; - def v16i8Two : BaseSIMDTableLookup<1, 0b01, op, V128, VecListTwo16b, - asm, ".16b">; - def v16i8Three: BaseSIMDTableLookup<1, 0b10, op, V128, VecListThree16b, - asm, ".16b">; - def v16i8Four : BaseSIMDTableLookup<1, 0b11, op, V128, VecListFour16b, - asm, ".16b">; - - def : SIMDTableLookupAlias(NAME#"v8i8One"), - V64, VecListOne128>; - def : SIMDTableLookupAlias(NAME#"v8i8Two"), - V64, VecListTwo128>; - def : SIMDTableLookupAlias(NAME#"v8i8Three"), - V64, VecListThree128>; - def : SIMDTableLookupAlias(NAME#"v8i8Four"), - V64, VecListFour128>; - def : SIMDTableLookupAlias(NAME#"v16i8One"), - V128, VecListOne128>; - def : SIMDTableLookupAlias(NAME#"v16i8Two"), - V128, VecListTwo128>; - def : SIMDTableLookupAlias(NAME#"v16i8Three"), - V128, VecListThree128>; - def : SIMDTableLookupAlias(NAME#"v16i8Four"), - V128, VecListFour128>; -} - -multiclass SIMDTableLookupTied { - def v8i8One : BaseSIMDTableLookupTied<0, 0b00, op, V64, VecListOne16b, - asm, ".8b">; - def v8i8Two : BaseSIMDTableLookupTied<0, 0b01, op, V64, VecListTwo16b, - asm, ".8b">; - def v8i8Three : BaseSIMDTableLookupTied<0, 0b10, op, V64, VecListThree16b, - asm, ".8b">; - def v8i8Four : BaseSIMDTableLookupTied<0, 0b11, op, V64, VecListFour16b, - asm, ".8b">; - def v16i8One : BaseSIMDTableLookupTied<1, 0b00, op, V128, VecListOne16b, - asm, ".16b">; - def v16i8Two : BaseSIMDTableLookupTied<1, 0b01, op, V128, VecListTwo16b, - asm, ".16b">; - def v16i8Three: 
BaseSIMDTableLookupTied<1, 0b10, op, V128, VecListThree16b, - asm, ".16b">; - def v16i8Four : BaseSIMDTableLookupTied<1, 0b11, op, V128, VecListFour16b, - asm, ".16b">; - - def : SIMDTableLookupAlias(NAME#"v8i8One"), - V64, VecListOne128>; - def : SIMDTableLookupAlias(NAME#"v8i8Two"), - V64, VecListTwo128>; - def : SIMDTableLookupAlias(NAME#"v8i8Three"), - V64, VecListThree128>; - def : SIMDTableLookupAlias(NAME#"v8i8Four"), - V64, VecListFour128>; - def : SIMDTableLookupAlias(NAME#"v16i8One"), - V128, VecListOne128>; - def : SIMDTableLookupAlias(NAME#"v16i8Two"), - V128, VecListTwo128>; - def : SIMDTableLookupAlias(NAME#"v16i8Three"), - V128, VecListThree128>; - def : SIMDTableLookupAlias(NAME#"v16i8Four"), - V128, VecListFour128>; -} - - -//---------------------------------------------------------------------------- -// AdvSIMD scalar DUP -//---------------------------------------------------------------------------- -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDScalarDUP - : I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), asm, - "{\t$dst, $src" # kind # "$idx" # - "|\t$dst, $src$idx}", "", []>, - Sched<[WriteVd]> { - bits<5> dst; - bits<5> src; - let Inst{31-21} = 0b01011110000; - let Inst{15-10} = 0b000001; - let Inst{9-5} = src; - let Inst{4-0} = dst; -} - -class SIMDScalarDUPAlias - : InstAlias; - - -multiclass SIMDScalarDUP { - def i8 : BaseSIMDScalarDUP { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def i16 : BaseSIMDScalarDUP { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - } - def i32 : BaseSIMDScalarDUP { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - } - def i64 : BaseSIMDScalarDUP { - bits<1> idx; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; - } - - def : Pat<(v1i64 (scalar_to_vector (i64 (vector_extract (v2i64 V128:$src), - VectorIndexD:$idx)))), - (!cast(NAME # i64) V128:$src, VectorIndexD:$idx)>; - - // 'DUP' mnemonic aliases. 
- def : SIMDScalarDUPAlias<"dup", ".b", - !cast(NAME#"i8"), - FPR8, V128, VectorIndexB>; - def : SIMDScalarDUPAlias<"dup", ".h", - !cast(NAME#"i16"), - FPR16, V128, VectorIndexH>; - def : SIMDScalarDUPAlias<"dup", ".s", - !cast(NAME#"i32"), - FPR32, V128, VectorIndexS>; - def : SIMDScalarDUPAlias<"dup", ".d", - !cast(NAME#"i64"), - FPR64, V128, VectorIndexD>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD modified immediate instructions -//---------------------------------------------------------------------------- - -class BaseSIMDModifiedImm pattern> - : I, - Sched<[!if(Q, WriteVq, WriteVd)]> { - bits<5> Rd; - bits<8> imm8; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = op; - let Inst{28-19} = 0b0111100000; - let Inst{18-16} = imm8{7-5}; - let Inst{11} = op2; - let Inst{10} = 1; - let Inst{9-5} = imm8{4-0}; - let Inst{4-0} = Rd; -} - -class BaseSIMDModifiedImmVector pattern> - : BaseSIMDModifiedImm { - let DecoderMethod = "DecodeModImmInstruction"; -} - -class BaseSIMDModifiedImmVectorTied pattern> - : BaseSIMDModifiedImm { - let DecoderMethod = "DecodeModImmTiedInstruction"; -} - -class BaseSIMDModifiedImmVectorShift b15_b12, - RegisterOperand vectype, string asm, - string kind, list pattern> - : BaseSIMDModifiedImmVector { - bits<2> shift; - let Inst{15} = b15_b12{1}; - let Inst{14-13} = shift; - let Inst{12} = b15_b12{0}; -} - -class BaseSIMDModifiedImmVectorShiftTied b15_b12, - RegisterOperand vectype, string asm, - string kind, list pattern> - : BaseSIMDModifiedImmVectorTied { - bits<2> shift; - let Inst{15} = b15_b12{1}; - let Inst{14-13} = shift; - let Inst{12} = b15_b12{0}; -} - - -class BaseSIMDModifiedImmVectorShiftHalf b15_b12, - RegisterOperand vectype, string asm, - string kind, list pattern> - : BaseSIMDModifiedImmVector { - bits<2> shift; - let Inst{15} = b15_b12{1}; - let Inst{14} = 0; - let Inst{13} = shift{0}; - let Inst{12} = b15_b12{0}; -} - -class 
BaseSIMDModifiedImmVectorShiftHalfTied b15_b12, - RegisterOperand vectype, string asm, - string kind, list pattern> - : BaseSIMDModifiedImmVectorTied { - bits<2> shift; - let Inst{15} = b15_b12{1}; - let Inst{14} = 0; - let Inst{13} = shift{0}; - let Inst{12} = b15_b12{0}; -} - -multiclass SIMDModifiedImmVectorShift hw_cmode, bits<2> w_cmode, - string asm> { - def v4i16 : BaseSIMDModifiedImmVectorShiftHalf<0, op, hw_cmode, V64, - asm, ".4h", []>; - def v8i16 : BaseSIMDModifiedImmVectorShiftHalf<1, op, hw_cmode, V128, - asm, ".8h", []>; - - def v2i32 : BaseSIMDModifiedImmVectorShift<0, op, w_cmode, V64, - asm, ".2s", []>; - def v4i32 : BaseSIMDModifiedImmVectorShift<1, op, w_cmode, V128, - asm, ".4s", []>; -} - -multiclass SIMDModifiedImmVectorShiftTied hw_cmode, - bits<2> w_cmode, string asm, - SDNode OpNode> { - def v4i16 : BaseSIMDModifiedImmVectorShiftHalfTied<0, op, hw_cmode, V64, - asm, ".4h", - [(set (v4i16 V64:$dst), (OpNode V64:$Rd, - imm0_255:$imm8, - (i32 imm:$shift)))]>; - def v8i16 : BaseSIMDModifiedImmVectorShiftHalfTied<1, op, hw_cmode, V128, - asm, ".8h", - [(set (v8i16 V128:$dst), (OpNode V128:$Rd, - imm0_255:$imm8, - (i32 imm:$shift)))]>; - - def v2i32 : BaseSIMDModifiedImmVectorShiftTied<0, op, w_cmode, V64, - asm, ".2s", - [(set (v2i32 V64:$dst), (OpNode V64:$Rd, - imm0_255:$imm8, - (i32 imm:$shift)))]>; - def v4i32 : BaseSIMDModifiedImmVectorShiftTied<1, op, w_cmode, V128, - asm, ".4s", - [(set (v4i32 V128:$dst), (OpNode V128:$Rd, - imm0_255:$imm8, - (i32 imm:$shift)))]>; -} - -class SIMDModifiedImmMoveMSL cmode, - RegisterOperand vectype, string asm, - string kind, list pattern> - : BaseSIMDModifiedImmVector { - bits<1> shift; - let Inst{15-13} = cmode{3-1}; - let Inst{12} = shift; -} - -class SIMDModifiedImmVectorNoShift cmode, - RegisterOperand vectype, - Operand imm_type, string asm, - string kind, list pattern> - : BaseSIMDModifiedImmVector { - let Inst{15-12} = cmode; -} - -class SIMDModifiedImmScalarNoShift cmode, string asm, - list 
pattern> - : BaseSIMDModifiedImm { - let Inst{15-12} = cmode; - let DecoderMethod = "DecodeModImmInstruction"; -} - -//---------------------------------------------------------------------------- -// AdvSIMD indexed element -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDIndexed size, bits<4> opc, - RegisterOperand dst_reg, RegisterOperand lhs_reg, - RegisterOperand rhs_reg, Operand vec_idx, string asm, - string apple_kind, string dst_kind, string lhs_kind, - string rhs_kind, list pattern> - : I<(outs dst_reg:$Rd), (ins lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), - asm, - "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" # - "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "", pattern>, - Sched<[WriteVd]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28} = Scalar; - let Inst{27-24} = 0b1111; - let Inst{23-22} = size; - // Bit 21 must be set by the derived class. - let Inst{20-16} = Rm; - let Inst{15-12} = opc; - // Bit 11 must be set by the derived class. 
- let Inst{10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDIndexedTied size, bits<4> opc, - RegisterOperand dst_reg, RegisterOperand lhs_reg, - RegisterOperand rhs_reg, Operand vec_idx, string asm, - string apple_kind, string dst_kind, string lhs_kind, - string rhs_kind, list pattern> - : I<(outs dst_reg:$dst), - (ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), asm, - "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" # - "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "$Rd = $dst", pattern>, - Sched<[WriteVd]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28} = Scalar; - let Inst{27-24} = 0b1111; - let Inst{23-22} = size; - // Bit 21 must be set by the derived class. - let Inst{20-16} = Rm; - let Inst{15-12} = opc; - // Bit 11 must be set by the derived class. - let Inst{10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - - -//---------------------------------------------------------------------------- -// Armv8.6 BFloat16 Extension -//---------------------------------------------------------------------------- -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in { - -class BaseSIMDThreeSameVectorBFDot - : BaseSIMDThreeSameVectorTied { - let AsmString = !strconcat(asm, - "{\t$Rd" # kind1 # ", $Rn" # kind2 # - ", $Rm" # kind2 # "}"); -} - -multiclass SIMDThreeSameVectorBFDot { - def v4bf16 : BaseSIMDThreeSameVectorBFDot<0, U, asm, ".2s", ".4h", V64, - v2f32, v4bf16>; - def v8bf16 : BaseSIMDThreeSameVectorBFDot<1, U, asm, ".4s", ".8h", V128, - v4f32, v8bf16>; -} - -class BaseSIMDThreeSameVectorBF16DotI - : BaseSIMDIndexedTied { - - bits<2> idx; - let Inst{21} = idx{0}; // L - let Inst{11} = idx{1}; // H -} - -multiclass SIMDThreeSameVectorBF16DotI { - - def v4bf16 : BaseSIMDThreeSameVectorBF16DotI<0, U, asm, ".2s", ".4h", - ".2h", V64, v2f32, v4bf16>; - def v8bf16 : 
BaseSIMDThreeSameVectorBF16DotI<1, U, asm, ".4s", ".8h", - ".2h", V128, v4f32, v8bf16>; -} - -class SIMDBF16MLAL - : BaseSIMDThreeSameVectorTied { - let AsmString = !strconcat(asm, "{\t$Rd.4s, $Rn.8h, $Rm.8h}"); -} - -class SIMDBF16MLALIndex - : I<(outs V128:$dst), - (ins V128:$Rd, V128:$Rn, V128_lo:$Rm, VectorIndexH:$idx), asm, - "{\t$Rd.4s, $Rn.8h, $Rm.h$idx}", "$Rd = $dst", - [(set (v4f32 V128:$dst), - (v4f32 (OpNode (v4f32 V128:$Rd), - (v8bf16 V128:$Rn), - (v8bf16 - (AArch64duplane16 (v8bf16 V128_lo:$Rm), - VectorIndexH:$idx)))))]>, - Sched<[WriteVq]> { - bits<5> Rd; - bits<5> Rn; - bits<4> Rm; - bits<3> idx; - - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-22} = 0b00111111; - let Inst{21-20} = idx{1-0}; - let Inst{19-16} = Rm; - let Inst{15-12} = 0b1111; - let Inst{11} = idx{2}; // H - let Inst{10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class SIMDThreeSameVectorBF16MatrixMul - : BaseSIMDThreeSameVectorTied<1, 1, 0b010, 0b11101, - V128, asm, ".4s", - [(set (v4f32 V128:$dst), - (int_aarch64_neon_bfmmla (v4f32 V128:$Rd), - (v8bf16 V128:$Rn), - (v8bf16 V128:$Rm)))]> { - let AsmString = !strconcat(asm, "{\t$Rd", ".4s", ", $Rn", ".8h", - ", $Rm", ".8h", "}"); -} - -class SIMD_BFCVTN - : BaseSIMDMixedTwoVector<0, 0, 0b10, 0b10110, V128, V128, - "bfcvtn", ".4h", ".4s", - [(set (v8bf16 V128:$Rd), - (int_aarch64_neon_bfcvtn (v4f32 V128:$Rn)))]>; - -class SIMD_BFCVTN2 - : BaseSIMDMixedTwoVectorTied<1, 0, 0b10, 0b10110, V128, V128, - "bfcvtn2", ".8h", ".4s", - [(set (v8bf16 V128:$dst), - (int_aarch64_neon_bfcvtn2 (v8bf16 V128:$Rd), (v4f32 V128:$Rn)))]>; - -class BF16ToSinglePrecision - : I<(outs FPR16:$Rd), (ins FPR32:$Rn), asm, "\t$Rd, $Rn", "", - [(set (bf16 FPR16:$Rd), (int_aarch64_neon_bfcvt (f32 FPR32:$Rn)))]>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-10} = 0b0001111001100011010000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} -} // End of let mayStore = 0, mayLoad = 0, hasSideEffects = 0 - 
-//---------------------------------------------------------------------------- -// Armv8.6 Matrix Multiply Extension -//---------------------------------------------------------------------------- - -class SIMDThreeSameVectorMatMul - : BaseSIMDThreeSameVectorTied<1, U, 0b100, {0b1010, B}, V128, asm, ".4s", - [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), - (v16i8 V128:$Rn), - (v16i8 V128:$Rm)))]> { - let AsmString = asm # "{\t$Rd.4s, $Rn.16b, $Rm.16b}"; -} - -//---------------------------------------------------------------------------- -// ARMv8.2-A Dot Product Instructions (Indexed) -class BaseSIMDThreeSameVectorDotIndex size, string asm, - string dst_kind, string lhs_kind, string rhs_kind, - RegisterOperand RegType, - ValueType AccumType, ValueType InputType, - SDPatternOperator OpNode> : - BaseSIMDIndexedTied { - bits<2> idx; - let Inst{21} = idx{0}; // L - let Inst{11} = idx{1}; // H -} - -multiclass SIMDThreeSameVectorDotIndex size, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorDotIndex<0, U, Mixed, size, asm, ".2s", ".8b", ".4b", - V64, v2i32, v8i8, OpNode>; - def v16i8 : BaseSIMDThreeSameVectorDotIndex<1, U, Mixed, size, asm, ".4s", ".16b", ".4b", - V128, v4i32, v16i8, OpNode>; -} - -// ARMv8.2-A Fused Multiply Add-Long Instructions (Indexed) -class BaseSIMDThreeSameVectorFMLIndex opc, string asm, - string dst_kind, string lhs_kind, - string rhs_kind, RegisterOperand RegType, - ValueType AccumType, ValueType InputType, - SDPatternOperator OpNode> : - BaseSIMDIndexedTied { - // idx = H:L:M - bits<3> idx; - let Inst{11} = idx{2}; // H - let Inst{21} = idx{1}; // L - let Inst{20} = idx{0}; // M -} - -multiclass SIMDThreeSameVectorFMLIndex opc, string asm, - SDPatternOperator OpNode> { - def v4f16 : BaseSIMDThreeSameVectorFMLIndex<0, U, opc, asm, ".2s", ".2h", ".h", - V64, v2f32, v4f16, OpNode>; - def v8f16 : BaseSIMDThreeSameVectorFMLIndex<1, U, opc, asm, ".4s", ".4h", ".h", - V128, v4f32, v8f16, OpNode>; -} - -multiclass 
SIMDFPIndexed opc, string asm, - SDPatternOperator OpNode> { - let Predicates = [HasNEON, HasFullFP16] in { - def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b00, opc, - V64, V64, - V128_lo, VectorIndexH, - asm, ".4h", ".4h", ".4h", ".h", - [(set (v4f16 V64:$Rd), - (OpNode (v4f16 V64:$Rn), - (v4f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b00, opc, - V128, V128, - V128_lo, VectorIndexH, - asm, ".8h", ".8h", ".8h", ".h", - [(set (v8f16 V128:$Rd), - (OpNode (v8f16 V128:$Rn), - (v8f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - } // Predicates = [HasNEON, HasFullFP16] - - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", - [(set (v2f32 V64:$Rd), - (OpNode (v2f32 V64:$Rn), - (v2f32 (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", - [(set (v4f32 V128:$Rd), - (OpNode (v4f32 V128:$Rn), - (v4f32 (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v2i64_indexed : BaseSIMDIndexed<1, U, 0, 0b11, opc, - V128, V128, - V128, VectorIndexD, - asm, ".2d", ".2d", ".2d", ".d", - [(set (v2f64 V128:$Rd), - (OpNode (v2f64 V128:$Rn), - (v2f64 (AArch64duplane64 (v2f64 V128:$Rm), VectorIndexD:$idx))))]> { - bits<1> idx; - let Inst{11} = idx{0}; - let Inst{21} = 0; - } - - let Predicates = [HasNEON, HasFullFP16] in { - def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b00, opc, - FPR16Op, FPR16Op, V128_lo, VectorIndexH, - asm, ".h", "", "", ".h", - 
[(set (f16 FPR16Op:$Rd), - (OpNode (f16 FPR16Op:$Rn), - (f16 (vector_extract (v8f16 V128_lo:$Rm), - VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - } // Predicates = [HasNEON, HasFullFP16] - - def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, - FPR32Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", - [(set (f32 FPR32Op:$Rd), - (OpNode (f32 FPR32Op:$Rn), - (f32 (vector_extract (v4f32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b11, opc, - FPR64Op, FPR64Op, V128, VectorIndexD, - asm, ".d", "", "", ".d", - [(set (f64 FPR64Op:$Rd), - (OpNode (f64 FPR64Op:$Rn), - (f64 (vector_extract (v2f64 V128:$Rm), - VectorIndexD:$idx))))]> { - bits<1> idx; - let Inst{11} = idx{0}; - let Inst{21} = 0; - } -} - -multiclass SIMDFPIndexedTiedPatterns { - let Predicates = [HasNEON, HasFullFP16] in { - // Patterns for f16: DUPLANE, DUP scalar and vector_extract. 
- def : Pat<(v8f16 (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn), - (AArch64duplane16 (v8f16 V128_lo:$Rm), - VectorIndexH:$idx))), - (!cast(INST # "v8i16_indexed") - V128:$Rd, V128:$Rn, V128_lo:$Rm, VectorIndexH:$idx)>; - def : Pat<(v8f16 (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn), - (AArch64dup (f16 FPR16Op_lo:$Rm)))), - (!cast(INST # "v8i16_indexed") V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), (f16 FPR16Op_lo:$Rm), hsub), (i64 0))>; - - def : Pat<(v4f16 (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn), - (AArch64duplane16 (v8f16 V128_lo:$Rm), - VectorIndexH:$idx))), - (!cast(INST # "v4i16_indexed") - V64:$Rd, V64:$Rn, V128_lo:$Rm, VectorIndexH:$idx)>; - def : Pat<(v4f16 (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn), - (AArch64dup (f16 FPR16Op_lo:$Rm)))), - (!cast(INST # "v4i16_indexed") V64:$Rd, V64:$Rn, - (SUBREG_TO_REG (i32 0), (f16 FPR16Op_lo:$Rm), hsub), (i64 0))>; - - def : Pat<(f16 (OpNode (f16 FPR16:$Rd), (f16 FPR16:$Rn), - (vector_extract (v8f16 V128_lo:$Rm), VectorIndexH:$idx))), - (!cast(INST # "v1i16_indexed") FPR16:$Rd, FPR16:$Rn, - V128_lo:$Rm, VectorIndexH:$idx)>; - } // Predicates = [HasNEON, HasFullFP16] - - // 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar. - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (AArch64duplane32 (v4f32 V128:$Rm), - VectorIndexS:$idx))), - (!cast(INST # v2i32_indexed) - V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (AArch64dup (f32 FPR32Op:$Rm)))), - (!cast(INST # "v2i32_indexed") V64:$Rd, V64:$Rn, - (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; - - - // 2 variants for the .4s version: DUPLANE from 128-bit and DUP scalar. 
- def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (AArch64duplane32 (v4f32 V128:$Rm), - VectorIndexS:$idx))), - (!cast(INST # "v4i32_indexed") - V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (AArch64dup (f32 FPR32Op:$Rm)))), - (!cast(INST # "v4i32_indexed") V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; - - // 2 variants for the .2d version: DUPLANE from 128-bit and DUP scalar. - def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (AArch64duplane64 (v2f64 V128:$Rm), - VectorIndexD:$idx))), - (!cast(INST # "v2i64_indexed") - V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (AArch64dup (f64 FPR64Op:$Rm)))), - (!cast(INST # "v2i64_indexed") V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; - - // Covers 2 variants for 32-bit scalar version: extract from .2s or from .4s - def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), - (vector_extract (v4f32 V128:$Rm), VectorIndexS:$idx))), - (!cast(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn, - V128:$Rm, VectorIndexS:$idx)>; - - // 1 variant for 64-bit scalar version: extract from .1d or from .2d - def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), - (vector_extract (v2f64 V128:$Rm), VectorIndexD:$idx))), - (!cast(INST # "v1i64_indexed") FPR64:$Rd, FPR64:$Rn, - V128:$Rm, VectorIndexD:$idx)>; -} - -multiclass SIMDFPIndexedTied opc, string asm> { - let Predicates = [HasNEON, HasFullFP16] in { - def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b00, opc, V64, V64, - V128_lo, VectorIndexH, - asm, ".4h", ".4h", ".4h", ".h", []> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b00, opc, - V128, V128, - V128_lo, VectorIndexH, - asm, ".8h", ".8h", ".8h", ".h", []> { - bits<3> idx; - let Inst{11} = idx{2}; - 
let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - } // Predicates = [HasNEON, HasFullFP16] - - def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", []> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", []> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v2i64_indexed : BaseSIMDIndexedTied<1, U, 0, 0b11, opc, - V128, V128, - V128, VectorIndexD, - asm, ".2d", ".2d", ".2d", ".d", []> { - bits<1> idx; - let Inst{11} = idx{0}; - let Inst{21} = 0; - } - - let Predicates = [HasNEON, HasFullFP16] in { - def v1i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b00, opc, - FPR16Op, FPR16Op, V128_lo, VectorIndexH, - asm, ".h", "", "", ".h", []> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - } // Predicates = [HasNEON, HasFullFP16] - - def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, - FPR32Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", []> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b11, opc, - FPR64Op, FPR64Op, V128, VectorIndexD, - asm, ".d", "", "", ".d", []> { - bits<1> idx; - let Inst{11} = idx{0}; - let Inst{21} = 0; - } -} - -multiclass SIMDIndexedHSPatterns { - - def : Pat<(v4i16 (OpNodeLane - (v4i16 V64:$Rn), (v4i16 V64_lo:$Rm), - VectorIndexS32b:$idx)), - (!cast(NAME # v4i16_indexed) $Rn, - (SUBREG_TO_REG (i32 0), (v4i16 V64_lo:$Rm), dsub), - (UImmS1XForm $idx))>; - - def : Pat<(v4i16 (OpNodeLaneQ - (v4i16 V64:$Rn), (v8i16 V128_lo:$Rm), - VectorIndexH32b:$idx)), - (!cast(NAME # v4i16_indexed) $Rn, $Rm, - (UImmS1XForm $idx))>; - - def : Pat<(v8i16 (OpNodeLane - (v8i16 V128:$Rn), (v4i16 V64_lo:$Rm), - VectorIndexS32b:$idx)), - (!cast(NAME # v8i16_indexed) $Rn, - 
(SUBREG_TO_REG (i32 0), $Rm, dsub), - (UImmS1XForm $idx))>; - - def : Pat<(v8i16 (OpNodeLaneQ - (v8i16 V128:$Rn), (v8i16 V128_lo:$Rm), - VectorIndexH32b:$idx)), - (!cast(NAME # v8i16_indexed) $Rn, $Rm, - (UImmS1XForm $idx))>; - - def : Pat<(v2i32 (OpNodeLane - (v2i32 V64:$Rn), (v2i32 V64:$Rm), - VectorIndexD32b:$idx)), - (!cast(NAME # v2i32_indexed) $Rn, - (SUBREG_TO_REG (i32 0), (v2i32 V64_lo:$Rm), dsub), - (UImmS1XForm $idx))>; - - def : Pat<(v2i32 (OpNodeLaneQ - (v2i32 V64:$Rn), (v4i32 V128:$Rm), - VectorIndexS32b:$idx)), - (!cast(NAME # v2i32_indexed) $Rn, $Rm, - (UImmS1XForm $idx))>; - - def : Pat<(v4i32 (OpNodeLane - (v4i32 V128:$Rn), (v2i32 V64:$Rm), - VectorIndexD32b:$idx)), - (!cast(NAME # v4i32_indexed) $Rn, - (SUBREG_TO_REG (i32 0), $Rm, dsub), - (UImmS1XForm $idx))>; - - def : Pat<(v4i32 (OpNodeLaneQ - (v4i32 V128:$Rn), - (v4i32 V128:$Rm), - VectorIndexS32b:$idx)), - (!cast(NAME # v4i32_indexed) $Rn, $Rm, - (UImmS1XForm $idx))>; - -} - -multiclass SIMDIndexedHS opc, string asm, - SDPatternOperator OpNode> { - def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, V64, V64, - V128_lo, VectorIndexH, - asm, ".4h", ".4h", ".4h", ".h", - [(set (v4i16 V64:$Rd), - (OpNode (v4i16 V64:$Rn), - (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm, ".8h", ".8h", ".8h", ".h", - [(set (v8i16 V128:$Rd), - (OpNode (v8i16 V128:$Rn), - (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", - [(set (v2i32 V64:$Rd), - (OpNode (v2i32 V64:$Rn), - (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> 
idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", - [(set (v4i32 V128:$Rd), - (OpNode (v4i32 V128:$Rn), - (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc, - FPR16Op, FPR16Op, V128_lo, VectorIndexH, - asm, ".h", "", "", ".h", []> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, - FPR32Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", - [(set (i32 FPR32Op:$Rd), - (OpNode FPR32Op:$Rn, - (i32 (vector_extract (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDVectorIndexedHS opc, string asm, - SDPatternOperator OpNode> { - def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, - V64, V64, - V128_lo, VectorIndexH, - asm, ".4h", ".4h", ".4h", ".h", - [(set (v4i16 V64:$Rd), - (OpNode (v4i16 V64:$Rn), - (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm, ".8h", ".8h", ".8h", ".h", - [(set (v8i16 V128:$Rd), - (OpNode (v8i16 V128:$Rn), - (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", - [(set (v2i32 V64:$Rd), - (OpNode (v2i32 V64:$Rn), - (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = 
idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", - [(set (v4i32 V128:$Rd), - (OpNode (v4i32 V128:$Rn), - (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDVectorIndexedHSTied opc, string asm, - SDPatternOperator OpNode> { - def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, V64, V64, - V128_lo, VectorIndexH, - asm, ".4h", ".4h", ".4h", ".h", - [(set (v4i16 V64:$dst), - (OpNode (v4i16 V64:$Rd),(v4i16 V64:$Rn), - (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm, ".8h", ".8h", ".8h", ".h", - [(set (v8i16 V128:$dst), - (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), - (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, - V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", - [(set (v2i32 V64:$dst), - (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), - (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), - (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDIndexedLongSD opc, string asm, - SDPatternOperator OpNode> { - def v4i16_indexed : 
BaseSIMDIndexed<0, U, 0, 0b01, opc, - V128, V64, - V128_lo, VectorIndexH, - asm, ".4s", ".4s", ".4h", ".h", - [(set (v4i32 V128:$Rd), - (OpNode (v4i16 V64:$Rn), - (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm#"2", ".4s", ".4s", ".8h", ".h", - [(set (v4i32 V128:$Rd), - (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))]> { - - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V128, V64, - V128, VectorIndexS, - asm, ".2d", ".2d", ".2s", ".s", - [(set (v2i64 V128:$Rd), - (OpNode (v2i32 V64:$Rn), - (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm#"2", ".2d", ".2d", ".4s", ".s", - [(set (v2i64 V128:$Rd), - (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc, - FPR32Op, FPR16Op, V128_lo, VectorIndexH, - asm, ".h", "", "", ".h", []> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, - FPR64Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", []> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDIndexedLongSQDMLXSDTied opc, string asm, - SDPatternOperator Accum> { - def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, - V128, V64, - 
V128_lo, VectorIndexH, - asm, ".4s", ".4s", ".4h", ".h", - [(set (v4i32 V128:$dst), - (Accum (v4i32 V128:$Rd), - (v4i32 (int_aarch64_neon_sqdmull - (v4i16 V64:$Rn), - (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - // FIXME: it would be nice to use the scalar (v1i32) instruction here, but an - // intermediate EXTRACT_SUBREG would be untyped. - def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), - (i32 (vector_extract (v4i32 - (int_aarch64_neon_sqdmull (v4i16 V64:$Rn), - (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx)))), - (i64 0))))), - (EXTRACT_SUBREG - (!cast(NAME # v4i16_indexed) - (SUBREG_TO_REG (i32 0), FPR32Op:$Rd, ssub), V64:$Rn, - V128_lo:$Rm, VectorIndexH:$idx), - ssub)>; - - def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm#"2", ".4s", ".4s", ".8h", ".h", - [(set (v4i32 V128:$dst), - (Accum (v4i32 V128:$Rd), - (v4i32 (int_aarch64_neon_sqdmull - (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 - (AArch64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, - V128, V64, - V128, VectorIndexS, - asm, ".2d", ".2d", ".2s", ".s", - [(set (v2i64 V128:$dst), - (Accum (v2i64 V128:$Rd), - (v2i64 (int_aarch64_neon_sqdmull - (v2i32 V64:$Rn), - (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm#"2", ".2d", ".2d", ".4s", ".s", - [(set (v2i64 V128:$dst), - (Accum (v2i64 V128:$Rd), - (v2i64 (int_aarch64_neon_sqdmull - (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 - (AArch64duplane32 (v4i32 V128:$Rm), - 
VectorIndexS:$idx))))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc, - FPR32Op, FPR16Op, V128_lo, VectorIndexH, - asm, ".h", "", "", ".h", []> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - - def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, - FPR64Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", - [(set (i64 FPR64Op:$dst), - (Accum (i64 FPR64Op:$Rd), - (i64 (int_aarch64_neon_sqdmulls_scalar - (i32 FPR32Op:$Rn), - (i32 (vector_extract (v4i32 V128:$Rm), - VectorIndexS:$idx))))))]> { - - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDVectorIndexedLongSD opc, string asm, - SDPatternOperator OpNode> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, - V128, V64, - V128_lo, VectorIndexH, - asm, ".4s", ".4s", ".4h", ".h", - [(set (v4i32 V128:$Rd), - (OpNode (v4i16 V64:$Rn), - (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm#"2", ".4s", ".4s", ".8h", ".h", - [(set (v4i32 V128:$Rd), - (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))]> { - - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V128, V64, - V128, VectorIndexS, - asm, ".2d", ".2d", ".2s", ".s", - [(set (v2i64 V128:$Rd), - (OpNode (v2i32 V64:$Rn), - (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, 
V128, - V128, VectorIndexS, - asm#"2", ".2d", ".2d", ".4s", ".s", - [(set (v2i64 V128:$Rd), - (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - } -} - -multiclass SIMDVectorIndexedLongSDTied opc, string asm, - SDPatternOperator OpNode> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, - V128, V64, - V128_lo, VectorIndexH, - asm, ".4s", ".4s", ".4h", ".h", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn), - (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm#"2", ".4s", ".4s", ".8h", ".h", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), - (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, - V128, V64, - V128, VectorIndexS, - asm, ".2d", ".2d", ".2s", ".s", - [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn), - (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm#"2", ".2d", ".2d", ".4s", ".s", - [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), - (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - } -} - 
-//---------------------------------------------------------------------------- -// AdvSIMD scalar shift by immediate -//---------------------------------------------------------------------------- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDScalarShift opc, bits<7> fixed_imm, - RegisterClass regtype1, RegisterClass regtype2, - Operand immtype, string asm, list pattern> - : I<(outs regtype1:$Rd), (ins regtype2:$Rn, immtype:$imm), - asm, "\t$Rd, $Rn, $imm", "", pattern>, - Sched<[WriteVd]> { - bits<5> Rd; - bits<5> Rn; - bits<7> imm; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-23} = 0b111110; - let Inst{22-16} = fixed_imm; - let Inst{15-11} = opc; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDScalarShiftTied opc, bits<7> fixed_imm, - RegisterClass regtype1, RegisterClass regtype2, - Operand immtype, string asm, list pattern> - : I<(outs regtype1:$dst), (ins regtype1:$Rd, regtype2:$Rn, immtype:$imm), - asm, "\t$Rd, $Rn, $imm", "$Rd = $dst", pattern>, - Sched<[WriteVd]> { - bits<5> Rd; - bits<5> Rn; - bits<7> imm; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-23} = 0b111110; - let Inst{22-16} = fixed_imm; - let Inst{15-11} = opc; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - - -multiclass SIMDFPScalarRShift opc, string asm> { - let Predicates = [HasNEON, HasFullFP16] in { - def h : BaseSIMDScalarShift { - let Inst{19-16} = imm{3-0}; - } - } // Predicates = [HasNEON, HasFullFP16] - def s : BaseSIMDScalarShift { - let Inst{20-16} = imm{4-0}; - } - def d : BaseSIMDScalarShift { - let Inst{21-16} = imm{5-0}; - } -} - -multiclass SIMDScalarRShiftD opc, string asm, - SDPatternOperator OpNode> { - def d : BaseSIMDScalarShift { - let Inst{21-16} = imm{5-0}; - } - - def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftR64:$imm))), - (!cast(NAME # "d") FPR64:$Rn, vecshiftR64:$imm)>; -} - -multiclass 
SIMDScalarRShiftDTied opc, string asm, - SDPatternOperator OpNode = null_frag> { - def d : BaseSIMDScalarShiftTied { - let Inst{21-16} = imm{5-0}; - } - - def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), - (i32 vecshiftR64:$imm))), - (!cast(NAME # "d") FPR64:$Rd, FPR64:$Rn, - vecshiftR64:$imm)>; -} - -multiclass SIMDScalarLShiftD opc, string asm, - SDPatternOperator OpNode> { - def d : BaseSIMDScalarShift { - let Inst{21-16} = imm{5-0}; - } - - def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm))), - (!cast(NAME # "d") FPR64:$Rn, vecshiftL64:$imm)>; -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -multiclass SIMDScalarLShiftDTied opc, string asm> { - def d : BaseSIMDScalarShiftTied { - let Inst{21-16} = imm{5-0}; - } -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -multiclass SIMDScalarRShiftBHS opc, string asm, - SDPatternOperator OpNode = null_frag> { - def b : BaseSIMDScalarShift { - let Inst{18-16} = imm{2-0}; - } - - def h : BaseSIMDScalarShift { - let Inst{19-16} = imm{3-0}; - } - - def s : BaseSIMDScalarShift { - let Inst{20-16} = imm{4-0}; - } -} - -multiclass SIMDScalarLShiftBHSD opc, string asm, - SDPatternOperator OpNode> { - def b : BaseSIMDScalarShift { - let Inst{18-16} = imm{2-0}; - } - - def h : BaseSIMDScalarShift { - let Inst{19-16} = imm{3-0}; - } - - def s : BaseSIMDScalarShift { - let Inst{20-16} = imm{4-0}; - } - - def d : BaseSIMDScalarShift { - let Inst{21-16} = imm{5-0}; - } - - def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm))), - (!cast(NAME # "d") FPR64:$Rn, vecshiftL64:$imm)>; -} - -multiclass SIMDScalarRShiftBHSD opc, string asm> { - def b : BaseSIMDScalarShift { - let Inst{18-16} = imm{2-0}; - } - - def h : BaseSIMDScalarShift { - let Inst{19-16} = imm{3-0}; - } - - def s : BaseSIMDScalarShift { - let Inst{20-16} = imm{4-0}; - } - - def d : BaseSIMDScalarShift { - let Inst{21-16} = imm{5-0}; - } -} - 
-//---------------------------------------------------------------------------- -// AdvSIMD vector x indexed element -//---------------------------------------------------------------------------- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDVectorShift opc, bits<7> fixed_imm, - RegisterOperand dst_reg, RegisterOperand src_reg, - Operand immtype, - string asm, string dst_kind, string src_kind, - list pattern> - : I<(outs dst_reg:$Rd), (ins src_reg:$Rn, immtype:$imm), - asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" # - "|" # dst_kind # "\t$Rd, $Rn, $imm}", "", pattern>, - Sched<[!if(Q, WriteVq, WriteVd)]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-23} = 0b011110; - let Inst{22-16} = fixed_imm; - let Inst{15-11} = opc; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDVectorShiftTied opc, bits<7> fixed_imm, - RegisterOperand vectype1, RegisterOperand vectype2, - Operand immtype, - string asm, string dst_kind, string src_kind, - list pattern> - : I<(outs vectype1:$dst), (ins vectype1:$Rd, vectype2:$Rn, immtype:$imm), - asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" # - "|" # dst_kind # "\t$Rd, $Rn, $imm}", "$Rd = $dst", pattern>, - Sched<[!if(Q, WriteVq, WriteVd)]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-23} = 0b011110; - let Inst{22-16} = fixed_imm; - let Inst{15-11} = opc; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDVectorRShiftSD opc, string asm, - Intrinsic OpNode> { - let Predicates = [HasNEON, HasFullFP16] in { - def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, - V64, V64, vecshiftR16, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 imm:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : 
BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftR16, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 imm:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - } // Predicates = [HasNEON, HasFullFP16] - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftR32, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 imm:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftR32, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 imm:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftR64, - asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 imm:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -multiclass SIMDVectorRShiftToFP opc, string asm, - Intrinsic OpNode> { - let Predicates = [HasNEON, HasFullFP16] in { - def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, - V64, V64, vecshiftR16, - asm, ".4h", ".4h", - [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 imm:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftR16, - asm, ".8h", ".8h", - [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 imm:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - } // Predicates = [HasNEON, HasFullFP16] - - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftR32, - asm, ".2s", ".2s", - [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 imm:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftR32, - asm, ".4s", ".4s", - [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 
imm:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftR64, - asm, ".2d", ".2d", - [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 imm:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -multiclass SIMDVectorRShiftNarrowBHS opc, string asm, - SDPatternOperator OpNode> { - def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, - V64, V128, vecshiftR16Narrow, - asm, ".8b", ".8h", - [(set (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftR16Narrow, - asm#"2", ".16b", ".8h", []> { - bits<3> imm; - let Inst{18-16} = imm; - let hasSideEffects = 0; - } - - def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, - V64, V128, vecshiftR32Narrow, - asm, ".4h", ".4s", - [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftR32Narrow, - asm#"2", ".8h", ".4s", []> { - bits<4> imm; - let Inst{19-16} = imm; - let hasSideEffects = 0; - } - - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V64, V128, vecshiftR64Narrow, - asm, ".2s", ".2d", - [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftR64Narrow, - asm#"2", ".4s", ".2d", []> { - bits<5> imm; - let Inst{20-16} = imm; - let hasSideEffects = 0; - } - - // TableGen doesn't like patters w/ INSERT_SUBREG on the instructions - // themselves, so put them here instead. - - // Patterns involving what's effectively an insert high and a normal - // intrinsic, represented by CONCAT_VECTORS. 
- def : Pat<(concat_vectors (v8i8 V64:$Rd),(OpNode (v8i16 V128:$Rn), - vecshiftR16Narrow:$imm)), - (!cast(NAME # "v16i8_shift") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR16Narrow:$imm)>; - def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn), - vecshiftR32Narrow:$imm)), - (!cast(NAME # "v8i16_shift") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR32Narrow:$imm)>; - def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn), - vecshiftR64Narrow:$imm)), - (!cast(NAME # "v4i32_shift") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR64Narrow:$imm)>; -} - -multiclass SIMDVectorLShiftBHSD opc, string asm, - SDPatternOperator OpNode> { - def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, - V64, V64, vecshiftL8, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), - (i32 vecshiftL8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftL8, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), - (i32 vecshiftL8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, - V64, V64, vecshiftL16, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), - (i32 vecshiftL16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftL16, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), - (i32 vecshiftL16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftL32, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), - (i32 vecshiftL32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShift<1, U, opc, 
{0,1,?,?,?,?,?}, - V128, V128, vecshiftL32, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), - (i32 vecshiftL32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftL64, - asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), - (i32 vecshiftL64:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -multiclass SIMDVectorRShiftBHSD opc, string asm, - SDPatternOperator OpNode> { - def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, - V64, V64, vecshiftR8, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), - (i32 vecshiftR8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftR8, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), - (i32 vecshiftR8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, - V64, V64, vecshiftR16, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), - (i32 vecshiftR16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftR16, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), - (i32 vecshiftR16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftR32, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), - (i32 vecshiftR32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftR32, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), - (i32 vecshiftR32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : 
BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftR64, - asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), - (i32 vecshiftR64:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDVectorRShiftBHSDTied opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?}, - V64, V64, vecshiftR8, asm, ".8b", ".8b", - [(set (v8i8 V64:$dst), - (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), - (i32 vecshiftR8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftR8, asm, ".16b", ".16b", - [(set (v16i8 V128:$dst), - (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), - (i32 vecshiftR8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?}, - V64, V64, vecshiftR16, asm, ".4h", ".4h", - [(set (v4i16 V64:$dst), - (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), - (i32 vecshiftR16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftR16, asm, ".8h", ".8h", - [(set (v8i16 V128:$dst), - (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), - (i32 vecshiftR16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftR32, asm, ".2s", ".2s", - [(set (v2i32 V64:$dst), - (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), - (i32 vecshiftR32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftR32, asm, ".4s", ".4s", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), - (i32 vecshiftR32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : 
BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftR64, - asm, ".2d", ".2d", [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn), - (i32 vecshiftR64:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -multiclass SIMDVectorLShiftBHSDTied opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?}, - V64, V64, vecshiftL8, - asm, ".8b", ".8b", - [(set (v8i8 V64:$dst), - (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), - (i32 vecshiftL8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftL8, - asm, ".16b", ".16b", - [(set (v16i8 V128:$dst), - (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), - (i32 vecshiftL8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?}, - V64, V64, vecshiftL16, - asm, ".4h", ".4h", - [(set (v4i16 V64:$dst), - (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), - (i32 vecshiftL16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftL16, - asm, ".8h", ".8h", - [(set (v8i16 V128:$dst), - (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), - (i32 vecshiftL16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftL32, - asm, ".2s", ".2s", - [(set (v2i32 V64:$dst), - (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), - (i32 vecshiftL32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftL32, - asm, ".4s", ".4s", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), - (i32 vecshiftL32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShiftTied<1, U, 
opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftL64, - asm, ".2d", ".2d", - [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn), - (i32 vecshiftL64:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -multiclass SIMDVectorLShiftLongBHSD opc, string asm, - SDPatternOperator OpNode> { - def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, - V128, V64, vecshiftL8, asm, ".8h", ".8b", - [(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), vecshiftL8:$imm))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftL8, - asm#"2", ".8h", ".16b", - [(set (v8i16 V128:$Rd), - (OpNode (extract_high_v16i8 V128:$Rn), vecshiftL8:$imm))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, - V128, V64, vecshiftL16, asm, ".4s", ".4h", - [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), vecshiftL16:$imm))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftL16, - asm#"2", ".4s", ".8h", - [(set (v4i32 V128:$Rd), - (OpNode (extract_high_v8i16 V128:$Rn), vecshiftL16:$imm))]> { - - bits<4> imm; - let Inst{19-16} = imm; - } - - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V128, V64, vecshiftL32, asm, ".2d", ".2s", - [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), vecshiftL32:$imm))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftL32, - asm#"2", ".2d", ".4s", - [(set (v2i64 V128:$Rd), - (OpNode (extract_high_v4i32 V128:$Rn), vecshiftL32:$imm))]> { - bits<5> imm; - let Inst{20-16} = imm; - } -} - - -//--- -// Vector load/store -//--- -// SIMD ldX/stX no-index memory references don't allow the optional -// ", #0" constant and handle post-indexing explicitly, so we use -// a more specialized parse method for them. 
Otherwise, it's the same as -// the general GPR64sp handling. - -class BaseSIMDLdSt opcode, bits<2> size, - string asm, dag oops, dag iops, list pattern> - : I { - bits<5> Vt; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-23} = 0b0011000; - let Inst{22} = L; - let Inst{21-16} = 0b000000; - let Inst{15-12} = opcode; - let Inst{11-10} = size; - let Inst{9-5} = Rn; - let Inst{4-0} = Vt; -} - -class BaseSIMDLdStPost opcode, bits<2> size, - string asm, dag oops, dag iops> - : I { - bits<5> Vt; - bits<5> Rn; - bits<5> Xm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-23} = 0b0011001; - let Inst{22} = L; - let Inst{21} = 0; - let Inst{20-16} = Xm; - let Inst{15-12} = opcode; - let Inst{11-10} = size; - let Inst{9-5} = Rn; - let Inst{4-0} = Vt; -} - -// The immediate form of AdvSIMD post-indexed addressing is encoded with -// register post-index addressing from the zero register. -multiclass SIMDLdStAliases { - // E.g. "ld1 { v0.8b, v1.8b }, [x1], #16" - // "ld1\t$Vt, [$Rn], #16" - // may get mapped to - // (LD1Twov8b_POST VecListTwo8b:$Vt, GPR64sp:$Rn, XZR) - def : InstAlias(BaseName # Count # "v" # layout # "_POST") - GPR64sp:$Rn, - !cast("VecList" # Count # layout):$Vt, - XZR), 1>; - - // E.g. "ld1.8b { v0, v1 }, [x1], #16" - // "ld1.8b\t$Vt, [$Rn], #16" - // may get mapped to - // (LD1Twov8b_POST VecListTwo64:$Vt, GPR64sp:$Rn, XZR) - def : InstAlias(BaseName # Count # "v" # layout # "_POST") - GPR64sp:$Rn, - !cast("VecList" # Count # Size):$Vt, - XZR), 0>; - - // E.g. "ld1.8b { v0, v1 }, [x1]" - // "ld1\t$Vt, [$Rn]" - // may get mapped to - // (LD1Twov8b VecListTwo64:$Vt, GPR64sp:$Rn) - def : InstAlias(BaseName # Count # "v" # layout) - !cast("VecList" # Count # Size):$Vt, - GPR64sp:$Rn), 0>; - - // E.g. 
"ld1.8b { v0, v1 }, [x1], x2" - // "ld1\t$Vt, [$Rn], $Xm" - // may get mapped to - // (LD1Twov8b_POST VecListTwo64:$Vt, GPR64sp:$Rn, GPR64pi8:$Xm) - def : InstAlias(BaseName # Count # "v" # layout # "_POST") - GPR64sp:$Rn, - !cast("VecList" # Count # Size):$Vt, - !cast("GPR64pi" # Offset):$Xm), 0>; -} - -multiclass BaseSIMDLdN opcode> { - let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { - def v16b: BaseSIMDLdSt<1, 1, opcode, 0b00, asm, - (outs !cast(veclist # "16b"):$Vt), - (ins GPR64sp:$Rn), []>; - def v8h : BaseSIMDLdSt<1, 1, opcode, 0b01, asm, - (outs !cast(veclist # "8h"):$Vt), - (ins GPR64sp:$Rn), []>; - def v4s : BaseSIMDLdSt<1, 1, opcode, 0b10, asm, - (outs !cast(veclist # "4s"):$Vt), - (ins GPR64sp:$Rn), []>; - def v2d : BaseSIMDLdSt<1, 1, opcode, 0b11, asm, - (outs !cast(veclist # "2d"):$Vt), - (ins GPR64sp:$Rn), []>; - def v8b : BaseSIMDLdSt<0, 1, opcode, 0b00, asm, - (outs !cast(veclist # "8b"):$Vt), - (ins GPR64sp:$Rn), []>; - def v4h : BaseSIMDLdSt<0, 1, opcode, 0b01, asm, - (outs !cast(veclist # "4h"):$Vt), - (ins GPR64sp:$Rn), []>; - def v2s : BaseSIMDLdSt<0, 1, opcode, 0b10, asm, - (outs !cast(veclist # "2s"):$Vt), - (ins GPR64sp:$Rn), []>; - - - def v16b_POST: BaseSIMDLdStPost<1, 1, opcode, 0b00, asm, - (outs GPR64sp:$wback, - !cast(veclist # "16b"):$Vt), - (ins GPR64sp:$Rn, - !cast("GPR64pi" # Offset128):$Xm)>; - def v8h_POST : BaseSIMDLdStPost<1, 1, opcode, 0b01, asm, - (outs GPR64sp:$wback, - !cast(veclist # "8h"):$Vt), - (ins GPR64sp:$Rn, - !cast("GPR64pi" # Offset128):$Xm)>; - def v4s_POST : BaseSIMDLdStPost<1, 1, opcode, 0b10, asm, - (outs GPR64sp:$wback, - !cast(veclist # "4s"):$Vt), - (ins GPR64sp:$Rn, - !cast("GPR64pi" # Offset128):$Xm)>; - def v2d_POST : BaseSIMDLdStPost<1, 1, opcode, 0b11, asm, - (outs GPR64sp:$wback, - !cast(veclist # "2d"):$Vt), - (ins GPR64sp:$Rn, - !cast("GPR64pi" # Offset128):$Xm)>; - def v8b_POST : BaseSIMDLdStPost<0, 1, opcode, 0b00, asm, - (outs GPR64sp:$wback, - !cast(veclist # "8b"):$Vt), - (ins 
GPR64sp:$Rn, - !cast("GPR64pi" # Offset64):$Xm)>; - def v4h_POST : BaseSIMDLdStPost<0, 1, opcode, 0b01, asm, - (outs GPR64sp:$wback, - !cast(veclist # "4h"):$Vt), - (ins GPR64sp:$Rn, - !cast("GPR64pi" # Offset64):$Xm)>; - def v2s_POST : BaseSIMDLdStPost<0, 1, opcode, 0b10, asm, - (outs GPR64sp:$wback, - !cast(veclist # "2s"):$Vt), - (ins GPR64sp:$Rn, - !cast("GPR64pi" # Offset64):$Xm)>; - } - - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; -} - -// Only ld1/st1 has a v1d version. -multiclass BaseSIMDStN opcode> { - let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in { - def v16b : BaseSIMDLdSt<1, 0, opcode, 0b00, asm, (outs), - (ins !cast(veclist # "16b"):$Vt, - GPR64sp:$Rn), []>; - def v8h : BaseSIMDLdSt<1, 0, opcode, 0b01, asm, (outs), - (ins !cast(veclist # "8h"):$Vt, - GPR64sp:$Rn), []>; - def v4s : BaseSIMDLdSt<1, 0, opcode, 0b10, asm, (outs), - (ins !cast(veclist # "4s"):$Vt, - GPR64sp:$Rn), []>; - def v2d : BaseSIMDLdSt<1, 0, opcode, 0b11, asm, (outs), - (ins !cast(veclist # "2d"):$Vt, - GPR64sp:$Rn), []>; - def v8b : BaseSIMDLdSt<0, 0, opcode, 0b00, asm, (outs), - (ins !cast(veclist # "8b"):$Vt, - GPR64sp:$Rn), []>; - def v4h : BaseSIMDLdSt<0, 0, opcode, 0b01, asm, (outs), - (ins !cast(veclist # "4h"):$Vt, - GPR64sp:$Rn), []>; - def v2s : BaseSIMDLdSt<0, 0, opcode, 0b10, asm, (outs), - (ins !cast(veclist # "2s"):$Vt, - GPR64sp:$Rn), []>; - - def v16b_POST : BaseSIMDLdStPost<1, 0, opcode, 0b00, asm, - (outs GPR64sp:$wback), - (ins !cast(veclist # "16b"):$Vt, - GPR64sp:$Rn, - !cast("GPR64pi" # Offset128):$Xm)>; - def v8h_POST : BaseSIMDLdStPost<1, 0, opcode, 0b01, asm, - (outs GPR64sp:$wback), - (ins !cast(veclist # "8h"):$Vt, - GPR64sp:$Rn, - !cast("GPR64pi" # Offset128):$Xm)>; - def v4s_POST : BaseSIMDLdStPost<1, 0, opcode, 0b10, asm, - (outs GPR64sp:$wback), - (ins !cast(veclist # "4s"):$Vt, - GPR64sp:$Rn, - !cast("GPR64pi" 
# Offset128):$Xm)>; - def v2d_POST : BaseSIMDLdStPost<1, 0, opcode, 0b11, asm, - (outs GPR64sp:$wback), - (ins !cast(veclist # "2d"):$Vt, - GPR64sp:$Rn, - !cast("GPR64pi" # Offset128):$Xm)>; - def v8b_POST : BaseSIMDLdStPost<0, 0, opcode, 0b00, asm, - (outs GPR64sp:$wback), - (ins !cast(veclist # "8b"):$Vt, - GPR64sp:$Rn, - !cast("GPR64pi" # Offset64):$Xm)>; - def v4h_POST : BaseSIMDLdStPost<0, 0, opcode, 0b01, asm, - (outs GPR64sp:$wback), - (ins !cast(veclist # "4h"):$Vt, - GPR64sp:$Rn, - !cast("GPR64pi" # Offset64):$Xm)>; - def v2s_POST : BaseSIMDLdStPost<0, 0, opcode, 0b10, asm, - (outs GPR64sp:$wback), - (ins !cast(veclist # "2s"):$Vt, - GPR64sp:$Rn, - !cast("GPR64pi" # Offset64):$Xm)>; - } - - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; -} - -multiclass BaseSIMDLd1 opcode> - : BaseSIMDLdN { - - // LD1 instructions have extra "1d" variants. - let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { - def v1d : BaseSIMDLdSt<0, 1, opcode, 0b11, asm, - (outs !cast(veclist # "1d"):$Vt), - (ins GPR64sp:$Rn), []>; - - def v1d_POST : BaseSIMDLdStPost<0, 1, opcode, 0b11, asm, - (outs GPR64sp:$wback, - !cast(veclist # "1d"):$Vt), - (ins GPR64sp:$Rn, - !cast("GPR64pi" # Offset64):$Xm)>; - } - - defm : SIMDLdStAliases; -} - -multiclass BaseSIMDSt1 opcode> - : BaseSIMDStN { - - // ST1 instructions have extra "1d" variants. 
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in { - def v1d : BaseSIMDLdSt<0, 0, opcode, 0b11, asm, (outs), - (ins !cast(veclist # "1d"):$Vt, - GPR64sp:$Rn), []>; - - def v1d_POST : BaseSIMDLdStPost<0, 0, opcode, 0b11, asm, - (outs GPR64sp:$wback), - (ins !cast(veclist # "1d"):$Vt, - GPR64sp:$Rn, - !cast("GPR64pi" # Offset64):$Xm)>; - } - - defm : SIMDLdStAliases; -} - -multiclass SIMDLd1Multiple { - defm One : BaseSIMDLd1; - defm Two : BaseSIMDLd1; - defm Three : BaseSIMDLd1; - defm Four : BaseSIMDLd1; -} - -multiclass SIMDSt1Multiple { - defm One : BaseSIMDSt1; - defm Two : BaseSIMDSt1; - defm Three : BaseSIMDSt1; - defm Four : BaseSIMDSt1; -} - -multiclass SIMDLd2Multiple { - defm Two : BaseSIMDLdN; -} - -multiclass SIMDSt2Multiple { - defm Two : BaseSIMDStN; -} - -multiclass SIMDLd3Multiple { - defm Three : BaseSIMDLdN; -} - -multiclass SIMDSt3Multiple { - defm Three : BaseSIMDStN; -} - -multiclass SIMDLd4Multiple { - defm Four : BaseSIMDLdN; -} - -multiclass SIMDSt4Multiple { - defm Four : BaseSIMDStN; -} - -//--- -// AdvSIMD Load/store single-element -//--- - -class BaseSIMDLdStSingle opcode, - string asm, string operands, string cst, - dag oops, dag iops, list pattern> - : I { - bits<5> Vt; - bits<5> Rn; - let Inst{31} = 0; - let Inst{29-24} = 0b001101; - let Inst{22} = L; - let Inst{21} = R; - let Inst{15-13} = opcode; - let Inst{9-5} = Rn; - let Inst{4-0} = Vt; -} - -class BaseSIMDLdStSingleTied opcode, - string asm, string operands, string cst, - dag oops, dag iops, list pattern> - : I { - bits<5> Vt; - bits<5> Rn; - let Inst{31} = 0; - let Inst{29-24} = 0b001101; - let Inst{22} = L; - let Inst{21} = R; - let Inst{15-13} = opcode; - let Inst{9-5} = Rn; - let Inst{4-0} = Vt; -} - - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDLdR opcode, bit S, bits<2> size, string asm, - DAGOperand listtype> - : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, [$Rn]", "", - (outs listtype:$Vt), (ins GPR64sp:$Rn), - []> { - let Inst{30} = Q; - 
let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = S; - let Inst{11-10} = size; -} -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDLdRPost opcode, bit S, bits<2> size, - string asm, DAGOperand listtype, DAGOperand GPR64pi> - : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, [$Rn], $Xm", - "$Rn = $wback", - (outs GPR64sp:$wback, listtype:$Vt), - (ins GPR64sp:$Rn, GPR64pi:$Xm), []> { - bits<5> Xm; - let Inst{30} = Q; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = S; - let Inst{11-10} = size; -} - -multiclass SIMDLdrAliases { - // E.g. "ld1r { v0.8b }, [x1], #1" - // "ld1r.8b\t$Vt, [$Rn], #1" - // may get mapped to - // (LD1Rv8b_POST VecListOne8b:$Vt, GPR64sp:$Rn, XZR) - def : InstAlias(BaseName # "v" # layout # "_POST") - GPR64sp:$Rn, - !cast("VecList" # Count # layout):$Vt, - XZR), 1>; - - // E.g. "ld1r.8b { v0 }, [x1], #1" - // "ld1r.8b\t$Vt, [$Rn], #1" - // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, XZR) - def : InstAlias(BaseName # "v" # layout # "_POST") - GPR64sp:$Rn, - !cast("VecList" # Count # Size):$Vt, - XZR), 0>; - - // E.g. "ld1r.8b { v0 }, [x1]" - // "ld1r.8b\t$Vt, [$Rn]" - // may get mapped to - // (LD1Rv8b VecListOne64:$Vt, GPR64sp:$Rn) - def : InstAlias(BaseName # "v" # layout) - !cast("VecList" # Count # Size):$Vt, - GPR64sp:$Rn), 0>; - - // E.g. 
"ld1r.8b { v0 }, [x1], x2" - // "ld1r.8b\t$Vt, [$Rn], $Xm" - // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, GPR64pi1:$Xm) - def : InstAlias(BaseName # "v" # layout # "_POST") - GPR64sp:$Rn, - !cast("VecList" # Count # Size):$Vt, - !cast("GPR64pi" # Offset):$Xm), 0>; -} - -multiclass SIMDLdR opcode, bit S, string asm, string Count, - int Offset1, int Offset2, int Offset4, int Offset8> { - def v8b : BaseSIMDLdR<0, R, opcode, S, 0b00, asm, - !cast("VecList" # Count # "8b")>; - def v16b: BaseSIMDLdR<1, R, opcode, S, 0b00, asm, - !cast("VecList" # Count #"16b")>; - def v4h : BaseSIMDLdR<0, R, opcode, S, 0b01, asm, - !cast("VecList" # Count #"4h")>; - def v8h : BaseSIMDLdR<1, R, opcode, S, 0b01, asm, - !cast("VecList" # Count #"8h")>; - def v2s : BaseSIMDLdR<0, R, opcode, S, 0b10, asm, - !cast("VecList" # Count #"2s")>; - def v4s : BaseSIMDLdR<1, R, opcode, S, 0b10, asm, - !cast("VecList" # Count #"4s")>; - def v1d : BaseSIMDLdR<0, R, opcode, S, 0b11, asm, - !cast("VecList" # Count #"1d")>; - def v2d : BaseSIMDLdR<1, R, opcode, S, 0b11, asm, - !cast("VecList" # Count #"2d")>; - - def v8b_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b00, asm, - !cast("VecList" # Count # "8b"), - !cast("GPR64pi" # Offset1)>; - def v16b_POST: BaseSIMDLdRPost<1, R, opcode, S, 0b00, asm, - !cast("VecList" # Count # "16b"), - !cast("GPR64pi" # Offset1)>; - def v4h_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b01, asm, - !cast("VecList" # Count # "4h"), - !cast("GPR64pi" # Offset2)>; - def v8h_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b01, asm, - !cast("VecList" # Count # "8h"), - !cast("GPR64pi" # Offset2)>; - def v2s_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b10, asm, - !cast("VecList" # Count # "2s"), - !cast("GPR64pi" # Offset4)>; - def v4s_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b10, asm, - !cast("VecList" # Count # "4s"), - !cast("GPR64pi" # Offset4)>; - def v1d_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b11, asm, - !cast("VecList" # Count # "1d"), - !cast("GPR64pi" # 
Offset8)>; - def v2d_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b11, asm, - !cast("VecList" # Count # "2d"), - !cast("GPR64pi" # Offset8)>; - - defm : SIMDLdrAliases; - defm : SIMDLdrAliases; - defm : SIMDLdrAliases; - defm : SIMDLdrAliases; - defm : SIMDLdrAliases; - defm : SIMDLdrAliases; - defm : SIMDLdrAliases; - defm : SIMDLdrAliases; -} - -class SIMDLdStSingleB opcode, string asm, - dag oops, dag iops, list pattern> - : BaseSIMDLdStSingle { - // idx encoded in Q:S:size fields. - bits<4> idx; - let Inst{30} = idx{3}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{2}; - let Inst{11-10} = idx{1-0}; -} -class SIMDLdStSingleBTied opcode, string asm, - dag oops, dag iops, list pattern> - : BaseSIMDLdStSingleTied { - // idx encoded in Q:S:size fields. - bits<4> idx; - let Inst{30} = idx{3}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{2}; - let Inst{11-10} = idx{1-0}; -} -class SIMDLdStSingleBPost opcode, string asm, - dag oops, dag iops> - : BaseSIMDLdStSingle { - // idx encoded in Q:S:size fields. - bits<4> idx; - bits<5> Xm; - let Inst{30} = idx{3}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{2}; - let Inst{11-10} = idx{1-0}; -} -class SIMDLdStSingleBTiedPost opcode, string asm, - dag oops, dag iops> - : BaseSIMDLdStSingleTied { - // idx encoded in Q:S:size fields. - bits<4> idx; - bits<5> Xm; - let Inst{30} = idx{3}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{2}; - let Inst{11-10} = idx{1-0}; -} - -class SIMDLdStSingleH opcode, bit size, string asm, - dag oops, dag iops, list pattern> - : BaseSIMDLdStSingle { - // idx encoded in Q:S:size<1> fields. - bits<3> idx; - let Inst{30} = idx{2}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{1}; - let Inst{11} = idx{0}; - let Inst{10} = size; -} -class SIMDLdStSingleHTied opcode, bit size, string asm, - dag oops, dag iops, list pattern> - : BaseSIMDLdStSingleTied { - // idx encoded in Q:S:size<1> fields. 
- bits<3> idx; - let Inst{30} = idx{2}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{1}; - let Inst{11} = idx{0}; - let Inst{10} = size; -} - -class SIMDLdStSingleHPost opcode, bit size, string asm, - dag oops, dag iops> - : BaseSIMDLdStSingle { - // idx encoded in Q:S:size<1> fields. - bits<3> idx; - bits<5> Xm; - let Inst{30} = idx{2}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{1}; - let Inst{11} = idx{0}; - let Inst{10} = size; -} -class SIMDLdStSingleHTiedPost opcode, bit size, string asm, - dag oops, dag iops> - : BaseSIMDLdStSingleTied { - // idx encoded in Q:S:size<1> fields. - bits<3> idx; - bits<5> Xm; - let Inst{30} = idx{2}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{1}; - let Inst{11} = idx{0}; - let Inst{10} = size; -} -class SIMDLdStSingleS opcode, bits<2> size, string asm, - dag oops, dag iops, list pattern> - : BaseSIMDLdStSingle { - // idx encoded in Q:S fields. - bits<2> idx; - let Inst{30} = idx{1}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{0}; - let Inst{11-10} = size; -} -class SIMDLdStSingleSTied opcode, bits<2> size, string asm, - dag oops, dag iops, list pattern> - : BaseSIMDLdStSingleTied { - // idx encoded in Q:S fields. - bits<2> idx; - let Inst{30} = idx{1}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{0}; - let Inst{11-10} = size; -} -class SIMDLdStSingleSPost opcode, bits<2> size, - string asm, dag oops, dag iops> - : BaseSIMDLdStSingle { - // idx encoded in Q:S fields. - bits<2> idx; - bits<5> Xm; - let Inst{30} = idx{1}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{0}; - let Inst{11-10} = size; -} -class SIMDLdStSingleSTiedPost opcode, bits<2> size, - string asm, dag oops, dag iops> - : BaseSIMDLdStSingleTied { - // idx encoded in Q:S fields. 
- bits<2> idx; - bits<5> Xm; - let Inst{30} = idx{1}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{0}; - let Inst{11-10} = size; -} -class SIMDLdStSingleD opcode, bits<2> size, string asm, - dag oops, dag iops, list pattern> - : BaseSIMDLdStSingle { - // idx encoded in Q field. - bits<1> idx; - let Inst{30} = idx; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = 0; - let Inst{11-10} = size; -} -class SIMDLdStSingleDTied opcode, bits<2> size, string asm, - dag oops, dag iops, list pattern> - : BaseSIMDLdStSingleTied { - // idx encoded in Q field. - bits<1> idx; - let Inst{30} = idx; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = 0; - let Inst{11-10} = size; -} -class SIMDLdStSingleDPost opcode, bits<2> size, - string asm, dag oops, dag iops> - : BaseSIMDLdStSingle { - // idx encoded in Q field. - bits<1> idx; - bits<5> Xm; - let Inst{30} = idx; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = 0; - let Inst{11-10} = size; -} -class SIMDLdStSingleDTiedPost opcode, bits<2> size, - string asm, dag oops, dag iops> - : BaseSIMDLdStSingleTied { - // idx encoded in Q field. 
- bits<1> idx; - bits<5> Xm; - let Inst{30} = idx; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = 0; - let Inst{11-10} = size; -} - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDLdSingleBTied opcode, string asm, - RegisterOperand listtype, - RegisterOperand GPR64pi> { - def i8 : SIMDLdStSingleBTied<1, R, opcode, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexB:$idx, - GPR64sp:$Rn), []>; - - def i8_POST : SIMDLdStSingleBTiedPost<1, R, opcode, asm, - (outs GPR64sp:$wback, listtype:$dst), - (ins listtype:$Vt, VectorIndexB:$idx, - GPR64sp:$Rn, GPR64pi:$Xm)>; -} -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDLdSingleHTied opcode, bit size, string asm, - RegisterOperand listtype, - RegisterOperand GPR64pi> { - def i16 : SIMDLdStSingleHTied<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexH:$idx, - GPR64sp:$Rn), []>; - - def i16_POST : SIMDLdStSingleHTiedPost<1, R, opcode, size, asm, - (outs GPR64sp:$wback, listtype:$dst), - (ins listtype:$Vt, VectorIndexH:$idx, - GPR64sp:$Rn, GPR64pi:$Xm)>; -} -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDLdSingleSTied opcode, bits<2> size,string asm, - RegisterOperand listtype, - RegisterOperand GPR64pi> { - def i32 : SIMDLdStSingleSTied<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexS:$idx, - GPR64sp:$Rn), []>; - - def i32_POST : SIMDLdStSingleSTiedPost<1, R, opcode, size, asm, - (outs GPR64sp:$wback, listtype:$dst), - (ins listtype:$Vt, VectorIndexS:$idx, - GPR64sp:$Rn, GPR64pi:$Xm)>; -} -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDLdSingleDTied opcode, bits<2> size, string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i64 : SIMDLdStSingleDTied<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexD:$idx, - GPR64sp:$Rn), []>; - - def i64_POST : SIMDLdStSingleDTiedPost<1, R, opcode, size, asm, - (outs 
GPR64sp:$wback, listtype:$dst), - (ins listtype:$Vt, VectorIndexD:$idx, - GPR64sp:$Rn, GPR64pi:$Xm)>; -} -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -multiclass SIMDStSingleB opcode, string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i8 : SIMDLdStSingleB<0, R, opcode, asm, - (outs), (ins listtype:$Vt, VectorIndexB:$idx, - GPR64sp:$Rn), []>; - - def i8_POST : SIMDLdStSingleBPost<0, R, opcode, asm, - (outs GPR64sp:$wback), - (ins listtype:$Vt, VectorIndexB:$idx, - GPR64sp:$Rn, GPR64pi:$Xm)>; -} -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -multiclass SIMDStSingleH opcode, bit size, string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i16 : SIMDLdStSingleH<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexH:$idx, - GPR64sp:$Rn), []>; - - def i16_POST : SIMDLdStSingleHPost<0, R, opcode, size, asm, - (outs GPR64sp:$wback), - (ins listtype:$Vt, VectorIndexH:$idx, - GPR64sp:$Rn, GPR64pi:$Xm)>; -} -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -multiclass SIMDStSingleS opcode, bits<2> size,string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i32 : SIMDLdStSingleS<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexS:$idx, - GPR64sp:$Rn), []>; - - def i32_POST : SIMDLdStSingleSPost<0, R, opcode, size, asm, - (outs GPR64sp:$wback), - (ins listtype:$Vt, VectorIndexS:$idx, - GPR64sp:$Rn, GPR64pi:$Xm)>; -} -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -multiclass SIMDStSingleD opcode, bits<2> size, string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i64 : SIMDLdStSingleD<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexD:$idx, - GPR64sp:$Rn), []>; - - def i64_POST : SIMDLdStSingleDPost<0, R, opcode, size, asm, - (outs GPR64sp:$wback), - (ins listtype:$Vt, VectorIndexD:$idx, - GPR64sp:$Rn, GPR64pi:$Xm)>; -} - -multiclass SIMDLdStSingleAliases { - // E.g. 
"ld1 { v0.8b }[0], [x1], #1" - // "ld1\t$Vt, [$Rn], #1" - // may get mapped to - // (LD1Rv8b_POST VecListOne8b:$Vt, GPR64sp:$Rn, XZR) - def : InstAlias(NAME # Type # "_POST") - GPR64sp:$Rn, - !cast("VecList" # Count # layout):$Vt, - idxtype:$idx, XZR), 1>; - - // E.g. "ld1.8b { v0 }[0], [x1], #1" - // "ld1.8b\t$Vt, [$Rn], #1" - // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, XZR) - def : InstAlias(NAME # Type # "_POST") - GPR64sp:$Rn, - !cast("VecList" # Count # "128"):$Vt, - idxtype:$idx, XZR), 0>; - - // E.g. "ld1.8b { v0 }[0], [x1]" - // "ld1.8b\t$Vt, [$Rn]" - // may get mapped to - // (LD1Rv8b VecListOne64:$Vt, GPR64sp:$Rn) - def : InstAlias(NAME # Type) - !cast("VecList" # Count # "128"):$Vt, - idxtype:$idx, GPR64sp:$Rn), 0>; - - // E.g. "ld1.8b { v0 }[0], [x1], x2" - // "ld1.8b\t$Vt, [$Rn], $Xm" - // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, GPR64pi1:$Xm) - def : InstAlias(NAME # Type # "_POST") - GPR64sp:$Rn, - !cast("VecList" # Count # "128"):$Vt, - idxtype:$idx, - !cast("GPR64pi" # Offset):$Xm), 0>; -} - -multiclass SIMDLdSt1SingleAliases { - defm "" : SIMDLdStSingleAliases; - defm "" : SIMDLdStSingleAliases; - defm "" : SIMDLdStSingleAliases; - defm "" : SIMDLdStSingleAliases; -} - -multiclass SIMDLdSt2SingleAliases { - defm "" : SIMDLdStSingleAliases; - defm "" : SIMDLdStSingleAliases; - defm "" : SIMDLdStSingleAliases; - defm "" : SIMDLdStSingleAliases; -} - -multiclass SIMDLdSt3SingleAliases { - defm "" : SIMDLdStSingleAliases; - defm "" : SIMDLdStSingleAliases; - defm "" : SIMDLdStSingleAliases; - defm "" : SIMDLdStSingleAliases; -} - -multiclass SIMDLdSt4SingleAliases { - defm "" : SIMDLdStSingleAliases; - defm "" : SIMDLdStSingleAliases; - defm "" : SIMDLdStSingleAliases; - defm "" : SIMDLdStSingleAliases; -} -} // end of 'let Predicates = [HasNEON]' - -//---------------------------------------------------------------------------- -// AdvSIMD v8.1 Rounding Double Multiply Add/Subtract 
-//---------------------------------------------------------------------------- - -let Predicates = [HasNEON, HasRDM] in { - -class BaseSIMDThreeSameVectorTiedR0 size, bits<5> opcode, - RegisterOperand regtype, string asm, - string kind, list pattern> - : BaseSIMDThreeSameVectorTied { -} -multiclass SIMDThreeSameVectorSQRDMLxHTiedHS opc, string asm, - SDPatternOperator op> { - def v4i16 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b01, opc, V64, asm, ".4h", - [(set (v4i16 V64:$dst), - (v4i16 (op (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>; - def v8i16 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b01, opc, V128, asm, ".8h", - [(set (v8i16 V128:$dst), - (v8i16 (op (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>; - def v2i32 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b10, opc, V64, asm, ".2s", - [(set (v2i32 V64:$dst), - (v2i32 (op (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>; - def v4i32 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b10, opc, V128, asm, ".4s", - [(set (v4i32 V128:$dst), - (v4i32 (op (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>; -} - -multiclass SIMDIndexedSQRDMLxHSDTied opc, string asm, - SDPatternOperator op> { - def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, - V64, V64, V128_lo, VectorIndexH, - asm, ".4h", ".4h", ".4h", ".h", - [(set (v4i16 V64:$dst), - (v4i16 (op (v4i16 V64:$Rd), (v4i16 V64:$Rn), - (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx)))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, - V128, V128, V128_lo, VectorIndexH, - asm, ".8h", ".8h", ".8h", ".h", - [(set (v8i16 V128:$dst), - (v8i16 (op (v8i16 V128:$Rd), (v8i16 V128:$Rn), - (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx)))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, - V64, V64, 
V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", - [(set (v2i32 V64:$dst), - (v2i32 (op (v2i32 V64:$Rd), (v2i32 V64:$Rn), - (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx)))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, - V128, V128, V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", - [(set (v4i32 V128:$dst), - (v4i32 (op (v4i32 V128:$Rd), (v4i32 V128:$Rn), - (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx)))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc, - FPR16Op, FPR16Op, V128_lo, - VectorIndexH, asm, ".h", "", "", ".h", - []> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, - FPR32Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", - [(set (i32 FPR32Op:$dst), - (i32 (op (i32 FPR32Op:$Rd), (i32 FPR32Op:$Rn), - (i32 (vector_extract (v4i32 V128:$Rm), - VectorIndexS:$idx)))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} -} // let Predicates = [HasNeon, HasRDM] - -//---------------------------------------------------------------------------- -// ARMv8.3 Complex ADD/MLA instructions -//---------------------------------------------------------------------------- - -class ComplexRotationOperand - : AsmOperandClass { - let PredicateMethod = "isComplexRotation<" # Angle # ", " # Remainder # ">"; - let DiagnosticType = "InvalidComplexRotation" # Type; - let Name = "ComplexRotation" # Type; -} -def complexrotateop : Operand, TImmLeaf= 0 && Imm <= 270; }], - SDNodeXFormgetTargetConstant((N->getSExtValue() / 90), SDLoc(N), MVT::i32); -}]>> { - let ParserMatchClass = ComplexRotationOperand<90, 0, "Even">; - let PrintMethod = "printComplexRotationOp<90, 0>"; -} -def complexrotateopodd : Operand, TImmLeaf= 0 && 
Imm <= 270; }], - SDNodeXFormgetTargetConstant(((N->getSExtValue() - 90) / 180), SDLoc(N), MVT::i32); -}]>> { - let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">; - let PrintMethod = "printComplexRotationOp<180, 90>"; -} -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDThreeSameVectorComplex size, bits<3> opcode, - RegisterOperand regtype, Operand rottype, - string asm, string kind, list pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, rottype:$rot), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot" - "|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "", pattern>, - Sched<[!if(Q, WriteVq, WriteVd)]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<1> rot; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 0; - let Inst{20-16} = Rm; - let Inst{15-13} = opcode; - // Non-tied version (FCADD) only has one rotation bit - let Inst{12} = rot; - let Inst{11} = 0; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -//8.3 CompNum - Floating-point complex number support -multiclass SIMDThreeSameVectorComplexHSD opcode, Operand rottype, - string asm, SDPatternOperator OpNode>{ - let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { - def v4f16 : BaseSIMDThreeSameVectorComplex<0, U, 0b01, opcode, V64, rottype, - asm, ".4h", - [(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd), - (v4f16 V64:$Rn), - (v4f16 V64:$Rm), - (i32 rottype:$rot)))]>; - - def v8f16 : BaseSIMDThreeSameVectorComplex<1, U, 0b01, opcode, V128, rottype, - asm, ".8h", - [(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd), - (v8f16 V128:$Rn), - (v8f16 V128:$Rm), - (i32 rottype:$rot)))]>; - } - - let Predicates = [HasComplxNum, HasNEON] in { - def v2f32 : BaseSIMDThreeSameVectorComplex<0, U, 0b10, opcode, V64, rottype, - asm, ".2s", - [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd), - (v2f32 V64:$Rn), - (v2f32 V64:$Rm), - (i32 rottype:$rot)))]>; - - def v4f32 
: BaseSIMDThreeSameVectorComplex<1, U, 0b10, opcode, V128, rottype, - asm, ".4s", - [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd), - (v4f32 V128:$Rn), - (v4f32 V128:$Rm), - (i32 rottype:$rot)))]>; - - def v2f64 : BaseSIMDThreeSameVectorComplex<1, U, 0b11, opcode, V128, rottype, - asm, ".2d", - [(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd), - (v2f64 V128:$Rn), - (v2f64 V128:$Rm), - (i32 rottype:$rot)))]>; - } -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDThreeSameVectorTiedComplex size, - bits<3> opcode, - RegisterOperand regtype, - Operand rottype, string asm, - string kind, list pattern> - : I<(outs regtype:$dst), - (ins regtype:$Rd, regtype:$Rn, regtype:$Rm, rottype:$rot), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot" - "|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "$Rd = $dst", pattern>, - Sched<[!if(Q, WriteVq, WriteVd)]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<2> rot; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 0; - let Inst{20-16} = Rm; - let Inst{15-13} = opcode; - let Inst{12-11} = rot; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDThreeSameVectorTiedComplexHSD opcode, - Operand rottype, string asm, - SDPatternOperator OpNode> { - let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { - def v4f16 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b01, opcode, V64, - rottype, asm, ".4h", - [(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd), - (v4f16 V64:$Rn), - (v4f16 V64:$Rm), - (i32 rottype:$rot)))]>; - - def v8f16 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b01, opcode, V128, - rottype, asm, ".8h", - [(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd), - (v8f16 V128:$Rn), - (v8f16 V128:$Rm), - (i32 rottype:$rot)))]>; - } - - let Predicates = [HasComplxNum, HasNEON] in { - def v2f32 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b10, opcode, V64, - rottype, asm, ".2s", - [(set 
(v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd), - (v2f32 V64:$Rn), - (v2f32 V64:$Rm), - (i32 rottype:$rot)))]>; - - def v4f32 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b10, opcode, V128, - rottype, asm, ".4s", - [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd), - (v4f32 V128:$Rn), - (v4f32 V128:$Rm), - (i32 rottype:$rot)))]>; - - def v2f64 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b11, opcode, V128, - rottype, asm, ".2d", - [(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd), - (v2f64 V128:$Rn), - (v2f64 V128:$Rm), - (i32 rottype:$rot)))]>; - } -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDIndexedTiedComplex size, - bit opc1, bit opc2, RegisterOperand dst_reg, - RegisterOperand lhs_reg, - RegisterOperand rhs_reg, Operand vec_idx, - Operand rottype, string asm, string apple_kind, - string dst_kind, string lhs_kind, - string rhs_kind, list pattern> - : I<(outs dst_reg:$dst), - (ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx, rottype:$rot), - asm, - "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # - "$idx, $rot" # "|" # apple_kind # - "\t$Rd, $Rn, $Rm$idx, $rot}", "$Rd = $dst", pattern>, - Sched<[!if(Q, WriteVq, WriteVd)]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<2> rot; - - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28} = Scalar; - let Inst{27-24} = 0b1111; - let Inst{23-22} = size; - // Bit 21 must be set by the derived class. - let Inst{20-16} = Rm; - let Inst{15} = opc1; - let Inst{14-13} = rot; - let Inst{12} = opc2; - // Bit 11 must be set by the derived class. - let Inst{10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -// The complex instructions index by pairs of elements, so the VectorIndexes -// don't match the lane types, and the index bits are different to the other -// classes. 
-multiclass SIMDIndexedTiedComplexHSD { - let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { - def v4f16_indexed : BaseSIMDIndexedTiedComplex<0, 1, 0, 0b01, opc1, opc2, V64, - V64, V128, VectorIndexD, rottype, asm, ".4h", ".4h", - ".4h", ".h", []> { - bits<1> idx; - let Inst{11} = 0; - let Inst{21} = idx{0}; - } - - def v8f16_indexed : BaseSIMDIndexedTiedComplex<1, 1, 0, 0b01, opc1, opc2, - V128, V128, V128, VectorIndexS, rottype, asm, ".8h", - ".8h", ".8h", ".h", []> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - } // Predicates = HasComplxNum, HasNEON, HasFullFP16] - - let Predicates = [HasComplxNum, HasNEON] in { - def v4f32_indexed : BaseSIMDIndexedTiedComplex<1, 1, 0, 0b10, opc1, opc2, - V128, V128, V128, VectorIndexD, rottype, asm, ".4s", - ".4s", ".4s", ".s", []> { - bits<1> idx; - let Inst{11} = idx{0}; - let Inst{21} = 0; - } - } // Predicates = [HasComplxNum, HasNEON] -} - -//---------------------------------------------------------------------------- -// Crypto extensions -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class AESBase opc, string asm, dag outs, dag ins, string cstr, - list pat> - : I, - Sched<[WriteVq]>{ - bits<5> Rd; - bits<5> Rn; - let Inst{31-16} = 0b0100111000101000; - let Inst{15-12} = opc; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class AESInst opc, string asm, Intrinsic OpNode> - : AESBase; - -class AESTiedInst opc, string asm, Intrinsic OpNode> - : AESBase; - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class SHA3OpTiedInst opc, string asm, string dst_lhs_kind, - dag oops, dag iops, list pat> - : I, - Sched<[WriteVq]>{ - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-21} = 0b01011110000; - let Inst{20-16} = Rm; - let Inst{15} = 0; - let Inst{14-12} = opc; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class SHATiedInstQSV opc, 
string asm, Intrinsic OpNode> - : SHA3OpTiedInst; - -class SHATiedInstVVV opc, string asm, Intrinsic OpNode> - : SHA3OpTiedInst; - -class SHATiedInstQQV opc, string asm, Intrinsic OpNode> - : SHA3OpTiedInst; - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class SHA2OpInst opc, string asm, string kind, - string cstr, dag oops, dag iops, - list pat> - : I, - Sched<[WriteVq]>{ - bits<5> Rd; - bits<5> Rn; - let Inst{31-16} = 0b0101111000101000; - let Inst{15-12} = opc; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class SHATiedInstVV opc, string asm, Intrinsic OpNode> - : SHA2OpInst; - -class SHAInstSS opc, string asm, Intrinsic OpNode> - : SHA2OpInst; - -// Armv8.2-A Crypto extensions -class BaseCryptoV82 pattern> - : I , Sched<[WriteVq]> { - bits<5> Vd; - bits<5> Vn; - let Inst{31-25} = 0b1100111; - let Inst{9-5} = Vn; - let Inst{4-0} = Vd; -} - -class CryptoRRTiedop0, bits<2>op1, string asm, string asmops> - : BaseCryptoV82<(outs V128:$Vdst), (ins V128:$Vd, V128:$Vn), asm, asmops, - "$Vd = $Vdst", []> { - let Inst{31-25} = 0b1100111; - let Inst{24-21} = 0b0110; - let Inst{20-15} = 0b000001; - let Inst{14} = op0; - let Inst{13-12} = 0b00; - let Inst{11-10} = op1; -} -class CryptoRRTied_2Dop0, bits<2>op1, string asm> - : CryptoRRTied; -class CryptoRRTied_4Sop0, bits<2>op1, string asm> - : CryptoRRTied; - -class CryptoRRR op0, bits<2>op1, dag oops, dag iops, string asm, - string asmops, string cst> - : BaseCryptoV82 { - bits<5> Vm; - let Inst{24-21} = 0b0011; - let Inst{20-16} = Vm; - let Inst{15} = 0b1; - let Inst{14} = op0; - let Inst{13-12} = 0b00; - let Inst{11-10} = op1; -} -class CryptoRRR_2D op0, bits<2>op1, string asm> - : CryptoRRR; -class CryptoRRRTied_2D op0, bits<2>op1, string asm> - : CryptoRRR; -class CryptoRRR_4S op0, bits<2>op1, string asm> - : CryptoRRR; -class CryptoRRRTied_4S op0, bits<2>op1, string asm> - : CryptoRRR; -class CryptoRRRTied op0, bits<2>op1, string asm> - : CryptoRRR; - -class CryptoRRRRop0, 
string asm, string asmops> - : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm, V128:$Va), asm, - asmops, "", []> { - bits<5> Vm; - bits<5> Va; - let Inst{24-23} = 0b00; - let Inst{22-21} = op0; - let Inst{20-16} = Vm; - let Inst{15} = 0b0; - let Inst{14-10} = Va; -} -class CryptoRRRR_16Bop0, string asm> - : CryptoRRRR { -} -class CryptoRRRR_4Sop0, string asm> - : CryptoRRRR { -} - -class CryptoRRRi6 - : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm, uimm6:$imm), asm, - "{\t$Vd.2d, $Vn.2d, $Vm.2d, $imm" # - "|.2d\t$Vd, $Vn, $Vm, $imm}", "", []> { - bits<6> imm; - bits<5> Vm; - let Inst{24-21} = 0b0100; - let Inst{20-16} = Vm; - let Inst{15-10} = imm; - let Inst{9-5} = Vn; - let Inst{4-0} = Vd; -} - -class CryptoRRRi2Tiedop0, bits<2>op1, string asm> - : BaseCryptoV82<(outs V128:$Vdst), - (ins V128:$Vd, V128:$Vn, V128:$Vm, VectorIndexS:$imm), - asm, "{\t$Vd.4s, $Vn.4s, $Vm.s$imm" # - "|.4s\t$Vd, $Vn, $Vm$imm}", "$Vd = $Vdst", []> { - bits<2> imm; - bits<5> Vm; - let Inst{24-21} = 0b0010; - let Inst{20-16} = Vm; - let Inst{15} = 0b1; - let Inst{14} = op0; - let Inst{13-12} = imm; - let Inst{11-10} = op1; -} - -//---------------------------------------------------------------------------- -// v8.1 atomic instructions extension: -// * CAS -// * CASP -// * SWP -// * LDOPregister, and aliases STOPregister - -// Instruction encodings: -// -// 31 30|29 24|23|22|21|20 16|15|14 10|9 5|4 0 -// CAS SZ |001000|1 |A |1 |Rs |R |11111 |Rn |Rt -// CASP 0|SZ|001000|0 |A |1 |Rs |R |11111 |Rn |Rt -// SWP SZ |111000|A |R |1 |Rs |1 |OPC|00|Rn |Rt -// LD SZ |111000|A |R |1 |Rs |0 |OPC|00|Rn |Rt -// ST SZ |111000|A |R |1 |Rs |0 |OPC|00|Rn |11111 - -// Instruction syntax: -// -// CAS{}[] , , [] -// CAS{} , , [] -// CASP{} , , , , [] -// CASP{} , , , , [] -// SWP{}[] , , [] -// SWP{} , , [] -// LD{}[] , , [] -// LD{} , , [] -// ST{}[] , [] -// ST{} , [] - -let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in -class BaseCASEncoding pattern> - : I { - 
bits<2> Sz; - bit NP; - bit Acq; - bit Rel; - bits<5> Rs; - bits<5> Rn; - bits<5> Rt; - let Inst{31-30} = Sz; - let Inst{29-24} = 0b001000; - let Inst{23} = NP; - let Inst{22} = Acq; - let Inst{21} = 0b1; - let Inst{20-16} = Rs; - let Inst{15} = Rel; - let Inst{14-10} = 0b11111; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; - let Predicates = [HasLSE]; -} - -class BaseCAS - : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), - "cas" # order # size, "\t$Rs, $Rt, [$Rn]", - "$out = $Rs",[]>, - Sched<[WriteAtomic]> { - let NP = 1; -} - -multiclass CompareAndSwap Acq, bits<1> Rel, string order> { - let Sz = 0b00, Acq = Acq, Rel = Rel in def B : BaseCAS; - let Sz = 0b01, Acq = Acq, Rel = Rel in def H : BaseCAS; - let Sz = 0b10, Acq = Acq, Rel = Rel in def W : BaseCAS; - let Sz = 0b11, Acq = Acq, Rel = Rel in def X : BaseCAS; -} - -class BaseCASP - : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), - "casp" # order # size, "\t$Rs, $Rt, [$Rn]", - "$out = $Rs",[]>, - Sched<[WriteAtomic]> { - let NP = 0; -} - -multiclass CompareAndSwapPair Acq, bits<1> Rel, string order> { - let Sz = 0b00, Acq = Acq, Rel = Rel in - def W : BaseCASP; - let Sz = 0b01, Acq = Acq, Rel = Rel in - def X : BaseCASP; -} - -let Predicates = [HasLSE] in -class BaseSWP - : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swp" # order # size, - "\t$Rs, $Rt, [$Rn]","",[]>, - Sched<[WriteAtomic]> { - bits<2> Sz; - bit Acq; - bit Rel; - bits<5> Rs; - bits<3> opc = 0b000; - bits<5> Rn; - bits<5> Rt; - let Inst{31-30} = Sz; - let Inst{29-24} = 0b111000; - let Inst{23} = Acq; - let Inst{22} = Rel; - let Inst{21} = 0b1; - let Inst{20-16} = Rs; - let Inst{15} = 0b1; - let Inst{14-12} = opc; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; - let Predicates = [HasLSE]; -} - -multiclass Swap Acq, bits<1> Rel, string order> { - let Sz = 0b00, Acq = Acq, Rel = Rel in def B : BaseSWP; - let Sz = 0b01, Acq = Acq, Rel = Rel in def H : BaseSWP; - let Sz = 0b10, Acq = Acq, 
Rel = Rel in def W : BaseSWP; - let Sz = 0b11, Acq = Acq, Rel = Rel in def X : BaseSWP; -} - -let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in -class BaseLDOPregister - : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ld" # op # order # size, - "\t$Rs, $Rt, [$Rn]","",[]>, - Sched<[WriteAtomic]> { - bits<2> Sz; - bit Acq; - bit Rel; - bits<5> Rs; - bits<3> opc; - bits<5> Rn; - bits<5> Rt; - let Inst{31-30} = Sz; - let Inst{29-24} = 0b111000; - let Inst{23} = Acq; - let Inst{22} = Rel; - let Inst{21} = 0b1; - let Inst{20-16} = Rs; - let Inst{15} = 0b0; - let Inst{14-12} = opc; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; - let Predicates = [HasLSE]; -} - -multiclass LDOPregister opc, string op, bits<1> Acq, bits<1> Rel, - string order> { - let Sz = 0b00, Acq = Acq, Rel = Rel, opc = opc in - def B : BaseLDOPregister; - let Sz = 0b01, Acq = Acq, Rel = Rel, opc = opc in - def H : BaseLDOPregister; - let Sz = 0b10, Acq = Acq, Rel = Rel, opc = opc in - def W : BaseLDOPregister; - let Sz = 0b11, Acq = Acq, Rel = Rel, opc = opc in - def X : BaseLDOPregister; -} - -// Differing SrcRHS and DstRHS allow you to cover CLR & SUB by giving a more -// complex DAG for DstRHS. 
-let Predicates = [HasLSE] in -multiclass LDOPregister_patterns_ord_dag { - def : Pat<(!cast(op#"_"#size#"_monotonic") GPR64sp:$Rn, SrcRHS), - (!cast(inst # suffix) DstRHS, GPR64sp:$Rn)>; - def : Pat<(!cast(op#"_"#size#"_acquire") GPR64sp:$Rn, SrcRHS), - (!cast(inst # "A" # suffix) DstRHS, GPR64sp:$Rn)>; - def : Pat<(!cast(op#"_"#size#"_release") GPR64sp:$Rn, SrcRHS), - (!cast(inst # "L" # suffix) DstRHS, GPR64sp:$Rn)>; - def : Pat<(!cast(op#"_"#size#"_acq_rel") GPR64sp:$Rn, SrcRHS), - (!cast(inst # "AL" # suffix) DstRHS, GPR64sp:$Rn)>; - def : Pat<(!cast(op#"_"#size#"_seq_cst") GPR64sp:$Rn, SrcRHS), - (!cast(inst # "AL" # suffix) DstRHS, GPR64sp:$Rn)>; -} - -multiclass LDOPregister_patterns_ord { - defm : LDOPregister_patterns_ord_dag; -} - -multiclass LDOPregister_patterns_ord_mod { - defm : LDOPregister_patterns_ord_dag; -} - -multiclass LDOPregister_patterns { - defm : LDOPregister_patterns_ord; - defm : LDOPregister_patterns_ord; - defm : LDOPregister_patterns_ord; - defm : LDOPregister_patterns_ord; -} - -multiclass LDOPregister_patterns_mod { - defm : LDOPregister_patterns_ord_mod(mod#Xrr) XZR, GPR64:$Rm))>; - defm : LDOPregister_patterns_ord_mod(mod#Wrr) WZR, GPR32:$Rm))>; - defm : LDOPregister_patterns_ord_mod(mod#Wrr) WZR, GPR32:$Rm))>; - defm : LDOPregister_patterns_ord_mod(mod#Wrr) WZR, GPR32:$Rm))>; -} - -let Predicates = [HasLSE] in -multiclass CASregister_patterns_ord_dag { - def : Pat<(!cast(op#"_"#size#"_monotonic") GPR64sp:$Rn, OLD, NEW), - (!cast(inst # suffix) OLD, NEW, GPR64sp:$Rn)>; - def : Pat<(!cast(op#"_"#size#"_acquire") GPR64sp:$Rn, OLD, NEW), - (!cast(inst # "A" # suffix) OLD, NEW, GPR64sp:$Rn)>; - def : Pat<(!cast(op#"_"#size#"_release") GPR64sp:$Rn, OLD, NEW), - (!cast(inst # "L" # suffix) OLD, NEW, GPR64sp:$Rn)>; - def : Pat<(!cast(op#"_"#size#"_acq_rel") GPR64sp:$Rn, OLD, NEW), - (!cast(inst # "AL" # suffix) OLD, NEW, GPR64sp:$Rn)>; - def : Pat<(!cast(op#"_"#size#"_seq_cst") GPR64sp:$Rn, OLD, NEW), - (!cast(inst # "AL" # suffix) OLD, 
NEW, GPR64sp:$Rn)>; -} - -multiclass CASregister_patterns_ord { - defm : CASregister_patterns_ord_dag; -} - -multiclass CASregister_patterns { - defm : CASregister_patterns_ord; - defm : CASregister_patterns_ord; - defm : CASregister_patterns_ord; - defm : CASregister_patterns_ord; -} - -let Predicates = [HasLSE] in -class BaseSTOPregister : - InstAlias; - -multiclass STOPregister { - def : BaseSTOPregister(instr # "LB")>; - def : BaseSTOPregister(instr # "LH")>; - def : BaseSTOPregister(instr # "LW")>; - def : BaseSTOPregister(instr # "LX")>; - def : BaseSTOPregister(instr # "B")>; - def : BaseSTOPregister(instr # "H")>; - def : BaseSTOPregister(instr # "W")>; - def : BaseSTOPregister(instr # "X")>; -} - -class LoadStore64B_base opc, string asm_inst, string asm_ops, - dag iops, dag oops, list pat> - : I, - Sched<[]> /* FIXME: fill in scheduling details once known */ { - bits<5> Rt; - bits<5> Rn; - let Inst{31-21} = 0b11111000001; - let Inst{15} = 1; - let Inst{14-12} = opc; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; - - let Predicates = [HasV8_7a]; -} - -class LoadStore64B opc, string asm_inst, dag iops, dag oops, - list pat = []> - : LoadStore64B_base { - let Inst{20-16} = 0b11111; -} - -class Store64BV opc, string asm_inst, list pat = []> - : LoadStore64B_base { - bits<5> Rs; - let Inst{20-16} = Rs; -} - -class MOPSMemoryCopyMoveBase opcode, bits<2> op1, - bits<2> op2, string asm> - : I<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb), - (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn), - asm, "\t[$Rd]!, [$Rs]!, $Rn!", - "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb", []>, - Sched<[]> { - bits<5> Rd; - bits<5> Rs; - bits<5> Rn; - let Inst{31-27} = 0b00011; - let Inst{26} = isMove; - let Inst{25-24} = 0b01; - let Inst{23-22} = opcode; - let Inst{21} = 0b0; - let Inst{20-16} = Rs; - let Inst{15-14} = op2; - let Inst{13-12} = op1; - let Inst{11-10} = 0b01; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - - let DecoderMethod = 
"DecodeCPYMemOpInstruction"; - let mayLoad = 1; - let mayStore = 1; -} - -class MOPSMemoryCopy opcode, bits<2> op1, bits<2> op2, string asm> - : MOPSMemoryCopyMoveBase<0, opcode, op1, op2, asm>; - -class MOPSMemoryMove opcode, bits<2> op1, bits<2> op2, string asm> - : MOPSMemoryCopyMoveBase<1, opcode, op1, op2, asm>; - -class MOPSMemorySetBase opcode, bit op1, bit op2, - string asm> - : I<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb), - (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm), - asm, "\t[$Rd]!, $Rn!, $Rm", - "$Rd = $Rd_wb,$Rn = $Rn_wb", []>, - Sched<[]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-27} = 0b00011; - let Inst{26} = isTagging; - let Inst{25-21} = 0b01110; - let Inst{20-16} = Rm; - let Inst{15-14} = opcode; - let Inst{13} = op2; - let Inst{12} = op1; - let Inst{11-10} = 0b01; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeSETMemOpInstruction"; - let mayLoad = 0; - let mayStore = 1; -} - -class MOPSMemorySet opcode, bit op1, bit op2, string asm> - : MOPSMemorySetBase<0, opcode, op1, op2, asm>; - -class MOPSMemorySetTagging opcode, bit op1, bit op2, string asm> - : MOPSMemorySetBase<1, opcode, op1, op2, asm>; - -multiclass MOPSMemoryCopyInsns opcode, string asm> { - def "" : MOPSMemoryCopy; - def WN : MOPSMemoryCopy; - def RN : MOPSMemoryCopy; - def N : MOPSMemoryCopy; - def WT : MOPSMemoryCopy; - def WTWN : MOPSMemoryCopy; - def WTRN : MOPSMemoryCopy; - def WTN : MOPSMemoryCopy; - def RT : MOPSMemoryCopy; - def RTWN : MOPSMemoryCopy; - def RTRN : MOPSMemoryCopy; - def RTN : MOPSMemoryCopy; - def T : MOPSMemoryCopy; - def TWN : MOPSMemoryCopy; - def TRN : MOPSMemoryCopy; - def TN : MOPSMemoryCopy; -} - -multiclass MOPSMemoryMoveInsns opcode, string asm> { - def "" : MOPSMemoryMove; - def WN : MOPSMemoryMove; - def RN : MOPSMemoryMove; - def N : MOPSMemoryMove; - def WT : MOPSMemoryMove; - def WTWN : MOPSMemoryMove; - def WTRN : MOPSMemoryMove; - def WTN : MOPSMemoryMove; - def RT : MOPSMemoryMove; - def RTWN : 
MOPSMemoryMove; - def RTRN : MOPSMemoryMove; - def RTN : MOPSMemoryMove; - def T : MOPSMemoryMove; - def TWN : MOPSMemoryMove; - def TRN : MOPSMemoryMove; - def TN : MOPSMemoryMove; -} - -multiclass MOPSMemorySetInsns opcode, string asm> { - def "" : MOPSMemorySet; - def T : MOPSMemorySet; - def N : MOPSMemorySet; - def TN : MOPSMemorySet; -} - -multiclass MOPSMemorySetTaggingInsns opcode, string asm> { - def "" : MOPSMemorySetTagging; - def T : MOPSMemorySetTagging; - def N : MOPSMemorySetTagging; - def TN : MOPSMemorySetTagging; -} - -//---------------------------------------------------------------------------- -// Allow the size specifier tokens to be upper case, not just lower. -def : TokenAlias<".4B", ".4b">; // Add dot product -def : TokenAlias<".8B", ".8b">; -def : TokenAlias<".4H", ".4h">; -def : TokenAlias<".2S", ".2s">; -def : TokenAlias<".1D", ".1d">; -def : TokenAlias<".16B", ".16b">; -def : TokenAlias<".8H", ".8h">; -def : TokenAlias<".4S", ".4s">; -def : TokenAlias<".2D", ".2d">; -def : TokenAlias<".1Q", ".1q">; -def : TokenAlias<".2H", ".2h">; -def : TokenAlias<".B", ".b">; -def : TokenAlias<".H", ".h">; -def : TokenAlias<".S", ".s">; -def : TokenAlias<".D", ".d">; -def : TokenAlias<".Q", ".q">; diff --git a/suite/synctools/tablegen/AArch64/AArch64InstrGISel.td b/suite/synctools/tablegen/AArch64/AArch64InstrGISel.td deleted file mode 100644 index 58b6dcadfc..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64InstrGISel.td +++ /dev/null @@ -1,287 +0,0 @@ -//=----- AArch64InstrGISel.td - AArch64 GISel target pseudos -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// AArch64 GlobalISel target pseudo instruction definitions. 
This is kept -// separately from the other tablegen files for organizational purposes, but -// share the same infrastructure. -// -//===----------------------------------------------------------------------===// - - -class AArch64GenericInstruction : GenericInstruction { - let Namespace = "AArch64"; -} - -// A pseudo to represent a relocatable add instruction as part of address -// computation. -def G_ADD_LOW : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type1:$src, type2:$imm); - let hasSideEffects = 0; -} - -// Pseudo for a rev16 instruction. Produced post-legalization from -// G_SHUFFLE_VECTORs with appropriate masks. -def G_REV16 : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src); - let hasSideEffects = 0; -} - -// Pseudo for a rev32 instruction. Produced post-legalization from -// G_SHUFFLE_VECTORs with appropriate masks. -def G_REV32 : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src); - let hasSideEffects = 0; -} - -// Pseudo for a rev64 instruction. Produced post-legalization from -// G_SHUFFLE_VECTORs with appropriate masks. -def G_REV64 : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src); - let hasSideEffects = 0; -} - -// Represents an uzp1 instruction. Produced post-legalization from -// G_SHUFFLE_VECTORs with appropriate masks. -def G_UZP1 : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$v1, type0:$v2); - let hasSideEffects = 0; -} - -// Represents an uzp2 instruction. Produced post-legalization from -// G_SHUFFLE_VECTORs with appropriate masks. -def G_UZP2 : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$v1, type0:$v2); - let hasSideEffects = 0; -} - -// Represents a zip1 instruction. 
Produced post-legalization from -// G_SHUFFLE_VECTORs with appropriate masks. -def G_ZIP1 : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$v1, type0:$v2); - let hasSideEffects = 0; -} - -// Represents a zip2 instruction. Produced post-legalization from -// G_SHUFFLE_VECTORs with appropriate masks. -def G_ZIP2 : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$v1, type0:$v2); - let hasSideEffects = 0; -} - -// Represents a dup instruction. Produced post-legalization from -// G_SHUFFLE_VECTORs with appropriate masks. -def G_DUP: AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type1:$lane); - let hasSideEffects = 0; -} - -// Represents a lane duplicate operation. -def G_DUPLANE8 : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src, type1:$lane); - let hasSideEffects = 0; -} -def G_DUPLANE16 : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src, type1:$lane); - let hasSideEffects = 0; -} -def G_DUPLANE32 : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src, type1:$lane); - let hasSideEffects = 0; -} -def G_DUPLANE64 : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src, type1:$lane); - let hasSideEffects = 0; -} - -// Represents a trn1 instruction. Produced post-legalization from -// G_SHUFFLE_VECTORs with appropriate masks. -def G_TRN1 : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$v1, type0:$v2); - let hasSideEffects = 0; -} - -// Represents a trn2 instruction. Produced post-legalization from -// G_SHUFFLE_VECTORs with appropriate masks. 
-def G_TRN2 : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$v1, type0:$v2); - let hasSideEffects = 0; -} - -// Represents an ext instruction. Produced post-legalization from -// G_SHUFFLE_VECTORs with appropriate masks. -def G_EXT: AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$v1, type0:$v2, untyped_imm_0:$imm); - let hasSideEffects = 0; -} - -// Represents a vector G_ASHR with an immediate. -def G_VASHR : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src1, untyped_imm_0:$imm); - let hasSideEffects = 0; -} - -// Represents a vector G_LSHR with an immediate. -def G_VLSHR : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src1, untyped_imm_0:$imm); - let hasSideEffects = 0; -} - -// Represents an integer to FP conversion on the FPR bank. -def G_SITOF : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src); - let hasSideEffects = 0; -} -def G_UITOF : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src); - let hasSideEffects = 0; -} - -def G_FCMEQ : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src1, type1:$src2); - let hasSideEffects = 0; -} - -def G_FCMGE : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src1, type1:$src2); - let hasSideEffects = 0; -} - -def G_FCMGT : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src1, type1:$src2); - let hasSideEffects = 0; -} - -def G_FCMEQZ : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src); - let hasSideEffects = 0; -} - -def G_FCMGEZ : AArch64GenericInstruction { - 
let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src); - let hasSideEffects = 0; -} - -def G_FCMGTZ : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src); - let hasSideEffects = 0; -} - -def G_FCMLEZ : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src); - let hasSideEffects = 0; -} - -def G_FCMLTZ : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src); - let hasSideEffects = 0; -} - -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; - -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; - -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; - -def : GINodeEquiv; - -// These are patterns that we only use for GlobalISel via the importer. 
-def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)), - (vector_extract (v2f32 FPR64:$Rn), (i64 1)))), - (f32 (FADDPv2i32p (v2f32 FPR64:$Rn)))>; - -let Predicates = [HasNEON] in { - def : Pat<(v2f64 (sint_to_fp v2i32:$src)), - (SCVTFv2f64 (SSHLLv2i32_shift V64:$src, 0))>; - def : Pat<(v2f64 (uint_to_fp v2i32:$src)), - (UCVTFv2f64 (USHLLv2i32_shift V64:$src, 0))>; - def : Pat<(v2f32 (sint_to_fp v2i64:$src)), - (FCVTNv2i32 (SCVTFv2f64 V128:$src))>; - def : Pat<(v2f32 (uint_to_fp v2i64:$src)), - (FCVTNv2i32 (UCVTFv2f64 V128:$src))>; - - def : Pat<(v2i64 (fp_to_sint v2f32:$src)), - (FCVTZSv2f64 (FCVTLv2i32 V64:$src))>; - def : Pat<(v2i64 (fp_to_uint v2f32:$src)), - (FCVTZUv2f64 (FCVTLv2i32 V64:$src))>; - def : Pat<(v2i32 (fp_to_sint v2f64:$src)), - (XTNv2i32 (FCVTZSv2f64 V128:$src))>; - def : Pat<(v2i32 (fp_to_uint v2f64:$src)), - (XTNv2i32 (FCVTZUv2f64 V128:$src))>; - -} - -let Predicates = [HasNoLSE] in { -def : Pat<(atomic_cmp_swap_8 GPR64:$addr, GPR32:$desired, GPR32:$new), - (CMP_SWAP_8 GPR64:$addr, GPR32:$desired, GPR32:$new)>; - -def : Pat<(atomic_cmp_swap_16 GPR64:$addr, GPR32:$desired, GPR32:$new), - (CMP_SWAP_16 GPR64:$addr, GPR32:$desired, GPR32:$new)>; - -def : Pat<(atomic_cmp_swap_32 GPR64:$addr, GPR32:$desired, GPR32:$new), - (CMP_SWAP_32 GPR64:$addr, GPR32:$desired, GPR32:$new)>; - -def : Pat<(atomic_cmp_swap_64 GPR64:$addr, GPR64:$desired, GPR64:$new), - (CMP_SWAP_64 GPR64:$addr, GPR64:$desired, GPR64:$new)>; -} - -def : Pat<(int_aarch64_stlxp GPR64:$lo, GPR64:$hi, GPR64:$addr), - (STLXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>; -def : Pat<(int_aarch64_stxp GPR64:$lo, GPR64:$hi, GPR64:$addr), - (STXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>; diff --git a/suite/synctools/tablegen/AArch64/AArch64InstrInfo.td b/suite/synctools/tablegen/AArch64/AArch64InstrInfo.td deleted file mode 100644 index 2680b5ac09..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64InstrInfo.td +++ /dev/null @@ -1,8425 +0,0 @@ -//=- AArch64InstrInfo.td - Describe the 
AArch64 Instructions -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// AArch64 Instruction definitions. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// ARM Instruction Predicate Definitions. -// -def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">, - AssemblerPredicate<(all_of HasV8_1aOps), "armv8.1a">; -def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">, - AssemblerPredicate<(all_of HasV8_2aOps), "armv8.2a">; -def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">, - AssemblerPredicate<(all_of HasV8_3aOps), "armv8.3a">; -def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">, - AssemblerPredicate<(all_of HasV8_4aOps), "armv8.4a">; -def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">, - AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">; -def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">, - AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">; -def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">, - AssemblerPredicate<(all_of HasV8_7aOps), "armv8.7a">; -def HasV9_0a : Predicate<"Subtarget->hasV9_0aOps()">, - AssemblerPredicate<(all_of HasV9_0aOps), "armv9-a">; -def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">, - AssemblerPredicate<(all_of HasV9_1aOps), "armv9.1a">; -def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">, - AssemblerPredicate<(all_of HasV9_2aOps), "armv9.2a">; -def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">, - AssemblerPredicate<(all_of HasV9_3aOps), "armv9.3a">; -def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">, - AssemblerPredicate<(all_of HasV8_0rOps), "armv8-r">; - -def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">, - 
AssemblerPredicate<(all_of FeatureEL2VMSA), "el2vmsa">; - -def HasEL3 : Predicate<"Subtarget->hasEL3()">, - AssemblerPredicate<(all_of FeatureEL3), "el3">; - -def HasVH : Predicate<"Subtarget->hasVH()">, - AssemblerPredicate<(all_of FeatureVH), "vh">; - -def HasLOR : Predicate<"Subtarget->hasLOR()">, - AssemblerPredicate<(all_of FeatureLOR), "lor">; - -def HasPAuth : Predicate<"Subtarget->hasPAuth()">, - AssemblerPredicate<(all_of FeaturePAuth), "pauth">; - -def HasJS : Predicate<"Subtarget->hasJS()">, - AssemblerPredicate<(all_of FeatureJS), "jsconv">; - -def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">, - AssemblerPredicate<(all_of FeatureCCIDX), "ccidx">; - -def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">, - AssemblerPredicate<(all_of FeatureComplxNum), "complxnum">; - -def HasNV : Predicate<"Subtarget->hasNV()">, - AssemblerPredicate<(all_of FeatureNV), "nv">; - -def HasMPAM : Predicate<"Subtarget->hasMPAM()">, - AssemblerPredicate<(all_of FeatureMPAM), "mpam">; - -def HasDIT : Predicate<"Subtarget->hasDIT()">, - AssemblerPredicate<(all_of FeatureDIT), "dit">; - -def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">, - AssemblerPredicate<(all_of FeatureTRACEV8_4), "tracev8.4">; - -def HasAM : Predicate<"Subtarget->hasAM()">, - AssemblerPredicate<(all_of FeatureAM), "am">; - -def HasSEL2 : Predicate<"Subtarget->hasSEL2()">, - AssemblerPredicate<(all_of FeatureSEL2), "sel2">; - -def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">, - AssemblerPredicate<(all_of FeatureTLB_RMI), "tlb-rmi">; - -def HasFlagM : Predicate<"Subtarget->hasFlagM()">, - AssemblerPredicate<(all_of FeatureFlagM), "flagm">; - -def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">, - AssemblerPredicate<(all_of FeatureRCPC_IMMO), "rcpc-immo">; - -def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, - AssemblerPredicate<(all_of FeatureFPARMv8), "fp-armv8">; -def HasNEON : Predicate<"Subtarget->hasNEON()">, - AssemblerPredicate<(all_of FeatureNEON), "neon">; -def HasCrypto : 
Predicate<"Subtarget->hasCrypto()">, - AssemblerPredicate<(all_of FeatureCrypto), "crypto">; -def HasSM4 : Predicate<"Subtarget->hasSM4()">, - AssemblerPredicate<(all_of FeatureSM4), "sm4">; -def HasSHA3 : Predicate<"Subtarget->hasSHA3()">, - AssemblerPredicate<(all_of FeatureSHA3), "sha3">; -def HasSHA2 : Predicate<"Subtarget->hasSHA2()">, - AssemblerPredicate<(all_of FeatureSHA2), "sha2">; -def HasAES : Predicate<"Subtarget->hasAES()">, - AssemblerPredicate<(all_of FeatureAES), "aes">; -def HasDotProd : Predicate<"Subtarget->hasDotProd()">, - AssemblerPredicate<(all_of FeatureDotProd), "dotprod">; -def HasCRC : Predicate<"Subtarget->hasCRC()">, - AssemblerPredicate<(all_of FeatureCRC), "crc">; -def HasLSE : Predicate<"Subtarget->hasLSE()">, - AssemblerPredicate<(all_of FeatureLSE), "lse">; -def HasNoLSE : Predicate<"!Subtarget->hasLSE()">; -def HasRAS : Predicate<"Subtarget->hasRAS()">, - AssemblerPredicate<(all_of FeatureRAS), "ras">; -def HasRDM : Predicate<"Subtarget->hasRDM()">, - AssemblerPredicate<(all_of FeatureRDM), "rdm">; -def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">; -def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, - AssemblerPredicate<(all_of FeatureFullFP16), "fullfp16">; -def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">, - AssemblerPredicate<(all_of FeatureFP16FML), "fp16fml">; -def HasSPE : Predicate<"Subtarget->hasSPE()">, - AssemblerPredicate<(all_of FeatureSPE), "spe">; -def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">, - AssemblerPredicate<(all_of FeatureFuseAES), - "fuse-aes">; -def HasSVE : Predicate<"Subtarget->hasSVE()">, - AssemblerPredicate<(all_of FeatureSVE), "sve">; -def HasSVE2 : Predicate<"Subtarget->hasSVE2()">, - AssemblerPredicate<(all_of FeatureSVE2), "sve2">; -def HasSVE2AES : Predicate<"Subtarget->hasSVE2AES()">, - AssemblerPredicate<(all_of FeatureSVE2AES), "sve2-aes">; -def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">, - AssemblerPredicate<(all_of FeatureSVE2SM4), "sve2-sm4">; -def HasSVE2SHA3 
: Predicate<"Subtarget->hasSVE2SHA3()">, - AssemblerPredicate<(all_of FeatureSVE2SHA3), "sve2-sha3">; -def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">, - AssemblerPredicate<(all_of FeatureSVE2BitPerm), "sve2-bitperm">; -def HasSME : Predicate<"Subtarget->hasSME()">, - AssemblerPredicate<(all_of FeatureSME), "sme">; -def HasSMEF64 : Predicate<"Subtarget->hasSMEF64()">, - AssemblerPredicate<(all_of FeatureSMEF64), "sme-f64">; -def HasSMEI64 : Predicate<"Subtarget->hasSMEI64()">, - AssemblerPredicate<(all_of FeatureSMEI64), "sme-i64">; -def HasStreamingSVE : Predicate<"Subtarget->hasStreamingSVE()">, - AssemblerPredicate<(all_of FeatureStreamingSVE), "sme">; -// A subset of SVE(2) instructions are legal in Streaming SVE execution mode, -// they should be enabled if either has been specified. -def HasSVEorStreamingSVE - : Predicate<"Subtarget->hasSVE() || Subtarget->hasStreamingSVE()">, - AssemblerPredicate<(any_of FeatureSVE, FeatureStreamingSVE), - "sve or sme">; -def HasSVE2orStreamingSVE - : Predicate<"Subtarget->hasSVE2() || Subtarget->hasStreamingSVE()">, - AssemblerPredicate<(any_of FeatureSVE2, FeatureStreamingSVE), - "sve2 or sme">; -// A subset of NEON instructions are legal in Streaming SVE execution mode, -// they should be enabled if either has been specified. 
-def HasNEONorStreamingSVE - : Predicate<"Subtarget->hasNEON() || Subtarget->hasStreamingSVE()">, - AssemblerPredicate<(any_of FeatureNEON, FeatureStreamingSVE), - "neon or sme">; -def HasRCPC : Predicate<"Subtarget->hasRCPC()">, - AssemblerPredicate<(all_of FeatureRCPC), "rcpc">; -def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">, - AssemblerPredicate<(all_of FeatureAltFPCmp), "altnzcv">; -def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">, - AssemblerPredicate<(all_of FeatureFRInt3264), "frint3264">; -def HasSB : Predicate<"Subtarget->hasSB()">, - AssemblerPredicate<(all_of FeatureSB), "sb">; -def HasPredRes : Predicate<"Subtarget->hasPredRes()">, - AssemblerPredicate<(all_of FeaturePredRes), "predres">; -def HasCCDP : Predicate<"Subtarget->hasCCDP()">, - AssemblerPredicate<(all_of FeatureCacheDeepPersist), "ccdp">; -def HasBTI : Predicate<"Subtarget->hasBTI()">, - AssemblerPredicate<(all_of FeatureBranchTargetId), "bti">; -def HasMTE : Predicate<"Subtarget->hasMTE()">, - AssemblerPredicate<(all_of FeatureMTE), "mte">; -def HasTME : Predicate<"Subtarget->hasTME()">, - AssemblerPredicate<(all_of FeatureTME), "tme">; -def HasETE : Predicate<"Subtarget->hasETE()">, - AssemblerPredicate<(all_of FeatureETE), "ete">; -def HasTRBE : Predicate<"Subtarget->hasTRBE()">, - AssemblerPredicate<(all_of FeatureTRBE), "trbe">; -def HasBF16 : Predicate<"Subtarget->hasBF16()">, - AssemblerPredicate<(all_of FeatureBF16), "bf16">; -def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">, - AssemblerPredicate<(all_of FeatureMatMulInt8), "i8mm">; -def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">, - AssemblerPredicate<(all_of FeatureMatMulFP32), "f32mm">; -def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">, - AssemblerPredicate<(all_of FeatureMatMulFP64), "f64mm">; -def HasXS : Predicate<"Subtarget->hasXS()">, - AssemblerPredicate<(all_of FeatureXS), "xs">; -def HasWFxT : Predicate<"Subtarget->hasWFxT()">, - AssemblerPredicate<(all_of 
FeatureWFxT), "wfxt">; -def HasLS64 : Predicate<"Subtarget->hasLS64()">, - AssemblerPredicate<(all_of FeatureLS64), "ls64">; -def HasBRBE : Predicate<"Subtarget->hasBRBE()">, - AssemblerPredicate<(all_of FeatureBRBE), "brbe">; -def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">, - AssemblerPredicate<(all_of FeatureSPE_EEF), "spe-eef">; -def HasHBC : Predicate<"Subtarget->hasHBC()">, - AssemblerPredicate<(all_of FeatureHBC), "hbc">; -def HasMOPS : Predicate<"Subtarget->hasMOPS()">, - AssemblerPredicate<(all_of FeatureMOPS), "mops">; -def IsLE : Predicate<"Subtarget->isLittleEndian()">; -def IsBE : Predicate<"!Subtarget->isLittleEndian()">; -def IsWindows : Predicate<"Subtarget->isTargetWindows()">; -def UseExperimentalZeroingPseudos - : Predicate<"Subtarget->useExperimentalZeroingPseudos()">; -def UseAlternateSExtLoadCVTF32 - : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">; - -def UseNegativeImmediates - : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)), - "NegativeImmediates">; - -def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">; - -def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER", - SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, - SDTCisInt<1>]>>; - - -//===----------------------------------------------------------------------===// -// AArch64-specific DAG Nodes. 
-// - -// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS -def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2, - [SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>, - SDTCisInt<0>, SDTCisVT<1, i32>]>; - -// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS -def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3, - [SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisInt<0>, - SDTCisVT<3, i32>]>; - -// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS -def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, - [SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>, - SDTCisInt<0>, - SDTCisVT<1, i32>, - SDTCisVT<4, i32>]>; - -def SDT_AArch64Brcond : SDTypeProfile<0, 3, - [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, - SDTCisVT<2, i32>]>; -def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; -def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, - SDTCisVT<2, OtherVT>]>; - - -def SDT_AArch64CSel : SDTypeProfile<1, 4, - [SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisInt<3>, - SDTCisVT<4, i32>]>; -def SDT_AArch64CCMP : SDTypeProfile<1, 5, - [SDTCisVT<0, i32>, - SDTCisInt<1>, - SDTCisSameAs<1, 2>, - SDTCisInt<3>, - SDTCisInt<4>, - SDTCisVT<5, i32>]>; -def SDT_AArch64FCCMP : SDTypeProfile<1, 5, - [SDTCisVT<0, i32>, - SDTCisFP<1>, - SDTCisSameAs<1, 2>, - SDTCisInt<3>, - SDTCisInt<4>, - SDTCisVT<5, i32>]>; -def SDT_AArch64FCmp : SDTypeProfile<0, 2, - [SDTCisFP<0>, - SDTCisSameAs<0, 1>]>; -def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>; -def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; -def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>; -def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>, - SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>]>; -def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>; -def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; -def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisInt<2>, SDTCisInt<3>]>; 
-def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; -def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, SDTCisInt<3>]>; -def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>; -def SDT_AArch64Dot: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisVec<2>, SDTCisSameAs<2,3>]>; - -def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>, - SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>]>; - -def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; -def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>; -def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>; -def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>]>; -def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, - SDTCisSameAs<0,3>]>; -def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>; -def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; - -def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; - -def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, - SDTCisPtrTy<1>]>; - -def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; - -def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; -def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; -def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; - -// Generates the general dynamic sequences, i.e. 
-// adrp x0, :tlsdesc:var -// ldr x1, [x0, #:tlsdesc_lo12:var] -// add x0, x0, #:tlsdesc_lo12:var -// .tlsdesccall var -// blr x1 - -// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here) -// number of operands (the variable) -def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1, - [SDTCisPtrTy<0>]>; - -def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, - [SDTCisVT<0, i64>, SDTCisVT<1, i32>, - SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, - SDTCisSameAs<1, 4>]>; - -def SDT_AArch64TBL : SDTypeProfile<1, 2, [ - SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2> -]>; - -// non-extending masked load fragment. -def nonext_masked_load : - PatFrag<(ops node:$ptr, node:$pred, node:$def), - (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ - return cast(N)->getExtensionType() == ISD::NON_EXTLOAD && - cast(N)->isUnindexed() && - !cast(N)->isNonTemporal(); -}]>; -// sign extending masked load fragments. -def asext_masked_load : - PatFrag<(ops node:$ptr, node:$pred, node:$def), - (masked_ld node:$ptr, undef, node:$pred, node:$def),[{ - return (cast(N)->getExtensionType() == ISD::EXTLOAD || - cast(N)->getExtensionType() == ISD::SEXTLOAD) && - cast(N)->isUnindexed(); -}]>; -def asext_masked_load_i8 : - PatFrag<(ops node:$ptr, node:$pred, node:$def), - (asext_masked_load node:$ptr, node:$pred, node:$def), [{ - return cast(N)->getMemoryVT().getScalarType() == MVT::i8; -}]>; -def asext_masked_load_i16 : - PatFrag<(ops node:$ptr, node:$pred, node:$def), - (asext_masked_load node:$ptr, node:$pred, node:$def), [{ - return cast(N)->getMemoryVT().getScalarType() == MVT::i16; -}]>; -def asext_masked_load_i32 : - PatFrag<(ops node:$ptr, node:$pred, node:$def), - (asext_masked_load node:$ptr, node:$pred, node:$def), [{ - return cast(N)->getMemoryVT().getScalarType() == MVT::i32; -}]>; -// zero extending masked load fragments. 
-def zext_masked_load : - PatFrag<(ops node:$ptr, node:$pred, node:$def), - (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ - return cast(N)->getExtensionType() == ISD::ZEXTLOAD && - cast(N)->isUnindexed(); -}]>; -def zext_masked_load_i8 : - PatFrag<(ops node:$ptr, node:$pred, node:$def), - (zext_masked_load node:$ptr, node:$pred, node:$def), [{ - return cast(N)->getMemoryVT().getScalarType() == MVT::i8; -}]>; -def zext_masked_load_i16 : - PatFrag<(ops node:$ptr, node:$pred, node:$def), - (zext_masked_load node:$ptr, node:$pred, node:$def), [{ - return cast(N)->getMemoryVT().getScalarType() == MVT::i16; -}]>; -def zext_masked_load_i32 : - PatFrag<(ops node:$ptr, node:$pred, node:$def), - (zext_masked_load node:$ptr, node:$pred, node:$def), [{ - return cast(N)->getMemoryVT().getScalarType() == MVT::i32; -}]>; - -def non_temporal_load : - PatFrag<(ops node:$ptr, node:$pred, node:$def), - (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ - return cast(N)->getExtensionType() == ISD::NON_EXTLOAD && - cast(N)->isUnindexed() && - cast(N)->isNonTemporal(); -}]>; - -// non-truncating masked store fragment. -def nontrunc_masked_store : - PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, undef, node:$pred), [{ - return !cast(N)->isTruncatingStore() && - cast(N)->isUnindexed() && - !cast(N)->isNonTemporal(); -}]>; -// truncating masked store fragments. 
-def trunc_masked_store : - PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, undef, node:$pred), [{ - return cast(N)->isTruncatingStore() && - cast(N)->isUnindexed(); -}]>; -def trunc_masked_store_i8 : - PatFrag<(ops node:$val, node:$ptr, node:$pred), - (trunc_masked_store node:$val, node:$ptr, node:$pred), [{ - return cast(N)->getMemoryVT().getScalarType() == MVT::i8; -}]>; -def trunc_masked_store_i16 : - PatFrag<(ops node:$val, node:$ptr, node:$pred), - (trunc_masked_store node:$val, node:$ptr, node:$pred), [{ - return cast(N)->getMemoryVT().getScalarType() == MVT::i16; -}]>; -def trunc_masked_store_i32 : - PatFrag<(ops node:$val, node:$ptr, node:$pred), - (trunc_masked_store node:$val, node:$ptr, node:$pred), [{ - return cast(N)->getMemoryVT().getScalarType() == MVT::i32; -}]>; - -def non_temporal_store : - PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, undef, node:$pred), [{ - return !cast(N)->isTruncatingStore() && - cast(N)->isUnindexed() && - cast(N)->isNonTemporal(); -}]>; - -// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise -def top16Zero: PatLeaf<(i32 GPR32:$src), [{ - return SDValue(N,0)->getValueType(0) == MVT::i32 && - CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16)); - }]>; - -// top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise -def top32Zero: PatLeaf<(i64 GPR64:$src), [{ - return SDValue(N,0)->getValueType(0) == MVT::i64 && - CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32)); - }]>; - -// Node definitions. 
-def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>; -def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>; -def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>; -def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>; -def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", - SDCallSeqStart<[ SDTCisVT<0, i32>, - SDTCisVT<1, i32> ]>, - [SDNPHasChain, SDNPOutGlue]>; -def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", - SDCallSeqEnd<[ SDTCisVT<0, i32>, - SDTCisVT<1, i32> ]>, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64call : SDNode<"AArch64ISD::CALL", - SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; - -def AArch64call_bti : SDNode<"AArch64ISD::CALL_BTI", - SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; - -def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER", - SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; - -def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond, - [SDNPHasChain]>; -def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz, - [SDNPHasChain]>; -def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz, - [SDNPHasChain]>; -def AArch64tbz : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz, - [SDNPHasChain]>; -def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz, - [SDNPHasChain]>; - - -def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>; -def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>; -def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>; -def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>; -def AArch64retflag : SDNode<"AArch64ISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >; -def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>; -def 
AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut, - [SDNPCommutative]>; -def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>; -def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, - [SDNPCommutative]>; -def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; -def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; - -def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>; -def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>; -def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>; - -def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; - -def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; -def AArch64strict_fcmp : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp, - [SDNPHasChain]>; -def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp, - [SDNPHasChain]>; -def AArch64any_fcmp : PatFrags<(ops node:$lhs, node:$rhs), - [(AArch64strict_fcmp node:$lhs, node:$rhs), - (AArch64fcmp node:$lhs, node:$rhs)]>; - -def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>; -def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>; -def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; -def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; -def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; - -def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>; - -def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>; -def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>; -def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>; -def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>; -def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>; -def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>; - -def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", 
SDT_AArch64MOVIedit>; -def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>; -def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>; -def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>; -def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>; -def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>; -def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>; - -def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>; -def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>; -def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>; -def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>; - -def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>; -def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>; -def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>; -def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>; -def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>; -def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>; -def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>; -def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>; -def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>; -def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>; - -def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>; -def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>; - -def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>; -def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>; -def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>; -def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>; -def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>; - -def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>; -def AArch64fcmge: 
SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>; -def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>; - -def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>; -def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>; -def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>; -def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>; -def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>; -def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS), - (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>; - -def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>; -def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>; -def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>; -def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>; -def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>; - -def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>; -def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>; - -def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; - -def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH, - [SDNPHasChain, SDNPSideEffect]>; - -def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>; -def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>; - -def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ", - SDT_AArch64TLSDescCallSeq, - [SDNPInGlue, SDNPOutGlue, SDNPHasChain, - SDNPVariadic]>; - - -def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", - SDT_AArch64WrapperLarge>; - -def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>; - -def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, - SDTCisSameAs<1, 2>]>; -def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>; -def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>; - -def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", 
SDTFPUnaryOp>; -def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>; -def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>; -def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>; - -def AArch64sdot : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>; -def AArch64udot : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>; - -def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>; -def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>; -def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>; -def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>; -def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>; -def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; - -def AArch64srhadd : SDNode<"AArch64ISD::SRHADD", SDT_AArch64binvec>; -def AArch64urhadd : SDNode<"AArch64ISD::URHADD", SDT_AArch64binvec>; -def AArch64shadd : SDNode<"AArch64ISD::SHADD", SDT_AArch64binvec>; -def AArch64uhadd : SDNode<"AArch64ISD::UHADD", SDT_AArch64binvec>; - -def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs), - [(abdu node:$lhs, node:$rhs), - (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>; -def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs), - [(abds node:$lhs, node:$rhs), - (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>; - -def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>; -def AArch64uaddlp : PatFrags<(ops node:$src), - [(AArch64uaddlp_n node:$src), - (int_aarch64_neon_uaddlp node:$src)]>; - -def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; -def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; -def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; -def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; -def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, 
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; - -def SDT_AArch64unpk : SDTypeProfile<1, 1, [ - SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0> -]>; -def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>; -def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>; -def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>; -def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>; - -def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; -def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; -def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; - -def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>; -def AArch64mrs : SDNode<"AArch64ISD::MRS", - SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>, - [SDNPHasChain, SDNPOutGlue]>; -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// - -// AArch64 Instruction Predicate Definitions. -// We could compute these on a per-module basis but doing so requires accessing -// the Function object through the Subtarget and objections were raised -// to that (see post-commit review comments for r301750). -let RecomputePerFunction = 1 in { - def ForCodeSize : Predicate<"shouldOptForSize(MF)">; - def NotForCodeSize : Predicate<"!shouldOptForSize(MF)">; - // Avoid generating STRQro if it is slow, unless we're optimizing for code size. 
- def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">; - - def UseBTI : Predicate<[{ MF->getInfo()->branchTargetEnforcement() }]>; - def NotUseBTI : Predicate<[{ !MF->getInfo()->branchTargetEnforcement() }]>; - - def SLSBLRMitigation : Predicate<[{ MF->getSubtarget().hardenSlsBlr() }]>; - def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget().hardenSlsBlr() }]>; - // Toggles patterns which aren't beneficial in GlobalISel when we aren't - // optimizing. This allows us to selectively use patterns without impacting - // SelectionDAG's behaviour. - // FIXME: One day there will probably be a nicer way to check for this, but - // today is not that day. - def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">; -} - -include "AArch64InstrFormats.td" -include "SVEInstrFormats.td" -include "SMEInstrFormats.td" - -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Miscellaneous instructions. -//===----------------------------------------------------------------------===// - -let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in { -// We set Sched to empty list because we expect these instructions to simply get -// removed in most cases. 
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), - [(AArch64callseq_start timm:$amt1, timm:$amt2)]>, - Sched<[]>; -def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), - [(AArch64callseq_end timm:$amt1, timm:$amt2)]>, - Sched<[]>; -} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 - -let isReMaterializable = 1, isCodeGenOnly = 1 in { -// FIXME: The following pseudo instructions are only needed because remat -// cannot handle multiple instructions. When that changes, they can be -// removed, along with the AArch64Wrapper node. - -let AddedComplexity = 10 in -def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr), - [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>, - Sched<[WriteLDAdr]>; - -// The MOVaddr instruction should match only when the add is not folded -// into a load or store address. -def MOVaddr - : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi), - tglobaladdr:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrJT - : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi), - tjumptable:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrCP - : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi), - tconstpool:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrBA - : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi), - tblockaddress:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrTLS - : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi), - tglobaltlsaddr:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrEXT - : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), - [(set 
GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi), - texternalsym:$low))]>, - Sched<[WriteAdrAdr]>; -// Normally AArch64addlow either gets folded into a following ldr/str, -// or together with an adrp into MOVaddr above. For cases with TLS, it -// might appear without either of them, so allow lowering it into a plain -// add. -def ADDlowTLS - : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low), - [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src, - tglobaltlsaddr:$low))]>, - Sched<[WriteAdr]>; - -} // isReMaterializable, isCodeGenOnly - -def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr), - (LOADgot tglobaltlsaddr:$addr)>; - -def : Pat<(AArch64LOADgot texternalsym:$addr), - (LOADgot texternalsym:$addr)>; - -def : Pat<(AArch64LOADgot tconstpool:$addr), - (LOADgot tconstpool:$addr)>; - -// In general these get lowered into a sequence of three 4-byte instructions. -// 32-bit jump table destination is actually only 2 instructions since we can -// use the table itself as a PC-relative base. But optimization occurs after -// branch relaxation so be pessimistic. -let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch", - isNotDuplicable = 1 in { -def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch), - (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>, - Sched<[]>; -def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch), - (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>, - Sched<[]>; -def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch), - (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>, - Sched<[]>; -} - -// Space-consuming pseudo to aid testing of placement and reachability -// algorithms. Immediate operand is the number of bytes this "instruction" -// occupies; register operands can be used to enforce dependency and constrain -// the scheduler. 
-let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in -def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn), - [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>, - Sched<[]>; - -let hasSideEffects = 1, isCodeGenOnly = 1 in { - def SpeculationSafeValueX - : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>; - def SpeculationSafeValueW - : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>; -} - -// SpeculationBarrierEndBB must only be used after an unconditional control -// flow, i.e. after a terminator for which isBarrier is True. -let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in { - // This gets lowered to a pair of 4-byte instructions. - let Size = 8 in - def SpeculationBarrierISBDSBEndBB - : Pseudo<(outs), (ins), []>, Sched<[]>; - // This gets lowered to a 4-byte instruction. - let Size = 4 in - def SpeculationBarrierSBEndBB - : Pseudo<(outs), (ins), []>, Sched<[]>; -} - -//===----------------------------------------------------------------------===// -// System instructions. -//===----------------------------------------------------------------------===// - -def HINT : HintI<"hint">; -def : InstAlias<"nop", (HINT 0b000)>; -def : InstAlias<"yield",(HINT 0b001)>; -def : InstAlias<"wfe", (HINT 0b010)>; -def : InstAlias<"wfi", (HINT 0b011)>; -def : InstAlias<"sev", (HINT 0b100)>; -def : InstAlias<"sevl", (HINT 0b101)>; -def : InstAlias<"dgh", (HINT 0b110)>; -def : InstAlias<"esb", (HINT 0b10000)>, Requires<[HasRAS]>; -def : InstAlias<"csdb", (HINT 20)>; -// In order to be able to write readable assembly, LLVM should accept assembly -// inputs that use Branch Target Indentification mnemonics, even with BTI disabled. -// However, in order to be compatible with other assemblers (e.g. GAS), LLVM -// should not emit these mnemonics unless BTI is enabled. 
-def : InstAlias<"bti", (HINT 32), 0>; -def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>; -def : InstAlias<"bti", (HINT 32)>, Requires<[HasBTI]>; -def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>; - -// v8.2a Statistical Profiling extension -def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>; - -// As far as LLVM is concerned this writes to the system's exclusive monitors. -let mayLoad = 1, mayStore = 1 in -def CLREX : CRmSystemI; - -// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot -// model patterns with sufficiently fine granularity. -let mayLoad = ?, mayStore = ? in { -def DMB : CRmSystemI; - -def DSB : CRmSystemI; - -def ISB : CRmSystemI; - -def TSB : CRmSystemI { - let CRm = 0b0010; - let Inst{12} = 0; - let Predicates = [HasTRACEV8_4]; -} - -def DSBnXS : CRmSystemI { - let CRm{1-0} = 0b11; - let Inst{9-8} = 0b10; - let Predicates = [HasXS]; -} - -let Predicates = [HasWFxT] in { -def WFET : RegInputSystemI<0b0000, 0b000, "wfet">; -def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">; -} - -// Branch Record Buffer two-word mnemonic instructions -class BRBEI op2, string keyword> - : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> { - let Inst{31-8} = 0b110101010000100101110010; - let Inst{7-5} = op2; - let Predicates = [HasBRBE]; -} -def BRB_IALL: BRBEI<0b100, "\tiall">; -def BRB_INJ: BRBEI<0b101, "\tinj">; - -} - -// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ -def : TokenAlias<"INJ", "inj">; -def : TokenAlias<"IALL", "iall">; - -// ARMv8.2-A Dot Product -let Predicates = [HasDotProd] in { -defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>; -defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>; -defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>; -defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>; -} - -// ARMv8.6-A BFloat -let Predicates = [HasNEON, HasBF16] in { -defm BFDOT : 
SIMDThreeSameVectorBFDot<1, "bfdot">; -defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">; -def BFMMLA : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">; -def BFMLALB : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>; -def BFMLALT : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>; -def BFMLALBIdx : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>; -def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>; -def BFCVTN : SIMD_BFCVTN; -def BFCVTN2 : SIMD_BFCVTN2; - -// Vector-scalar BFDOT: -// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit -// register (the instruction uses a single 32-bit lane from it), so the pattern -// is a bit tricky. -def : Pat<(v2f32 (int_aarch64_neon_bfdot - (v2f32 V64:$Rd), (v4bf16 V64:$Rn), - (v4bf16 (bitconvert - (v2i32 (AArch64duplane32 - (v4i32 (bitconvert - (v8bf16 (insert_subvector undef, - (v4bf16 V64:$Rm), - (i64 0))))), - VectorIndexS:$idx)))))), - (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn), - (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), - VectorIndexS:$idx)>; -} - -let Predicates = [HasNEONorStreamingSVE, HasBF16] in { -def BFCVT : BF16ToSinglePrecision<"bfcvt">; -} - -// ARMv8.6A AArch64 matrix multiplication -let Predicates = [HasMatMulInt8] in { -def SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>; -def UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>; -def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>; -defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>; -defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>; - -// sudot lane has a pattern where usdot is expected (there is no sudot). -// The second operand is used in the dup operation to repeat the indexed -// element. 
-class BaseSIMDSUDOTIndex - : BaseSIMDThreeSameVectorDotIndex { - let Pattern = [(set (AccumType RegType:$dst), - (AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd), - (InputType (bitconvert (AccumType - (AArch64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx)))), - (InputType RegType:$Rn))))]; -} - -multiclass SIMDSUDOTIndex { - def v8i8 : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>; - def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>; -} - -defm SUDOTlane : SIMDSUDOTIndex; - -} - -// ARMv8.2-A FP16 Fused Multiply-Add Long -let Predicates = [HasNEON, HasFP16FML] in { -defm FMLAL : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>; -defm FMLSL : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>; -defm FMLAL2 : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>; -defm FMLSL2 : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>; -defm FMLALlane : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>; -defm FMLSLlane : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>; -defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>; -defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>; -} - -// Armv8.2-A Crypto extensions -let Predicates = [HasSHA3] in { -def SHA512H : CryptoRRRTied<0b0, 0b00, "sha512h">; -def SHA512H2 : CryptoRRRTied<0b0, 0b01, "sha512h2">; -def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">; -def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">; -def RAX1 : CryptoRRR_2D<0b0,0b11, "rax1">; -def EOR3 : CryptoRRRR_16B<0b00, "eor3">; -def BCAX : CryptoRRRR_16B<0b01, "bcax">; -def XAR : CryptoRRRi6<"xar">; - -class SHA3_pattern - : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))), - (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>; - -def : Pat<(v2i64 
(int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))), - (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>; - -def : SHA3_pattern; -def : SHA3_pattern; -def : SHA3_pattern; - -def : SHA3_pattern; -def : SHA3_pattern; -def : SHA3_pattern; -def : SHA3_pattern; - -class EOR3_pattern - : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)), - (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>; - -def : EOR3_pattern; -def : EOR3_pattern; -def : EOR3_pattern; -def : EOR3_pattern; - -def : SHA3_pattern; -def : SHA3_pattern; -def : SHA3_pattern; -def : SHA3_pattern; - -def : SHA3_pattern; -def : SHA3_pattern; -def : SHA3_pattern; -def : SHA3_pattern; - -def : SHA3_pattern; -def : SHA3_pattern; -def : SHA3_pattern; -def : SHA3_pattern; - -def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))), - (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>; - -def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))), - (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>; - - -} // HasSHA3 - -let Predicates = [HasSM4] in { -def SM3TT1A : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">; -def SM3TT1B : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">; -def SM3TT2A : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">; -def SM3TT2B : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">; -def SM3SS1 : CryptoRRRR_4S<0b10, "sm3ss1">; -def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">; -def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">; -def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">; -def SM4E : CryptoRRTied_4S<0b0, 0b01, "sm4e">; - -def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))), - (SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>; - -class SM3PARTW_pattern - : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))), - (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>; - -class SM3TT_pattern - : Pat<(v4i32 (OpNode (v4i32 
V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm) )), - (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>; - -class SM4_pattern - : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))), - (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>; - -def : SM3PARTW_pattern; -def : SM3PARTW_pattern; - -def : SM3TT_pattern; -def : SM3TT_pattern; -def : SM3TT_pattern; -def : SM3TT_pattern; - -def : SM4_pattern; -def : SM4_pattern; -} // HasSM4 - -let Predicates = [HasRCPC] in { - // v8.3 Release Consistent Processor Consistent support, optional in v8.2. - def LDAPRB : RCPCLoad<0b00, "ldaprb", GPR32>; - def LDAPRH : RCPCLoad<0b01, "ldaprh", GPR32>; - def LDAPRW : RCPCLoad<0b10, "ldapr", GPR32>; - def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>; -} - -// v8.3a complex add and multiply-accumulate. No predicate here, that is done -// inside the multiclass as the FP16 versions need different predicates. -defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop, - "fcmla", null_frag>; -defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd, - "fcadd", null_frag>; -defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">; - -let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { - def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))), - (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>; - def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))), - (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>; - def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), - (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>; - def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), - (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>; -} - -let Predicates = [HasComplxNum, HasNEON] in { - def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 
V64:$Rm))), - (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>; - def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))), - (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>; - foreach Ty = [v4f32, v2f64] in { - def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))), - (!cast("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>; - def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))), - (!cast("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>; - } -} - -multiclass FCMLA_PATS { - def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), - (!cast("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>; - def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), - (!cast("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>; - def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), - (!cast("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>; - def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), - (!cast("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>; -} - -multiclass FCMLA_LANE_PATS { - def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), - (!cast("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>; - def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), - (!cast("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>; - def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), - (!cast("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>; - def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), - (!cast("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>; -} - - -let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { - defm : FCMLA_PATS; - defm : FCMLA_PATS; - - defm : FCMLA_LANE_PATS; - defm : FCMLA_LANE_PATS; -} -let Predicates = [HasComplxNum, HasNEON] in { - defm : 
FCMLA_PATS; - defm : FCMLA_PATS; - defm : FCMLA_PATS; - - defm : FCMLA_LANE_PATS; -} - -// v8.3a Pointer Authentication -// These instructions inhabit part of the hint space and so can be used for -// armv8 targets. Keeping the old HINT mnemonic when compiling without PA is -// important for compatibility with other assemblers (e.g. GAS) when building -// software compatible with both CPUs that do or don't implement PA. -let Uses = [LR], Defs = [LR] in { - def PACIAZ : SystemNoOperands<0b000, "hint\t#24">; - def PACIBZ : SystemNoOperands<0b010, "hint\t#26">; - let isAuthenticated = 1 in { - def AUTIAZ : SystemNoOperands<0b100, "hint\t#28">; - def AUTIBZ : SystemNoOperands<0b110, "hint\t#30">; - } -} -let Uses = [LR, SP], Defs = [LR] in { - def PACIASP : SystemNoOperands<0b001, "hint\t#25">; - def PACIBSP : SystemNoOperands<0b011, "hint\t#27">; - let isAuthenticated = 1 in { - def AUTIASP : SystemNoOperands<0b101, "hint\t#29">; - def AUTIBSP : SystemNoOperands<0b111, "hint\t#31">; - } -} -let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in { - def PACIA1716 : SystemNoOperands<0b000, "hint\t#8">; - def PACIB1716 : SystemNoOperands<0b010, "hint\t#10">; - let isAuthenticated = 1 in { - def AUTIA1716 : SystemNoOperands<0b100, "hint\t#12">; - def AUTIB1716 : SystemNoOperands<0b110, "hint\t#14">; - } -} - -let Uses = [LR], Defs = [LR], CRm = 0b0000 in { - def XPACLRI : SystemNoOperands<0b111, "hint\t#7">; -} - -// In order to be able to write readable assembly, LLVM should accept assembly -// inputs that use pointer authentication mnemonics, even with PA disabled. -// However, in order to be compatible with other assemblers (e.g. GAS), LLVM -// should not emit these mnemonics unless PA is enabled. 
-def : InstAlias<"paciaz", (PACIAZ), 0>; -def : InstAlias<"pacibz", (PACIBZ), 0>; -def : InstAlias<"autiaz", (AUTIAZ), 0>; -def : InstAlias<"autibz", (AUTIBZ), 0>; -def : InstAlias<"paciasp", (PACIASP), 0>; -def : InstAlias<"pacibsp", (PACIBSP), 0>; -def : InstAlias<"autiasp", (AUTIASP), 0>; -def : InstAlias<"autibsp", (AUTIBSP), 0>; -def : InstAlias<"pacia1716", (PACIA1716), 0>; -def : InstAlias<"pacib1716", (PACIB1716), 0>; -def : InstAlias<"autia1716", (AUTIA1716), 0>; -def : InstAlias<"autib1716", (AUTIB1716), 0>; -def : InstAlias<"xpaclri", (XPACLRI), 0>; - -// These pointer authentication instructions require armv8.3a -let Predicates = [HasPAuth] in { - - // When PA is enabled, a better mnemonic should be emitted. - def : InstAlias<"paciaz", (PACIAZ), 1>; - def : InstAlias<"pacibz", (PACIBZ), 1>; - def : InstAlias<"autiaz", (AUTIAZ), 1>; - def : InstAlias<"autibz", (AUTIBZ), 1>; - def : InstAlias<"paciasp", (PACIASP), 1>; - def : InstAlias<"pacibsp", (PACIBSP), 1>; - def : InstAlias<"autiasp", (AUTIASP), 1>; - def : InstAlias<"autibsp", (AUTIBSP), 1>; - def : InstAlias<"pacia1716", (PACIA1716), 1>; - def : InstAlias<"pacib1716", (PACIB1716), 1>; - def : InstAlias<"autia1716", (AUTIA1716), 1>; - def : InstAlias<"autib1716", (AUTIB1716), 1>; - def : InstAlias<"xpaclri", (XPACLRI), 1>; - - multiclass SignAuth prefix, bits<3> prefix_z, string asm, - SDPatternOperator op> { - def IA : SignAuthOneData; - def IB : SignAuthOneData; - def DA : SignAuthOneData; - def DB : SignAuthOneData; - def IZA : SignAuthZero; - def DZA : SignAuthZero; - def IZB : SignAuthZero; - def DZB : SignAuthZero; - } - - defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>; - defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>; - - def XPACI : ClearAuth<0, "xpaci">; - def XPACD : ClearAuth<1, "xpacd">; - - def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>; - - // Combined Instructions - let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { 
- def BRAA : AuthBranchTwoOperands<0, 0, "braa">; - def BRAB : AuthBranchTwoOperands<0, 1, "brab">; - } - let isCall = 1, Defs = [LR], Uses = [SP] in { - def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">; - def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">; - } - - let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { - def BRAAZ : AuthOneOperand<0b000, 0, "braaz">; - def BRABZ : AuthOneOperand<0b000, 1, "brabz">; - } - let isCall = 1, Defs = [LR], Uses = [SP] in { - def BLRAAZ : AuthOneOperand<0b001, 0, "blraaz">; - def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">; - } - - let isReturn = 1, isTerminator = 1, isBarrier = 1 in { - def RETAA : AuthReturn<0b010, 0, "retaa">; - def RETAB : AuthReturn<0b010, 1, "retab">; - def ERETAA : AuthReturn<0b100, 0, "eretaa">; - def ERETAB : AuthReturn<0b100, 1, "eretab">; - } - - defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>; - defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>; - -} - -// v8.3a floating point conversion for javascript -let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in -def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32, - "fjcvtzs", - [(set GPR32:$Rd, - (int_aarch64_fjcvtzs FPR64:$Rn))]> { - let Inst{31} = 0; -} // HasJS, HasFPARMv8 - -// v8.4 Flag manipulation instructions -let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in { -def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> { - let Inst{20-5} = 0b0000001000000000; -} -def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">; -def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">; -def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif", - "{\t$Rn, $imm, $mask}">; -} // HasFlagM - -// v8.5 flag manipulation instructions -let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in { - -def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> { - let Inst{18-16} = 0b000; - let Inst{11-8} = 0b0000; - let 
Unpredictable{11-8} = 0b1111; - let Inst{7-5} = 0b001; -} - -def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> { - let Inst{18-16} = 0b000; - let Inst{11-8} = 0b0000; - let Unpredictable{11-8} = 0b1111; - let Inst{7-5} = 0b010; -} -} // HasAltNZCV - - -// Armv8.5-A speculation barrier -def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> { - let Inst{20-5} = 0b0001100110000111; - let Unpredictable{11-8} = 0b1111; - let Predicates = [HasSB]; - let hasSideEffects = 1; -} - -def : InstAlias<"clrex", (CLREX 0xf)>; -def : InstAlias<"isb", (ISB 0xf)>; -def : InstAlias<"ssbb", (DSB 0)>; -def : InstAlias<"pssbb", (DSB 4)>; -def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>; - -def MRS : MRSI; -def MSR : MSRI; -def MSRpstateImm1 : MSRpstateImm0_1; -def MSRpstateImm4 : MSRpstateImm0_15; - -def : Pat<(AArch64mrs imm:$id), - (MRS imm:$id)>; - -// The thread pointer (on Linux, at least, where this has been implemented) is -// TPIDR_EL0. -def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), - [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>; - -let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in { -def HWASAN_CHECK_MEMACCESS : Pseudo< - (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), - [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, - Sched<[]>; -} - -let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in { -def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo< - (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), - [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, - Sched<[]>; -} - -// The cycle counter PMC register is PMCCNTR_EL0. 
-let Predicates = [HasPerfMon] in -def : Pat<(readcyclecounter), (MRS 0xdce8)>; - -// FPCR register -def : Pat<(i64 (int_aarch64_get_fpcr)), (MRS 0xda20)>; -def : Pat<(int_aarch64_set_fpcr i64:$val), (MSR 0xda20, GPR64:$val)>; - -// Generic system instructions -def SYSxt : SystemXtI<0, "sys">; -def SYSLxt : SystemLXtI<1, "sysl">; - -def : InstAlias<"sys $op1, $Cn, $Cm, $op2", - (SYSxt imm0_7:$op1, sys_cr_op:$Cn, - sys_cr_op:$Cm, imm0_7:$op2, XZR)>; - - -let Predicates = [HasTME] in { - -def TSTART : TMSystemI<0b0000, "tstart", - [(set GPR64:$Rt, (int_aarch64_tstart))]>; - -def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>; - -def TCANCEL : TMSystemException<0b011, "tcancel", - [(int_aarch64_tcancel timm64_0_65535:$imm)]>; - -def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> { - let mayLoad = 0; - let mayStore = 0; -} -} // HasTME - -//===----------------------------------------------------------------------===// -// Move immediate instructions. -//===----------------------------------------------------------------------===// - -defm MOVK : InsertImmediate<0b11, "movk">; -defm MOVN : MoveImmediate<0b00, "movn">; - -let PostEncoderMethod = "fixMOVZ" in -defm MOVZ : MoveImmediate<0b10, "movz">; - -// First group of aliases covers an implicit "lsl #0". -def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>; -def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>; -def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>; -def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>; -def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>; -def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>; - -// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax. 
-def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; -def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; -def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; -def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; - -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; - -def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>; -def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>; -def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>; -def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>; - -def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; -def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; - -def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; -def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; - -def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>; -def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>; - -// Final group of aliases covers true "mov $Rd, $imm" cases. 
-multiclass movw_mov_alias { - def _asmoperand : AsmOperandClass { - let Name = basename # width # "_lsl" # shift # "MovAlias"; - let PredicateMethod = "is" # basename # "MovAlias<" # width # ", " - # shift # ">"; - let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">"; - } - - def _movimm : Operand { - let ParserMatchClass = !cast(NAME # "_asmoperand"); - } - - def : InstAlias<"mov $Rd, $imm", - (INST GPR:$Rd, !cast(NAME # "_movimm"):$imm, shift)>; -} - -defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>; -defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>; - -defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>; -defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>; -defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>; -defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>; - -defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>; -defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>; - -defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>; -defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>; -defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>; -defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>; - -let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1, - isAsCheapAsAMove = 1 in { -// FIXME: The following pseudo instructions are only needed because remat -// cannot handle multiple instructions. When that changes, we can select -// directly to the real instructions and get rid of these pseudos. - -def MOVi32imm - : Pseudo<(outs GPR32:$dst), (ins i32imm:$src), - [(set GPR32:$dst, imm:$src)]>, - Sched<[WriteImm]>; -def MOVi64imm - : Pseudo<(outs GPR64:$dst), (ins i64imm:$src), - [(set GPR64:$dst, imm:$src)]>, - Sched<[WriteImm]>; -} // isReMaterializable, isCodeGenOnly - -// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the -// eventual expansion code fewer bits to worry about getting right. 
Marshalling -// the types is a little tricky though: -def i64imm_32bit : ImmLeaf(Imm); -}]>; - -def s64imm_32bit : ImmLeaf(Imm); - return Imm64 >= std::numeric_limits::min() && - Imm64 <= std::numeric_limits::max(); -}]>; - -def trunc_imm : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32); -}]>; - -def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">, - GISDNodeXFormEquiv; - -let Predicates = [OptimizedGISelOrOtherSelector] in { -// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless -// copies. -def : Pat<(i64 i64imm_32bit:$src), - (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>; -} - -// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model). -def bitcast_fpimm_to_i32 : SDNodeXFormgetTargetConstant( - N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32); -}]>; - -def bitcast_fpimm_to_i64 : SDNodeXFormgetTargetConstant( - N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64); -}]>; - - -def : Pat<(f32 fpimm:$in), - (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>; -def : Pat<(f64 fpimm:$in), - (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>; - - -// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK -// sequences. 
-def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, - tglobaladdr:$g1, tglobaladdr:$g0), - (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0), - tglobaladdr:$g1, 16), - tglobaladdr:$g2, 32), - tglobaladdr:$g3, 48)>; - -def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, - tblockaddress:$g1, tblockaddress:$g0), - (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0), - tblockaddress:$g1, 16), - tblockaddress:$g2, 32), - tblockaddress:$g3, 48)>; - -def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2, - tconstpool:$g1, tconstpool:$g0), - (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0), - tconstpool:$g1, 16), - tconstpool:$g2, 32), - tconstpool:$g3, 48)>; - -def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2, - tjumptable:$g1, tjumptable:$g0), - (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0), - tjumptable:$g1, 16), - tjumptable:$g2, 32), - tjumptable:$g3, 48)>; - - -//===----------------------------------------------------------------------===// -// Arithmetic instructions. -//===----------------------------------------------------------------------===// - -// Add/subtract with carry. 
-defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>; -defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>; - -def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; -def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; -def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>; -def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>; - -// Add/subtract -defm ADD : AddSub<0, "add", "sub", add>; -defm SUB : AddSub<1, "sub", "add">; - -def : InstAlias<"mov $dst, $src", - (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>; -def : InstAlias<"mov $dst, $src", - (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>; -def : InstAlias<"mov $dst, $src", - (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>; -def : InstAlias<"mov $dst, $src", - (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>; - -defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">; -defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">; - -// Use SUBS instead of SUB to enable CSE between SUBS and SUB. 
-def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), - (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; -def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm), - (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>; -def : Pat<(sub GPR32:$Rn, GPR32:$Rm), - (SUBSWrr GPR32:$Rn, GPR32:$Rm)>; -def : Pat<(sub GPR64:$Rn, GPR64:$Rm), - (SUBSXrr GPR64:$Rn, GPR64:$Rm)>; -def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), - (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; -def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), - (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; -let AddedComplexity = 1 in { -def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3), - (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>; -def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3), - (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>; -} - -// Because of the immediate format for add/sub-imm instructions, the -// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). -// These patterns capture that transformation. -let AddedComplexity = 1 in { -def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm), - (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm), - (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; -def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm), - (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), - (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; -} - -// Because of the immediate format for add/sub-imm instructions, the -// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). -// These patterns capture that transformation. 
-let AddedComplexity = 1 in { -def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), - (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), - (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; -def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), - (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), - (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; -} - -def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; -def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; -def : InstAlias<"neg $dst, $src$shift", - (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; -def : InstAlias<"neg $dst, $src$shift", - (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; - -def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; -def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; -def : InstAlias<"negs $dst, $src$shift", - (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; -def : InstAlias<"negs $dst, $src$shift", - (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; - - -// Unsigned/Signed divide -defm UDIV : Div<0, "udiv", udiv>; -defm SDIV : Div<1, "sdiv", sdiv>; - -def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>; -def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>; -def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>; -def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>; - -// Variable shift -defm ASRV : Shift<0b10, "asr", sra>; -defm LSLV : Shift<0b00, "lsl", shl>; -defm LSRV : Shift<0b01, "lsr", srl>; -defm RORV : Shift<0b11, "ror", rotr>; - -def : ShiftAlias<"asrv", ASRVWr, GPR32>; -def : ShiftAlias<"asrv", ASRVXr, GPR64>; 
-def : ShiftAlias<"lslv", LSLVWr, GPR32>; -def : ShiftAlias<"lslv", LSLVXr, GPR64>; -def : ShiftAlias<"lsrv", LSRVWr, GPR32>; -def : ShiftAlias<"lsrv", LSRVXr, GPR64>; -def : ShiftAlias<"rorv", RORVWr, GPR32>; -def : ShiftAlias<"rorv", RORVXr, GPR64>; - -// Multiply-add -let AddedComplexity = 5 in { -defm MADD : MulAccum<0, "madd">; -defm MSUB : MulAccum<1, "msub">; - -def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)), - (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; -def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)), - (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; - -def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))), - (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; -def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))), - (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; -def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)), - (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; -def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)), - (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; -} // AddedComplexity = 5 - -let AddedComplexity = 5 in { -def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; -def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>; -def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>; -def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>; - -def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))), - (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; -def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))), - (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; -def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))), - (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; -def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))), - (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; -def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))), - (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; -def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))), - (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 
- -def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))), - (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; -def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))), - (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; - -def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))), - (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; -def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))), - (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; -def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))), - (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), - (MOVi32imm (trunc_imm imm:$C)), XZR)>; - -def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), - (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; -def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), - (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; -def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))), - (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), - (MOVi32imm (trunc_imm imm:$C)), XZR)>; - -def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)), - (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; -def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)), - (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; -def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)), - GPR64:$Ra)), - (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), - (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; - -def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), - (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; -def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), - (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; -def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32), - (s64imm_32bit:$C)))), - (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), - 
(MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; -} // AddedComplexity = 5 - -def : MulAccumWAlias<"mul", MADDWrrr>; -def : MulAccumXAlias<"mul", MADDXrrr>; -def : MulAccumWAlias<"mneg", MSUBWrrr>; -def : MulAccumXAlias<"mneg", MSUBXrrr>; -def : WideMulAccumAlias<"smull", SMADDLrrr>; -def : WideMulAccumAlias<"smnegl", SMSUBLrrr>; -def : WideMulAccumAlias<"umull", UMADDLrrr>; -def : WideMulAccumAlias<"umnegl", UMSUBLrrr>; - -// Multiply-high -def SMULHrr : MulHi<0b010, "smulh", mulhs>; -def UMULHrr : MulHi<0b110, "umulh", mulhu>; - -// CRC32 -def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">; -def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">; -def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">; -def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">; - -def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">; -def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">; -def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; -def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; - -// v8.1 atomic CAS -defm CAS : CompareAndSwap<0, 0, "">; -defm CASA : CompareAndSwap<1, 0, "a">; -defm CASL : CompareAndSwap<0, 1, "l">; -defm CASAL : CompareAndSwap<1, 1, "al">; - -// v8.1 atomic CASP -defm CASP : CompareAndSwapPair<0, 0, "">; -defm CASPA : CompareAndSwapPair<1, 0, "a">; -defm CASPL : CompareAndSwapPair<0, 1, "l">; -defm CASPAL : CompareAndSwapPair<1, 1, "al">; - -// v8.1 atomic SWP -defm SWP : Swap<0, 0, "">; -defm SWPA : Swap<1, 0, "a">; -defm SWPL : Swap<0, 1, "l">; -defm SWPAL : Swap<1, 1, "al">; - -// v8.1 atomic LD(register). 
Performs load and then ST(register) -defm LDADD : LDOPregister<0b000, "add", 0, 0, "">; -defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">; -defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">; -defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">; - -defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">; -defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">; -defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">; -defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">; - -defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">; -defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">; -defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">; -defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">; - -defm LDSET : LDOPregister<0b011, "set", 0, 0, "">; -defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">; -defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">; -defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">; - -defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">; -defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">; -defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">; -defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">; - -defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">; -defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">; -defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">; -defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">; - -defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">; -defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">; -defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">; -defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">; - -defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">; -defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">; -defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">; -defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">; - -// v8.1 atomic ST(register) as aliases to "LD(register) when Rt=xZR" -defm : STOPregister<"stadd","LDADD">; // STADDx -defm : STOPregister<"stclr","LDCLR">; // STCLRx -defm : 
STOPregister<"steor","LDEOR">; // STEORx -defm : STOPregister<"stset","LDSET">; // STSETx -defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx -defm : STOPregister<"stsmin","LDSMIN">;// STSMINx -defm : STOPregister<"stumax","LDUMAX">;// STUMAXx -defm : STOPregister<"stumin","LDUMIN">;// STUMINx - -// v8.5 Memory Tagging Extension -let Predicates = [HasMTE] in { - -def IRG : BaseTwoOperand<0b0100, GPR64sp, "irg", int_aarch64_irg, GPR64sp, GPR64>, - Sched<[]>{ - let Inst{31} = 1; -} -def GMI : BaseTwoOperand<0b0101, GPR64, "gmi", int_aarch64_gmi, GPR64sp>, Sched<[]>{ - let Inst{31} = 1; - let isNotDuplicable = 1; -} -def ADDG : AddSubG<0, "addg", null_frag>; -def SUBG : AddSubG<1, "subg", null_frag>; - -def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>; - -def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>; -def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{ - let Defs = [NZCV]; -} - -def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>; - -def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">; - -def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4), - (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>; -def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), - (LDG GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>; - -def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>; - -def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]", - (outs GPR64:$Rt), (ins GPR64sp:$Rn)>; -def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]", - (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>; -def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]", - (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> { - let Inst{23} = 0; -} - -defm STG : MemTagStore<0b00, "stg">; -defm STZG : MemTagStore<0b01, "stzg">; -defm ST2G : MemTagStore<0b10, "st2g">; -defm STZ2G : MemTagStore<0b11, "stz2g">; - -def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), - (STGOffset $Rn, 
$Rm, $imm)>; -def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), - (STZGOffset $Rn, $Rm, $imm)>; -def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), - (ST2GOffset $Rn, $Rm, $imm)>; -def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), - (STZ2GOffset $Rn, $Rm, $imm)>; - -defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">; -def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">; -def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">; - -def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), - (STGOffset GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>; - -def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2), - (STGPi $Rt, $Rt2, $Rn, $imm)>; - -def IRGstack - : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>, - Sched<[]>; -def TAGPstack - : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>, - Sched<[]>; - -// Explicit SP in the first operand prevents ShrinkWrap optimization -// from leaving this instruction out of the stack frame. When IRGstack -// is transformed into IRG, this operand is replaced with the actual -// register / expression for the tagged base pointer of the current function. -def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>; - -// Large STG to be expanded into a loop. $sz is the size, $Rn is start address. -// $Rn_wback is one past the end of the range. $Rm is the loop counter. 
-let isCodeGenOnly=1, mayStore=1 in { -def STGloop_wback - : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), - [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, - Sched<[WriteAdr, WriteST]>; - -def STZGloop_wback - : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), - [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, - Sched<[WriteAdr, WriteST]>; - -// A variant of the above where $Rn2 is an independent register not tied to the input register $Rn. -// Their purpose is to use a FrameIndex operand as $Rn (which of course can not be written back). -def STGloop - : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn), - [], "@earlyclobber $Rn2,@earlyclobber $Rm" >, - Sched<[WriteAdr, WriteST]>; - -def STZGloop - : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn), - [], "@earlyclobber $Rn2,@earlyclobber $Rm" >, - Sched<[WriteAdr, WriteST]>; -} - -} // Predicates = [HasMTE] - -//===----------------------------------------------------------------------===// -// Logical instructions. -//===----------------------------------------------------------------------===// - -// (immediate) -defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">; -defm AND : LogicalImm<0b00, "and", and, "bic">; -defm EOR : LogicalImm<0b10, "eor", xor, "eon">; -defm ORR : LogicalImm<0b01, "orr", or, "orn">; - -// FIXME: these aliases *are* canonical sometimes (when movz can't be -// used). Actually, it seems to be working right now, but putting logical_immXX -// here is a bit dodgy on the AsmParser side too. 
-def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR, - logical_imm32:$imm), 0>; -def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, - logical_imm64:$imm), 0>; - - -// (register) -defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>; -defm BICS : LogicalRegS<0b11, 1, "bics", - BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>; -defm AND : LogicalReg<0b00, 0, "and", and>; -defm BIC : LogicalReg<0b00, 1, "bic", - BinOpFrag<(and node:$LHS, (not node:$RHS))>>; -defm EON : LogicalReg<0b10, 1, "eon", - BinOpFrag<(not (xor node:$LHS, node:$RHS))>>; -defm EOR : LogicalReg<0b10, 0, "eor", xor>; -defm ORN : LogicalReg<0b01, 1, "orn", - BinOpFrag<(or node:$LHS, (not node:$RHS))>>; -defm ORR : LogicalReg<0b01, 0, "orr", or>; - -def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>; -def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>; - -def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>; -def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>; - -def : InstAlias<"mvn $Wd, $Wm$sh", - (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>; -def : InstAlias<"mvn $Xd, $Xm$sh", - (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>; - -def : InstAlias<"tst $src1, $src2", - (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>; -def : InstAlias<"tst $src1, $src2", - (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>; - -def : InstAlias<"tst $src1, $src2", - (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>; -def : InstAlias<"tst $src1, $src2", - (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>; - -def : InstAlias<"tst $src1, $src2$sh", - (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>; -def : InstAlias<"tst $src1, $src2$sh", - (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>; - - -def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>; -def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>; - - 
-//===----------------------------------------------------------------------===// -// One operand data processing instructions. -//===----------------------------------------------------------------------===// - -defm CLS : OneOperandData<0b101, "cls">; -defm CLZ : OneOperandData<0b100, "clz", ctlz>; -defm RBIT : OneOperandData<0b000, "rbit", bitreverse>; - -def REV16Wr : OneWRegData<0b001, "rev16", - UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>; -def REV16Xr : OneXRegData<0b001, "rev16", null_frag>; - -def : Pat<(cttz GPR32:$Rn), - (CLZWr (RBITWr GPR32:$Rn))>; -def : Pat<(cttz GPR64:$Rn), - (CLZXr (RBITXr GPR64:$Rn))>; -def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)), - (i32 1))), - (CLSWr GPR32:$Rn)>; -def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)), - (i64 1))), - (CLSXr GPR64:$Rn)>; -def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>; -def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>; - -// Unlike the other one operand instructions, the instructions with the "rev" -// mnemonic do *not* just different in the size bit, but actually use different -// opcode bits for the different sizes. -def REVWr : OneWRegData<0b010, "rev", bswap>; -def REVXr : OneXRegData<0b011, "rev", bswap>; -def REV32Xr : OneXRegData<0b010, "rev32", - UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>; - -def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>; - -// The bswap commutes with the rotr so we want a pattern for both possible -// orders. -def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>; -def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>; - -// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero. 
-def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>; -def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>; - -//===----------------------------------------------------------------------===// -// Bitfield immediate extraction instruction. -//===----------------------------------------------------------------------===// -let hasSideEffects = 0 in -defm EXTR : ExtractImm<"extr">; -def : InstAlias<"ror $dst, $src, $shift", - (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>; -def : InstAlias<"ror $dst, $src, $shift", - (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>; - -def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)), - (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>; -def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)), - (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>; - -//===----------------------------------------------------------------------===// -// Other bitfield immediate instructions. -//===----------------------------------------------------------------------===// -let hasSideEffects = 0 in { -defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">; -defm SBFM : BitfieldImm<0b00, "sbfm">; -defm UBFM : BitfieldImm<0b10, "ubfm">; -} - -def i32shift_a : Operand, SDNodeXFormgetZExtValue()) & 0x1f; - return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); -}]>; - -def i32shift_b : Operand, SDNodeXFormgetZExtValue(); - return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); -}]>; - -// min(7, 31 - shift_amt) -def i32shift_sext_i8 : Operand, SDNodeXFormgetZExtValue(); - enc = enc > 7 ? 7 : enc; - return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); -}]>; - -// min(15, 31 - shift_amt) -def i32shift_sext_i16 : Operand, SDNodeXFormgetZExtValue(); - enc = enc > 15 ? 
15 : enc; - return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); -}]>; - -def i64shift_a : Operand, SDNodeXFormgetZExtValue()) & 0x3f; - return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); -}]>; - -def i64shift_b : Operand, SDNodeXFormgetZExtValue(); - return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); -}]>; - -// min(7, 63 - shift_amt) -def i64shift_sext_i8 : Operand, SDNodeXFormgetZExtValue(); - enc = enc > 7 ? 7 : enc; - return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); -}]>; - -// min(15, 63 - shift_amt) -def i64shift_sext_i16 : Operand, SDNodeXFormgetZExtValue(); - enc = enc > 15 ? 15 : enc; - return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); -}]>; - -// min(31, 63 - shift_amt) -def i64shift_sext_i32 : Operand, SDNodeXFormgetZExtValue(); - enc = enc > 31 ? 31 : enc; - return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); -}]>; - -def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)), - (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), - (i64 (i32shift_b imm0_31:$imm)))>; -def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)), - (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), - (i64 (i64shift_b imm0_63:$imm)))>; - -let AddedComplexity = 10 in { -def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)), - (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; -def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)), - (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; -} - -def : InstAlias<"asr $dst, $src, $shift", - (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; -def : InstAlias<"asr $dst, $src, $shift", - (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; -def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; -def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; -def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; -def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; -def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; - -def : Pat<(srl 
GPR32:$Rn, (i64 imm0_31:$imm)), - (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; -def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)), - (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; - -def : InstAlias<"lsr $dst, $src, $shift", - (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; -def : InstAlias<"lsr $dst, $src, $shift", - (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; -def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; -def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; -def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; -def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; -def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; - -//===----------------------------------------------------------------------===// -// Conditional comparison instructions. -//===----------------------------------------------------------------------===// -defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>; -defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>; - -//===----------------------------------------------------------------------===// -// Conditional select instructions. 
-//===----------------------------------------------------------------------===// -defm CSEL : CondSelect<0, 0b00, "csel">; - -def inc : PatFrag<(ops node:$in), (add node:$in, 1)>; -defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>; -defm CSINV : CondSelectOp<1, 0b00, "csinv", not>; -defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>; - -def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), - (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; -def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), - (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; -def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), - (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; -def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), - (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; -def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), - (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; -def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), - (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; - -def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV), - (CSINCWr WZR, WZR, (i32 imm:$cc))>; -def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV), - (CSINCXr XZR, XZR, (i32 imm:$cc))>; -def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV), - (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>; -def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV), - (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>; -def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV), - (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>; -def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV), - (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; -def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV), - (CSINVWr WZR, WZR, (i32 imm:$cc))>; -def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV), - (CSINVXr XZR, XZR, (i32 
imm:$cc))>; -def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV), - (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>; -def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV), - (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>; -def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV), - (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>; -def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV), - (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; - -def : Pat<(add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), - (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>; -def : Pat<(add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))), - (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>; - -// The inverse of the condition code from the alias instruction is what is used -// in the aliased instruction. The parser all ready inverts the condition code -// for these aliases. -def : InstAlias<"cset $dst, $cc", - (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>; -def : InstAlias<"cset $dst, $cc", - (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>; - -def : InstAlias<"csetm $dst, $cc", - (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>; -def : InstAlias<"csetm $dst, $cc", - (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>; - -def : InstAlias<"cinc $dst, $src, $cc", - (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; -def : InstAlias<"cinc $dst, $src, $cc", - (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; - -def : InstAlias<"cinv $dst, $src, $cc", - (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; -def : InstAlias<"cinv $dst, $src, $cc", - (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; - -def : InstAlias<"cneg $dst, $src, $cc", - (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; -def : InstAlias<"cneg $dst, $src, $cc", - (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; - 
-//===----------------------------------------------------------------------===// -// PC-relative instructions. -//===----------------------------------------------------------------------===// -let isReMaterializable = 1 in { -let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in { -def ADR : ADRI<0, "adr", adrlabel, - [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>; -} // hasSideEffects = 0 - -def ADRP : ADRI<1, "adrp", adrplabel, - [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>; -} // isReMaterializable = 1 - -// page address of a constant pool entry, block address -def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>; -def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>; -def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>; -def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>; -def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>; -def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>; -def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>; - -//===----------------------------------------------------------------------===// -// Unconditional branch (register) instructions. -//===----------------------------------------------------------------------===// - -let isReturn = 1, isTerminator = 1, isBarrier = 1 in { -def RET : BranchReg<0b0010, "ret", []>; -def DRPS : SpecialReturn<0b0101, "drps">; -def ERET : SpecialReturn<0b0100, "eret">; -} // isReturn = 1, isTerminator = 1, isBarrier = 1 - -// Default to the LR register. 
-def : InstAlias<"ret", (RET LR)>; - -let isCall = 1, Defs = [LR], Uses = [SP] in { - def BLR : BranchReg<0b0001, "blr", []>; - def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>, - Sched<[WriteBrReg]>, - PseudoInstExpansion<(BLR GPR64:$Rn)>; - def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>, - Sched<[WriteBrReg]>; - def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>, - Sched<[WriteBrReg]>; -} // isCall - -def : Pat<(AArch64call GPR64:$Rn), - (BLR GPR64:$Rn)>, - Requires<[NoSLSBLRMitigation]>; -def : Pat<(AArch64call GPR64noip:$Rn), - (BLRNoIP GPR64noip:$Rn)>, - Requires<[SLSBLRMitigation]>; - -def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn), - (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>, - Requires<[NoSLSBLRMitigation]>; - -def : Pat<(AArch64call_bti GPR64:$Rn), - (BLR_BTI GPR64:$Rn)>, - Requires<[NoSLSBLRMitigation]>; - -let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { -def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>; -} // isBranch, isTerminator, isBarrier, isIndirectBranch - -// Create a separate pseudo-instruction for codegen to use so that we don't -// flag lr as used in every function. It'll be restored before the RET by the -// epilogue if it's legitimately used. -def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]>, - Sched<[WriteBrReg]> { - let isTerminator = 1; - let isBarrier = 1; - let isReturn = 1; -} - -// This is a directive-like pseudo-instruction. The purpose is to insert an -// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction -// (which in the usual case is a BLR). -let hasSideEffects = 1 in -def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> { - let AsmString = ".tlsdesccall $sym"; -} - -// Pseudo instruction to tell the streamer to emit a 'B' character into the -// augmentation string. -def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {} - -// FIXME: maybe the scratch register used shouldn't be fixed to X1? 
-// FIXME: can "hasSideEffects be dropped? -// This gets lowered to an instruction sequence which takes 16 bytes -let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1, Size = 16, - isCodeGenOnly = 1 in -def TLSDESC_CALLSEQ - : Pseudo<(outs), (ins i64imm:$sym), - [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>, - Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>; -def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym), - (TLSDESC_CALLSEQ texternalsym:$sym)>; - -//===----------------------------------------------------------------------===// -// Conditional branch (immediate) instruction. -//===----------------------------------------------------------------------===// -def Bcc : BranchCond<0, "b">; - -// Armv8.8-A variant form which hints to the branch predictor that -// this branch is very likely to go the same way nearly all the time -// (even though it is not known at compile time _which_ way that is). -def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>; - -//===----------------------------------------------------------------------===// -// Compare-and-branch instructions. -//===----------------------------------------------------------------------===// -defm CBZ : CmpBranch<0, "cbz", AArch64cbz>; -defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>; - -//===----------------------------------------------------------------------===// -// Test-bit-and-branch instructions. -//===----------------------------------------------------------------------===// -defm TBZ : TestBranch<0, "tbz", AArch64tbz>; -defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>; - -//===----------------------------------------------------------------------===// -// Unconditional branch (immediate) instructions. 
-//===----------------------------------------------------------------------===// -let isBranch = 1, isTerminator = 1, isBarrier = 1 in { -def B : BranchImm<0, "b", [(br bb:$addr)]>; -} // isBranch, isTerminator, isBarrier - -let isCall = 1, Defs = [LR], Uses = [SP] in { -def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>; -} // isCall -def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>; - -//===----------------------------------------------------------------------===// -// Exception generation instructions. -//===----------------------------------------------------------------------===// -let isTrap = 1 in { -def BRK : ExceptionGeneration<0b001, 0b00, "brk">; -} -def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">; -def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">; -def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>; -def HLT : ExceptionGeneration<0b010, 0b00, "hlt">; -def HVC : ExceptionGeneration<0b000, 0b10, "hvc">; -def SMC : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>; -def SVC : ExceptionGeneration<0b000, 0b01, "svc">; - -// DCPSn defaults to an immediate operand of zero if unspecified. -def : InstAlias<"dcps1", (DCPS1 0)>; -def : InstAlias<"dcps2", (DCPS2 0)>; -def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>; - -def UDF : UDFType<0, "udf">; - -//===----------------------------------------------------------------------===// -// Load instructions. 
-//===----------------------------------------------------------------------===// - -// Pair (indexed, offset) -defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">; -defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">; -defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">; -defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">; -defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">; - -defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">; - -// Pair (pre-indexed) -def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">; -def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">; -def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">; -def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">; -def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">; - -def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">; - -// Pair (post-indexed) -def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">; -def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">; -def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">; -def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">; -def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">; - -def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">; - - -// Pair (no allocate) -defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">; -defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">; -defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">; -defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">; -defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">; - -def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)), - (LDPXi GPR64sp:$Rn, simm7s8:$offset)>; - -//--- -// (register offset) -//--- - -// Integer -defm LDRBB : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", i32, zextloadi8>; -defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, 
zextloadi16>; -defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>; -defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>; - -// Floating-point -defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", untyped, load>; -defm LDRH : Load16RO<0b01, 1, 0b01, FPR16Op, "ldr", f16, load>; -defm LDRS : Load32RO<0b10, 1, 0b01, FPR32Op, "ldr", f32, load>; -defm LDRD : Load64RO<0b11, 1, 0b01, FPR64Op, "ldr", f64, load>; -defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>; - -// Load sign-extended half-word -defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>; -defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>; - -// Load sign-extended byte -defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>; -defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>; - -// Load sign-extended word -defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>; - -// Pre-fetch. -defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">; - -// For regular load, we do not have any alignment requirement. -// Thus, it is safe to directly map the vector loads with interesting -// addressing modes. -// FIXME: We could do the same for bitconvert to floating point vectors. 
-multiclass ScalToVecROLoadPat { - def : Pat<(VecTy (scalar_to_vector (ScalTy - (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))), - (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), - (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset), - sub)>; - - def : Pat<(VecTy (scalar_to_vector (ScalTy - (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))), - (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), - (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset), - sub)>; -} - -let AddedComplexity = 10 in { -defm : ScalToVecROLoadPat; -defm : ScalToVecROLoadPat; - -defm : ScalToVecROLoadPat; -defm : ScalToVecROLoadPat; - -defm : ScalToVecROLoadPat; -defm : ScalToVecROLoadPat; - -defm : ScalToVecROLoadPat; -defm : ScalToVecROLoadPat; - -defm : ScalToVecROLoadPat; -defm : ScalToVecROLoadPat; - -defm : ScalToVecROLoadPat; - -defm : ScalToVecROLoadPat; - - -def : Pat <(v1i64 (scalar_to_vector (i64 - (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, - ro_Wextend64:$extend))))), - (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; - -def : Pat <(v1i64 (scalar_to_vector (i64 - (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend64:$extend))))), - (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; -} - -// Match all load 64 bits width whose type is compatible with FPR64 -multiclass VecROLoadPat { - - def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), - (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; - - def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), - (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; -} - -let AddedComplexity = 10 in { -let Predicates = [IsLE] in { - // We must do vector loads with LD1 in big-endian. 
- defm : VecROLoadPat; - defm : VecROLoadPat; - defm : VecROLoadPat; - defm : VecROLoadPat; - defm : VecROLoadPat; - defm : VecROLoadPat; -} - -defm : VecROLoadPat; -defm : VecROLoadPat; - -// Match all load 128 bits width whose type is compatible with FPR128 -let Predicates = [IsLE] in { - // We must do vector loads with LD1 in big-endian. - defm : VecROLoadPat; - defm : VecROLoadPat; - defm : VecROLoadPat; - defm : VecROLoadPat; - defm : VecROLoadPat; - defm : VecROLoadPat; - defm : VecROLoadPat; - defm : VecROLoadPat; -} -} // AddedComplexity = 10 - -// zextload -> i64 -multiclass ExtLoadTo64ROPat { - def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), - (SUBREG_TO_REG (i64 0), - (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), - sub_32)>; - - def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), - (SUBREG_TO_REG (i64 0), - (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), - sub_32)>; -} - -let AddedComplexity = 10 in { - defm : ExtLoadTo64ROPat; - defm : ExtLoadTo64ROPat; - defm : ExtLoadTo64ROPat; - - // zextloadi1 -> zextloadi8 - defm : ExtLoadTo64ROPat; - - // extload -> zextload - defm : ExtLoadTo64ROPat; - defm : ExtLoadTo64ROPat; - defm : ExtLoadTo64ROPat; - - // extloadi1 -> zextloadi8 - defm : ExtLoadTo64ROPat; -} - - -// zextload -> i64 -multiclass ExtLoadTo32ROPat { - def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), - (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; - - def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), - (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; - -} - -let AddedComplexity = 10 in { - // extload -> zextload - defm : ExtLoadTo32ROPat; - defm : ExtLoadTo32ROPat; - defm : ExtLoadTo32ROPat; - - // zextloadi1 -> zextloadi8 - defm : ExtLoadTo32ROPat; -} - -//--- -// (unsigned immediate) -//--- -defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr", - [(set GPR64z:$Rt, - (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>; -defm 
LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr", - [(set GPR32z:$Rt, - (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; -defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr", - [(set FPR8Op:$Rt, - (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>; -defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr", - [(set (f16 FPR16Op:$Rt), - (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>; -defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr", - [(set (f32 FPR32Op:$Rt), - (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; -defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr", - [(set (f64 FPR64Op:$Rt), - (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>; -defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr", - [(set (f128 FPR128Op:$Rt), - (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>; - -// bf16 load pattern -def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), - (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; - -// For regular load, we do not have any alignment requirement. -// Thus, it is safe to directly map the vector loads with interesting -// addressing modes. -// FIXME: We could do the same for bitconvert to floating point vectors. 
-def : Pat <(v8i8 (scalar_to_vector (i32 - (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), - (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), - (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; -def : Pat <(v16i8 (scalar_to_vector (i32 - (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; -def : Pat <(v4i16 (scalar_to_vector (i32 - (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), - (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), - (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; -def : Pat <(v8i16 (scalar_to_vector (i32 - (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), - (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; -def : Pat <(v2i32 (scalar_to_vector (i32 - (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), - (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), - (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; -def : Pat <(v4i32 (scalar_to_vector (i32 - (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), - (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; -def : Pat <(v1i64 (scalar_to_vector (i64 - (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), - (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; -def : Pat <(v2i64 (scalar_to_vector (i64 - (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), - (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>; - -// Match all load 64 bits width whose type is compatible with FPR64 -let Predicates = [IsLE] in { - // We must use LD1 to perform vector loads in big-endian. 
- def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), - (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; - def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), - (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; - def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), - (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; - def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), - (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; - def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), - (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; - def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), - (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; -} -def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), - (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; -def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), - (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; - -// Match all load 128 bits width whose type is compatible with FPR128 -let Predicates = [IsLE] in { - // We must use LD1 to perform vector loads in big-endian. 
- def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), - (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; - def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), - (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; - def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), - (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; - def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), - (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; - def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), - (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; - def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), - (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; - def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), - (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; - def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), - (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; -} -def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), - (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; - -defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh", - [(set GPR32:$Rt, - (zextloadi16 (am_indexed16 GPR64sp:$Rn, - uimm12s2:$offset)))]>; -defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb", - [(set GPR32:$Rt, - (zextloadi8 (am_indexed8 GPR64sp:$Rn, - uimm12s1:$offset)))]>; -// zextload -> i64 -def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), - (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; -def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), - (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; - -// zextloadi1 -> zextloadi8 -def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), - (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; -def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), - (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; - 
-// extload -> zextload -def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), - (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>; -def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), - (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; -def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), - (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; -def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), - (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; -def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), - (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; -def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), - (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; -def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), - (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; - -// load sign-extended half-word -defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh", - [(set GPR32:$Rt, - (sextloadi16 (am_indexed16 GPR64sp:$Rn, - uimm12s2:$offset)))]>; -defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh", - [(set GPR64:$Rt, - (sextloadi16 (am_indexed16 GPR64sp:$Rn, - uimm12s2:$offset)))]>; - -// load sign-extended byte -defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb", - [(set GPR32:$Rt, - (sextloadi8 (am_indexed8 GPR64sp:$Rn, - uimm12s1:$offset)))]>; -defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb", - [(set GPR64:$Rt, - (sextloadi8 (am_indexed8 GPR64sp:$Rn, - uimm12s1:$offset)))]>; - -// load sign-extended word -defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw", - [(set GPR64:$Rt, - (sextloadi32 (am_indexed32 GPR64sp:$Rn, - uimm12s4:$offset)))]>; - -// load zero-extended word -def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), - (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), 
sub_32)>; - -// Pre-fetch. -def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", - [(AArch64Prefetch imm:$Rt, - (am_indexed64 GPR64sp:$Rn, - uimm12s8:$offset))]>; - -def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>; - -//--- -// (literal) - -def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{ - if (auto *G = dyn_cast(N)) { - const DataLayout &DL = MF->getDataLayout(); - Align Align = G->getGlobal()->getPointerAlignment(DL); - return Align >= 4 && G->getOffset() % 4 == 0; - } - if (auto *C = dyn_cast(N)) - return C->getAlign() >= 4 && C->getOffset() % 4 == 0; - return false; -}]>; - -def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr", - [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>; -def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr", - [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>; -def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr", - [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>; -def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr", - [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>; -def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr", - [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>; - -// load sign-extended word -def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw", - [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>; - -let AddedComplexity = 20 in { -def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))), - (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>; -} - -// prefetch -def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>; -// [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>; - -//--- -// (unscaled immediate) -defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur", - [(set GPR64z:$Rt, - (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; -defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur", - [(set GPR32z:$Rt, - (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; -defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur", - [(set 
FPR8Op:$Rt, - (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; -defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur", - [(set (f16 FPR16Op:$Rt), - (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; -defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur", - [(set (f32 FPR32Op:$Rt), - (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; -defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur", - [(set (f64 FPR64Op:$Rt), - (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; -defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur", - [(set (f128 FPR128Op:$Rt), - (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>; - -defm LDURHH - : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh", - [(set GPR32:$Rt, - (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; -defm LDURBB - : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb", - [(set GPR32:$Rt, - (zextloadi8 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; - -// Match all load 64 bits width whose type is compatible with FPR64 -let Predicates = [IsLE] in { - def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), - (LDURDi GPR64sp:$Rn, simm9:$offset)>; - def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), - (LDURDi GPR64sp:$Rn, simm9:$offset)>; - def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), - (LDURDi GPR64sp:$Rn, simm9:$offset)>; - def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), - (LDURDi GPR64sp:$Rn, simm9:$offset)>; - def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), - (LDURDi GPR64sp:$Rn, simm9:$offset)>; -} -def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), - (LDURDi GPR64sp:$Rn, simm9:$offset)>; -def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), - (LDURDi GPR64sp:$Rn, simm9:$offset)>; - -// Match all load 128 bits width whose type is compatible with FPR128 -let Predicates = [IsLE] in { - def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), - (LDURQi 
GPR64sp:$Rn, simm9:$offset)>; - def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), - (LDURQi GPR64sp:$Rn, simm9:$offset)>; - def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), - (LDURQi GPR64sp:$Rn, simm9:$offset)>; - def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), - (LDURQi GPR64sp:$Rn, simm9:$offset)>; - def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), - (LDURQi GPR64sp:$Rn, simm9:$offset)>; - def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), - (LDURQi GPR64sp:$Rn, simm9:$offset)>; - def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), - (LDURQi GPR64sp:$Rn, simm9:$offset)>; -} - -// anyext -> zext -def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), - (LDURHHi GPR64sp:$Rn, simm9:$offset)>; -def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), - (LDURBBi GPR64sp:$Rn, simm9:$offset)>; -def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), - (LDURBBi GPR64sp:$Rn, simm9:$offset)>; -def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), - (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; -def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), - (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; -def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), - (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; -def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), - (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; -// unscaled zext -def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), - (LDURHHi GPR64sp:$Rn, simm9:$offset)>; -def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), - (LDURBBi GPR64sp:$Rn, simm9:$offset)>; -def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), - (LDURBBi GPR64sp:$Rn, 
simm9:$offset)>; -def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), - (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; -def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), - (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; -def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), - (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; -def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), - (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; - - -//--- -// LDR mnemonics fall back to LDUR for negative or unaligned offsets. - -// Define new assembler match classes as we want to only match these when -// the don't otherwise match the scaled addressing mode for LDR/STR. Don't -// associate a DiagnosticType either, as we want the diagnostic for the -// canonical form (the scaled operand) to take precedence. -class SImm9OffsetOperand : AsmOperandClass { - let Name = "SImm9OffsetFB" # Width; - let PredicateMethod = "isSImm9OffsetFB<" # Width # ">"; - let RenderMethod = "addImmOperands"; -} - -def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>; -def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>; -def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>; -def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>; -def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>; - -def simm9_offset_fb8 : Operand { - let ParserMatchClass = SImm9OffsetFB8Operand; -} -def simm9_offset_fb16 : Operand { - let ParserMatchClass = SImm9OffsetFB16Operand; -} -def simm9_offset_fb32 : Operand { - let ParserMatchClass = SImm9OffsetFB32Operand; -} -def simm9_offset_fb64 : Operand { - let ParserMatchClass = SImm9OffsetFB64Operand; -} -def simm9_offset_fb128 : Operand { - let ParserMatchClass = SImm9OffsetFB128Operand; -} - -def : InstAlias<"ldr $Rt, [$Rn, $offset]", - (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; -def : 
InstAlias<"ldr $Rt, [$Rn, $offset]", - (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; -def : InstAlias<"ldr $Rt, [$Rn, $offset]", - (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; -def : InstAlias<"ldr $Rt, [$Rn, $offset]", - (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; -def : InstAlias<"ldr $Rt, [$Rn, $offset]", - (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; -def : InstAlias<"ldr $Rt, [$Rn, $offset]", - (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; -def : InstAlias<"ldr $Rt, [$Rn, $offset]", - (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; - -// zextload -> i64 -def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), - (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; -def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), - (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; - -// load sign-extended half-word -defm LDURSHW - : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh", - [(set GPR32:$Rt, - (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; -defm LDURSHX - : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh", - [(set GPR64:$Rt, - (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; - -// load sign-extended byte -defm LDURSBW - : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb", - [(set GPR32:$Rt, - (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; -defm LDURSBX - : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb", - [(set GPR64:$Rt, - (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; - -// load sign-extended word -defm LDURSW - : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw", - [(set GPR64:$Rt, - (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; - -// zero and sign extending aliases from generic LDR* mnemonics to LDUR*. 
-def : InstAlias<"ldrb $Rt, [$Rn, $offset]", - (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; -def : InstAlias<"ldrh $Rt, [$Rn, $offset]", - (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; -def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", - (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; -def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", - (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; -def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", - (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; -def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", - (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; -def : InstAlias<"ldrsw $Rt, [$Rn, $offset]", - (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; - -// Pre-fetch. -defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum", - [(AArch64Prefetch imm:$Rt, - (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; - -//--- -// (unscaled immediate, unprivileged) -defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">; -defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">; - -defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">; -defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">; - -// load sign-extended half-word -defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">; -defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">; - -// load sign-extended byte -defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">; -defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">; - -// load sign-extended word -defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">; - -//--- -// (immediate pre-indexed) -def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">; -def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">; -def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op, "ldr">; -def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">; -def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">; -def 
LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">; -def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">; - -// load sign-extended half-word -def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">; -def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">; - -// load sign-extended byte -def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">; -def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">; - -// load zero-extended byte -def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">; -def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">; - -// load sign-extended word -def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">; - -//--- -// (immediate post-indexed) -def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">; -def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">; -def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op, "ldr">; -def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">; -def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">; -def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">; -def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">; - -// load sign-extended half-word -def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">; -def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">; - -// load sign-extended byte -def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">; -def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">; - -// load zero-extended byte -def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">; -def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">; - -// load sign-extended word -def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">; - -//===----------------------------------------------------------------------===// -// Store instructions. -//===----------------------------------------------------------------------===// - -// Pair (indexed, offset) -// FIXME: Use dedicated range-checked addressing mode operand here. 
-// Store pair: signed-offset forms first, then the pre-indexed, post-indexed
-// and no-allocate variants of the same register classes.
-defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
-defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
-defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
-defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
-defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;
-
-// Pair (pre-indexed)
-def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
-def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
-def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
-def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
-def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
-
-// Pair (post-indexed)
-def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
-def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
-def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
-def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
-def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
-
-// Pair (no allocate)
-defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
-defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
-defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
-defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
-defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;
-
-// Select STPXi / STNPQi for the AArch64stp / AArch64stnp nodes.
-def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
-          (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;
-
-def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
-          (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>;
-
-
-//---
-// (Register offset)
-
-// Integer
-defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
-defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
-defm STRW : Store32RO<0b10, 0, 0b00, GPR32, "str", i32, store>;
-defm STRX : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>;
-
-
-// Floating-point
-defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", untyped, store>;
-defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16, store>;
-defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32, store>;
-defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64, store>;
-defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;
-
-// f128 register-offset stores, only when UseSTRQro holds.
-let Predicates = [UseSTRQro], AddedComplexity = 10 in {
-  def : Pat<(store (f128 FPR128:$Rt),
-                   (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
-                                   ro_Wextend128:$extend)),
-            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
-  // The X-register form names ro_Xextend128 on both sides so the result
-  // operand agrees with the operand bound in the source pattern.
-  def : Pat<(store (f128 FPR128:$Rt),
-                   (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
-                                   ro_Xextend128:$extend)),
-            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
-}
-
-// Truncating store of a GPR64 value: store the sub_32 subregister through
-// the W-indexed (STRW) or X-indexed (STRX) register-offset instruction.
-multiclass TruncStoreFrom64ROPat {
-
-  def : Pat<(storeop GPR64:$Rt,
-                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
-            (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
-                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
-
-  def : Pat<(storeop GPR64:$Rt,
-                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
-            (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
-                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
-}
-
-let AddedComplexity = 10 in {
-  // truncstore i64
-  defm : TruncStoreFrom64ROPat;
-  defm : TruncStoreFrom64ROPat;
-  defm : TruncStoreFrom64ROPat;
-}
-
-// Register-offset store of a vector-typed value held in an FPR register:
-// one pattern for the W-indexed form and one for the X-indexed form.
-multiclass VecROStorePat {
-  def : Pat<(store (VecTy FPR:$Rt),
-                   (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
-            (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
-
-  def : Pat<(store (VecTy FPR:$Rt),
-                   (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
-            (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
-}
-
-let AddedComplexity = 10 in {
-// Match all store 64 bits width whose type is compatible with FPR64
-let Predicates = [IsLE] in {
-  // We must use ST1 to store vectors in big-endian.
-  defm : VecROStorePat;
-  defm : VecROStorePat;
-  defm : VecROStorePat;
-  defm : VecROStorePat;
-  defm : VecROStorePat;
-  defm : VecROStorePat;
-}
-
-defm : VecROStorePat;
-defm : VecROStorePat;
-
-// Match all store 128 bits width whose type is compatible with FPR128
-let Predicates = [IsLE, UseSTRQro] in {
-  // We must use ST1 to store vectors in big-endian.
-  defm : VecROStorePat;
-  defm : VecROStorePat;
-  defm : VecROStorePat;
-  defm : VecROStorePat;
-  defm : VecROStorePat;
-  defm : VecROStorePat;
-  defm : VecROStorePat;
-  defm : VecROStorePat;
-}
-} // AddedComplexity = 10
-
-// Match stores from lane 0 to the appropriate subreg's store.
-multiclass VecROStoreLane0Pat {
-
-  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
-                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
-            (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
-                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
-
-  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
-                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
-            (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
-                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
-}
-
-let AddedComplexity = 19 in {
-  defm : VecROStoreLane0Pat;
-  defm : VecROStoreLane0Pat;
-  defm : VecROStoreLane0Pat;
-  defm : VecROStoreLane0Pat;
-  defm : VecROStoreLane0Pat;
-  defm : VecROStoreLane0Pat;
-}
-
-//---
-// (unsigned immediate)
-defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str",
-                     [(store GPR64z:$Rt,
-                             (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
-defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
-                     [(store GPR32z:$Rt,
-                             (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
-defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
-                    [(store FPR8Op:$Rt,
-                            (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
-defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
-                    [(store (f16 FPR16Op:$Rt),
-                            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
-defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str",
-                    [(store (f32 FPR32Op:$Rt),
-                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
-defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str",
-                    [(store (f64 FPR64Op:$Rt),
-                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
-defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>;
-
-defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh",
-                      [(truncstorei16 GPR32z:$Rt,
-                                      (am_indexed16 GPR64sp:$Rn,
-                                                    uimm12s2:$offset))]>;
-defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb",
-                      [(truncstorei8 GPR32z:$Rt,
-                                     (am_indexed8 GPR64sp:$Rn,
-                                                  uimm12s1:$offset))]>;
-
-// bf16 store pattern
-def : Pat<(store (bf16 FPR16Op:$Rt),
-                 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
-          (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>;
-
-let AddedComplexity = 10 in {
-
-// Match all store 64 bits width whose type is compatible with FPR64
-def : Pat<(store (v1i64 FPR64:$Rt),
-                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
-          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
-def : Pat<(store (v1f64 FPR64:$Rt),
-                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
-          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
-
-let Predicates = [IsLE] in {
-  // We must use ST1 to store vectors in big-endian.
-  def : Pat<(store (v2f32 FPR64:$Rt),
-                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
-            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
-  def : Pat<(store (v8i8 FPR64:$Rt),
-                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
-            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
-  def : Pat<(store (v4i16 FPR64:$Rt),
-                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
-            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
-  def : Pat<(store (v2i32 FPR64:$Rt),
-                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
-            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
-  def : Pat<(store (v4f16 FPR64:$Rt),
-                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
-            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
-  def : Pat<(store (v4bf16 FPR64:$Rt),
-                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
-            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
-}
-
-// Match all store 128 bits width whose type is compatible with FPR128
-def : Pat<(store (f128 FPR128:$Rt),
-                 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
-          (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
-
-let Predicates = [IsLE] in {
-  // We must use ST1 to store vectors in big-endian.
-  def : Pat<(store (v4f32 FPR128:$Rt),
-                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
-            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
-  def : Pat<(store (v2f64 FPR128:$Rt),
-                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
-            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
-  def : Pat<(store (v16i8 FPR128:$Rt),
-                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
-            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
-  def : Pat<(store (v8i16 FPR128:$Rt),
-                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
-            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
-  def : Pat<(store (v4i32 FPR128:$Rt),
-                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
-            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
-  def : Pat<(store (v2i64 FPR128:$Rt),
-                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
-            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
-  def : Pat<(store (v8f16 FPR128:$Rt),
-                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
-            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
-  def : Pat<(store (v8bf16 FPR128:$Rt),
-                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
-            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
-}
-
-// truncstore i64
-def : Pat<(truncstorei32 GPR64:$Rt,
-                         (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
-  (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>;
-def : Pat<(truncstorei16 GPR64:$Rt,
-                         (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
-  (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>;
-def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
-  (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>;
-
-} // AddedComplexity = 10
-
-// Match stores from lane 0 to the appropriate subreg's store.
-// Store of element 0 of a 128-bit vector: extract the SubRegIdx subregister
-// and store it with the scalar STR instruction passed to the multiclass.
-multiclass VecStoreLane0Pat {
-  def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), 0)),
-                     (UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
-            (STR (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
-                 GPR64sp:$Rn, IndexType:$offset)>;
-}
-
-let AddedComplexity = 19 in {
-  defm : VecStoreLane0Pat;
-  defm : VecStoreLane0Pat;
-  defm : VecStoreLane0Pat;
-  defm : VecStoreLane0Pat;
-  defm : VecStoreLane0Pat;
-  defm : VecStoreLane0Pat;
-}
-
-//---
-// (unscaled immediate)
-defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur",
-                           [(store GPR64z:$Rt,
-                                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
-defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
-                           [(store GPR32z:$Rt,
-                                   (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
-defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
-                           [(store FPR8Op:$Rt,
-                                   (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
-defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
-                           [(store (f16 FPR16Op:$Rt),
-                                   (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
-defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur",
-                           [(store (f32 FPR32Op:$Rt),
-                                   (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
-defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur",
-                           [(store (f64 FPR64Op:$Rt),
-                                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
-defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur",
-                           [(store (f128 FPR128Op:$Rt),
-                                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
-defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh",
-                            [(truncstorei16 GPR32z:$Rt,
-                                            (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
-defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
-                            [(truncstorei8 GPR32z:$Rt,
-                                           (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
-
-// Armv8.4 Weaker Release Consistency enhancements
-// LDAPR & STLR with Immediate Offset instructions
-let Predicates = [HasRCPC_IMMO] in {
-defm STLURB : BaseStoreUnscaleV84<"stlurb", 0b00, 0b00, GPR32>;
-defm STLURH : BaseStoreUnscaleV84<"stlurh", 0b01, 0b00, GPR32>;
-defm STLURW : BaseStoreUnscaleV84<"stlur", 0b10, 0b00, GPR32>;
-defm STLURX : BaseStoreUnscaleV84<"stlur", 0b11, 0b00, GPR64>;
-defm LDAPURB : BaseLoadUnscaleV84<"ldapurb", 0b00, 0b01, GPR32>;
-defm LDAPURSBW : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>;
-defm LDAPURSBX : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>;
-defm LDAPURH : BaseLoadUnscaleV84<"ldapurh", 0b01, 0b01, GPR32>;
-defm LDAPURSHW : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>;
-defm LDAPURSHX : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>;
-defm LDAPUR : BaseLoadUnscaleV84<"ldapur", 0b10, 0b01, GPR32>;
-defm LDAPURSW : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>;
-defm LDAPURX : BaseLoadUnscaleV84<"ldapur", 0b11, 0b01, GPR64>;
-}
-
-// Match all store 64 bits width whose type is compatible with FPR64
-def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
-          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
-def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
-          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
-
-let AddedComplexity = 10 in {
-
-let Predicates = [IsLE] in {
-  // We must use ST1 to store vectors in big-endian.
-  def : Pat<(store (v2f32 FPR64:$Rt),
-                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
-            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
-  def : Pat<(store (v8i8 FPR64:$Rt),
-                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
-            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
-  def : Pat<(store (v4i16 FPR64:$Rt),
-                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
-            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
-  def : Pat<(store (v2i32 FPR64:$Rt),
-                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
-            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
-  def : Pat<(store (v4f16 FPR64:$Rt),
-                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
-            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
-  def : Pat<(store (v4bf16 FPR64:$Rt),
-                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
-            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
-}
-
-// Match all store 128 bits width whose type is compatible with FPR128
-def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
-          (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
-
-let Predicates = [IsLE] in {
-  // We must use ST1 to store vectors in big-endian.
-  // One pattern per 128-bit vector type; each type is listed exactly once.
-  def : Pat<(store (v4f32 FPR128:$Rt),
-                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
-            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
-  def : Pat<(store (v2f64 FPR128:$Rt),
-                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
-            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
-  def : Pat<(store (v16i8 FPR128:$Rt),
-                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
-            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
-  def : Pat<(store (v8i16 FPR128:$Rt),
-                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
-            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
-  def : Pat<(store (v4i32 FPR128:$Rt),
-                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
-            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
-  def : Pat<(store (v2i64 FPR128:$Rt),
-                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
-            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
-  def : Pat<(store (v8f16 FPR128:$Rt),
-                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
-            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
-  def : Pat<(store (v8bf16 FPR128:$Rt),
-                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
-            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
-}
-
-} // AddedComplexity = 10
-
-// unscaled i64 truncating stores
-def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
-  (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
-def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
-  (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
-def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
-  (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
-
-// Match stores from lane 0 to the appropriate subreg's store.
-// Thin wrapper that instantiates VecStoreLane0Pat.
-multiclass VecStoreULane0Pat {
-  defm : VecStoreLane0Pat;
-}
-
-let AddedComplexity = 19 in {
-  defm : VecStoreULane0Pat;
-  defm : VecStoreULane0Pat;
-  defm : VecStoreULane0Pat;
-  defm : VecStoreULane0Pat;
-  defm : VecStoreULane0Pat;
-  defm : VecStoreULane0Pat;
-}
-
-//---
-// STR mnemonics fall back to STUR for negative or unaligned offsets.
-def : InstAlias<"str $Rt, [$Rn, $offset]",
-                (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
-def : InstAlias<"str $Rt, [$Rn, $offset]",
-                (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
-def : InstAlias<"str $Rt, [$Rn, $offset]",
-                (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
-def : InstAlias<"str $Rt, [$Rn, $offset]",
-                (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
-def : InstAlias<"str $Rt, [$Rn, $offset]",
-                (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
-def : InstAlias<"str $Rt, [$Rn, $offset]",
-                (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
-def : InstAlias<"str $Rt, [$Rn, $offset]",
-                (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
-
-def : InstAlias<"strb $Rt, [$Rn, $offset]",
-                (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
-def : InstAlias<"strh $Rt, [$Rn, $offset]",
-                (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
-
-//---
-// (unscaled immediate, unprivileged)
-defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
-defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;
-
-defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
-defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;
-
-//---
-// (immediate pre-indexed)
-def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str", pre_store, i32>;
-def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str", pre_store, i64>;
-def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, untyped>;
-def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str", pre_store, f16>;
-def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str", pre_store, f32>;
-def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str", pre_store, f64>;
-def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>;
-
-def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8, i32>;
-def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>;
-
-// truncstore i64
-def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
-  (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
-           simm9:$off)>;
-def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
-  (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
-            simm9:$off)>;
-def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
-  (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
-            simm9:$off)>;
-
-// Pre-indexed stores of 64-bit vector types all select STRDpre.
-// NOTE(review): the post-indexed section also has bf16, v4bf16 and v8bf16
-// patterns; none appear in the pre-indexed lists here. Confirm whether that
-// omission is intentional.
-def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
-
-// Pre-indexed stores of 128-bit vector types all select STRQpre.
-def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
-
-//---
-// (immediate post-indexed)
-def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>;
-def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>;
-def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, untyped>;
-def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>;
-def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>;
-def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>;
-def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>;
-
-def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>;
-def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>;
-
-// truncstore i64
-def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
-  (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
-            simm9:$off)>;
-def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
-  (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
-             simm9:$off)>;
-def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
-  (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
-             simm9:$off)>;
-
-def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;
-
-def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
-// Post-indexed stores of the remaining 64-bit vector types select STRDpost.
-def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
-
-// Post-indexed stores of 128-bit vector types select STRQpost.
-def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
-def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
-          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
-
-//===----------------------------------------------------------------------===//
-// Load/store exclusive instructions.
-//===----------------------------------------------------------------------===//
-
-// Load-acquire (ldar*), grouped by mnemonic suffix: word, doubleword, byte,
-// halfword. The same W/X/B/H ordering is used by every group below.
-def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">;
-def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">;
-def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
-def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">;
-
-def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
-def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
-def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
-def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;
-
-def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
-def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
-def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
-def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;
-
-def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">;
-def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">;
-def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
-def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">;
-
-def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
-def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
-def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
-def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;
-
-def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
-def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
-def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
-def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;
-
-// Pair forms exist only for the W (GPR32) and X (GPR64) register classes.
-def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
-def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;
-
-def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
-def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;
-
-def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
-def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;
-
-def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
-def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
-
-let Predicates = [HasLOR] in {
-  // v8.1a "Limited Order Region" extension load-acquire instructions
-  def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
-  def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
-  def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
-  def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;
-
-  // v8.1a "Limited Order Region" extension store-release instructions
-  def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">;
-  def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">;
-  def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
-  def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">;
-}
-
-//===----------------------------------------------------------------------===//
-// Scaled floating point to integer conversion instructions.
-//===----------------------------------------------------------------------===//
-
-// All FCVT* mnemonics get an FPToIntegerUnscaled form; only FCVTZS and FCVTZU
-// additionally get an FPToIntegerScaled form.
-defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
-defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
-defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
-defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
-defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
-defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
-defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
-defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
-defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
-defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
-defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
-defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
-
-// AArch64's FCVT instructions saturate when out of range.
-// The instruction is looked up by name: INST plus a suffix (UWHr, UXSr, ...
-// for the plain form; SWHri, SXSri, ... when the input is an fmul by a
-// fixed-point scale). The f16 variants are guarded by HasFullFP16.
-multiclass FPToIntegerSatPats {
-  let Predicates = [HasFullFP16] in {
-  def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
-            (!cast(INST # UWHr) f16:$Rn)>;
-  def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
-            (!cast(INST # UXHr) f16:$Rn)>;
-  }
-  def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
-            (!cast(INST # UWSr) f32:$Rn)>;
-  def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
-            (!cast(INST # UXSr) f32:$Rn)>;
-  def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
-            (!cast(INST # UWDr) f64:$Rn)>;
-  def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
-            (!cast(INST # UXDr) f64:$Rn)>;
-
-  let Predicates = [HasFullFP16] in {
-  def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
-            (!cast(INST # SWHri) $Rn, $scale)>;
-  def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
-            (!cast(INST # SXHri) $Rn, $scale)>;
-  }
-  def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
-            (!cast(INST # SWSri) $Rn, $scale)>;
-  def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
-            (!cast(INST # SXSri) $Rn, $scale)>;
-  def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
-            (!cast(INST # SWDri) $Rn, $scale)>;
-  def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
-            (!cast(INST # SXDri) $Rn, $scale)>;
-}
-
-defm : FPToIntegerSatPats;
-defm : FPToIntegerSatPats;
-
-// Same suffix scheme as FPToIntegerSatPats, but matching the `round`
-// operator applied directly to the value (or to an fmul by a fixed-point
-// scale) with an integer result type.
-multiclass FPToIntegerIntPats {
-  let Predicates = [HasFullFP16] in {
-  def : Pat<(i32 (round f16:$Rn)), (!cast(INST # UWHr) $Rn)>;
-  def : Pat<(i64 (round f16:$Rn)), (!cast(INST # UXHr) $Rn)>;
-  }
-  def : Pat<(i32 (round f32:$Rn)), (!cast(INST # UWSr) $Rn)>;
-  def : Pat<(i64 (round f32:$Rn)), (!cast(INST # UXSr) $Rn)>;
-  def : Pat<(i32 (round f64:$Rn)), (!cast(INST # UWDr) $Rn)>;
-  def : Pat<(i64 (round f64:$Rn)), (!cast(INST # UXDr) $Rn)>;
-
-  let Predicates = [HasFullFP16] in {
-  def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
-            (!cast(INST # SWHri) $Rn, $scale)>;
-  def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
-            (!cast(INST # SXHri) $Rn, $scale)>;
-  }
-  def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
-            (!cast(INST # SWSri) $Rn, $scale)>;
-  def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
-            (!cast(INST # SXSri) $Rn, $scale)>;
-  def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
-            (!cast(INST # SWDri) $Rn, $scale)>;
-  def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
-            (!cast(INST # SXDri) $Rn, $scale)>;
-}
-
-defm : FPToIntegerIntPats;
-defm : FPToIntegerIntPats;
-
-// Folds a rounding operator followed by an FP-to-int conversion into a
-// single INST-suffixed instruction. Note that the plain to_int patterns have
-// no f16 variants; only the to_int_sat group does.
-multiclass FPToIntegerPats {
-  def : Pat<(i32 (to_int (round f32:$Rn))),
-            (!cast(INST # UWSr) f32:$Rn)>;
-  def : Pat<(i64 (to_int (round f32:$Rn))),
-            (!cast(INST # UXSr) f32:$Rn)>;
-  def : Pat<(i32 (to_int (round f64:$Rn))),
-            (!cast(INST # UWDr) f64:$Rn)>;
-  def : Pat<(i64 (to_int (round f64:$Rn))),
-            (!cast(INST # UXDr) f64:$Rn)>;
-
-  // These instructions saturate like fp_to_[su]int_sat.
-  let Predicates = [HasFullFP16] in {
-  def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
-            (!cast(INST # UWHr) f16:$Rn)>;
-  def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
-            (!cast(INST # UXHr) f16:$Rn)>;
-  }
-  def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
-            (!cast(INST # UWSr) f32:$Rn)>;
-  def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
-            (!cast(INST # UXSr) f32:$Rn)>;
-  def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
-            (!cast(INST # UWDr) f64:$Rn)>;
-  def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
-            (!cast(INST # UXDr) f64:$Rn)>;
-}
-
-defm : FPToIntegerPats;
-defm : FPToIntegerPats;
-defm : FPToIntegerPats;
-defm : FPToIntegerPats;
-defm : FPToIntegerPats;
-defm : FPToIntegerPats;
-defm : FPToIntegerPats;
-defm : FPToIntegerPats;
-
-
-
-// lround and llround select the FCVTAS variants; the f16 forms are guarded
-// by HasFullFP16.
-let Predicates = [HasFullFP16] in {
-  def : Pat<(i32 (lround f16:$Rn)),
-            (!cast(FCVTASUWHr) f16:$Rn)>;
-  def : Pat<(i64 (lround f16:$Rn)),
-            (!cast(FCVTASUXHr) f16:$Rn)>;
-  def : Pat<(i64 (llround f16:$Rn)),
-            (!cast(FCVTASUXHr) f16:$Rn)>;
-}
-def : Pat<(i32 (lround f32:$Rn)),
-          (!cast(FCVTASUWSr) f32:$Rn)>;
-def : Pat<(i32 (lround f64:$Rn)),
-          (!cast(FCVTASUWDr) f64:$Rn)>;
-def : Pat<(i64 (lround f32:$Rn)),
-          (!cast(FCVTASUXSr) f32:$Rn)>;
-def : Pat<(i64 (lround f64:$Rn)),
-          (!cast(FCVTASUXDr) f64:$Rn)>;
-def : Pat<(i64 (llround f32:$Rn)),
-          (!cast(FCVTASUXSr) f32:$Rn)>;
-def : Pat<(i64 (llround f64:$Rn)),
-          (!cast(FCVTASUXDr) f64:$Rn)>;
-
-//===----------------------------------------------------------------------===//
-// Scaled integer to floating point conversion instructions.
-//===----------------------------------------------------------------------===//
-
-defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
-defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;
-
-//===----------------------------------------------------------------------===//
-// Unscaled integer to floating point conversion instruction.
-//===----------------------------------------------------------------------===//
-
-defm FMOV : UnscaledConversion<"fmov">;
-
-// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
-// Each pseudo takes no inputs and sets its FPR16/FPR32/FPR64 result to
-// fpimm0; the f16 one additionally requires HasFullFP16.
-let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
-def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
-    Sched<[WriteF]>, Requires<[HasFullFP16]>;
-def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
-    Sched<[WriteF]>;
-def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
-    Sched<[WriteF]>;
-}
-// Similarly add aliases
-// "fmov $Rd, #0.0" is expressed as an FMOV from the zero register (WZR for
-// the 16- and 32-bit forms, XZR for the 64-bit form).
-def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
-    Requires<[HasFullFP16]>;
-def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
-def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;
-
-//===----------------------------------------------------------------------===//
-// Floating point conversion instruction.
-//===----------------------------------------------------------------------===//
-
-defm FCVT : FPConversion<"fcvt">;
-
-//===----------------------------------------------------------------------===//
-// Floating point single operand instructions.
-//===----------------------------------------------------------------------===//
-
-// Each defm pairs a mnemonic with the DAG operator it selects; FMOV is
-// declared without an operator.
-defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>;
-defm FMOV : SingleOperandFPData<0b0000, "fmov">;
-defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>;
-defm FRINTA : SingleOperandFPData<0b1100, "frinta", fround>;
-defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>;
-defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>;
-defm FRINTN : SingleOperandFPData<0b1000, "frintn", froundeven>;
-defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;
-
-defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>;
-defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>;
-
-// FSQRT is scheduled with the WriteFDiv resource.
-let SchedRW = [WriteFDiv] in {
-defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>;
-}
-
-let Predicates = [HasFRInt3264] in {
-  defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>;
-  defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>;
-  defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>;
-  defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>;
-} // HasFRInt3264
-
-// lrint/llrint are selected as two instructions: FRINTX on the input, then
-// FCVTZS on the result. The f16 forms are guarded by HasFullFP16.
-let Predicates = [HasFullFP16] in {
-  def : Pat<(i32 (lrint f16:$Rn)),
-            (FCVTZSUWHr (!cast(FRINTXHr) f16:$Rn))>;
-  def : Pat<(i64 (lrint f16:$Rn)),
-            (FCVTZSUXHr (!cast(FRINTXHr) f16:$Rn))>;
-  def : Pat<(i64 (llrint f16:$Rn)),
-            (FCVTZSUXHr (!cast(FRINTXHr) f16:$Rn))>;
-}
-def : Pat<(i32 (lrint f32:$Rn)),
-          (FCVTZSUWSr (!cast(FRINTXSr) f32:$Rn))>;
-def : Pat<(i32 (lrint f64:$Rn)),
-          (FCVTZSUWDr (!cast(FRINTXDr) f64:$Rn))>;
-def : Pat<(i64 (lrint f32:$Rn)),
-          (FCVTZSUXSr (!cast(FRINTXSr) f32:$Rn))>;
-def : Pat<(i64 (lrint f64:$Rn)),
-          (FCVTZSUXDr (!cast(FRINTXDr) f64:$Rn))>;
-def : Pat<(i64 (llrint f32:$Rn)),
-          (FCVTZSUXSr (!cast(FRINTXSr) f32:$Rn))>;
-def : Pat<(i64 (llrint f64:$Rn)),
-          (FCVTZSUXDr (!cast(FRINTXDr) f64:$Rn))>;
-
-//===----------------------------------------------------------------------===//
-// Floating point two operand instructions.
-//===----------------------------------------------------------------------===// - -defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>; -let SchedRW = [WriteFDiv] in { -defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>; -} -defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>; -defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaximum>; -defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>; -defm FMIN : TwoOperandFPData<0b0101, "fmin", fminimum>; -let SchedRW = [WriteFMul] in { -defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>; -defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>; -} -defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>; - -def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FMINDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>; - -//===----------------------------------------------------------------------===// -// Floating point three operand instructions. -//===----------------------------------------------------------------------===// - -defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>; -defm FMSUB : ThreeOperandFPData<0, 1, "fmsub", - TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >; -defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd", - TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >; -defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub", - TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; - -// The following def pats catch the case where the LHS of an FMA is negated. -// The TriOpFrag above catches the case where the middle operand is negated. - -// N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike -// the NEON variant. 
- -// Here we handle first -(a + b*c) for FNMADD: - -let Predicates = [HasNEON, HasFullFP16] in -def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)), - (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>; - -def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)), - (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; - -def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)), - (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; - -// Now it's time for "(-a) + (-b)*c" - -let Predicates = [HasNEON, HasFullFP16] in -def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))), - (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>; - -def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))), - (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; - -def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))), - (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; - -//===----------------------------------------------------------------------===// -// Floating point comparison instructions. -//===----------------------------------------------------------------------===// - -defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>; -defm FCMP : FPComparison<0, "fcmp", AArch64any_fcmp>; - -//===----------------------------------------------------------------------===// -// Floating point conditional comparison instructions. -//===----------------------------------------------------------------------===// - -defm FCCMPE : FPCondComparison<1, "fccmpe">; -defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>; - -//===----------------------------------------------------------------------===// -// Floating point conditional select instruction. -//===----------------------------------------------------------------------===// - -defm FCSEL : FPCondSelect<"fcsel">; - -// CSEL instructions providing f128 types need to be handled by a -// pseudo-instruction since the eventual code will need to introduce basic -// blocks and control flow. 
-def F128CSEL : Pseudo<(outs FPR128:$Rd), - (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond), - [(set (f128 FPR128:$Rd), - (AArch64csel FPR128:$Rn, FPR128:$Rm, - (i32 imm:$cond), NZCV))]> { - let Uses = [NZCV]; - let usesCustomInserter = 1; - let hasNoSchedulingInfo = 1; -} - -//===----------------------------------------------------------------------===// -// Instructions used for emitting unwind opcodes on ARM64 Windows. -//===----------------------------------------------------------------------===// -let isPseudo = 1 in { - def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>; - def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>; - def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>; - def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; - def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; - def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; - def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; - def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; - def SEH_SaveFReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; - def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; - def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; - def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>; - def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>; - def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>; - def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>; - def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>; - def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>; -} - -// Pseudo instructions for Windows EH -//===----------------------------------------------------------------------===// -let isTerminator = 
1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, - isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in { - def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>; - let usesCustomInserter = 1 in - def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>, - Sched<[]>; -} - -// Pseudo instructions for homogeneous prolog/epilog -let isPseudo = 1 in { - // Save CSRs in order, {FPOffset} - def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>; - // Restore CSRs in order - def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>; -} - -//===----------------------------------------------------------------------===// -// Floating point immediate move. -//===----------------------------------------------------------------------===// - -let isReMaterializable = 1, isAsCheapAsAMove = 1 in { -defm FMOV : FPMoveImmediate<"fmov">; -} - -//===----------------------------------------------------------------------===// -// Advanced SIMD two vector instructions. -//===----------------------------------------------------------------------===// - -defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", - AArch64uabd>; -// Match UABDL in log2-shuffle patterns. 
-def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)), - (zext (v8i8 V64:$opB))))), - (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; -def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), - (v8i16 (add (sub (zext (v8i8 V64:$opA)), - (zext (v8i8 V64:$opB))), - (AArch64vashr v8i16:$src, (i32 15))))), - (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; -def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 V128:$opA)), - (zext (extract_high_v16i8 V128:$opB))))), - (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; -def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), - (v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)), - (zext (extract_high_v16i8 V128:$opB))), - (AArch64vashr v8i16:$src, (i32 15))))), - (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; -def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)), - (zext (v4i16 V64:$opB))))), - (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>; -def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 V128:$opA)), - (zext (extract_high_v8i16 V128:$opB))))), - (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>; -def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)), - (zext (v2i32 V64:$opB))))), - (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>; -def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 V128:$opA)), - (zext (extract_high_v4i32 V128:$opB))))), - (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>; - -defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>; -defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>; -defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>; -defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>; -defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>; -defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>; -defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>; -defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>; -defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>; -defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>; - -def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))), - (CMLTv8i8rz 
V64:$Rn)>; -def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))), - (CMLTv4i16rz V64:$Rn)>; -def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))), - (CMLTv2i32rz V64:$Rn)>; -def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))), - (CMLTv16i8rz V128:$Rn)>; -def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))), - (CMLTv8i16rz V128:$Rn)>; -def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))), - (CMLTv4i32rz V128:$Rn)>; -def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))), - (CMLTv2i64rz V128:$Rn)>; - -defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; -defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>; -defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; -defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>; -defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; -defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>; -defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>; -defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">; -def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))), - (FCVTLv4i16 V64:$Rn)>; -def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn), - (i64 4)))), - (FCVTLv8i16 V128:$Rn)>; -def : Pat<(v2f64 (fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; - -def : Pat<(v4f32 (fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>; - -defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>; -defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>; -defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>; -defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>; -defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">; -def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))), - (FCVTNv4i16 V128:$Rn)>; -def : 
Pat<(concat_vectors V64:$Rd, - (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), - (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -def : Pat<(v2f32 (fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; -def : Pat<(v4f16 (fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>; -def : Pat<(concat_vectors V64:$Rd, (v2f32 (fpround (v2f64 V128:$Rn)))), - (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>; -defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>; -defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn", - int_aarch64_neon_fcvtxn>; -defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>; -defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>; - -// AArch64's FCVT instructions saturate when out of range. -multiclass SIMDTwoVectorFPToIntSatPats { - def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)), - (!cast(INST # v4f16) v4f16:$Rn)>; - def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)), - (!cast(INST # v8f16) v8f16:$Rn)>; - def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)), - (!cast(INST # v2f32) v2f32:$Rn)>; - def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)), - (!cast(INST # v4f32) v4f32:$Rn)>; - def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)), - (!cast(INST # v2f64) v2f64:$Rn)>; -} -defm : SIMDTwoVectorFPToIntSatPats; -defm : SIMDTwoVectorFPToIntSatPats; - -def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>; -def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>; -def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>; -def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>; -def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>; - -def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>; -def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>; 
-def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>; -def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>; -def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>; - -defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>; -defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>; -defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", fround>; -defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>; -defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>; -defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", froundeven>; -defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>; -defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>; -defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>; - -let Predicates = [HasFRInt3264] in { - defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>; - defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>; - defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>; - defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>; -} // HasFRInt3264 - -defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>; -defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>; -defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg", - UnOpFrag<(sub immAllZerosV, node:$LHS)> >; -defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>; -// Aliases for MVN -> NOT. 
-def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}", - (NOTv8i8 V64:$Vd, V64:$Vn)>; -def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}", - (NOTv16i8 V128:$Vd, V128:$Vn)>; - -def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(vnot (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; - -defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>; -defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>; -defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>; -defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>; -defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp", - BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >; -defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>; -defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>; -defm SHLL : SIMDVectorLShiftLongBySizeBHS; -defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; -defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; -defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>; -defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>; -defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>; -defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp", - BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >; -defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>; -defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>; -defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>; -defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>; -defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", 
int_aarch64_neon_ursqrte>; -defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>; -defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>; - -def : Pat<(v4f16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>; -def : Pat<(v4f16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>; -def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>; -def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>; -def : Pat<(v8f16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>; -def : Pat<(v8f16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>; -def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>; -def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>; -def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>; -def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>; - -// Patterns for vector long shift (by element width). These need to match all -// three of zext, sext and anyext so it's easier to pull the patterns out of the -// definition. -multiclass SIMDVectorLShiftLongBySizeBHSPats { - def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)), - (SHLLv8i8 V64:$Rn)>; - def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)), - (SHLLv16i8 V128:$Rn)>; - def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)), - (SHLLv4i16 V64:$Rn)>; - def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)), - (SHLLv8i16 V128:$Rn)>; - def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)), - (SHLLv2i32 V64:$Rn)>; - def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)), - (SHLLv4i32 V128:$Rn)>; -} - -defm : SIMDVectorLShiftLongBySizeBHSPats; -defm : SIMDVectorLShiftLongBySizeBHSPats; -defm : SIMDVectorLShiftLongBySizeBHSPats; - -// Constant vector values, used in the S/UQXTN patterns below. 
-def VImmFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>; -def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>; -def VImm7F: PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>; -def VImm80: PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>; -def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>; -def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>; - -// trunc(umin(X, 255)) -> UQXTRN v8i8 -def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))), - (UQXTNv8i8 V128:$Vn)>; -// trunc(umin(X, 65535)) -> UQXTRN v4i16 -def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))), - (UQXTNv4i16 V128:$Vn)>; -// trunc(smin(smax(X, -128), 128)) -> SQXTRN -// with reversed min/max -def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)), - (v8i16 VImm7F)))), - (SQXTNv8i8 V128:$Vn)>; -def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)), - (v8i16 VImm80)))), - (SQXTNv8i8 V128:$Vn)>; -// trunc(smin(smax(X, -32768), 32767)) -> SQXTRN -// with reversed min/max -def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)), - (v4i32 VImm7FFF)))), - (SQXTNv4i16 V128:$Vn)>; -def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)), - (v4i32 VImm8000)))), - (SQXTNv4i16 V128:$Vn)>; - -// concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn) -// with reversed min/max -def : Pat<(v16i8 (concat_vectors - (v8i8 V64:$Vd), - (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)), - (v8i16 VImm7F)))))), - (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; -def : Pat<(v16i8 (concat_vectors - (v8i8 V64:$Vd), - (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)), - (v8i16 VImm80)))))), - (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; - -// concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn) -// with reversed min/max -def : Pat<(v8i16 (concat_vectors - (v4i16 V64:$Vd), - (v4i16 (trunc 
(smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)), - (v4i32 VImm7FFF)))))), - (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; -def : Pat<(v8i16 (concat_vectors - (v4i16 V64:$Vd), - (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)), - (v4i32 VImm8000)))))), - (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; - -//===----------------------------------------------------------------------===// -// Advanced SIMD three vector instructions. -//===----------------------------------------------------------------------===// - -defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>; -defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", int_aarch64_neon_addp>; -defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>; -defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>; -defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>; -defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>; -defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>; -defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>; -foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in { -def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast("CMTST"#VT) VT:$Rn, VT:$Rn)>; -} -defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>; -let Predicates = [HasNEON] in { -foreach VT = [ v2f32, v4f32, v2f64 ] in -def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast("FABD"#VT) VT:$Rn, VT:$Rm)>; -} -let Predicates = [HasNEON, HasFullFP16] in { -foreach VT = [ v4f16, v8f16 ] in -def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast("FABD"#VT) VT:$Rn, VT:$Rm)>; -} -defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>; -defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>; -defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_faddp>; -defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>; -defm FCMEQ : 
SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; -defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; -defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; -defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>; -defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; -defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>; -defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>; -defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaximum>; -defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>; -defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>; -defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>; -defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminimum>; - -// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the -// instruction expects the addend first, while the fma intrinsic puts it last. 
-defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla", - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; -defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls", - TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; - -defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>; -defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>; -defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>; -defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>; -defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>; - -// MLA and MLS are generated in MachineCombine -defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>; -defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>; - -defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>; -defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>; -defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba", - TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >; -defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>; -defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", AArch64shadd>; -defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>; -defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>; -defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>; -defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>; -defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>; -defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>; -defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>; -defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>; -defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>; -defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>; -defm SQSUB : 
SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>; -defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", AArch64srhadd>; -defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>; -defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>; -defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>; -defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba", - TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >; -defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>; -defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", AArch64uhadd>; -defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>; -defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>; -defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>; -defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>; -defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>; -defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>; -defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>; -defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>; -defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>; -defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", AArch64urhadd>; -defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>; -defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>; -defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah", - int_aarch64_neon_sqrdmlah>; -defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh", - int_aarch64_neon_sqrdmlsh>; - -// Extra saturate patterns, other than the intrinsics matches above -defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>; -defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>; -defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>; -defm : 
SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>; - -defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>; -defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic", - BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >; -defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>; -defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn", - BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >; -defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>; - -// Pseudo bitwise select pattern BSP. -// It is expanded into BSL/BIT/BIF after register allocation. -defm BSP : SIMDLogicalThreeVectorPseudo>; -defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">; -defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>; -defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">; - -def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm), - (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; -def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm), - (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; -def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm), - (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; -def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm), - (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; - -def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm), - (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; -def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm), - (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; -def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm), - (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; -def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm), - (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; - -def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}", - (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>; -def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}", - (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; -def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}", - (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; -def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}", - (ORRv16i8 V128:$dst, 
V128:$src, V128:$src), 0>; - -def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}", - (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>; -def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}", - (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; -def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}", - (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; -def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}", - (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; - -def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" # - "|cmls.8b\t$dst, $src1, $src2}", - (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" # - "|cmls.16b\t$dst, $src1, $src2}", - (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" # - "|cmls.4h\t$dst, $src1, $src2}", - (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" # - "|cmls.8h\t$dst, $src1, $src2}", - (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" # - "|cmls.2s\t$dst, $src1, $src2}", - (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" # - "|cmls.4s\t$dst, $src1, $src2}", - (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" # - "|cmls.2d\t$dst, $src1, $src2}", - (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" # - "|cmlo.8b\t$dst, $src1, $src2}", - (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" # - "|cmlo.16b\t$dst, $src1, $src2}", - (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" # - "|cmlo.4h\t$dst, $src1, $src2}", - (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" # - "|cmlo.8h\t$dst, $src1, $src2}", - (CMHIv8i16 
V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" # - "|cmlo.2s\t$dst, $src1, $src2}", - (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" # - "|cmlo.4s\t$dst, $src1, $src2}", - (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" # - "|cmlo.2d\t$dst, $src1, $src2}", - (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" # - "|cmle.8b\t$dst, $src1, $src2}", - (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" # - "|cmle.16b\t$dst, $src1, $src2}", - (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" # - "|cmle.4h\t$dst, $src1, $src2}", - (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" # - "|cmle.8h\t$dst, $src1, $src2}", - (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" # - "|cmle.2s\t$dst, $src1, $src2}", - (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" # - "|cmle.4s\t$dst, $src1, $src2}", - (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" # - "|cmle.2d\t$dst, $src1, $src2}", - (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" # - "|cmlt.8b\t$dst, $src1, $src2}", - (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" # - "|cmlt.16b\t$dst, $src1, $src2}", - (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" # - "|cmlt.4h\t$dst, $src1, $src2}", - (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" # - "|cmlt.8h\t$dst, $src1, $src2}", - 
(CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" # - "|cmlt.2s\t$dst, $src1, $src2}", - (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" # - "|cmlt.4s\t$dst, $src1, $src2}", - (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" # - "|cmlt.2d\t$dst, $src1, $src2}", - (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; - -let Predicates = [HasNEON, HasFullFP16] in { -def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" # - "|fcmle.4h\t$dst, $src1, $src2}", - (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" # - "|fcmle.8h\t$dst, $src1, $src2}", - (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; -} -def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" # - "|fcmle.2s\t$dst, $src1, $src2}", - (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" # - "|fcmle.4s\t$dst, $src1, $src2}", - (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" # - "|fcmle.2d\t$dst, $src1, $src2}", - (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; - -let Predicates = [HasNEON, HasFullFP16] in { -def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" # - "|fcmlt.4h\t$dst, $src1, $src2}", - (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" # - "|fcmlt.8h\t$dst, $src1, $src2}", - (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; -} -def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" # - "|fcmlt.2s\t$dst, $src1, $src2}", - (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" # - "|fcmlt.4s\t$dst, $src1, $src2}", - (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" # - "|fcmlt.2d\t$dst, $src1, $src2}", - (FCMGTv2f64 
V128:$dst, V128:$src2, V128:$src1), 0>; - -let Predicates = [HasNEON, HasFullFP16] in { -def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" # - "|facle.4h\t$dst, $src1, $src2}", - (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" # - "|facle.8h\t$dst, $src1, $src2}", - (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; -} -def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" # - "|facle.2s\t$dst, $src1, $src2}", - (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" # - "|facle.4s\t$dst, $src1, $src2}", - (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" # - "|facle.2d\t$dst, $src1, $src2}", - (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; - -let Predicates = [HasNEON, HasFullFP16] in { -def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" # - "|faclt.4h\t$dst, $src1, $src2}", - (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" # - "|faclt.8h\t$dst, $src1, $src2}", - (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; -} -def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" # - "|faclt.2s\t$dst, $src1, $src2}", - (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" # - "|faclt.4s\t$dst, $src1, $src2}", - (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" # - "|faclt.2d\t$dst, $src1, $src2}", - (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; - -//===----------------------------------------------------------------------===// -// Advanced SIMD three scalar instructions. 
-//===----------------------------------------------------------------------===// - -defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>; -defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>; -defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>; -defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>; -defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>; -defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>; -defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>; -defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>; -def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FABD64 FPR64:$Rn, FPR64:$Rm)>; -let Predicates = [HasFullFP16] in { -def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>; -} -def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>; -def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>; -defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge", - int_aarch64_neon_facge>; -defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt", - int_aarch64_neon_facgt>; -defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; -defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; -defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; -defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorStreamingSVE>; -defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorStreamingSVE>; -defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorStreamingSVE>; -defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; -defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; -defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; -defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, 
"sqrshl",int_aarch64_neon_sqrshl>; -defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>; -defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>; -defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>; -defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>; -defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>; -defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>; -defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>; -defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>; -defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; -defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; -defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; -let Predicates = [HasRDM] in { - defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">; - defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">; - def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn), - (i32 FPR32:$Rm))), - (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; - def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn), - (i32 FPR32:$Rm))), - (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; -} - -def : InstAlias<"cmls $dst, $src1, $src2", - (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; -def : InstAlias<"cmle $dst, $src1, $src2", - (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; -def : InstAlias<"cmlo $dst, $src1, $src2", - (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; -def : InstAlias<"cmlt $dst, $src1, $src2", - (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; -def : InstAlias<"fcmle $dst, $src1, $src2", - (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; -def : InstAlias<"fcmle $dst, $src1, $src2", - (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; -def : InstAlias<"fcmlt $dst, $src1, $src2", - 
(FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; -def : InstAlias<"fcmlt $dst, $src1, $src2", - (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; -def : InstAlias<"facle $dst, $src1, $src2", - (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; -def : InstAlias<"facle $dst, $src1, $src2", - (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; -def : InstAlias<"faclt $dst, $src1, $src2", - (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; -def : InstAlias<"faclt $dst, $src1, $src2", - (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; - -//===----------------------------------------------------------------------===// -// Advanced SIMD three scalar instructions (mixed operands). -//===----------------------------------------------------------------------===// -defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull", - int_aarch64_neon_sqdmulls_scalar>; -defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">; -defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">; - -def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd), - (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), - (i32 FPR32:$Rm))))), - (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; -def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd), - (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), - (i32 FPR32:$Rm))))), - (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; - -//===----------------------------------------------------------------------===// -// Advanced SIMD two scalar instructions. 
-//===----------------------------------------------------------------------===// - -defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", abs>; -defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>; -defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>; -defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>; -defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>; -defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>; -defm FCMEQ : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; -defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>; -defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; -defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>; -defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; -defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">; -defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">; -defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">; -defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">; -defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">; -defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">; -defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">; -defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">; -def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">; -defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">; -defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">; -defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe", HasNEONorStreamingSVE>; -defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx", HasNEONorStreamingSVE>; -defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte", HasNEONorStreamingSVE>; -defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", - UnOpFrag<(sub immAllZerosV, node:$LHS)> >; -defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>; -defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; -defm SQNEG : 
SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; -defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>; -defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>; -defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd", - int_aarch64_neon_suqadd>; -defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>; -defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>; -defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", - int_aarch64_neon_usqadd>; - -def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))), - (CMLTv1i64rz V64:$Rn)>; - -def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))), - (FCVTASv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))), - (FCVTAUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))), - (FCVTMSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))), - (FCVTMUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))), - (FCVTNSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))), - (FCVTNUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))), - (FCVTPSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))), - (FCVTPUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))), - (FCVTZSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))), - (FCVTZUv1i64 FPR64:$Rn)>; - -def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))), - (FRECPEv1f16 FPR16:$Rn)>; -def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))), - (FRECPEv1i32 FPR32:$Rn)>; -def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))), - (FRECPEv1i64 FPR64:$Rn)>; -def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))), - (FRECPEv1i64 FPR64:$Rn)>; - -def : Pat<(f32 (AArch64frecpe (f32 
FPR32:$Rn))), - (FRECPEv1i32 FPR32:$Rn)>; -def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))), - (FRECPEv2f32 V64:$Rn)>; -def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))), - (FRECPEv4f32 FPR128:$Rn)>; -def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))), - (FRECPEv1i64 FPR64:$Rn)>; -def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))), - (FRECPEv1i64 FPR64:$Rn)>; -def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))), - (FRECPEv2f64 FPR128:$Rn)>; - -def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))), - (FRECPS32 FPR32:$Rn, FPR32:$Rm)>; -def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))), - (FRECPSv2f32 V64:$Rn, V64:$Rm)>; -def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))), - (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>; -def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))), - (FRECPS64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))), - (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>; - -def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))), - (FRECPXv1f16 FPR16:$Rn)>; -def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))), - (FRECPXv1i32 FPR32:$Rn)>; -def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))), - (FRECPXv1i64 FPR64:$Rn)>; - -def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))), - (FRSQRTEv1f16 FPR16:$Rn)>; -def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))), - (FRSQRTEv1i32 FPR32:$Rn)>; -def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))), - (FRSQRTEv1i64 FPR64:$Rn)>; -def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))), - (FRSQRTEv1i64 FPR64:$Rn)>; - -def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))), - (FRSQRTEv1i32 FPR32:$Rn)>; -def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))), - (FRSQRTEv2f32 V64:$Rn)>; -def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))), - (FRSQRTEv4f32 FPR128:$Rn)>; -def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))), - (FRSQRTEv1i64 FPR64:$Rn)>; -def : Pat<(v1f64 (AArch64frsqrte (v1f64 
FPR64:$Rn))), - (FRSQRTEv1i64 FPR64:$Rn)>; -def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))), - (FRSQRTEv2f64 FPR128:$Rn)>; - -def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))), - (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>; -def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))), - (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>; -def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))), - (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>; -def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))), - (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))), - (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>; - -// Some float -> int -> float conversion patterns for which we want to keep the -// int values in FP registers using the corresponding NEON instructions to -// avoid more costly int <-> fp register transfers. -let Predicates = [HasNEON] in { -def : Pat<(f64 (sint_to_fp (i64 (fp_to_sint f64:$Rn)))), - (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>; -def : Pat<(f32 (sint_to_fp (i32 (fp_to_sint f32:$Rn)))), - (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>; -def : Pat<(f64 (uint_to_fp (i64 (fp_to_uint f64:$Rn)))), - (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>; -def : Pat<(f32 (uint_to_fp (i32 (fp_to_uint f32:$Rn)))), - (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>; - -let Predicates = [HasFullFP16] in { -def : Pat<(f16 (sint_to_fp (i32 (fp_to_sint f16:$Rn)))), - (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>; -def : Pat<(f16 (uint_to_fp (i32 (fp_to_uint f16:$Rn)))), - (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>; -} -} - -// If an integer is about to be converted to a floating point value, -// just load it on the floating point unit. -// Here are the patterns for 8 and 16-bits to float. -// 8-bits -> float. 
-multiclass UIntToFPROLoadPat { - def : Pat<(DstTy (uint_to_fp (SrcTy - (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, - ro.Wext:$extend))))), - (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)), - (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), - sub))>; - - def : Pat<(DstTy (uint_to_fp (SrcTy - (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, - ro.Wext:$extend))))), - (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)), - (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), - sub))>; -} - -defm : UIntToFPROLoadPat; -def : Pat <(f32 (uint_to_fp (i32 - (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>; -def : Pat <(f32 (uint_to_fp (i32 - (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>; -// 16-bits -> float. -defm : UIntToFPROLoadPat; -def : Pat <(f32 (uint_to_fp (i32 - (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>; -def : Pat <(f32 (uint_to_fp (i32 - (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>; -// 32-bits are handled in target specific dag combine: -// performIntToFpCombine. -// 64-bits integer to 32-bits floating point, not possible with -// UCVTF on floating point registers (both source and destination -// must have the same size). - -// Here are the patterns for 8, 16, 32, and 64-bits to double. -// 8-bits -> double. 
-defm : UIntToFPROLoadPat; -def : Pat <(f64 (uint_to_fp (i32 - (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>; -def : Pat <(f64 (uint_to_fp (i32 - (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>; -// 16-bits -> double. -defm : UIntToFPROLoadPat; -def : Pat <(f64 (uint_to_fp (i32 - (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>; -def : Pat <(f64 (uint_to_fp (i32 - (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>; -// 32-bits -> double. -defm : UIntToFPROLoadPat; -def : Pat <(f64 (uint_to_fp (i32 - (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>; -def : Pat <(f64 (uint_to_fp (i32 - (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>; -// 64-bits -> double are handled in target specific dag combine: -// performIntToFpCombine. - -//===----------------------------------------------------------------------===// -// Advanced SIMD three different-sized vector instructions. 
-//===----------------------------------------------------------------------===// - -defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>; -defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>; -defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>; -defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>; -defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>; -defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", - AArch64sabd>; -defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", - AArch64sabd>; -defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl", - BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>; -defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw", - BinOpFrag<(add node:$LHS, (sext node:$RHS))>>; -defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal", - TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; -defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl", - TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; -defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_aarch64_neon_smull>; -defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", - int_aarch64_neon_sqadd>; -defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", - int_aarch64_neon_sqsub>; -defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull", - int_aarch64_neon_sqdmull>; -defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl", - BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>; -defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", - BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>; -defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", - AArch64uabd>; -defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", - BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>; -defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", - 
BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>; -defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal", - TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; -defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl", - TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; -defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>; -defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl", - BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>; -defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw", - BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>; - -// Additional patterns for [SU]ML[AS]L -multiclass Neon_mul_acc_widen_patterns { - def : Pat<(v4i16 (opnode - V64:$Ra, - (v4i16 (extract_subvector - (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)), - (i64 0))))), - (EXTRACT_SUBREG (v8i16 (INST8B - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub), - V64:$Rn, V64:$Rm)), dsub)>; - def : Pat<(v2i32 (opnode - V64:$Ra, - (v2i32 (extract_subvector - (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)), - (i64 0))))), - (EXTRACT_SUBREG (v4i32 (INST4H - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub), - V64:$Rn, V64:$Rm)), dsub)>; - def : Pat<(v1i64 (opnode - V64:$Ra, - (v1i64 (extract_subvector - (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)), - (i64 0))))), - (EXTRACT_SUBREG (v2i64 (INST2S - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub), - V64:$Rn, V64:$Rm)), dsub)>; -} - -defm : Neon_mul_acc_widen_patterns; -defm : Neon_mul_acc_widen_patterns; -defm : Neon_mul_acc_widen_patterns; -defm : Neon_mul_acc_widen_patterns; - -// Additional patterns for SMULL and UMULL -multiclass Neon_mul_widen_patterns { - def : Pat<(v8i16 (opnode (v8i8 V64:$Rn), (v8i8 V64:$Rm))), - (INST8B V64:$Rn, V64:$Rm)>; - def : Pat<(v4i32 (opnode (v4i16 V64:$Rn), (v4i16 V64:$Rm))), - (INST4H V64:$Rn, V64:$Rm)>; - def : Pat<(v2i64 (opnode (v2i32 V64:$Rn), (v2i32 V64:$Rm))), - (INST2S V64:$Rn, V64:$Rm)>; -} - -defm : 
Neon_mul_widen_patterns; -defm : Neon_mul_widen_patterns; - -// Patterns for smull2/umull2. -multiclass Neon_mul_high_patterns { - def : Pat<(v8i16 (opnode (extract_high_v16i8 V128:$Rn), - (extract_high_v16i8 V128:$Rm))), - (INST8B V128:$Rn, V128:$Rm)>; - def : Pat<(v4i32 (opnode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 V128:$Rm))), - (INST4H V128:$Rn, V128:$Rm)>; - def : Pat<(v2i64 (opnode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 V128:$Rm))), - (INST2S V128:$Rn, V128:$Rm)>; -} - -defm : Neon_mul_high_patterns; -defm : Neon_mul_high_patterns; - -// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL -multiclass Neon_mulacc_widen_patterns { - def : Pat<(v8i16 (opnode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm))), - (INST8B V128:$Rd, V64:$Rn, V64:$Rm)>; - def : Pat<(v4i32 (opnode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm))), - (INST4H V128:$Rd, V64:$Rn, V64:$Rm)>; - def : Pat<(v2i64 (opnode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm))), - (INST2S V128:$Rd, V64:$Rn, V64:$Rm)>; -} - -defm : Neon_mulacc_widen_patterns< - TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>, - SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>; -defm : Neon_mulacc_widen_patterns< - TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>, - UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>; -defm : Neon_mulacc_widen_patterns< - TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>, - SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>; -defm : Neon_mulacc_widen_patterns< - TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>, - UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>; - -// Patterns for 64-bit pmull -def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm), - (PMULLv1i64 V64:$Rn, V64:$Rm)>; -def : Pat<(int_aarch64_neon_pmull64 (extractelt (v2i64 V128:$Rn), (i64 1)), - (extractelt (v2i64 V128:$Rm), (i64 1))), - (PMULLv2i64 V128:$Rn, V128:$Rm)>; - -// CodeGen patterns for addhn and subhn 
instructions, which can actually be -// written in LLVM IR without too much difficulty. - -// ADDHN -def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), - (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), - (i32 16))))), - (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), - (i32 32))))), - (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v8i8 V64:$Rd), - (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), - (i32 8))))), - (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v4i16 V64:$Rd), - (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), - (i32 16))))), - (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v2i32 V64:$Rd), - (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), - (i32 32))))), - (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; - -// SUBHN -def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))), - (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), - (i32 16))))), - (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), - (i32 32))))), - (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v8i8 V64:$Rd), - (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), - (i32 8))))), - (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v4i16 V64:$Rd), - (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), - (i32 16))))), - (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v2i32 V64:$Rd), - (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), - (i32 32))))), - 
(SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; - -//---------------------------------------------------------------------------- -// AdvSIMD bitwise extract from vector instruction. -//---------------------------------------------------------------------------- - -defm EXT : SIMDBitwiseExtract<"ext">; - -def AdjustExtImm : SDNodeXFormgetTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32); -}]>; -multiclass ExtPat { - def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), - (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; - def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), - (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; - // We use EXT to handle extract_subvector to copy the upper 64-bits of a - // 128-bit vector. - def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; - // A 64-bit EXT of two halves of the same 128-bit register can be done as a - // single 128-bit EXT. - def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)), - (extract_subvector V128:$Rn, (i64 N)), - (i32 imm:$imm))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>; - // A 64-bit EXT of the high half of a 128-bit register can be done using a - // 128-bit EXT of the whole register with an adjustment to the immediate. The - // top half of the other operand will be unset, but that doesn't matter as it - // will not be used. 
- def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)), - V64:$Rm, - (i32 imm:$imm))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, - (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), - (AdjustExtImm imm:$imm)), dsub)>; -} - -defm : ExtPat; -defm : ExtPat; -defm : ExtPat; -defm : ExtPat; -defm : ExtPat; -defm : ExtPat; -defm : ExtPat; -defm : ExtPat; - -//---------------------------------------------------------------------------- -// AdvSIMD zip vector -//---------------------------------------------------------------------------- - -defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>; -defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>; -defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>; -defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>; -defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>; -defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>; - -def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))), - (v8i8 (trunc (v8i16 V128:$Vm))))), - (UZP1v16i8 V128:$Vn, V128:$Vm)>; -def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))), - (v4i16 (trunc (v4i32 V128:$Vm))))), - (UZP1v8i16 V128:$Vn, V128:$Vm)>; -def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))), - (v2i32 (trunc (v2i64 V128:$Vm))))), - (UZP1v4i32 V128:$Vn, V128:$Vm)>; - -//---------------------------------------------------------------------------- -// AdvSIMD TBL/TBX instructions -//---------------------------------------------------------------------------- - -defm TBL : SIMDTableLookup< 0, "tbl">; -defm TBX : SIMDTableLookupTied<1, "tbx">; - -def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), - (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>; -def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), - (TBLv16i8One V128:$Ri, V128:$Rn)>; - -def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd), - (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), - (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>; -def : Pat<(v16i8 
(int_aarch64_neon_tbx1 (v16i8 V128:$Rd), - (v16i8 V128:$Ri), (v16i8 V128:$Rn))), - (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>; - - -//---------------------------------------------------------------------------- -// AdvSIMD scalar DUP instruction -//---------------------------------------------------------------------------- - -defm DUP : SIMDScalarDUP<"mov">; - -//---------------------------------------------------------------------------- -// AdvSIMD scalar pairwise instructions -//---------------------------------------------------------------------------- - -defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">; -defm FADDP : SIMDFPPairwiseScalar<0, 0b01101, "faddp">; -defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">; -defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">; -defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">; -defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">; - -let Predicates = [HasFullFP16] in { -def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))), - (FADDPv2i16p - (EXTRACT_SUBREG - (FADDPv8f16 (FADDPv8f16 V128:$Rn, (v8f16 (IMPLICIT_DEF))), (v8f16 (IMPLICIT_DEF))), - dsub))>; -def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))), - (FADDPv2i16p (FADDPv4f16 V64:$Rn, (v4f16 (IMPLICIT_DEF))))>; -} -def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))), - (FADDPv2i32p - (EXTRACT_SUBREG - (FADDPv4f32 V128:$Rn, (v4f32 (IMPLICIT_DEF))), - dsub))>; -def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))), - (FADDPv2i32p V64:$Rn)>; -def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))), - (FADDPv2i64p V128:$Rn)>; - -def : Pat<(v2i64 (AArch64saddv V128:$Rn)), - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; -def : Pat<(v2i64 (AArch64uaddv V128:$Rn)), - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; -def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))), - (FADDPv2i32p V64:$Rn)>; -def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))), - (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), 
dsub))>; -def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))), - (FADDPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))), - (FMAXNMPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))), - (FMAXNMPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))), - (FMAXPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))), - (FMAXPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))), - (FMINNMPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))), - (FMINNMPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))), - (FMINPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))), - (FMINPv2i64p V128:$Rn)>; - -//---------------------------------------------------------------------------- -// AdvSIMD INS/DUP instructions -//---------------------------------------------------------------------------- - -def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>; -def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>; -def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>; -def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>; -def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>; -def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>; -def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>; - -def DUPv2i64lane : SIMDDup64FromElement; -def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>; -def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>; -def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>; -def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>; -def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>; -def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>; - -// DUP from a 64-bit 
register to a 64-bit register is just a copy -def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))), - (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>; -def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))), - (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>; - -def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))), - (v2f32 (DUPv2i32lane - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), - (i64 0)))>; -def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))), - (v4f32 (DUPv4i32lane - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), - (i64 0)))>; -def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))), - (v2f64 (DUPv2i64lane - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub), - (i64 0)))>; -def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))), - (v4f16 (DUPv4i16lane - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), - (i64 0)))>; -def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))), - (v4bf16 (DUPv4i16lane - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), - (i64 0)))>; -def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))), - (v8f16 (DUPv8i16lane - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), - (i64 0)))>; -def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))), - (v8bf16 (DUPv8i16lane - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), - (i64 0)))>; - -def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)), - (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>; -def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)), - (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>; - -def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)), - (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>; -def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)), - (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>; - -def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), - (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>; -def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), - (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>; -def : 
Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)), - (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>; - -// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane -// instruction even if the types don't match: we just have to remap the lane -// carefully. N.b. this trick only applies to truncations. -def VecIndex_x2 : SDNodeXFormgetTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64); -}]>; -def VecIndex_x4 : SDNodeXFormgetTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64); -}]>; -def VecIndex_x8 : SDNodeXFormgetTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64); -}]>; - -multiclass DUPWithTruncPats { - def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn), - imm:$idx)))), - (DUP V128:$Rn, (IdxXFORM imm:$idx))>; - - def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn), - imm:$idx)))), - (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; -} - -defm : DUPWithTruncPats; -defm : DUPWithTruncPats; -defm : DUPWithTruncPats; - -defm : DUPWithTruncPats; -defm : DUPWithTruncPats; -defm : DUPWithTruncPats; - -multiclass DUPWithTrunci64Pats { - def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn), - imm:$idx))))), - (DUP V128:$Rn, (IdxXFORM imm:$idx))>; - - def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn), - imm:$idx))))), - (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; -} - -defm : DUPWithTrunci64Pats; -defm : DUPWithTrunci64Pats; -defm : DUPWithTrunci64Pats; - -defm : DUPWithTrunci64Pats; -defm : DUPWithTrunci64Pats; -defm : DUPWithTrunci64Pats; - -// SMOV and UMOV definitions, with some extra patterns for convenience -defm SMOV : SMov; -defm UMOV : UMov; - -def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), - (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>; -def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), - (i64 (SMOVvi8to64 V128:$Rn, 
VectorIndexB:$idx))>; -def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), - (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; -def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), - (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>; -def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), - (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; -def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))), - (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>; - -def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn), - VectorIndexB:$idx)))), i8), - (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>; -def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn), - VectorIndexH:$idx)))), i16), - (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>; - -// Extracting i8 or i16 elements will have the zero-extend transformed to -// an 'and' mask by type legalization since neither i8 nor i16 are legal types -// for AArch64. Match these patterns here since UMOV already zeroes out the high -// bits of the destination register. 
-def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), - (i32 0xff)), - (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>; -def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), - (i32 0xffff)), - (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>; - -def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn), - VectorIndexB:$idx)))), (i64 0xff))), - (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>; -def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn), - VectorIndexH:$idx)))), (i64 0xffff))), - (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>; - -defm INS : SIMDIns; - -def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)), - (SUBREG_TO_REG (i32 0), - (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; -def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)), - (SUBREG_TO_REG (i32 0), - (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; - -def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)), - (SUBREG_TO_REG (i32 0), - (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; -def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)), - (SUBREG_TO_REG (i32 0), - (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; - -def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))), - (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; -def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))), - (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; - -def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))), - (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; -def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))), - (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; - -def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))), - (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), - (i32 FPR32:$Rn), ssub))>; -def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))), - (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), - (i32 FPR32:$Rn), ssub))>; - -def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))), - (v2i64 
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), - (i64 FPR64:$Rn), dsub))>; - -def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))), - (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; -def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))), - (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; - -def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))), - (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; -def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))), - (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; - -def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; -def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; - -def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>; - -def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn), - (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))), - (EXTRACT_SUBREG - (INSvi16lane - (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), - VectorIndexS:$imm, - (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), - (i64 0)), - dsub)>; - -def : Pat<(vector_insert (v8f16 v8f16:$Rn), (f16 fpimm0), - (i64 VectorIndexH:$imm)), - (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>; -def : Pat<(vector_insert v4f32:$Rn, (f32 fpimm0), - (i64 VectorIndexS:$imm)), - (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>; -def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), - (i64 VectorIndexD:$imm)), - (INSvi64gpr V128:$Rn, VectorIndexS:$imm, XZR)>; - -def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn), - (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))), - (INSvi16lane - V128:$Rn, VectorIndexH:$imm, - (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), - (i64 0))>; - -def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn), - (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))), - (EXTRACT_SUBREG - (INSvi16lane - (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), 
V64:$Rn, dsub)), - VectorIndexS:$imm, - (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), - (i64 0)), - dsub)>; - -def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn), - (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))), - (INSvi16lane - V128:$Rn, VectorIndexH:$imm, - (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), - (i64 0))>; - -def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn), - (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), - (EXTRACT_SUBREG - (INSvi32lane - (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), - VectorIndexS:$imm, - (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), - (i64 0)), - dsub)>; -def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn), - (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), - (INSvi32lane - V128:$Rn, VectorIndexS:$imm, - (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), - (i64 0))>; -def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn), - (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))), - (INSvi64lane - V128:$Rn, VectorIndexD:$imm, - (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)), - (i64 0))>; - -// Copy an element at a constant index in one vector into a constant indexed -// element of another. 
-// FIXME refactor to a shared class/dev parameterized on vector type, vector -// index type and INS extension -def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane - (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs), - VectorIndexB:$idx2)), - (v16i8 (INSvi8lane - V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2) - )>; -def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane - (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs), - VectorIndexH:$idx2)), - (v8i16 (INSvi16lane - V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2) - )>; -def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane - (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs), - VectorIndexS:$idx2)), - (v4i32 (INSvi32lane - V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2) - )>; -def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane - (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs), - VectorIndexD:$idx2)), - (v2i64 (INSvi64lane - V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2) - )>; - -multiclass Neon_INS_elt_pattern { - def : Pat<(VT128 (vector_insert V128:$src, - (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)), - imm:$Immd)), - (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>; - - def : Pat<(VT128 (vector_insert V128:$src, - (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)), - imm:$Immd)), - (INS V128:$src, imm:$Immd, - (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>; - - def : Pat<(VT64 (vector_insert V64:$src, - (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)), - imm:$Immd)), - (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), - imm:$Immd, V128:$Rn, imm:$Immn), - dsub)>; - - def : Pat<(VT64 (vector_insert V64:$src, - (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)), - imm:$Immd)), - (EXTRACT_SUBREG - (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd, - (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn), - dsub)>; -} - -defm : Neon_INS_elt_pattern; -defm : Neon_INS_elt_pattern; -defm : Neon_INS_elt_pattern; -defm : Neon_INS_elt_pattern; - - -// 
Floating point vector extractions are codegen'd as either a sequence of -// subregister extractions, or a MOV (aka DUP here) if -// the lane number is anything other than zero. -def : Pat<(vector_extract (v2f64 V128:$Rn), 0), - (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>; -def : Pat<(vector_extract (v4f32 V128:$Rn), 0), - (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>; -def : Pat<(vector_extract (v8f16 V128:$Rn), 0), - (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>; -def : Pat<(vector_extract (v8bf16 V128:$Rn), 0), - (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>; - - -def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx), - (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>; -def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx), - (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>; -def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx), - (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>; -def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx), - (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>; - -// All concat_vectors operations are canonicalised to act on i64 vectors for -// AArch64. In the general case we need an instruction, which had just as well be -// INS. 
-class ConcatPat - : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)), - (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1, - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>; - -def : ConcatPat; -def : ConcatPat; -def : ConcatPat; -def : ConcatPat; -def : ConcatPat; -def : ConcatPat; -def : ConcatPat; -def : ConcatPat; - -// If the high lanes are undef, though, we can just ignore them: -class ConcatUndefPat - : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)), - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>; - -def : ConcatUndefPat; -def : ConcatUndefPat; -def : ConcatUndefPat; -def : ConcatUndefPat; -def : ConcatUndefPat; -def : ConcatUndefPat; - -//---------------------------------------------------------------------------- -// AdvSIMD across lanes instructions -//---------------------------------------------------------------------------- - -defm ADDV : SIMDAcrossLanesBHS<0, 0b11011, "addv">; -defm SMAXV : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">; -defm SMINV : SIMDAcrossLanesBHS<0, 0b11010, "sminv">; -defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">; -defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">; -defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">; -defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">; -defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>; -defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; -defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; -defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>; - -// Patterns for uaddv(uaddlp(x)) ==> uaddlv -def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef, - (v4i16 (AArch64uaddv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))), - (i64 0))), (i64 0))), - (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), - (UADDLVv8i8v V64:$op), hsub), ssub)>; -def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (AArch64uaddlp - (v16i8 V128:$op))))), (i64 0))), - 
(EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), - (UADDLVv16i8v V128:$op), hsub), ssub)>; -def : Pat<(v4i32 (AArch64uaddv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))), - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (UADDLVv8i16v V128:$op), ssub)>; - -// Patterns for addp(uaddlp(x))) ==> uaddlv -def : Pat<(v2i32 (AArch64uaddv (v2i32 (AArch64uaddlp (v4i16 V64:$op))))), - (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (UADDLVv4i16v V64:$op), ssub)>; -def : Pat<(v2i64 (AArch64uaddv (v2i64 (AArch64uaddlp (v4i32 V128:$op))))), - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (UADDLVv4i32v V128:$op), dsub)>; - -// Patterns for across-vector intrinsics, that have a node equivalent, that -// returns a vector (with only the low lane defined) instead of a scalar. -// In effect, opNode is the same as (scalar_to_vector (IntNode)). -multiclass SIMDAcrossLanesIntrinsic { -// If a lane instruction caught the vector_extract around opNode, we can -// directly match the latter to the instruction. -def : Pat<(v8i8 (opNode V64:$Rn)), - (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>; -def : Pat<(v16i8 (opNode V128:$Rn)), - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>; -def : Pat<(v4i16 (opNode V64:$Rn)), - (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>; -def : Pat<(v8i16 (opNode V128:$Rn)), - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>; -def : Pat<(v4i32 (opNode V128:$Rn)), - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>; - - -// If none did, fallback to the explicit patterns, consuming the vector_extract. 
-def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)), - (i64 0)), (i64 0))), - (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), - bsub), ssub)>; -def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))), - (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), - bsub), ssub)>; -def : Pat<(i32 (vector_extract (insert_subvector undef, - (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))), - (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), - hsub), ssub)>; -def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))), - (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), - hsub), ssub)>; -def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))), - (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i32v")) V128:$Rn), - ssub), ssub)>; - -} - -multiclass SIMDAcrossLanesSignedIntrinsic - : SIMDAcrossLanesIntrinsic { -// If there is a sign extension after this intrinsic, consume it as smov already -// performed it -def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, - (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - (i64 0)))>; -def : Pat<(i32 (sext_inreg (i32 (vector_extract - (opNode (v16i8 V128:$Rn)), (i64 0))), i8)), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - (i64 0)))>; -def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, - (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - (i64 0)))>; -def : Pat<(i32 (sext_inreg 
(i32 (vector_extract - (opNode (v8i16 V128:$Rn)), (i64 0))), i16)), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - (i64 0)))>; -} - -multiclass SIMDAcrossLanesUnsignedIntrinsic - : SIMDAcrossLanesIntrinsic { -// If there is a masking operation keeping only what has been actually -// generated, consume it. -def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, - (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - ssub))>; -def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))), - maski8_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - ssub))>; -def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, - (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - ssub))>; -def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))), - maski16_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - ssub))>; -} - -defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>; -// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))), - (ADDPv2i32 V64:$Rn, V64:$Rn)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>; -// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))), - (ADDPv2i32 V64:$Rn, V64:$Rn)>; - -defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>; -def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))), - (SMAXPv2i32 V64:$Rn, V64:$Rn)>; - -defm : 
SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>; -def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))), - (SMINPv2i32 V64:$Rn, V64:$Rn)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>; -def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))), - (UMAXPv2i32 V64:$Rn, V64:$Rn)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>; -def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))), - (UMINPv2i32 V64:$Rn, V64:$Rn)>; - -multiclass SIMDAcrossLanesSignedLongIntrinsic { - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), - (i64 0)))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), - (i64 0)))>; - -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), - ssub))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), - ssub))>; - -def : Pat<(i64 (intOp (v4i32 V128:$Rn))), - (i64 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), - dsub))>; -} - -multiclass SIMDAcrossLanesUnsignedLongIntrinsic { - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), - ssub))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), - ssub))>; - -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), - ssub))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), 
- (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), - ssub))>; - -def : Pat<(i64 (intOp (v4i32 V128:$Rn))), - (i64 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), - dsub))>; -} - -defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>; -defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>; - -// The vaddlv_s32 intrinsic gets mapped to SADDLP. -def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))), - (i64 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (SADDLPv2i32_v1i64 V64:$Rn), dsub), - dsub))>; -// The vaddlv_u32 intrinsic gets mapped to UADDLP. -def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))), - (i64 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (UADDLPv2i32_v1i64 V64:$Rn), dsub), - dsub))>; - -//------------------------------------------------------------------------------ -// AdvSIMD modified immediate instructions -//------------------------------------------------------------------------------ - -// AdvSIMD BIC -defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>; -// AdvSIMD ORR -defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>; - -def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; -def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; -def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>; -def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>; - -def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; -def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; -def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>; -def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>; - -def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 
V64:$Vd, imm0_255:$imm, 0)>; -def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>; -def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>; -def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>; - -def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>; -def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>; -def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>; -def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>; - -// AdvSIMD FMOV -def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8, - "fmov", ".2d", - [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; -def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8, - "fmov", ".2s", - [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; -def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8, - "fmov", ".4s", - [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; -let Predicates = [HasNEON, HasFullFP16] in { -def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8, - "fmov", ".4h", - [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; -def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8, - "fmov", ".8h", - [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; -} // Predicates = [HasNEON, HasFullFP16] - -// AdvSIMD MOVI - -// EDIT byte mask: scalar -let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi", - [(set FPR64:$Rd, simdimmtype10:$imm8)]>; -// The movi_edit node has the immediate value already encoded, so we use -// a plain imm0_255 here. 
-def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)), - (MOVID imm0_255:$shift)>; - -// EDIT byte mask: 2d - -// The movi_edit node has the immediate value already encoded, so we use -// a plain imm0_255 in the pattern -let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128, - simdimmtype10, - "movi", ".2d", - [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>; - -def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>; -def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>; -def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>; -def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>; - -def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>; -def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>; -def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>; -def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>; - -// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the -// extract is free and this gives better MachineCSE results. 
-def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; -def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; -def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; -def : Pat<(v8i8 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; - -def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; -def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; -def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; -def : Pat<(v8i8 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; - -// EDIT per word & halfword: 2s, 4h, 4s, & 8h -let isReMaterializable = 1, isAsCheapAsAMove = 1 in -defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">; - -def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; -def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; -def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; -def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; - -def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; -def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; -def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; -def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; - -def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), - (MOVIv2i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), - (MOVIv4i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), - (MOVIv4i16 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), - (MOVIv8i16 imm0_255:$imm8, imm:$shift)>; - -let isReMaterializable = 1, isAsCheapAsAMove = 1 in { -// EDIT per word: 
2s & 4s with MSL shifter -def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s", - [(set (v2i32 V64:$Rd), - (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; -def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", - [(set (v4i32 V128:$Rd), - (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; - -// Per byte: 8b & 16b -def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255, - "movi", ".8b", - [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>; - -def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255, - "movi", ".16b", - [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>; -} - -// AdvSIMD MVNI - -// EDIT per word & halfword: 2s, 4h, 4s, & 8h -let isReMaterializable = 1, isAsCheapAsAMove = 1 in -defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">; - -def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; -def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; -def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; -def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; - -def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; -def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; -def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; -def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; - -def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), - (MVNIv2i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), - (MVNIv4i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), - (MVNIv4i16 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), - (MVNIv8i16 imm0_255:$imm8, imm:$shift)>; - 
-// EDIT per word: 2s & 4s with MSL shifter -let isReMaterializable = 1, isAsCheapAsAMove = 1 in { -def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", - [(set (v2i32 V64:$Rd), - (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; -def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", - [(set (v4i32 V128:$Rd), - (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD indexed element -//---------------------------------------------------------------------------- - -let hasSideEffects = 0 in { - defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">; - defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">; -} - -// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the -// instruction expects the addend first, while the intrinsic expects it last. - -// On the other hand, there are quite a few valid combinatorial options due to -// the commutativity of multiplication and the fact that (-x) * y = x * (-y). -defm : SIMDFPIndexedTiedPatterns<"FMLA", - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>; -defm : SIMDFPIndexedTiedPatterns<"FMLA", - TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>; - -defm : SIMDFPIndexedTiedPatterns<"FMLS", - TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; -defm : SIMDFPIndexedTiedPatterns<"FMLS", - TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; -defm : SIMDFPIndexedTiedPatterns<"FMLS", - TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; -defm : SIMDFPIndexedTiedPatterns<"FMLS", - TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; - -multiclass FMLSIndexedAfterNegPatterns { - // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit - // and DUP scalar. 
- def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (AArch64duplane32 (v4f32 (fneg V128:$Rm)), - VectorIndexS:$idx))), - (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (v2f32 (AArch64duplane32 - (v4f32 (insert_subvector undef, - (v2f32 (fneg V64:$Rm)), - (i64 0))), - VectorIndexS:$idx)))), - (FMLSv2i32_indexed V64:$Rd, V64:$Rn, - (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), - VectorIndexS:$idx)>; - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (AArch64dup (f32 (fneg FPR32Op:$Rm))))), - (FMLSv2i32_indexed V64:$Rd, V64:$Rn, - (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; - - // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit - // and DUP scalar. - def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (AArch64duplane32 (v4f32 (fneg V128:$Rm)), - VectorIndexS:$idx))), - (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm, - VectorIndexS:$idx)>; - def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (v4f32 (AArch64duplane32 - (v4f32 (insert_subvector undef, - (v2f32 (fneg V64:$Rm)), - (i64 0))), - VectorIndexS:$idx)))), - (FMLSv4i32_indexed V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), - VectorIndexS:$idx)>; - def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (AArch64dup (f32 (fneg FPR32Op:$Rm))))), - (FMLSv4i32_indexed V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; - - // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar - // (DUPLANE from 64-bit would be trivial). 
- def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (AArch64duplane64 (v2f64 (fneg V128:$Rm)), - VectorIndexD:$idx))), - (FMLSv2i64_indexed - V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (AArch64dup (f64 (fneg FPR64Op:$Rm))))), - (FMLSv2i64_indexed V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; - - // 2 variants for 32-bit scalar version: extract from .2s or from .4s - def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), - (vector_extract (v4f32 (fneg V128:$Rm)), - VectorIndexS:$idx))), - (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, - V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), - (vector_extract (v4f32 (insert_subvector undef, - (v2f32 (fneg V64:$Rm)), - (i64 0))), - VectorIndexS:$idx))), - (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, - (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; - - // 1 variant for 64-bit scalar version: extract from .1d or from .2d - def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), - (vector_extract (v2f64 (fneg V128:$Rm)), - VectorIndexS:$idx))), - (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn, - V128:$Rm, VectorIndexS:$idx)>; -} - -defm : FMLSIndexedAfterNegPatterns< - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; -defm : FMLSIndexedAfterNegPatterns< - TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >; - -defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>; -defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", fmul>; - -def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))), - (FMULv2i32_indexed V64:$Rn, - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), - (i64 0))>; -def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))), - (FMULv4i32_indexed V128:$Rn, - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), - (i64 0))>; -def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))), - (FMULv2i64_indexed V128:$Rn, - 
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub), - (i64 0))>; - -defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>; -defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>; - -defm SQDMULH : SIMDIndexedHSPatterns; -defm SQRDMULH : SIMDIndexedHSPatterns; - -// Generated by MachineCombine -defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>; -defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>; - -defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>; -defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal", - TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; -defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl", - TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; -defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", - int_aarch64_neon_smull>; -defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", - int_aarch64_neon_sqadd>; -defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", - int_aarch64_neon_sqsub>; -defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah", - int_aarch64_neon_sqrdmlah>; -defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh", - int_aarch64_neon_sqrdmlsh>; -defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; -defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", - TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; -defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl", - TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; -defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", - int_aarch64_neon_umull>; - -// A scalar sqdmull with the second operand being a vector lane can be -// handled directly with the indexed instruction encoding. 
-def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), - (vector_extract (v4i32 V128:$Vm), - VectorIndexS:$idx)), - (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; - -// Match add node and also treat an 'or' node is as an 'add' if the or'ed operands -// have no common bits. -def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs), - [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{ - if (N->getOpcode() == ISD::ADD) - return true; - return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1)); -}]> { - let GISelPredicateCode = [{ - // Only handle G_ADD for now. FIXME. build capability to compute whether - // operands of G_OR have common bits set or not. - return MI.getOpcode() == TargetOpcode::G_ADD; - }]; -} - - -//---------------------------------------------------------------------------- -// AdvSIMD scalar shift instructions -//---------------------------------------------------------------------------- -defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">; -defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">; -defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">; -defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">; -// Codegen patterns for the above. We don't put these directly on the -// instructions because TableGen's type inference can't handle the truth. -// Having the same base pattern for fp <--> int totally freaks it out. 
-def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm), - (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm), - (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)), - (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)), - (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn), - vecshiftR64:$imm)), - (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn), - vecshiftR64:$imm)), - (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm), - (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), - (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn), - vecshiftR64:$imm)), - (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), - (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn), - vecshiftR64:$imm)), - (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm), - (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>; - -// Patterns for FP16 Instrinsics - requires reg copy to/from as i16s not supported. 
- -def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)), - (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>; -def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)), - (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>; -def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)), - (SCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>; -def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp - (and FPR32:$Rn, (i32 65535)), - vecshiftR16:$imm)), - (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>; -def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)), - (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>; -def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)), - (UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>; -def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)), - (i32 (INSERT_SUBREG - (i32 (IMPLICIT_DEF)), - (FCVTZSh FPR16:$Rn, vecshiftR32:$imm), - hsub))>; -def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)), - (i64 (INSERT_SUBREG - (i64 (IMPLICIT_DEF)), - (FCVTZSh FPR16:$Rn, vecshiftR64:$imm), - hsub))>; -def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)), - (i32 (INSERT_SUBREG - (i32 (IMPLICIT_DEF)), - (FCVTZUh FPR16:$Rn, vecshiftR32:$imm), - hsub))>; -def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)), - (i64 (INSERT_SUBREG - (i64 (IMPLICIT_DEF)), - (FCVTZUh FPR16:$Rn, vecshiftR64:$imm), - hsub))>; -def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))), - (i32 (INSERT_SUBREG - (i32 (IMPLICIT_DEF)), - (FACGE16 FPR16:$Rn, FPR16:$Rm), - hsub))>; -def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))), - (i32 (INSERT_SUBREG - (i32 (IMPLICIT_DEF)), - (FACGT16 FPR16:$Rn, FPR16:$Rm), - hsub))>; - -defm SHL : SIMDScalarLShiftD< 0, 
0b01010, "shl", AArch64vshl>; -defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">; -defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn", - int_aarch64_neon_sqrshrn>; -defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun", - int_aarch64_neon_sqrshrun>; -defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; -defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; -defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn", - int_aarch64_neon_sqshrn>; -defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun", - int_aarch64_neon_sqshrun>; -defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">; -defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>; -defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra", - TriOpFrag<(add node:$LHS, - (AArch64srshri node:$MHS, node:$RHS))>>; -defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>; -defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra", - TriOpFrag<(add_and_or_is_add node:$LHS, - (AArch64vashr node:$MHS, node:$RHS))>>; -defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn", - int_aarch64_neon_uqrshrn>; -defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; -defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn", - int_aarch64_neon_uqshrn>; -defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>; -defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra", - TriOpFrag<(add node:$LHS, - (AArch64urshri node:$MHS, node:$RHS))>>; -defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>; -defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", - TriOpFrag<(add_and_or_is_add node:$LHS, - (AArch64vlshr node:$MHS, node:$RHS))>>; - -//---------------------------------------------------------------------------- -// AdvSIMD vector shift instructions -//---------------------------------------------------------------------------- -defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>; -defm 
FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>; -defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf", - int_aarch64_neon_vcvtfxs2fp>; -defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", - int_aarch64_neon_rshrn>; -defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>; -defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn", - BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>; -defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>; -def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), - (i32 vecshiftL64:$imm))), - (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>; -defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn", - int_aarch64_neon_sqrshrn>; -defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun", - int_aarch64_neon_sqrshrun>; -defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; -defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; -defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn", - int_aarch64_neon_sqshrn>; -defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun", - int_aarch64_neon_sqshrun>; -defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>; -def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), - (i32 vecshiftR64:$imm))), - (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>; -defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>; -defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra", - TriOpFrag<(add node:$LHS, - (AArch64srshri node:$MHS, node:$RHS))> >; -defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", - BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>; - -defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>; -defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", - TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; -defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf", - 
int_aarch64_neon_vcvtfxu2fp>; -defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn", - int_aarch64_neon_uqrshrn>; -defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; -defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn", - int_aarch64_neon_uqshrn>; -defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>; -defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra", - TriOpFrag<(add node:$LHS, - (AArch64urshri node:$MHS, node:$RHS))> >; -defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll", - BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>; -defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>; -defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra", - TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >; - -// RADDHN patterns for when RSHRN shifts by half the size of the vector element -def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))), - (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>; -def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))), - (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>; -def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))), - (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>; - -// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element -def : Pat<(v16i8 (concat_vectors - (v8i8 V64:$Vd), - (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))), - (RADDHNv8i16_v16i8 - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, - (v8i16 (MOVIv2d_ns (i32 0))))>; -def : Pat<(v8i16 (concat_vectors - (v4i16 V64:$Vd), - (v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))))), - (RADDHNv4i32_v8i16 - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, - (v4i32 (MOVIv2d_ns (i32 0))))>; -def : Pat<(v4i32 (concat_vectors - (v2i32 V64:$Vd), - (v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))))), - (RADDHNv2i64_v4i32 - 
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, - (v2i64 (MOVIv2d_ns (i32 0))))>; - -// SHRN patterns for when a logical right shift was used instead of arithmetic -// (the immediate guarantees no sign bits actually end up in the result so it -// doesn't matter). -def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))), - (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>; -def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))), - (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>; -def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))), - (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>; - -def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd), - (trunc (AArch64vlshr (v8i16 V128:$Rn), - vecshiftR16Narrow:$imm)))), - (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR16Narrow:$imm)>; -def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd), - (trunc (AArch64vlshr (v4i32 V128:$Rn), - vecshiftR32Narrow:$imm)))), - (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR32Narrow:$imm)>; -def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd), - (trunc (AArch64vlshr (v2i64 V128:$Rn), - vecshiftR64Narrow:$imm)))), - (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR32Narrow:$imm)>; - -// Vector sign and zero extensions are implemented with SSHLL and USSHLL. -// Anyexts are implemented as zexts. 
-def : Pat<(v8i16 (sext (v8i8 V64:$Rn))), (SSHLLv8i8_shift V64:$Rn, (i32 0))>; -def : Pat<(v8i16 (zext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; -def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; -def : Pat<(v4i32 (sext (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>; -def : Pat<(v4i32 (zext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; -def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; -def : Pat<(v2i64 (sext (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>; -def : Pat<(v2i64 (zext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; -def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; -// Also match an extend from the upper half of a 128 bit source register. -def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), - (USHLLv16i8_shift V128:$Rn, (i32 0))>; -def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), - (USHLLv16i8_shift V128:$Rn, (i32 0))>; -def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), - (SSHLLv16i8_shift V128:$Rn, (i32 0))>; -def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), - (USHLLv8i16_shift V128:$Rn, (i32 0))>; -def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), - (USHLLv8i16_shift V128:$Rn, (i32 0))>; -def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), - (SSHLLv8i16_shift V128:$Rn, (i32 0))>; -def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), - (USHLLv4i32_shift V128:$Rn, (i32 0))>; -def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), - (USHLLv4i32_shift V128:$Rn, (i32 0))>; -def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), - (SSHLLv4i32_shift V128:$Rn, (i32 0))>; - -// Vector shift sxtl aliases -def : InstAlias<"sxtl.8h $dst, $src1", - (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl $dst.8h, 
$src1.8b", - (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl.4s $dst, $src1", - (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl $dst.4s, $src1.4h", - (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl.2d $dst, $src1", - (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl $dst.2d, $src1.2s", - (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; - -// Vector shift sxtl2 aliases -def : InstAlias<"sxtl2.8h $dst, $src1", - (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2 $dst.8h, $src1.16b", - (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2.4s $dst, $src1", - (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2 $dst.4s, $src1.8h", - (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2.2d $dst, $src1", - (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2 $dst.2d, $src1.4s", - (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; - -// Vector shift uxtl aliases -def : InstAlias<"uxtl.8h $dst, $src1", - (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl $dst.8h, $src1.8b", - (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl.4s $dst, $src1", - (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl $dst.4s, $src1.4h", - (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl.2d $dst, $src1", - (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl $dst.2d, $src1.2s", - (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; - -// Vector shift uxtl2 aliases -def : InstAlias<"uxtl2.8h $dst, $src1", - (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2 $dst.8h, $src1.16b", - (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2.4s $dst, $src1", - (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2 $dst.4s, $src1.8h", - (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2.2d $dst, $src1", - 
(USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2 $dst.2d, $src1.4s", - (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; - -// If an integer is about to be converted to a floating point value, -// just load it on the floating point unit. -// These patterns are more complex because floating point loads do not -// support sign extension. -// The sign extension has to be explicitly added and is only supported for -// one step: byte-to-half, half-to-word, word-to-doubleword. -// SCVTF GPR -> FPR is 9 cycles. -// SCVTF FPR -> FPR is 4 cyclces. -// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles. -// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR -// and still being faster. -// However, this is not good for code size. -// 8-bits -> float. 2 sizes step-up. -class SExtLoadi8CVTf32Pat - : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv8i8_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - INST, - bsub), - 0), - dsub)), - 0), - ssub)))>, - Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>; - -def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext), - (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>; -def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext), - (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>; -def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset), - (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>; -def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset), - (LDURBi GPR64sp:$Rn, simm9:$offset)>; - -// 16-bits -> float. 1 size step-up. 
-class SExtLoadi16CVTf32Pat - : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - INST, - hsub), - 0), - ssub)))>, Requires<[NotForCodeSize]>; - -def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext), - (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>; -def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext), - (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>; -def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), - (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; -def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset), - (LDURHi GPR64sp:$Rn, simm9:$offset)>; - -// 32-bits to 32-bits are handled in target specific dag combine: -// performIntToFpCombine. -// 64-bits integer to 32-bits floating point, not possible with -// SCVTF on floating point registers (both source and destination -// must have the same size). - -// Here are the patterns for 8, 16, 32, and 64-bits to double. -// 8-bits -> double. 3 size step-up: give up. -// 16-bits -> double. 2 size step. -class SExtLoadi16CVTf64Pat - : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))), - (SCVTFv1i64 (f64 (EXTRACT_SUBREG - (SSHLLv2i32_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv4i16_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - INST, - hsub), - 0), - dsub)), - 0), - dsub)))>, - Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>; - -def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext), - (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>; -def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext), - (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>; -def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), - (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; -def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset), - (LDURHi GPR64sp:$Rn, simm9:$offset)>; -// 32-bits -> double. 
1 size step-up. -class SExtLoadi32CVTf64Pat - : Pat <(f64 (sint_to_fp (i32 (load addrmode)))), - (SCVTFv1i64 (f64 (EXTRACT_SUBREG - (SSHLLv2i32_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - INST, - ssub), - 0), - dsub)))>, Requires<[NotForCodeSize]>; - -def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext), - (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>; -def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext), - (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>; -def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), - (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>; -def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset), - (LDURSi GPR64sp:$Rn, simm9:$offset)>; - -// 64-bits -> double are handled in target specific dag combine: -// performIntToFpCombine. - - -//---------------------------------------------------------------------------- -// AdvSIMD Load-Store Structure -//---------------------------------------------------------------------------- -defm LD1 : SIMDLd1Multiple<"ld1">; -defm LD2 : SIMDLd2Multiple<"ld2">; -defm LD3 : SIMDLd3Multiple<"ld3">; -defm LD4 : SIMDLd4Multiple<"ld4">; - -defm ST1 : SIMDSt1Multiple<"st1">; -defm ST2 : SIMDSt2Multiple<"st2">; -defm ST3 : SIMDSt3Multiple<"st3">; -defm ST4 : SIMDSt4Multiple<"st4">; - -class Ld1Pat - : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>; - -def : Ld1Pat; -def : Ld1Pat; -def : Ld1Pat; -def : Ld1Pat; -def : Ld1Pat; -def : Ld1Pat; -def : Ld1Pat; -def : Ld1Pat; - -class St1Pat - : Pat<(store ty:$Vt, GPR64sp:$Rn), - (INST ty:$Vt, GPR64sp:$Rn)>; - -def : St1Pat; -def : St1Pat; -def : St1Pat; -def : St1Pat; -def : St1Pat; -def : St1Pat; -def : St1Pat; -def : St1Pat; - -//--- -// Single-element -//--- - -defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>; -defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>; -defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>; -defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", 
"Four", 4, 8, 16, 32>; -let mayLoad = 1, hasSideEffects = 0 in { -defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>; -defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>; -defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>; -defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>; -defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>; -defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>; -defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>; -defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>; -defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>; -defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>; -defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>; -defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>; -defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>; -defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>; -defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>; -defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>; -} - -def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), - (LD1Rv8b GPR64sp:$Rn)>; -def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), - (LD1Rv16b GPR64sp:$Rn)>; -def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), - (LD1Rv4h GPR64sp:$Rn)>; -def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), - (LD1Rv8h GPR64sp:$Rn)>; -def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), - (LD1Rv2s GPR64sp:$Rn)>; -def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), - (LD1Rv4s GPR64sp:$Rn)>; -def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), - (LD1Rv2d GPR64sp:$Rn)>; -def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), - (LD1Rv1d GPR64sp:$Rn)>; -// Grab the floating 
point version too -def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), - (LD1Rv2s GPR64sp:$Rn)>; -def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), - (LD1Rv4s GPR64sp:$Rn)>; -def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), - (LD1Rv2d GPR64sp:$Rn)>; -def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), - (LD1Rv1d GPR64sp:$Rn)>; -def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), - (LD1Rv4h GPR64sp:$Rn)>; -def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), - (LD1Rv8h GPR64sp:$Rn)>; -def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))), - (LD1Rv4h GPR64sp:$Rn)>; -def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))), - (LD1Rv8h GPR64sp:$Rn)>; - -class Ld1Lane128Pat - : Pat<(vector_insert (VTy VecListOne128:$Rd), - (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), - (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>; - -def : Ld1Lane128Pat; -def : Ld1Lane128Pat; -def : Ld1Lane128Pat; -def : Ld1Lane128Pat; -def : Ld1Lane128Pat; -def : Ld1Lane128Pat; -def : Ld1Lane128Pat; -def : Ld1Lane128Pat; - -// Generate LD1 for extload if memory type does not match the -// destination type, for example: -// -// (v4i32 (insert_vector_elt (load anyext from i8) idx)) -// -// In this case, the index must be adjusted to match LD1 type. -// -class Ld1Lane128IdxOpPat - : Pat<(vector_insert (VTy VecListOne128:$Rd), - (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), - (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>; - -def VectorIndexStoH : SDNodeXFormgetTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64); -}]>; -def VectorIndexStoB : SDNodeXFormgetTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64); -}]>; -def VectorIndexHtoB : SDNodeXFormgetTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64); -}]>; - -def : Ld1Lane128IdxOpPat; -def : Ld1Lane128IdxOpPat; -def : Ld1Lane128IdxOpPat; - -// Same as above, but the first element is populated using -// scalar_to_vector + insert_subvector instead of insert_vector_elt. 
-class Ld1Lane128FirstElm - : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))), - (ResultTy (EXTRACT_SUBREG - (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>; - -def : Ld1Lane128FirstElm; -def : Ld1Lane128FirstElm; -def : Ld1Lane128FirstElm; - -class Ld1Lane64Pat - : Pat<(vector_insert (VTy VecListOne64:$Rd), - (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), - (EXTRACT_SUBREG - (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), - VecIndex:$idx, GPR64sp:$Rn), - dsub)>; - -def : Ld1Lane64Pat; -def : Ld1Lane64Pat; -def : Ld1Lane64Pat; -def : Ld1Lane64Pat; -def : Ld1Lane64Pat; -def : Ld1Lane64Pat; - - -defm LD1 : SIMDLdSt1SingleAliases<"ld1">; -defm LD2 : SIMDLdSt2SingleAliases<"ld2">; -defm LD3 : SIMDLdSt3SingleAliases<"ld3">; -defm LD4 : SIMDLdSt4SingleAliases<"ld4">; - -// Stores -defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>; -defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>; -defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>; -defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>; - -let AddedComplexity = 19 in -class St1Lane128Pat - : Pat<(scalar_store - (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), - GPR64sp:$Rn), - (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>; - -def : St1Lane128Pat; -def : St1Lane128Pat; -def : St1Lane128Pat; -def : St1Lane128Pat; -def : St1Lane128Pat; -def : St1Lane128Pat; -def : St1Lane128Pat; -def : St1Lane128Pat; - -let AddedComplexity = 19 in -class St1Lane64Pat - : Pat<(scalar_store - (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), - GPR64sp:$Rn), - (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), - VecIndex:$idx, GPR64sp:$Rn)>; - -def : St1Lane64Pat; -def : St1Lane64Pat; -def : St1Lane64Pat; -def : St1Lane64Pat; -def : St1Lane64Pat; -def : St1Lane64Pat; - -multiclass St1LanePost64Pat { - def : Pat<(scalar_store - (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), - GPR64sp:$Rn, 
offset), - (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), - VecIndex:$idx, GPR64sp:$Rn, XZR)>; - - def : Pat<(scalar_store - (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), - GPR64sp:$Rn, GPR64:$Rm), - (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), - VecIndex:$idx, GPR64sp:$Rn, $Rm)>; -} - -defm : St1LanePost64Pat; -defm : St1LanePost64Pat; -defm : St1LanePost64Pat; -defm : St1LanePost64Pat; -defm : St1LanePost64Pat; -defm : St1LanePost64Pat; -defm : St1LanePost64Pat; -defm : St1LanePost64Pat; - -multiclass St1LanePost128Pat { - def : Pat<(scalar_store - (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), - GPR64sp:$Rn, offset), - (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>; - - def : Pat<(scalar_store - (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), - GPR64sp:$Rn, GPR64:$Rm), - (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>; -} - -defm : St1LanePost128Pat; -defm : St1LanePost128Pat; -defm : St1LanePost128Pat; -defm : St1LanePost128Pat; -defm : St1LanePost128Pat; -defm : St1LanePost128Pat; -defm : St1LanePost128Pat; -defm : St1LanePost128Pat; - -let mayStore = 1, hasSideEffects = 0 in { -defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>; -defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>; -defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>; -defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>; -defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>; -defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>; -defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>; -defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>; -defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>; -defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>; -defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>; -defm ST4 : 
SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>; -} - -defm ST1 : SIMDLdSt1SingleAliases<"st1">; -defm ST2 : SIMDLdSt2SingleAliases<"st2">; -defm ST3 : SIMDLdSt3SingleAliases<"st3">; -defm ST4 : SIMDLdSt4SingleAliases<"st4">; - -//---------------------------------------------------------------------------- -// Crypto extensions -//---------------------------------------------------------------------------- - -let Predicates = [HasAES] in { -def AESErr : AESTiedInst<0b0100, "aese", int_aarch64_crypto_aese>; -def AESDrr : AESTiedInst<0b0101, "aesd", int_aarch64_crypto_aesd>; -def AESMCrr : AESInst< 0b0110, "aesmc", int_aarch64_crypto_aesmc>; -def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>; -} - -// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required -// for AES fusion on some CPUs. -let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in { -def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">, - Sched<[WriteVq]>; -def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">, - Sched<[WriteVq]>; -} - -// Only use constrained versions of AES(I)MC instructions if they are paired with -// AESE/AESD. 
-def : Pat<(v16i8 (int_aarch64_crypto_aesmc - (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1), - (v16i8 V128:$src2))))), - (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1), - (v16i8 V128:$src2)))))>, - Requires<[HasFuseAES]>; - -def : Pat<(v16i8 (int_aarch64_crypto_aesimc - (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1), - (v16i8 V128:$src2))))), - (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1), - (v16i8 V128:$src2)))))>, - Requires<[HasFuseAES]>; - -let Predicates = [HasSHA2] in { -def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_aarch64_crypto_sha1c>; -def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_aarch64_crypto_sha1p>; -def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_aarch64_crypto_sha1m>; -def SHA1SU0rrr : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>; -def SHA256Hrrr : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>; -def SHA256H2rrr : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>; -def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>; - -def SHA1Hrr : SHAInstSS< 0b0000, "sha1h", int_aarch64_crypto_sha1h>; -def SHA1SU1rr : SHATiedInstVV<0b0001, "sha1su1", int_aarch64_crypto_sha1su1>; -def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>; -} - -//---------------------------------------------------------------------------- -// Compiler-pseudos -//---------------------------------------------------------------------------- -// FIXME: Like for X86, these should go in their own separate .td file. - -def def32 : PatLeaf<(i32 GPR32:$src), [{ - return isDef32(*N); -}]>; - -// In the case of a 32-bit def that is known to implicitly zero-extend, -// we can use a SUBREG_TO_REG. -def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>; - -// For an anyext, we don't care what the high bits are, so we can perform an -// INSERT_SUBREF into an IMPLICIT_DEF. 
-def : Pat<(i64 (anyext GPR32:$src)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>; - -// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and -// then assert the extension has happened. -def : Pat<(i64 (zext GPR32:$src)), - (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>; - -// To sign extend, we use a signed bitfield move instruction (SBFM) on the -// containing super-reg. -def : Pat<(i64 (sext GPR32:$src)), - (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; -def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>; -def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>; -def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>; -def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>; -def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>; -def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>; -def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>; - -def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)), - (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), - (i64 (i32shift_sext_i8 imm0_31:$imm)))>; -def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)), - (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), - (i64 (i64shift_sext_i8 imm0_63:$imm)))>; - -def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)), - (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), - (i64 (i32shift_sext_i16 imm0_31:$imm)))>; -def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)), - (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), - (i64 (i64shift_sext_i16 imm0_63:$imm)))>; - -def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)), - (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), - (i64 (i64shift_a imm0_63:$imm)), - (i64 (i64shift_sext_i32 imm0_63:$imm)))>; - -// sra patterns have an AddedComplexity of 10, so 
make sure we have a higher -// AddedComplexity for the following patterns since we want to match sext + sra -// patterns before we attempt to match a single sra node. -let AddedComplexity = 20 in { -// We support all sext + sra combinations which preserve at least one bit of the -// original value which is to be sign extended. E.g. we support shifts up to -// bitwidth-1 bits. -def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)), - (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>; -def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)), - (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>; - -def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)), - (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>; -def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)), - (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>; - -def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)), - (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), - (i64 imm0_31:$imm), 31)>; -} // AddedComplexity = 20 - -// To truncate, we can simply extract from a subregister. -def : Pat<(i32 (trunc GPR64sp:$src)), - (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>; - -// __builtin_trap() uses the BRK instruction on AArch64. -def : Pat<(trap), (BRK 1)>; -def : Pat<(debugtrap), (BRK 0xF000)>; - -def ubsan_trap_xform : SDNodeXFormgetTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32); -}]>; - -def ubsan_trap_imm : TImmLeaf(Imm); -}], ubsan_trap_xform>; - -def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>; - -// Multiply high patterns which multiply the lower subvector using smull/umull -// and the upper subvector with smull2/umull2. Then shuffle the high the high -// part of both results together. 
-def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)), - (UZP2v16i8 - (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub), - (EXTRACT_SUBREG V128:$Rm, dsub)), - (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>; -def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)), - (UZP2v8i16 - (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub), - (EXTRACT_SUBREG V128:$Rm, dsub)), - (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>; -def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)), - (UZP2v4i32 - (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub), - (EXTRACT_SUBREG V128:$Rm, dsub)), - (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>; - -def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)), - (UZP2v16i8 - (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub), - (EXTRACT_SUBREG V128:$Rm, dsub)), - (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>; -def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)), - (UZP2v8i16 - (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub), - (EXTRACT_SUBREG V128:$Rm, dsub)), - (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>; -def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)), - (UZP2v4i32 - (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub), - (EXTRACT_SUBREG V128:$Rm, dsub)), - (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>; - -// Conversions within AdvSIMD types in the same register size are free. -// But because we need a consistent lane ordering, in big endian many -// conversions require one or more REV instructions. -// -// Consider a simple memory load followed by a bitconvert then a store. -// v0 = load v2i32 -// v1 = BITCAST v2i32 v0 to v4i16 -// store v4i16 v2 -// -// In big endian mode every memory access has an implicit byte swap. LDR and -// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that -// is, they treat the vector as a sequence of elements to be byte-swapped. -// The two pairs of instructions are fundamentally incompatible. We've decided -// to use LD1/ST1 only to simplify compiler implementation. -// -// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. 
This makes -// the original code sequence: -// v0 = load v2i32 -// v1 = REV v2i32 (implicit) -// v2 = BITCAST v2i32 v1 to v4i16 -// v3 = REV v4i16 v2 (implicit) -// store v4i16 v3 -// -// But this is now broken - the value stored is different to the value loaded -// due to lane reordering. To fix this, on every BITCAST we must perform two -// other REVs: -// v0 = load v2i32 -// v1 = REV v2i32 (implicit) -// v2 = REV v2i32 -// v3 = BITCAST v2i32 v2 to v4i16 -// v4 = REV v4i16 -// v5 = REV v4i16 v4 (implicit) -// store v4i16 v5 -// -// This means an extra two instructions, but actually in most cases the two REV -// instructions can be combined into one. For example: -// (REV64_2s (REV64_4h X)) === (REV32_4h X) -// -// There is also no 128-bit REV instruction. This must be synthesized with an -// EXT instruction. -// -// Most bitconverts require some sort of conversion. The only exceptions are: -// a) Identity conversions - vNfX <-> vNiX -// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX -// - -// Natural vector casts (64 bit) -def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>; -def : Pat<(v4bf16 (AArch64NvCast (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>; -def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; - -def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; -def : Pat<(v4bf16 (AArch64NvCast (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>; -def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v1i64 (AArch64NvCast 
(v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; - -def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>; -def : Pat<(v4bf16 (AArch64NvCast (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>; -def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2f32 (AArch64NvCast (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; - -def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>; -def : Pat<(v4bf16 (AArch64NvCast (f64 FPR64:$src))), (v4bf16 FPR64:$src)>; -def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1f64 (AArch64NvCast (f64 FPR64:$src))), (v1f64 FPR64:$src)>; - -def : Pat<(v8i8 (AArch64NvCast (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1f64 (AArch64NvCast (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>; - -// Natural vector casts (128 bit) -def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8bf16 (AArch64NvCast (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>; -def : Pat<(v4i32 
(AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2f64 (AArch64NvCast (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; - -def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8bf16 (AArch64NvCast (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>; -def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v4f32 (AArch64NvCast (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v2f64 (AArch64NvCast (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; - -def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8bf16 (AArch64NvCast (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>; -def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v4f32 (AArch64NvCast (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v2f64 (AArch64NvCast (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; - -def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8bf16 (AArch64NvCast (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>; -def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), 
(v2i64 FPR128:$src)>; -def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v2f64 (AArch64NvCast (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>; - -def : Pat<(v16i8 (AArch64NvCast (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v8f16 (AArch64NvCast (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8bf16 (AArch64NvCast (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>; -def : Pat<(v2f64 (AArch64NvCast (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; - -def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v8f16 (AArch64NvCast (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8bf16 (AArch64NvCast (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>; -def : Pat<(v4f32 (AArch64NvCast (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; - -let Predicates = [IsLE] in { -def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; -def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; -def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; -def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; -def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; -def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; - -def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), 
- (COPY_TO_REGCLASS V64:$Vn, GPR64)>; -def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), - (COPY_TO_REGCLASS V64:$Vn, GPR64)>; -def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), - (COPY_TO_REGCLASS V64:$Vn, GPR64)>; -def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), - (COPY_TO_REGCLASS V64:$Vn, GPR64)>; -def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))), - (COPY_TO_REGCLASS V64:$Vn, GPR64)>; -def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), - (COPY_TO_REGCLASS V64:$Vn, GPR64)>; -def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), - (COPY_TO_REGCLASS V64:$Vn, GPR64)>; -} -let Predicates = [IsBE] in { -def : Pat<(v8i8 (bitconvert GPR64:$Xn)), - (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; -def : Pat<(v4i16 (bitconvert GPR64:$Xn)), - (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; -def : Pat<(v2i32 (bitconvert GPR64:$Xn)), - (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; -def : Pat<(v4f16 (bitconvert GPR64:$Xn)), - (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; -def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), - (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; -def : Pat<(v2f32 (bitconvert GPR64:$Xn)), - (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; - -def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), - (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; -def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), - (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; -def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), - (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; -def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), - (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; -def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))), - (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; -def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), - (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; -} -def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; -def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; -def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))), - (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 
-def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)), - (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; -def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)), - (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; -def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>; - -def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))), - (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>; -def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))), - (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>; -def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))), - (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; -def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))), - (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>; -def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), - (COPY_TO_REGCLASS V64:$Vn, GPR64)>; - -def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>; -def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>; - -let Predicates = [IsLE] in { -def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; -} -let Predicates = [IsBE] in { -def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), - (v1i64 (REV64v2i32 FPR64:$src))>; -def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), - (v1i64 (REV64v4i16 FPR64:$src))>; -def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), - (v1i64 (REV64v8i8 FPR64:$src))>; -def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), - (v1i64 (REV64v4i16 FPR64:$src))>; -def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), - (v1i64 (REV64v4i16 FPR64:$src))>; -def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), - (v1i64 (REV64v2i32 FPR64:$src))>; -} -def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 
FPR64:$src)>; - -let Predicates = [IsLE] in { -def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>; -} -let Predicates = [IsBE] in { -def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), - (v2i32 (REV64v2i32 FPR64:$src))>; -def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), - (v2i32 (REV32v4i16 FPR64:$src))>; -def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), - (v2i32 (REV32v8i8 FPR64:$src))>; -def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), - (v2i32 (REV64v2i32 FPR64:$src))>; -def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), - (v2i32 (REV64v2i32 FPR64:$src))>; -def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), - (v2i32 (REV32v4i16 FPR64:$src))>; -def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), - (v2i32 (REV32v4i16 FPR64:$src))>; -} -def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; - -let Predicates = [IsLE] in { -def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>; -} -let Predicates = [IsBE] in { -def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), - (v4i16 (REV64v4i16 FPR64:$src))>; -def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), - (v4i16 (REV32v4i16 FPR64:$src))>; -def : Pat<(v4i16 (bitconvert (v8i8 
FPR64:$src))), - (v4i16 (REV16v8i8 FPR64:$src))>; -def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), - (v4i16 (REV64v4i16 FPR64:$src))>; -def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), - (v4i16 (REV32v4i16 FPR64:$src))>; -def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), - (v4i16 (REV64v4i16 FPR64:$src))>; -} -def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>; - -let Predicates = [IsLE] in { -def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>; -def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>; -def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>; -def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>; -def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>; -def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>; - -def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>; -def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>; -def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>; -def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), (v4bf16 FPR64:$src)>; -def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>; -def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>; -} -let Predicates = [IsBE] in { -def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), - (v4f16 (REV64v4i16 FPR64:$src))>; -def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), - (v4f16 (REV32v4i16 FPR64:$src))>; -def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), - (v4f16 (REV16v8i8 FPR64:$src))>; -def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), - (v4f16 (REV64v4i16 FPR64:$src))>; -def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), - (v4f16 (REV32v4i16 FPR64:$src))>; -def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), - (v4f16 (REV64v4i16 FPR64:$src))>; - -def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), - (v4bf16 
(REV64v4i16 FPR64:$src))>; -def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), - (v4bf16 (REV32v4i16 FPR64:$src))>; -def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), - (v4bf16 (REV16v8i8 FPR64:$src))>; -def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), - (v4bf16 (REV64v4i16 FPR64:$src))>; -def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), - (v4bf16 (REV32v4i16 FPR64:$src))>; -def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), - (v4bf16 (REV64v4i16 FPR64:$src))>; -} -def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; -def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>; - -let Predicates = [IsLE] in { -def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), (v8i8 FPR64:$src)>; -} -let Predicates = [IsBE] in { -def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), - (v8i8 (REV64v8i8 FPR64:$src))>; -def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), - (v8i8 (REV32v8i8 FPR64:$src))>; -def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), - (v8i8 (REV16v8i8 FPR64:$src))>; -def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), - (v8i8 (REV64v8i8 FPR64:$src))>; -def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), - (v8i8 (REV32v8i8 FPR64:$src))>; -def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), - (v8i8 (REV64v8i8 FPR64:$src))>; -def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), - (v8i8 (REV16v8i8 FPR64:$src))>; -def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), - (v8i8 (REV16v8i8 FPR64:$src))>; -} - -let Predicates = [IsLE] in { -def : Pat<(f64 (bitconvert 
(v2i32 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), (f64 FPR64:$src)>; -} -let Predicates = [IsBE] in { -def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), - (f64 (REV64v2i32 FPR64:$src))>; -def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), - (f64 (REV64v4i16 FPR64:$src))>; -def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), - (f64 (REV64v2i32 FPR64:$src))>; -def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), - (f64 (REV64v8i8 FPR64:$src))>; -def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), - (f64 (REV64v4i16 FPR64:$src))>; -def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), - (f64 (REV64v4i16 FPR64:$src))>; -} -def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; - -let Predicates = [IsLE] in { -def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>; -} -let Predicates = [IsBE] in { -def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), - (v1f64 (REV64v2i32 FPR64:$src))>; -def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), - (v1f64 (REV64v4i16 FPR64:$src))>; -def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), - (v1f64 (REV64v8i8 FPR64:$src))>; -def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), - (v1f64 (REV64v2i32 FPR64:$src))>; -def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), - (v1f64 (REV64v4i16 
FPR64:$src))>; -def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), - (v1f64 (REV64v4i16 FPR64:$src))>; -} -def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; - -let Predicates = [IsLE] in { -def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>; -} -let Predicates = [IsBE] in { -def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), - (v2f32 (REV64v2i32 FPR64:$src))>; -def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), - (v2f32 (REV32v4i16 FPR64:$src))>; -def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), - (v2f32 (REV32v8i8 FPR64:$src))>; -def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), - (v2f32 (REV64v2i32 FPR64:$src))>; -def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), - (v2f32 (REV64v2i32 FPR64:$src))>; -def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), - (v2f32 (REV32v4i16 FPR64:$src))>; -def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), - (v2f32 (REV32v4i16 FPR64:$src))>; -} -def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; - -let Predicates = [IsLE] in { -def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>; -def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>; -def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>; -def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>; -def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>; -def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>; -def : 
Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>; -def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>; -} -let Predicates = [IsBE] in { -def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), - (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; -def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), - (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), - (REV64v4i32 FPR128:$src), (i32 8)))>; -def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), - (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), - (REV64v8i16 FPR128:$src), (i32 8)))>; -def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), - (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), - (REV64v8i16 FPR128:$src), (i32 8)))>; -def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), - (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), - (REV64v8i16 FPR128:$src), (i32 8)))>; -def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), - (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; -def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), - (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), - (REV64v4i32 FPR128:$src), (i32 8)))>; -def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), - (f128 (EXTv16i8 (REV64v16i8 FPR128:$src), - (REV64v16i8 FPR128:$src), (i32 8)))>; -} - -let Predicates = [IsLE] in { -def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; -} -let Predicates = [IsBE] in { -def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), - (v2f64 (EXTv16i8 FPR128:$src, - FPR128:$src, (i32 8)))>; -def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), - (v2f64 (REV64v4i32 FPR128:$src))>; 
-def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), - (v2f64 (REV64v8i16 FPR128:$src))>; -def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), - (v2f64 (REV64v8i16 FPR128:$src))>; -def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), - (v2f64 (REV64v8i16 FPR128:$src))>; -def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), - (v2f64 (REV64v16i8 FPR128:$src))>; -def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), - (v2f64 (REV64v4i32 FPR128:$src))>; -} -def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>; - -let Predicates = [IsLE] in { -def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; -} -let Predicates = [IsBE] in { -def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), - (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src), - (REV64v4i32 FPR128:$src), (i32 8)))>; -def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), - (v4f32 (REV32v8i16 FPR128:$src))>; -def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), - (v4f32 (REV32v8i16 FPR128:$src))>; -def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), - (v4f32 (REV32v8i16 FPR128:$src))>; -def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), - (v4f32 (REV32v16i8 FPR128:$src))>; -def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), - (v4f32 (REV64v4i32 FPR128:$src))>; -def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), - (v4f32 (REV64v4i32 FPR128:$src))>; -} -def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; - -let Predicates = [IsLE] in { -def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 
(bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>; -} -let Predicates = [IsBE] in { -def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), - (v2i64 (EXTv16i8 FPR128:$src, - FPR128:$src, (i32 8)))>; -def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), - (v2i64 (REV64v4i32 FPR128:$src))>; -def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), - (v2i64 (REV64v8i16 FPR128:$src))>; -def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), - (v2i64 (REV64v16i8 FPR128:$src))>; -def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), - (v2i64 (REV64v4i32 FPR128:$src))>; -def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), - (v2i64 (REV64v8i16 FPR128:$src))>; -def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), - (v2i64 (REV64v8i16 FPR128:$src))>; -} -def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; - -let Predicates = [IsLE] in { -def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>; -} -let Predicates = [IsBE] in { -def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), - (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src), - (REV64v4i32 FPR128:$src), - (i32 8)))>; -def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), - (v4i32 (REV64v4i32 
FPR128:$src))>; -def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), - (v4i32 (REV32v8i16 FPR128:$src))>; -def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), - (v4i32 (REV32v16i8 FPR128:$src))>; -def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), - (v4i32 (REV64v4i32 FPR128:$src))>; -def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), - (v4i32 (REV32v8i16 FPR128:$src))>; -def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), - (v4i32 (REV32v8i16 FPR128:$src))>; -} -def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; - -let Predicates = [IsLE] in { -def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; -} -let Predicates = [IsBE] in { -def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), - (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src), - (REV64v8i16 FPR128:$src), - (i32 8)))>; -def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), - (v8i16 (REV64v8i16 FPR128:$src))>; -def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), - (v8i16 (REV32v8i16 FPR128:$src))>; -def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), - (v8i16 (REV16v16i8 FPR128:$src))>; -def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), - (v8i16 (REV64v8i16 FPR128:$src))>; -def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), - (v8i16 (REV32v8i16 FPR128:$src))>; -} -def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>; - -let Predicates = [IsLE] in { -def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>; -def : 
Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>; - -def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), (v8bf16 FPR128:$src)>; -def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>; -def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>; -def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>; -def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>; -def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>; -} -let Predicates = [IsBE] in { -def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), - (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src), - (REV64v8i16 FPR128:$src), - (i32 8)))>; -def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), - (v8f16 (REV64v8i16 FPR128:$src))>; -def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), - (v8f16 (REV32v8i16 FPR128:$src))>; -def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), - (v8f16 (REV16v16i8 FPR128:$src))>; -def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), - (v8f16 (REV64v8i16 FPR128:$src))>; -def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), - (v8f16 (REV32v8i16 FPR128:$src))>; - -def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), - (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src), - (REV64v8i16 FPR128:$src), - (i32 8)))>; -def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), - (v8bf16 (REV64v8i16 FPR128:$src))>; -def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), - (v8bf16 (REV32v8i16 FPR128:$src))>; -def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), - (v8bf16 (REV16v16i8 FPR128:$src))>; -def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), - (v8bf16 (REV64v8i16 FPR128:$src))>; -def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), - (v8bf16 (REV32v8i16 FPR128:$src))>; -} -def : 
Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>; - -let Predicates = [IsLE] in { -def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>; -} -let Predicates = [IsBE] in { -def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), - (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src), - (REV64v16i8 FPR128:$src), - (i32 8)))>; -def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), - (v16i8 (REV64v16i8 FPR128:$src))>; -def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), - (v16i8 (REV32v16i8 FPR128:$src))>; -def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), - (v16i8 (REV16v16i8 FPR128:$src))>; -def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), - (v16i8 (REV64v16i8 FPR128:$src))>; -def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), - (v16i8 (REV32v16i8 FPR128:$src))>; -def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), - (v16i8 (REV16v16i8 FPR128:$src))>; -def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), - (v16i8 (REV16v16i8 FPR128:$src))>; -} - -def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))), - (EXTRACT_SUBREG V128:$Rn, dsub)>; -def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))), - (EXTRACT_SUBREG V128:$Rn, dsub)>; -def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))), - (EXTRACT_SUBREG V128:$Rn, dsub)>; -def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))), - (EXTRACT_SUBREG V128:$Rn, dsub)>; -def : Pat<(v4bf16 (extract_subvector 
V128:$Rn, (i64 0))), - (EXTRACT_SUBREG V128:$Rn, dsub)>; -def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))), - (EXTRACT_SUBREG V128:$Rn, dsub)>; -def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))), - (EXTRACT_SUBREG V128:$Rn, dsub)>; -def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))), - (EXTRACT_SUBREG V128:$Rn, dsub)>; - -def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))), - (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; -def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))), - (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; -def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))), - (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; -def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))), - (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; - -// A 64-bit subvector insert to the first 128-bit vector position -// is a subregister copy that needs no instruction. -multiclass InsertSubvectorUndef { - def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)), - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; - def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; - def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)), - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; - def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; - def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)), - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; - def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)), - (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; - def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)), - (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; - def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)), - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 
FPR64:$src, dsub)>; -} - -defm : InsertSubvectorUndef; -defm : InsertSubvectorUndef; - -// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64 -// or v2f32. -def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)), - (vector_extract (v2i64 FPR128:$Rn), (i64 1)))), - (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>; -def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)), - (vector_extract (v2f64 FPR128:$Rn), (i64 1)))), - (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>; - // vector_extract on 64-bit vectors gets promoted to a 128 bit vector, - // so we match on v4f32 here, not v2f32. This will also catch adding - // the low two lanes of a true v4f32 vector. -def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), - (vector_extract (v4f32 FPR128:$Rn), (i64 1))), - (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; -def : Pat<(fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)), - (vector_extract (v8f16 FPR128:$Rn), (i64 1))), - (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; - -// Scalar 64-bit shifts in FPR64 registers. -def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; - -// Patterns for nontemporal/no-allocate stores. -// We have to resort to tricks to turn a single-input store into a store pair, -// because there is no single-input nontemporal store, only STNP. 
-let Predicates = [IsLE] in { -let AddedComplexity = 15 in { -class NTStore128Pat : - Pat<(nontemporalstore (VT FPR128:$Rt), - (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)), - (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub), - (DUPi64 FPR128:$Rt, (i64 1)), - GPR64sp:$Rn, simm7s8:$offset)>; - -def : NTStore128Pat; -def : NTStore128Pat; -def : NTStore128Pat; -def : NTStore128Pat; - -class NTStore64Pat : - Pat<(nontemporalstore (VT FPR64:$Rt), - (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)), - (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub), - (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)), - GPR64sp:$Rn, simm7s4:$offset)>; - -// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64? -def : NTStore64Pat; -def : NTStore64Pat; -def : NTStore64Pat; -def : NTStore64Pat; -def : NTStore64Pat; - -def : Pat<(nontemporalstore GPR64:$Rt, - (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)), - (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), - (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32), - GPR64sp:$Rn, simm7s4:$offset)>; -} // AddedComplexity=10 -} // Predicates = [IsLE] - -// Tail call return handling. These are all compiler pseudo-instructions, -// so no encoding information or anything like that. -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { - def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>, - Sched<[WriteBrReg]>; - def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>, - Sched<[WriteBrReg]>; - // Indirect tail-call with any register allowed, used by MachineOutliner when - // this is proven safe. - // FIXME: If we have to add any more hacks like this, we should instead relax - // some verifier checks for outlined functions. - def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>, - Sched<[WriteBrReg]>; - // Indirect tail-call limited to only use registers (x16 and x17) which are - // allowed to tail-call a "BTI c" instruction. 
- def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>, - Sched<[WriteBrReg]>; -} - -def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)), - (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>, - Requires<[NotUseBTI]>; -def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)), - (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>, - Requires<[UseBTI]>; -def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)), - (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; -def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), - (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; - -def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>; -def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>; - -// Extracting lane zero is a special case where we can just use a plain -// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the -// rest of the compiler, especially the register allocator and copy propagation, -// to reason about, so is preferred when it's possible to use it. 
-let AddedComplexity = 10 in { - def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>; - def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>; - def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>; -} - -// dot_v4i8 -class mul_v4i8 : - PatFrag<(ops node:$Rn, node:$Rm, node:$offset), - (mul (ldop (add node:$Rn, node:$offset)), - (ldop (add node:$Rm, node:$offset)))>; -class mulz_v4i8 : - PatFrag<(ops node:$Rn, node:$Rm), - (mul (ldop node:$Rn), (ldop node:$Rm))>; - -def load_v4i8 : - OutPatFrag<(ops node:$R), - (INSERT_SUBREG - (v2i32 (IMPLICIT_DEF)), - (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)), - ssub)>; - -class dot_v4i8 : - Pat<(i32 (add (mul_v4i8 GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)), - (add (mul_v4i8 GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)), - (add (mul_v4i8 GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)), - (mulz_v4i8 GPR64sp:$Rn, GPR64sp:$Rm))))), - (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR), - (load_v4i8 GPR64sp:$Rn), - (load_v4i8 GPR64sp:$Rm))), - sub_32)>, Requires<[HasDotProd]>; - -// dot_v8i8 -class ee_v8i8 : - PatFrag<(ops node:$V, node:$K), - (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>; - -class mul_v8i8 : - PatFrag<(ops node:$M, node:$N, node:$K), - (mulop (v4i16 (ee_v8i8 node:$M, node:$K)), - (v4i16 (ee_v8i8 node:$N, node:$K)))>; - -class idot_v8i8 : - PatFrag<(ops node:$M, node:$N), - (i32 (extractelt - (v4i32 (AArch64uaddv - (add (mul_v8i8 node:$M, node:$N, (i64 0)), - (mul_v8i8 node:$M, node:$N, (i64 4))))), - (i64 0)))>; - -// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>; - -class odot_v8i8 : - OutPatFrag<(ops node:$Vm, node:$Vn), - (EXTRACT_SUBREG - (VADDV_32 - (i64 (DOT (DUPv2i32gpr WZR), - (v8i8 node:$Vm), - (v8i8 node:$Vn)))), - sub_32)>; - -class dot_v8i8 : - Pat<(idot_v8i8 V64:$Vm, V64:$Vn), - (odot_v8i8 V64:$Vm, V64:$Vn)>, - 
Requires<[HasDotProd]>; - -// dot_v16i8 -class ee_v16i8 : - PatFrag<(ops node:$V, node:$K1, node:$K2), - (v4i16 (extract_subvector - (v8i16 (extend - (v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>; - -class mul_v16i8 : - PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2), - (v4i32 - (mulop (v4i16 (ee_v16i8 node:$M, node:$K1, node:$K2)), - (v4i16 (ee_v16i8 node:$N, node:$K1, node:$K2))))>; - -class idot_v16i8 : - PatFrag<(ops node:$M, node:$N), - (i32 (extractelt - (v4i32 (AArch64uaddv - (add - (add (mul_v16i8 node:$M, node:$N, (i64 0), (i64 0)), - (mul_v16i8 node:$M, node:$N, (i64 8), (i64 0))), - (add (mul_v16i8 node:$M, node:$N, (i64 0), (i64 4)), - (mul_v16i8 node:$M, node:$N, (i64 8), (i64 4)))))), - (i64 0)))>; - -class odot_v16i8 : - OutPatFrag<(ops node:$Vm, node:$Vn), - (i32 (ADDVv4i32v - (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>; - -class dot_v16i8 : - Pat<(idot_v16i8 V128:$Vm, V128:$Vn), - (odot_v16i8 V128:$Vm, V128:$Vn)>, - Requires<[HasDotProd]>; - -let AddedComplexity = 10 in { - def : dot_v4i8; - def : dot_v4i8; - def : dot_v8i8; - def : dot_v8i8; - def : dot_v16i8; - def : dot_v16i8; - - // FIXME: add patterns to generate vector by element dot product. - // FIXME: add SVE dot-product patterns. -} - -// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs, -// so that it can be used as input to inline asm, and vice versa. 
-def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>; -def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>; -def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, - GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)), - (REG_SEQUENCE GPR64x8Class, - $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, - $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>; -foreach i = 0-7 in { - def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))), - (EXTRACT_SUBREG $val, !cast("x8sub_"#i))>; -} - -let Predicates = [HasLS64] in { - def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn), - (outs GPR64x8:$Rt)>; - def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn), - (outs)>; - def ST64BV: Store64BV<0b011, "st64bv">; - def ST64BV0: Store64BV<0b010, "st64bv0">; - - class ST64BPattern - : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7), - (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>; - - def : ST64BPattern; - def : ST64BPattern; - def : ST64BPattern; -} - -let Predicates = [HasMOPS] in { - let Defs = [NZCV] in { - defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">; - - defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">; - - defm SETP : MOPSMemorySetInsns<0b00, "setp">; - } - let Uses = [NZCV] in { - defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">; - defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">; - - defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">; - defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">; - - defm SETM : MOPSMemorySetInsns<0b01, "setm">; - defm SETE : MOPSMemorySetInsns<0b10, "sete">; - } -} -let Predicates = [HasMOPS, HasMTE] in { - let Defs = [NZCV] in { - defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">; - } - let Uses = [NZCV] in { - defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">; - // 
Can't use SETGE because it's a reserved name in TargetSelectionDAG.td - defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">; - } -} - -// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain -// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain -def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>; -def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>; -def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>; -def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>; -def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>; - -// MOPS operations always contain three 4-byte instructions -let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in { - let mayLoad = 1 in { - def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb), - (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn), - [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>; - def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb), - (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn), - [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>; - } - let mayLoad = 0 in { - def MOPSMemorySetPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb), - (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm), - [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>; - } -} -let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in { - def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb), - (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm), - [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>; -} - -// This gets lowered into an instruction sequence of 20 bytes -let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in -def StoreSwiftAsyncContext - : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset), - []>, 
Sched<[]>; - -def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>; -def : Pat<(AArch64AssertZExtBool GPR32:$op), - (i32 GPR32:$op)>; - -include "AArch64InstrAtomics.td" -include "AArch64SVEInstrInfo.td" -include "AArch64SMEInstrInfo.td" -include "AArch64InstrGISel.td" diff --git a/suite/synctools/tablegen/AArch64/AArch64PfmCounters.td b/suite/synctools/tablegen/AArch64/AArch64PfmCounters.td deleted file mode 100644 index b1d1664e3f..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64PfmCounters.td +++ /dev/null @@ -1,18 +0,0 @@ -//===-- AArch64PfmCounters.td - AArch64 Hardware Counters --*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This describes the available hardware counters for AArch64. -// -//===----------------------------------------------------------------------===// - -def CpuCyclesPfmCounter : PfmCounter<"CPU_CYCLES">; - -def DefaultPfmCounters : ProcPfmCounters { - let CycleCounter = CpuCyclesPfmCounter; -} -def : PfmCountersDefaultBinding; diff --git a/suite/synctools/tablegen/AArch64/AArch64RegisterBanks.td b/suite/synctools/tablegen/AArch64/AArch64RegisterBanks.td deleted file mode 100644 index 615ce7d51d..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64RegisterBanks.td +++ /dev/null @@ -1,19 +0,0 @@ -//=- AArch64RegisterBank.td - Describe the AArch64 Banks -----*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -/// General Purpose Registers: W, X. -def GPRRegBank : RegisterBank<"GPR", [XSeqPairsClass]>; - -/// Floating Point/Vector Registers: B, H, S, D, Q. -def FPRRegBank : RegisterBank<"FPR", [QQQQ]>; - -/// Conditional register: NZCV. -def CCRegBank : RegisterBank<"CC", [CCR]>; diff --git a/suite/synctools/tablegen/AArch64/AArch64RegisterInfo.td b/suite/synctools/tablegen/AArch64/AArch64RegisterInfo.td deleted file mode 100644 index 70daf5abf8..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64RegisterInfo.td +++ /dev/null @@ -1,1387 +0,0 @@ -//=- AArch64RegisterInfo.td - Describe the AArch64 Registers -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - - -class AArch64Reg enc, string n, list subregs = [], - list altNames = []> - : Register { - let HWEncoding = enc; - let Namespace = "AArch64"; - let SubRegs = subregs; -} - -let Namespace = "AArch64" in { - def sub_32 : SubRegIndex<32>; - - def bsub : SubRegIndex<8>; - def hsub : SubRegIndex<16>; - def ssub : SubRegIndex<32>; - def dsub : SubRegIndex<64>; - def sube32 : SubRegIndex<32>; - def subo32 : SubRegIndex<32>; - def sube64 : SubRegIndex<64>; - def subo64 : SubRegIndex<64>; - // SVE - def zsub : SubRegIndex<128>; - // Note: zsub_hi should never be used directly because it represents - // the scalable part of the SVE vector and cannot be manipulated as a - // subvector in the same way the lower 128bits can. 
- def zsub_hi : SubRegIndex<128>; - // Note: Code depends on these having consecutive numbers - def dsub0 : SubRegIndex<64>; - def dsub1 : SubRegIndex<64>; - def dsub2 : SubRegIndex<64>; - def dsub3 : SubRegIndex<64>; - // Note: Code depends on these having consecutive numbers - def qsub0 : SubRegIndex<128>; - def qsub1 : SubRegIndex<128>; - def qsub2 : SubRegIndex<128>; - def qsub3 : SubRegIndex<128>; - // Note: Code depends on these having consecutive numbers - def zasubb : SubRegIndex<2048>; // (16 x 16)/1 bytes = 2048 bits - def zasubh0 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits - def zasubh1 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits - def zasubs0 : SubRegIndex<512>; // (16 x 16)/4 bytes = 512 bits - def zasubs1 : SubRegIndex<512>; // (16 x 16)/4 bytes = 512 bits - def zasubd0 : SubRegIndex<256>; // (16 x 16)/8 bytes = 256 bits - def zasubd1 : SubRegIndex<256>; // (16 x 16)/8 bytes = 256 bits - def zasubq0 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits - def zasubq1 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits -} - -let Namespace = "AArch64" in { - def vreg : RegAltNameIndex; - def vlist1 : RegAltNameIndex; -} - -//===----------------------------------------------------------------------===// -// Registers -//===----------------------------------------------------------------------===// -def W0 : AArch64Reg<0, "w0" >, DwarfRegNum<[0]>; -def W1 : AArch64Reg<1, "w1" >, DwarfRegNum<[1]>; -def W2 : AArch64Reg<2, "w2" >, DwarfRegNum<[2]>; -def W3 : AArch64Reg<3, "w3" >, DwarfRegNum<[3]>; -def W4 : AArch64Reg<4, "w4" >, DwarfRegNum<[4]>; -def W5 : AArch64Reg<5, "w5" >, DwarfRegNum<[5]>; -def W6 : AArch64Reg<6, "w6" >, DwarfRegNum<[6]>; -def W7 : AArch64Reg<7, "w7" >, DwarfRegNum<[7]>; -def W8 : AArch64Reg<8, "w8" >, DwarfRegNum<[8]>; -def W9 : AArch64Reg<9, "w9" >, DwarfRegNum<[9]>; -def W10 : AArch64Reg<10, "w10">, DwarfRegNum<[10]>; -def W11 : AArch64Reg<11, "w11">, DwarfRegNum<[11]>; -def W12 : AArch64Reg<12, "w12">, 
DwarfRegNum<[12]>; -def W13 : AArch64Reg<13, "w13">, DwarfRegNum<[13]>; -def W14 : AArch64Reg<14, "w14">, DwarfRegNum<[14]>; -def W15 : AArch64Reg<15, "w15">, DwarfRegNum<[15]>; -def W16 : AArch64Reg<16, "w16">, DwarfRegNum<[16]>; -def W17 : AArch64Reg<17, "w17">, DwarfRegNum<[17]>; -def W18 : AArch64Reg<18, "w18">, DwarfRegNum<[18]>; -def W19 : AArch64Reg<19, "w19">, DwarfRegNum<[19]>; -def W20 : AArch64Reg<20, "w20">, DwarfRegNum<[20]>; -def W21 : AArch64Reg<21, "w21">, DwarfRegNum<[21]>; -def W22 : AArch64Reg<22, "w22">, DwarfRegNum<[22]>; -def W23 : AArch64Reg<23, "w23">, DwarfRegNum<[23]>; -def W24 : AArch64Reg<24, "w24">, DwarfRegNum<[24]>; -def W25 : AArch64Reg<25, "w25">, DwarfRegNum<[25]>; -def W26 : AArch64Reg<26, "w26">, DwarfRegNum<[26]>; -def W27 : AArch64Reg<27, "w27">, DwarfRegNum<[27]>; -def W28 : AArch64Reg<28, "w28">, DwarfRegNum<[28]>; -def W29 : AArch64Reg<29, "w29">, DwarfRegNum<[29]>; -def W30 : AArch64Reg<30, "w30">, DwarfRegNum<[30]>; -def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>; -def WZR : AArch64Reg<31, "wzr">, DwarfRegAlias; - -let SubRegIndices = [sub_32] in { -def X0 : AArch64Reg<0, "x0", [W0]>, DwarfRegAlias; -def X1 : AArch64Reg<1, "x1", [W1]>, DwarfRegAlias; -def X2 : AArch64Reg<2, "x2", [W2]>, DwarfRegAlias; -def X3 : AArch64Reg<3, "x3", [W3]>, DwarfRegAlias; -def X4 : AArch64Reg<4, "x4", [W4]>, DwarfRegAlias; -def X5 : AArch64Reg<5, "x5", [W5]>, DwarfRegAlias; -def X6 : AArch64Reg<6, "x6", [W6]>, DwarfRegAlias; -def X7 : AArch64Reg<7, "x7", [W7]>, DwarfRegAlias; -def X8 : AArch64Reg<8, "x8", [W8]>, DwarfRegAlias; -def X9 : AArch64Reg<9, "x9", [W9]>, DwarfRegAlias; -def X10 : AArch64Reg<10, "x10", [W10]>, DwarfRegAlias; -def X11 : AArch64Reg<11, "x11", [W11]>, DwarfRegAlias; -def X12 : AArch64Reg<12, "x12", [W12]>, DwarfRegAlias; -def X13 : AArch64Reg<13, "x13", [W13]>, DwarfRegAlias; -def X14 : AArch64Reg<14, "x14", [W14]>, DwarfRegAlias; -def X15 : AArch64Reg<15, "x15", [W15]>, DwarfRegAlias; -def X16 : AArch64Reg<16, 
"x16", [W16]>, DwarfRegAlias; -def X17 : AArch64Reg<17, "x17", [W17]>, DwarfRegAlias; -def X18 : AArch64Reg<18, "x18", [W18]>, DwarfRegAlias; -def X19 : AArch64Reg<19, "x19", [W19]>, DwarfRegAlias; -def X20 : AArch64Reg<20, "x20", [W20]>, DwarfRegAlias; -def X21 : AArch64Reg<21, "x21", [W21]>, DwarfRegAlias; -def X22 : AArch64Reg<22, "x22", [W22]>, DwarfRegAlias; -def X23 : AArch64Reg<23, "x23", [W23]>, DwarfRegAlias; -def X24 : AArch64Reg<24, "x24", [W24]>, DwarfRegAlias; -def X25 : AArch64Reg<25, "x25", [W25]>, DwarfRegAlias; -def X26 : AArch64Reg<26, "x26", [W26]>, DwarfRegAlias; -def X27 : AArch64Reg<27, "x27", [W27]>, DwarfRegAlias; -def X28 : AArch64Reg<28, "x28", [W28]>, DwarfRegAlias; -def FP : AArch64Reg<29, "x29", [W29]>, DwarfRegAlias; -def LR : AArch64Reg<30, "x30", [W30]>, DwarfRegAlias; -def SP : AArch64Reg<31, "sp", [WSP]>, DwarfRegAlias; -def XZR : AArch64Reg<31, "xzr", [WZR]>, DwarfRegAlias; -} - -// Condition code register. -def NZCV : AArch64Reg<0, "nzcv">; - -// First fault status register -def FFR : AArch64Reg<0, "ffr">, DwarfRegNum<[47]>; - -// Purely virtual Vector Granule (VG) Dwarf register -def VG : AArch64Reg<0, "vg">, DwarfRegNum<[46]>; - -// GPR register classes with the intersections of GPR32/GPR32sp and -// GPR64/GPR64sp for use by the coalescer. -def GPR32common : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 30)> { - let AltOrders = [(rotl GPR32common, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -def GPR64common : RegisterClass<"AArch64", [i64], 64, - (add (sequence "X%u", 0, 28), FP, LR)> { - let AltOrders = [(rotl GPR64common, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -// GPR register classes which exclude SP/WSP. 
-def GPR32 : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR)> { - let AltOrders = [(rotl GPR32, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -def GPR64 : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR)> { - let AltOrders = [(rotl GPR64, 8)]; - let AltOrderSelect = [{ return 1; }]; -} - -// GPR register classes which include SP/WSP. -def GPR32sp : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WSP)> { - let AltOrders = [(rotl GPR32sp, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -def GPR64sp : RegisterClass<"AArch64", [i64], 64, (add GPR64common, SP)> { - let AltOrders = [(rotl GPR64sp, 8)]; - let AltOrderSelect = [{ return 1; }]; -} - -def GPR32sponly : RegisterClass<"AArch64", [i32], 32, (add WSP)>; -def GPR64sponly : RegisterClass<"AArch64", [i64], 64, (add SP)>; - -def GPR64spPlus0Operand : AsmOperandClass { - let Name = "GPR64sp0"; - let RenderMethod = "addRegOperands"; - let PredicateMethod = "isGPR64"; - let ParserMethod = "tryParseGPR64sp0Operand"; -} - -def GPR64sp0 : RegisterOperand { - let ParserMatchClass = GPR64spPlus0Operand; -} - -// GPR32/GPR64 but with zero-register substitution enabled. -// TODO: Roll this out to GPR32/GPR64/GPR32all/GPR64all. -def GPR32z : RegisterOperand { - let GIZeroRegister = WZR; -} -def GPR64z : RegisterOperand { - let GIZeroRegister = XZR; -} - -// GPR argument registers. -def GPR32arg : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 7)>; -def GPR64arg : RegisterClass<"AArch64", [i64], 64, (sequence "X%u", 0, 7)>; - -// GPR register classes which include WZR/XZR AND SP/WSP. This is not a -// constraint used by any instructions, it is used as a common super-class. -def GPR32all : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR, WSP)>; -def GPR64all : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR, SP)>; - -// For tail calls, we can't use callee-saved registers, as they are restored -// to the saved value before the tail call, which would clobber a call address. 
-// This is for indirect tail calls to store the address of the destination. -def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X21, - X22, X23, X24, X25, X26, - X27, X28, FP, LR)>; - -// Restricted set of tail call registers, for use when branch target -// enforcement is enabled. These are the only registers which can be used to -// indirectly branch (not call) to the "BTI c" instruction at the start of a -// BTI-protected function. -def rtcGPR64 : RegisterClass<"AArch64", [i64], 64, (add X16, X17)>; - -// Register set that excludes registers that are reserved for procedure calls. -// This is used for pseudo-instructions that are actually implemented using a -// procedure call. -def GPR64noip : RegisterClass<"AArch64", [i64], 64, (sub GPR64, X16, X17, LR)>; - -// GPR register classes for post increment amount of vector load/store that -// has alternate printing when Rm=31 and prints a constant immediate value -// equal to the total number of bytes transferred. - -// FIXME: TableGen *should* be able to do these itself now. There appears to be -// a bug in counting how many operands a Post-indexed MCInst should have which -// means the aliases don't trigger. -def GPR64pi1 : RegisterOperand">; -def GPR64pi2 : RegisterOperand">; -def GPR64pi3 : RegisterOperand">; -def GPR64pi4 : RegisterOperand">; -def GPR64pi6 : RegisterOperand">; -def GPR64pi8 : RegisterOperand">; -def GPR64pi12 : RegisterOperand">; -def GPR64pi16 : RegisterOperand">; -def GPR64pi24 : RegisterOperand">; -def GPR64pi32 : RegisterOperand">; -def GPR64pi48 : RegisterOperand">; -def GPR64pi64 : RegisterOperand">; - -// Condition code regclass. -def CCR : RegisterClass<"AArch64", [i32], 32, (add NZCV)> { - let CopyCost = -1; // Don't allow copying of status registers. - - // CCR is not allocatable. 
- let isAllocatable = 0; -} - -//===----------------------------------------------------------------------===// -// Floating Point Scalar Registers -//===----------------------------------------------------------------------===// - -def B0 : AArch64Reg<0, "b0">, DwarfRegNum<[64]>; -def B1 : AArch64Reg<1, "b1">, DwarfRegNum<[65]>; -def B2 : AArch64Reg<2, "b2">, DwarfRegNum<[66]>; -def B3 : AArch64Reg<3, "b3">, DwarfRegNum<[67]>; -def B4 : AArch64Reg<4, "b4">, DwarfRegNum<[68]>; -def B5 : AArch64Reg<5, "b5">, DwarfRegNum<[69]>; -def B6 : AArch64Reg<6, "b6">, DwarfRegNum<[70]>; -def B7 : AArch64Reg<7, "b7">, DwarfRegNum<[71]>; -def B8 : AArch64Reg<8, "b8">, DwarfRegNum<[72]>; -def B9 : AArch64Reg<9, "b9">, DwarfRegNum<[73]>; -def B10 : AArch64Reg<10, "b10">, DwarfRegNum<[74]>; -def B11 : AArch64Reg<11, "b11">, DwarfRegNum<[75]>; -def B12 : AArch64Reg<12, "b12">, DwarfRegNum<[76]>; -def B13 : AArch64Reg<13, "b13">, DwarfRegNum<[77]>; -def B14 : AArch64Reg<14, "b14">, DwarfRegNum<[78]>; -def B15 : AArch64Reg<15, "b15">, DwarfRegNum<[79]>; -def B16 : AArch64Reg<16, "b16">, DwarfRegNum<[80]>; -def B17 : AArch64Reg<17, "b17">, DwarfRegNum<[81]>; -def B18 : AArch64Reg<18, "b18">, DwarfRegNum<[82]>; -def B19 : AArch64Reg<19, "b19">, DwarfRegNum<[83]>; -def B20 : AArch64Reg<20, "b20">, DwarfRegNum<[84]>; -def B21 : AArch64Reg<21, "b21">, DwarfRegNum<[85]>; -def B22 : AArch64Reg<22, "b22">, DwarfRegNum<[86]>; -def B23 : AArch64Reg<23, "b23">, DwarfRegNum<[87]>; -def B24 : AArch64Reg<24, "b24">, DwarfRegNum<[88]>; -def B25 : AArch64Reg<25, "b25">, DwarfRegNum<[89]>; -def B26 : AArch64Reg<26, "b26">, DwarfRegNum<[90]>; -def B27 : AArch64Reg<27, "b27">, DwarfRegNum<[91]>; -def B28 : AArch64Reg<28, "b28">, DwarfRegNum<[92]>; -def B29 : AArch64Reg<29, "b29">, DwarfRegNum<[93]>; -def B30 : AArch64Reg<30, "b30">, DwarfRegNum<[94]>; -def B31 : AArch64Reg<31, "b31">, DwarfRegNum<[95]>; - -let SubRegIndices = [bsub] in { -def H0 : AArch64Reg<0, "h0", [B0]>, DwarfRegAlias; -def H1 : 
AArch64Reg<1, "h1", [B1]>, DwarfRegAlias; -def H2 : AArch64Reg<2, "h2", [B2]>, DwarfRegAlias; -def H3 : AArch64Reg<3, "h3", [B3]>, DwarfRegAlias; -def H4 : AArch64Reg<4, "h4", [B4]>, DwarfRegAlias; -def H5 : AArch64Reg<5, "h5", [B5]>, DwarfRegAlias; -def H6 : AArch64Reg<6, "h6", [B6]>, DwarfRegAlias; -def H7 : AArch64Reg<7, "h7", [B7]>, DwarfRegAlias; -def H8 : AArch64Reg<8, "h8", [B8]>, DwarfRegAlias; -def H9 : AArch64Reg<9, "h9", [B9]>, DwarfRegAlias; -def H10 : AArch64Reg<10, "h10", [B10]>, DwarfRegAlias; -def H11 : AArch64Reg<11, "h11", [B11]>, DwarfRegAlias; -def H12 : AArch64Reg<12, "h12", [B12]>, DwarfRegAlias; -def H13 : AArch64Reg<13, "h13", [B13]>, DwarfRegAlias; -def H14 : AArch64Reg<14, "h14", [B14]>, DwarfRegAlias; -def H15 : AArch64Reg<15, "h15", [B15]>, DwarfRegAlias; -def H16 : AArch64Reg<16, "h16", [B16]>, DwarfRegAlias; -def H17 : AArch64Reg<17, "h17", [B17]>, DwarfRegAlias; -def H18 : AArch64Reg<18, "h18", [B18]>, DwarfRegAlias; -def H19 : AArch64Reg<19, "h19", [B19]>, DwarfRegAlias; -def H20 : AArch64Reg<20, "h20", [B20]>, DwarfRegAlias; -def H21 : AArch64Reg<21, "h21", [B21]>, DwarfRegAlias; -def H22 : AArch64Reg<22, "h22", [B22]>, DwarfRegAlias; -def H23 : AArch64Reg<23, "h23", [B23]>, DwarfRegAlias; -def H24 : AArch64Reg<24, "h24", [B24]>, DwarfRegAlias; -def H25 : AArch64Reg<25, "h25", [B25]>, DwarfRegAlias; -def H26 : AArch64Reg<26, "h26", [B26]>, DwarfRegAlias; -def H27 : AArch64Reg<27, "h27", [B27]>, DwarfRegAlias; -def H28 : AArch64Reg<28, "h28", [B28]>, DwarfRegAlias; -def H29 : AArch64Reg<29, "h29", [B29]>, DwarfRegAlias; -def H30 : AArch64Reg<30, "h30", [B30]>, DwarfRegAlias; -def H31 : AArch64Reg<31, "h31", [B31]>, DwarfRegAlias; -} - -let SubRegIndices = [hsub] in { -def S0 : AArch64Reg<0, "s0", [H0]>, DwarfRegAlias; -def S1 : AArch64Reg<1, "s1", [H1]>, DwarfRegAlias; -def S2 : AArch64Reg<2, "s2", [H2]>, DwarfRegAlias; -def S3 : AArch64Reg<3, "s3", [H3]>, DwarfRegAlias; -def S4 : AArch64Reg<4, "s4", [H4]>, DwarfRegAlias; -def S5 : 
AArch64Reg<5, "s5", [H5]>, DwarfRegAlias; -def S6 : AArch64Reg<6, "s6", [H6]>, DwarfRegAlias; -def S7 : AArch64Reg<7, "s7", [H7]>, DwarfRegAlias; -def S8 : AArch64Reg<8, "s8", [H8]>, DwarfRegAlias; -def S9 : AArch64Reg<9, "s9", [H9]>, DwarfRegAlias; -def S10 : AArch64Reg<10, "s10", [H10]>, DwarfRegAlias; -def S11 : AArch64Reg<11, "s11", [H11]>, DwarfRegAlias; -def S12 : AArch64Reg<12, "s12", [H12]>, DwarfRegAlias; -def S13 : AArch64Reg<13, "s13", [H13]>, DwarfRegAlias; -def S14 : AArch64Reg<14, "s14", [H14]>, DwarfRegAlias; -def S15 : AArch64Reg<15, "s15", [H15]>, DwarfRegAlias; -def S16 : AArch64Reg<16, "s16", [H16]>, DwarfRegAlias; -def S17 : AArch64Reg<17, "s17", [H17]>, DwarfRegAlias; -def S18 : AArch64Reg<18, "s18", [H18]>, DwarfRegAlias; -def S19 : AArch64Reg<19, "s19", [H19]>, DwarfRegAlias; -def S20 : AArch64Reg<20, "s20", [H20]>, DwarfRegAlias; -def S21 : AArch64Reg<21, "s21", [H21]>, DwarfRegAlias; -def S22 : AArch64Reg<22, "s22", [H22]>, DwarfRegAlias; -def S23 : AArch64Reg<23, "s23", [H23]>, DwarfRegAlias; -def S24 : AArch64Reg<24, "s24", [H24]>, DwarfRegAlias; -def S25 : AArch64Reg<25, "s25", [H25]>, DwarfRegAlias; -def S26 : AArch64Reg<26, "s26", [H26]>, DwarfRegAlias; -def S27 : AArch64Reg<27, "s27", [H27]>, DwarfRegAlias; -def S28 : AArch64Reg<28, "s28", [H28]>, DwarfRegAlias; -def S29 : AArch64Reg<29, "s29", [H29]>, DwarfRegAlias; -def S30 : AArch64Reg<30, "s30", [H30]>, DwarfRegAlias; -def S31 : AArch64Reg<31, "s31", [H31]>, DwarfRegAlias; -} - -let SubRegIndices = [ssub], RegAltNameIndices = [vreg, vlist1] in { -def D0 : AArch64Reg<0, "d0", [S0], ["v0", ""]>, DwarfRegAlias; -def D1 : AArch64Reg<1, "d1", [S1], ["v1", ""]>, DwarfRegAlias; -def D2 : AArch64Reg<2, "d2", [S2], ["v2", ""]>, DwarfRegAlias; -def D3 : AArch64Reg<3, "d3", [S3], ["v3", ""]>, DwarfRegAlias; -def D4 : AArch64Reg<4, "d4", [S4], ["v4", ""]>, DwarfRegAlias; -def D5 : AArch64Reg<5, "d5", [S5], ["v5", ""]>, DwarfRegAlias; -def D6 : AArch64Reg<6, "d6", [S6], ["v6", ""]>, 
DwarfRegAlias; -def D7 : AArch64Reg<7, "d7", [S7], ["v7", ""]>, DwarfRegAlias; -def D8 : AArch64Reg<8, "d8", [S8], ["v8", ""]>, DwarfRegAlias; -def D9 : AArch64Reg<9, "d9", [S9], ["v9", ""]>, DwarfRegAlias; -def D10 : AArch64Reg<10, "d10", [S10], ["v10", ""]>, DwarfRegAlias; -def D11 : AArch64Reg<11, "d11", [S11], ["v11", ""]>, DwarfRegAlias; -def D12 : AArch64Reg<12, "d12", [S12], ["v12", ""]>, DwarfRegAlias; -def D13 : AArch64Reg<13, "d13", [S13], ["v13", ""]>, DwarfRegAlias; -def D14 : AArch64Reg<14, "d14", [S14], ["v14", ""]>, DwarfRegAlias; -def D15 : AArch64Reg<15, "d15", [S15], ["v15", ""]>, DwarfRegAlias; -def D16 : AArch64Reg<16, "d16", [S16], ["v16", ""]>, DwarfRegAlias; -def D17 : AArch64Reg<17, "d17", [S17], ["v17", ""]>, DwarfRegAlias; -def D18 : AArch64Reg<18, "d18", [S18], ["v18", ""]>, DwarfRegAlias; -def D19 : AArch64Reg<19, "d19", [S19], ["v19", ""]>, DwarfRegAlias; -def D20 : AArch64Reg<20, "d20", [S20], ["v20", ""]>, DwarfRegAlias; -def D21 : AArch64Reg<21, "d21", [S21], ["v21", ""]>, DwarfRegAlias; -def D22 : AArch64Reg<22, "d22", [S22], ["v22", ""]>, DwarfRegAlias; -def D23 : AArch64Reg<23, "d23", [S23], ["v23", ""]>, DwarfRegAlias; -def D24 : AArch64Reg<24, "d24", [S24], ["v24", ""]>, DwarfRegAlias; -def D25 : AArch64Reg<25, "d25", [S25], ["v25", ""]>, DwarfRegAlias; -def D26 : AArch64Reg<26, "d26", [S26], ["v26", ""]>, DwarfRegAlias; -def D27 : AArch64Reg<27, "d27", [S27], ["v27", ""]>, DwarfRegAlias; -def D28 : AArch64Reg<28, "d28", [S28], ["v28", ""]>, DwarfRegAlias; -def D29 : AArch64Reg<29, "d29", [S29], ["v29", ""]>, DwarfRegAlias; -def D30 : AArch64Reg<30, "d30", [S30], ["v30", ""]>, DwarfRegAlias; -def D31 : AArch64Reg<31, "d31", [S31], ["v31", ""]>, DwarfRegAlias; -} - -let SubRegIndices = [dsub], RegAltNameIndices = [vreg, vlist1] in { -def Q0 : AArch64Reg<0, "q0", [D0], ["v0", ""]>, DwarfRegAlias; -def Q1 : AArch64Reg<1, "q1", [D1], ["v1", ""]>, DwarfRegAlias; -def Q2 : AArch64Reg<2, "q2", [D2], ["v2", ""]>, DwarfRegAlias; -def Q3 
: AArch64Reg<3, "q3", [D3], ["v3", ""]>, DwarfRegAlias; -def Q4 : AArch64Reg<4, "q4", [D4], ["v4", ""]>, DwarfRegAlias; -def Q5 : AArch64Reg<5, "q5", [D5], ["v5", ""]>, DwarfRegAlias; -def Q6 : AArch64Reg<6, "q6", [D6], ["v6", ""]>, DwarfRegAlias; -def Q7 : AArch64Reg<7, "q7", [D7], ["v7", ""]>, DwarfRegAlias; -def Q8 : AArch64Reg<8, "q8", [D8], ["v8", ""]>, DwarfRegAlias; -def Q9 : AArch64Reg<9, "q9", [D9], ["v9", ""]>, DwarfRegAlias; -def Q10 : AArch64Reg<10, "q10", [D10], ["v10", ""]>, DwarfRegAlias; -def Q11 : AArch64Reg<11, "q11", [D11], ["v11", ""]>, DwarfRegAlias; -def Q12 : AArch64Reg<12, "q12", [D12], ["v12", ""]>, DwarfRegAlias; -def Q13 : AArch64Reg<13, "q13", [D13], ["v13", ""]>, DwarfRegAlias; -def Q14 : AArch64Reg<14, "q14", [D14], ["v14", ""]>, DwarfRegAlias; -def Q15 : AArch64Reg<15, "q15", [D15], ["v15", ""]>, DwarfRegAlias; -def Q16 : AArch64Reg<16, "q16", [D16], ["v16", ""]>, DwarfRegAlias; -def Q17 : AArch64Reg<17, "q17", [D17], ["v17", ""]>, DwarfRegAlias; -def Q18 : AArch64Reg<18, "q18", [D18], ["v18", ""]>, DwarfRegAlias; -def Q19 : AArch64Reg<19, "q19", [D19], ["v19", ""]>, DwarfRegAlias; -def Q20 : AArch64Reg<20, "q20", [D20], ["v20", ""]>, DwarfRegAlias; -def Q21 : AArch64Reg<21, "q21", [D21], ["v21", ""]>, DwarfRegAlias; -def Q22 : AArch64Reg<22, "q22", [D22], ["v22", ""]>, DwarfRegAlias; -def Q23 : AArch64Reg<23, "q23", [D23], ["v23", ""]>, DwarfRegAlias; -def Q24 : AArch64Reg<24, "q24", [D24], ["v24", ""]>, DwarfRegAlias; -def Q25 : AArch64Reg<25, "q25", [D25], ["v25", ""]>, DwarfRegAlias; -def Q26 : AArch64Reg<26, "q26", [D26], ["v26", ""]>, DwarfRegAlias; -def Q27 : AArch64Reg<27, "q27", [D27], ["v27", ""]>, DwarfRegAlias; -def Q28 : AArch64Reg<28, "q28", [D28], ["v28", ""]>, DwarfRegAlias; -def Q29 : AArch64Reg<29, "q29", [D29], ["v29", ""]>, DwarfRegAlias; -def Q30 : AArch64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias; -def Q31 : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias; -} - -def FPR8 : RegisterClass<"AArch64", 
[untyped], 8, (sequence "B%u", 0, 31)> { - let Size = 8; -} -def FPR16 : RegisterClass<"AArch64", [f16, bf16], 16, (sequence "H%u", 0, 31)> { - let Size = 16; -} - -def FPR16_lo : RegisterClass<"AArch64", [f16], 16, (trunc FPR16, 16)> { - let Size = 16; -} -def FPR32 : RegisterClass<"AArch64", [f32, i32], 32,(sequence "S%u", 0, 31)>; -def FPR64 : RegisterClass<"AArch64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32, - v1i64, v4f16, v4bf16], - 64, (sequence "D%u", 0, 31)>; -def FPR64_lo : RegisterClass<"AArch64", - [v8i8, v4i16, v2i32, v1i64, v4f16, v4bf16, v2f32, - v1f64], - 64, (trunc FPR64, 16)>; - -// We don't (yet) have an f128 legal type, so don't use that here. We -// normalize 128-bit vectors to v2f64 for arg passing and such, so use -// that here. -def FPR128 : RegisterClass<"AArch64", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128, - v8f16, v8bf16], - 128, (sequence "Q%u", 0, 31)>; - -// The lower 16 vector registers. Some instructions can only take registers -// in this range. -def FPR128_lo : RegisterClass<"AArch64", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16, - v8bf16], - 128, (trunc FPR128, 16)>; - -// Pairs, triples, and quads of 64-bit vector registers. -def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>; -def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2], - [(rotl FPR64, 0), (rotl FPR64, 1), - (rotl FPR64, 2)]>; -def DSeqQuads : RegisterTuples<[dsub0, dsub1, dsub2, dsub3], - [(rotl FPR64, 0), (rotl FPR64, 1), - (rotl FPR64, 2), (rotl FPR64, 3)]>; -def DD : RegisterClass<"AArch64", [untyped], 64, (add DSeqPairs)> { - let Size = 128; -} -def DDD : RegisterClass<"AArch64", [untyped], 64, (add DSeqTriples)> { - let Size = 192; -} -def DDDD : RegisterClass<"AArch64", [untyped], 64, (add DSeqQuads)> { - let Size = 256; -} - -// Pairs, triples, and quads of 128-bit vector registers. 
-def QSeqPairs : RegisterTuples<[qsub0, qsub1], [(rotl FPR128, 0), (rotl FPR128, 1)]>; -def QSeqTriples : RegisterTuples<[qsub0, qsub1, qsub2], - [(rotl FPR128, 0), (rotl FPR128, 1), - (rotl FPR128, 2)]>; -def QSeqQuads : RegisterTuples<[qsub0, qsub1, qsub2, qsub3], - [(rotl FPR128, 0), (rotl FPR128, 1), - (rotl FPR128, 2), (rotl FPR128, 3)]>; -def QQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqPairs)> { - let Size = 256; -} -def QQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqTriples)> { - let Size = 384; -} -def QQQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqQuads)> { - let Size = 512; -} - - -// Vector operand versions of the FP registers. Alternate name printing and -// assembler matching. -def VectorReg64AsmOperand : AsmOperandClass { - let Name = "VectorReg64"; - let PredicateMethod = "isNeonVectorReg"; -} -def VectorReg128AsmOperand : AsmOperandClass { - let Name = "VectorReg128"; - let PredicateMethod = "isNeonVectorReg"; -} - -def V64 : RegisterOperand { - let ParserMatchClass = VectorReg64AsmOperand; -} - -def V128 : RegisterOperand { - let ParserMatchClass = VectorReg128AsmOperand; -} - -def VectorRegLoAsmOperand : AsmOperandClass { - let Name = "VectorRegLo"; - let PredicateMethod = "isNeonVectorRegLo"; -} -def V64_lo : RegisterOperand { - let ParserMatchClass = VectorRegLoAsmOperand; -} -def V128_lo : RegisterOperand { - let ParserMatchClass = VectorRegLoAsmOperand; -} - -class TypedVecListAsmOperand - : AsmOperandClass { - let Name = "TypedVectorList" # count # "_" # lanes # eltsize; - - let PredicateMethod - = "isTypedVectorList"; - let RenderMethod = "addVectorListOperands<" # vecty # ", " # count # ">"; -} - -class TypedVecListRegOperand - : RegisterOperand">; - -multiclass VectorList { - // With implicit types (probably on instruction instead). E.g. 
{ v0, v1 } - def _64AsmOperand : AsmOperandClass { - let Name = NAME # "64"; - let PredicateMethod = "isImplicitlyTypedVectorList"; - let RenderMethod = "addVectorListOperands"; - } - - def "64" : RegisterOperand { - let ParserMatchClass = !cast(NAME # "_64AsmOperand"); - } - - def _128AsmOperand : AsmOperandClass { - let Name = NAME # "128"; - let PredicateMethod = "isImplicitlyTypedVectorList"; - let RenderMethod = "addVectorListOperands"; - } - - def "128" : RegisterOperand { - let ParserMatchClass = !cast(NAME # "_128AsmOperand"); - } - - // 64-bit register lists with explicit type. - - // { v0.8b, v1.8b } - def _8bAsmOperand : TypedVecListAsmOperand; - def "8b" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_8bAsmOperand"); - } - - // { v0.4h, v1.4h } - def _4hAsmOperand : TypedVecListAsmOperand; - def "4h" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_4hAsmOperand"); - } - - // { v0.2s, v1.2s } - def _2sAsmOperand : TypedVecListAsmOperand; - def "2s" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_2sAsmOperand"); - } - - // { v0.1d, v1.1d } - def _1dAsmOperand : TypedVecListAsmOperand; - def "1d" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_1dAsmOperand"); - } - - // 128-bit register lists with explicit type - - // { v0.16b, v1.16b } - def _16bAsmOperand : TypedVecListAsmOperand; - def "16b" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_16bAsmOperand"); - } - - // { v0.8h, v1.8h } - def _8hAsmOperand : TypedVecListAsmOperand; - def "8h" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_8hAsmOperand"); - } - - // { v0.4s, v1.4s } - def _4sAsmOperand : TypedVecListAsmOperand; - def "4s" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_4sAsmOperand"); - } - - // { v0.2d, v1.2d } - def _2dAsmOperand : TypedVecListAsmOperand; - def "2d" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_2dAsmOperand"); - } - - // 
{ v0.b, v1.b } - def _bAsmOperand : TypedVecListAsmOperand; - def "b" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_bAsmOperand"); - } - - // { v0.h, v1.h } - def _hAsmOperand : TypedVecListAsmOperand; - def "h" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_hAsmOperand"); - } - - // { v0.s, v1.s } - def _sAsmOperand : TypedVecListAsmOperand; - def "s" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_sAsmOperand"); - } - - // { v0.d, v1.d } - def _dAsmOperand : TypedVecListAsmOperand; - def "d" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_dAsmOperand"); - } - - -} - -defm VecListOne : VectorList<1, FPR64, FPR128>; -defm VecListTwo : VectorList<2, DD, QQ>; -defm VecListThree : VectorList<3, DDD, QQQ>; -defm VecListFour : VectorList<4, DDDD, QQQQ>; - -class FPRAsmOperand : AsmOperandClass { - let Name = "FPRAsmOperand" # RC; - let PredicateMethod = "isGPR64"; - let RenderMethod = "addRegOperands"; -} - -// Register operand versions of the scalar FP registers. 
-def FPR8Op : RegisterOperand { - let ParserMatchClass = FPRAsmOperand<"FPR8">; -} - -def FPR16Op : RegisterOperand { - let ParserMatchClass = FPRAsmOperand<"FPR16">; -} - -def FPR16Op_lo : RegisterOperand { - let ParserMatchClass = FPRAsmOperand<"FPR16_lo">; -} - -def FPR32Op : RegisterOperand { - let ParserMatchClass = FPRAsmOperand<"FPR32">; -} - -def FPR64Op : RegisterOperand { - let ParserMatchClass = FPRAsmOperand<"FPR64">; -} - -def FPR128Op : RegisterOperand { - let ParserMatchClass = FPRAsmOperand<"FPR128">; -} - -//===----------------------------------------------------------------------===// -// ARMv8.1a atomic CASP register operands - - -def WSeqPairs : RegisterTuples<[sube32, subo32], - [(decimate (rotl GPR32, 0), 2), - (decimate (rotl GPR32, 1), 2)]>; -def XSeqPairs : RegisterTuples<[sube64, subo64], - [(decimate (rotl GPR64, 0), 2), - (decimate (rotl GPR64, 1), 2)]>; - -def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32, - (add WSeqPairs)>{ - let Size = 64; -} -def XSeqPairsClass : RegisterClass<"AArch64", [untyped], 64, - (add XSeqPairs)>{ - let Size = 128; -} - - -let RenderMethod = "addRegOperands", ParserMethod="tryParseGPRSeqPair" in { - def WSeqPairsAsmOperandClass : AsmOperandClass { let Name = "WSeqPair"; } - def XSeqPairsAsmOperandClass : AsmOperandClass { let Name = "XSeqPair"; } -} - -def WSeqPairClassOperand : - RegisterOperand"> { - let ParserMatchClass = WSeqPairsAsmOperandClass; -} -def XSeqPairClassOperand : - RegisterOperand"> { - let ParserMatchClass = XSeqPairsAsmOperandClass; -} - - -//===----- END: v8.1a atomic CASP register operands -----------------------===// - -//===----------------------------------------------------------------------===// -// Armv8.7a accelerator extension register operands: 8 consecutive GPRs -// starting with an even one - -let Namespace = "AArch64" in { - foreach i = 0-7 in - def "x8sub_"#i : SubRegIndex<64, !mul(64, i)>; -} - -def Tuples8X : RegisterTuples< - !foreach(i, [0,1,2,3,4,5,6,7], 
!cast("x8sub_"#i)), - !foreach(i, [0,1,2,3,4,5,6,7], (trunc (decimate (rotl GPR64, i), 2), 12))>; - -def GPR64x8Class : RegisterClass<"AArch64", [i64x8], 512, (trunc Tuples8X, 12)> { - let Size = 512; -} -def GPR64x8AsmOp : AsmOperandClass { - let Name = "GPR64x8"; - let ParserMethod = "tryParseGPR64x8"; - let RenderMethod = "addRegOperands"; -} -def GPR64x8 : RegisterOperand { - let ParserMatchClass = GPR64x8AsmOp; - let PrintMethod = "printGPR64x8"; -} - -//===----- END: v8.7a accelerator extension register operands -------------===// - -// SVE predicate registers -def P0 : AArch64Reg<0, "p0">, DwarfRegNum<[48]>; -def P1 : AArch64Reg<1, "p1">, DwarfRegNum<[49]>; -def P2 : AArch64Reg<2, "p2">, DwarfRegNum<[50]>; -def P3 : AArch64Reg<3, "p3">, DwarfRegNum<[51]>; -def P4 : AArch64Reg<4, "p4">, DwarfRegNum<[52]>; -def P5 : AArch64Reg<5, "p5">, DwarfRegNum<[53]>; -def P6 : AArch64Reg<6, "p6">, DwarfRegNum<[54]>; -def P7 : AArch64Reg<7, "p7">, DwarfRegNum<[55]>; -def P8 : AArch64Reg<8, "p8">, DwarfRegNum<[56]>; -def P9 : AArch64Reg<9, "p9">, DwarfRegNum<[57]>; -def P10 : AArch64Reg<10, "p10">, DwarfRegNum<[58]>; -def P11 : AArch64Reg<11, "p11">, DwarfRegNum<[59]>; -def P12 : AArch64Reg<12, "p12">, DwarfRegNum<[60]>; -def P13 : AArch64Reg<13, "p13">, DwarfRegNum<[61]>; -def P14 : AArch64Reg<14, "p14">, DwarfRegNum<[62]>; -def P15 : AArch64Reg<15, "p15">, DwarfRegNum<[63]>; - -// The part of SVE registers that don't overlap Neon registers. -// These are only used as part of clobber lists. 
-def Z0_HI : AArch64Reg<0, "z0_hi">; -def Z1_HI : AArch64Reg<1, "z1_hi">; -def Z2_HI : AArch64Reg<2, "z2_hi">; -def Z3_HI : AArch64Reg<3, "z3_hi">; -def Z4_HI : AArch64Reg<4, "z4_hi">; -def Z5_HI : AArch64Reg<5, "z5_hi">; -def Z6_HI : AArch64Reg<6, "z6_hi">; -def Z7_HI : AArch64Reg<7, "z7_hi">; -def Z8_HI : AArch64Reg<8, "z8_hi">; -def Z9_HI : AArch64Reg<9, "z9_hi">; -def Z10_HI : AArch64Reg<10, "z10_hi">; -def Z11_HI : AArch64Reg<11, "z11_hi">; -def Z12_HI : AArch64Reg<12, "z12_hi">; -def Z13_HI : AArch64Reg<13, "z13_hi">; -def Z14_HI : AArch64Reg<14, "z14_hi">; -def Z15_HI : AArch64Reg<15, "z15_hi">; -def Z16_HI : AArch64Reg<16, "z16_hi">; -def Z17_HI : AArch64Reg<17, "z17_hi">; -def Z18_HI : AArch64Reg<18, "z18_hi">; -def Z19_HI : AArch64Reg<19, "z19_hi">; -def Z20_HI : AArch64Reg<20, "z20_hi">; -def Z21_HI : AArch64Reg<21, "z21_hi">; -def Z22_HI : AArch64Reg<22, "z22_hi">; -def Z23_HI : AArch64Reg<23, "z23_hi">; -def Z24_HI : AArch64Reg<24, "z24_hi">; -def Z25_HI : AArch64Reg<25, "z25_hi">; -def Z26_HI : AArch64Reg<26, "z26_hi">; -def Z27_HI : AArch64Reg<27, "z27_hi">; -def Z28_HI : AArch64Reg<28, "z28_hi">; -def Z29_HI : AArch64Reg<29, "z29_hi">; -def Z30_HI : AArch64Reg<30, "z30_hi">; -def Z31_HI : AArch64Reg<31, "z31_hi">; - -// SVE variable-size vector registers -let SubRegIndices = [zsub,zsub_hi] in { -def Z0 : AArch64Reg<0, "z0", [Q0, Z0_HI]>, DwarfRegNum<[96]>; -def Z1 : AArch64Reg<1, "z1", [Q1, Z1_HI]>, DwarfRegNum<[97]>; -def Z2 : AArch64Reg<2, "z2", [Q2, Z2_HI]>, DwarfRegNum<[98]>; -def Z3 : AArch64Reg<3, "z3", [Q3, Z3_HI]>, DwarfRegNum<[99]>; -def Z4 : AArch64Reg<4, "z4", [Q4, Z4_HI]>, DwarfRegNum<[100]>; -def Z5 : AArch64Reg<5, "z5", [Q5, Z5_HI]>, DwarfRegNum<[101]>; -def Z6 : AArch64Reg<6, "z6", [Q6, Z6_HI]>, DwarfRegNum<[102]>; -def Z7 : AArch64Reg<7, "z7", [Q7, Z7_HI]>, DwarfRegNum<[103]>; -def Z8 : AArch64Reg<8, "z8", [Q8, Z8_HI]>, DwarfRegNum<[104]>; -def Z9 : AArch64Reg<9, "z9", [Q9, Z9_HI]>, DwarfRegNum<[105]>; -def Z10 : AArch64Reg<10, 
"z10", [Q10, Z10_HI]>, DwarfRegNum<[106]>; -def Z11 : AArch64Reg<11, "z11", [Q11, Z11_HI]>, DwarfRegNum<[107]>; -def Z12 : AArch64Reg<12, "z12", [Q12, Z12_HI]>, DwarfRegNum<[108]>; -def Z13 : AArch64Reg<13, "z13", [Q13, Z13_HI]>, DwarfRegNum<[109]>; -def Z14 : AArch64Reg<14, "z14", [Q14, Z14_HI]>, DwarfRegNum<[110]>; -def Z15 : AArch64Reg<15, "z15", [Q15, Z15_HI]>, DwarfRegNum<[111]>; -def Z16 : AArch64Reg<16, "z16", [Q16, Z16_HI]>, DwarfRegNum<[112]>; -def Z17 : AArch64Reg<17, "z17", [Q17, Z17_HI]>, DwarfRegNum<[113]>; -def Z18 : AArch64Reg<18, "z18", [Q18, Z18_HI]>, DwarfRegNum<[114]>; -def Z19 : AArch64Reg<19, "z19", [Q19, Z19_HI]>, DwarfRegNum<[115]>; -def Z20 : AArch64Reg<20, "z20", [Q20, Z20_HI]>, DwarfRegNum<[116]>; -def Z21 : AArch64Reg<21, "z21", [Q21, Z21_HI]>, DwarfRegNum<[117]>; -def Z22 : AArch64Reg<22, "z22", [Q22, Z22_HI]>, DwarfRegNum<[118]>; -def Z23 : AArch64Reg<23, "z23", [Q23, Z23_HI]>, DwarfRegNum<[119]>; -def Z24 : AArch64Reg<24, "z24", [Q24, Z24_HI]>, DwarfRegNum<[120]>; -def Z25 : AArch64Reg<25, "z25", [Q25, Z25_HI]>, DwarfRegNum<[121]>; -def Z26 : AArch64Reg<26, "z26", [Q26, Z26_HI]>, DwarfRegNum<[122]>; -def Z27 : AArch64Reg<27, "z27", [Q27, Z27_HI]>, DwarfRegNum<[123]>; -def Z28 : AArch64Reg<28, "z28", [Q28, Z28_HI]>, DwarfRegNum<[124]>; -def Z29 : AArch64Reg<29, "z29", [Q29, Z29_HI]>, DwarfRegNum<[125]>; -def Z30 : AArch64Reg<30, "z30", [Q30, Z30_HI]>, DwarfRegNum<[126]>; -def Z31 : AArch64Reg<31, "z31", [Q31, Z31_HI]>, DwarfRegNum<[127]>; -} - -// Enum describing the element size for destructive -// operations. 
-class ElementSizeEnum val> { - bits<3> Value = val; -} - -def ElementSizeNone : ElementSizeEnum<0>; -def ElementSizeB : ElementSizeEnum<1>; -def ElementSizeH : ElementSizeEnum<2>; -def ElementSizeS : ElementSizeEnum<3>; -def ElementSizeD : ElementSizeEnum<4>; -def ElementSizeQ : ElementSizeEnum<5>; // Unused - -class SVERegOp : RegisterOperand { - ElementSizeEnum ElementSize; - - let ElementSize = Size; - let PrintMethod = !if(!eq(Suffix, ""), - "printSVERegOp<>", - "printSVERegOp<'" # Suffix # "'>"); - let ParserMatchClass = C; -} - -class PPRRegOp : SVERegOp {} -class ZPRRegOp : SVERegOp {} - -//****************************************************************************** - -// SVE predicate register classes. -class PPRClass : RegisterClass< - "AArch64", - [ nxv16i1, nxv8i1, nxv4i1, nxv2i1 ], 16, - (sequence "P%u", 0, lastreg)> { - let Size = 16; -} - -def PPR : PPRClass<15>; -def PPR_3b : PPRClass<7>; // Restricted 3 bit SVE predicate register class. - -class PPRAsmOperand : AsmOperandClass { - let Name = "SVE" # name # "Reg"; - let PredicateMethod = "isSVEPredicateVectorRegOfWidth<" - # Width # ", " # "AArch64::" # RegClass # "RegClassID>"; - let DiagnosticType = "InvalidSVE" # name # "Reg"; - let RenderMethod = "addRegOperands"; - let ParserMethod = "tryParseSVEPredicateVector"; -} - -def PPRAsmOpAny : PPRAsmOperand<"PredicateAny", "PPR", 0>; -def PPRAsmOp8 : PPRAsmOperand<"PredicateB", "PPR", 8>; -def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>; -def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>; -def PPRAsmOp64 : PPRAsmOperand<"PredicateD", "PPR", 64>; - -def PPRAny : PPRRegOp<"", PPRAsmOpAny, ElementSizeNone, PPR>; -def PPR8 : PPRRegOp<"b", PPRAsmOp8, ElementSizeB, PPR>; -def PPR16 : PPRRegOp<"h", PPRAsmOp16, ElementSizeH, PPR>; -def PPR32 : PPRRegOp<"s", PPRAsmOp32, ElementSizeS, PPR>; -def PPR64 : PPRRegOp<"d", PPRAsmOp64, ElementSizeD, PPR>; - -def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", 0>; - -def PPR3bAny : 
PPRRegOp<"", PPRAsmOp3bAny, ElementSizeNone, PPR_3b>; - -//****************************************************************************** - -// SVE vector register classes -class ZPRClass : RegisterClass<"AArch64", - [nxv16i8, nxv8i16, nxv4i32, nxv2i64, - nxv2f16, nxv4f16, nxv8f16, - nxv2bf16, nxv4bf16, nxv8bf16, - nxv2f32, nxv4f32, - nxv2f64], - 128, (sequence "Z%u", 0, lastreg)> { - let Size = 128; -} - -def ZPR : ZPRClass<31>; -def ZPR_4b : ZPRClass<15>; // Restricted 4 bit SVE vector register class. -def ZPR_3b : ZPRClass<7>; // Restricted 3 bit SVE vector register class. - -class ZPRAsmOperand - : AsmOperandClass { - let Name = "SVE" # name # "Reg"; - let PredicateMethod = "isSVEDataVectorRegOfWidth<" - # Width # ", AArch64::ZPR" - # RegClassSuffix # "RegClassID>"; - let RenderMethod = "addRegOperands"; - let DiagnosticType = "InvalidZPR" # RegClassSuffix # Width; - let ParserMethod = "tryParseSVEDataVector"; -} - -def ZPRAsmOpAny : ZPRAsmOperand<"VectorAny", 0>; -def ZPRAsmOp8 : ZPRAsmOperand<"VectorB", 8>; -def ZPRAsmOp16 : ZPRAsmOperand<"VectorH", 16>; -def ZPRAsmOp32 : ZPRAsmOperand<"VectorS", 32>; -def ZPRAsmOp64 : ZPRAsmOperand<"VectorD", 64>; -def ZPRAsmOp128 : ZPRAsmOperand<"VectorQ", 128>; - -def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ElementSizeNone, ZPR>; -def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ElementSizeB, ZPR>; -def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ElementSizeH, ZPR>; -def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ElementSizeS, ZPR>; -def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ElementSizeD, ZPR>; -def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ElementSizeQ, ZPR>; - -def ZPRAsmOp3b8 : ZPRAsmOperand<"Vector3bB", 8, "_3b">; -def ZPRAsmOp3b16 : ZPRAsmOperand<"Vector3bH", 16, "_3b">; -def ZPRAsmOp3b32 : ZPRAsmOperand<"Vector3bS", 32, "_3b">; - -def ZPR3b8 : ZPRRegOp<"b", ZPRAsmOp3b8, ElementSizeB, ZPR_3b>; -def ZPR3b16 : ZPRRegOp<"h", ZPRAsmOp3b16, ElementSizeH, ZPR_3b>; -def ZPR3b32 : ZPRRegOp<"s", ZPRAsmOp3b32, ElementSizeS, ZPR_3b>; - -def ZPRAsmOp4b16 : 
ZPRAsmOperand<"Vector4bH", 16, "_4b">; -def ZPRAsmOp4b32 : ZPRAsmOperand<"Vector4bS", 32, "_4b">; -def ZPRAsmOp4b64 : ZPRAsmOperand<"Vector4bD", 64, "_4b">; - -def ZPR4b16 : ZPRRegOp<"h", ZPRAsmOp4b16, ElementSizeH, ZPR_4b>; -def ZPR4b32 : ZPRRegOp<"s", ZPRAsmOp4b32, ElementSizeS, ZPR_4b>; -def ZPR4b64 : ZPRRegOp<"d", ZPRAsmOp4b64, ElementSizeD, ZPR_4b>; - -class FPRasZPR : AsmOperandClass{ - let Name = "FPR" # Width # "asZPR"; - let PredicateMethod = "isFPRasZPR"; - let RenderMethod = "addFPRasZPRRegOperands<" # Width # ">"; -} - -class FPRasZPROperand : RegisterOperand { - let ParserMatchClass = FPRasZPR; - let PrintMethod = "printZPRasFPR<" # Width # ">"; -} - -def FPR8asZPR : FPRasZPROperand<8>; -def FPR16asZPR : FPRasZPROperand<16>; -def FPR32asZPR : FPRasZPROperand<32>; -def FPR64asZPR : FPRasZPROperand<64>; -def FPR128asZPR : FPRasZPROperand<128>; - -let Namespace = "AArch64" in { - def zsub0 : SubRegIndex<128, -1>; - def zsub1 : SubRegIndex<128, -1>; - def zsub2 : SubRegIndex<128, -1>; - def zsub3 : SubRegIndex<128, -1>; -} - -// Pairs, triples, and quads of SVE vector registers. 
-def ZSeqPairs : RegisterTuples<[zsub0, zsub1], [(rotl ZPR, 0), (rotl ZPR, 1)]>; -def ZSeqTriples : RegisterTuples<[zsub0, zsub1, zsub2], [(rotl ZPR, 0), (rotl ZPR, 1), (rotl ZPR, 2)]>; -def ZSeqQuads : RegisterTuples<[zsub0, zsub1, zsub2, zsub3], [(rotl ZPR, 0), (rotl ZPR, 1), (rotl ZPR, 2), (rotl ZPR, 3)]>; - -def ZPR2 : RegisterClass<"AArch64", [untyped], 128, (add ZSeqPairs)> { - let Size = 256; -} -def ZPR3 : RegisterClass<"AArch64", [untyped], 128, (add ZSeqTriples)> { - let Size = 384; -} -def ZPR4 : RegisterClass<"AArch64", [untyped], 128, (add ZSeqQuads)> { - let Size = 512; -} - -class ZPRVectorList : AsmOperandClass { - let Name = "SVEVectorList" # NumRegs # ElementWidth; - let ParserMethod = "tryParseVectorList"; - let PredicateMethod = - "isTypedVectorList"; - let RenderMethod = "addVectorListOperands"; -} - -def Z_b : RegisterOperand"> { - let ParserMatchClass = ZPRVectorList<8, 1>; -} - -def Z_h : RegisterOperand"> { - let ParserMatchClass = ZPRVectorList<16, 1>; -} - -def Z_s : RegisterOperand"> { - let ParserMatchClass = ZPRVectorList<32, 1>; -} - -def Z_d : RegisterOperand"> { - let ParserMatchClass = ZPRVectorList<64, 1>; -} - -def ZZ_b : RegisterOperand"> { - let ParserMatchClass = ZPRVectorList<8, 2>; -} - -def ZZ_h : RegisterOperand"> { - let ParserMatchClass = ZPRVectorList<16, 2>; -} - -def ZZ_s : RegisterOperand"> { - let ParserMatchClass = ZPRVectorList<32, 2>; -} - -def ZZ_d : RegisterOperand"> { - let ParserMatchClass = ZPRVectorList<64, 2>; -} - -def ZZZ_b : RegisterOperand"> { - let ParserMatchClass = ZPRVectorList<8, 3>; -} - -def ZZZ_h : RegisterOperand"> { - let ParserMatchClass = ZPRVectorList<16, 3>; -} - -def ZZZ_s : RegisterOperand"> { - let ParserMatchClass = ZPRVectorList<32, 3>; -} - -def ZZZ_d : RegisterOperand"> { - let ParserMatchClass = ZPRVectorList<64, 3>; -} - -def ZZZZ_b : RegisterOperand"> { - let ParserMatchClass = ZPRVectorList<8, 4>; -} - -def ZZZZ_h : RegisterOperand"> { - let ParserMatchClass = ZPRVectorList<16, 
4>; -} - -def ZZZZ_s : RegisterOperand"> { - let ParserMatchClass = ZPRVectorList<32, 4>; -} - -def ZZZZ_d : RegisterOperand"> { - let ParserMatchClass = ZPRVectorList<64, 4>; -} - -class ZPRExtendAsmOperand : AsmOperandClass { - let Name = "ZPRExtend" # ShiftExtend # RegWidth # Scale - # !if(ScaleAlwaysSame, "Only", ""); - - let PredicateMethod = "isSVEDataVectorRegWithShiftExtend<" - # RegWidth # ", AArch64::ZPRRegClassID, " - # "AArch64_AM::" # ShiftExtend # ", " - # Scale # ", " - # !if(ScaleAlwaysSame, "true", "false") - # ">"; - let DiagnosticType = "InvalidZPR" # RegWidth # ShiftExtend # Scale; - let RenderMethod = "addRegOperands"; - let ParserMethod = "tryParseSVEDataVector"; -} - -class ZPRExtendRegisterOperand - : RegisterOperand { - let ParserMatchClass = - !cast("ZPR" # RegWidth # "AsmOpndExt" # Repr # Scale # Suffix); - let PrintMethod = "printRegWithShiftExtend<" - # !if(SignExtend, "true", "false") # ", " - # Scale # ", " - # !if(IsLSL, "'x'", "'w'") # ", " - # !if(!eq(RegWidth, 32), "'s'", "'d'") # ">"; -} - -foreach RegWidth = [32, 64] in { - // UXTW(8|16|32|64) - def ZPR#RegWidth#AsmOpndExtUXTW8Only : ZPRExtendAsmOperand<"UXTW", RegWidth, 8, 0b1>; - def ZPR#RegWidth#AsmOpndExtUXTW8 : ZPRExtendAsmOperand<"UXTW", RegWidth, 8>; - def ZPR#RegWidth#AsmOpndExtUXTW16 : ZPRExtendAsmOperand<"UXTW", RegWidth, 16>; - def ZPR#RegWidth#AsmOpndExtUXTW32 : ZPRExtendAsmOperand<"UXTW", RegWidth, 32>; - def ZPR#RegWidth#AsmOpndExtUXTW64 : ZPRExtendAsmOperand<"UXTW", RegWidth, 64>; - - def ZPR#RegWidth#ExtUXTW8Only : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 8, "Only">; - def ZPR#RegWidth#ExtUXTW8 : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 8>; - def ZPR#RegWidth#ExtUXTW16 : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 16>; - def ZPR#RegWidth#ExtUXTW32 : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 32>; - def ZPR#RegWidth#ExtUXTW64 : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 64>; - - // SXTW(8|16|32|64) - def 
ZPR#RegWidth#AsmOpndExtSXTW8Only : ZPRExtendAsmOperand<"SXTW", RegWidth, 8, 0b1>; - def ZPR#RegWidth#AsmOpndExtSXTW8 : ZPRExtendAsmOperand<"SXTW", RegWidth, 8>; - def ZPR#RegWidth#AsmOpndExtSXTW16 : ZPRExtendAsmOperand<"SXTW", RegWidth, 16>; - def ZPR#RegWidth#AsmOpndExtSXTW32 : ZPRExtendAsmOperand<"SXTW", RegWidth, 32>; - def ZPR#RegWidth#AsmOpndExtSXTW64 : ZPRExtendAsmOperand<"SXTW", RegWidth, 64>; - - def ZPR#RegWidth#ExtSXTW8Only : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 8, "Only">; - def ZPR#RegWidth#ExtSXTW8 : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 8>; - def ZPR#RegWidth#ExtSXTW16 : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 16>; - def ZPR#RegWidth#ExtSXTW32 : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 32>; - def ZPR#RegWidth#ExtSXTW64 : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 64>; - - // LSL(8|16|32|64) - def ZPR#RegWidth#AsmOpndExtLSL8 : ZPRExtendAsmOperand<"LSL", RegWidth, 8>; - def ZPR#RegWidth#AsmOpndExtLSL16 : ZPRExtendAsmOperand<"LSL", RegWidth, 16>; - def ZPR#RegWidth#AsmOpndExtLSL32 : ZPRExtendAsmOperand<"LSL", RegWidth, 32>; - def ZPR#RegWidth#AsmOpndExtLSL64 : ZPRExtendAsmOperand<"LSL", RegWidth, 64>; - def ZPR#RegWidth#ExtLSL8 : ZPRExtendRegisterOperand<0b0, 0b1, "LSL", RegWidth, 8>; - def ZPR#RegWidth#ExtLSL16 : ZPRExtendRegisterOperand<0b0, 0b1, "LSL", RegWidth, 16>; - def ZPR#RegWidth#ExtLSL32 : ZPRExtendRegisterOperand<0b0, 0b1, "LSL", RegWidth, 32>; - def ZPR#RegWidth#ExtLSL64 : ZPRExtendRegisterOperand<0b0, 0b1, "LSL", RegWidth, 64>; -} - -class GPR64ShiftExtendAsmOperand : AsmOperandClass { - let Name = AsmOperandName # Scale; - let PredicateMethod = "isGPR64WithShiftExtend"; - let DiagnosticType = "Invalid" # AsmOperandName # Scale; - let RenderMethod = "addRegOperands"; - let ParserMethod = "tryParseGPROperand"; -} - -class GPR64ExtendRegisterOperand : RegisterOperand{ - let ParserMatchClass = !cast(Name); - let PrintMethod = "printRegWithShiftExtend"; -} - -foreach Scale = [8, 
16, 32, 64, 128] in { - def GPR64shiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64shifted", Scale, "GPR64">; - def GPR64shifted # Scale : GPR64ExtendRegisterOperand<"GPR64shiftedAsmOpnd" # Scale, Scale, GPR64>; - - def GPR64NoXZRshiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64NoXZRshifted", Scale, "GPR64common">; - def GPR64NoXZRshifted # Scale : GPR64ExtendRegisterOperand<"GPR64NoXZRshiftedAsmOpnd" # Scale, Scale, GPR64common>; -} - -// Accumulator array tiles. -def ZAQ0 : AArch64Reg<0, "za0.q">; -def ZAQ1 : AArch64Reg<1, "za1.q">; -def ZAQ2 : AArch64Reg<2, "za2.q">; -def ZAQ3 : AArch64Reg<3, "za3.q">; -def ZAQ4 : AArch64Reg<4, "za4.q">; -def ZAQ5 : AArch64Reg<5, "za5.q">; -def ZAQ6 : AArch64Reg<6, "za6.q">; -def ZAQ7 : AArch64Reg<7, "za7.q">; -def ZAQ8 : AArch64Reg<8, "za8.q">; -def ZAQ9 : AArch64Reg<9, "za9.q">; -def ZAQ10 : AArch64Reg<10, "za10.q">; -def ZAQ11 : AArch64Reg<11, "za11.q">; -def ZAQ12 : AArch64Reg<12, "za12.q">; -def ZAQ13 : AArch64Reg<13, "za13.q">; -def ZAQ14 : AArch64Reg<14, "za14.q">; -def ZAQ15 : AArch64Reg<15, "za15.q">; - -let SubRegIndices = [zasubq0, zasubq1] in { - def ZAD0 : AArch64Reg<0, "za0.d", [ZAQ0, ZAQ8]>; - def ZAD1 : AArch64Reg<1, "za1.d", [ZAQ1, ZAQ9]>; - def ZAD2 : AArch64Reg<2, "za2.d", [ZAQ2, ZAQ10]>; - def ZAD3 : AArch64Reg<3, "za3.d", [ZAQ3, ZAQ11]>; - def ZAD4 : AArch64Reg<4, "za4.d", [ZAQ4, ZAQ12]>; - def ZAD5 : AArch64Reg<5, "za5.d", [ZAQ5, ZAQ13]>; - def ZAD6 : AArch64Reg<6, "za6.d", [ZAQ6, ZAQ14]>; - def ZAD7 : AArch64Reg<7, "za7.d", [ZAQ7, ZAQ15]>; -} - -let SubRegIndices = [zasubd0, zasubd1] in { - def ZAS0 : AArch64Reg<0, "za0.s", [ZAD0, ZAD4]>; - def ZAS1 : AArch64Reg<1, "za1.s", [ZAD1, ZAD5]>; - def ZAS2 : AArch64Reg<2, "za2.s", [ZAD2, ZAD6]>; - def ZAS3 : AArch64Reg<3, "za3.s", [ZAD3, ZAD7]>; -} - -let SubRegIndices = [zasubs0, zasubs1] in { - def ZAH0 : AArch64Reg<0, "za0.h", [ZAS0, ZAS2]>; - def ZAH1 : AArch64Reg<1, "za1.h", [ZAS1, ZAS3]>; -} - -let SubRegIndices = [zasubh0, zasubh1] in { - 
def ZAB0 : AArch64Reg<0, "za0.b", [ZAH0, ZAH1]>; -} - -let SubRegIndices = [zasubb] in { - def ZA : AArch64Reg<0, "za", [ZAB0]>; -} - -// SME Register Classes - -// Accumulator array -def MPR : RegisterClass<"AArch64", [untyped], 2048, (add ZA)> { - let Size = 2048; -} - -// Accumulator array as single tiles -def MPR8 : RegisterClass<"AArch64", [untyped], 2048, (add (sequence "ZAB%u", 0, 0))> { - let Size = 2048; -} -def MPR16 : RegisterClass<"AArch64", [untyped], 1024, (add (sequence "ZAH%u", 0, 1))> { - let Size = 1024; -} -def MPR32 : RegisterClass<"AArch64", [untyped], 512, (add (sequence "ZAS%u", 0, 3))> { - let Size = 512; -} -def MPR64 : RegisterClass<"AArch64", [untyped], 256, (add (sequence "ZAD%u", 0, 7))> { - let Size = 256; -} -def MPR128 : RegisterClass<"AArch64", [untyped], 128, (add (sequence "ZAQ%u", 0, 15))> { - let Size = 128; -} - -// SME Register Operands -// There are three types of SME matrix register operands: -// * Tiles: -// -// These tiles make up the larger accumulator matrix. The tile representation -// has an element type suffix, e.g. za0.b or za15.q and can be any of the -// registers: -// ZAQ0..ZAQ15 -// ZAD0..ZAD7 -// ZAS0..ZAS3 -// ZAH0..ZAH1 -// or ZAB0 -// -// * Tile vectors: -// -// Their representation is similar to regular tiles, but they have an extra -// 'h' or 'v' to tell how the vector at [reg+offset] is layed out in the tile, -// horizontally or vertically. -// -// e.g. za1h.h or za15v.q, which corresponds to vectors in registers ZAH1 and -// ZAQ15, respectively. The horizontal/vertical is more a property of the -// instruction, than a property of the asm-operand itself, or its register. -// The distinction is required for the parsing/printing of the operand, -// as from a compiler's perspective, the whole tile is read/written. -// -// * Accumulator matrix: -// -// This is the entire matrix accumulator register ZA (<=> ZAB0), printed as -// 'za'. 
- -// -// Tiles -// - -class MatrixTileAsmOperand : AsmOperandClass { - let Name = "MatrixTile" # EltSize; - let DiagnosticType = "Invalid" # Name; - let ParserMethod = "tryParseMatrixRegister"; - let RenderMethod = "addMatrixOperands"; - let PredicateMethod = "isMatrixRegOperand<" - # "MatrixKind::Tile" # ", " - # EltSize # ", AArch64::" # RC # "RegClassID>"; -} - -class MatrixTileOperand - : RegisterOperand { - let ParserMatchClass = MatrixTileAsmOperand(RC), EltSize>; - let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">"; - let PrintMethod = "printMatrixTile"; -} - -def TileOp32 : MatrixTileOperand<32, 2, MPR32>; -def TileOp64 : MatrixTileOperand<64, 3, MPR64>; - -// -// Tile vectors (horizontal and vertical) -// - -class MatrixTileVectorAsmOperand - : AsmOperandClass { - let Name = "MatrixTileVector" # !if(IsVertical, "V", "H") # EltSize; - let DiagnosticType = "Invalid" # Name; - let ParserMethod = "tryParseMatrixRegister"; - let RenderMethod = "addMatrixOperands"; - let PredicateMethod = "isMatrixRegOperand<" - # "MatrixKind::" - # !if(IsVertical, "Col", "Row") # ", " - # EltSize # ", AArch64::" # RC # "RegClassID>"; -} - -class MatrixTileVectorOperand - : RegisterOperand { - let ParserMatchClass = MatrixTileVectorAsmOperand(RC), EltSize, - IsVertical>; - let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">"; - let PrintMethod = "printMatrixTileVector<" # IsVertical # ">"; -} - -def TileVectorOpH8 : MatrixTileVectorOperand< 8, 0, MPR8, 0>; -def TileVectorOpH16 : MatrixTileVectorOperand< 16, 1, MPR16, 0>; -def TileVectorOpH32 : MatrixTileVectorOperand< 32, 2, MPR32, 0>; -def TileVectorOpH64 : MatrixTileVectorOperand< 64, 3, MPR64, 0>; -def TileVectorOpH128 : MatrixTileVectorOperand<128, 4, MPR128, 0>; - -def TileVectorOpV8 : MatrixTileVectorOperand< 8, 0, MPR8, 1>; -def TileVectorOpV16 : MatrixTileVectorOperand< 16, 1, MPR16, 1>; -def TileVectorOpV32 : MatrixTileVectorOperand< 32, 2, MPR32, 1>; -def TileVectorOpV64 : 
MatrixTileVectorOperand< 64, 3, MPR64, 1>; -def TileVectorOpV128 : MatrixTileVectorOperand<128, 4, MPR128, 1>; - -// -// Accumulator matrix -// - -class MatrixAsmOperand : AsmOperandClass { - let Name = "Matrix"; - let DiagnosticType = "Invalid" # Name; - let ParserMethod = "tryParseMatrixRegister"; - let RenderMethod = "addMatrixOperands"; - let PredicateMethod = "isMatrixRegOperand<" - # "MatrixKind::Array" # ", " - # EltSize # ", AArch64::" # RC # "RegClassID>"; -} - -class MatrixOperand : RegisterOperand { - let ParserMatchClass = MatrixAsmOperand(RC), EltSize>; - let PrintMethod = "printMatrix<" # EltSize # ">"; -} - -def MatrixOp : MatrixOperand; - -class MatrixTileListAsmOperand : AsmOperandClass { - let Name = "MatrixTileList"; - let ParserMethod = "tryParseMatrixTileList"; - let RenderMethod = "addMatrixTileListOperands"; - let PredicateMethod = "isMatrixTileList"; -} - -class MatrixTileListOperand : Operand { - let ParserMatchClass = MatrixTileListAsmOperand<>; - let DecoderMethod = "DecodeMatrixTileListRegisterClass"; - let EncoderMethod = "EncodeMatrixTileListRegisterClass"; - let PrintMethod = "printMatrixTileList"; -} - -def MatrixTileList : MatrixTileListOperand<>; - -def MatrixIndexGPR32_12_15 : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 12, 15)> { - let DiagnosticType = "InvalidMatrixIndexGPR32_12_15"; -} -def MatrixIndexGPR32Op12_15 : RegisterOperand { - let EncoderMethod = "encodeMatrixIndexGPR32"; -} - -def SVCROperand : AsmOperandClass { - let Name = "SVCR"; - let ParserMethod = "tryParseSVCR"; - let DiagnosticType = "Invalid" # Name; -} - -def svcr_op : Operand { - let ParserMatchClass = SVCROperand; - let PrintMethod = "printSVCROp"; - let DecoderMethod = "DecodeSVCROp"; - let MCOperandPredicate = [{ - if (!MCOp.isImm()) - return false; - return AArch64SVCR::lookupSVCRByEncoding(MCOp.getImm()) != nullptr; - }]; -} diff --git a/suite/synctools/tablegen/AArch64/AArch64SMEInstrInfo.td 
b/suite/synctools/tablegen/AArch64/AArch64SMEInstrInfo.td deleted file mode 100644 index aacace64e9..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SMEInstrInfo.td +++ /dev/null @@ -1,143 +0,0 @@ -//=- AArch64SMEInstrInfo.td - AArch64 SME Instructions -*- tablegen -*-----=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// AArch64 Scalable Matrix Extension (SME) Instruction definitions. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Add vector elements horizontally or vertically to ZA tile. -//===----------------------------------------------------------------------===// - -let Predicates = [HasSME] in { -def ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha">; -def ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva">; -} - -let Predicates = [HasSMEI64] in { -def ADDHA_MPPZ_D : sme_add_vector_to_tile_u64<0b0, "addha">; -def ADDVA_MPPZ_D : sme_add_vector_to_tile_u64<0b1, "addva">; -} - -let Predicates = [HasSME] in { -//===----------------------------------------------------------------------===// -// Outer products -//===----------------------------------------------------------------------===// - -defm BFMOPA_MPPZZ : sme_bf16_outer_product<0b0, "bfmopa">; -defm BFMOPS_MPPZZ : sme_bf16_outer_product<0b1, "bfmops">; - -def FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, "fmopa">; -def FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, "fmops">; -} - -let Predicates = [HasSMEF64] in { -def FMOPA_MPPZZ_D : sme_outer_product_fp64<0b0, "fmopa">; -def FMOPS_MPPZZ_D : sme_outer_product_fp64<0b1, "fmops">; -} - -let Predicates = [HasSME] in { -defm FMOPAL_MPPZZ : sme_f16_outer_product<0b0, 
"fmopa">; -defm FMOPSL_MPPZZ : sme_f16_outer_product<0b1, "fmops">; - -def SMOPA_MPPZZ_S : sme_int_outer_product_i32<0b000, "smopa">; -def SMOPS_MPPZZ_S : sme_int_outer_product_i32<0b001, "smops">; -def UMOPA_MPPZZ_S : sme_int_outer_product_i32<0b110, "umopa">; -def UMOPS_MPPZZ_S : sme_int_outer_product_i32<0b111, "umops">; -def SUMOPA_MPPZZ_S : sme_int_outer_product_i32<0b010, "sumopa">; -def SUMOPS_MPPZZ_S : sme_int_outer_product_i32<0b011, "sumops">; -def USMOPA_MPPZZ_S : sme_int_outer_product_i32<0b100, "usmopa">; -def USMOPS_MPPZZ_S : sme_int_outer_product_i32<0b101, "usmops">; -} - -let Predicates = [HasSMEI64] in { -def SMOPA_MPPZZ_D : sme_int_outer_product_i64<0b000, "smopa">; -def SMOPS_MPPZZ_D : sme_int_outer_product_i64<0b001, "smops">; -def UMOPA_MPPZZ_D : sme_int_outer_product_i64<0b110, "umopa">; -def UMOPS_MPPZZ_D : sme_int_outer_product_i64<0b111, "umops">; -def SUMOPA_MPPZZ_D : sme_int_outer_product_i64<0b010, "sumopa">; -def SUMOPS_MPPZZ_D : sme_int_outer_product_i64<0b011, "sumops">; -def USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa">; -def USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops">; -} - -let Predicates = [HasSME] in { -//===----------------------------------------------------------------------===// -// Loads and stores -//===----------------------------------------------------------------------===// - -defm LD1_MXIPXX : sme_mem_ld_ss<"ld1">; -defm ST1_MXIPXX : sme_mem_st_ss<"st1">; - -//===----------------------------------------------------------------------===// -// Spill + fill -//===----------------------------------------------------------------------===// - -defm LDR_ZA : sme_fill<"ldr">; -defm STR_ZA : sme_spill<"str">; - -//===----------------------------------------------------------------------===// -// Move instructions -//===----------------------------------------------------------------------===// - -defm INSERT_MXIPZ : sme_vector_to_tile<"mova">; -defm EXTRACT_ZPMXI : sme_tile_to_vector<"mova">; - 
-//===----------------------------------------------------------------------===// -// Zero instruction -//===----------------------------------------------------------------------===// - -defm ZERO_M : sme_zero<"zero">; - -//===----------------------------------------------------------------------===// -// Mode selection and state access instructions -//===----------------------------------------------------------------------===// - -// SME defines three pstate fields to set or clear PSTATE.SM, PSTATE.ZA, or -// both fields: -// -// MSR SVCRSM, # -// MSR SVCRZA, # -// MSR SVCRSMZA, # -// -// It's tricky to using the existing pstate operand defined in -// AArch64SystemOperands.td since it only encodes 5 bits including op1;op2, -// when these fields are also encoded in CRm[3:1]. -class MSRpstatesvcrImm0_1 - : PstateWriteSimple<(ins svcr_op:$pstatefield, imm0_1:$imm), "msr", - "\t$pstatefield, $imm">, - Sched<[WriteSys]> { - bits<3> pstatefield; - bit imm; - let Inst{18-16} = 0b011; // op1 - let Inst{11-9} = pstatefield; - let Inst{8} = imm; - let Inst{7-5} = 0b011; // op2 -} - -def MSRpstatesvcrImm1 : MSRpstatesvcrImm0_1; -def : InstAlias<"smstart", (MSRpstatesvcrImm1 0b011, 0b1)>; -def : InstAlias<"smstart sm", (MSRpstatesvcrImm1 0b001, 0b1)>; -def : InstAlias<"smstart za", (MSRpstatesvcrImm1 0b010, 0b1)>; - -def : InstAlias<"smstop", (MSRpstatesvcrImm1 0b011, 0b0)>; -def : InstAlias<"smstop sm", (MSRpstatesvcrImm1 0b001, 0b0)>; -def : InstAlias<"smstop za", (MSRpstatesvcrImm1 0b010, 0b0)>; - -//===----------------------------------------------------------------------===// -// SVE2 instructions -//===----------------------------------------------------------------------===// - -def REVD_ZPmZ : sve2_int_perm_revd<"revd">; - -defm SCLAMP_ZZZ : sve2_clamp<"sclamp", 0b0>; -defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1>; - -defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel">; - -} // End let Predicates = [HasSME] diff --git 
a/suite/synctools/tablegen/AArch64/AArch64SVEInstrInfo.td b/suite/synctools/tablegen/AArch64/AArch64SVEInstrInfo.td deleted file mode 100644 index 2397a6d320..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SVEInstrInfo.td +++ /dev/null @@ -1,3195 +0,0 @@ -//=- AArch64SVEInstrInfo.td - AArch64 SVE Instructions -*- tablegen -*-----=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// AArch64 Scalable Vector Extension (SVE) Instruction definitions. -// -//===----------------------------------------------------------------------===// - -// For predicated nodes where the entire operation is controlled by a governing -// predicate, please stick to a similar naming convention as used for the -// ISD nodes: -// -// SDNode <=> AArch64ISD -// ------------------------------- -// _m <=> _MERGE_OP -// _mt <=> _MERGE_PASSTHRU -// _z <=> _MERGE_ZERO -// _p <=> _PRED -// -// Given the context of this file, it is not strictly necessary to use _p to -// distinguish predicated from unpredicated nodes given that most SVE -// instructions are predicated. 
- -// Contiguous loads - node definitions -// -def SDT_AArch64_LD1 : SDTypeProfile<1, 3, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, - SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> -]>; - -def AArch64ld1_z : SDNode<"AArch64ISD::LD1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>; -def AArch64ld1s_z : SDNode<"AArch64ISD::LD1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>; - -// Non-faulting & first-faulting loads - node definitions -// -def AArch64ldnf1_z : SDNode<"AArch64ISD::LDNF1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1_z : SDNode<"AArch64ISD::LDFF1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; - -def AArch64ldnf1s_z : SDNode<"AArch64ISD::LDNF1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1s_z : SDNode<"AArch64ISD::LDFF1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; - -// Contiguous load and replicate - node definitions -// - -def SDT_AArch64_LD1Replicate : SDTypeProfile<1, 2, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, - SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> -]>; - -def AArch64ld1rq_z : SDNode<"AArch64ISD::LD1RQ_MERGE_ZERO", SDT_AArch64_LD1Replicate, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1ro_z : SDNode<"AArch64ISD::LD1RO_MERGE_ZERO", SDT_AArch64_LD1Replicate, [SDNPHasChain, SDNPMayLoad]>; - -// Gather loads - node definitions -// -def SDT_AArch64_GATHER_SV : SDTypeProfile<1, 4, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisVT<4, OtherVT>, - SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> -]>; - -def SDT_AArch64_GATHER_VS : SDTypeProfile<1, 4, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVT<4, OtherVT>, - SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> -]>; - -def AArch64ld1_gather_z : SDNode<"AArch64ISD::GLD1_MERGE_ZERO", 
SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1_gather_scaled_z : SDNode<"AArch64ISD::GLD1_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1_gather_uxtw_z : SDNode<"AArch64ISD::GLD1_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1_gather_sxtw_z : SDNode<"AArch64ISD::GLD1_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1_gather_imm_z : SDNode<"AArch64ISD::GLD1_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; - -def AArch64ld1s_gather_z : SDNode<"AArch64ISD::GLD1S_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1s_gather_scaled_z : SDNode<"AArch64ISD::GLD1S_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1s_gather_uxtw_z : SDNode<"AArch64ISD::GLD1S_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1s_gather_sxtw_z : SDNode<"AArch64ISD::GLD1S_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1s_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1s_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1s_gather_imm_z : SDNode<"AArch64ISD::GLD1S_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; - -def AArch64ldff1_gather_z : SDNode<"AArch64ISD::GLDFF1_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1_gather_scaled_z : 
SDNode<"AArch64ISD::GLDFF1_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1_gather_uxtw_z : SDNode<"AArch64ISD::GLDFF1_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1_gather_sxtw_z : SDNode<"AArch64ISD::GLDFF1_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1_gather_imm_z : SDNode<"AArch64ISD::GLDFF1_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; - -def AArch64ldff1s_gather_z : SDNode<"AArch64ISD::GLDFF1S_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1s_gather_scaled_z : SDNode<"AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1s_gather_uxtw_z : SDNode<"AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1s_gather_sxtw_z : SDNode<"AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1s_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1s_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1s_gather_imm_z : 
SDNode<"AArch64ISD::GLDFF1S_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; - -def AArch64ldnt1_gather_z : SDNode<"AArch64ISD::GLDNT1_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ldnt1s_gather_z : SDNode<"AArch64ISD::GLDNT1S_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; - -// Contiguous stores - node definitions -// -def SDT_AArch64_ST1 : SDTypeProfile<0, 4, [ - SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, - SDTCVecEltisVT<2,i1>, SDTCisSameNumEltsAs<0,2> -]>; - -def AArch64st1 : SDNode<"AArch64ISD::ST1_PRED", SDT_AArch64_ST1, [SDNPHasChain, SDNPMayStore]>; - -// Scatter stores - node definitions -// -def SDT_AArch64_SCATTER_SV : SDTypeProfile<0, 5, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisVT<4, OtherVT>, - SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> -]>; - -def SDT_AArch64_SCATTER_VS : SDTypeProfile<0, 5, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVT<4, OtherVT>, - SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> -]>; - -def AArch64st1_scatter : SDNode<"AArch64ISD::SST1_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; -def AArch64st1_scatter_scaled : SDNode<"AArch64ISD::SST1_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; -def AArch64st1_scatter_uxtw : SDNode<"AArch64ISD::SST1_UXTW_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; -def AArch64st1_scatter_sxtw : SDNode<"AArch64ISD::SST1_SXTW_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; -def AArch64st1_scatter_uxtw_scaled : SDNode<"AArch64ISD::SST1_UXTW_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; -def AArch64st1_scatter_sxtw_scaled : SDNode<"AArch64ISD::SST1_SXTW_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; -def AArch64st1_scatter_imm : SDNode<"AArch64ISD::SST1_IMM_PRED", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>; - -def AArch64stnt1_scatter : 
SDNode<"AArch64ISD::SSTNT1_PRED", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>; - -// AArch64 SVE/SVE2 - the remaining node definitions -// - -// SVE CNT/INC/RDVL -def sve_rdvl_imm : ComplexPattern">; -def sve_cnth_imm : ComplexPattern">; -def sve_cntw_imm : ComplexPattern">; -def sve_cntd_imm : ComplexPattern">; - -// SVE DEC -def sve_cnth_imm_neg : ComplexPattern">; -def sve_cntw_imm_neg : ComplexPattern">; -def sve_cntd_imm_neg : ComplexPattern">; - -def SDT_AArch64Reduce : SDTypeProfile<1, 2, [SDTCisVec<1>, SDTCisVec<2>]>; -def AArch64faddv_p : SDNode<"AArch64ISD::FADDV_PRED", SDT_AArch64Reduce>; -def AArch64fmaxv_p : SDNode<"AArch64ISD::FMAXV_PRED", SDT_AArch64Reduce>; -def AArch64fmaxnmv_p : SDNode<"AArch64ISD::FMAXNMV_PRED", SDT_AArch64Reduce>; -def AArch64fminv_p : SDNode<"AArch64ISD::FMINV_PRED", SDT_AArch64Reduce>; -def AArch64fminnmv_p : SDNode<"AArch64ISD::FMINNMV_PRED", SDT_AArch64Reduce>; -def AArch64saddv_p : SDNode<"AArch64ISD::SADDV_PRED", SDT_AArch64Reduce>; -def AArch64uaddv_p : SDNode<"AArch64ISD::UADDV_PRED", SDT_AArch64Reduce>; -def AArch64smaxv_p : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>; -def AArch64umaxv_p : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>; -def AArch64sminv_p : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>; -def AArch64uminv_p : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>; -def AArch64orv_p : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>; -def AArch64eorv_p : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>; -def AArch64andv_p : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>; -def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>; -def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>; - -def SDT_AArch64Arith : SDTypeProfile<1, 3, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, - SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3> -]>; - -def SDT_AArch64FMA : SDTypeProfile<1, 4, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, 
SDTCisVec<3>, SDTCisVec<4>, - SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4> -]>; - -// Predicated operations with the result of inactive lanes being unspecified. -def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>; -def AArch64asr_p : SDNode<"AArch64ISD::SRA_PRED", SDT_AArch64Arith>; -def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>; -def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>; -def AArch64fma_p : SDNode<"AArch64ISD::FMA_PRED", SDT_AArch64FMA>; -def AArch64fmax_p : SDNode<"AArch64ISD::FMAX_PRED", SDT_AArch64Arith>; -def AArch64fmaxnm_p : SDNode<"AArch64ISD::FMAXNM_PRED", SDT_AArch64Arith>; -def AArch64fmin_p : SDNode<"AArch64ISD::FMIN_PRED", SDT_AArch64Arith>; -def AArch64fminnm_p : SDNode<"AArch64ISD::FMINNM_PRED", SDT_AArch64Arith>; -def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>; -def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>; -def AArch64lsl_p : SDNode<"AArch64ISD::SHL_PRED", SDT_AArch64Arith>; -def AArch64lsr_p : SDNode<"AArch64ISD::SRL_PRED", SDT_AArch64Arith>; -def AArch64mul_p : SDNode<"AArch64ISD::MUL_PRED", SDT_AArch64Arith>; -def AArch64sabd_p : SDNode<"AArch64ISD::ABDS_PRED", SDT_AArch64Arith>; -def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>; -def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>; -def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>; -def AArch64smulh_p : SDNode<"AArch64ISD::MULHS_PRED", SDT_AArch64Arith>; -def AArch64sub_p : SDNode<"AArch64ISD::SUB_PRED", SDT_AArch64Arith>; -def AArch64uabd_p : SDNode<"AArch64ISD::ABDU_PRED", SDT_AArch64Arith>; -def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>; -def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>; -def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>; -def AArch64umulh_p : SDNode<"AArch64ISD::MULHU_PRED", SDT_AArch64Arith>; - -def 
SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3,i32>, - SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2> -]>; - -def AArch64asrd_m1 : SDNode<"AArch64ISD::SRAD_MERGE_OP1", SDT_AArch64Arith_Imm>; - -def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3, OtherVT>, SDTCisVec<4>, - SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisVTSmallerThanOp<3, 2>, SDTCisSameAs<0,4> -]>; - -// Predicated operations with the result of inactive lanes provided by the last operand. -def AArch64clz_mt : SDNode<"AArch64ISD::CTLZ_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64cnt_mt : SDNode<"AArch64ISD::CTPOP_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64fneg_mt : SDNode<"AArch64ISD::FNEG_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64fabs_mt : SDNode<"AArch64ISD::FABS_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64abs_mt : SDNode<"AArch64ISD::ABS_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64neg_mt : SDNode<"AArch64ISD::NEG_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64sxt_mt : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; -def AArch64uxt_mt : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; -def AArch64frintp_mt : SDNode<"AArch64ISD::FCEIL_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64frintm_mt : SDNode<"AArch64ISD::FFLOOR_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64frinti_mt : SDNode<"AArch64ISD::FNEARBYINT_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64frintx_mt : SDNode<"AArch64ISD::FRINT_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64frinta_mt : SDNode<"AArch64ISD::FROUND_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64frintn_mt : SDNode<"AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64frintz_mt : SDNode<"AArch64ISD::FTRUNC_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64fsqrt_mt : SDNode<"AArch64ISD::FSQRT_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64frecpx_mt : 
SDNode<"AArch64ISD::FRECPX_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64rbit_mt : SDNode<"AArch64ISD::BITREVERSE_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64revb_mt : SDNode<"AArch64ISD::BSWAP_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64revh_mt : SDNode<"AArch64ISD::REVH_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64revw_mt : SDNode<"AArch64ISD::REVW_MERGE_PASSTHRU", SDT_AArch64Arith>; - -// These are like the above but we don't yet have need for ISD nodes. They allow -// a single pattern to match intrinsic and ISD operand layouts. -def AArch64cls_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cls node:$pt, node:$pg, node:$op)]>; -def AArch64cnot_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cnot node:$pt, node:$pg, node:$op)]>; -def AArch64not_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_not node:$pt, node:$pg, node:$op)]>; - -def SDT_AArch64FCVT : SDTypeProfile<1, 3, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, - SDTCVecEltisVT<1,i1> -]>; - -def SDT_AArch64FCVTR : SDTypeProfile<1, 4, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVec<4>, - SDTCVecEltisVT<1,i1> -]>; - -def AArch64fcvtr_mt : SDNode<"AArch64ISD::FP_ROUND_MERGE_PASSTHRU", SDT_AArch64FCVTR>; -def AArch64fcvte_mt : SDNode<"AArch64ISD::FP_EXTEND_MERGE_PASSTHRU", SDT_AArch64FCVT>; -def AArch64ucvtf_mt : SDNode<"AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>; -def AArch64scvtf_mt : SDNode<"AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>; -def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>; -def AArch64fcvtzs_mt : SDNode<"AArch64ISD::FCVTZS_MERGE_PASSTHRU", SDT_AArch64FCVT>; - -def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>; -def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; -def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>; -def AArch64fadda_p : 
SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>; - -def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; -def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>; - -def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 3>, SDTCisVec<1>, SDTCVecEltisVT<1,i1>]>; -def AArch64dup_mt : SDNode<"AArch64ISD::DUP_MERGE_PASSTHRU", SDT_AArch64DUP_PRED>; - -def AArch64splice : SDNode<"AArch64ISD::SPLICE", SDT_AArch64Arith>; - -def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>; - -def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2), - (AArch64mul_p node:$pred, node:$src1, node:$src2), [{ - return N->hasOneUse(); -}]>; - -def AArch64fabd_p : PatFrag<(ops node:$pg, node:$op1, node:$op2), - (AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)>; - -def AArch64fneg_mt_nsz : PatFrag<(ops node:$pred, node:$op, node:$pt), - (AArch64fneg_mt node:$pred, node:$op, node:$pt), [{ - return N->getFlags().hasNoSignedZeros(); -}]>; - -def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, - SDTCisSameAs<0,1>, SDTCisSameAs<1,2> -]>; - -def AArch64bic_node : SDNode<"AArch64ISD::BIC", SDT_AArch64Arith_Unpred>; - -def AArch64bic : PatFrags<(ops node:$op1, node:$op2), - [(and node:$op1, (xor node:$op2, (AArch64dup (i32 -1)))), - (and node:$op1, (xor node:$op2, (AArch64dup (i64 -1)))), - (and node:$op1, (xor node:$op2, (SVEAllActive))), - (AArch64bic_node node:$op1, node:$op2)]>; - -let Predicates = [HasSVE] in { - defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>; - def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">; - defm RDFFR_P : sve_int_rdffr_unpred<"rdffr", int_aarch64_sve_rdffr>; - def SETFFR : sve_int_setffr<"setffr", int_aarch64_sve_setffr>; - def WRFFR : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>; -} // End HasSVE - -let Predicates = [HasSVEorStreamingSVE] in { - defm ADD_ZZZ : 
sve_int_bin_cons_arit_0<0b000, "add", add>; - defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub", sub>; - defm SQADD_ZZZ : sve_int_bin_cons_arit_0<0b100, "sqadd", saddsat>; - defm UQADD_ZZZ : sve_int_bin_cons_arit_0<0b101, "uqadd", uaddsat>; - defm SQSUB_ZZZ : sve_int_bin_cons_arit_0<0b110, "sqsub", ssubsat>; - defm UQSUB_ZZZ : sve_int_bin_cons_arit_0<0b111, "uqsub", usubsat>; - - defm AND_ZZZ : sve_int_bin_cons_log<0b00, "and", and>; - defm ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr", or>; - defm EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor", xor>; - defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic", AArch64bic>; - - defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add", "ADD_ZPZZ", int_aarch64_sve_add, DestructiveBinaryComm>; - defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", int_aarch64_sve_sub, DestructiveBinaryCommWithRev, "SUBR_ZPmZ">; - defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", int_aarch64_sve_subr, DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>; - - defm ADD_ZPZZ : sve_int_bin_pred_bhsd; - defm SUB_ZPZZ : sve_int_bin_pred_bhsd; -} // End HasSVEorStreamingSVE - -let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in { - defm ADD_ZPZZ : sve_int_bin_pred_zeroing_bhsd; - defm SUB_ZPZZ : sve_int_bin_pred_zeroing_bhsd; - defm SUBR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; -} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos - -let Predicates = [HasSVEorStreamingSVE] in { - defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", int_aarch64_sve_orr>; - defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", int_aarch64_sve_eor>; - defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", int_aarch64_sve_and>; - defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", int_aarch64_sve_bic>; - - defm ADD_ZI : sve_int_arith_imm0<0b000, "add", add>; - defm SUB_ZI : sve_int_arith_imm0<0b001, "sub", sub>; - defm SUBR_ZI : sve_int_arith_imm0_subr<0b011, "subr", sub>; - defm SQADD_ZI : sve_int_arith_imm0<0b100, 
"sqadd", saddsat>; - defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd", uaddsat>; - defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub", ssubsat>; - defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub", usubsat>; - - defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", int_aarch64_sve_mad>; - defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb", int_aarch64_sve_msb>; - defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", int_aarch64_sve_mla, add, AArch64mul_p_oneuse>; - defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", int_aarch64_sve_mls, sub, AArch64mul_p_oneuse>; - - // SVE predicated integer reductions. - defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", AArch64saddv_p>; - defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", AArch64uaddv_p>; - defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv", AArch64smaxv_p>; - defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv", AArch64umaxv_p>; - defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv", AArch64sminv_p>; - defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv", AArch64uminv_p>; - defm ORV_VPZ : sve_int_reduce_2<0b000, "orv", AArch64orv_p>; - defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv", AArch64eorv_p>; - defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv", AArch64andv_p>; - - defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn", or>; - defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor>; - defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and>; - defm BIC_ZI : sve_int_log_imm_bic; - - defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_p>; - defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_p>; - defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_p>; - defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_p>; - - defm MUL_ZI : sve_int_arith_imm2<"mul", AArch64mul_p>; - defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", "MUL_ZPZZ", int_aarch64_sve_mul, DestructiveBinaryComm>; - defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", "SMULH_ZPZZ", int_aarch64_sve_smulh, 
DestructiveBinaryComm>; - defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", "UMULH_ZPZZ", int_aarch64_sve_umulh, DestructiveBinaryComm>; - - defm MUL_ZPZZ : sve_int_bin_pred_bhsd; - defm SMULH_ZPZZ : sve_int_bin_pred_bhsd; - defm UMULH_ZPZZ : sve_int_bin_pred_bhsd; - - defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", "SDIV_ZPZZ", int_aarch64_sve_sdiv, DestructiveBinaryCommWithRev, "SDIVR_ZPmZ">; - defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", "UDIV_ZPZZ", int_aarch64_sve_udiv, DestructiveBinaryCommWithRev, "UDIVR_ZPmZ">; - defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", "SDIVR_ZPZZ", int_aarch64_sve_sdivr, DestructiveBinaryCommWithRev, "SDIV_ZPmZ", /*isReverseInstr*/ 1>; - defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", "UDIVR_ZPZZ", int_aarch64_sve_udivr, DestructiveBinaryCommWithRev, "UDIV_ZPmZ", /*isReverseInstr*/ 1>; - - defm SDIV_ZPZZ : sve_int_bin_pred_sd; - defm UDIV_ZPZZ : sve_int_bin_pred_sd; - - defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", AArch64sdot>; - defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", AArch64udot>; - - defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>; - defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot", int_aarch64_sve_udot_lane>; - - defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb", AArch64sxt_mt>; - defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb", AArch64uxt_mt>; - defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth", AArch64sxt_mt>; - defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth", AArch64uxt_mt>; - defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw", AArch64sxt_mt>; - defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw", AArch64uxt_mt>; - defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs", AArch64abs_mt>; - defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg", AArch64neg_mt>; - - defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls", AArch64cls_mt>; - defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz", 
AArch64clz_mt>; - defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt", AArch64cnt_mt>; - defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot", AArch64cnot_mt>; - defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not", AArch64not_mt>; - defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", AArch64fabs_mt>; - defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", AArch64fneg_mt>; - - defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", int_aarch64_sve_smax, DestructiveBinaryComm>; - defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", int_aarch64_sve_umax, DestructiveBinaryComm>; - defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", "SMIN_ZPZZ", int_aarch64_sve_smin, DestructiveBinaryComm>; - defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", "UMIN_ZPZZ", int_aarch64_sve_umin, DestructiveBinaryComm>; - defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", "SABD_ZPZZ", int_aarch64_sve_sabd, DestructiveBinaryComm>; - defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", "UABD_ZPZZ", int_aarch64_sve_uabd, DestructiveBinaryComm>; - - defm SMAX_ZPZZ : sve_int_bin_pred_bhsd; - defm UMAX_ZPZZ : sve_int_bin_pred_bhsd; - defm SMIN_ZPZZ : sve_int_bin_pred_bhsd; - defm UMIN_ZPZZ : sve_int_bin_pred_bhsd; - defm SABD_ZPZZ : sve_int_bin_pred_bhsd; - defm UABD_ZPZZ : sve_int_bin_pred_bhsd; - - defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe", AArch64frecpe>; - defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", AArch64frsqrte>; - - defm FADD_ZPmI : sve_fp_2op_i_p_zds<0b000, "fadd", "FADD_ZPZI", sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fadd>; - defm FSUB_ZPmI : sve_fp_2op_i_p_zds<0b001, "fsub", "FSUB_ZPZI", sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsub>; - defm FMUL_ZPmI : sve_fp_2op_i_p_zds<0b010, "fmul", "FMUL_ZPZI", sve_fpimm_half_two, fpimm_half, fpimm_two, int_aarch64_sve_fmul>; - defm FSUBR_ZPmI : sve_fp_2op_i_p_zds<0b011, "fsubr", "FSUBR_ZPZI", sve_fpimm_half_one, fpimm_half, fpimm_one, 
int_aarch64_sve_fsubr>; - defm FMAXNM_ZPmI : sve_fp_2op_i_p_zds<0b100, "fmaxnm", "FMAXNM_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmaxnm>; - defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, "fminnm", "FMINNM_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fminnm>; - defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", "FMAX_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmax>; - defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", "FMIN_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmin>; - - defm FADD_ZPZI : sve_fp_2op_i_p_zds_hfd; - defm FSUB_ZPZI : sve_fp_2op_i_p_zds_hfd; - defm FMUL_ZPZI : sve_fp_2op_i_p_zds_hfd; - defm FSUBR_ZPZI : sve_fp_2op_i_p_zds_hfd; - defm FMAXNM_ZPZI : sve_fp_2op_i_p_zds_hfd; - defm FMINNM_ZPZI : sve_fp_2op_i_p_zds_hfd; - defm FMAX_ZPZI : sve_fp_2op_i_p_zds_hfd; - defm FMIN_ZPZI : sve_fp_2op_i_p_zds_hfd; - - let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in { - defm FADD_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd; - defm FSUB_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd; - defm FMUL_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd; - defm FSUBR_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd; - defm FMAXNM_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd; - defm FMINNM_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd; - defm FMAX_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd; - defm FMIN_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd; - } - - defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", int_aarch64_sve_fadd, DestructiveBinaryComm>; - defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", int_aarch64_sve_fsub, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">; - defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", "FMUL_ZPZZ", int_aarch64_sve_fmul, DestructiveBinaryComm>; - defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", int_aarch64_sve_fsubr, DestructiveBinaryCommWithRev, "FSUB_ZPmZ", /*isReverseInstr*/ 1>; - defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", "FMAXNM_ZPZZ", int_aarch64_sve_fmaxnm, 
DestructiveBinaryComm>; - defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", "FMINNM_ZPZZ", int_aarch64_sve_fminnm, DestructiveBinaryComm>; - defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", "FMAX_ZPZZ", int_aarch64_sve_fmax, DestructiveBinaryComm>; - defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin", "FMIN_ZPZZ", int_aarch64_sve_fmin, DestructiveBinaryComm>; - defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd", "FABD_ZPZZ", int_aarch64_sve_fabd, DestructiveBinaryComm>; - defm FSCALE_ZPmZ : sve_fp_2op_p_zds_fscale<0b1001, "fscale", int_aarch64_sve_fscale>; - defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx", "FMULX_ZPZZ", int_aarch64_sve_fmulx, DestructiveBinaryComm>; - defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr", "FDIVR_ZPZZ", int_aarch64_sve_fdivr, DestructiveBinaryCommWithRev, "FDIV_ZPmZ", /*isReverseInstr*/ 1>; - defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", int_aarch64_sve_fdiv, DestructiveBinaryCommWithRev, "FDIVR_ZPmZ">; - - defm FADD_ZPZZ : sve_fp_bin_pred_hfd; - defm FSUB_ZPZZ : sve_fp_bin_pred_hfd; - defm FMUL_ZPZZ : sve_fp_bin_pred_hfd; - defm FMAXNM_ZPZZ : sve_fp_bin_pred_hfd; - defm FMINNM_ZPZZ : sve_fp_bin_pred_hfd; - defm FMAX_ZPZZ : sve_fp_bin_pred_hfd; - defm FMIN_ZPZZ : sve_fp_bin_pred_hfd; - defm FABD_ZPZZ : sve_fp_bin_pred_hfd; - defm FDIV_ZPZZ : sve_fp_bin_pred_hfd; -} // End HasSVEorStreamingSVE - -let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in { - defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; - defm FSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; - defm FMUL_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; - defm FSUBR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; - defm FMAXNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; - defm FMINNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; - defm FMAX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; - defm FMIN_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; - defm FABD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; - defm FMULX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; - defm FDIVR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; - defm 
FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; -} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos - -let Predicates = [HasSVEorStreamingSVE] in { - defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd, AArch64fadd_p>; - defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub, AArch64fsub_p>; - defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul, AArch64fmul_p>; -} // End HasSVEorStreamingSVE - -let Predicates = [HasSVE] in { - defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>; -} // End HasSVE - -let Predicates = [HasSVEorStreamingSVE] in { - defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", AArch64frecps>; - defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", AArch64frsqrts>; -} // End HasSVEorStreamingSVE - -let Predicates = [HasSVE] in { - defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel", int_aarch64_sve_ftssel_x>; -} // End HasSVE - -let Predicates = [HasSVEorStreamingSVE] in { - defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>; - defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla", int_aarch64_sve_fcmla>; - - defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla", "FMLA_ZPZZZ", int_aarch64_sve_fmla, "FMAD_ZPmZZ">; - defm FMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b01, "fmls", "FMLS_ZPZZZ", int_aarch64_sve_fmls, "FMSB_ZPmZZ">; - defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla", "FNMLA_ZPZZZ", int_aarch64_sve_fnmla, "FNMAD_ZPmZZ">; - defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls", "FNMLS_ZPZZZ", int_aarch64_sve_fnmls, "FNMSB_ZPmZZ">; - - defm FMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b00, "fmad", int_aarch64_sve_fmad, "FMLA_ZPmZZ", /*isReverseInstr*/ 1>; - defm FMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b01, "fmsb", int_aarch64_sve_fmsb, "FMLS_ZPmZZ", /*isReverseInstr*/ 1>; - defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad", int_aarch64_sve_fnmad, "FNMLA_ZPmZZ", /*isReverseInstr*/ 1>; - defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb", int_aarch64_sve_fnmsb, "FNMLS_ZPmZZ", /*isReverseInstr*/ 1>; - - defm FMLA_ZPZZZ : sve_fp_3op_p_zds_zx; - 
defm FMLS_ZPZZZ : sve_fp_3op_p_zds_zx; - defm FNMLA_ZPZZZ : sve_fp_3op_p_zds_zx; - defm FNMLS_ZPZZZ : sve_fp_3op_p_zds_zx; - - multiclass fma { - // Zd = Za + Zn * Zm - def : Pat<(Ty (AArch64fma_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za)), - (!cast("FMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>; - - // Zd = Za + -Zn * Zm - def : Pat<(Ty (AArch64fma_p PredTy:$P, (AArch64fneg_mt PredTy:$P, Ty:$Zn, (Ty (undef))), Ty:$Zm, Ty:$Za)), - (!cast("FMLS_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>; - - // Zd = -Za + Zn * Zm - def : Pat<(Ty (AArch64fma_p PredTy:$P, Ty:$Zn, Ty:$Zm, (AArch64fneg_mt PredTy:$P, Ty:$Za, (Ty (undef))))), - (!cast("FNMLS_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>; - - // Zd = -Za + -Zn * Zm - def : Pat<(Ty (AArch64fma_p PredTy:$P, (AArch64fneg_mt PredTy:$P, Ty:$Zn, (Ty (undef))), Ty:$Zm, (AArch64fneg_mt PredTy:$P, Ty:$Za, (Ty (undef))))), - (!cast("FNMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>; - - // Zd = -(Za + Zn * Zm) - // (with nsz neg.) 
- def : Pat<(AArch64fneg_mt_nsz PredTy:$P, (AArch64fma_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za), (Ty (undef))), - (!cast("FNMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>; - - // Zda = Zda + Zn * Zm - def : Pat<(vselect (PredTy PPR:$Pg), (Ty (AArch64fma_p (PredTy (AArch64ptrue 31)), ZPR:$Zn, ZPR:$Zm, ZPR:$Za)), ZPR:$Za), - (!cast("FMLA_ZPmZZ_"#Suffix) PPR:$Pg, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>; - - // Zda = Zda + -Zn * Zm - def : Pat<(vselect (PredTy PPR:$Pg), (Ty (AArch64fma_p (PredTy (AArch64ptrue 31)), (AArch64fneg_mt (PredTy (AArch64ptrue 31)), Ty:$Zn, (Ty (undef))), ZPR:$Zm, ZPR:$Za)), ZPR:$Za), - (!cast("FMLS_ZPmZZ_"#Suffix) PPR:$Pg, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>; - } - - defm : fma; - defm : fma; - defm : fma; - defm : fma; - defm : fma; - defm : fma; -} // End HasSVEorStreamingSVE - -let Predicates = [HasSVE] in { - defm FTMAD_ZZI : sve_fp_ftmad<"ftmad", int_aarch64_sve_ftmad_x>; -} // End HasSVE - -let Predicates = [HasSVEorStreamingSVE] in { - defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla", int_aarch64_sve_fmla_lane>; - defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls", int_aarch64_sve_fmls_lane>; - - defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>; - defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>; -} // End HasSVEorStreamingSVE - -let Predicates = [HasSVE] in { - // SVE floating point reductions. 
- defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", AArch64fadda_p>; -} // End HasSVE - -let Predicates = [HasSVEorStreamingSVE] in { - defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", AArch64faddv_p>; - defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", AArch64fmaxnmv_p>; - defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", AArch64fminnmv_p>; - defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", AArch64fmaxv_p>; - defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", AArch64fminv_p>; - - // Splat immediate (unpredicated) - defm DUP_ZI : sve_int_dup_imm<"dup">; - defm FDUP_ZI : sve_int_dup_fpimm<"fdup">; - defm DUPM_ZI : sve_int_dup_mask_imm<"dupm">; - - // Splat immediate (predicated) - defm CPY_ZPmI : sve_int_dup_imm_pred_merge<"cpy">; - defm CPY_ZPzI : sve_int_dup_imm_pred_zero<"cpy">; - defm FCPY_ZPmI : sve_int_dup_fpimm_pred<"fcpy">; - - // Splat scalar register (unpredicated, GPR or vector + element index) - defm DUP_ZR : sve_int_perm_dup_r<"dup", AArch64dup>; - defm DUP_ZZI : sve_int_perm_dup_i<"dup">; - - // Splat scalar register (predicated) - defm CPY_ZPmR : sve_int_perm_cpy_r<"cpy", AArch64dup_mt>; - defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy", AArch64dup_mt>; - - // Duplicate FP scalar into all vector elements - def : Pat<(nxv8f16 (AArch64dup (f16 FPR16:$src))), - (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; - def : Pat<(nxv4f16 (AArch64dup (f16 FPR16:$src))), - (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; - def : Pat<(nxv2f16 (AArch64dup (f16 FPR16:$src))), - (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; - def : Pat<(nxv4f32 (AArch64dup (f32 FPR32:$src))), - (DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>; - def : Pat<(nxv2f32 (AArch64dup (f32 FPR32:$src))), - (DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>; - def : Pat<(nxv2f64 (AArch64dup (f64 FPR64:$src))), - (DUP_ZZI_D (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), 0)>; - def : Pat<(nxv8bf16 (AArch64dup (bf16 
FPR16:$src))), - (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; - - // Duplicate +0.0 into all vector elements - def : Pat<(nxv8f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>; - def : Pat<(nxv4f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>; - def : Pat<(nxv2f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>; - def : Pat<(nxv4f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>; - def : Pat<(nxv2f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>; - def : Pat<(nxv2f64 (AArch64dup (f64 fpimm0))), (DUP_ZI_D 0, 0)>; - def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; - - // Duplicate Int immediate into all vector elements - def : Pat<(nxv16i8 (AArch64dup (i32 (SVE8BitLslImm32 i32:$a, i32:$b)))), - (DUP_ZI_B $a, $b)>; - def : Pat<(nxv8i16 (AArch64dup (i32 (SVE8BitLslImm32 i32:$a, i32:$b)))), - (DUP_ZI_H $a, $b)>; - def : Pat<(nxv4i32 (AArch64dup (i32 (SVE8BitLslImm32 i32:$a, i32:$b)))), - (DUP_ZI_S $a, $b)>; - def : Pat<(nxv2i64 (AArch64dup (i64 (SVE8BitLslImm64 i32:$a, i32:$b)))), - (DUP_ZI_D $a, $b)>; - - // Duplicate immediate FP into all vector elements. 
- def : Pat<(nxv2f32 (AArch64dup (f32 fpimm:$val))), - (DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>; - def : Pat<(nxv4f32 (AArch64dup (f32 fpimm:$val))), - (DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>; - def : Pat<(nxv2f64 (AArch64dup (f64 fpimm:$val))), - (DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>; - - // Duplicate FP immediate into all vector elements - let AddedComplexity = 2 in { - def : Pat<(nxv8f16 (AArch64dup fpimm16:$imm8)), - (FDUP_ZI_H fpimm16:$imm8)>; - def : Pat<(nxv4f16 (AArch64dup fpimm16:$imm8)), - (FDUP_ZI_H fpimm16:$imm8)>; - def : Pat<(nxv2f16 (AArch64dup fpimm16:$imm8)), - (FDUP_ZI_H fpimm16:$imm8)>; - def : Pat<(nxv4f32 (AArch64dup fpimm32:$imm8)), - (FDUP_ZI_S fpimm32:$imm8)>; - def : Pat<(nxv2f32 (AArch64dup fpimm32:$imm8)), - (FDUP_ZI_S fpimm32:$imm8)>; - def : Pat<(nxv2f64 (AArch64dup fpimm64:$imm8)), - (FDUP_ZI_D fpimm64:$imm8)>; - } - - // Select elements from either vector (predicated) - defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>; - - defm SPLICE_ZPZ : sve_int_perm_splice<"splice", AArch64splice>; -} // End HasSVEorStreamingSVE - -let Predicates = [HasSVE] in { - defm COMPACT_ZPZ : sve_int_perm_compact<"compact", int_aarch64_sve_compact>; -} // End HasSVE - -let Predicates = [HasSVEorStreamingSVE] in { - defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>; - defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>; - defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>; - - defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", AArch64rbit_mt>; - defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", AArch64revb_mt>; - defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", AArch64revh_mt>; - defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", AArch64revw_mt>; - - defm REV_PP : sve_int_perm_reverse_p<"rev", vector_reverse>; - defm REV_ZZ : sve_int_perm_reverse_z<"rev", vector_reverse>; - - defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>; - defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi", 
AArch64sunpkhi>; - defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo", AArch64uunpklo>; - defm UUNPKHI_ZZ : sve_int_perm_unpk<0b11, "uunpkhi", AArch64uunpkhi>; - - defm PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo", int_aarch64_sve_punpklo>; - defm PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi", int_aarch64_sve_punpkhi>; - - defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">; - defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">; - def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>; -} // End HasSVEorStreamingSVE - -let Predicates = [HasSVE] in { - defm FEXPA_ZZ : sve_int_bin_cons_misc_0_c_fexpa<"fexpa", int_aarch64_sve_fexpa_x>; -} // End HasSVE - -let Predicates = [HasSVEorStreamingSVE] in { - defm BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa", int_aarch64_sve_brkpa_z>; - defm BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas", null_frag>; - defm BRKPB_PPzPP : sve_int_brkp<0b01, "brkpb", int_aarch64_sve_brkpb_z>; - defm BRKPBS_PPzPP : sve_int_brkp<0b11, "brkpbs", null_frag>; - - defm BRKN_PPzP : sve_int_brkn<0b0, "brkn", int_aarch64_sve_brkn_z>; - defm BRKNS_PPzP : sve_int_brkn<0b1, "brkns", null_frag>; - - defm BRKA_PPzP : sve_int_break_z<0b000, "brka", int_aarch64_sve_brka_z>; - defm BRKA_PPmP : sve_int_break_m<0b001, "brka", int_aarch64_sve_brka>; - defm BRKAS_PPzP : sve_int_break_z<0b010, "brkas", null_frag>; - defm BRKB_PPzP : sve_int_break_z<0b100, "brkb", int_aarch64_sve_brkb_z>; - defm BRKB_PPmP : sve_int_break_m<0b101, "brkb", int_aarch64_sve_brkb>; - defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs", null_frag>; - - def PTEST_PP : sve_int_ptest<0b010000, "ptest">; - defm PFALSE : sve_int_pfalse<0b000000, "pfalse">; - defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>; - defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>; - - defm AND_PPzPP : sve_int_pred_log_v2<0b0000, "and", int_aarch64_sve_and_z, and>; - defm BIC_PPzPP : sve_int_pred_log_v2<0b0001, "bic", int_aarch64_sve_bic_z, 
AArch64bic>; - defm EOR_PPzPP : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z, xor>; - defm SEL_PPPP : sve_int_pred_log_v2<0b0011, "sel", vselect, or>; - defm ANDS_PPzPP : sve_int_pred_log<0b0100, "ands", null_frag>; - defm BICS_PPzPP : sve_int_pred_log<0b0101, "bics", null_frag>; - defm EORS_PPzPP : sve_int_pred_log<0b0110, "eors", null_frag>; - defm ORR_PPzPP : sve_int_pred_log<0b1000, "orr", int_aarch64_sve_orr_z>; - defm ORN_PPzPP : sve_int_pred_log<0b1001, "orn", int_aarch64_sve_orn_z>; - defm NOR_PPzPP : sve_int_pred_log<0b1010, "nor", int_aarch64_sve_nor_z>; - defm NAND_PPzPP : sve_int_pred_log<0b1011, "nand", int_aarch64_sve_nand_z>; - defm ORRS_PPzPP : sve_int_pred_log<0b1100, "orrs", null_frag>; - defm ORNS_PPzPP : sve_int_pred_log<0b1101, "orns", null_frag>; - defm NORS_PPzPP : sve_int_pred_log<0b1110, "nors", null_frag>; - defm NANDS_PPzPP : sve_int_pred_log<0b1111, "nands", null_frag>; - - defm CLASTA_RPZ : sve_int_perm_clast_rz<0, "clasta", AArch64clasta_n>; - defm CLASTB_RPZ : sve_int_perm_clast_rz<1, "clastb", AArch64clastb_n>; - defm CLASTA_VPZ : sve_int_perm_clast_vz<0, "clasta", AArch64clasta_n>; - defm CLASTB_VPZ : sve_int_perm_clast_vz<1, "clastb", AArch64clastb_n>; - defm CLASTA_ZPZ : sve_int_perm_clast_zz<0, "clasta", int_aarch64_sve_clasta>; - defm CLASTB_ZPZ : sve_int_perm_clast_zz<1, "clastb", int_aarch64_sve_clastb>; - - defm LASTA_RPZ : sve_int_perm_last_r<0, "lasta", AArch64lasta>; - defm LASTB_RPZ : sve_int_perm_last_r<1, "lastb", AArch64lastb>; - defm LASTA_VPZ : sve_int_perm_last_v<0, "lasta", AArch64lasta>; - defm LASTB_VPZ : sve_int_perm_last_v<1, "lastb", AArch64lastb>; - - // continuous load with reg+immediate - defm LD1B_IMM : sve_mem_cld_si<0b0000, "ld1b", Z_b, ZPR8>; - defm LD1B_H_IMM : sve_mem_cld_si<0b0001, "ld1b", Z_h, ZPR16>; - defm LD1B_S_IMM : sve_mem_cld_si<0b0010, "ld1b", Z_s, ZPR32>; - defm LD1B_D_IMM : sve_mem_cld_si<0b0011, "ld1b", Z_d, ZPR64>; - defm LD1SW_D_IMM : sve_mem_cld_si<0b0100, "ld1sw", Z_d, 
ZPR64>; - defm LD1H_IMM : sve_mem_cld_si<0b0101, "ld1h", Z_h, ZPR16>; - defm LD1H_S_IMM : sve_mem_cld_si<0b0110, "ld1h", Z_s, ZPR32>; - defm LD1H_D_IMM : sve_mem_cld_si<0b0111, "ld1h", Z_d, ZPR64>; - defm LD1SH_D_IMM : sve_mem_cld_si<0b1000, "ld1sh", Z_d, ZPR64>; - defm LD1SH_S_IMM : sve_mem_cld_si<0b1001, "ld1sh", Z_s, ZPR32>; - defm LD1W_IMM : sve_mem_cld_si<0b1010, "ld1w", Z_s, ZPR32>; - defm LD1W_D_IMM : sve_mem_cld_si<0b1011, "ld1w", Z_d, ZPR64>; - defm LD1SB_D_IMM : sve_mem_cld_si<0b1100, "ld1sb", Z_d, ZPR64>; - defm LD1SB_S_IMM : sve_mem_cld_si<0b1101, "ld1sb", Z_s, ZPR32>; - defm LD1SB_H_IMM : sve_mem_cld_si<0b1110, "ld1sb", Z_h, ZPR16>; - defm LD1D_IMM : sve_mem_cld_si<0b1111, "ld1d", Z_d, ZPR64>; - - // LD1R loads (splat scalar to vector) - defm LD1RB_IMM : sve_mem_ld_dup<0b00, 0b00, "ld1rb", Z_b, ZPR8, uimm6s1>; - defm LD1RB_H_IMM : sve_mem_ld_dup<0b00, 0b01, "ld1rb", Z_h, ZPR16, uimm6s1>; - defm LD1RB_S_IMM : sve_mem_ld_dup<0b00, 0b10, "ld1rb", Z_s, ZPR32, uimm6s1>; - defm LD1RB_D_IMM : sve_mem_ld_dup<0b00, 0b11, "ld1rb", Z_d, ZPR64, uimm6s1>; - defm LD1RSW_IMM : sve_mem_ld_dup<0b01, 0b00, "ld1rsw", Z_d, ZPR64, uimm6s4>; - defm LD1RH_IMM : sve_mem_ld_dup<0b01, 0b01, "ld1rh", Z_h, ZPR16, uimm6s2>; - defm LD1RH_S_IMM : sve_mem_ld_dup<0b01, 0b10, "ld1rh", Z_s, ZPR32, uimm6s2>; - defm LD1RH_D_IMM : sve_mem_ld_dup<0b01, 0b11, "ld1rh", Z_d, ZPR64, uimm6s2>; - defm LD1RSH_D_IMM : sve_mem_ld_dup<0b10, 0b00, "ld1rsh", Z_d, ZPR64, uimm6s2>; - defm LD1RSH_S_IMM : sve_mem_ld_dup<0b10, 0b01, "ld1rsh", Z_s, ZPR32, uimm6s2>; - defm LD1RW_IMM : sve_mem_ld_dup<0b10, 0b10, "ld1rw", Z_s, ZPR32, uimm6s4>; - defm LD1RW_D_IMM : sve_mem_ld_dup<0b10, 0b11, "ld1rw", Z_d, ZPR64, uimm6s4>; - defm LD1RSB_D_IMM : sve_mem_ld_dup<0b11, 0b00, "ld1rsb", Z_d, ZPR64, uimm6s1>; - defm LD1RSB_S_IMM : sve_mem_ld_dup<0b11, 0b01, "ld1rsb", Z_s, ZPR32, uimm6s1>; - defm LD1RSB_H_IMM : sve_mem_ld_dup<0b11, 0b10, "ld1rsb", Z_h, ZPR16, uimm6s1>; - defm LD1RD_IMM : sve_mem_ld_dup<0b11, 0b11, 
"ld1rd", Z_d, ZPR64, uimm6s8>; - - // LD1RQ loads (load quadword-vector and splat to scalable vector) - defm LD1RQ_B_IMM : sve_mem_ldqr_si<0b00, "ld1rqb", Z_b, ZPR8>; - defm LD1RQ_H_IMM : sve_mem_ldqr_si<0b01, "ld1rqh", Z_h, ZPR16>; - defm LD1RQ_W_IMM : sve_mem_ldqr_si<0b10, "ld1rqw", Z_s, ZPR32>; - defm LD1RQ_D_IMM : sve_mem_ldqr_si<0b11, "ld1rqd", Z_d, ZPR64>; - defm LD1RQ_B : sve_mem_ldqr_ss<0b00, "ld1rqb", Z_b, ZPR8, GPR64NoXZRshifted8>; - defm LD1RQ_H : sve_mem_ldqr_ss<0b01, "ld1rqh", Z_h, ZPR16, GPR64NoXZRshifted16>; - defm LD1RQ_W : sve_mem_ldqr_ss<0b10, "ld1rqw", Z_s, ZPR32, GPR64NoXZRshifted32>; - defm LD1RQ_D : sve_mem_ldqr_ss<0b11, "ld1rqd", Z_d, ZPR64, GPR64NoXZRshifted64>; - - // continuous load with reg+reg addressing. - defm LD1B : sve_mem_cld_ss<0b0000, "ld1b", Z_b, ZPR8, GPR64NoXZRshifted8>; - defm LD1B_H : sve_mem_cld_ss<0b0001, "ld1b", Z_h, ZPR16, GPR64NoXZRshifted8>; - defm LD1B_S : sve_mem_cld_ss<0b0010, "ld1b", Z_s, ZPR32, GPR64NoXZRshifted8>; - defm LD1B_D : sve_mem_cld_ss<0b0011, "ld1b", Z_d, ZPR64, GPR64NoXZRshifted8>; - defm LD1SW_D : sve_mem_cld_ss<0b0100, "ld1sw", Z_d, ZPR64, GPR64NoXZRshifted32>; - defm LD1H : sve_mem_cld_ss<0b0101, "ld1h", Z_h, ZPR16, GPR64NoXZRshifted16>; - defm LD1H_S : sve_mem_cld_ss<0b0110, "ld1h", Z_s, ZPR32, GPR64NoXZRshifted16>; - defm LD1H_D : sve_mem_cld_ss<0b0111, "ld1h", Z_d, ZPR64, GPR64NoXZRshifted16>; - defm LD1SH_D : sve_mem_cld_ss<0b1000, "ld1sh", Z_d, ZPR64, GPR64NoXZRshifted16>; - defm LD1SH_S : sve_mem_cld_ss<0b1001, "ld1sh", Z_s, ZPR32, GPR64NoXZRshifted16>; - defm LD1W : sve_mem_cld_ss<0b1010, "ld1w", Z_s, ZPR32, GPR64NoXZRshifted32>; - defm LD1W_D : sve_mem_cld_ss<0b1011, "ld1w", Z_d, ZPR64, GPR64NoXZRshifted32>; - defm LD1SB_D : sve_mem_cld_ss<0b1100, "ld1sb", Z_d, ZPR64, GPR64NoXZRshifted8>; - defm LD1SB_S : sve_mem_cld_ss<0b1101, "ld1sb", Z_s, ZPR32, GPR64NoXZRshifted8>; - defm LD1SB_H : sve_mem_cld_ss<0b1110, "ld1sb", Z_h, ZPR16, GPR64NoXZRshifted8>; - defm LD1D : sve_mem_cld_ss<0b1111, 
"ld1d", Z_d, ZPR64, GPR64NoXZRshifted64>; -} // End HasSVEorStreamingSVE - -let Predicates = [HasSVE] in { - // non-faulting continuous load with reg+immediate - defm LDNF1B_IMM : sve_mem_cldnf_si<0b0000, "ldnf1b", Z_b, ZPR8>; - defm LDNF1B_H_IMM : sve_mem_cldnf_si<0b0001, "ldnf1b", Z_h, ZPR16>; - defm LDNF1B_S_IMM : sve_mem_cldnf_si<0b0010, "ldnf1b", Z_s, ZPR32>; - defm LDNF1B_D_IMM : sve_mem_cldnf_si<0b0011, "ldnf1b", Z_d, ZPR64>; - defm LDNF1SW_D_IMM : sve_mem_cldnf_si<0b0100, "ldnf1sw", Z_d, ZPR64>; - defm LDNF1H_IMM : sve_mem_cldnf_si<0b0101, "ldnf1h", Z_h, ZPR16>; - defm LDNF1H_S_IMM : sve_mem_cldnf_si<0b0110, "ldnf1h", Z_s, ZPR32>; - defm LDNF1H_D_IMM : sve_mem_cldnf_si<0b0111, "ldnf1h", Z_d, ZPR64>; - defm LDNF1SH_D_IMM : sve_mem_cldnf_si<0b1000, "ldnf1sh", Z_d, ZPR64>; - defm LDNF1SH_S_IMM : sve_mem_cldnf_si<0b1001, "ldnf1sh", Z_s, ZPR32>; - defm LDNF1W_IMM : sve_mem_cldnf_si<0b1010, "ldnf1w", Z_s, ZPR32>; - defm LDNF1W_D_IMM : sve_mem_cldnf_si<0b1011, "ldnf1w", Z_d, ZPR64>; - defm LDNF1SB_D_IMM : sve_mem_cldnf_si<0b1100, "ldnf1sb", Z_d, ZPR64>; - defm LDNF1SB_S_IMM : sve_mem_cldnf_si<0b1101, "ldnf1sb", Z_s, ZPR32>; - defm LDNF1SB_H_IMM : sve_mem_cldnf_si<0b1110, "ldnf1sb", Z_h, ZPR16>; - defm LDNF1D_IMM : sve_mem_cldnf_si<0b1111, "ldnf1d", Z_d, ZPR64>; - - // First-faulting loads with reg+reg addressing. 
- defm LDFF1B : sve_mem_cldff_ss<0b0000, "ldff1b", Z_b, ZPR8, GPR64shifted8>; - defm LDFF1B_H : sve_mem_cldff_ss<0b0001, "ldff1b", Z_h, ZPR16, GPR64shifted8>; - defm LDFF1B_S : sve_mem_cldff_ss<0b0010, "ldff1b", Z_s, ZPR32, GPR64shifted8>; - defm LDFF1B_D : sve_mem_cldff_ss<0b0011, "ldff1b", Z_d, ZPR64, GPR64shifted8>; - defm LDFF1SW_D : sve_mem_cldff_ss<0b0100, "ldff1sw", Z_d, ZPR64, GPR64shifted32>; - defm LDFF1H : sve_mem_cldff_ss<0b0101, "ldff1h", Z_h, ZPR16, GPR64shifted16>; - defm LDFF1H_S : sve_mem_cldff_ss<0b0110, "ldff1h", Z_s, ZPR32, GPR64shifted16>; - defm LDFF1H_D : sve_mem_cldff_ss<0b0111, "ldff1h", Z_d, ZPR64, GPR64shifted16>; - defm LDFF1SH_D : sve_mem_cldff_ss<0b1000, "ldff1sh", Z_d, ZPR64, GPR64shifted16>; - defm LDFF1SH_S : sve_mem_cldff_ss<0b1001, "ldff1sh", Z_s, ZPR32, GPR64shifted16>; - defm LDFF1W : sve_mem_cldff_ss<0b1010, "ldff1w", Z_s, ZPR32, GPR64shifted32>; - defm LDFF1W_D : sve_mem_cldff_ss<0b1011, "ldff1w", Z_d, ZPR64, GPR64shifted32>; - defm LDFF1SB_D : sve_mem_cldff_ss<0b1100, "ldff1sb", Z_d, ZPR64, GPR64shifted8>; - defm LDFF1SB_S : sve_mem_cldff_ss<0b1101, "ldff1sb", Z_s, ZPR32, GPR64shifted8>; - defm LDFF1SB_H : sve_mem_cldff_ss<0b1110, "ldff1sb", Z_h, ZPR16, GPR64shifted8>; - defm LDFF1D : sve_mem_cldff_ss<0b1111, "ldff1d", Z_d, ZPR64, GPR64shifted64>; -} // End HasSVE - -let Predicates = [HasSVEorStreamingSVE] in { - // LD(2|3|4) structured loads with reg+immediate - defm LD2B_IMM : sve_mem_eld_si<0b00, 0b01, ZZ_b, "ld2b", simm4s2>; - defm LD3B_IMM : sve_mem_eld_si<0b00, 0b10, ZZZ_b, "ld3b", simm4s3>; - defm LD4B_IMM : sve_mem_eld_si<0b00, 0b11, ZZZZ_b, "ld4b", simm4s4>; - defm LD2H_IMM : sve_mem_eld_si<0b01, 0b01, ZZ_h, "ld2h", simm4s2>; - defm LD3H_IMM : sve_mem_eld_si<0b01, 0b10, ZZZ_h, "ld3h", simm4s3>; - defm LD4H_IMM : sve_mem_eld_si<0b01, 0b11, ZZZZ_h, "ld4h", simm4s4>; - defm LD2W_IMM : sve_mem_eld_si<0b10, 0b01, ZZ_s, "ld2w", simm4s2>; - defm LD3W_IMM : sve_mem_eld_si<0b10, 0b10, ZZZ_s, "ld3w", simm4s3>; - defm LD4W_IMM 
: sve_mem_eld_si<0b10, 0b11, ZZZZ_s, "ld4w", simm4s4>; - defm LD2D_IMM : sve_mem_eld_si<0b11, 0b01, ZZ_d, "ld2d", simm4s2>; - defm LD3D_IMM : sve_mem_eld_si<0b11, 0b10, ZZZ_d, "ld3d", simm4s3>; - defm LD4D_IMM : sve_mem_eld_si<0b11, 0b11, ZZZZ_d, "ld4d", simm4s4>; - - // LD(2|3|4) structured loads (register + register) - def LD2B : sve_mem_eld_ss<0b00, 0b01, ZZ_b, "ld2b", GPR64NoXZRshifted8>; - def LD3B : sve_mem_eld_ss<0b00, 0b10, ZZZ_b, "ld3b", GPR64NoXZRshifted8>; - def LD4B : sve_mem_eld_ss<0b00, 0b11, ZZZZ_b, "ld4b", GPR64NoXZRshifted8>; - def LD2H : sve_mem_eld_ss<0b01, 0b01, ZZ_h, "ld2h", GPR64NoXZRshifted16>; - def LD3H : sve_mem_eld_ss<0b01, 0b10, ZZZ_h, "ld3h", GPR64NoXZRshifted16>; - def LD4H : sve_mem_eld_ss<0b01, 0b11, ZZZZ_h, "ld4h", GPR64NoXZRshifted16>; - def LD2W : sve_mem_eld_ss<0b10, 0b01, ZZ_s, "ld2w", GPR64NoXZRshifted32>; - def LD3W : sve_mem_eld_ss<0b10, 0b10, ZZZ_s, "ld3w", GPR64NoXZRshifted32>; - def LD4W : sve_mem_eld_ss<0b10, 0b11, ZZZZ_s, "ld4w", GPR64NoXZRshifted32>; - def LD2D : sve_mem_eld_ss<0b11, 0b01, ZZ_d, "ld2d", GPR64NoXZRshifted64>; - def LD3D : sve_mem_eld_ss<0b11, 0b10, ZZZ_d, "ld3d", GPR64NoXZRshifted64>; - def LD4D : sve_mem_eld_ss<0b11, 0b11, ZZZZ_d, "ld4d", GPR64NoXZRshifted64>; -} // End HasSVEorStreamingSVE - -let Predicates = [HasSVE] in { - // Gathers using unscaled 32-bit offsets, e.g. 
- // ld1h z0.s, p0/z, [x0, z0.s, uxtw] - defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; - defm GLDFF1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; - defm GLD1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; - defm GLDFF1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; - defm GLD1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; - defm GLDFF1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; - defm GLD1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; - defm GLDFF1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; - defm GLD1W : sve_mem_32b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>; - defm GLDFF1W : sve_mem_32b_gld_vs_32_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>; - - // Gathers using scaled 32-bit offsets, e.g. 
- // ld1h z0.s, p0/z, [x0, z0.s, uxtw #1] - defm GLD1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled_z, AArch64ld1s_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; - defm GLDFF1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_scaled_z, AArch64ldff1s_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; - defm GLD1H_S : sve_mem_32b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; - defm GLDFF1H_S : sve_mem_32b_gld_sv_32_scaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; - defm GLD1W : sve_mem_32b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>; - defm GLDFF1W : sve_mem_32b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>; - - // Gathers using 32-bit pointers with scaled offset, e.g. 
- // ld1h z0.s, p0/z, [z0.s, #16] - defm GLD1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm_z, nxv4i8>; - defm GLDFF1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0001, "ldff1sb", imm0_31, AArch64ldff1s_gather_imm_z, nxv4i8>; - defm GLD1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm_z, nxv4i8>; - defm GLDFF1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0011, "ldff1b", imm0_31, AArch64ldff1_gather_imm_z, nxv4i8>; - defm GLD1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm_z, nxv4i16>; - defm GLDFF1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0101, "ldff1sh", uimm5s2, AArch64ldff1s_gather_imm_z, nxv4i16>; - defm GLD1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm_z, nxv4i16>; - defm GLDFF1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0111, "ldff1h", uimm5s2, AArch64ldff1_gather_imm_z, nxv4i16>; - defm GLD1W : sve_mem_32b_gld_vi_32_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm_z, nxv4i32>; - defm GLDFF1W : sve_mem_32b_gld_vi_32_ptrs<0b1011, "ldff1w", uimm5s4, AArch64ldff1_gather_imm_z, nxv4i32>; - - // Gathers using 64-bit pointers with scaled offset, e.g. 
- // ld1h z0.d, p0/z, [z0.d, #16] - defm GLD1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm_z, nxv2i8>; - defm GLDFF1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0001, "ldff1sb", imm0_31, AArch64ldff1s_gather_imm_z, nxv2i8>; - defm GLD1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm_z, nxv2i8>; - defm GLDFF1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0011, "ldff1b", imm0_31, AArch64ldff1_gather_imm_z, nxv2i8>; - defm GLD1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm_z, nxv2i16>; - defm GLDFF1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0101, "ldff1sh", uimm5s2, AArch64ldff1s_gather_imm_z, nxv2i16>; - defm GLD1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm_z, nxv2i16>; - defm GLDFF1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0111, "ldff1h", uimm5s2, AArch64ldff1_gather_imm_z, nxv2i16>; - defm GLD1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1000, "ld1sw", uimm5s4, AArch64ld1s_gather_imm_z, nxv2i32>; - defm GLDFF1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1001, "ldff1sw", uimm5s4, AArch64ldff1s_gather_imm_z, nxv2i32>; - defm GLD1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm_z, nxv2i32>; - defm GLDFF1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1011, "ldff1w", uimm5s4, AArch64ldff1_gather_imm_z, nxv2i32>; - defm GLD1D : sve_mem_64b_gld_vi_64_ptrs<0b1110, "ld1d", uimm5s8, AArch64ld1_gather_imm_z, nxv2i64>; - defm GLDFF1D : sve_mem_64b_gld_vi_64_ptrs<0b1111, "ldff1d", uimm5s8, AArch64ldff1_gather_imm_z, nxv2i64>; - - // Gathers using unscaled 64-bit offsets, e.g. 
- // ld1h z0.d, p0/z, [x0, z0.d] - defm GLD1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_z, nxv2i8>; - defm GLDFF1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_z, nxv2i8>; - defm GLD1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0010, "ld1b", AArch64ld1_gather_z, nxv2i8>; - defm GLDFF1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_z, nxv2i8>; - defm GLD1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_z, nxv2i16>; - defm GLDFF1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_z, nxv2i16>; - defm GLD1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0110, "ld1h", AArch64ld1_gather_z, nxv2i16>; - defm GLDFF1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_z, nxv2i16>; - defm GLD1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1000, "ld1sw", AArch64ld1s_gather_z, nxv2i32>; - defm GLDFF1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1001, "ldff1sw", AArch64ldff1s_gather_z, nxv2i32>; - defm GLD1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1010, "ld1w", AArch64ld1_gather_z, nxv2i32>; - defm GLDFF1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_z, nxv2i32>; - defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d", AArch64ld1_gather_z, nxv2i64>; - defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d", AArch64ldff1_gather_z, nxv2i64>; - - // Gathers using scaled 64-bit offsets, e.g. 
- // ld1h z0.d, p0/z, [x0, z0.d, lsl #1] - defm GLD1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0100, "ld1sh", AArch64ld1s_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>; - defm GLDFF1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>; - defm GLD1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0110, "ld1h", AArch64ld1_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>; - defm GLDFF1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0111, "ldff1h", AArch64ldff1_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>; - defm GLD1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1000, "ld1sw", AArch64ld1s_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>; - defm GLDFF1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1001, "ldff1sw", AArch64ldff1s_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>; - defm GLD1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1010, "ld1w", AArch64ld1_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>; - defm GLDFF1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1011, "ldff1w", AArch64ldff1_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>; - defm GLD1D : sve_mem_64b_gld_sv2_64_scaled<0b1110, "ld1d", AArch64ld1_gather_scaled_z, ZPR64ExtLSL64, nxv2i64>; - defm GLDFF1D : sve_mem_64b_gld_sv2_64_scaled<0b1111, "ldff1d", AArch64ldff1_gather_scaled_z, ZPR64ExtLSL64, nxv2i64>; - - // Gathers using unscaled 32-bit offsets unpacked in 64-bits elements, e.g. 
- // ld1h z0.d, p0/z, [x0, z0.d, uxtw] - defm GLD1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; - defm GLDFF1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; - defm GLD1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; - defm GLDFF1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; - defm GLD1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; - defm GLDFF1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; - defm GLD1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; - defm GLDFF1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; - defm GLD1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; - defm GLDFF1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1001, "ldff1sw", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; - defm GLD1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; - defm GLDFF1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8, 
ZPR64ExtUXTW8, nxv2i32>; - defm GLD1D : sve_mem_64b_gld_vs_32_unscaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>; - defm GLDFF1D : sve_mem_64b_gld_vs_32_unscaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>; - - // Gathers using scaled 32-bit offsets unpacked in 64-bits elements, e.g. - // ld1h z0.d, p0/z, [x0, z0.d, uxtw #1] - defm GLD1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled_z, AArch64ld1s_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; - defm GLDFF1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_scaled_z, AArch64ldff1s_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; - defm GLD1H_D : sve_mem_64b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; - defm GLDFF1H_D : sve_mem_64b_gld_sv_32_scaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; - defm GLD1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw_scaled_z, AArch64ld1s_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; - defm GLDFF1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1001, "ldff1sw", AArch64ldff1s_gather_sxtw_scaled_z, AArch64ldff1s_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; - defm GLD1W_D : sve_mem_64b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; - defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; - defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, 
ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>; - defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>; -} // End HasSVE - -let Predicates = [HasSVEorStreamingSVE] in { - // Non-temporal contiguous loads (register + immediate) - defm LDNT1B_ZRI : sve_mem_cldnt_si<0b00, "ldnt1b", Z_b, ZPR8>; - defm LDNT1H_ZRI : sve_mem_cldnt_si<0b01, "ldnt1h", Z_h, ZPR16>; - defm LDNT1W_ZRI : sve_mem_cldnt_si<0b10, "ldnt1w", Z_s, ZPR32>; - defm LDNT1D_ZRI : sve_mem_cldnt_si<0b11, "ldnt1d", Z_d, ZPR64>; - - // Non-temporal contiguous loads (register + register) - defm LDNT1B_ZRR : sve_mem_cldnt_ss<0b00, "ldnt1b", Z_b, ZPR8, GPR64NoXZRshifted8>; - defm LDNT1H_ZRR : sve_mem_cldnt_ss<0b01, "ldnt1h", Z_h, ZPR16, GPR64NoXZRshifted16>; - defm LDNT1W_ZRR : sve_mem_cldnt_ss<0b10, "ldnt1w", Z_s, ZPR32, GPR64NoXZRshifted32>; - defm LDNT1D_ZRR : sve_mem_cldnt_ss<0b11, "ldnt1d", Z_d, ZPR64, GPR64NoXZRshifted64>; - - // contiguous store with immediates - defm ST1B_IMM : sve_mem_cst_si<0b00, 0b00, "st1b", Z_b, ZPR8>; - defm ST1B_H_IMM : sve_mem_cst_si<0b00, 0b01, "st1b", Z_h, ZPR16>; - defm ST1B_S_IMM : sve_mem_cst_si<0b00, 0b10, "st1b", Z_s, ZPR32>; - defm ST1B_D_IMM : sve_mem_cst_si<0b00, 0b11, "st1b", Z_d, ZPR64>; - defm ST1H_IMM : sve_mem_cst_si<0b01, 0b01, "st1h", Z_h, ZPR16>; - defm ST1H_S_IMM : sve_mem_cst_si<0b01, 0b10, "st1h", Z_s, ZPR32>; - defm ST1H_D_IMM : sve_mem_cst_si<0b01, 0b11, "st1h", Z_d, ZPR64>; - defm ST1W_IMM : sve_mem_cst_si<0b10, 0b10, "st1w", Z_s, ZPR32>; - defm ST1W_D_IMM : sve_mem_cst_si<0b10, 0b11, "st1w", Z_d, ZPR64>; - defm ST1D_IMM : sve_mem_cst_si<0b11, 0b11, "st1d", Z_d, ZPR64>; - - // contiguous store with reg+reg addressing. 
- defm ST1B : sve_mem_cst_ss<0b0000, "st1b", Z_b, ZPR8, GPR64NoXZRshifted8>; - defm ST1B_H : sve_mem_cst_ss<0b0001, "st1b", Z_h, ZPR16, GPR64NoXZRshifted8>; - defm ST1B_S : sve_mem_cst_ss<0b0010, "st1b", Z_s, ZPR32, GPR64NoXZRshifted8>; - defm ST1B_D : sve_mem_cst_ss<0b0011, "st1b", Z_d, ZPR64, GPR64NoXZRshifted8>; - defm ST1H : sve_mem_cst_ss<0b0101, "st1h", Z_h, ZPR16, GPR64NoXZRshifted16>; - defm ST1H_S : sve_mem_cst_ss<0b0110, "st1h", Z_s, ZPR32, GPR64NoXZRshifted16>; - defm ST1H_D : sve_mem_cst_ss<0b0111, "st1h", Z_d, ZPR64, GPR64NoXZRshifted16>; - defm ST1W : sve_mem_cst_ss<0b1010, "st1w", Z_s, ZPR32, GPR64NoXZRshifted32>; - defm ST1W_D : sve_mem_cst_ss<0b1011, "st1w", Z_d, ZPR64, GPR64NoXZRshifted32>; - defm ST1D : sve_mem_cst_ss<0b1111, "st1d", Z_d, ZPR64, GPR64NoXZRshifted64>; -} // End HasSVEorStreamingSVE - -let Predicates = [HasSVE] in { - // Scatters using unpacked, unscaled 32-bit offsets, e.g. - // st1h z0.d, p0, [x0, z0.d, uxtw] - defm SST1B_D : sve_mem_64b_sst_sv_32_unscaled<0b000, "st1b", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; - defm SST1H_D : sve_mem_64b_sst_sv_32_unscaled<0b010, "st1h", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; - defm SST1W_D : sve_mem_64b_sst_sv_32_unscaled<0b100, "st1w", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; - defm SST1D : sve_mem_64b_sst_sv_32_unscaled<0b110, "st1d", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>; - - // Scatters using packed, unscaled 32-bit offsets, e.g. 
- // st1h z0.s, p0, [x0, z0.s, uxtw] - defm SST1B_S : sve_mem_32b_sst_sv_32_unscaled<0b001, "st1b", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; - defm SST1H_S : sve_mem_32b_sst_sv_32_unscaled<0b011, "st1h", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; - defm SST1W : sve_mem_32b_sst_sv_32_unscaled<0b101, "st1w", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>; - - // Scatters using packed, scaled 32-bit offsets, e.g. - // st1h z0.s, p0, [x0, z0.s, uxtw #1] - defm SST1H_S : sve_mem_32b_sst_sv_32_scaled<0b011, "st1h", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; - defm SST1W : sve_mem_32b_sst_sv_32_scaled<0b101, "st1w", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>; - - // Scatters using unpacked, scaled 32-bit offsets, e.g. - // st1h z0.d, p0, [x0, z0.d, uxtw #1] - defm SST1H_D : sve_mem_64b_sst_sv_32_scaled<0b010, "st1h", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; - defm SST1W_D : sve_mem_64b_sst_sv_32_scaled<0b100, "st1w", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; - defm SST1D : sve_mem_64b_sst_sv_32_scaled<0b110, "st1d", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>; - - // Scatters using 32/64-bit pointers with offset, e.g. - // st1h z0.s, p0, [z0.s, #16] - defm SST1B_S : sve_mem_32b_sst_vi_ptrs<0b001, "st1b", imm0_31, AArch64st1_scatter_imm, nxv4i8>; - defm SST1H_S : sve_mem_32b_sst_vi_ptrs<0b011, "st1h", uimm5s2, AArch64st1_scatter_imm, nxv4i16>; - defm SST1W : sve_mem_32b_sst_vi_ptrs<0b101, "st1w", uimm5s4, AArch64st1_scatter_imm, nxv4i32>; - - // Scatters using 32/64-bit pointers with offset, e.g. 
- // st1h z0.d, p0, [z0.d, #16] - defm SST1B_D : sve_mem_64b_sst_vi_ptrs<0b000, "st1b", imm0_31, AArch64st1_scatter_imm, nxv2i8>; - defm SST1H_D : sve_mem_64b_sst_vi_ptrs<0b010, "st1h", uimm5s2, AArch64st1_scatter_imm, nxv2i16>; - defm SST1W_D : sve_mem_64b_sst_vi_ptrs<0b100, "st1w", uimm5s4, AArch64st1_scatter_imm, nxv2i32>; - defm SST1D : sve_mem_64b_sst_vi_ptrs<0b110, "st1d", uimm5s8, AArch64st1_scatter_imm, nxv2i64>; - - // Scatters using unscaled 64-bit offsets, e.g. - // st1h z0.d, p0, [x0, z0.d] - defm SST1B_D : sve_mem_sst_sv_64_unscaled<0b00, "st1b", AArch64st1_scatter, nxv2i8>; - defm SST1H_D : sve_mem_sst_sv_64_unscaled<0b01, "st1h", AArch64st1_scatter, nxv2i16>; - defm SST1W_D : sve_mem_sst_sv_64_unscaled<0b10, "st1w", AArch64st1_scatter, nxv2i32>; - defm SST1D : sve_mem_sst_sv_64_unscaled<0b11, "st1d", AArch64st1_scatter, nxv2i64>; - - // Scatters using scaled 64-bit offsets, e.g. - // st1h z0.d, p0, [x0, z0.d, lsl #1] - defm SST1H_D_SCALED : sve_mem_sst_sv_64_scaled<0b01, "st1h", AArch64st1_scatter_scaled, ZPR64ExtLSL16, nxv2i16>; - defm SST1W_D_SCALED : sve_mem_sst_sv_64_scaled<0b10, "st1w", AArch64st1_scatter_scaled, ZPR64ExtLSL32, nxv2i32>; - defm SST1D_SCALED : sve_mem_sst_sv_64_scaled<0b11, "st1d", AArch64st1_scatter_scaled, ZPR64ExtLSL64, nxv2i64>; -} // End HasSVE - -let Predicates = [HasSVEorStreamingSVE] in { - // ST(2|3|4) structured stores (register + immediate) - defm ST2B_IMM : sve_mem_est_si<0b00, 0b01, ZZ_b, "st2b", simm4s2>; - defm ST3B_IMM : sve_mem_est_si<0b00, 0b10, ZZZ_b, "st3b", simm4s3>; - defm ST4B_IMM : sve_mem_est_si<0b00, 0b11, ZZZZ_b, "st4b", simm4s4>; - defm ST2H_IMM : sve_mem_est_si<0b01, 0b01, ZZ_h, "st2h", simm4s2>; - defm ST3H_IMM : sve_mem_est_si<0b01, 0b10, ZZZ_h, "st3h", simm4s3>; - defm ST4H_IMM : sve_mem_est_si<0b01, 0b11, ZZZZ_h, "st4h", simm4s4>; - defm ST2W_IMM : sve_mem_est_si<0b10, 0b01, ZZ_s, "st2w", simm4s2>; - defm ST3W_IMM : sve_mem_est_si<0b10, 0b10, ZZZ_s, "st3w", simm4s3>; - defm ST4W_IMM : 
sve_mem_est_si<0b10, 0b11, ZZZZ_s, "st4w", simm4s4>; - defm ST2D_IMM : sve_mem_est_si<0b11, 0b01, ZZ_d, "st2d", simm4s2>; - defm ST3D_IMM : sve_mem_est_si<0b11, 0b10, ZZZ_d, "st3d", simm4s3>; - defm ST4D_IMM : sve_mem_est_si<0b11, 0b11, ZZZZ_d, "st4d", simm4s4>; - - // ST(2|3|4) structured stores (register + register) - def ST2B : sve_mem_est_ss<0b00, 0b01, ZZ_b, "st2b", GPR64NoXZRshifted8>; - def ST3B : sve_mem_est_ss<0b00, 0b10, ZZZ_b, "st3b", GPR64NoXZRshifted8>; - def ST4B : sve_mem_est_ss<0b00, 0b11, ZZZZ_b, "st4b", GPR64NoXZRshifted8>; - def ST2H : sve_mem_est_ss<0b01, 0b01, ZZ_h, "st2h", GPR64NoXZRshifted16>; - def ST3H : sve_mem_est_ss<0b01, 0b10, ZZZ_h, "st3h", GPR64NoXZRshifted16>; - def ST4H : sve_mem_est_ss<0b01, 0b11, ZZZZ_h, "st4h", GPR64NoXZRshifted16>; - def ST2W : sve_mem_est_ss<0b10, 0b01, ZZ_s, "st2w", GPR64NoXZRshifted32>; - def ST3W : sve_mem_est_ss<0b10, 0b10, ZZZ_s, "st3w", GPR64NoXZRshifted32>; - def ST4W : sve_mem_est_ss<0b10, 0b11, ZZZZ_s, "st4w", GPR64NoXZRshifted32>; - def ST2D : sve_mem_est_ss<0b11, 0b01, ZZ_d, "st2d", GPR64NoXZRshifted64>; - def ST3D : sve_mem_est_ss<0b11, 0b10, ZZZ_d, "st3d", GPR64NoXZRshifted64>; - def ST4D : sve_mem_est_ss<0b11, 0b11, ZZZZ_d, "st4d", GPR64NoXZRshifted64>; - - // Non-temporal contiguous stores (register + immediate) - defm STNT1B_ZRI : sve_mem_cstnt_si<0b00, "stnt1b", Z_b, ZPR8>; - defm STNT1H_ZRI : sve_mem_cstnt_si<0b01, "stnt1h", Z_h, ZPR16>; - defm STNT1W_ZRI : sve_mem_cstnt_si<0b10, "stnt1w", Z_s, ZPR32>; - defm STNT1D_ZRI : sve_mem_cstnt_si<0b11, "stnt1d", Z_d, ZPR64>; - - // Non-temporal contiguous stores (register + register) - defm STNT1B_ZRR : sve_mem_cstnt_ss<0b00, "stnt1b", Z_b, ZPR8, GPR64NoXZRshifted8>; - defm STNT1H_ZRR : sve_mem_cstnt_ss<0b01, "stnt1h", Z_h, ZPR16, GPR64NoXZRshifted16>; - defm STNT1W_ZRR : sve_mem_cstnt_ss<0b10, "stnt1w", Z_s, ZPR32, GPR64NoXZRshifted32>; - defm STNT1D_ZRR : sve_mem_cstnt_ss<0b11, "stnt1d", Z_d, ZPR64, GPR64NoXZRshifted64>; - - // Fill/Spill - defm 
LDR_ZXI : sve_mem_z_fill<"ldr">; - defm LDR_PXI : sve_mem_p_fill<"ldr">; - defm STR_ZXI : sve_mem_z_spill<"str">; - defm STR_PXI : sve_mem_p_spill<"str">; - - // Contiguous prefetch (register + immediate) - defm PRFB_PRI : sve_mem_prfm_si<0b00, "prfb">; - defm PRFH_PRI : sve_mem_prfm_si<0b01, "prfh">; - defm PRFW_PRI : sve_mem_prfm_si<0b10, "prfw">; - defm PRFD_PRI : sve_mem_prfm_si<0b11, "prfd">; - - // Contiguous prefetch (register + register) - def PRFB_PRR : sve_mem_prfm_ss<0b001, "prfb", GPR64NoXZRshifted8>; - def PRFH_PRR : sve_mem_prfm_ss<0b011, "prfh", GPR64NoXZRshifted16>; - def PRFS_PRR : sve_mem_prfm_ss<0b101, "prfw", GPR64NoXZRshifted32>; - def PRFD_PRR : sve_mem_prfm_ss<0b111, "prfd", GPR64NoXZRshifted64>; - - multiclass sve_prefetch { - // reg + imm - let AddedComplexity = 2 in { - def _reg_imm : Pat<(prefetch (PredTy PPR_3b:$gp), (am_sve_indexed_s6 GPR64sp:$base, simm6s1:$offset), (i32 sve_prfop:$prfop)), - (RegImmInst sve_prfop:$prfop, PPR_3b:$gp, GPR64:$base, simm6s1:$offset)>; - } - - // reg + reg - let AddedComplexity = 1 in { - def _reg_reg : Pat<(prefetch (PredTy PPR_3b:$gp), (AddrCP GPR64sp:$base, GPR64:$index), (i32 sve_prfop:$prfop)), - (RegRegInst sve_prfop:$prfop, PPR_3b:$gp, GPR64:$base, GPR64:$index)>; - } - - // default fallback - def _default : Pat<(prefetch (PredTy PPR_3b:$gp), GPR64:$base, (i32 sve_prfop:$prfop)), - (RegImmInst sve_prfop:$prfop, PPR_3b:$gp, GPR64:$base, (i64 0))>; - } - - defm : sve_prefetch; - defm : sve_prefetch; - defm : sve_prefetch; - defm : sve_prefetch; -} // End HasSVEorStreamingSVE - -let Predicates = [HasSVE] in { - // Gather prefetch using scaled 32-bit offsets, e.g. 
- // prfh pldl1keep, p0, [x0, z0.s, uxtw #1] - defm PRFB_S : sve_mem_32b_prfm_sv_scaled<0b00, "prfb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, int_aarch64_sve_prfb_gather_sxtw_index, int_aarch64_sve_prfb_gather_uxtw_index>; - defm PRFH_S : sve_mem_32b_prfm_sv_scaled<0b01, "prfh", ZPR32ExtSXTW16, ZPR32ExtUXTW16, int_aarch64_sve_prfh_gather_sxtw_index, int_aarch64_sve_prfh_gather_uxtw_index>; - defm PRFW_S : sve_mem_32b_prfm_sv_scaled<0b10, "prfw", ZPR32ExtSXTW32, ZPR32ExtUXTW32, int_aarch64_sve_prfw_gather_sxtw_index, int_aarch64_sve_prfw_gather_uxtw_index>; - defm PRFD_S : sve_mem_32b_prfm_sv_scaled<0b11, "prfd", ZPR32ExtSXTW64, ZPR32ExtUXTW64, int_aarch64_sve_prfd_gather_sxtw_index, int_aarch64_sve_prfd_gather_uxtw_index>; - - // Gather prefetch using unpacked, scaled 32-bit offsets, e.g. - // prfh pldl1keep, p0, [x0, z0.d, uxtw #1] - defm PRFB_D : sve_mem_64b_prfm_sv_ext_scaled<0b00, "prfb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, int_aarch64_sve_prfb_gather_sxtw_index, int_aarch64_sve_prfb_gather_uxtw_index>; - defm PRFH_D : sve_mem_64b_prfm_sv_ext_scaled<0b01, "prfh", ZPR64ExtSXTW16, ZPR64ExtUXTW16, int_aarch64_sve_prfh_gather_sxtw_index, int_aarch64_sve_prfh_gather_uxtw_index>; - defm PRFW_D : sve_mem_64b_prfm_sv_ext_scaled<0b10, "prfw", ZPR64ExtSXTW32, ZPR64ExtUXTW32, int_aarch64_sve_prfw_gather_sxtw_index, int_aarch64_sve_prfw_gather_uxtw_index>; - defm PRFD_D : sve_mem_64b_prfm_sv_ext_scaled<0b11, "prfd", ZPR64ExtSXTW64, ZPR64ExtUXTW64, int_aarch64_sve_prfd_gather_sxtw_index, int_aarch64_sve_prfd_gather_uxtw_index>; - - // Gather prefetch using scaled 64-bit offsets, e.g. 
- // prfh pldl1keep, p0, [x0, z0.d, lsl #1] - defm PRFB_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b00, "prfb", ZPR64ExtLSL8, int_aarch64_sve_prfb_gather_index>; - defm PRFH_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b01, "prfh", ZPR64ExtLSL16, int_aarch64_sve_prfh_gather_index>; - defm PRFW_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b10, "prfw", ZPR64ExtLSL32, int_aarch64_sve_prfw_gather_index>; - defm PRFD_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b11, "prfd", ZPR64ExtLSL64, int_aarch64_sve_prfd_gather_index>; - - // Gather prefetch using 32/64-bit pointers with offset, e.g. - // prfh pldl1keep, p0, [z0.s, #16] - // prfh pldl1keep, p0, [z0.d, #16] - defm PRFB_S_PZI : sve_mem_32b_prfm_vi<0b00, "prfb", imm0_31, int_aarch64_sve_prfb_gather_scalar_offset>; - defm PRFH_S_PZI : sve_mem_32b_prfm_vi<0b01, "prfh", uimm5s2, int_aarch64_sve_prfh_gather_scalar_offset>; - defm PRFW_S_PZI : sve_mem_32b_prfm_vi<0b10, "prfw", uimm5s4, int_aarch64_sve_prfw_gather_scalar_offset>; - defm PRFD_S_PZI : sve_mem_32b_prfm_vi<0b11, "prfd", uimm5s8, int_aarch64_sve_prfd_gather_scalar_offset>; - - defm PRFB_D_PZI : sve_mem_64b_prfm_vi<0b00, "prfb", imm0_31, int_aarch64_sve_prfb_gather_scalar_offset>; - defm PRFH_D_PZI : sve_mem_64b_prfm_vi<0b01, "prfh", uimm5s2, int_aarch64_sve_prfh_gather_scalar_offset>; - defm PRFW_D_PZI : sve_mem_64b_prfm_vi<0b10, "prfw", uimm5s4, int_aarch64_sve_prfw_gather_scalar_offset>; - defm PRFD_D_PZI : sve_mem_64b_prfm_vi<0b11, "prfd", uimm5s8, int_aarch64_sve_prfd_gather_scalar_offset>; - - defm ADR_SXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_sxtw<0b00, "adr">; - defm ADR_UXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_uxtw<0b01, "adr">; - defm ADR_LSL_ZZZ_S : sve_int_bin_cons_misc_0_a_32_lsl<0b10, "adr">; - defm ADR_LSL_ZZZ_D : sve_int_bin_cons_misc_0_a_64_lsl<0b11, "adr">; - - def : Pat<(nxv4i32 (int_aarch64_sve_adrb nxv4i32:$Op1, nxv4i32:$Op2)), - (ADR_LSL_ZZZ_S_0 $Op1, $Op2)>; - def : Pat<(nxv4i32 (int_aarch64_sve_adrh nxv4i32:$Op1, nxv4i32:$Op2)), - (ADR_LSL_ZZZ_S_1 
$Op1, $Op2)>; - def : Pat<(nxv4i32 (int_aarch64_sve_adrw nxv4i32:$Op1, nxv4i32:$Op2)), - (ADR_LSL_ZZZ_S_2 $Op1, $Op2)>; - def : Pat<(nxv4i32 (int_aarch64_sve_adrd nxv4i32:$Op1, nxv4i32:$Op2)), - (ADR_LSL_ZZZ_S_3 $Op1, $Op2)>; - - def : Pat<(nxv2i64 (int_aarch64_sve_adrb nxv2i64:$Op1, nxv2i64:$Op2)), - (ADR_LSL_ZZZ_D_0 $Op1, $Op2)>; - def : Pat<(nxv2i64 (int_aarch64_sve_adrh nxv2i64:$Op1, nxv2i64:$Op2)), - (ADR_LSL_ZZZ_D_1 $Op1, $Op2)>; - def : Pat<(nxv2i64 (int_aarch64_sve_adrw nxv2i64:$Op1, nxv2i64:$Op2)), - (ADR_LSL_ZZZ_D_2 $Op1, $Op2)>; - def : Pat<(nxv2i64 (int_aarch64_sve_adrd nxv2i64:$Op1, nxv2i64:$Op2)), - (ADR_LSL_ZZZ_D_3 $Op1, $Op2)>; - - // Patterns to generate adr instruction. - // adr z0.d, [z0.d, z0.d, uxtw] - def : Pat<(add nxv2i64:$Op1, - (nxv2i64 (and nxv2i64:$Op2, (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))))), - (ADR_UXTW_ZZZ_D_0 $Op1, $Op2)>; - // adr z0.d, [z0.d, z0.d, sxtw] - def : Pat<(add nxv2i64:$Op1, - (nxv2i64 (sext_inreg nxv2i64:$Op2, nxv2i32))), - (ADR_SXTW_ZZZ_D_0 $Op1, $Op2)>; - - // adr z0.s, [z0.s, z0.s, lsl #] - // adr z0.d, [z0.d, z0.d, lsl #] - multiclass adrShiftPat { - def : Pat<(add Ty:$Op1, - (Ty (AArch64lsl_p (PredTy (SVEAllActive)), - Ty:$Op2, - (Ty (AArch64dup (ShiftTy ShiftAmt)))))), - (DestAdrIns $Op1, $Op2)>; - } - defm : adrShiftPat; - defm : adrShiftPat; - defm : adrShiftPat; - defm : adrShiftPat; - defm : adrShiftPat; - defm : adrShiftPat; - - // adr z0.d, [z0.d, z0.d, uxtw #] - // adr z0.d, [z0.d, z0.d, sxtw #] - multiclass adrXtwShiftPat { - def : Pat<(add Ty:$Op1, - (Ty (AArch64lsl_p (PredTy (SVEAllActive)), - (Ty (and Ty:$Op2, (Ty (AArch64dup (i64 0xFFFFFFFF))))), - (Ty (AArch64dup (i64 ShiftAmt)))))), - (!cast("ADR_UXTW_ZZZ_D_"#ShiftAmt) $Op1, $Op2)>; - - def : Pat<(add Ty:$Op1, - (Ty (AArch64lsl_p (PredTy (SVEAllActive)), - (Ty (sext_inreg Ty:$Op2, nxv2i32)), - (Ty (AArch64dup (i64 ShiftAmt)))))), - (!cast("ADR_SXTW_ZZZ_D_"#ShiftAmt) $Op1, $Op2)>; - } - defm : adrXtwShiftPat; - defm : adrXtwShiftPat; - defm : 
adrXtwShiftPat; -} // End HasSVE - -let Predicates = [HasSVEorStreamingSVE] in { - defm TBL_ZZZ : sve_int_perm_tbl<"tbl", AArch64tbl>; - - defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1", AArch64zip1>; - defm ZIP2_ZZZ : sve_int_perm_bin_perm_zz<0b001, "zip2", AArch64zip2>; - defm UZP1_ZZZ : sve_int_perm_bin_perm_zz<0b010, "uzp1", AArch64uzp1>; - defm UZP2_ZZZ : sve_int_perm_bin_perm_zz<0b011, "uzp2", AArch64uzp2>; - defm TRN1_ZZZ : sve_int_perm_bin_perm_zz<0b100, "trn1", AArch64trn1>; - defm TRN2_ZZZ : sve_int_perm_bin_perm_zz<0b101, "trn2", AArch64trn2>; - - defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1", AArch64zip1>; - defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2", AArch64zip2>; - defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1", AArch64uzp1>; - defm UZP2_PPP : sve_int_perm_bin_perm_pp<0b011, "uzp2", AArch64uzp2>; - defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1", AArch64trn1>; - defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2>; - - // Extract lo/hi halves of legal predicate types. 
- def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 0))), - (PUNPKLO_PP PPR:$Ps)>; - def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 2))), - (PUNPKHI_PP PPR:$Ps)>; - def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))), - (PUNPKLO_PP PPR:$Ps)>; - def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))), - (PUNPKHI_PP PPR:$Ps)>; - def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))), - (PUNPKLO_PP PPR:$Ps)>; - def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))), - (PUNPKHI_PP PPR:$Ps)>; - - def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))), - (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>; - def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 2))), - (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>; - def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))), - (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>; - def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 6))), - (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>; - - def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))), - (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>; - def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 4))), - (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>; - def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))), - (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>; - def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 12))), - (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>; - - def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))), - (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>; - def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 2))), - (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>; - def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 4))), - (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>; - def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 6))), - (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>; - def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))), - (PUNPKLO_PP (PUNPKLO_PP 
(PUNPKHI_PP PPR:$Ps)))>; - def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 10))), - (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>; - def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 12))), - (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>; - def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 14))), - (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>; - - // Extract subvectors from FP SVE vectors - def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 0))), - (UUNPKLO_ZZ_D ZPR:$Zs)>; - def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 2))), - (UUNPKHI_ZZ_D ZPR:$Zs)>; - def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))), - (UUNPKLO_ZZ_S ZPR:$Zs)>; - def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))), - (UUNPKHI_ZZ_S ZPR:$Zs)>; - def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 0))), - (UUNPKLO_ZZ_D ZPR:$Zs)>; - def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 2))), - (UUNPKHI_ZZ_D ZPR:$Zs)>; - - def : Pat<(nxv2bf16 (extract_subvector (nxv4bf16 ZPR:$Zs), (i64 0))), - (UUNPKLO_ZZ_D ZPR:$Zs)>; - def : Pat<(nxv2bf16 (extract_subvector (nxv4bf16 ZPR:$Zs), (i64 2))), - (UUNPKHI_ZZ_D ZPR:$Zs)>; - def : Pat<(nxv4bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 0))), - (UUNPKLO_ZZ_S ZPR:$Zs)>; - def : Pat<(nxv4bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))), - (UUNPKHI_ZZ_S ZPR:$Zs)>; - - def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))), - (UUNPKLO_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>; - def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 2))), - (UUNPKHI_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>; - def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))), - (UUNPKLO_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>; - def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 6))), - (UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>; - - def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 0))), - (UUNPKLO_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>; - def : 
Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 2))), - (UUNPKHI_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>; - def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))), - (UUNPKLO_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>; - def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 6))), - (UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>; - - // Concatenate two predicates. - def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)), - (UZP1_PPP_S $p1, $p2)>; - def : Pat<(nxv8i1 (concat_vectors nxv4i1:$p1, nxv4i1:$p2)), - (UZP1_PPP_H $p1, $p2)>; - def : Pat<(nxv16i1 (concat_vectors nxv8i1:$p1, nxv8i1:$p2)), - (UZP1_PPP_B $p1, $p2)>; - - // Concatenate two floating point vectors. - def : Pat<(nxv4f16 (concat_vectors nxv2f16:$v1, nxv2f16:$v2)), - (UZP1_ZZZ_S $v1, $v2)>; - def : Pat<(nxv8f16 (concat_vectors nxv4f16:$v1, nxv4f16:$v2)), - (UZP1_ZZZ_H $v1, $v2)>; - def : Pat<(nxv4f32 (concat_vectors nxv2f32:$v1, nxv2f32:$v2)), - (UZP1_ZZZ_S $v1, $v2)>; - def : Pat<(nxv4bf16 (concat_vectors nxv2bf16:$v1, nxv2bf16:$v2)), - (UZP1_ZZZ_S $v1, $v2)>; - def : Pat<(nxv8bf16 (concat_vectors nxv4bf16:$v1, nxv4bf16:$v2)), - (UZP1_ZZZ_H $v1, $v2)>; - - // Splice with lane equal to -1 - def : Pat<(nxv16i8 (vector_splice (nxv16i8 ZPR:$Z1), (nxv16i8 ZPR:$Z2), (i64 -1))), - (INSR_ZV_B ZPR:$Z2, (INSERT_SUBREG (IMPLICIT_DEF), - (LASTB_VPZ_B (PTRUE_B 31), ZPR:$Z1), bsub))>; - def : Pat<(nxv8i16 (vector_splice (nxv8i16 ZPR:$Z1), (nxv8i16 ZPR:$Z2), (i64 -1))), - (INSR_ZV_H ZPR:$Z2, (INSERT_SUBREG (IMPLICIT_DEF), - (LASTB_VPZ_H (PTRUE_H 31), ZPR:$Z1), hsub))>; - def : Pat<(nxv4i32 (vector_splice (nxv4i32 ZPR:$Z1), (nxv4i32 ZPR:$Z2), (i64 -1))), - (INSR_ZV_S ZPR:$Z2, (INSERT_SUBREG (IMPLICIT_DEF), - (LASTB_VPZ_S (PTRUE_S 31), ZPR:$Z1), ssub))>; - def : Pat<(nxv2i64 (vector_splice (nxv2i64 ZPR:$Z1), (nxv2i64 ZPR:$Z2), (i64 -1))), - (INSR_ZV_D ZPR:$Z2, (INSERT_SUBREG (IMPLICIT_DEF), - (LASTB_VPZ_D (PTRUE_D 31), ZPR:$Z1), dsub))>; - - // Splice with lane bigger or equal to 0 - def : Pat<(nxv16i8 
(vector_splice (nxv16i8 ZPR:$Z1), (nxv16i8 ZPR:$Z2), (i64 (sve_ext_imm_0_255 i32:$index)))), - (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>; - def : Pat<(nxv8i16 (vector_splice (nxv8i16 ZPR:$Z1), (nxv8i16 ZPR:$Z2), (i64 (sve_ext_imm_0_127 i32:$index)))), - (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>; - def : Pat<(nxv4i32 (vector_splice (nxv4i32 ZPR:$Z1), (nxv4i32 ZPR:$Z2), (i64 (sve_ext_imm_0_63 i32:$index)))), - (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>; - def : Pat<(nxv2i64 (vector_splice (nxv2i64 ZPR:$Z1), (nxv2i64 ZPR:$Z2), (i64 (sve_ext_imm_0_31 i32:$index)))), - (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>; - - defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>; - defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>; - defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>; - defm CMPGT_PPzZZ : sve_int_cmp_0<0b101, "cmpgt", SETGT, SETLT>; - defm CMPEQ_PPzZZ : sve_int_cmp_0<0b110, "cmpeq", SETEQ, SETEQ>; - defm CMPNE_PPzZZ : sve_int_cmp_0<0b111, "cmpne", SETNE, SETNE>; - - defm CMPEQ_WIDE_PPzZZ : sve_int_cmp_0_wide<0b010, "cmpeq", int_aarch64_sve_cmpeq_wide>; - defm CMPNE_WIDE_PPzZZ : sve_int_cmp_0_wide<0b011, "cmpne", int_aarch64_sve_cmpne_wide>; - defm CMPGE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b000, "cmpge", int_aarch64_sve_cmpge_wide>; - defm CMPGT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b001, "cmpgt", int_aarch64_sve_cmpgt_wide>; - defm CMPLT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b010, "cmplt", int_aarch64_sve_cmplt_wide>; - defm CMPLE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b011, "cmple", int_aarch64_sve_cmple_wide>; - defm CMPHS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b100, "cmphs", int_aarch64_sve_cmphs_wide>; - defm CMPHI_WIDE_PPzZZ : sve_int_cmp_1_wide<0b101, "cmphi", int_aarch64_sve_cmphi_wide>; - defm CMPLO_WIDE_PPzZZ : sve_int_cmp_1_wide<0b110, "cmplo", int_aarch64_sve_cmplo_wide>; - defm CMPLS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b111, "cmpls", int_aarch64_sve_cmpls_wide>; - - defm CMPGE_PPzZI : sve_int_scmp_vi<0b000, "cmpge", SETGE, SETLE>; 
- defm CMPGT_PPzZI : sve_int_scmp_vi<0b001, "cmpgt", SETGT, SETLT>; - defm CMPLT_PPzZI : sve_int_scmp_vi<0b010, "cmplt", SETLT, SETGT>; - defm CMPLE_PPzZI : sve_int_scmp_vi<0b011, "cmple", SETLE, SETGE>; - defm CMPEQ_PPzZI : sve_int_scmp_vi<0b100, "cmpeq", SETEQ, SETEQ>; - defm CMPNE_PPzZI : sve_int_scmp_vi<0b101, "cmpne", SETNE, SETEQ>; - defm CMPHS_PPzZI : sve_int_ucmp_vi<0b00, "cmphs", SETUGE, SETULE>; - defm CMPHI_PPzZI : sve_int_ucmp_vi<0b01, "cmphi", SETUGT, SETULT>; - defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, SETUGT>; - defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, SETUGE>; - - defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", SETOGE, SETGE, SETOLE, SETLE>; - defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", SETOGT, SETGT, SETOLT, SETLT>; - defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>; - defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", SETONE, SETNE, SETONE, SETNE>; - defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", SETUO, SETUO, SETUO, SETUO>; - defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>; - defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>; - - defm FCMGE_PPzZ0 : sve_fp_2op_p_pd<0b000, "fcmge", SETOGE, SETGE, SETOLE, SETLE>; - defm FCMGT_PPzZ0 : sve_fp_2op_p_pd<0b001, "fcmgt", SETOGT, SETGT, SETOLT, SETLT>; - defm FCMLT_PPzZ0 : sve_fp_2op_p_pd<0b010, "fcmlt", SETOLT, SETLT, SETOGT, SETGT>; - defm FCMLE_PPzZ0 : sve_fp_2op_p_pd<0b011, "fcmle", SETOLE, SETLE, SETOGE, SETGE>; - defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>; - defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne", SETONE, SETNE, SETONE, SETNE>; - - defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt>; - defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele>; - defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo", int_aarch64_sve_whilelo>; - defm WHILELS_PWW : 
sve_int_while4_rr<0b111, "whilels", int_aarch64_sve_whilels>; - - defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt", int_aarch64_sve_whilelt>; - defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele", int_aarch64_sve_whilele>; - defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo", int_aarch64_sve_whilelo>; - defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels", int_aarch64_sve_whilels>; - - def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>; - def CTERMNE_WW : sve_int_cterm<0b0, 0b1, "ctermne", GPR32>; - def CTERMEQ_XX : sve_int_cterm<0b1, 0b0, "ctermeq", GPR64>; - def CTERMNE_XX : sve_int_cterm<0b1, 0b1, "ctermne", GPR64>; - - def RDVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdvl">; - def ADDVL_XXI : sve_int_arith_vl<0b0, "addvl">; - def ADDPL_XXI : sve_int_arith_vl<0b1, "addpl">; - - defm CNTB_XPiI : sve_int_count<0b000, "cntb", int_aarch64_sve_cntb>; - defm CNTH_XPiI : sve_int_count<0b010, "cnth", int_aarch64_sve_cnth>; - defm CNTW_XPiI : sve_int_count<0b100, "cntw", int_aarch64_sve_cntw>; - defm CNTD_XPiI : sve_int_count<0b110, "cntd", int_aarch64_sve_cntd>; - defm CNTP_XPP : sve_int_pcount_pred<0b0000, "cntp", int_aarch64_sve_cntp>; -} - - defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb", add, int_aarch64_sve_cntb>; - defm DECB_XPiI : sve_int_pred_pattern_a<0b001, "decb", sub, int_aarch64_sve_cntb>; - defm INCH_XPiI : sve_int_pred_pattern_a<0b010, "inch", add, int_aarch64_sve_cnth>; - defm DECH_XPiI : sve_int_pred_pattern_a<0b011, "dech", sub, int_aarch64_sve_cnth>; - defm INCW_XPiI : sve_int_pred_pattern_a<0b100, "incw", add, int_aarch64_sve_cntw>; - defm DECW_XPiI : sve_int_pred_pattern_a<0b101, "decw", sub, int_aarch64_sve_cntw>; - defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd", add, int_aarch64_sve_cntd>; - defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd", sub, int_aarch64_sve_cntd>; - -let Predicates = [HasSVEorStreamingSVE] in { - defm SQINCB_XPiWdI : sve_int_pred_pattern_b_s32<0b00000, "sqincb", int_aarch64_sve_sqincb_n32>; 
- defm UQINCB_WPiI : sve_int_pred_pattern_b_u32<0b00001, "uqincb", int_aarch64_sve_uqincb_n32>; - defm SQDECB_XPiWdI : sve_int_pred_pattern_b_s32<0b00010, "sqdecb", int_aarch64_sve_sqdecb_n32>; - defm UQDECB_WPiI : sve_int_pred_pattern_b_u32<0b00011, "uqdecb", int_aarch64_sve_uqdecb_n32>; - defm SQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00100, "sqincb", int_aarch64_sve_sqincb_n64>; - defm UQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00101, "uqincb", int_aarch64_sve_uqincb_n64>; - defm SQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00110, "sqdecb", int_aarch64_sve_sqdecb_n64>; - defm UQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00111, "uqdecb", int_aarch64_sve_uqdecb_n64>; - - defm SQINCH_XPiWdI : sve_int_pred_pattern_b_s32<0b01000, "sqinch", int_aarch64_sve_sqinch_n32>; - defm UQINCH_WPiI : sve_int_pred_pattern_b_u32<0b01001, "uqinch", int_aarch64_sve_uqinch_n32>; - defm SQDECH_XPiWdI : sve_int_pred_pattern_b_s32<0b01010, "sqdech", int_aarch64_sve_sqdech_n32>; - defm UQDECH_WPiI : sve_int_pred_pattern_b_u32<0b01011, "uqdech", int_aarch64_sve_uqdech_n32>; - defm SQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01100, "sqinch", int_aarch64_sve_sqinch_n64>; - defm UQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01101, "uqinch", int_aarch64_sve_uqinch_n64>; - defm SQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01110, "sqdech", int_aarch64_sve_sqdech_n64>; - defm UQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01111, "uqdech", int_aarch64_sve_uqdech_n64>; - - defm SQINCW_XPiWdI : sve_int_pred_pattern_b_s32<0b10000, "sqincw", int_aarch64_sve_sqincw_n32>; - defm UQINCW_WPiI : sve_int_pred_pattern_b_u32<0b10001, "uqincw", int_aarch64_sve_uqincw_n32>; - defm SQDECW_XPiWdI : sve_int_pred_pattern_b_s32<0b10010, "sqdecw", int_aarch64_sve_sqdecw_n32>; - defm UQDECW_WPiI : sve_int_pred_pattern_b_u32<0b10011, "uqdecw", int_aarch64_sve_uqdecw_n32>; - defm SQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10100, "sqincw", int_aarch64_sve_sqincw_n64>; - defm UQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10101, 
"uqincw", int_aarch64_sve_uqincw_n64>; - defm SQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10110, "sqdecw", int_aarch64_sve_sqdecw_n64>; - defm UQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10111, "uqdecw", int_aarch64_sve_uqdecw_n64>; - - defm SQINCD_XPiWdI : sve_int_pred_pattern_b_s32<0b11000, "sqincd", int_aarch64_sve_sqincd_n32>; - defm UQINCD_WPiI : sve_int_pred_pattern_b_u32<0b11001, "uqincd", int_aarch64_sve_uqincd_n32>; - defm SQDECD_XPiWdI : sve_int_pred_pattern_b_s32<0b11010, "sqdecd", int_aarch64_sve_sqdecd_n32>; - defm UQDECD_WPiI : sve_int_pred_pattern_b_u32<0b11011, "uqdecd", int_aarch64_sve_uqdecd_n32>; - defm SQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11100, "sqincd", int_aarch64_sve_sqincd_n64>; - defm UQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11101, "uqincd", int_aarch64_sve_uqincd_n64>; - defm SQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11110, "sqdecd", int_aarch64_sve_sqdecd_n64>; - defm UQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11111, "uqdecd", int_aarch64_sve_uqdecd_n64>; - - defm SQINCH_ZPiI : sve_int_countvlv<0b01000, "sqinch", ZPR16, int_aarch64_sve_sqinch, nxv8i16>; - defm UQINCH_ZPiI : sve_int_countvlv<0b01001, "uqinch", ZPR16, int_aarch64_sve_uqinch, nxv8i16>; - defm SQDECH_ZPiI : sve_int_countvlv<0b01010, "sqdech", ZPR16, int_aarch64_sve_sqdech, nxv8i16>; - defm UQDECH_ZPiI : sve_int_countvlv<0b01011, "uqdech", ZPR16, int_aarch64_sve_uqdech, nxv8i16>; - defm INCH_ZPiI : sve_int_countvlv<0b01100, "inch", ZPR16>; - defm DECH_ZPiI : sve_int_countvlv<0b01101, "dech", ZPR16>; - defm SQINCW_ZPiI : sve_int_countvlv<0b10000, "sqincw", ZPR32, int_aarch64_sve_sqincw, nxv4i32>; - defm UQINCW_ZPiI : sve_int_countvlv<0b10001, "uqincw", ZPR32, int_aarch64_sve_uqincw, nxv4i32>; - defm SQDECW_ZPiI : sve_int_countvlv<0b10010, "sqdecw", ZPR32, int_aarch64_sve_sqdecw, nxv4i32>; - defm UQDECW_ZPiI : sve_int_countvlv<0b10011, "uqdecw", ZPR32, int_aarch64_sve_uqdecw, nxv4i32>; - defm INCW_ZPiI : sve_int_countvlv<0b10100, "incw", ZPR32>; - defm DECW_ZPiI : 
sve_int_countvlv<0b10101, "decw", ZPR32>; - defm SQINCD_ZPiI : sve_int_countvlv<0b11000, "sqincd", ZPR64, int_aarch64_sve_sqincd, nxv2i64>; - defm UQINCD_ZPiI : sve_int_countvlv<0b11001, "uqincd", ZPR64, int_aarch64_sve_uqincd, nxv2i64>; - defm SQDECD_ZPiI : sve_int_countvlv<0b11010, "sqdecd", ZPR64, int_aarch64_sve_sqdecd, nxv2i64>; - defm UQDECD_ZPiI : sve_int_countvlv<0b11011, "uqdecd", ZPR64, int_aarch64_sve_uqdecd, nxv2i64>; - defm INCD_ZPiI : sve_int_countvlv<0b11100, "incd", ZPR64>; - defm DECD_ZPiI : sve_int_countvlv<0b11101, "decd", ZPR64>; - - defm SQINCP_XPWd : sve_int_count_r_s32<0b00000, "sqincp", int_aarch64_sve_sqincp_n32>; - defm SQINCP_XP : sve_int_count_r_x64<0b00010, "sqincp", int_aarch64_sve_sqincp_n64>; - defm UQINCP_WP : sve_int_count_r_u32<0b00100, "uqincp", int_aarch64_sve_uqincp_n32>; - defm UQINCP_XP : sve_int_count_r_x64<0b00110, "uqincp", int_aarch64_sve_uqincp_n64>; - defm SQDECP_XPWd : sve_int_count_r_s32<0b01000, "sqdecp", int_aarch64_sve_sqdecp_n32>; - defm SQDECP_XP : sve_int_count_r_x64<0b01010, "sqdecp", int_aarch64_sve_sqdecp_n64>; - defm UQDECP_WP : sve_int_count_r_u32<0b01100, "uqdecp", int_aarch64_sve_uqdecp_n32>; - defm UQDECP_XP : sve_int_count_r_x64<0b01110, "uqdecp", int_aarch64_sve_uqdecp_n64>; - defm INCP_XP : sve_int_count_r_x64<0b10000, "incp", null_frag, add>; - defm DECP_XP : sve_int_count_r_x64<0b10100, "decp", null_frag, sub>; - - defm SQINCP_ZP : sve_int_count_v<0b00000, "sqincp", int_aarch64_sve_sqincp>; - defm UQINCP_ZP : sve_int_count_v<0b00100, "uqincp", int_aarch64_sve_uqincp>; - defm SQDECP_ZP : sve_int_count_v<0b01000, "sqdecp", int_aarch64_sve_sqdecp>; - defm UQDECP_ZP : sve_int_count_v<0b01100, "uqdecp", int_aarch64_sve_uqdecp>; - defm INCP_ZP : sve_int_count_v<0b10000, "incp">; - defm DECP_ZP : sve_int_count_v<0b10100, "decp">; - - defm INDEX_RR : sve_int_index_rr<"index", AArch64mul_p_oneuse>; - defm INDEX_IR : sve_int_index_ir<"index", AArch64mul_p, AArch64mul_p_oneuse>; - defm INDEX_RI : 
sve_int_index_ri<"index">; - defm INDEX_II : sve_int_index_ii<"index">; - - // Unpredicated shifts - defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_p>; - defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_p>; - defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_p>; - - defm ASR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b00, "asr", int_aarch64_sve_asr_wide>; - defm LSR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b01, "lsr", int_aarch64_sve_lsr_wide>; - defm LSL_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b11, "lsl", int_aarch64_sve_lsl_wide>; - - // Predicated shifts - defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0000, "asr", "ASR_ZPZI", int_aarch64_sve_asr>; - defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0001, "lsr", "LSR_ZPZI", int_aarch64_sve_lsr>; - defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl", "LSL_ZPZI", int_aarch64_sve_lsl>; - defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", AArch64asrd_m1>; - - defm ASR_ZPZI : sve_int_shift_pred_bhsd; - defm LSR_ZPZI : sve_int_shift_pred_bhsd; - defm LSL_ZPZI : sve_int_shift_pred_bhsd; -} // End HasSVEorStreamingSVE - -let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in { - defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; - defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; - defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd; - defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd; -} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos - -let Predicates = [HasSVEorStreamingSVE] in { - defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ">; - defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ">; - defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ">; - defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", "ASRR_ZPZZ", null_frag, 
"ASR_ZPmZ", /*isReverseInstr*/ 1>; - defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", "LSRR_ZPZZ", null_frag, "LSR_ZPmZ", /*isReverseInstr*/ 1>; - defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", "LSLR_ZPZZ", null_frag, "LSL_ZPmZ", /*isReverseInstr*/ 1>; - - defm ASR_ZPZZ : sve_int_bin_pred_bhsd; - defm LSR_ZPZZ : sve_int_bin_pred_bhsd; - defm LSL_ZPZZ : sve_int_bin_pred_bhsd; - - defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>; - defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>; - defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>; - - defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zdr<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, AArch64fcvtr_mt, nxv4f16, nxv4i1, nxv4f32, ElementSizeS>; - defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, AArch64fcvte_mt, nxv4f32, nxv4i1, nxv4f16, ElementSizeS>; - defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110010, "scvtf", ZPR16, ZPR16, null_frag, AArch64scvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; - defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010100, "scvtf", ZPR32, ZPR32, null_frag, AArch64scvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; - defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010101, "ucvtf", ZPR32, ZPR32, null_frag, AArch64ucvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; - defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110011, "ucvtf", ZPR16, ZPR16, null_frag, AArch64ucvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; - defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111010, "fcvtzs", ZPR16, ZPR16, null_frag, AArch64fcvtzs_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; - defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011100, "fcvtzs", ZPR32, ZPR32, null_frag, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; - defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111011, "fcvtzu", ZPR16, ZPR16, null_frag, AArch64fcvtzu_mt, nxv8i16, nxv8i1, 
nxv8f16, ElementSizeH>; - defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011101, "fcvtzu", ZPR32, ZPR32, null_frag, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; - defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zdr<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, AArch64fcvtr_mt, nxv2f16, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f16, ElementSizeD>; - defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zdr<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, AArch64fcvtr_mt, nxv2f32, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f32, ElementSizeD>; - defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; - defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; - defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, AArch64ucvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; - defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, AArch64scvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; - defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, AArch64scvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; - defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, AArch64scvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; - defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, AArch64ucvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; - defm UCVTF_ZPmZ_DtoH : 
sve_fp_2op_p_zd< 0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, AArch64ucvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; - defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110110, "scvtf", ZPR64, ZPR64, null_frag, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; - defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110111, "ucvtf", ZPR64, ZPR64, null_frag, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; - defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; - defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; - defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; - defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; - defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; - defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; - defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, 
AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; - - //These patterns exist to improve the code quality of conversions on unpacked types. - def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 (SVEAllActive):$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))), - (FCVT_ZPmZ_HtoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - // FP_ROUND has an additional 'precise' flag which indicates the type of rounding. - // This is ignored by the pattern below where it is matched by (i64 timm0_1) - def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 (SVEAllActive):$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))), - (FCVT_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - // Signed integer -> Floating-point - def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg), - (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (nxv2f16 ZPR:$Zd))), - (SCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 (SVEAllActive):$Pg), - (sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (nxv4f16 ZPR:$Zd))), - (SCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg), - (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f16 ZPR:$Zd))), - (SCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg), - (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f32 ZPR:$Zd))), - (SCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg), - (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f64 ZPR:$Zd))), - (SCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - // Unsigned integer -> Floating-point - def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg), - (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))), - (UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg), - (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 
0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))), - (UCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 (SVEAllActive):$Pg), - (and (nxv4i32 ZPR:$Zs), - (nxv4i32 (AArch64dup (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))), - (UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg), - (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))), - (UCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg), - (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))), - (UCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", AArch64frintn_mt>; - defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", AArch64frintp_mt>; - defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", AArch64frintm_mt>; - defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", AArch64frintz_mt>; - defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", AArch64frinta_mt>; - defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", AArch64frintx_mt>; - defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", AArch64frinti_mt>; - defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", AArch64frecpx_mt>; - defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>; -} // End HasSVEorStreamingSVE - -let Predicates = [HasBF16, HasSVEorStreamingSVE] in { - defm BFDOT_ZZZ : sve_bfloat_dot<"bfdot", int_aarch64_sve_bfdot>; - defm BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot", int_aarch64_sve_bfdot_lane>; -} // End HasBF16, HasSVEorStreamingSVE - -let Predicates = [HasBF16, HasSVE] in { - defm BFMMLA_ZZZ : sve_bfloat_matmul<"bfmmla", int_aarch64_sve_bfmmla>; -} // End HasBF16, HasSVE - -let Predicates = [HasBF16, HasSVEorStreamingSVE] in { - defm BFMMLA_B_ZZZ : sve_bfloat_matmul_longvecl<0b0, "bfmlalb", 
int_aarch64_sve_bfmlalb>; - defm BFMMLA_T_ZZZ : sve_bfloat_matmul_longvecl<0b1, "bfmlalt", int_aarch64_sve_bfmlalt>; - defm BFMMLA_B_ZZI : sve_bfloat_matmul_longvecl_idx<0b0, "bfmlalb", int_aarch64_sve_bfmlalb_lane>; - defm BFMMLA_T_ZZI : sve_bfloat_matmul_longvecl_idx<0b1, "bfmlalt", int_aarch64_sve_bfmlalt_lane>; - defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32>; - defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>; -} // End HasBF16, HasSVEorStreamingSVE - -let Predicates = [HasSVEorStreamingSVE] in { - // InstAliases - def : InstAlias<"mov $Zd, $Zn", - (ORR_ZZZ ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zn), 1>; - def : InstAlias<"mov $Pd, $Pg/m, $Pn", - (SEL_PPPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPR8:$Pd), 1>; - def : InstAlias<"mov $Pd, $Pn", - (ORR_PPzPP PPR8:$Pd, PPR8:$Pn, PPR8:$Pn, PPR8:$Pn), 1>; - def : InstAlias<"mov $Pd, $Pg/z, $Pn", - (AND_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPR8:$Pn), 1>; - - def : InstAlias<"movs $Pd, $Pn", - (ORRS_PPzPP PPR8:$Pd, PPR8:$Pn, PPR8:$Pn, PPR8:$Pn), 1>; - def : InstAlias<"movs $Pd, $Pg/z, $Pn", - (ANDS_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPR8:$Pn), 1>; - - def : InstAlias<"not $Pd, $Pg/z, $Pn", - (EOR_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPRAny:$Pg), 1>; - - def : InstAlias<"nots $Pd, $Pg/z, $Pn", - (EORS_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPRAny:$Pg), 1>; - - def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn", - (CMPGE_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>; - def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn", - (CMPGE_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; - def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn", - (CMPGE_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; - def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn", - (CMPGE_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; - - def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn", - (CMPHI_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>; - def : InstAlias<"cmplo $Zd, 
$Pg/z, $Zm, $Zn", - (CMPHI_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; - def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn", - (CMPHI_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; - def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn", - (CMPHI_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; - - def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn", - (CMPHS_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>; - def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn", - (CMPHS_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; - def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn", - (CMPHS_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; - def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn", - (CMPHS_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; - - def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn", - (CMPGT_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>; - def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn", - (CMPGT_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; - def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn", - (CMPGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; - def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn", - (CMPGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; - - def : InstAlias<"facle $Zd, $Pg/z, $Zm, $Zn", - (FACGE_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; - def : InstAlias<"facle $Zd, $Pg/z, $Zm, $Zn", - (FACGE_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; - def : InstAlias<"facle $Zd, $Pg/z, $Zm, $Zn", - (FACGE_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; - - def : InstAlias<"faclt $Zd, $Pg/z, $Zm, $Zn", - (FACGT_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; - def : InstAlias<"faclt $Zd, $Pg/z, $Zm, $Zn", - (FACGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; - def : InstAlias<"faclt $Zd, $Pg/z, $Zm, $Zn", - (FACGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; - - def : InstAlias<"fcmle $Zd, $Pg/z, 
$Zm, $Zn", - (FCMGE_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; - def : InstAlias<"fcmle $Zd, $Pg/z, $Zm, $Zn", - (FCMGE_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; - def : InstAlias<"fcmle $Zd, $Pg/z, $Zm, $Zn", - (FCMGE_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; - - def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn", - (FCMGT_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; - def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn", - (FCMGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; - def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn", - (FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; - - // Pseudo instructions representing unpredicated LDR and STR for ZPR2,3,4. - // These get expanded to individual LDR_ZXI/STR_ZXI instructions in - // AArch64ExpandPseudoInsts. - let mayLoad = 1, hasSideEffects = 0 in { - def LDR_ZZXI : Pseudo<(outs ZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; - def LDR_ZZZXI : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; - def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; - } - let mayStore = 1, hasSideEffects = 0 in { - def STR_ZZXI : Pseudo<(outs), (ins ZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; - def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; - def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; - } - - def : Pat<(AArch64ptest (nxv16i1 PPR:$pg), (nxv16i1 PPR:$src)), - (PTEST_PP PPR:$pg, PPR:$src)>; - def : Pat<(AArch64ptest (nxv8i1 PPR:$pg), (nxv8i1 PPR:$src)), - (PTEST_PP PPR:$pg, PPR:$src)>; - def : Pat<(AArch64ptest (nxv4i1 PPR:$pg), (nxv4i1 PPR:$src)), - (PTEST_PP PPR:$pg, PPR:$src)>; - def : Pat<(AArch64ptest (nxv2i1 PPR:$pg), (nxv2i1 PPR:$src)), - (PTEST_PP PPR:$pg, PPR:$src)>; - - let AddedComplexity = 1 in { - class LD1RPat : - Pat<(vt (AArch64dup (index_vt (operator (CP 
GPR64:$base, immtype:$offset))))), - (load (ptrue 31), GPR64:$base, $offset)>; - } - - // LDR1 of 8-bit data - def : LD1RPat; - def : LD1RPat; - def : LD1RPat; - def : LD1RPat; - def : LD1RPat; - def : LD1RPat; - def : LD1RPat; - - // LDR1 of 16-bit data - def : LD1RPat; - def : LD1RPat; - def : LD1RPat; - def : LD1RPat; - def : LD1RPat; - - // LDR1 of 32-bit data - def : LD1RPat; - def : LD1RPat; - def : LD1RPat; - - // LDR1 of 64-bit data - def : LD1RPat; - - // LD1R of FP data - def : LD1RPat; - def : LD1RPat; - def : LD1RPat; - def : LD1RPat; - def : LD1RPat; - def : LD1RPat; - - // LD1R of 128-bit masked data - def : Pat<(nxv16i8 (AArch64ld1rq_z PPR:$gp, GPR64:$base)), - (LD1RQ_B_IMM $gp, $base, (i64 0))>; - def : Pat<(nxv8i16 (AArch64ld1rq_z PPR:$gp, GPR64:$base)), - (LD1RQ_H_IMM $gp, $base, (i64 0))>; - def : Pat<(nxv4i32 (AArch64ld1rq_z PPR:$gp, GPR64:$base)), - (LD1RQ_W_IMM $gp, $base, (i64 0))>; - def : Pat<(nxv2i64 (AArch64ld1rq_z PPR:$gp, GPR64:$base)), - (LD1RQ_D_IMM $gp, $base, (i64 0))>; - - def : Pat<(nxv16i8 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))), - (LD1RQ_B_IMM $gp, $base, simm4s16:$imm)>; - def : Pat<(nxv8i16 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))), - (LD1RQ_H_IMM $gp, $base, simm4s16:$imm)>; - def : Pat<(nxv4i32 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))), - (LD1RQ_W_IMM $gp, $base, simm4s16:$imm)>; - def : Pat<(nxv2i64 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))), - (LD1RQ_D_IMM $gp, $base, simm4s16:$imm)>; - - def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; - def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (SXTH_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; - def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i8), (SXTB_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; - def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (SXTH_ZPmZ_UNDEF_S (IMPLICIT_DEF), (PTRUE_S 31), 
ZPR:$Zs)>; - def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i8), (SXTB_ZPmZ_UNDEF_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>; - def : Pat<(sext_inreg (nxv8i16 ZPR:$Zs), nxv8i8), (SXTB_ZPmZ_UNDEF_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$Zs)>; - - // General case that we ideally never want to match. - def : Pat<(vscale GPR64:$scale), (MADDXrrr (UBFMXri (RDVLI_XI 1), 4, 63), $scale, XZR)>; - - let AddedComplexity = 5 in { - def : Pat<(vscale (i64 1)), (UBFMXri (RDVLI_XI 1), 4, 63)>; - def : Pat<(vscale (i64 -1)), (SBFMXri (RDVLI_XI -1), 4, 63)>; - - def : Pat<(vscale (sve_rdvl_imm i32:$imm)), (RDVLI_XI $imm)>; - def : Pat<(vscale (sve_cnth_imm i32:$imm)), (CNTH_XPiI 31, $imm)>; - def : Pat<(vscale (sve_cntw_imm i32:$imm)), (CNTW_XPiI 31, $imm)>; - def : Pat<(vscale (sve_cntd_imm i32:$imm)), (CNTD_XPiI 31, $imm)>; - - def : Pat<(vscale (sve_cnth_imm_neg i32:$imm)), (SUBXrs XZR, (CNTH_XPiI 31, $imm), 0)>; - def : Pat<(vscale (sve_cntw_imm_neg i32:$imm)), (SUBXrs XZR, (CNTW_XPiI 31, $imm), 0)>; - def : Pat<(vscale (sve_cntd_imm_neg i32:$imm)), (SUBXrs XZR, (CNTD_XPiI 31, $imm), 0)>; - } - - let AddedComplexity = 5 in { - def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))), - (ADDVL_XXI GPR64:$op, $imm)>; - - def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_rdvl_imm i32:$imm))))), - (i32 (EXTRACT_SUBREG (ADDVL_XXI (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - GPR32:$op, sub_32), $imm), - sub_32))>; - - def : Pat<(nxv8i16 (add ZPR:$op, (nxv8i16 (AArch64dup (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))), - (INCH_ZPiI ZPR:$op, 31, $imm)>; - def : Pat<(nxv4i32 (add ZPR:$op, (nxv4i32 (AArch64dup (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))), - (INCW_ZPiI ZPR:$op, 31, $imm)>; - def : Pat<(nxv2i64 (add ZPR:$op, (nxv2i64 (AArch64dup (i64 (vscale (sve_cntd_imm i32:$imm))))))), - (INCD_ZPiI ZPR:$op, 31, $imm)>; - - def : Pat<(nxv8i16 (sub ZPR:$op, (nxv8i16 (AArch64dup (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))), - (DECH_ZPiI ZPR:$op, 31, $imm)>; - def : Pat<(nxv4i32 
(sub ZPR:$op, (nxv4i32 (AArch64dup (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))), - (DECW_ZPiI ZPR:$op, 31, $imm)>; - def : Pat<(nxv2i64 (sub ZPR:$op, (nxv2i64 (AArch64dup (i64 (vscale (sve_cntd_imm i32:$imm))))))), - (DECD_ZPiI ZPR:$op, 31, $imm)>; - } - - let Predicates = [HasSVEorStreamingSVE, UseScalarIncVL], AddedComplexity = 5 in { - def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm i32:$imm))), - (INCH_XPiI GPR64:$op, 31, $imm)>; - def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm i32:$imm))), - (INCW_XPiI GPR64:$op, 31, $imm)>; - def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm i32:$imm))), - (INCD_XPiI GPR64:$op, 31, $imm)>; - - def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm_neg i32:$imm))), - (DECH_XPiI GPR64:$op, 31, $imm)>; - def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm_neg i32:$imm))), - (DECW_XPiI GPR64:$op, 31, $imm)>; - def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm_neg i32:$imm))), - (DECD_XPiI GPR64:$op, 31, $imm)>; - - def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm i32:$imm))))), - (i32 (EXTRACT_SUBREG (INCH_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - GPR32:$op, sub_32), 31, $imm), - sub_32))>; - def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm i32:$imm))))), - (i32 (EXTRACT_SUBREG (INCW_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - GPR32:$op, sub_32), 31, $imm), - sub_32))>; - def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntd_imm i32:$imm))))), - (i32 (EXTRACT_SUBREG (INCD_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - GPR32:$op, sub_32), 31, $imm), - sub_32))>; - - def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm_neg i32:$imm))))), - (i32 (EXTRACT_SUBREG (DECH_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - GPR32:$op, sub_32), 31, $imm), - sub_32))>; - def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm_neg i32:$imm))))), - (i32 (EXTRACT_SUBREG (DECW_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - GPR32:$op, sub_32), 31, $imm), - sub_32))>; - def : Pat<(add GPR32:$op, (i32 (trunc (vscale 
(sve_cntd_imm_neg i32:$imm))))), - (i32 (EXTRACT_SUBREG (DECD_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - GPR32:$op, sub_32), 31, $imm), - sub_32))>; - } - - def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))), - (ADDVL_XXI GPR64:$op, $imm)>; - - // FIXME: BigEndian requires an additional REV instruction to satisfy the - // constraint that none of the bits change when stored to memory as one - // type, and and reloaded as another type. - let Predicates = [IsLE] in { - def : Pat<(nxv16i8 (bitconvert (nxv8i16 ZPR:$src))), (nxv16i8 ZPR:$src)>; - def : Pat<(nxv16i8 (bitconvert (nxv4i32 ZPR:$src))), (nxv16i8 ZPR:$src)>; - def : Pat<(nxv16i8 (bitconvert (nxv2i64 ZPR:$src))), (nxv16i8 ZPR:$src)>; - def : Pat<(nxv16i8 (bitconvert (nxv8f16 ZPR:$src))), (nxv16i8 ZPR:$src)>; - def : Pat<(nxv16i8 (bitconvert (nxv4f32 ZPR:$src))), (nxv16i8 ZPR:$src)>; - def : Pat<(nxv16i8 (bitconvert (nxv2f64 ZPR:$src))), (nxv16i8 ZPR:$src)>; - - def : Pat<(nxv8i16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8i16 ZPR:$src)>; - def : Pat<(nxv8i16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8i16 ZPR:$src)>; - def : Pat<(nxv8i16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8i16 ZPR:$src)>; - def : Pat<(nxv8i16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8i16 ZPR:$src)>; - def : Pat<(nxv8i16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8i16 ZPR:$src)>; - def : Pat<(nxv8i16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8i16 ZPR:$src)>; - - def : Pat<(nxv4i32 (bitconvert (nxv16i8 ZPR:$src))), (nxv4i32 ZPR:$src)>; - def : Pat<(nxv4i32 (bitconvert (nxv8i16 ZPR:$src))), (nxv4i32 ZPR:$src)>; - def : Pat<(nxv4i32 (bitconvert (nxv2i64 ZPR:$src))), (nxv4i32 ZPR:$src)>; - def : Pat<(nxv4i32 (bitconvert (nxv8f16 ZPR:$src))), (nxv4i32 ZPR:$src)>; - def : Pat<(nxv4i32 (bitconvert (nxv4f32 ZPR:$src))), (nxv4i32 ZPR:$src)>; - def : Pat<(nxv4i32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4i32 ZPR:$src)>; - - def : Pat<(nxv2i64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2i64 ZPR:$src)>; - def : Pat<(nxv2i64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2i64 
ZPR:$src)>; - def : Pat<(nxv2i64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2i64 ZPR:$src)>; - def : Pat<(nxv2i64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2i64 ZPR:$src)>; - def : Pat<(nxv2i64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2i64 ZPR:$src)>; - def : Pat<(nxv2i64 (bitconvert (nxv2f64 ZPR:$src))), (nxv2i64 ZPR:$src)>; - - def : Pat<(nxv8f16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8f16 ZPR:$src)>; - def : Pat<(nxv8f16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8f16 ZPR:$src)>; - def : Pat<(nxv8f16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8f16 ZPR:$src)>; - def : Pat<(nxv8f16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8f16 ZPR:$src)>; - def : Pat<(nxv8f16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8f16 ZPR:$src)>; - def : Pat<(nxv8f16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8f16 ZPR:$src)>; - - def : Pat<(nxv4f32 (bitconvert (nxv16i8 ZPR:$src))), (nxv4f32 ZPR:$src)>; - def : Pat<(nxv4f32 (bitconvert (nxv8i16 ZPR:$src))), (nxv4f32 ZPR:$src)>; - def : Pat<(nxv4f32 (bitconvert (nxv4i32 ZPR:$src))), (nxv4f32 ZPR:$src)>; - def : Pat<(nxv4f32 (bitconvert (nxv2i64 ZPR:$src))), (nxv4f32 ZPR:$src)>; - def : Pat<(nxv4f32 (bitconvert (nxv8f16 ZPR:$src))), (nxv4f32 ZPR:$src)>; - def : Pat<(nxv4f32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4f32 ZPR:$src)>; - - def : Pat<(nxv2f64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2f64 ZPR:$src)>; - def : Pat<(nxv2f64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2f64 ZPR:$src)>; - def : Pat<(nxv2f64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2f64 ZPR:$src)>; - def : Pat<(nxv2f64 (bitconvert (nxv2i64 ZPR:$src))), (nxv2f64 ZPR:$src)>; - def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>; - def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>; - - def : Pat<(nxv8bf16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8bf16 ZPR:$src)>; - def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>; - def : Pat<(nxv8bf16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8bf16 ZPR:$src)>; - def : Pat<(nxv8bf16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8bf16 
ZPR:$src)>; - def : Pat<(nxv8bf16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8bf16 ZPR:$src)>; - def : Pat<(nxv8bf16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8bf16 ZPR:$src)>; - def : Pat<(nxv8bf16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8bf16 ZPR:$src)>; - - def : Pat<(nxv16i8 (bitconvert (nxv8bf16 ZPR:$src))), (nxv16i8 ZPR:$src)>; - def : Pat<(nxv8i16 (bitconvert (nxv8bf16 ZPR:$src))), (nxv8i16 ZPR:$src)>; - def : Pat<(nxv4i32 (bitconvert (nxv8bf16 ZPR:$src))), (nxv4i32 ZPR:$src)>; - def : Pat<(nxv2i64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2i64 ZPR:$src)>; - def : Pat<(nxv8f16 (bitconvert (nxv8bf16 ZPR:$src))), (nxv8f16 ZPR:$src)>; - def : Pat<(nxv4f32 (bitconvert (nxv8bf16 ZPR:$src))), (nxv4f32 ZPR:$src)>; - def : Pat<(nxv2f64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2f64 ZPR:$src)>; - } - - // These allow casting from/to unpacked predicate types. - def : Pat<(nxv16i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv16i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv16i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv16i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv8i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv8i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv8i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv4i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv4i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv4i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv2i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv2i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : 
Pat<(nxv2i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; - - // These allow casting from/to unpacked floating-point types. - def : Pat<(nxv2f16 (reinterpret_cast (nxv8f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv8f16 (reinterpret_cast (nxv2f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv4f16 (reinterpret_cast (nxv8f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv8f16 (reinterpret_cast (nxv4f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv2f32 (reinterpret_cast (nxv4f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv4f32 (reinterpret_cast (nxv2f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv2bf16 (reinterpret_cast (nxv8bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv8bf16 (reinterpret_cast (nxv2bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv4bf16 (reinterpret_cast (nxv8bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv8bf16 (reinterpret_cast (nxv4bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - - def : Pat<(nxv16i1 (and PPR:$Ps1, PPR:$Ps2)), - (AND_PPzPP (PTRUE_B 31), PPR:$Ps1, PPR:$Ps2)>; - def : Pat<(nxv8i1 (and PPR:$Ps1, PPR:$Ps2)), - (AND_PPzPP (PTRUE_H 31), PPR:$Ps1, PPR:$Ps2)>; - def : Pat<(nxv4i1 (and PPR:$Ps1, PPR:$Ps2)), - (AND_PPzPP (PTRUE_S 31), PPR:$Ps1, PPR:$Ps2)>; - def : Pat<(nxv2i1 (and PPR:$Ps1, PPR:$Ps2)), - (AND_PPzPP (PTRUE_D 31), PPR:$Ps1, PPR:$Ps2)>; - - // Add more complex addressing modes here as required - multiclass pred_load { - let AddedComplexity = 1 in { - def _reg_reg_z : Pat<(Ty (Load (AddrCP GPR64:$base, GPR64:$offset), (PredTy PPR:$gp), (SVEDup0Undef))), - (RegRegInst PPR:$gp, GPR64:$base, GPR64:$offset)>; - } - let AddedComplexity = 2 in { - def _reg_imm_z : Pat<(Ty (Load (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset), (PredTy PPR:$gp), (SVEDup0Undef))), - (RegImmInst PPR:$gp, GPR64:$base, simm4s1:$offset)>; - } - 
def _default_z : Pat<(Ty (Load GPR64:$base, (PredTy PPR:$gp), (SVEDup0Undef))), - (RegImmInst PPR:$gp, GPR64:$base, (i64 0))>; - } - - // 2-element contiguous loads - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - - // 4-element contiguous loads - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - - // 8-element contiguous loads - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - - // 16-element contiguous loads - defm : pred_load; - - multiclass pred_store { - let AddedComplexity = 1 in { - def _reg_reg : Pat<(Store (Ty ZPR:$vec), (AddrCP GPR64:$base, GPR64:$offset), (PredTy PPR:$gp)), - (RegRegInst ZPR:$vec, PPR:$gp, GPR64:$base, GPR64:$offset)>; - } - let AddedComplexity = 2 in { - def _reg_imm : Pat<(Store (Ty ZPR:$vec), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset), (PredTy PPR:$gp)), - (RegImmInst ZPR:$vec, PPR:$gp, GPR64:$base, simm4s1:$offset)>; - } - def _default : Pat<(Store (Ty ZPR:$vec), GPR64:$base, (PredTy PPR:$gp)), - (RegImmInst ZPR:$vec, PPR:$gp, GPR64:$base, (i64 0))>; - } - - // 2-element contiguous stores - defm : pred_store; - defm : pred_store; - defm : pred_store; - defm : pred_store; - defm : pred_store; - defm : pred_store; - defm : pred_store; - defm : pred_store; - - // 4-element contiguous stores - defm : pred_store; - defm : pred_store; - defm : pred_store; - defm : pred_store; - defm : pred_store; - defm : pred_store; - - // 8-element contiguous stores - defm : pred_store; - defm : pred_store; - defm : pred_store; - defm : pred_store; - - // 16-element contiguous stores - defm : pred_store; - - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - - defm : pred_store; - defm : 
pred_store; - defm : pred_store; - defm : pred_store; - - multiclass unpred_store { - let AddedComplexity = 1 in { - def _reg : Pat<(Store (Ty ZPR:$val), (AddrCP GPR64sp:$base, GPR64:$offset)), - (RegRegInst ZPR:$val, (PTrue 31), GPR64sp:$base, GPR64:$offset)>; - } - let AddedComplexity = 2 in { - def _imm : Pat<(Store (Ty ZPR:$val), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset)), - (RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>; - } - - def : Pat<(Store (Ty ZPR:$val), GPR64:$base), - (RegImmInst ZPR:$val, (PTrue 31), GPR64:$base, (i64 0))>; - } - - defm : unpred_store< store, nxv16i8, ST1B, ST1B_IMM, PTRUE_B, am_sve_regreg_lsl0>; - defm : unpred_store< truncstorevi8, nxv8i16, ST1B_H, ST1B_H_IMM, PTRUE_H, am_sve_regreg_lsl0>; - defm : unpred_store< truncstorevi8, nxv4i32, ST1B_S, ST1B_S_IMM, PTRUE_S, am_sve_regreg_lsl0>; - defm : unpred_store< truncstorevi8, nxv2i64, ST1B_D, ST1B_D_IMM, PTRUE_D, am_sve_regreg_lsl0>; - defm : unpred_store< store, nxv8i16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; - defm : unpred_store; - defm : unpred_store; - defm : unpred_store< store, nxv4i32, ST1W, ST1W_IMM, PTRUE_S, am_sve_regreg_lsl2>; - defm : unpred_store; - defm : unpred_store< store, nxv2i64, ST1D, ST1D_IMM, PTRUE_D, am_sve_regreg_lsl3>; - defm : unpred_store< store, nxv8f16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; - defm : unpred_store< store, nxv8bf16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; - defm : unpred_store< store, nxv4f16, ST1H_S, ST1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>; - defm : unpred_store< store, nxv4bf16, ST1H_S, ST1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>; - defm : unpred_store< store, nxv2f16, ST1H_D, ST1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>; - defm : unpred_store< store, nxv2bf16, ST1H_D, ST1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>; - defm : unpred_store< store, nxv4f32, ST1W, ST1W_IMM, PTRUE_S, am_sve_regreg_lsl2>; - defm : unpred_store< store, nxv2f32, ST1W_D, ST1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>; - defm : 
unpred_store< store, nxv2f64, ST1D, ST1D_IMM, PTRUE_D, am_sve_regreg_lsl3>; - - multiclass unpred_load { - let AddedComplexity = 1 in { - def _reg: Pat<(Ty (Load (AddrCP GPR64sp:$base, GPR64:$offset))), - (RegRegInst (PTrue 31), GPR64sp:$base, GPR64:$offset)>; - } - let AddedComplexity = 2 in { - def _imm: Pat<(Ty (Load (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset))), - (RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>; - } - - def : Pat<(Ty (Load GPR64:$base)), - (RegImmInst (PTrue 31), GPR64:$base, (i64 0))>; - } - - defm : unpred_load< load, nxv16i8, LD1B, LD1B_IMM, PTRUE_B, am_sve_regreg_lsl0>; - defm : unpred_load< zextloadvi8, nxv8i16, LD1B_H, LD1B_H_IMM, PTRUE_H, am_sve_regreg_lsl0>; - defm : unpred_load< zextloadvi8, nxv4i32, LD1B_S, LD1B_S_IMM, PTRUE_S, am_sve_regreg_lsl0>; - defm : unpred_load< zextloadvi8, nxv2i64, LD1B_D, LD1B_D_IMM, PTRUE_D, am_sve_regreg_lsl0>; - defm : unpred_load< extloadvi8, nxv8i16, LD1B_H, LD1B_H_IMM, PTRUE_H, am_sve_regreg_lsl0>; - defm : unpred_load< extloadvi8, nxv4i32, LD1B_S, LD1B_S_IMM, PTRUE_S, am_sve_regreg_lsl0>; - defm : unpred_load< extloadvi8, nxv2i64, LD1B_D, LD1B_D_IMM, PTRUE_D, am_sve_regreg_lsl0>; - defm : unpred_load< sextloadvi8, nxv8i16, LD1SB_H, LD1SB_H_IMM, PTRUE_H, am_sve_regreg_lsl0>; - defm : unpred_load< sextloadvi8, nxv4i32, LD1SB_S, LD1SB_S_IMM, PTRUE_S, am_sve_regreg_lsl0>; - defm : unpred_load< sextloadvi8, nxv2i64, LD1SB_D, LD1SB_D_IMM, PTRUE_D, am_sve_regreg_lsl0>; - defm : unpred_load< load, nxv8i16, LD1H, LD1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; - defm : unpred_load; - defm : unpred_load; - defm : unpred_load< extloadvi16, nxv4i32, LD1H_S, LD1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>; - defm : unpred_load< extloadvi16, nxv2i64, LD1H_D, LD1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>; - defm : unpred_load; - defm : unpred_load; - defm : unpred_load< load, nxv4i32, LD1W, LD1W_IMM, PTRUE_S, am_sve_regreg_lsl2>; - defm : unpred_load; - defm : unpred_load< extloadvi32, nxv2i64, LD1W_D, LD1W_D_IMM, 
PTRUE_D, am_sve_regreg_lsl2>; - defm : unpred_load; - defm : unpred_load< load, nxv2i64, LD1D, LD1D_IMM, PTRUE_D, am_sve_regreg_lsl3>; - defm : unpred_load< load, nxv8f16, LD1H, LD1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; - defm : unpred_load< load, nxv8bf16, LD1H, LD1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; - defm : unpred_load< load, nxv4f16, LD1H_S, LD1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>; - defm : unpred_load< load, nxv4bf16, LD1H_S, LD1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>; - defm : unpred_load< load, nxv2f16, LD1H_D, LD1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>; - defm : unpred_load< load, nxv2bf16, LD1H_D, LD1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>; - defm : unpred_load< load, nxv4f32, LD1W, LD1W_IMM, PTRUE_S, am_sve_regreg_lsl2>; - defm : unpred_load< load, nxv2f32, LD1W_D, LD1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>; - defm : unpred_load< load, nxv2f64, LD1D, LD1D_IMM, PTRUE_D, am_sve_regreg_lsl3>; - - // Allow using the reg+reg form of ld1b/st1b for memory accesses with the - // same width as nxv16i8. This saves an add in cases where we would - // otherwise compute the address separately. 
- multiclass unpred_loadstore_bitcast { - let Predicates = [IsLE] in { - def : Pat<(Ty (load (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset))), - (LD1B (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>; - def : Pat<(store (Ty ZPR:$val), (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset)), - (ST1B ZPR:$val, (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>; - } - } - defm : unpred_loadstore_bitcast; - defm : unpred_loadstore_bitcast; - defm : unpred_loadstore_bitcast; - defm : unpred_loadstore_bitcast; - defm : unpred_loadstore_bitcast; - defm : unpred_loadstore_bitcast; - defm : unpred_loadstore_bitcast; - - multiclass unpred_store_predicate { - def _fi : Pat<(store (Ty PPR:$val), (am_sve_fi GPR64sp:$base, simm9:$offset)), - (Store PPR:$val, GPR64sp:$base, simm9:$offset)>; - - def _default : Pat<(store (Ty PPR:$Val), GPR64:$base), - (Store PPR:$Val, GPR64:$base, (i64 0))>; - } - - defm Pat_Store_P16 : unpred_store_predicate; - - multiclass unpred_load_predicate { - def _fi : Pat<(Ty (load (am_sve_fi GPR64sp:$base, simm9:$offset))), - (Load GPR64sp:$base, simm9:$offset)>; - - def _default : Pat<(Ty (load GPR64:$base)), - (Load GPR64:$base, (i64 0))>; - } - - defm Pat_Load_P16 : unpred_load_predicate; - - multiclass ld1 { - // reg + reg - let AddedComplexity = 1 in { - def : Pat<(Ty (Load (PredTy PPR:$gp), (AddrCP GPR64:$base, GPR64:$offset), MemVT)), - (RegRegInst PPR:$gp, GPR64sp:$base, GPR64:$offset)>; - } - - // scalar + immediate (mul vl) - let AddedComplexity = 2 in { - def : Pat<(Ty (Load (PredTy PPR:$gp), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset), MemVT)), - (RegImmInst PPR:$gp, GPR64sp:$base, simm4s1:$offset)>; - } - - // base - def : Pat<(Ty (Load (PredTy PPR:$gp), GPR64:$base, MemVT)), - (RegImmInst PPR:$gp, GPR64sp:$base, (i64 0))>; - } - - // 2-element contiguous loads - defm : ld1; - defm : ld1; - defm : ld1; - defm : ld1; - defm : ld1; - defm : ld1; - defm : ld1; - defm : ld1; - - // 4-element contiguous loads - defm : ld1; - defm : ld1; - defm : ld1; 
- defm : ld1; - defm : ld1; - defm : ld1; - - // 8-element contiguous loads - defm : ld1; - defm : ld1; - defm : ld1; - defm : ld1; - defm : ld1; - - // 16-element contiguous loads - defm : ld1; -} // End HasSVEorStreamingSVE - -let Predicates = [HasSVE] in { - multiclass ldnf1 { - // scalar + immediate (mul vl) - let AddedComplexity = 1 in { - def : Pat<(Ty (Load (PredTy PPR:$gp), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset), MemVT)), - (I PPR:$gp, GPR64sp:$base, simm4s1:$offset)>; - } - - // base - def : Pat<(Ty (Load (PredTy PPR:$gp), GPR64:$base, MemVT)), - (I PPR:$gp, GPR64sp:$base, (i64 0))>; - } - - // 2-element contiguous non-faulting loads - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - - // 4-element contiguous non-faulting loads - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - - // 8-element contiguous non-faulting loads - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - - // 16-element contiguous non-faulting loads - defm : ldnf1; - - multiclass ldff1 { - // reg + reg - let AddedComplexity = 1 in { - def : Pat<(Ty (Load (PredTy PPR:$gp), (AddrCP GPR64:$base, GPR64:$offset), MemVT)), - (I PPR:$gp, GPR64sp:$base, GPR64:$offset)>; - } - - // Base - def : Pat<(Ty (Load (PredTy PPR:$gp), GPR64:$base, MemVT)), - (I PPR:$gp, GPR64sp:$base, XZR)>; - } - - // 2-element contiguous first faulting loads - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - - // 4-element contiguous first faulting loads - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - - // 8-element contiguous first faulting loads - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - - // 16-element contiguous first faulting loads - defm : ldff1; -} // End HasSVE - 
-let Predicates = [HasSVEorStreamingSVE] in { - multiclass st1 { - // reg + reg - let AddedComplexity = 1 in { - def : Pat<(Store (Ty ZPR:$vec), (AddrCP GPR64:$base, GPR64:$offset), (PredTy PPR:$gp), MemVT), - (RegRegInst ZPR:$vec, PPR:$gp, GPR64sp:$base, GPR64:$offset)>; - } - - // scalar + immediate (mul vl) - let AddedComplexity = 2 in { - def : Pat<(Store (Ty ZPR:$vec), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset), (PredTy PPR:$gp), MemVT), - (RegImmInst ZPR:$vec, PPR:$gp, GPR64sp:$base, simm4s1:$offset)>; - } - - // base - def : Pat<(Store (Ty ZPR:$vec), GPR64:$base, (PredTy PPR:$gp), MemVT), - (RegImmInst ZPR:$vec, PPR:$gp, GPR64:$base, (i64 0))>; - } - - // 2-element contiguous store - defm : st1; - defm : st1; - defm : st1; - defm : st1; - - // 4-element contiguous store - defm : st1; - defm : st1; - defm : st1; - - // 8-element contiguous store - defm : st1; - defm : st1; - - // 16-element contiguous store - defm : st1; - - // Insert scalar into undef[0] - def : Pat<(nxv16i8 (vector_insert (nxv16i8 (undef)), (i32 FPR32:$src), 0)), - (INSERT_SUBREG (nxv16i8 (IMPLICIT_DEF)), FPR32:$src, ssub)>; - def : Pat<(nxv8i16 (vector_insert (nxv8i16 (undef)), (i32 FPR32:$src), 0)), - (INSERT_SUBREG (nxv8i16 (IMPLICIT_DEF)), FPR32:$src, ssub)>; - def : Pat<(nxv4i32 (vector_insert (nxv4i32 (undef)), (i32 FPR32:$src), 0)), - (INSERT_SUBREG (nxv4i32 (IMPLICIT_DEF)), FPR32:$src, ssub)>; - def : Pat<(nxv2i64 (vector_insert (nxv2i64 (undef)), (i64 FPR64:$src), 0)), - (INSERT_SUBREG (nxv2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; - - def : Pat<(nxv8f16 (vector_insert (nxv8f16 (undef)), (f16 FPR16:$src), 0)), - (INSERT_SUBREG (nxv8f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>; - def : Pat<(nxv4f16 (vector_insert (nxv4f16 (undef)), (f16 FPR16:$src), 0)), - (INSERT_SUBREG (nxv4f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>; - def : Pat<(nxv2f16 (vector_insert (nxv2f16 (undef)), (f16 FPR16:$src), 0)), - (INSERT_SUBREG (nxv2f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>; - def : Pat<(nxv4f32 
(vector_insert (nxv4f32 (undef)), (f32 FPR32:$src), 0)), - (INSERT_SUBREG (nxv4f32 (IMPLICIT_DEF)), FPR32:$src, ssub)>; - def : Pat<(nxv2f32 (vector_insert (nxv2f32 (undef)), (f32 FPR32:$src), 0)), - (INSERT_SUBREG (nxv2f32 (IMPLICIT_DEF)), FPR32:$src, ssub)>; - def : Pat<(nxv2f64 (vector_insert (nxv2f64 (undef)), (f64 FPR64:$src), 0)), - (INSERT_SUBREG (nxv2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; - - // Insert scalar into vector[0] - def : Pat<(nxv16i8 (vector_insert (nxv16i8 ZPR:$vec), (i32 GPR32:$src), 0)), - (CPY_ZPmR_B ZPR:$vec, (PTRUE_B 1), GPR32:$src)>; - def : Pat<(nxv8i16 (vector_insert (nxv8i16 ZPR:$vec), (i32 GPR32:$src), 0)), - (CPY_ZPmR_H ZPR:$vec, (PTRUE_H 1), GPR32:$src)>; - def : Pat<(nxv4i32 (vector_insert (nxv4i32 ZPR:$vec), (i32 GPR32:$src), 0)), - (CPY_ZPmR_S ZPR:$vec, (PTRUE_S 1), GPR32:$src)>; - def : Pat<(nxv2i64 (vector_insert (nxv2i64 ZPR:$vec), (i64 GPR64:$src), 0)), - (CPY_ZPmR_D ZPR:$vec, (PTRUE_D 1), GPR64:$src)>; - - def : Pat<(nxv8f16 (vector_insert (nxv8f16 ZPR:$vec), (f16 FPR16:$src), 0)), - (SEL_ZPZZ_H (PTRUE_H 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), ZPR:$vec)>; - def : Pat<(nxv4f32 (vector_insert (nxv4f32 ZPR:$vec), (f32 FPR32:$src), 0)), - (SEL_ZPZZ_S (PTRUE_S 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), ZPR:$vec)>; - def : Pat<(nxv2f64 (vector_insert (nxv2f64 ZPR:$vec), (f64 FPR64:$src), 0)), - (SEL_ZPZZ_D (PTRUE_D 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), ZPR:$vec)>; - - // Insert scalar into vector with scalar index - def : Pat<(nxv16i8 (vector_insert (nxv16i8 ZPR:$vec), GPR32:$src, GPR64:$index)), - (CPY_ZPmR_B ZPR:$vec, - (CMPEQ_PPzZZ_B (PTRUE_B 31), - (INDEX_II_B 0, 1), - (DUP_ZR_B (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), - GPR32:$src)>; - def : Pat<(nxv8i16 (vector_insert (nxv8i16 ZPR:$vec), GPR32:$src, GPR64:$index)), - (CPY_ZPmR_H ZPR:$vec, - (CMPEQ_PPzZZ_H (PTRUE_H 31), - (INDEX_II_H 0, 1), - (DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), - GPR32:$src)>; - def : 
Pat<(nxv4i32 (vector_insert (nxv4i32 ZPR:$vec), GPR32:$src, GPR64:$index)), - (CPY_ZPmR_S ZPR:$vec, - (CMPEQ_PPzZZ_S (PTRUE_S 31), - (INDEX_II_S 0, 1), - (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), - GPR32:$src)>; - def : Pat<(nxv2i64 (vector_insert (nxv2i64 ZPR:$vec), GPR64:$src, GPR64:$index)), - (CPY_ZPmR_D ZPR:$vec, - (CMPEQ_PPzZZ_D (PTRUE_D 31), - (INDEX_II_D 0, 1), - (DUP_ZR_D GPR64:$index)), - GPR64:$src)>; - - // Insert FP scalar into vector with scalar index - def : Pat<(nxv2f16 (vector_insert (nxv2f16 ZPR:$vec), (f16 FPR16:$src), GPR64:$index)), - (CPY_ZPmV_H ZPR:$vec, - (CMPEQ_PPzZZ_D (PTRUE_D 31), - (INDEX_II_D 0, 1), - (DUP_ZR_D GPR64:$index)), - $src)>; - def : Pat<(nxv4f16 (vector_insert (nxv4f16 ZPR:$vec), (f16 FPR16:$src), GPR64:$index)), - (CPY_ZPmV_H ZPR:$vec, - (CMPEQ_PPzZZ_S (PTRUE_S 31), - (INDEX_II_S 0, 1), - (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), - $src)>; - def : Pat<(nxv8f16 (vector_insert (nxv8f16 ZPR:$vec), (f16 FPR16:$src), GPR64:$index)), - (CPY_ZPmV_H ZPR:$vec, - (CMPEQ_PPzZZ_H (PTRUE_H 31), - (INDEX_II_H 0, 1), - (DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), - $src)>; - def : Pat<(nxv2f32 (vector_insert (nxv2f32 ZPR:$vec), (f32 FPR32:$src), GPR64:$index)), - (CPY_ZPmV_S ZPR:$vec, - (CMPEQ_PPzZZ_D (PTRUE_D 31), - (INDEX_II_D 0, 1), - (DUP_ZR_D GPR64:$index)), - $src) >; - def : Pat<(nxv4f32 (vector_insert (nxv4f32 ZPR:$vec), (f32 FPR32:$src), GPR64:$index)), - (CPY_ZPmV_S ZPR:$vec, - (CMPEQ_PPzZZ_S (PTRUE_S 31), - (INDEX_II_S 0, 1), - (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), - $src)>; - def : Pat<(nxv2f64 (vector_insert (nxv2f64 ZPR:$vec), (f64 FPR64:$src), GPR64:$index)), - (CPY_ZPmV_D ZPR:$vec, - (CMPEQ_PPzZZ_D (PTRUE_D 31), - (INDEX_II_D 0, 1), - (DUP_ZR_D $index)), - $src)>; - - // Extract element from vector with scalar index - def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), GPR64:$index)), - (LASTB_RPZ_B (WHILELS_PXX_B XZR, GPR64:$index), ZPR:$vec)>; - def : Pat<(i32 
(vector_extract (nxv8i16 ZPR:$vec), GPR64:$index)), - (LASTB_RPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>; - def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), GPR64:$index)), - (LASTB_RPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>; - def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), GPR64:$index)), - (LASTB_RPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; - def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), GPR64:$index)), - (LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>; - def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), GPR64:$index)), - (LASTB_VPZ_H (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>; - def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), GPR64:$index)), - (LASTB_VPZ_H (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; - def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)), - (LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>; - def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), GPR64:$index)), - (LASTB_VPZ_S (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; - def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), GPR64:$index)), - (LASTB_VPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; - - // Extract element from vector with immediate index - def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)), - (EXTRACT_SUBREG (DUP_ZZI_B ZPR:$vec, sve_elm_idx_extdup_b:$index), ssub)>; - def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)), - (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), ssub)>; - def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)), - (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>; - def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)), - (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>; - def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)), - (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, 
sve_elm_idx_extdup_h:$index), hsub)>; - def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)), - (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), hsub)>; - def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)), - (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), hsub)>; - def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)), - (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>; - def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)), - (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), ssub)>; - def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)), - (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>; - - // Extract element from vector with immediate index that's within the bottom 128-bits. - let AddedComplexity = 1 in { - def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)), - (i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>; - def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)), - (i32 (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>; - def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)), - (i32 (UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>; - def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), VectorIndexD:$index)), - (i64 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index))>; - } - - def : Pat<(sext_inreg (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index), i8), - (i32 (SMOVvi8to32 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>; - def : Pat<(sext_inreg (anyext (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)), i8), - (i64 (SMOVvi8to64 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>; - - def : Pat<(sext_inreg (vector_extract 
(nxv8i16 ZPR:$vec), VectorIndexH:$index), i16), - (i32 (SMOVvi16to32 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>; - def : Pat<(sext_inreg (anyext (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)), i16), - (i64 (SMOVvi16to64 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>; - - def : Pat<(sext (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)), - (i64 (SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>; - - // Extract first element from vector. - let AddedComplexity = 2 in { - def : Pat<(vector_extract (nxv16i8 ZPR:$Zs), (i64 0)), - (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; - def : Pat<(vector_extract (nxv8i16 ZPR:$Zs), (i64 0)), - (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; - def : Pat<(vector_extract (nxv4i32 ZPR:$Zs), (i64 0)), - (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; - def : Pat<(vector_extract (nxv2i64 ZPR:$Zs), (i64 0)), - (i64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>; - def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)), - (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>; - def : Pat<(vector_extract (nxv4f16 ZPR:$Zs), (i64 0)), - (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>; - def : Pat<(vector_extract (nxv2f16 ZPR:$Zs), (i64 0)), - (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>; - def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)), - (f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; - def : Pat<(vector_extract (nxv2f32 ZPR:$Zs), (i64 0)), - (f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; - def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)), - (f64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>; - } -} // End HasSVEorStreamingSVE - -let Predicates = [HasSVE, HasMatMulInt8] in { - defm SMMLA_ZZZ : sve_int_matmul<0b00, "smmla", int_aarch64_sve_smmla>; - defm UMMLA_ZZZ : sve_int_matmul<0b11, "ummla", int_aarch64_sve_ummla>; - defm USMMLA_ZZZ : sve_int_matmul<0b10, "usmmla", int_aarch64_sve_usmmla>; -} // End HasSVE, HasMatMulInt8 - -let Predicates = [HasSVEorStreamingSVE, HasMatMulInt8] in { - defm USDOT_ZZZ : sve_int_dot_mixed<"usdot", 
int_aarch64_sve_usdot>; - defm USDOT_ZZZI : sve_int_dot_mixed_indexed<0, "usdot", int_aarch64_sve_usdot_lane>; - defm SUDOT_ZZZI : sve_int_dot_mixed_indexed<1, "sudot", int_aarch64_sve_sudot_lane>; -} // End HasSVEorStreamingSVE, HasMatMulInt8 - -let Predicates = [HasSVE, HasMatMulFP32] in { - defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0, "fmmla", ZPR32, int_aarch64_sve_fmmla, nxv4f32>; -} // End HasSVE, HasMatMulFP32 - -let Predicates = [HasSVE, HasMatMulFP64] in { - defm FMMLA_ZZZ_D : sve_fp_matrix_mla<1, "fmmla", ZPR64, int_aarch64_sve_fmmla, nxv2f64>; - defm LD1RO_B_IMM : sve_mem_ldor_si<0b00, "ld1rob", Z_b, ZPR8, nxv16i8, nxv16i1, AArch64ld1ro_z>; - defm LD1RO_H_IMM : sve_mem_ldor_si<0b01, "ld1roh", Z_h, ZPR16, nxv8i16, nxv8i1, AArch64ld1ro_z>; - defm LD1RO_W_IMM : sve_mem_ldor_si<0b10, "ld1row", Z_s, ZPR32, nxv4i32, nxv4i1, AArch64ld1ro_z>; - defm LD1RO_D_IMM : sve_mem_ldor_si<0b11, "ld1rod", Z_d, ZPR64, nxv2i64, nxv2i1, AArch64ld1ro_z>; - defm LD1RO_B : sve_mem_ldor_ss<0b00, "ld1rob", Z_b, ZPR8, GPR64NoXZRshifted8, nxv16i8, nxv16i1, AArch64ld1ro_z, am_sve_regreg_lsl0>; - defm LD1RO_H : sve_mem_ldor_ss<0b01, "ld1roh", Z_h, ZPR16, GPR64NoXZRshifted16, nxv8i16, nxv8i1, AArch64ld1ro_z, am_sve_regreg_lsl1>; - defm LD1RO_W : sve_mem_ldor_ss<0b10, "ld1row", Z_s, ZPR32, GPR64NoXZRshifted32, nxv4i32, nxv4i1, AArch64ld1ro_z, am_sve_regreg_lsl2>; - defm LD1RO_D : sve_mem_ldor_ss<0b11, "ld1rod", Z_d, ZPR64, GPR64NoXZRshifted64, nxv2i64, nxv2i1, AArch64ld1ro_z, am_sve_regreg_lsl3>; -} // End HasSVE, HasMatMulFP64 - -let Predicates = [HasSVEorStreamingSVE, HasMatMulFP64] in { - defm ZIP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 0, "zip1", int_aarch64_sve_zip1q>; - defm ZIP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 1, "zip2", int_aarch64_sve_zip2q>; - defm UZP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 0, "uzp1", int_aarch64_sve_uzp1q>; - defm UZP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 1, "uzp2", int_aarch64_sve_uzp2q>; - defm TRN1_ZZZ_Q : 
sve_int_perm_bin_perm_128_zz<0b11, 0, "trn1", int_aarch64_sve_trn1q>; - defm TRN2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b11, 1, "trn2", int_aarch64_sve_trn2q>; -} // End HasSVEorStreamingSVE, HasMatMulFP64 - -let Predicates = [HasSVE2orStreamingSVE] in { - // SVE2 integer multiply-add (indexed) - defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla", int_aarch64_sve_mla_lane>; - defm MLS_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b1, "mls", int_aarch64_sve_mls_lane>; - - // SVE2 saturating multiply-add high (indexed) - defm SQRDMLAH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b0, "sqrdmlah", int_aarch64_sve_sqrdmlah_lane>; - defm SQRDMLSH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b1, "sqrdmlsh", int_aarch64_sve_sqrdmlsh_lane>; - - // SVE2 saturating multiply-add high (vectors, unpredicated) - defm SQRDMLAH_ZZZ : sve2_int_mla<0b0, "sqrdmlah", int_aarch64_sve_sqrdmlah>; - defm SQRDMLSH_ZZZ : sve2_int_mla<0b1, "sqrdmlsh", int_aarch64_sve_sqrdmlsh>; - - // SVE2 integer multiply (indexed) - defm MUL_ZZZI : sve2_int_mul_by_indexed_elem<0b1110, "mul", int_aarch64_sve_mul_lane>; - - // SVE2 saturating multiply high (indexed) - defm SQDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1100, "sqdmulh", int_aarch64_sve_sqdmulh_lane>; - defm SQRDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1101, "sqrdmulh", int_aarch64_sve_sqrdmulh_lane>; - - // SVE2 signed saturating doubling multiply high (unpredicated) - defm SQDMULH_ZZZ : sve2_int_mul<0b100, "sqdmulh", int_aarch64_sve_sqdmulh>; - defm SQRDMULH_ZZZ : sve2_int_mul<0b101, "sqrdmulh", int_aarch64_sve_sqrdmulh>; - - // SVE2 integer multiply vectors (unpredicated) - defm MUL_ZZZ : sve2_int_mul<0b000, "mul", null_frag, AArch64mul_p>; - defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh", null_frag, AArch64smulh_p>; - defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh", null_frag, AArch64umulh_p>; - defm PMUL_ZZZ : sve2_int_mul_single<0b001, "pmul", int_aarch64_sve_pmul>; - - // SVE2 complex integer dot product (indexed) - defm CDOT_ZZZI : 
sve2_cintx_dot_by_indexed_elem<"cdot", int_aarch64_sve_cdot_lane>; - - // SVE2 complex integer dot product - defm CDOT_ZZZ : sve2_cintx_dot<"cdot", int_aarch64_sve_cdot>; - - // SVE2 complex integer multiply-add (indexed) - defm CMLA_ZZZI : sve2_cmla_by_indexed_elem<0b0, "cmla", int_aarch64_sve_cmla_lane_x>; - // SVE2 complex saturating multiply-add (indexed) - defm SQRDCMLAH_ZZZI : sve2_cmla_by_indexed_elem<0b1, "sqrdcmlah", int_aarch64_sve_sqrdcmlah_lane_x>; - - // SVE2 complex integer multiply-add - defm CMLA_ZZZ : sve2_int_cmla<0b0, "cmla", int_aarch64_sve_cmla_x>; - defm SQRDCMLAH_ZZZ : sve2_int_cmla<0b1, "sqrdcmlah", int_aarch64_sve_sqrdcmlah_x>; - - // SVE2 integer multiply long (indexed) - defm SMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b000, "smullb", int_aarch64_sve_smullb_lane>; - defm SMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b001, "smullt", int_aarch64_sve_smullt_lane>; - defm UMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b010, "umullb", int_aarch64_sve_umullb_lane>; - defm UMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b011, "umullt", int_aarch64_sve_umullt_lane>; - - // SVE2 saturating multiply (indexed) - defm SQDMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b100, "sqdmullb", int_aarch64_sve_sqdmullb_lane>; - defm SQDMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b101, "sqdmullt", int_aarch64_sve_sqdmullt_lane>; - - // SVE2 integer multiply-add long (indexed) - defm SMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1000, "smlalb", int_aarch64_sve_smlalb_lane>; - defm SMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1001, "smlalt", int_aarch64_sve_smlalt_lane>; - defm UMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1010, "umlalb", int_aarch64_sve_umlalb_lane>; - defm UMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1011, "umlalt", int_aarch64_sve_umlalt_lane>; - defm SMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1100, "smlslb", int_aarch64_sve_smlslb_lane>; - defm SMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1101, 
"smlslt", int_aarch64_sve_smlslt_lane>; - defm UMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1110, "umlslb", int_aarch64_sve_umlslb_lane>; - defm UMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1111, "umlslt", int_aarch64_sve_umlslt_lane>; - - // SVE2 integer multiply-add long (vectors, unpredicated) - defm SMLALB_ZZZ : sve2_int_mla_long<0b10000, "smlalb", int_aarch64_sve_smlalb>; - defm SMLALT_ZZZ : sve2_int_mla_long<0b10001, "smlalt", int_aarch64_sve_smlalt>; - defm UMLALB_ZZZ : sve2_int_mla_long<0b10010, "umlalb", int_aarch64_sve_umlalb>; - defm UMLALT_ZZZ : sve2_int_mla_long<0b10011, "umlalt", int_aarch64_sve_umlalt>; - defm SMLSLB_ZZZ : sve2_int_mla_long<0b10100, "smlslb", int_aarch64_sve_smlslb>; - defm SMLSLT_ZZZ : sve2_int_mla_long<0b10101, "smlslt", int_aarch64_sve_smlslt>; - defm UMLSLB_ZZZ : sve2_int_mla_long<0b10110, "umlslb", int_aarch64_sve_umlslb>; - defm UMLSLT_ZZZ : sve2_int_mla_long<0b10111, "umlslt", int_aarch64_sve_umlslt>; - - // SVE2 saturating multiply-add long (indexed) - defm SQDMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0100, "sqdmlalb", int_aarch64_sve_sqdmlalb_lane>; - defm SQDMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0101, "sqdmlalt", int_aarch64_sve_sqdmlalt_lane>; - defm SQDMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0110, "sqdmlslb", int_aarch64_sve_sqdmlslb_lane>; - defm SQDMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0111, "sqdmlslt", int_aarch64_sve_sqdmlslt_lane>; - - // SVE2 saturating multiply-add long (vectors, unpredicated) - defm SQDMLALB_ZZZ : sve2_int_mla_long<0b11000, "sqdmlalb", int_aarch64_sve_sqdmlalb>; - defm SQDMLALT_ZZZ : sve2_int_mla_long<0b11001, "sqdmlalt", int_aarch64_sve_sqdmlalt>; - defm SQDMLSLB_ZZZ : sve2_int_mla_long<0b11010, "sqdmlslb", int_aarch64_sve_sqdmlslb>; - defm SQDMLSLT_ZZZ : sve2_int_mla_long<0b11011, "sqdmlslt", int_aarch64_sve_sqdmlslt>; - - // SVE2 saturating multiply-add interleaved long - defm SQDMLALBT_ZZZ : sve2_int_mla_long<0b00010, "sqdmlalbt", 
int_aarch64_sve_sqdmlalbt>; - defm SQDMLSLBT_ZZZ : sve2_int_mla_long<0b00011, "sqdmlslbt", int_aarch64_sve_sqdmlslbt>; - - // SVE2 integer halving add/subtract (predicated) - defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd", int_aarch64_sve_shadd>; - defm UHADD_ZPmZ : sve2_int_arith_pred<0b100010, "uhadd", int_aarch64_sve_uhadd>; - defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub", int_aarch64_sve_shsub>; - defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub", int_aarch64_sve_uhsub>; - defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd", int_aarch64_sve_srhadd>; - defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd", int_aarch64_sve_urhadd>; - defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr", int_aarch64_sve_shsubr>; - defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr", int_aarch64_sve_uhsubr>; - - // SVE2 integer pairwise add and accumulate long - defm SADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<0, "sadalp", int_aarch64_sve_sadalp>; - defm UADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<1, "uadalp", int_aarch64_sve_uadalp>; - - // SVE2 integer pairwise arithmetic - defm ADDP_ZPmZ : sve2_int_arith_pred<0b100011, "addp", int_aarch64_sve_addp>; - defm SMAXP_ZPmZ : sve2_int_arith_pred<0b101001, "smaxp", int_aarch64_sve_smaxp>; - defm UMAXP_ZPmZ : sve2_int_arith_pred<0b101011, "umaxp", int_aarch64_sve_umaxp>; - defm SMINP_ZPmZ : sve2_int_arith_pred<0b101101, "sminp", int_aarch64_sve_sminp>; - defm UMINP_ZPmZ : sve2_int_arith_pred<0b101111, "uminp", int_aarch64_sve_uminp>; - - // SVE2 integer unary operations (predicated) - defm URECPE_ZPmZ : sve2_int_un_pred_arit_s<0b000, "urecpe", int_aarch64_sve_urecpe>; - defm URSQRTE_ZPmZ : sve2_int_un_pred_arit_s<0b001, "ursqrte", int_aarch64_sve_ursqrte>; - defm SQABS_ZPmZ : sve2_int_un_pred_arit<0b100, "sqabs", int_aarch64_sve_sqabs>; - defm SQNEG_ZPmZ : sve2_int_un_pred_arit<0b101, "sqneg", int_aarch64_sve_sqneg>; - - // SVE2 saturating add/subtract - defm SQADD_ZPmZ : 
sve2_int_arith_pred<0b110000, "sqadd", int_aarch64_sve_sqadd>; - defm UQADD_ZPmZ : sve2_int_arith_pred<0b110010, "uqadd", int_aarch64_sve_uqadd>; - defm SQSUB_ZPmZ : sve2_int_arith_pred<0b110100, "sqsub", int_aarch64_sve_sqsub>; - defm UQSUB_ZPmZ : sve2_int_arith_pred<0b110110, "uqsub", int_aarch64_sve_uqsub>; - defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd", int_aarch64_sve_suqadd>; - defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd", int_aarch64_sve_usqadd>; - defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr", int_aarch64_sve_sqsubr>; - defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr", int_aarch64_sve_uqsubr>; - - // SVE2 saturating/rounding bitwise shift left (predicated) - defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl", int_aarch64_sve_srshl, "SRSHL_ZPZZ", DestructiveBinaryCommWithRev, "SRSHLR_ZPmZ">; - defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl", int_aarch64_sve_urshl, "URSHL_ZPZZ", DestructiveBinaryCommWithRev, "URSHLR_ZPmZ">; - defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr", null_frag, "SRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "SRSHL_ZPmZ", /*isReverseInstr*/ 1>; - defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr", null_frag, "URSHLR_ZPZZ", DestructiveBinaryCommWithRev, "URSHL_ZPmZ", /*isReverseInstr*/ 1>; - defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl", int_aarch64_sve_sqshl, "SQSHL_ZPZZ", DestructiveBinaryCommWithRev, "SQSHLR_ZPmZ">; - defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl", int_aarch64_sve_uqshl, "UQSHL_ZPZZ", DestructiveBinaryCommWithRev, "UQSHLR_ZPmZ">; - defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl", int_aarch64_sve_sqrshl, "SQRSHL_ZPZZ", DestructiveBinaryCommWithRev, "SQRSHLR_ZPmZ">; - defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl", int_aarch64_sve_uqrshl, "UQRSHL_ZPZZ", DestructiveBinaryCommWithRev, "UQRSHLR_ZPmZ">; - defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr", null_frag, "SQSHLR_ZPZZ", 
DestructiveBinaryCommWithRev, "SQSHL_ZPmZ", /*isReverseInstr*/ 1>; - defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr", null_frag, "UQSHLR_ZPZZ", DestructiveBinaryCommWithRev, "UQSHL_ZPmZ", /*isReverseInstr*/ 1>; - defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr", null_frag, "SQRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "SQRSHL_ZPmZ", /*isReverseInstr*/ 1>; - defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag, "UQRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "UQRSHL_ZPmZ", /*isReverseInstr*/ 1>; - - defm SRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd; - defm URSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd; - defm SQSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd; - defm UQSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd; - defm SQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd; - defm UQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd; -} // End HasSVE2orStreamingSVE - -let Predicates = [HasSVE2orStreamingSVE, UseExperimentalZeroingPseudos] in { - defm SQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd; - defm UQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd; - defm SRSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd; - defm URSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd; - defm SQSHLU_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd; -} // End HasSVE2orStreamingSVE, UseExperimentalZeroingPseudos - -let Predicates = [HasSVE2orStreamingSVE] in { - // SVE2 predicated shifts - defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left_dup<0b0110, "sqshl", "SQSHL_ZPZI", int_aarch64_sve_sqshl>; - defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left_dup<0b0111, "uqshl", "UQSHL_ZPZI", int_aarch64_sve_uqshl>; - defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right< 0b1100, "srshr", "SRSHR_ZPZI", int_aarch64_sve_srshr>; - defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right< 0b1101, "urshr", "URSHR_ZPZI", int_aarch64_sve_urshr>; - defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", 
int_aarch64_sve_sqshlu>; - - // SVE2 integer add/subtract long - defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb", int_aarch64_sve_saddlb>; - defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt", int_aarch64_sve_saddlt>; - defm UADDLB_ZZZ : sve2_wide_int_arith_long<0b00010, "uaddlb", int_aarch64_sve_uaddlb>; - defm UADDLT_ZZZ : sve2_wide_int_arith_long<0b00011, "uaddlt", int_aarch64_sve_uaddlt>; - defm SSUBLB_ZZZ : sve2_wide_int_arith_long<0b00100, "ssublb", int_aarch64_sve_ssublb>; - defm SSUBLT_ZZZ : sve2_wide_int_arith_long<0b00101, "ssublt", int_aarch64_sve_ssublt>; - defm USUBLB_ZZZ : sve2_wide_int_arith_long<0b00110, "usublb", int_aarch64_sve_usublb>; - defm USUBLT_ZZZ : sve2_wide_int_arith_long<0b00111, "usublt", int_aarch64_sve_usublt>; - defm SABDLB_ZZZ : sve2_wide_int_arith_long<0b01100, "sabdlb", int_aarch64_sve_sabdlb>; - defm SABDLT_ZZZ : sve2_wide_int_arith_long<0b01101, "sabdlt", int_aarch64_sve_sabdlt>; - defm UABDLB_ZZZ : sve2_wide_int_arith_long<0b01110, "uabdlb", int_aarch64_sve_uabdlb>; - defm UABDLT_ZZZ : sve2_wide_int_arith_long<0b01111, "uabdlt", int_aarch64_sve_uabdlt>; - - // SVE2 integer add/subtract wide - defm SADDWB_ZZZ : sve2_wide_int_arith_wide<0b000, "saddwb", int_aarch64_sve_saddwb>; - defm SADDWT_ZZZ : sve2_wide_int_arith_wide<0b001, "saddwt", int_aarch64_sve_saddwt>; - defm UADDWB_ZZZ : sve2_wide_int_arith_wide<0b010, "uaddwb", int_aarch64_sve_uaddwb>; - defm UADDWT_ZZZ : sve2_wide_int_arith_wide<0b011, "uaddwt", int_aarch64_sve_uaddwt>; - defm SSUBWB_ZZZ : sve2_wide_int_arith_wide<0b100, "ssubwb", int_aarch64_sve_ssubwb>; - defm SSUBWT_ZZZ : sve2_wide_int_arith_wide<0b101, "ssubwt", int_aarch64_sve_ssubwt>; - defm USUBWB_ZZZ : sve2_wide_int_arith_wide<0b110, "usubwb", int_aarch64_sve_usubwb>; - defm USUBWT_ZZZ : sve2_wide_int_arith_wide<0b111, "usubwt", int_aarch64_sve_usubwt>; - - // SVE2 integer multiply long - defm SQDMULLB_ZZZ : sve2_wide_int_arith_long<0b11000, "sqdmullb", int_aarch64_sve_sqdmullb>; - 
defm SQDMULLT_ZZZ : sve2_wide_int_arith_long<0b11001, "sqdmullt", int_aarch64_sve_sqdmullt>; - defm SMULLB_ZZZ : sve2_wide_int_arith_long<0b11100, "smullb", int_aarch64_sve_smullb>; - defm SMULLT_ZZZ : sve2_wide_int_arith_long<0b11101, "smullt", int_aarch64_sve_smullt>; - defm UMULLB_ZZZ : sve2_wide_int_arith_long<0b11110, "umullb", int_aarch64_sve_umullb>; - defm UMULLT_ZZZ : sve2_wide_int_arith_long<0b11111, "umullt", int_aarch64_sve_umullt>; - defm PMULLB_ZZZ : sve2_pmul_long<0b0, "pmullb", int_aarch64_sve_pmullb_pair>; - defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt", int_aarch64_sve_pmullt_pair>; - - // SVE2 bitwise shift and insert - defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri", int_aarch64_sve_sri>; - defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli", int_aarch64_sve_sli>; - - // SVE2 bitwise shift right and accumulate - defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra", int_aarch64_sve_ssra>; - defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra", int_aarch64_sve_usra>; - defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra", int_aarch64_sve_srsra>; - defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra", int_aarch64_sve_ursra>; - - // SVE2 complex integer add - defm CADD_ZZI : sve2_int_cadd<0b0, "cadd", int_aarch64_sve_cadd_x>; - defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd", int_aarch64_sve_sqcadd_x>; - - // SVE2 integer absolute difference and accumulate - defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba", int_aarch64_sve_saba>; - defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba", int_aarch64_sve_uaba>; - - // SVE2 integer absolute difference and accumulate long - defm SABALB_ZZZ : sve2_int_absdiff_accum_long<0b00, "sabalb", int_aarch64_sve_sabalb>; - defm SABALT_ZZZ : sve2_int_absdiff_accum_long<0b01, "sabalt", int_aarch64_sve_sabalt>; - defm UABALB_ZZZ : sve2_int_absdiff_accum_long<0b10, "uabalb", int_aarch64_sve_uabalb>; - defm UABALT_ZZZ : sve2_int_absdiff_accum_long<0b11, "uabalt", 
int_aarch64_sve_uabalt>; - - // SVE2 integer add/subtract long with carry - defm ADCLB_ZZZ : sve2_int_addsub_long_carry<0b00, "adclb", int_aarch64_sve_adclb>; - defm ADCLT_ZZZ : sve2_int_addsub_long_carry<0b01, "adclt", int_aarch64_sve_adclt>; - defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb", int_aarch64_sve_sbclb>; - defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt", int_aarch64_sve_sbclt>; - - // SVE2 bitwise shift right narrow (bottom) - defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb", int_aarch64_sve_sqshrunb>; - defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb", int_aarch64_sve_sqrshrunb>; - defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb", int_aarch64_sve_shrnb>; - defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb", int_aarch64_sve_rshrnb>; - defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb", int_aarch64_sve_sqshrnb>; - defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb", int_aarch64_sve_sqrshrnb>; - defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb", int_aarch64_sve_uqshrnb>; - defm UQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb", int_aarch64_sve_uqrshrnb>; - - // SVE2 bitwise shift right narrow (top) - defm SQSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt", int_aarch64_sve_sqshrunt>; - defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt", int_aarch64_sve_sqrshrunt>; - defm SHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt", int_aarch64_sve_shrnt>; - defm RSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt", int_aarch64_sve_rshrnt>; - defm SQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt", int_aarch64_sve_sqshrnt>; - defm SQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt", 
int_aarch64_sve_sqrshrnt>; - defm UQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt", int_aarch64_sve_uqshrnt>; - defm UQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt", int_aarch64_sve_uqrshrnt>; - - // SVE2 integer add/subtract narrow high part (bottom) - defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb", int_aarch64_sve_addhnb>; - defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b01, "raddhnb", int_aarch64_sve_raddhnb>; - defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b10, "subhnb", int_aarch64_sve_subhnb>; - defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b11, "rsubhnb", int_aarch64_sve_rsubhnb>; - - // SVE2 integer add/subtract narrow high part (top) - defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b00, "addhnt", int_aarch64_sve_addhnt>; - defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b01, "raddhnt", int_aarch64_sve_raddhnt>; - defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b10, "subhnt", int_aarch64_sve_subhnt>; - defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b11, "rsubhnt", int_aarch64_sve_rsubhnt>; - - // SVE2 saturating extract narrow (bottom) - defm SQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b00, "sqxtnb", int_aarch64_sve_sqxtnb>; - defm UQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b01, "uqxtnb", int_aarch64_sve_uqxtnb>; - defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b10, "sqxtunb", int_aarch64_sve_sqxtunb>; - - // SVE2 saturating extract narrow (top) - defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt", int_aarch64_sve_sqxtnt>; - defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt", int_aarch64_sve_uqxtnt>; - defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt", int_aarch64_sve_sqxtunt>; -} // End HasSVE2orStreamingSVE - -let Predicates = [HasSVE2] in { - // SVE2 character match - defm MATCH_PPzZZ : sve2_char_match<0b0, "match", int_aarch64_sve_match>; - defm NMATCH_PPzZZ : sve2_char_match<0b1, 
"nmatch", int_aarch64_sve_nmatch>; -} // End HasSVE2 - -let Predicates = [HasSVE2orStreamingSVE] in { - // SVE2 bitwise exclusive-or interleaved - defm EORBT_ZZZ : sve2_bitwise_xor_interleaved<0b0, "eorbt", int_aarch64_sve_eorbt>; - defm EORTB_ZZZ : sve2_bitwise_xor_interleaved<0b1, "eortb", int_aarch64_sve_eortb>; - - // SVE2 bitwise shift left long - defm SSHLLB_ZZI : sve2_bitwise_shift_left_long<0b00, "sshllb", int_aarch64_sve_sshllb>; - defm SSHLLT_ZZI : sve2_bitwise_shift_left_long<0b01, "sshllt", int_aarch64_sve_sshllt>; - defm USHLLB_ZZI : sve2_bitwise_shift_left_long<0b10, "ushllb", int_aarch64_sve_ushllb>; - defm USHLLT_ZZI : sve2_bitwise_shift_left_long<0b11, "ushllt", int_aarch64_sve_ushllt>; - - // SVE2 integer add/subtract interleaved long - defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt", int_aarch64_sve_saddlbt>; - defm SSUBLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b10, "ssublbt", int_aarch64_sve_ssublbt>; - defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb", int_aarch64_sve_ssubltb>; -} // End HasSVE2orStreamingSVE - -let Predicates = [HasSVE2] in { - // SVE2 histogram generation (segment) - def HISTSEG_ZZZ : sve2_hist_gen_segment<"histseg", int_aarch64_sve_histseg>; - - // SVE2 histogram generation (vector) - defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt", int_aarch64_sve_histcnt>; -} // End HasSVE2 - -let Predicates = [HasSVE2orStreamingSVE] in { - // SVE2 floating-point base 2 logarithm as integer - defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb", int_aarch64_sve_flogb>; - - // SVE2 floating-point convert precision - defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding_top<"fcvtxnt", "int_aarch64_sve_fcvtxnt">; - defm FCVTX_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtx", "int_aarch64_sve_fcvtx">; - defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt", "int_aarch64_sve_fcvtnt">; - defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt", "int_aarch64_sve_fcvtlt">; - - // SVE2 floating-point 
pairwise operations - defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp", int_aarch64_sve_faddp>; - defm FMAXNMP_ZPmZZ : sve2_fp_pairwise_pred<0b100, "fmaxnmp", int_aarch64_sve_fmaxnmp>; - defm FMINNMP_ZPmZZ : sve2_fp_pairwise_pred<0b101, "fminnmp", int_aarch64_sve_fminnmp>; - defm FMAXP_ZPmZZ : sve2_fp_pairwise_pred<0b110, "fmaxp", int_aarch64_sve_fmaxp>; - defm FMINP_ZPmZZ : sve2_fp_pairwise_pred<0b111, "fminp", int_aarch64_sve_fminp>; - - // SVE2 floating-point multiply-add long (indexed) - defm FMLALB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b00, "fmlalb", int_aarch64_sve_fmlalb_lane>; - defm FMLALT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b01, "fmlalt", int_aarch64_sve_fmlalt_lane>; - defm FMLSLB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b10, "fmlslb", int_aarch64_sve_fmlslb_lane>; - defm FMLSLT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b11, "fmlslt", int_aarch64_sve_fmlslt_lane>; - - // SVE2 floating-point multiply-add long - defm FMLALB_ZZZ_SHH : sve2_fp_mla_long<0b00, "fmlalb", int_aarch64_sve_fmlalb>; - defm FMLALT_ZZZ_SHH : sve2_fp_mla_long<0b01, "fmlalt", int_aarch64_sve_fmlalt>; - defm FMLSLB_ZZZ_SHH : sve2_fp_mla_long<0b10, "fmlslb", int_aarch64_sve_fmlslb>; - defm FMLSLT_ZZZ_SHH : sve2_fp_mla_long<0b11, "fmlslt", int_aarch64_sve_fmlslt>; - - // SVE2 bitwise ternary operations - defm EOR3_ZZZZ : sve2_int_bitwise_ternary_op<0b000, "eor3", int_aarch64_sve_eor3>; - defm BCAX_ZZZZ : sve2_int_bitwise_ternary_op<0b010, "bcax", int_aarch64_sve_bcax>; - defm BSL_ZZZZ : sve2_int_bitwise_ternary_op<0b001, "bsl", int_aarch64_sve_bsl>; - defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", int_aarch64_sve_bsl1n>; - defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", int_aarch64_sve_bsl2n>; - defm NBSL_ZZZZ : sve2_int_bitwise_ternary_op<0b111, "nbsl", int_aarch64_sve_nbsl>; - - // SVE2 bitwise xor and rotate right by immediate - defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar", int_aarch64_sve_xar>; - - // SVE2 extract vector 
(immediate offset, constructive) - def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">; -} // End HasSVE2orStreamingSVE - -let Predicates = [HasSVE2] in { - // SVE2 non-temporal gather loads - defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00000, "ldnt1sb", AArch64ldnt1s_gather_z, nxv4i8>; - defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00001, "ldnt1b", AArch64ldnt1_gather_z, nxv4i8>; - defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00100, "ldnt1sh", AArch64ldnt1s_gather_z, nxv4i16>; - defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00101, "ldnt1h", AArch64ldnt1_gather_z, nxv4i16>; - defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b01001, "ldnt1w", AArch64ldnt1_gather_z, nxv4i32>; - - defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10000, "ldnt1sb", AArch64ldnt1s_gather_z, nxv2i8>; - defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10010, "ldnt1b", AArch64ldnt1_gather_z, nxv2i8>; - defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10100, "ldnt1sh", AArch64ldnt1s_gather_z, nxv2i16>; - defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10110, "ldnt1h", AArch64ldnt1_gather_z, nxv2i16>; - defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11000, "ldnt1sw", AArch64ldnt1s_gather_z, nxv2i32>; - defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11010, "ldnt1w", AArch64ldnt1_gather_z, nxv2i32>; - defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11110, "ldnt1d", AArch64ldnt1_gather_z, nxv2i64>; -} // End HasSVE2 - -let Predicates = [HasSVE2orStreamingSVE] in { - // SVE2 vector splice (constructive) - defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">; -} // End HasSVE2orStreamingSVE - -let Predicates = [HasSVE2] in { - // SVE2 non-temporal scatter stores - defm STNT1B_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b001, "stnt1b", AArch64stnt1_scatter, nxv4i8>; - defm STNT1H_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b011, "stnt1h", AArch64stnt1_scatter, nxv4i16>; - defm STNT1W_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b101, "stnt1w", AArch64stnt1_scatter, nxv4i32>; - - defm STNT1B_ZZR_D : 
sve2_mem_sstnt_vs_64_ptrs<0b000, "stnt1b", AArch64stnt1_scatter, nxv2i8>; - defm STNT1H_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b010, "stnt1h", AArch64stnt1_scatter, nxv2i16>; - defm STNT1W_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b100, "stnt1w", AArch64stnt1_scatter, nxv2i32>; - defm STNT1D_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b110, "stnt1d", AArch64stnt1_scatter, nxv2i64>; -} // End HasSVE2 - -let Predicates = [HasSVE2orStreamingSVE] in { - // SVE2 table lookup (three sources) - defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl", int_aarch64_sve_tbl2>; - defm TBX_ZZZ : sve2_int_perm_tbx<"tbx", int_aarch64_sve_tbx>; - - // SVE2 integer compare scalar count and limit - defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", int_aarch64_sve_whilege>; - defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt", int_aarch64_sve_whilegt>; - defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs", int_aarch64_sve_whilehs>; - defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi", int_aarch64_sve_whilehi>; - - defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege", int_aarch64_sve_whilege>; - defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt", int_aarch64_sve_whilegt>; - defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs", int_aarch64_sve_whilehs>; - defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi", int_aarch64_sve_whilehi>; - - // SVE2 pointer conflict compare - defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr", "int_aarch64_sve_whilewr">; - defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw", "int_aarch64_sve_whilerw">; -} // End HasSVE2orStreamingSVE - -let Predicates = [HasSVE2AES] in { - // SVE2 crypto destructive binary operations - defm AESE_ZZZ_B : sve2_crypto_des_bin_op<0b00, "aese", ZPR8, int_aarch64_sve_aese, nxv16i8>; - defm AESD_ZZZ_B : sve2_crypto_des_bin_op<0b01, "aesd", ZPR8, int_aarch64_sve_aesd, nxv16i8>; - - // SVE2 crypto unary operations - defm AESMC_ZZ_B : sve2_crypto_unary_op<0b0, "aesmc", int_aarch64_sve_aesmc>; - defm AESIMC_ZZ_B : sve2_crypto_unary_op<0b1, "aesimc", 
int_aarch64_sve_aesimc>; - - // PMULLB and PMULLT instructions which operate with 64-bit source and - // 128-bit destination elements are enabled with crypto extensions, similar - // to NEON PMULL2 instruction. - defm PMULLB_ZZZ_Q : sve2_wide_int_arith_pmul<0b00, 0b11010, "pmullb", int_aarch64_sve_pmullb_pair>; - defm PMULLT_ZZZ_Q : sve2_wide_int_arith_pmul<0b00, 0b11011, "pmullt", int_aarch64_sve_pmullt_pair>; -} // End HasSVE2AES - -let Predicates = [HasSVE2SM4] in { - // SVE2 crypto constructive binary operations - defm SM4EKEY_ZZZ_S : sve2_crypto_cons_bin_op<0b0, "sm4ekey", ZPR32, int_aarch64_sve_sm4ekey, nxv4i32>; - // SVE2 crypto destructive binary operations - defm SM4E_ZZZ_S : sve2_crypto_des_bin_op<0b10, "sm4e", ZPR32, int_aarch64_sve_sm4e, nxv4i32>; -} // End HasSVE2SM4 - -let Predicates = [HasSVE2SHA3] in { - // SVE2 crypto constructive binary operations - defm RAX1_ZZZ_D : sve2_crypto_cons_bin_op<0b1, "rax1", ZPR64, int_aarch64_sve_rax1, nxv2i64>; -} // End HasSVE2SHA3 - -let Predicates = [HasSVE2BitPerm] in { - // SVE2 bitwise permute - defm BEXT_ZZZ : sve2_misc_bitwise<0b1100, "bext", int_aarch64_sve_bext_x>; - defm BDEP_ZZZ : sve2_misc_bitwise<0b1101, "bdep", int_aarch64_sve_bdep_x>; - defm BGRP_ZZZ : sve2_misc_bitwise<0b1110, "bgrp", int_aarch64_sve_bgrp_x>; -} // End HasSVE2BitPerm diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedA53.td b/suite/synctools/tablegen/AArch64/AArch64SchedA53.td deleted file mode 100644 index d18a05fda1..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedA53.td +++ /dev/null @@ -1,298 +0,0 @@ -//==- AArch64SchedA53.td - Cortex-A53 Scheduling Definitions -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the itinerary class data for the ARM Cortex A53 processors. -// -//===----------------------------------------------------------------------===// - -// ===---------------------------------------------------------------------===// -// The following definitions describe the simpler per-operand machine model. -// This works with MachineScheduler. See MCSchedule.h for details. - -// Cortex-A53 machine model for scheduling and other instruction cost heuristics. -def CortexA53Model : SchedMachineModel { - let MicroOpBufferSize = 0; // Explicitly set to zero since A53 is in-order. - let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. - let LoadLatency = 3; // Optimistic load latency assuming bypass. - // This is overriden by OperandCycles if the - // Itineraries are queried instead. - let MispredictPenalty = 9; // Based on "Cortex-A53 Software Optimisation - // Specification - Instruction Timings" - // v 1.0 Spreadsheet - let CompleteModel = 1; - - list UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F, - SMEUnsupported.F); -} - - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available. - -// Modeling each pipeline as a ProcResource using the BufferSize = 0 since -// Cortex-A53 is in-order. 
- -def A53UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU -def A53UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC -def A53UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division -def A53UnitLdSt : ProcResource<1> { let BufferSize = 0; } // Load/Store -def A53UnitB : ProcResource<1> { let BufferSize = 0; } // Branch -def A53UnitFPALU : ProcResource<1> { let BufferSize = 0; } // FP ALU -def A53UnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mult/Div/Sqrt - - -//===----------------------------------------------------------------------===// -// Subtarget-specific SchedWrite types which both map the ProcResources and -// set the latency. - -let SchedModel = CortexA53Model in { - -// ALU - Despite having a full latency of 4, most of the ALU instructions can -// forward a cycle earlier and then two cycles earlier in the case of a -// shift-only instruction. These latencies will be incorrect when the -// result cannot be forwarded, but modeling isn't rocket surgery. -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 2; } -def : WriteRes { let Latency = 3; } - -// MAC -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } - -// Div -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } - -// Load -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } - -// Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVecLd -// below, choosing the median of 3 which makes the latency 6. -// May model this more carefully in the future. The remaining -// A53WriteVLD# types represent the 1-5 cycle issues explicitly. 
-def : WriteRes { let Latency = 6; - let ResourceCycles = [3]; } -def A53WriteVLD1 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 4; } -def A53WriteVLD2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5; - let ResourceCycles = [2]; } -def A53WriteVLD3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6; - let ResourceCycles = [3]; } -def A53WriteVLD4 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 7; - let ResourceCycles = [4]; } -def A53WriteVLD5 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 8; - let ResourceCycles = [5]; } - -// Pre/Post Indexing - Performed as part of address generation which is already -// accounted for in the WriteST* latencies below -def : WriteRes { let Latency = 0; } - -// Store -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } - -// Vector Store - Similar to vector loads, can take 1-3 cycles to issue. -def : WriteRes { let Latency = 5; - let ResourceCycles = [2];} -def A53WriteVST1 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 4; } -def A53WriteVST2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5; - let ResourceCycles = [2]; } -def A53WriteVST3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6; - let ResourceCycles = [3]; } - -def : WriteRes { let Unsupported = 1; } - -// Branch -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; - -// FP ALU -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 6; } - -// FP Mul, Div, Sqrt -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 33; - let ResourceCycles = [29]; } -def A53WriteFMAC : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 10; } -def A53WriteFDivSP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 18; - let ResourceCycles = [14]; } -def A53WriteFDivDP 
: SchedWriteRes<[A53UnitFPMDS]> { let Latency = 33; - let ResourceCycles = [29]; } -def A53WriteFSqrtSP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 17; - let ResourceCycles = [13]; } -def A53WriteFSqrtDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 32; - let ResourceCycles = [28]; } - -//===----------------------------------------------------------------------===// -// Subtarget-specific SchedRead types. - -// No forwarding for these reads. -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable -// operands are needed one cycle later if and only if they are to be -// shifted. Otherwise, they too are needed two cycles later. This same -// ReadAdvance applies to Extended registers as well, even though there is -// a separate SchedPredicate for them. -def : ReadAdvance; -def A53ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI, - WriteISReg, WriteIEReg,WriteIS, - WriteID32,WriteID64, - WriteIM32,WriteIM64]>; -def A53ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI, - WriteISReg, WriteIEReg,WriteIS, - WriteID32,WriteID64, - WriteIM32,WriteIM64]>; -def A53ReadISReg : SchedReadVariant<[ - SchedVar, - SchedVar]>; -def : SchedAlias; - -def A53ReadIEReg : SchedReadVariant<[ - SchedVar, - SchedVar]>; -def : SchedAlias; - -// MAC - Operands are generally needed one cycle later in the MAC pipe. -// Accumulator operands are needed two cycles later. -def : ReadAdvance; -def : ReadAdvance; - -// Div -def : ReadAdvance; - -//===----------------------------------------------------------------------===// -// Subtarget-specific InstRWs. 
- -//--- -// Miscellaneous -//--- -def : InstRW<[WriteI], (instrs COPY)>; - -//--- -// Vector Loads -//--- -def : InstRW<[A53WriteVLD1], (instregex "LD1i(8|16|32|64)$")>; -def : InstRW<[A53WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A53WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A53WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A53WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A53WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; -def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[A53WriteVLD1], (instregex "LD2i(8|16|32|64)$")>; -def : InstRW<[A53WriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A53WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>; -def : InstRW<[A53WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>; -def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>; -def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[A53WriteVLD2], (instregex "LD3i(8|16|32|64)$")>; -def : InstRW<[A53WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A53WriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; -def : 
InstRW<[A53WriteVLD3], (instregex "LD3Threev2d$")>; -def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; -def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; -def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD3Threev2d_POST$")>; - -def : InstRW<[A53WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; -def : InstRW<[A53WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A53WriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; -def : InstRW<[A53WriteVLD4], (instregex "LD4Fourv(2d)$")>; -def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; -def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[A53WriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; -def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; - -//--- -// Vector Stores -//--- -def : InstRW<[A53WriteVST1], (instregex "ST1i(8|16|32|64)$")>; -def : InstRW<[A53WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A53WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A53WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A53WriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; -def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[A53WriteVST1], (instregex "ST2i(8|16|32|64)$")>; -def : 
InstRW<[A53WriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>; -def : InstRW<[A53WriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; -def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; -def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[A53WriteVST2], (instregex "ST3i(8|16|32|64)$")>; -def : InstRW<[A53WriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; -def : InstRW<[A53WriteVST2], (instregex "ST3Threev(2d)$")>; -def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; -def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; -def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>; - -def : InstRW<[A53WriteVST2], (instregex "ST4i(8|16|32|64)$")>; -def : InstRW<[A53WriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; -def : InstRW<[A53WriteVST2], (instregex "ST4Fourv(2d)$")>; -def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; -def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; -def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; - -//--- -// Floating Point MAC, DIV, SQRT -//--- -def : InstRW<[A53WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>; -def : InstRW<[A53WriteFMAC], (instregex "^FML(A|S).*")>; -def : InstRW<[A53WriteFDivSP], (instrs FDIVSrr)>; -def : InstRW<[A53WriteFDivDP], (instrs FDIVDrr)>; -def : InstRW<[A53WriteFDivSP], (instregex "^FDIVv.*32$")>; -def : InstRW<[A53WriteFDivDP], (instregex "^FDIVv.*64$")>; -def : InstRW<[A53WriteFSqrtSP], (instregex "^.*SQRT.*32$")>; -def : InstRW<[A53WriteFSqrtDP], (instregex "^.*SQRT.*64$")>; - -} diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedA55.td b/suite/synctools/tablegen/AArch64/AArch64SchedA55.td deleted file mode 100644 index 009219ce3c..0000000000 --- 
a/suite/synctools/tablegen/AArch64/AArch64SchedA55.td +++ /dev/null @@ -1,361 +0,0 @@ -//==- AArch64SchedCortexA55.td - ARM Cortex-A55 Scheduling Definitions -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for the ARM Cortex-A55 processors. -// -//===----------------------------------------------------------------------===// - -// ===---------------------------------------------------------------------===// -// The following definitions describe the per-operand machine model. -// This works with MachineScheduler. See MCSchedModel.h for details. - -// Cortex-A55 machine model for scheduling and other instruction cost heuristics. -def CortexA55Model : SchedMachineModel { - let MicroOpBufferSize = 0; // The Cortex-A55 is an in-order processor - let IssueWidth = 2; // It dual-issues under most circumstances - let LoadLatency = 4; // Cycles for loads to access the cache. The - // optimisation guide shows that most loads have - // a latency of 3, but some have a latency of 4 - // or 5. Setting it 4 looked to be good trade-off. - let MispredictPenalty = 8; // A branch direction mispredict. - let PostRAScheduler = 1; // Enable PostRA scheduler pass. - let CompleteModel = 0; // Covers instructions applicable to Cortex-A55. - - list UnsupportedFeatures = [HasSVE]; - - // FIXME: Remove when all errors have been fixed. - let FullInstRWOverlapCheck = 0; -} - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available. - -// Modeling each pipeline as a ProcResource using the BufferSize = 0 since the -// Cortex-A55 is in-order. 
- -def CortexA55UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU -def CortexA55UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC, 64-bi wide -def CortexA55UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division, not pipelined -def CortexA55UnitLd : ProcResource<1> { let BufferSize = 0; } // Load pipe -def CortexA55UnitSt : ProcResource<1> { let BufferSize = 0; } // Store pipe -def CortexA55UnitB : ProcResource<1> { let BufferSize = 0; } // Branch - -// The FP DIV/SQRT instructions execute totally differently from the FP ALU -// instructions, which can mostly be dual-issued; that's why for now we model -// them with 2 resources. -def CortexA55UnitFPALU : ProcResource<2> { let BufferSize = 0; } // FP ALU -def CortexA55UnitFPMAC : ProcResource<2> { let BufferSize = 0; } // FP MAC -def CortexA55UnitFPDIV : ProcResource<1> { let BufferSize = 0; } // FP Div/SQRT, 64/128 - -//===----------------------------------------------------------------------===// -// Subtarget-specific SchedWrite types - -let SchedModel = CortexA55Model in { - -// These latencies are modeled without taking into account forwarding paths -// (the software optimisation guide lists latencies taking into account -// typical forwarding paths). 
-def : WriteRes { let Latency = 3; } // MOVN, MOVZ -def : WriteRes { let Latency = 3; } // ALU -def : WriteRes { let Latency = 3; } // ALU of Shifted-Reg -def : WriteRes { let Latency = 3; } // ALU of Extended-Reg -def : WriteRes { let Latency = 3; } // EXTR from a reg pair -def : WriteRes { let Latency = 3; } // Shift/Scale - -// MAC -def : WriteRes { let Latency = 4; } // 32-bit Multiply -def : WriteRes { let Latency = 4; } // 64-bit Multiply - -// Div -def : WriteRes { - let Latency = 8; let ResourceCycles = [8]; -} -def : WriteRes { - let Latency = 8; let ResourceCycles = [8]; -} - -// Load -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 5; } - -// Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVecLd -// below, choosing the median of 3 which makes the latency 6. -// An extra cycle is needed to get the swizzling right. -def : WriteRes { let Latency = 6; - let ResourceCycles = [3]; } -def CortexA55WriteVLD1 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; } -def CortexA55WriteVLD1SI : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; let SingleIssue = 1; } -def CortexA55WriteVLD2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5; - let ResourceCycles = [2]; } -def CortexA55WriteVLD3 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 6; - let ResourceCycles = [3]; } -def CortexA55WriteVLD4 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 7; - let ResourceCycles = [4]; } -def CortexA55WriteVLD5 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 8; - let ResourceCycles = [5]; } -def CortexA55WriteVLD6 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 9; - let ResourceCycles = [6]; } -def CortexA55WriteVLD7 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 10; - let ResourceCycles = [7]; } -def CortexA55WriteVLD8 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 11; - let ResourceCycles = [8]; } - -def CortexA55WriteLDP1 : SchedWriteRes<[]> { let Latency = 4; } -def 
CortexA55WriteLDP2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5; } -def CortexA55WriteLDP4 : SchedWriteRes<[CortexA55UnitLd, CortexA55UnitLd, CortexA55UnitLd, CortexA55UnitLd, CortexA55UnitLd]> { let Latency = 6; } - -// Pre/Post Indexing - Performed as part of address generation -def : WriteRes { let Latency = 0; } - -// Store -let RetireOOO = 1 in { -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -} -def : WriteRes { let Latency = 4; } - -// Vector Store - Similar to vector loads, can take 1-3 cycles to issue. -def : WriteRes { let Latency = 5; - let ResourceCycles = [2];} -def CortexA55WriteVST1 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 4; } -def CortexA55WriteVST2 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5; - let ResourceCycles = [2]; } -def CortexA55WriteVST3 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 6; - let ResourceCycles = [3]; } -def CortexA55WriteVST4 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5; - let ResourceCycles = [4]; } - -def : WriteRes { let Unsupported = 1; } - -// Branch -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; - -// FP ALU -// As WriteF result is produced in F5 and it can be mostly forwarded -// to consumer at F1, the effectively latency is set as 4. 
-def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; let BeginGroup = 1; } - -// FP ALU specific new schedwrite definitions -def CortexA55WriteFPALU_F2 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 2;} -def CortexA55WriteFPALU_F3 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 3;} -def CortexA55WriteFPALU_F4 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 4;} -def CortexA55WriteFPALU_F5 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 5;} - -// FP Mul, Div, Sqrt. Div/Sqrt are not pipelined -def : WriteRes { let Latency = 4; } - -let RetireOOO = 1 in { -def : WriteRes { let Latency = 22; - let ResourceCycles = [29]; } -def CortexA55WriteFMAC : SchedWriteRes<[CortexA55UnitFPMAC]> { let Latency = 4; } -def CortexA55WriteFDivHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8; - let ResourceCycles = [5]; } -def CortexA55WriteFDivSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 13; - let ResourceCycles = [10]; } -def CortexA55WriteFDivDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22; - let ResourceCycles = [19]; } -def CortexA55WriteFSqrtHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8; - let ResourceCycles = [5]; } -def CortexA55WriteFSqrtSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 12; - let ResourceCycles = [9]; } -def CortexA55WriteFSqrtDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22; - let ResourceCycles = [19]; } -} -//===----------------------------------------------------------------------===// -// Subtarget-specific SchedRead types. - -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -// ALU - ALU input operands are generally needed in EX1. 
An operand produced in -// in say EX2 can be forwarded for consumption to ALU in EX1, thereby -// allowing back-to-back ALU operations such as add. If an operand requires -// a shift, it will, however, be required in ISS stage. -def : ReadAdvance; -// Shifted operand -def CortexA55ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI, - WriteISReg, WriteIEReg,WriteIS, - WriteID32,WriteID64, - WriteIM32,WriteIM64]>; -def CortexA55ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI, - WriteISReg, WriteIEReg,WriteIS, - WriteID32,WriteID64, - WriteIM32,WriteIM64]>; -def CortexA55ReadISReg : SchedReadVariant<[ - SchedVar, - SchedVar]>; -def : SchedAlias; - -def CortexA55ReadIEReg : SchedReadVariant<[ - SchedVar, - SchedVar]>; -def : SchedAlias; - -// MUL -def : ReadAdvance; -def : ReadAdvance; - -// Div -def : ReadAdvance; - -//===----------------------------------------------------------------------===// -// Subtarget-specific InstRWs. - -//--- -// Miscellaneous -//--- -def : InstRW<[CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?Wi")>; -def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPSi")>; -def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)i")>; -def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQi")>; -def : InstRW<[WriteAdr, CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?W(pre|post)")>; -def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPS(pre|post)")>; -def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)(pre|post)")>; -def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQ(pre|post)")>; -def : InstRW<[WriteI], (instrs COPY)>; -//--- -// Vector Loads - 64-bit per cycle -//--- -// 1-element structures -def : InstRW<[CortexA55WriteVLD1], (instregex "LD1i(8|16|32|64)$")>; // single element -def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // replicate -def : 
InstRW<[CortexA55WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)$")>; -def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Onev(16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d)$")>; // multiple structures -def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[CortexA55WriteVLD6], (instregex "LD1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[CortexA55WriteVLD8], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; - -def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>; -def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>; - -// 2-element structures -def : InstRW<[CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)$")>; -def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>; -def : InstRW<[CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>; - -def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>; -def : InstRW<[CortexA55WriteVLD2, WriteAdr], 
(instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; -def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>; -def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>; - -// 3-element structures -def : InstRW<[CortexA55WriteVLD2], (instregex "LD3i(8|16|32|64)$")>; -def : InstRW<[CortexA55WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)$")>; -def : InstRW<[CortexA55WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)$")>; - -def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>; - -// 4-element structures -def : InstRW<[CortexA55WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; // load single 4-el structure to one lane of 4 regs. -def : InstRW<[CortexA55WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // load single 4-el structure, replicate to all lanes of 4 regs. -def : InstRW<[CortexA55WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>; // load multiple 4-el structures to 4 regs. 
-def : InstRW<[CortexA55WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>; - -def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>; - -//--- -// Vector Stores -//--- -def : InstRW<[CortexA55WriteVST1], (instregex "ST1i(8|16|32|64)$")>; -def : InstRW<[CortexA55WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[CortexA55WriteVST2], (instregex "ST2i(8|16|32|64)$")>; -def : InstRW<[CortexA55WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>; -def : InstRW<[CortexA55WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; -def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)$")>; -def : InstRW<[CortexA55WriteVST4], (instregex 
"ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|2d|16b|8h|4s|4d)_POST$")>; - -def : InstRW<[CortexA55WriteVST2], (instregex "ST4i(8|16|32|64)$")>; -def : InstRW<[CortexA55WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -//--- -// Floating Point Conversions, MAC, DIV, SQRT -//--- -def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^DUP(v2i64|v4i32|v8i16|v16i8)")>; -def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^XTN")>; -def : InstRW<[CortexA55WriteFPALU_F3], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>; -def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>; - -def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(S|U)(W|X)(H|S|D)")>; -def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(h|s|d)")>; -def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTFv")>; - -def : InstRW<[CortexA55WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>; -def : InstRW<[CortexA55WriteFMAC], (instregex "^FML(A|S).*")>; -def : InstRW<[CortexA55WriteFDivHP], (instrs FDIVHrr)>; -def : InstRW<[CortexA55WriteFDivSP], (instrs FDIVSrr)>; -def : InstRW<[CortexA55WriteFDivDP], (instrs FDIVDrr)>; -def : InstRW<[CortexA55WriteFDivHP], (instregex "^FDIVv.*16$")>; -def : InstRW<[CortexA55WriteFDivSP], (instregex "^FDIVv.*32$")>; -def : InstRW<[CortexA55WriteFDivDP], (instregex "^FDIVv.*64$")>; -def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>; -def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>; -def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>; - -} diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedA57.td 
b/suite/synctools/tablegen/AArch64/AArch64SchedA57.td deleted file mode 100644 index a860aa907f..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedA57.td +++ /dev/null @@ -1,690 +0,0 @@ -//=- AArch64SchedA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for ARM Cortex-A57 to support -// instruction scheduling and other instruction cost heuristics. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// The Cortex-A57 is a traditional superscalar microprocessor with a -// conservative 3-wide in-order stage for decode and dispatch. Combined with the -// much wider out-of-order issue stage, this produced a need to carefully -// schedule micro-ops so that all three decoded each cycle are successfully -// issued as the reservation station(s) simply don't stay occupied for long. -// Therefore, IssueWidth is set to the narrower of the two at three, while still -// modeling the machine as out-of-order. - -def CortexA57Model : SchedMachineModel { - let IssueWidth = 3; // 3-way decode and dispatch - let MicroOpBufferSize = 128; // 128 micro-op re-order buffer - let LoadLatency = 4; // Optimistic load latency - let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch - - // Enable partial & runtime unrolling. The magic number is chosen based on - // experiments and benchmarking data. 
- let LoopMicroOpBufferSize = 16; - let CompleteModel = 1; - - list UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F, - SMEUnsupported.F); -} - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available on Cortex-A57. -// Cortex A-57 has 8 pipelines that each has its own 8-entry queue where -// micro-ops wait for their operands and then issue out-of-order. - -def A57UnitB : ProcResource<1>; // Type B micro-ops -def A57UnitI : ProcResource<2>; // Type I micro-ops -def A57UnitM : ProcResource<1>; // Type M micro-ops -def A57UnitL : ProcResource<1>; // Type L micro-ops -def A57UnitS : ProcResource<1>; // Type S micro-ops -def A57UnitX : ProcResource<1>; // Type X micro-ops -def A57UnitW : ProcResource<1>; // Type W micro-ops -let SchedModel = CortexA57Model in { - def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops -} - -let SchedModel = CortexA57Model in { - -//===----------------------------------------------------------------------===// -// Define customized scheduler read/write types specific to the Cortex-A57. - -include "AArch64SchedA57WriteRes.td" - -//===----------------------------------------------------------------------===// -// Map the target-defined scheduler read/write resources and latency for -// Cortex-A57. The Cortex-A57 types are directly associated with resources, so -// defining the aliases precludes the need for mapping them using WriteRes. The -// aliases are sufficient for creating a coarse, working model. As the model -// evolves, InstRWs will be used to override some of these SchedAliases. -// -// WARNING: Using SchedAliases is convenient and works well for latency and -// resource lookup for instructions. However, this creates an entry in -// AArch64WriteLatencyTable with a WriteResourceID of 0, breaking -// any SchedReadAdvance since the lookup will fail. 
- -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 5; } -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : WriteRes { let Latency = 5;} -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - -def : WriteRes { let Unsupported = 1; } - -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -def : WriteRes { let Latency = 4; } - -// Forwarding logic is only modeled for multiply and accumulate -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - - -//===----------------------------------------------------------------------===// -// Specialize the coarse model by associating instruction groups with the -// subtarget-defined types. As the modeled is refined, this will override most -// of the above ShchedAlias mappings. 
- -// Miscellaneous -// ----------------------------------------------------------------------------- - -def : InstRW<[WriteI], (instrs COPY)>; - - -// Branch Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[A57Write_1cyc_1B_1I], (instrs BL)>; -def : InstRW<[A57Write_2cyc_1B_1I], (instrs BLR)>; - - -// Shifted Register with Shift == 0 -// ---------------------------------------------------------------------------- - -def A57WriteISReg : SchedWriteVariant<[ - SchedVar, - SchedVar]>; -def : InstRW<[A57WriteISReg], (instregex ".*rs$")>; - - -// Divide and Multiply Instructions -// ----------------------------------------------------------------------------- - -// Multiply high -def : InstRW<[A57Write_6cyc_1M], (instrs SMULHrr, UMULHrr)>; - - -// Miscellaneous Data-Processing Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[A57Write_1cyc_1I], (instrs EXTRWrri)>; -def : InstRW<[A57Write_3cyc_1I_1M], (instrs EXTRXrri)>; -def : InstRW<[A57Write_2cyc_1M], (instregex "BFM")>; - - -// Cryptography Extensions -// ----------------------------------------------------------------------------- - -def A57ReadAES : SchedReadAdvance<3, [A57Write_3cyc_1W]>; -def : InstRW<[A57Write_3cyc_1W], (instregex "^AES[DE]")>; -def : InstRW<[A57Write_3cyc_1W, A57ReadAES], (instregex "^AESI?MC")>; -def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>; -def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>; -def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>; -def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA256SU0")>; -def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA256(H|H2|SU1)")>; -def : InstRW<[A57Write_3cyc_1W], (instregex "^CRC32")>; - - -// Vector Load -// ----------------------------------------------------------------------------- - -def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1i(8|16|32)$")>; -def : InstRW<[A57Write_8cyc_1L_1V, 
WriteAdr], (instregex "LD1i(8|16|32)_POST$")>; -def : InstRW<[A57Write_5cyc_1L], (instregex "LD1i(64)$")>; -def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1i(64)_POST$")>; - -def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s)$")>; -def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>; -def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Rv(1d)$")>; -def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Rv(1d)_POST$")>; -def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>; -def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(8b|4h|2s|1d)$")>; -def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(16b|8h|4s|2d)$")>; -def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>; -def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>; -def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>; -def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[A57Write_7cyc_3L], (instregex "LD1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[A57Write_7cyc_3L, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>; -def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[A57Write_8cyc_4L], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2i(8|16)$")>; 
-def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16)_POST$")>; -def : InstRW<[A57Write_6cyc_2L], (instregex "LD2i(32)$")>; -def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2i(32)_POST$")>; -def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2i(64)$")>; -def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2i(64)_POST$")>; - -def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Rv(8b|4h|2s)$")>; -def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>; -def : InstRW<[A57Write_5cyc_1L], (instregex "LD2Rv(1d)$")>; -def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD2Rv(1d)_POST$")>; -def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>; -def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Twov(8b|4h|2s)$")>; -def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>; -def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s)$")>; -def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>; -def : InstRW<[A57Write_6cyc_2L], (instregex "LD2Twov(2d)$")>; -def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2Twov(2d)_POST$")>; - -def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3i(8|16)$")>; -def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3i(8|16)_POST$")>; -def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3i(32)$")>; -def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3i(32)_POST$")>; -def : InstRW<[A57Write_6cyc_2L], (instregex "LD3i(64)$")>; -def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3i(64)_POST$")>; - -def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3Rv(8b|4h|2s)$")>; -def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>; -def : InstRW<[A57Write_6cyc_2L], (instregex "LD3Rv(1d)$")>; -def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3Rv(1d)_POST$")>; -def : 
InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3Rv(16b|8h|4s)$")>; -def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>; -def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD3Rv(2d)$")>; -def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD3Rv(2d)_POST$")>; - -def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD3Threev(8b|4h|2s)$")>; -def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>; -def : InstRW<[A57Write_10cyc_3L_4V], (instregex "LD3Threev(16b|8h|4s)$")>; -def : InstRW<[A57Write_10cyc_3L_4V, WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>; -def : InstRW<[A57Write_8cyc_4L], (instregex "LD3Threev(2d)$")>; -def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD3Threev(2d)_POST$")>; - -def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(8|16)$")>; -def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(8|16)_POST$")>; -def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4i(32)$")>; -def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4i(32)_POST$")>; -def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(64)$")>; -def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(64)_POST$")>; - -def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4Rv(8b|4h|2s)$")>; -def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>; -def : InstRW<[A57Write_6cyc_2L], (instregex "LD4Rv(1d)$")>; -def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD4Rv(1d)_POST$")>; -def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4Rv(16b|8h|4s)$")>; -def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>; -def : InstRW<[A57Write_9cyc_2L_4V], (instregex "LD4Rv(2d)$")>; -def : InstRW<[A57Write_9cyc_2L_4V, WriteAdr], (instregex "LD4Rv(2d)_POST$")>; - -def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD4Fourv(8b|4h|2s)$")>; -def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; -def : InstRW<[A57Write_11cyc_4L_4V], 
(instregex "LD4Fourv(16b|8h|4s)$")>; -def : InstRW<[A57Write_11cyc_4L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>; -def : InstRW<[A57Write_8cyc_4L], (instregex "LD4Fourv(2d)$")>; -def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; - -// Vector Store -// ----------------------------------------------------------------------------- - -def : InstRW<[A57Write_1cyc_1S], (instregex "ST1i(8|16|32)$")>; -def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1i(8|16|32)_POST$")>; -def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST1i(64)$")>; -def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST1i(64)_POST$")>; - -def : InstRW<[A57Write_1cyc_1S], (instregex "ST1Onev(8b|4h|2s|1d)$")>; -def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Onev(16b|8h|4s|2d)$")>; -def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; -def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; -def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; -def : InstRW<[A57Write_3cyc_3S], (instregex "ST1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[A57Write_6cyc_6S], (instregex "ST1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; -def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[A57Write_8cyc_8S], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[A57Write_3cyc_1S_1V], 
(instregex "ST2i(8|16|32)$")>; -def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST2i(8|16|32)_POST$")>; -def : InstRW<[A57Write_2cyc_2S], (instregex "ST2i(64)$")>; -def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST2i(64)_POST$")>; - -def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST2Twov(8b|4h|2s)$")>; -def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; -def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST2Twov(16b|8h|4s)$")>; -def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>; -def : InstRW<[A57Write_4cyc_4S], (instregex "ST2Twov(2d)$")>; -def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST2Twov(2d)_POST$")>; - -def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST3i(8|16)$")>; -def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST3i(8|16)_POST$")>; -def : InstRW<[A57Write_3cyc_3S], (instregex "ST3i(32)$")>; -def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST3i(32)_POST$")>; -def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST3i(64)$")>; -def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST3i(64)_POST$")>; - -def : InstRW<[A57Write_3cyc_3S_2V], (instregex "ST3Threev(8b|4h|2s)$")>; -def : InstRW<[A57Write_3cyc_3S_2V, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>; -def : InstRW<[A57Write_6cyc_6S_4V], (instregex "ST3Threev(16b|8h|4s)$")>; -def : InstRW<[A57Write_6cyc_6S_4V, WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>; -def : InstRW<[A57Write_6cyc_6S], (instregex "ST3Threev(2d)$")>; -def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST3Threev(2d)_POST$")>; - -def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST4i(8|16)$")>; -def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST4i(8|16)_POST$")>; -def : InstRW<[A57Write_4cyc_4S], (instregex "ST4i(32)$")>; -def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST4i(32)_POST$")>; -def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST4i(64)$")>; -def : InstRW<[A57Write_3cyc_2S_1V, 
WriteAdr], (instregex "ST4i(64)_POST$")>; - -def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST4Fourv(8b|4h|2s)$")>; -def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; -def : InstRW<[A57Write_8cyc_8S_4V], (instregex "ST4Fourv(16b|8h|4s)$")>; -def : InstRW<[A57Write_8cyc_8S_4V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>; -def : InstRW<[A57Write_8cyc_8S], (instregex "ST4Fourv(2d)$")>; -def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; - -// Vector - Integer -// ----------------------------------------------------------------------------- - -// Reference for forms in this group -// D form - v8i8, v4i16, v2i32 -// Q form - v16i8, v8i16, v4i32 -// D form - v1i8, v1i16, v1i32, v1i64 -// Q form - v16i8, v8i16, v4i32, v2i64 -// D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64 -// Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64 - -// Cortex A57 Software Optimization Guide Sec 3.14 -// Advance for absolute diff accum, pairwise add and accumulate, shift accumulate -def A57ReadIVA3 : SchedReadAdvance<3, [A57Write_4cyc_1X_NonMul_Forward, A57Write_5cyc_2X_NonMul_Forward]>; - -// ASIMD absolute diff accum, D-form -def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; -// ASIMD absolute diff accum, Q-form -def : InstRW<[A57Write_5cyc_2X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; -// ASIMD absolute diff accum long -def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABAL")>; - -// ASIMD arith, reduce, 4H/4S -def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; -// ASIMD arith, reduce, 8B/8H -def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>; -// ASIMD arith, reduce, 16B -def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU]?ADDL?Vv16i8v$")>; - -// ASIMD max/min, reduce, 4H/4S -def : InstRW<[A57Write_4cyc_1X], (instregex 
"^[SU](MIN|MAX)V(v4i16|v4i32)v$")>; -// ASIMD max/min, reduce, 8B/8H -def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>; -// ASIMD max/min, reduce, 16B -def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU](MIN|MAX)Vv16i8v$")>; - -// ASIMD multiply, D-form -// MUL -def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^MUL(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>; -// PMUL, SQDMULH, SQRDMULH -def : InstRW<[A57Write_5cyc_1W], (instregex "^(PMUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>; - -// ASIMD multiply, Q-form -// MUL -def : InstRW<[A57Write_6cyc_2W_Mul_Forward], (instregex "^MUL(v16i8|v8i16|v4i32)(_indexed)?$")>; -// PMUL, SQDMULH, SQRDMULH -def : InstRW<[A57Write_6cyc_2W], (instregex "^(PMUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; - -// Cortex A57 Software Optimization Guide Sec 3.14 -def A57ReadIVMA4 : SchedReadAdvance<4 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>; -def A57ReadIVMA3 : SchedReadAdvance<3 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>; - -// ASIMD multiply accumulate, D-form -def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; -// ASIMD multiply accumulate, Q-form -def : InstRW<[A57Write_6cyc_2W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; - -// ASIMD multiply accumulate long -// ASIMD multiply accumulate saturating long -def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^(S|U)ML[AS]L")>; -def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA3], (instregex "^SQDML[AS]L")>; - -// ASIMD multiply long -def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^(S|U)MULL")>; -def : InstRW<[A57Write_5cyc_1W], (instregex "^SQDMULL")>; -def : InstRW<[A57Write_5cyc_1W], (instregex "^PMULL(v8i8|v16i8)")>; -def : InstRW<[A57Write_3cyc_1W], (instregex "^PMULL(v1i64|v2i64)")>; - -// ASIMD pairwise add and accumulate -// 
ASIMD shift accumulate -def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ADALP")>; -def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^(S|SR|U|UR)SRA")>; - -// ASIMD shift by immed, complex -def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?(Q|R){1,2}SHR")>; -def : InstRW<[A57Write_4cyc_1X], (instregex "^SQSHLU")>; - - -// ASIMD shift by register, basic, Q-form -def : InstRW<[A57Write_4cyc_2X], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; - -// ASIMD shift by register, complex, D-form -def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU][QR]{1,2}SHL(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>; - -// ASIMD shift by register, complex, Q-form -def : InstRW<[A57Write_5cyc_2X], (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>; - - -// Vector - Floating Point -// ----------------------------------------------------------------------------- - -// Reference for forms in this group -// D form - v2f32 -// Q form - v4f32, v2f64 -// D form - 32, 64 -// D form - v1i32, v1i64 -// D form - v2i32 -// Q form - v4i32, v2i64 - -// ASIMD FP arith, normal, D-form -def : InstRW<[A57Write_5cyc_1V], (instregex "^(FABD|FADD|FSUB)(v2f32|32|64|v2i32p)")>; -// ASIMD FP arith, normal, Q-form -def : InstRW<[A57Write_5cyc_2V], (instregex "^(FABD|FADD|FSUB)(v4f32|v2f64|v2i64p)")>; - -// ASIMD FP arith, pairwise, D-form -def : InstRW<[A57Write_5cyc_1V], (instregex "^FADDP(v2f32|32|64|v2i32)")>; -// ASIMD FP arith, pairwise, Q-form -def : InstRW<[A57Write_9cyc_3V], (instregex "^FADDP(v4f32|v2f64|v2i64)")>; - -// ASIMD FP compare, D-form -def : InstRW<[A57Write_5cyc_1V], (instregex "^(FACGE|FACGT|FCMEQ|FCMGE|FCMGT|FCMLE|FCMLT)(v2f32|32|64|v1i32|v2i32|v1i64)")>; -// ASIMD FP compare, Q-form -def : InstRW<[A57Write_5cyc_2V], (instregex "^(FACGE|FACGT|FCMEQ|FCMGE|FCMGT|FCMLE|FCMLT)(v4f32|v2f64|v4i32|v2i64)")>; - -// ASIMD FP convert, long and narrow -def : InstRW<[A57Write_8cyc_3V], (instregex "^FCVT(L|N|XN)v")>; -// ASIMD FP 
convert, other, D-form -def : InstRW<[A57Write_5cyc_1V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>; -// ASIMD FP convert, other, Q-form -def : InstRW<[A57Write_5cyc_2V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>; - -// ASIMD FP divide, D-form, F32 -def : InstRW<[A57Write_17cyc_1W], (instregex "FDIVv2f32")>; -// ASIMD FP divide, Q-form, F32 -def : InstRW<[A57Write_34cyc_2W], (instregex "FDIVv4f32")>; -// ASIMD FP divide, Q-form, F64 -def : InstRW<[A57Write_64cyc_2W], (instregex "FDIVv2f64")>; - -// Note: These were simply duplicated from ASIMD FDIV because of missing documentation -// ASIMD FP square root, D-form, F32 -def : InstRW<[A57Write_17cyc_1W], (instregex "FSQRTv2f32")>; -// ASIMD FP square root, Q-form, F32 -def : InstRW<[A57Write_34cyc_2W], (instregex "FSQRTv4f32")>; -// ASIMD FP square root, Q-form, F64 -def : InstRW<[A57Write_64cyc_2W], (instregex "FSQRTv2f64")>; - -// ASIMD FP max/min, normal, D-form -def : InstRW<[A57Write_5cyc_1V], (instregex "^(FMAX|FMIN)(NM)?(v2f32)")>; -// ASIMD FP max/min, normal, Q-form -def : InstRW<[A57Write_5cyc_2V], (instregex "^(FMAX|FMIN)(NM)?(v4f32|v2f64)")>; -// ASIMD FP max/min, pairwise, D-form -def : InstRW<[A57Write_5cyc_1V], (instregex "^(FMAX|FMIN)(NM)?P(v2f32|v2i32)")>; -// ASIMD FP max/min, pairwise, Q-form -def : InstRW<[A57Write_9cyc_3V], (instregex "^(FMAX|FMIN)(NM)?P(v4f32|v2f64|v2i64)")>; -// ASIMD FP max/min, reduce -def : InstRW<[A57Write_10cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv")>; - -// ASIMD FP multiply, D-form, FZ -def : InstRW<[A57Write_5cyc_1V_FP_Forward], (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>; -// ASIMD FP multiply, Q-form, FZ -def : InstRW<[A57Write_5cyc_2V_FP_Forward], (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>; - -// ASIMD FP multiply accumulate, D-form, FZ -// ASIMD FP multiply accumulate, Q-form, FZ -def A57WriteFPVMAD : SchedWriteRes<[A57UnitV]> { let Latency = 9; } -def A57WriteFPVMAQ : SchedWriteRes<[A57UnitV, 
A57UnitV]> { let Latency = 10; } - -// Cortex A57 Software Optimization Guide Sec 3.15 -// Advances from FP mul and mul-accum to mul-accum -def A57ReadFPVMA5 : SchedReadAdvance<5, [A57WriteFPVMAD, A57WriteFPVMAQ, A57Write_5cyc_1V_FP_Forward, A57Write_5cyc_2V_FP_Forward]>; -def A57ReadFPVMA6 : SchedReadAdvance<6, [A57WriteFPVMAD, A57WriteFPVMAQ, A57Write_5cyc_1V_FP_Forward, A57Write_5cyc_2V_FP_Forward]>; - -def : InstRW<[A57WriteFPVMAD, A57ReadFPVMA5], (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>; -def : InstRW<[A57WriteFPVMAQ, A57ReadFPVMA6], (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>; - -// ASIMD FP round, D-form -def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT[AIMNPXZ](v2f32)")>; -// ASIMD FP round, Q-form -def : InstRW<[A57Write_5cyc_2V], (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>; - - -// Vector - Miscellaneous -// ----------------------------------------------------------------------------- - -// Reference for forms in this group -// D form - v8i8, v4i16, v2i32 -// Q form - v16i8, v8i16, v4i32 -// D form - v1i8, v1i16, v1i32, v1i64 -// Q form - v16i8, v8i16, v4i32, v2i64 - -// ASIMD bitwise insert, Q-form -def : InstRW<[A57Write_3cyc_2V], (instregex "^(BIF|BIT|BSL|BSP)v16i8")>; - -// ASIMD duplicate, gen reg, D-form and Q-form -def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^DUP(i8|i16|i32|i64)$")>; -def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^DUPv.+gpr")>; - -// ASIMD move, saturating -def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]QXTU?N")>; - -// ASIMD reciprocal estimate, D-form -def : InstRW<[A57Write_5cyc_1V], (instregex "^[FU](RECP|RSQRT)(E|X)(v2f32|v1i32|v2i32|v1i64)")>; -// ASIMD reciprocal estimate, Q-form -def : InstRW<[A57Write_5cyc_2V], (instregex "^[FU](RECP|RSQRT)(E|X)(v2f64|v4f32|v4i32)")>; - -// ASIMD reciprocal step, D-form, FZ -def : InstRW<[A57Write_9cyc_1V], (instregex "^F(RECP|RSQRT)S(v2f32|v1i32|v2i32|v1i64|32|64)")>; -// ASIMD reciprocal step, Q-form, FZ -def : InstRW<[A57Write_9cyc_2V], (instregex 
"^F(RECP|RSQRT)S(v2f64|v4f32|v4i32)")>; - -// ASIMD table lookup, D-form -def : InstRW<[A57Write_3cyc_1V], (instregex "^TB[LX]v8i8One")>; -def : InstRW<[A57Write_6cyc_2V], (instregex "^TB[LX]v8i8Two")>; -def : InstRW<[A57Write_9cyc_3V], (instregex "^TB[LX]v8i8Three")>; -def : InstRW<[A57Write_12cyc_4V], (instregex "^TB[LX]v8i8Four")>; -// ASIMD table lookup, Q-form -def : InstRW<[A57Write_6cyc_3V], (instregex "^TB[LX]v16i8One")>; -def : InstRW<[A57Write_9cyc_5V], (instregex "^TB[LX]v16i8Two")>; -def : InstRW<[A57Write_12cyc_7V], (instregex "^TB[LX]v16i8Three")>; -def : InstRW<[A57Write_15cyc_9V], (instregex "^TB[LX]v16i8Four")>; - -// ASIMD transfer, element to gen reg -def : InstRW<[A57Write_6cyc_1I_1L], (instregex "^[SU]MOVv")>; - -// ASIMD transfer, gen reg to element -def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^INSv")>; - -// ASIMD unzip/zip, Q-form -def : InstRW<[A57Write_6cyc_3V], (instregex "^(UZP|ZIP)(1|2)(v16i8|v8i16|v4i32|v2i64)")>; - - -// Remainder -// ----------------------------------------------------------------------------- - -def : InstRW<[A57Write_5cyc_1V], (instregex "^F(ADD|SUB)[DS]rr")>; - -// Cortex A57 Software Optimization Guide Sec 3.10 -def A57WriteFPMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; } -def A57ReadFPMA5 : SchedReadAdvance<5, [A57WriteFPMA, WriteFMul]>; -def A57ReadFPM : SchedReadAdvance<0>; -def : InstRW<[A57WriteFPMA, A57ReadFPM, A57ReadFPM, A57ReadFPMA5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>; - -def : InstRW<[A57Write_10cyc_1L_1V], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>; -def : InstRW<[A57Write_10cyc_1L_1V], (instregex "^[SU]CVTF")>; - -def : InstRW<[A57Write_32cyc_1W], (instrs FDIVDrr)>; -def : InstRW<[A57Write_17cyc_1W], (instrs FDIVSrr)>; - -def : InstRW<[A57Write_5cyc_1V], (instregex "^F(MAX|MIN).+rr")>; - -def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT.+r")>; - -def : InstRW<[A57Write_32cyc_1W], (instrs FSQRTDr)>; -def : InstRW<[A57Write_17cyc_1W], (instrs FSQRTSr)>; - -def : 
InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDNPDi)>; -def : InstRW<[A57Write_6cyc_2L, WriteLDHi], (instrs LDNPQi)>; -def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDNPSi)>; -def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDPDi)>; -def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPDpost)>; -def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPDpre)>; -def : InstRW<[A57Write_6cyc_2L, WriteLDHi], (instrs LDPQi)>; -def : InstRW<[A57Write_6cyc_2L, WriteLDHi, WriteAdr], (instrs LDPQpost)>; -def : InstRW<[A57Write_6cyc_2L, WriteLDHi, WriteAdr], (instrs LDPQpre)>; -def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi], (instrs LDPSWi)>; -def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi, WriteAdr], (instrs LDPSWpost)>; -def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi, WriteAdr], (instrs LDPSWpre)>; -def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDPSi)>; -def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPSpost)>; -def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPSpre)>; -def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRBpost)>; -def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRBpre)>; -def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRBroW)>; -def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRBroX)>; -def : InstRW<[A57Write_5cyc_1L], (instrs LDRBui)>; -def : InstRW<[A57Write_5cyc_1L], (instrs LDRDl)>; -def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRDpost)>; -def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRDpre)>; -def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRDroW)>; -def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRDroX)>; -def : InstRW<[A57Write_5cyc_1L], (instrs LDRDui)>; -def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRHHroW)>; -def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRHHroX)>; -def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRHpost)>; -def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRHpre)>; -def : 
InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRHroW)>; -def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRHroX)>; -def : InstRW<[A57Write_5cyc_1L], (instrs LDRHui)>; -def : InstRW<[A57Write_5cyc_1L], (instrs LDRQl)>; -def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRQpost)>; -def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRQpre)>; -def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRQroW)>; -def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRQroX)>; -def : InstRW<[A57Write_5cyc_1L], (instrs LDRQui)>; -def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHWroW)>; -def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHWroX)>; -def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHXroW)>; -def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHXroX)>; -def : InstRW<[A57Write_5cyc_1L], (instrs LDRSl)>; -def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRSpost)>; -def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRSpre)>; -def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRSroW)>; -def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRSroX)>; -def : InstRW<[A57Write_5cyc_1L], (instrs LDRSui)>; -def : InstRW<[A57Write_5cyc_1L], (instrs LDURBi)>; -def : InstRW<[A57Write_5cyc_1L], (instrs LDURDi)>; -def : InstRW<[A57Write_5cyc_1L], (instrs LDURHi)>; -def : InstRW<[A57Write_5cyc_1L], (instrs LDURQi)>; -def : InstRW<[A57Write_5cyc_1L], (instrs LDURSi)>; - -def : InstRW<[A57Write_2cyc_2S], (instrs STNPDi)>; -def : InstRW<[A57Write_4cyc_1I_4S], (instrs STNPQi)>; -def : InstRW<[A57Write_2cyc_2S], (instrs STNPXi)>; -def : InstRW<[A57Write_2cyc_2S], (instrs STPDi)>; -def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPDpost)>; -def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPDpre)>; -def : InstRW<[A57Write_4cyc_1I_4S], (instrs STPQi)>; -def : InstRW<[WriteAdr, A57Write_4cyc_1I_4S], (instrs STPQpost)>; -def : InstRW<[WriteAdr, A57Write_4cyc_2I_4S], (instrs STPQpre)>; -def : 
InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPSpost)>; -def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPSpre)>; -def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPWpost)>; -def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPWpre)>; -def : InstRW<[A57Write_2cyc_2S], (instrs STPXi)>; -def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPXpost)>; -def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPXpre)>; -def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRBBpost)>; -def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRBBpre)>; -def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRBpost)>; -def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRBpre)>; -def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRBroW)>; -def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRBroX)>; -def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRDpost)>; -def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRDpre)>; -def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRHHpost)>; -def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRHHpre)>; -def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHHroW)>; -def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHHroX)>; -def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRHpost)>; -def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRHpre)>; -def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHroW)>; -def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHroX)>; -def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S, ReadAdrBase], (instrs STRQpost)>; -def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STRQpre)>; -def : InstRW<[A57Write_2cyc_1I_2S, ReadAdrBase], (instrs STRQroW)>; -def : InstRW<[A57Write_2cyc_1I_2S, ReadAdrBase], (instrs STRQroX)>; -def : InstRW<[A57Write_2cyc_1I_2S], (instrs STRQui)>; -def : InstRW<[WriteAdr, 
A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRSpost)>; -def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRSpre)>; -def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRWpost)>; -def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRWpre)>; -def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRXpost)>; -def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRXpre)>; -def : InstRW<[A57Write_2cyc_2S], (instrs STURQi)>; - -} // SchedModel = CortexA57Model diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedA57WriteRes.td b/suite/synctools/tablegen/AArch64/AArch64SchedA57WriteRes.td deleted file mode 100644 index a4c090d439..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedA57WriteRes.td +++ /dev/null @@ -1,562 +0,0 @@ -//=- AArch64SchedA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Contains all of the Cortex-A57 specific SchedWriteRes types. The approach -// below is to define a generic SchedWriteRes for every combination of -// latency and microOps. The naming conventions is to use a prefix, one field -// for latency, and one or more microOp count/type designators. -// Prefix: A57Write -// Latency: #cyc -// MicroOp Count/Types: #(B|I|M|L|S|X|W|V) -// Postfix (optional): (XYZ)_Forward -// -// The postfix is added to differentiate SchedWriteRes that are used in -// subsequent SchedReadAdvances. -// -// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are -// 11 micro-ops to be issued down one I pipe, six S pipes and four V pipes. 
-// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Define Generic 1 micro-op types - -def A57Write_5cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 5; } -def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; } -def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; } -def A57Write_5cyc_1V_FP_Forward : SchedWriteRes<[A57UnitV]> { let Latency = 5; } -def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; } -def A57Write_5cyc_1W_Mul_Forward : SchedWriteRes<[A57UnitW]> { let Latency = 5; } -def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; } -def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17; - let ResourceCycles = [17]; } -def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19; - let ResourceCycles = [19]; } -def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; } -def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; } -def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; } -def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; } -def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32; - let ResourceCycles = [32]; } -def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35; - let ResourceCycles = [35]; } -def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; } -def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; } -def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; } -def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; } -def A57Write_4cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 4; } -def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; } -def A57Write_4cyc_1X_NonMul_Forward : SchedWriteRes<[A57UnitX]> { let Latency = 4; } -def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 
9; } -def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; } -def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; } - - -//===----------------------------------------------------------------------===// -// Define Generic 2 micro-op types - -def A57Write_64cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> { - let Latency = 64; - let NumMicroOps = 2; - let ResourceCycles = [32, 32]; -} -def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI, - A57UnitL]> { - let Latency = 6; - let NumMicroOps = 2; -} -def A57Write_7cyc_1V_1X : SchedWriteRes<[A57UnitV, - A57UnitX]> { - let Latency = 7; - let NumMicroOps = 2; -} -def A57Write_8cyc_1L_1V : SchedWriteRes<[A57UnitL, - A57UnitV]> { - let Latency = 8; - let NumMicroOps = 2; -} -def A57Write_9cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { - let Latency = 9; - let NumMicroOps = 2; -} -def A57Write_8cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { - let Latency = 8; - let NumMicroOps = 2; -} -def A57Write_6cyc_2L : SchedWriteRes<[A57UnitL, A57UnitL]> { - let Latency = 6; - let NumMicroOps = 2; -} -def A57Write_6cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { - let Latency = 6; - let NumMicroOps = 2; -} -def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> { - let Latency = 6; - let NumMicroOps = 2; -} -def A57Write_6cyc_2W_Mul_Forward : SchedWriteRes<[A57UnitW, A57UnitW]> { - let Latency = 6; - let NumMicroOps = 2; -} -def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI, - A57UnitL]> { - let Latency = 5; - let NumMicroOps = 2; -} -def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { - let Latency = 5; - let NumMicroOps = 2; -} -def A57Write_5cyc_2V_FP_Forward : SchedWriteRes<[A57UnitV, A57UnitV]> { - let Latency = 5; - let NumMicroOps = 2; -} -def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { - let Latency = 5; - let NumMicroOps = 2; -} -def A57Write_5cyc_2X_NonMul_Forward : SchedWriteRes<[A57UnitX, A57UnitX]> { - let Latency = 5; - let NumMicroOps = 2; -} -def A57Write_10cyc_1L_1V : 
SchedWriteRes<[A57UnitL, - A57UnitV]> { - let Latency = 10; - let NumMicroOps = 2; -} -def A57Write_10cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { - let Latency = 10; - let NumMicroOps = 2; -} -def A57Write_1cyc_1B_1I : SchedWriteRes<[A57UnitB, - A57UnitI]> { - let Latency = 1; - let NumMicroOps = 2; -} -def A57Write_1cyc_1I_1S : SchedWriteRes<[A57UnitI, - A57UnitS]> { - let Latency = 1; - let NumMicroOps = 2; -} -def A57Write_2cyc_1B_1I : SchedWriteRes<[A57UnitB, - A57UnitI]> { - let Latency = 2; - let NumMicroOps = 2; -} -def A57Write_2cyc_2S : SchedWriteRes<[A57UnitS, A57UnitS]> { - let Latency = 2; - let NumMicroOps = 2; -} -def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { - let Latency = 2; - let NumMicroOps = 2; -} -def A57Write_34cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> { - let Latency = 34; - let NumMicroOps = 2; - let ResourceCycles = [17, 17]; -} -def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI, - A57UnitM]> { - let Latency = 3; - let NumMicroOps = 2; -} -def A57Write_3cyc_1I_1S : SchedWriteRes<[A57UnitI, - A57UnitS]> { - let Latency = 3; - let NumMicroOps = 2; -} -def A57Write_3cyc_1S_1V : SchedWriteRes<[A57UnitS, - A57UnitV]> { - let Latency = 3; - let NumMicroOps = 2; -} -def A57Write_3cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { - let Latency = 3; - let NumMicroOps = 2; -} -def A57Write_4cyc_1I_1L : SchedWriteRes<[A57UnitI, - A57UnitL]> { - let Latency = 4; - let NumMicroOps = 2; -} -def A57Write_4cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { - let Latency = 4; - let NumMicroOps = 2; -} - - -//===----------------------------------------------------------------------===// -// Define Generic 3 micro-op types - -def A57Write_10cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { - let Latency = 10; - let NumMicroOps = 3; -} -def A57Write_2cyc_1I_2S : SchedWriteRes<[A57UnitI, - A57UnitS, A57UnitS]> { - let Latency = 2; - let NumMicroOps = 3; -} -def A57Write_3cyc_1I_1S_1V : SchedWriteRes<[A57UnitI, - A57UnitS, - A57UnitV]> { - 
let Latency = 3; - let NumMicroOps = 3; -} -def A57Write_3cyc_1M_2S : SchedWriteRes<[A57UnitM, - A57UnitS, A57UnitS]> { - let Latency = 3; - let NumMicroOps = 3; -} -def A57Write_3cyc_3S : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS]> { - let Latency = 3; - let NumMicroOps = 3; -} -def A57Write_3cyc_2S_1V : SchedWriteRes<[A57UnitS, A57UnitS, - A57UnitV]> { - let Latency = 3; - let NumMicroOps = 3; -} -def A57Write_5cyc_1I_2L : SchedWriteRes<[A57UnitI, - A57UnitL, A57UnitL]> { - let Latency = 5; - let NumMicroOps = 3; -} -def A57Write_6cyc_1I_2L : SchedWriteRes<[A57UnitI, - A57UnitL, A57UnitL]> { - let Latency = 6; - let NumMicroOps = 3; -} -def A57Write_6cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { - let Latency = 6; - let NumMicroOps = 3; -} -def A57Write_7cyc_3L : SchedWriteRes<[A57UnitL, A57UnitL, A57UnitL]> { - let Latency = 7; - let NumMicroOps = 3; -} -def A57Write_8cyc_1I_1L_1V : SchedWriteRes<[A57UnitI, - A57UnitL, - A57UnitV]> { - let Latency = 8; - let NumMicroOps = 3; -} -def A57Write_8cyc_1L_2V : SchedWriteRes<[A57UnitL, - A57UnitV, A57UnitV]> { - let Latency = 8; - let NumMicroOps = 3; -} -def A57Write_8cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { - let Latency = 8; - let NumMicroOps = 3; -} -def A57Write_9cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { - let Latency = 9; - let NumMicroOps = 3; -} - - -//===----------------------------------------------------------------------===// -// Define Generic 4 micro-op types - -def A57Write_2cyc_2I_2S : SchedWriteRes<[A57UnitI, A57UnitI, - A57UnitS, A57UnitS]> { - let Latency = 2; - let NumMicroOps = 4; -} -def A57Write_3cyc_2I_2S : SchedWriteRes<[A57UnitI, A57UnitI, - A57UnitS, A57UnitS]> { - let Latency = 3; - let NumMicroOps = 4; -} -def A57Write_3cyc_1I_3S : SchedWriteRes<[A57UnitI, - A57UnitS, A57UnitS, A57UnitS]> { - let Latency = 3; - let NumMicroOps = 4; -} -def A57Write_3cyc_1I_2S_1V : SchedWriteRes<[A57UnitI, - A57UnitS, A57UnitS, - A57UnitV]> { - let Latency = 3; - 
let NumMicroOps = 4; -} -def A57Write_4cyc_4S : SchedWriteRes<[A57UnitS, A57UnitS, - A57UnitS, A57UnitS]> { - let Latency = 4; - let NumMicroOps = 4; -} -def A57Write_7cyc_1I_3L : SchedWriteRes<[A57UnitI, - A57UnitL, A57UnitL, A57UnitL]> { - let Latency = 7; - let NumMicroOps = 4; -} -def A57Write_5cyc_2I_2L : SchedWriteRes<[A57UnitI, A57UnitI, - A57UnitL, A57UnitL]> { - let Latency = 5; - let NumMicroOps = 4; -} -def A57Write_8cyc_1I_1L_2V : SchedWriteRes<[A57UnitI, - A57UnitL, - A57UnitV, A57UnitV]> { - let Latency = 8; - let NumMicroOps = 4; -} -def A57Write_8cyc_4L : SchedWriteRes<[A57UnitL, A57UnitL, - A57UnitL, A57UnitL]> { - let Latency = 8; - let NumMicroOps = 4; -} -def A57Write_9cyc_2L_2V : SchedWriteRes<[A57UnitL, A57UnitL, - A57UnitV, A57UnitV]> { - let Latency = 9; - let NumMicroOps = 4; -} -def A57Write_9cyc_1L_3V : SchedWriteRes<[A57UnitL, - A57UnitV, A57UnitV, A57UnitV]> { - let Latency = 9; - let NumMicroOps = 4; -} -def A57Write_12cyc_4V : SchedWriteRes<[A57UnitV, A57UnitV, - A57UnitV, A57UnitV]> { - let Latency = 12; - let NumMicroOps = 4; -} - - -//===----------------------------------------------------------------------===// -// Define Generic 5 micro-op types - -def A57Write_3cyc_3S_2V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, - A57UnitV, A57UnitV]> { - let Latency = 3; - let NumMicroOps = 5; -} -def A57Write_8cyc_1I_4L : SchedWriteRes<[A57UnitI, - A57UnitL, A57UnitL, - A57UnitL, A57UnitL]> { - let Latency = 8; - let NumMicroOps = 5; -} -def A57Write_4cyc_1I_4S : SchedWriteRes<[A57UnitI, - A57UnitS, A57UnitS, - A57UnitS, A57UnitS]> { - let Latency = 4; - let NumMicroOps = 5; -} -def A57Write_9cyc_1I_2L_2V : SchedWriteRes<[A57UnitI, - A57UnitL, A57UnitL, - A57UnitV, A57UnitV]> { - let Latency = 9; - let NumMicroOps = 5; -} -def A57Write_9cyc_1I_1L_3V : SchedWriteRes<[A57UnitI, - A57UnitL, - A57UnitV, A57UnitV, A57UnitV]> { - let Latency = 9; - let NumMicroOps = 5; -} -def A57Write_9cyc_2L_3V : SchedWriteRes<[A57UnitL, A57UnitL, - A57UnitV, 
A57UnitV, A57UnitV]> { - let Latency = 9; - let NumMicroOps = 5; -} -def A57Write_9cyc_5V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV, - A57UnitV, A57UnitV]> { - let Latency = 9; - let NumMicroOps = 5; -} - - -//===----------------------------------------------------------------------===// -// Define Generic 6 micro-op types - -def A57Write_3cyc_1I_3S_2V : SchedWriteRes<[A57UnitI, - A57UnitS, A57UnitS, A57UnitS, - A57UnitV, A57UnitV]> { - let Latency = 3; - let NumMicroOps = 6; -} -def A57Write_4cyc_2I_4S : SchedWriteRes<[A57UnitI, A57UnitI, - A57UnitS, A57UnitS, - A57UnitS, A57UnitS]> { - let Latency = 4; - let NumMicroOps = 6; -} -def A57Write_4cyc_4S_2V : SchedWriteRes<[A57UnitS, A57UnitS, - A57UnitS, A57UnitS, - A57UnitV, A57UnitV]> { - let Latency = 4; - let NumMicroOps = 6; -} -def A57Write_6cyc_6S : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, - A57UnitS, A57UnitS, A57UnitS]> { - let Latency = 6; - let NumMicroOps = 6; -} -def A57Write_9cyc_1I_2L_3V : SchedWriteRes<[A57UnitI, - A57UnitL, A57UnitL, - A57UnitV, A57UnitV, A57UnitV]> { - let Latency = 9; - let NumMicroOps = 6; -} -def A57Write_9cyc_1I_1L_4V : SchedWriteRes<[A57UnitI, - A57UnitL, - A57UnitV, A57UnitV, - A57UnitV, A57UnitV]> { - let Latency = 9; - let NumMicroOps = 6; -} -def A57Write_9cyc_2L_4V : SchedWriteRes<[A57UnitL, A57UnitL, - A57UnitV, A57UnitV, - A57UnitV, A57UnitV]> { - let Latency = 9; - let NumMicroOps = 6; -} - - -//===----------------------------------------------------------------------===// -// Define Generic 7 micro-op types - -def A57Write_10cyc_3L_4V : SchedWriteRes<[A57UnitL, A57UnitL, A57UnitL, - A57UnitV, A57UnitV, - A57UnitV, A57UnitV]> { - let Latency = 10; - let NumMicroOps = 7; -} -def A57Write_4cyc_1I_4S_2V : SchedWriteRes<[A57UnitI, - A57UnitS, A57UnitS, - A57UnitS, A57UnitS, - A57UnitV, A57UnitV]> { - let Latency = 4; - let NumMicroOps = 7; -} -def A57Write_6cyc_1I_6S : SchedWriteRes<[A57UnitI, - A57UnitS, A57UnitS, A57UnitS, - A57UnitS, A57UnitS, A57UnitS]> { - let 
Latency = 6; - let NumMicroOps = 7; -} -def A57Write_9cyc_1I_2L_4V : SchedWriteRes<[A57UnitI, - A57UnitL, A57UnitL, - A57UnitV, A57UnitV, - A57UnitV, A57UnitV]> { - let Latency = 9; - let NumMicroOps = 7; -} -def A57Write_12cyc_7V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV, - A57UnitV, A57UnitV, - A57UnitV, A57UnitV]> { - let Latency = 12; - let NumMicroOps = 7; -} - - -//===----------------------------------------------------------------------===// -// Define Generic 8 micro-op types - -def A57Write_10cyc_1I_3L_4V : SchedWriteRes<[A57UnitI, - A57UnitL, A57UnitL, A57UnitL, - A57UnitV, A57UnitV, - A57UnitV, A57UnitV]> { - let Latency = 10; - let NumMicroOps = 8; -} -def A57Write_11cyc_4L_4V : SchedWriteRes<[A57UnitL, A57UnitL, - A57UnitL, A57UnitL, - A57UnitV, A57UnitV, - A57UnitV, A57UnitV]> { - let Latency = 11; - let NumMicroOps = 8; -} -def A57Write_8cyc_8S : SchedWriteRes<[A57UnitS, A57UnitS, - A57UnitS, A57UnitS, - A57UnitS, A57UnitS, - A57UnitS, A57UnitS]> { - let Latency = 8; - let NumMicroOps = 8; -} - - -//===----------------------------------------------------------------------===// -// Define Generic 9 micro-op types - -def A57Write_8cyc_1I_8S : SchedWriteRes<[A57UnitI, - A57UnitS, A57UnitS, - A57UnitS, A57UnitS, - A57UnitS, A57UnitS, - A57UnitS, A57UnitS]> { - let Latency = 8; - let NumMicroOps = 9; -} -def A57Write_11cyc_1I_4L_4V : SchedWriteRes<[A57UnitI, - A57UnitL, A57UnitL, - A57UnitL, A57UnitL, - A57UnitV, A57UnitV, - A57UnitV, A57UnitV]> { - let Latency = 11; - let NumMicroOps = 9; -} -def A57Write_15cyc_9V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV, - A57UnitV, A57UnitV, A57UnitV, - A57UnitV, A57UnitV, A57UnitV]> { - let Latency = 15; - let NumMicroOps = 9; -} - - -//===----------------------------------------------------------------------===// -// Define Generic 10 micro-op types - -def A57Write_6cyc_6S_4V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, - A57UnitS, A57UnitS, A57UnitS, - A57UnitV, A57UnitV, - A57UnitV, A57UnitV]> { - let 
Latency = 6; - let NumMicroOps = 10; -} - - -//===----------------------------------------------------------------------===// -// Define Generic 11 micro-op types - -def A57Write_6cyc_1I_6S_4V : SchedWriteRes<[A57UnitI, - A57UnitS, A57UnitS, A57UnitS, - A57UnitS, A57UnitS, A57UnitS, - A57UnitV, A57UnitV, - A57UnitV, A57UnitV]> { - let Latency = 6; - let NumMicroOps = 11; -} - - -//===----------------------------------------------------------------------===// -// Define Generic 12 micro-op types - -def A57Write_8cyc_8S_4V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, A57UnitS, - A57UnitS, A57UnitS, A57UnitS, A57UnitS, - A57UnitV, A57UnitV, - A57UnitV, A57UnitV]> { - let Latency = 8; - let NumMicroOps = 12; -} - -//===----------------------------------------------------------------------===// -// Define Generic 13 micro-op types - -def A57Write_8cyc_1I_8S_4V : SchedWriteRes<[A57UnitI, - A57UnitS, A57UnitS, A57UnitS, - A57UnitS, A57UnitS, A57UnitS, - A57UnitS, A57UnitS, - A57UnitV, A57UnitV, - A57UnitV, A57UnitV]> { - let Latency = 8; - let NumMicroOps = 13; -} - diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedA64FX.td b/suite/synctools/tablegen/AArch64/AArch64SchedA64FX.td deleted file mode 100644 index fa10d056b7..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedA64FX.td +++ /dev/null @@ -1,3896 +0,0 @@ -//=- AArch64SchedA64FX.td - Fujitsu A64FX Scheduling Defs -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the scheduling model for the Fujitsu A64FX processors. -// -//===----------------------------------------------------------------------===// - -def A64FXModel : SchedMachineModel { - let IssueWidth = 6; // 6 micro-ops dispatched at a time. 
- let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer. - let LoadLatency = 5; // Optimistic load latency. - let MispredictPenalty = 12; // Extra cycles for mispredicted branch. - // Determined via a mix of micro-arch details and experimentation. - let LoopMicroOpBufferSize = 128; - let PostRAScheduler = 1; // Using PostRA sched. - let CompleteModel = 1; - - list UnsupportedFeatures = - [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth, - HasSVE2orStreamingSVE]; - - let FullInstRWOverlapCheck = 0; -} - -let SchedModel = A64FXModel in { - -// Define the issue ports. - -// A64FXIP* - -// Port 0 -def A64FXIPFLA : ProcResource<1>; - -// Port 1 -def A64FXIPPR : ProcResource<1>; - -// Port 2 -def A64FXIPEXA : ProcResource<1>; - -// Port 3 -def A64FXIPFLB : ProcResource<1>; - -// Port 4 -def A64FXIPEXB : ProcResource<1>; - -// Port 5 -def A64FXIPEAGA : ProcResource<1>; - -// Port 6 -def A64FXIPEAGB : ProcResource<1>; - -// Port 7 -def A64FXIPBR : ProcResource<1>; - -// Define groups for the functional units on each issue port. Each group -// created will be used by a WriteRes later on. 
- -def A64FXGI7 : ProcResGroup<[A64FXIPBR]>; - -def A64FXGI0 : ProcResGroup<[A64FXIPFLA]>; - -def A64FXGI1 : ProcResGroup<[A64FXIPPR]>; - -def A64FXGI2 : ProcResGroup<[A64FXIPEXA]>; - -def A64FXGI3 : ProcResGroup<[A64FXIPFLB]>; - -def A64FXGI4 : ProcResGroup<[A64FXIPEXB]>; - -def A64FXGI5 : ProcResGroup<[A64FXIPEAGA]>; - -def A64FXGI6 : ProcResGroup<[A64FXIPEAGB]>; - -def A64FXGI03 : ProcResGroup<[A64FXIPFLA, A64FXIPFLB]>; - -def A64FXGI01 : ProcResGroup<[A64FXIPFLA, A64FXIPPR]>; - -def A64FXGI02 : ProcResGroup<[A64FXIPFLA, A64FXIPEXA]>; - -def A64FXGI12 : ProcResGroup<[A64FXIPEXA, A64FXIPPR]>; - -def A64FXGI15 : ProcResGroup<[A64FXIPEAGA, A64FXIPPR]>; - -def A64FXGI05 : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA]>; - -def A64FXGI24 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB]>; - -def A64FXGI124 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB, A64FXIPPR]>; - -def A64FXGI056 : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA, A64FXIPEAGB]>; - -def A64FXGI0256 : ProcResGroup<[A64FXIPFLA, A64FXIPEXA, A64FXIPEAGA, A64FXIPEAGB]>; - -def A64FXGI56 : ProcResGroup<[A64FXIPEAGA, A64FXIPEAGB]>; - -def A64FXGI2456 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB]>; - -def A64FXAny : ProcResGroup<[A64FXIPFLA, A64FXIPPR, A64FXIPEXA, A64FXIPFLB, - A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB, A64FXIPBR]> { - let BufferSize = 60; -} - -def A64FXWrite_6Cyc : SchedWriteRes<[]> { - let Latency = 6; -} - -def A64FXWrite_1Cyc_GI7 : SchedWriteRes<[A64FXGI7]> { - let Latency = 1; -} - -def A64FXWrite_2Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 2; -} - -def A64FXWrite_4Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 4; -} - -def A64FXWrite_5Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 5; -} - -def A64FXWrite_6Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 6; -} - -def A64FXWrite_8Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 8; -} - -def A64FXWrite_9Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 9; -} - -def A64FXWrite_13Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - 
let Latency = 13; -} - -def A64FXWrite_37Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 37; -} - -def A64FXWrite_98Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 98; -} - -def A64FXWrite_134Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 134; -} - -def A64FXWrite_154Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 154; -} - -def A64FXWrite_4Cyc_GI01 : SchedWriteRes<[A64FXGI01]> { - let Latency = 4; -} - -def A64FXWrite_6Cyc_GI01 : SchedWriteRes<[A64FXGI01]> { - let Latency = 6; -} - -def A64FXWrite_8Cyc_GI01 : SchedWriteRes<[A64FXGI01]> { - let Latency = 8; -} - -def A64FXWrite_12Cyc_GI01 : SchedWriteRes<[A64FXGI01]> { - let Latency = 12; -} - -def A64FXWrite_10Cyc_GI02 : SchedWriteRes<[A64FXGI02]> { - let Latency = 10; -} - -def A64FXWrite_17Cyc_GI02 : SchedWriteRes<[A64FXGI02]> { - let Latency = 17; -} - -def A64FXWrite_21Cyc_GI02 : SchedWriteRes<[A64FXGI02]> { - let Latency = 21; -} - -def A64FXWrite_3Cyc_GI1 : SchedWriteRes<[A64FXGI1]> { - let Latency = 3; -} - -def A64FXWrite_6Cyc_NGI1 : SchedWriteRes<[A64FXGI1]> { - let Latency = 3; - let NumMicroOps = 2; -} - -def A64FXWrite_4Cyc_GI12 : SchedWriteRes<[A64FXGI12]> { - let Latency = 4; -} - -def A64FXWrite_3Cyc_GI2 : SchedWriteRes<[A64FXGI2]> { - let Latency = 3; -} - -def A64FXWrite_5Cyc_GI2 : SchedWriteRes<[A64FXGI2]> { - let Latency = 5; -} - -def A64FXWrite_6Cyc_GI2 : SchedWriteRes<[A64FXGI2]> { - let Latency = 6; -} - -def A64FXWrite_4Cyc_GI3 : SchedWriteRes<[A64FXGI3]> { - let Latency = 4; -} - -def A64FXWrite_6Cyc_GI3 : SchedWriteRes<[A64FXGI3]> { - let Latency = 6; -} - -def A64FXWrite_6Cyc_GI15 : SchedWriteRes<[A64FXGI15]> { - let Latency = 6; -} - -def A64FXWrite_3Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 3; -} - -def A64FXWrite_4Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 4; -} - -def A64FXWrite_6Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 6; -} - -def A64FXWrite_8Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 8; -} - -def 
A64FXWrite_9Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 9; -} - -def A64FXWrite_10Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; -} - -def A64FXWrite_12Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 12; -} - -def A64FXWrite_14Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 14; -} - -def A64FXWrite_15Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 15; -} - -def A64FXWrite_15Cyc_NGI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 15; - let NumMicroOps = 2; -} - -def A64FXWrite_18Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 18; -} - -def A64FXWrite_45Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 45; -} - -def A64FXWrite_60Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 60; -} - -def A64FXWrite_75Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 75; -} - -def A64FXWrite_6Cyc_GI05 : SchedWriteRes<[A64FXGI05]> { - let Latency = 6; -} - -def A64FXWrite_10Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { - let Latency = 10; -} - -def A64FXWrite_12Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { - let Latency = 12; -} - -def A64FXWrite_20Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { - let Latency = 20; -} - -def A64FXWrite_5Cyc_GI5 : SchedWriteRes<[A64FXGI5]> { - let Latency = 5; -} - -def A64FXWrite_11Cyc_GI5 : SchedWriteRes<[A64FXGI5]> { - let Latency = 11; -} - -def A64FXWrite_5Cyc_GI6 : SchedWriteRes<[A64FXGI6]> { - let Latency = 5; -} - -def A64FXWrite_1Cyc_GI24 : SchedWriteRes<[A64FXGI24]> { - let Latency = 1; -} - -def A64FXWrite_2Cyc_GI24 : SchedWriteRes<[A64FXGI24]> { - let Latency = 2; -} - -def A64FXWrite_4Cyc_NGI24 : SchedWriteRes<[A64FXGI24]> { - let Latency = 4; - let NumMicroOps = 4; -} - -def A64FXWrite_6Cyc_GI124: SchedWriteRes<[A64FXGI124]> { - let Latency = 6; -} - -def A64FXWrite_8Cyc_GI124 : SchedWriteRes<[A64FXGI124]> { - let Latency = 8; - let NumMicroOps = 2; -} - -def A64FXWrite_6Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_1Cyc_GI56 : 
SchedWriteRes<[A64FXGI56]> { - let Latency = 1; -} - -def A64FXWrite_5Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { - let Latency = 5; -} - -def A64FXWrite_8Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { - let Latency = 8; -} - -def A64FXWrite_11Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { - let Latency = 11; -} - -def A64FXWrite_44Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { - let Latency = 44; -} - -def A64FXWrite_10Cyc_GI056 : SchedWriteRes<[A64FXGI056]> { - let Latency = 10; -} - -def A64FXWrite_15Cyc_GI056 : SchedWriteRes<[A64FXGI056]> { - let Latency = 15; -} - -def A64FXWrite_19Cyc_GI056 : SchedWriteRes<[A64FXGI056]> { - let Latency = 19; -} - -def A64FXWrite_25Cyc_GI056 : SchedWriteRes<[A64FXGI056]> { - let Latency = 25; -} - -def A64FXWrite_14Cyc_GI0256 : SchedWriteRes<[A64FXGI0256]> { - let Latency = 14; -} - -def A64FXWrite_19Cyc_GI0256 : SchedWriteRes<[A64FXGI0256]> { - let Latency = 19; -} - -def A64FXWrite_29Cyc_GI0256 : SchedWriteRes<[A64FXGI0256]> { - let Latency = 29; -} - -def A64FXWrite_LDNP: SchedWriteRes<[A64FXGI56]> { - let Latency = 5; - let NumMicroOps = 2; -} - -def A64FXWrite_LDP01: SchedWriteRes<[A64FXGI2456]> { - let Latency = 5; - let NumMicroOps = 3; -} - -def A64FXWrite_LDR01: SchedWriteRes<[A64FXGI2456]> { - let Latency = 5; - let NumMicroOps = 2; -} - -def A64FXWrite_LD102: SchedWriteRes<[A64FXGI56]> { - let Latency = 8; - let NumMicroOps = 2; -} - -def A64FXWrite_LD103: SchedWriteRes<[A64FXGI56]> { - let Latency = 11; - let NumMicroOps = 2; - -} - -def A64FXWrite_LD104: SchedWriteRes<[A64FXGI56]> { - let Latency = 8; - let NumMicroOps = 3; -} - -def A64FXWrite_LD105: SchedWriteRes<[A64FXGI56]> { - let Latency = 11; - let NumMicroOps = 3; -} - -def A64FXWrite_LD106: SchedWriteRes<[A64FXGI56]> { - let Latency = 8; - let NumMicroOps = 4; -} - -def A64FXWrite_LD107: SchedWriteRes<[A64FXGI56]> { - let Latency = 11; - let NumMicroOps = 4; -} - -def A64FXWrite_LD108: SchedWriteRes<[A64FXGI56]> { - let Latency = 8; - let NumMicroOps = 2; -} - -def 
A64FXWrite_LD109: SchedWriteRes<[A64FXGI56]> { - let Latency = 11; - let NumMicroOps = 2; -} - -def A64FXWrite_LD110: SchedWriteRes<[A64FXGI56]> { - let Latency = 8; - let NumMicroOps = 3; -} - -def A64FXWrite_LD111: SchedWriteRes<[A64FXGI56]> { - let Latency = 11; - let NumMicroOps = 3; -} - -def A64FXWrite_LD112: SchedWriteRes<[A64FXGI56]> { - let Latency = 8; - let NumMicroOps = 4; -} - -def A64FXWrite_LD113: SchedWriteRes<[A64FXGI56]> { - let Latency = 11; - let NumMicroOps = 4; -} - -def A64FXWrite_LD114: SchedWriteRes<[A64FXGI56]> { - let Latency = 8; - let NumMicroOps = 5; -} - -def A64FXWrite_LD115: SchedWriteRes<[A64FXGI56]> { - let Latency = 11; - let NumMicroOps = 5; -} - -def A64FXWrite_LD1I0: SchedWriteRes<[A64FXGI056]> { - let Latency = 8; - let NumMicroOps = 2; -} - -def A64FXWrite_LD1I1: SchedWriteRes<[A64FXGI056]> { - let Latency = 8; - let NumMicroOps = 3; -} - -def A64FXWrite_LD2I0: SchedWriteRes<[A64FXGI056]> { - let Latency = 8; - let NumMicroOps = 4; -} - -def A64FXWrite_LD2I1: SchedWriteRes<[A64FXGI056]> { - let Latency = 8; - let NumMicroOps = 5; -} - -def A64FXWrite_LD3I0: SchedWriteRes<[A64FXGI056]> { - let Latency = 8; - let NumMicroOps = 6; -} - -def A64FXWrite_LD3I1: SchedWriteRes<[A64FXGI056]> { - let Latency = 8; - let NumMicroOps = 7; -} - -def A64FXWrite_LD4I0: SchedWriteRes<[A64FXGI056]> { - let Latency = 8; - let NumMicroOps = 8; -} - -def A64FXWrite_LD4I1: SchedWriteRes<[A64FXGI056]> { - let Latency = 8; - let NumMicroOps = 9; -} - -def A64FXWrite_1Cyc_GI2456 : SchedWriteRes<[A64FXGI2456]> { - let Latency = 1; -} - -def A64FXWrite_FMOV_GV : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; -} - -def A64FXWrite_FMOV_VG14 : SchedWriteRes<[A64FXGI03]> { - let Latency = 14; -} - -def A64FXWrite_FMOV_VG : SchedWriteRes<[A64FXGI03]> { - let Latency = 25; -} - -def A64FXWrite_ADDLV : SchedWriteRes<[A64FXGI03]> { - let Latency = 12; -} - -def A64FXWrite_MULLE : SchedWriteRes<[A64FXGI03]> { - let Latency = 14; -} - -def A64FXWrite_MULLV : 
SchedWriteRes<[A64FXGI03]> { - let Latency = 14; -} - -def A64FXWrite_MADDL : SchedWriteRes<[A64FXGI03]> { - let Latency = 6; -} - -def A64FXWrite_ABA : SchedWriteRes<[A64FXGI03]> { - let Latency = 8; -} - -def A64FXWrite_ABAL : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; -} - -def A64FXWrite_ADDLV1 : SchedWriteRes<[A64FXGI03]> { - let Latency = 12; - let NumMicroOps = 6; -} - -def A64FXWrite_MINMAXV : SchedWriteRes<[A64FXGI03]> { - let Latency = 14; - let NumMicroOps = 6; -} - -def A64FXWrite_SQRDMULH : SchedWriteRes<[A64FXGI03]> { - let Latency = 9; -} - -def A64FXWrite_PMUL : SchedWriteRes<[A64FXGI03]> { - let Latency = 8; -} - - -def A64FXWrite_SRSRAV : SchedWriteRes<[A64FXGI03]> { - let Latency = 8; - let NumMicroOps = 3; -} - -def A64FXWrite_SSRAV : SchedWriteRes<[A64FXGI03]> { - let Latency = 8; - let NumMicroOps = 2; -} - -def A64FXWrite_RSHRN : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; - let NumMicroOps = 3; -} - -def A64FXWrite_SHRN : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; - let NumMicroOps = 2; -} - - -def A64FXWrite_ADDP : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; - let NumMicroOps = 3; -} - -def A64FXWrite_FMULXE : SchedWriteRes<[A64FXGI03]> { - let Latency = 15; - let NumMicroOps = 2; -} - -def A64FXWrite_FADDPV : SchedWriteRes<[A64FXGI03]> { - let Latency = 15; - let NumMicroOps = 3; -} - -def A64FXWrite_SADALP : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; - let NumMicroOps = 3; -} - -def A64FXWrite_SADDLP : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; - let NumMicroOps = 2; -} - -def A64FXWrite_FCVTXNV : SchedWriteRes<[A64FXGI03]> { - let Latency = 15; - let NumMicroOps = 2; -} - -def A64FXWrite_FMAXVVH : SchedWriteRes<[A64FXGI03]> { - let Latency = 14; - let NumMicroOps = 7; -} - -def A64FXWrite_FMAXVVS : SchedWriteRes<[A64FXGI03]> { - let Latency = 14; -} - -def A64FXWrite_BIF : SchedWriteRes<[A64FXGI03]> { - let Latency = 5; -} - -def A64FXWrite_DUPGENERAL : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; -} 
- -def A64FXWrite_SHA00 : SchedWriteRes<[A64FXGI0]> { - let Latency = 9; -} - -def A64FXWrite_SHA01 : SchedWriteRes<[A64FXGI0]> { - let Latency = 12; -} - -def A64FXWrite_SMOV : SchedWriteRes<[A64FXGI03]> { - let Latency = 25; -} - -def A64FXWrite_TBX1 : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; - let NumMicroOps = 3; -} - -def A64FXWrite_TBX2 : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; - let NumMicroOps = 5; -} - -def A64FXWrite_TBX3 : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; - let NumMicroOps = 7; -} - -def A64FXWrite_TBX4 : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; - let NumMicroOps = 9; -} - -def A64FXWrite_PREF0: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_PREF1: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_SWP: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_STUR: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_STNP: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_STP01: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_ST10: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_ST11: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_ST12: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_ST13: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_ST14: SchedWriteRes<[A64FXGI56]> { - let Latency = 1; -} - -def A64FXWrite_ST15: SchedWriteRes<[A64FXGI56]> { - let Latency = 1; -} - -def A64FXWrite_ST16: SchedWriteRes<[A64FXGI56]> { - let Latency = 1; -} - -def A64FXWrite_ST17: SchedWriteRes<[A64FXGI56]> { - let Latency = 1; -} - -def A64FXWrite_ST1W_6: SchedWriteRes<[A64FXGI056]> { - let Latency = 6; -} - -def A64FXWrite_ST2W_7: SchedWriteRes<[A64FXGI056]> { - let Latency = 7; -} - -def A64FXWrite_ST3W_8: SchedWriteRes<[A64FXGI056]> { - let Latency = 8; -} - -def A64FXWrite_ST4W_9: SchedWriteRes<[A64FXGI056]> { - let Latency = 9; -} - 
-def A64FXWrite_ST1W_15: SchedWriteRes<[A64FXGI056]> { - let Latency = 15; -} - -def A64FXWrite_ST1W_19: SchedWriteRes<[A64FXGI056]> { - let Latency = 19; -} - -def A64FXWrite_CAS: SchedWriteRes<[A64FXGI56]> { - let Latency = 7; -} - -// Define commonly used read types. - -// No forwarding is provided for these types. -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -//===----------------------------------------------------------------------===// -// 3. Instruction Tables. - -//--- -// 3.1 Branch Instructions -//--- - -// Branch, immed -// Branch and link, immed -// Compare and branch -def : WriteRes { - let Latency = 1; -} - -// Branch, register -// Branch and link, register != LR -// Branch and link, register = LR -def : WriteRes { - let Latency = 1; -} - -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -def : WriteRes { - let Latency = 4; -} - -//--- -// Branch -//--- -def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs B, BL, BR, BLR)>; -def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs RET)>; -def : InstRW<[A64FXWrite_1Cyc_GI7], (instregex "^B..$")>; -def : InstRW<[A64FXWrite_1Cyc_GI7], - (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>; - -//--- -// 3.2 Arithmetic and Logical Instructions -// 3.3 Move and Shift Instructions -//--- - -// ALU, basic -// Conditional compare -// Conditional select -// Address generation -def : WriteRes { - let Latency = 1; - let ResourceCycles = [1]; -} - -def : InstRW<[WriteI], - (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", - "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", - "ADC(W|X)r", - "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", - "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", - "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", - "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", - "SBCS(W|X)r", "CCMN(W|X)(i|r)", - 
"CCMP(W|X)(i|r)", "CSEL(W|X)r", - "CSINC(W|X)r", "CSINV(W|X)r", - "CSNEG(W|X)r")>; - -def : InstRW<[WriteI], (instrs COPY)>; - -// ALU, extend and/or shift -def : WriteRes { - let Latency = 2; - let ResourceCycles = [1]; -} - -def : InstRW<[WriteISReg], - (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", - "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", - "ADC(W|X)r", - "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", - "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", - "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", - "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", - "SBCS(W|X)r", "CCMN(W|X)(i|r)", - "CCMP(W|X)(i|r)", "CSEL(W|X)r", - "CSINC(W|X)r", "CSINV(W|X)r", - "CSNEG(W|X)r")>; - -def : WriteRes { - let Latency = 1; - let ResourceCycles = [1]; -} - -def : InstRW<[WriteIEReg], - (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", - "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", - "ADC(W|X)r", - "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", - "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", - "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", - "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", - "SBCS(W|X)r", "CCMN(W|X)(i|r)", - "CCMP(W|X)(i|r)", "CSEL(W|X)r", - "CSINC(W|X)r", "CSINV(W|X)r", - "CSNEG(W|X)r")>; - -// Move immed -def : WriteRes { - let Latency = 1; - let ResourceCycles = [1]; -} - -def : InstRW<[A64FXWrite_1Cyc_GI2456], - (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>; - -def : InstRW<[A64FXWrite_2Cyc_GI24], - (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>; - -// Variable shift -def : WriteRes { - let Latency = 1; - let ResourceCycles = [1]; -} - -//--- -// 3.4 Divide and Multiply Instructions -//--- - -// Divide, W-form -def : WriteRes { - let Latency = 39; - let ResourceCycles = [39]; -} - -// Divide, X-form -def : WriteRes { - let Latency = 23; - let ResourceCycles = [23]; -} - -// Multiply accumulate, W-form -def : WriteRes { - let Latency = 5; - let ResourceCycles = [1]; -} - -// Multiply accumulate, X-form -def : WriteRes { - let 
Latency = 5; - let ResourceCycles = [1]; -} - -def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>; -def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>; -def : InstRW<[A64FXWrite_MADDL], - (instregex "(S|U)(MADDL|MSUBL)rrr")>; - -def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>; -def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>; - -// Bitfield extract, two reg -def : WriteRes { - let Latency = 1; - let ResourceCycles = [1]; -} - -// Multiply high -def : InstRW<[A64FXWrite_5Cyc_GI2], (instrs SMULHrr, UMULHrr)>; - -// Miscellaneous Data-Processing Instructions -// Bitfield extract -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs EXTRWrri, EXTRXrri)>; - -// Bitifield move - basic -def : InstRW<[A64FXWrite_1Cyc_GI24], - (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>; - -// Bitfield move, insert -def : InstRW<[A64FXWrite_4Cyc_NGI24], (instregex "^BFM")>; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instregex "(S|U)?BFM.*")>; - -// Count leading -def : InstRW<[A64FXWrite_2Cyc_GI0], (instregex "^CLS(W|X)r$", - "^CLZ(W|X)r$")>; - -// Reverse bits -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBITWr, RBITXr)>; - -// Cryptography Extensions -def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AES[DE]")>; -def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AESI?MC")>; -def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^PMULL")>; -def : InstRW<[A64FXWrite_SHA00], (instregex "^SHA1SU0")>; -def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA1(H|SU1)")>; -def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA1[CMP]")>; -def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU0")>; -def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU1")>; -def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA256(H|H2)")>; - -// CRC Instructions -def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32Brr, CRC32Hrr)>; -def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32Wrr)>; -def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32Xrr)>; - -def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32CBrr, CRC32CHrr)>; -def : 
InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32CWrr)>; -def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32CXrr)>; - -// Reverse bits/bytes -// NOTE: Handled by WriteI. - -//--- -// 3.6 Load Instructions -// 3.10 FP Load Instructions -//--- - -// Load register, literal -// Load register, unscaled immed -// Load register, immed unprivileged -// Load register, unsigned immed -def : WriteRes { - let Latency = 4; - let ResourceCycles = [3]; -} - -// Load register, immed post-index -// NOTE: Handled by WriteLD, WriteI. -// Load register, immed pre-index -// NOTE: Handled by WriteLD, WriteAdr. -def : WriteRes { - let Latency = 1; - let ResourceCycles = [1]; -} - -// Load pair, immed offset, normal -// Load pair, immed offset, signed words, base != SP -// Load pair, immed offset signed words, base = SP -// LDP only breaks into *one* LS micro-op. Thus -// the resources are handled by WriteLD. -def : WriteRes { - let Latency = 5; -} - -// Load register offset, basic -// Load register, register offset, scale by 4/8 -// Load register, register offset, scale by 2 -// Load register offset, extend -// Load register, register offset, extend, scale by 4/8 -// Load register, register offset, extend, scale by 2 -def A64FXWriteLDIdx : SchedWriteVariant<[ - SchedVar, - SchedVar]>; -def : SchedAlias; - -def A64FXReadAdrBase : SchedReadVariant<[ - SchedVar, - SchedVar]>; -def : SchedAlias; - -// Load pair, immed pre-index, normal -// Load pair, immed pre-index, signed words -// Load pair, immed post-index, normal -// Load pair, immed post-index, signed words -// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr. 
- -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPDi)>; -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPQi)>; -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPSi)>; -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPWi)>; -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPXi)>; - -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPDi)>; -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPQi)>; -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPSi)>; -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPSWi)>; -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPWi)>; -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPXi)>; - -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRBui)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRDui)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRHui)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRQui)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRSui)>; - -def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRDl)>; -def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRQl)>; -def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRWl)>; -def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRXl)>; - -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRBi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRHi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRWi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRXi)>; - -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSBWi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSBXi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSHWi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSHXi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSWi)>; - -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPDpre)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPQpre)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPSpre)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs 
LDPWpre)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPWpre)>; - -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRDpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRQpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRWpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRXpre)>; - -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBWpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBXpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBWpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBXpost)>; - -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHWpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHXpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHWpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHXpost)>; - -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBBpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBBpost)>; - -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHHpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHHpost)>; - -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPDpost)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPQpost)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPSpost)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPWpost)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPXpost)>; - -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRBpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRDpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRHpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRQpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], 
(instrs LDRSpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRWpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRXpost)>; - -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPDpre)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPQpre)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPSpre)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPWpre)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPXpre)>; - -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRDpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRQpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRWpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRXpre)>; - -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPDpost)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPQpost)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPSpost)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPWpost)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPXpost)>; - -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRBpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRDpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRHpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRQpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRSpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRWpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRXpost)>; - -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRBroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRDroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs 
LDRHroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHHroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRQroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHWroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHXroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRWroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRXroW)>; - -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRBroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRDroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHHroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRQroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHWroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHXroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRWroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRXroX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRBroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRBroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRDroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRHroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRHHroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRQroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRSroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRSHWroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRSHXroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRWroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - 
(instrs LDRXroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRBroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRDroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRHroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRHHroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRQroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRSroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRSHWroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRSHXroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRWroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRXroX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURBi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURBBi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURDi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURHi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURHHi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURQi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURXi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSBWi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSBXi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSHWi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSHXi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSWi)>; - -//--- -// Prefetch -//--- -def : InstRW<[A64FXWrite_PREF0], (instrs PRFMl)>; -def : InstRW<[A64FXWrite_PREF1], (instrs PRFUMi)>; -def : InstRW<[A64FXWrite_PREF1], (instrs PRFMui)>; -def : InstRW<[A64FXWrite_PREF1], (instrs PRFMroW)>; -def : InstRW<[A64FXWrite_PREF1], (instrs PRFMroX)>; - -//-- -// 3.7 Store Instructions -// 3.11 FP Store Instructions -//-- - -// Store register, unscaled immed -// Store register, immed unprivileged -// Store register, unsigned immed -def : WriteRes { - let Latency = 1; -} - 
-// Store register, immed post-index -// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase - -// Store register, immed pre-index -// NOTE: Handled by WriteAdr, WriteST - -// Store register, register offset, basic -// Store register, register offset, scaled by 4/8 -// Store register, register offset, scaled by 2 -// Store register, register offset, extend -// Store register, register offset, extend, scale by 4/8 -// Store register, register offset, extend, scale by 1 -def : WriteRes { - let Latency = 1; -} - -// Store pair, immed offset, W-form -// Store pair, immed offset, X-form -def : WriteRes { - let Latency = 1; -} - -// Store pair, immed post-index, W-form -// Store pair, immed post-index, X-form -// Store pair, immed pre-index, W-form -// Store pair, immed pre-index, X-form -// NOTE: Handled by WriteAdr, WriteSTP. - -def : InstRW<[A64FXWrite_STUR], (instrs STURBi)>; -def : InstRW<[A64FXWrite_STUR], (instrs STURBBi)>; -def : InstRW<[A64FXWrite_STUR], (instrs STURDi)>; -def : InstRW<[A64FXWrite_STUR], (instrs STURHi)>; -def : InstRW<[A64FXWrite_STUR], (instrs STURHHi)>; -def : InstRW<[A64FXWrite_STUR], (instrs STURQi)>; -def : InstRW<[A64FXWrite_STUR], (instrs STURSi)>; -def : InstRW<[A64FXWrite_STUR], (instrs STURWi)>; -def : InstRW<[A64FXWrite_STUR], (instrs STURXi)>; - -def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRBi)>; -def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRHi)>; -def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRWi)>; -def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRXi)>; - -def : InstRW<[A64FXWrite_STNP], (instrs STNPDi)>; -def : InstRW<[A64FXWrite_STNP], (instrs STNPQi)>; -def : InstRW<[A64FXWrite_STNP], (instrs STNPXi)>; -def : InstRW<[A64FXWrite_STNP], (instrs STNPWi)>; - -def : InstRW<[A64FXWrite_STNP], (instrs STPDi)>; -def : InstRW<[A64FXWrite_STNP], (instrs STPQi)>; -def : InstRW<[A64FXWrite_STNP], (instrs STPXi)>; -def : InstRW<[A64FXWrite_STNP], (instrs STPWi)>; - -def : InstRW<[A64FXWrite_STUR], (instrs STRBui)>; 
-def : InstRW<[A64FXWrite_STUR], (instrs STRBui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRDui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRDui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRHui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRHui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRQui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRQui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRXui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRXui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRWui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRWui)>; - -def : InstRW<[A64FXWrite_STP01], - (instrs STPDpre, STPDpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPDpre, STPDpost)>; -def : InstRW<[A64FXWrite_STP01], - (instrs STPDpre, STPDpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPDpre, STPDpost)>; -def : InstRW<[A64FXWrite_STP01], - (instrs STPQpre, STPQpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPQpre, STPQpost)>; -def : InstRW<[A64FXWrite_STP01], - (instrs STPQpre, STPQpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPQpre, STPQpost)>; -def : InstRW<[A64FXWrite_STP01], - (instrs STPSpre, STPSpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPSpre, STPSpost)>; -def : InstRW<[A64FXWrite_STP01], - (instrs STPSpre, STPSpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPSpre, STPSpost)>; -def : InstRW<[A64FXWrite_STP01], - (instrs STPWpre, STPWpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPWpre, STPWpost)>; -def : InstRW<[A64FXWrite_STP01], - (instrs STPWpre, STPWpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPWpre, STPWpost)>; -def : InstRW<[A64FXWrite_STP01], - (instrs STPXpre, STPXpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPXpre, STPXpost)>; -def : InstRW<[A64FXWrite_STP01], - (instrs STPXpre, STPXpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPXpre, STPXpost)>; - -def 
: InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRBpre, STRBpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRBpre, STRBpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRBpre, STRBpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRBpre, STRBpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRBBpre, STRBBpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRBBpre, STRBBpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRBBpre, STRBBpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRBBpre, STRBBpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRDpre, STRDpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRDpre, STRDpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRDpre, STRDpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRDpre, STRDpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRHpre, STRHpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRHpre, STRHpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRHpre, STRHpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRHpre, STRHpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRHHpre, STRHHpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRHHpre, STRHHpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRHHpre, STRHHpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRHHpre, STRHHpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRQpre, STRQpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRQpre, STRQpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRQpre, STRQpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRQpre, STRQpost)>; -def : InstRW<[WriteAdr, 
A64FXWrite_STP01], - (instrs STRSpre, STRSpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRSpre, STRSpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRSpre, STRSpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRSpre, STRSpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRWpre, STRWpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRWpre, STRWpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRWpre, STRWpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRWpre, STRWpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRXpre, STRXpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRXpre, STRXpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRXpre, STRXpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRXpre, STRXpost)>; - -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRBroW, STRBroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRBroW, STRBroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRBBroW, STRBBroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRBBroW, STRBBroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRDroW, STRDroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRDroW, STRDroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRHroW, STRHroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRHroW, STRHroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRHHroW, STRHHroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRHHroW, STRHHroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRQroW, STRQroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRQroW, STRQroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRSroW, STRSroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - 
(instrs STRSroW, STRSroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRWroW, STRWroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRWroW, STRWroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRXroW, STRXroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRXroW, STRXroX)>; - -//--- -// 3.8 FP Data Processing Instructions -//--- - -// FP absolute value -// FP min/max -// FP negate -def : WriteRes { - let Latency = 4; - let ResourceCycles = [2]; -} - -// FP arithmetic - -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FADDDrr, FADDHrr)>; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FSUBDrr, FSUBHrr)>; - -// FP compare -def : WriteRes { - let Latency = 4; - let ResourceCycles = [2]; -} - -// FP Div, Sqrt -def : WriteRes { - let Latency = 43; -} - -def A64FXXWriteFDiv : SchedWriteRes<[A64FXGI0]> { - let Latency = 38; -} - -def A64FXXWriteFDivSP : SchedWriteRes<[A64FXGI0]> { - let Latency = 29; -} - -def A64FXXWriteFDivDP : SchedWriteRes<[A64FXGI0]> { - let Latency = 43; -} - -def A64FXXWriteFSqrtSP : SchedWriteRes<[A64FXGI0]> { - let Latency = 29; -} - -def A64FXXWriteFSqrtDP : SchedWriteRes<[A64FXGI0]> { - let Latency = 43; -} - -// FP divide, S-form -// FP square root, S-form -def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVSrr)>; -def : InstRW<[A64FXXWriteFSqrtSP], (instrs FSQRTSr)>; -def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVv.*32$")>; -def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^.*SQRT.*32$")>; -def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVSrr")>; -def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^FSQRTSr")>; - -// FP divide, D-form -// FP square root, D-form -def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVDrr)>; -def : InstRW<[A64FXXWriteFSqrtDP], (instrs FSQRTDr)>; -def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVv.*64$")>; -def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^.*SQRT.*64$")>; -def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVDrr")>; -def : InstRW<[A64FXXWriteFSqrtDP], 
(instregex "^FSQRTDr")>; - -// FP multiply -// FP multiply accumulate -def : WriteRes { - let Latency = 9; - let ResourceCycles = [2]; -} - -def A64FXXWriteFMul : SchedWriteRes<[A64FXGI03]> { - let Latency = 9; - let ResourceCycles = [2]; -} - -def A64FXXWriteFMulAcc : SchedWriteRes<[A64FXGI03]> { - let Latency = 9; - let ResourceCycles = [2]; -} - -def : InstRW<[A64FXXWriteFMul], (instregex "^FMUL", "^FNMUL")>; -def : InstRW<[A64FXXWriteFMulAcc], - (instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>; - -// FP round to integral -def : InstRW<[A64FXWrite_9Cyc_GI03], - (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>; - -// FP select -def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCSEL")>; - -//--- -// 3.9 FP Miscellaneous Instructions -//--- - -// FP convert, from vec to vec reg -// FP convert, from gen to vec reg -// FP convert, from vec to gen reg -def : WriteRes { - let Latency = 9; - let ResourceCycles = [2]; -} - -// FP move, immed -// FP move, register -def : WriteRes { - let Latency = 4; - let ResourceCycles = [2]; -} - -// FP transfer, from gen to vec reg -// FP transfer, from vec to gen reg -def : WriteRes { - let Latency = 4; - let ResourceCycles = [2]; -} - -def : InstRW<[A64FXWrite_FMOV_GV], (instrs FMOVXDHighr)>; -def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>; - -//--- -// 3.12 ASIMD Integer Instructions -//--- - -// ASIMD absolute diff, D-form -// ASIMD absolute diff, Q-form -// ASIMD absolute diff accum, D-form -// ASIMD absolute diff accum, Q-form -// ASIMD absolute diff accum long -// ASIMD absolute diff long -// ASIMD arith, basic -// ASIMD arith, complex -// ASIMD compare -// ASIMD logical (AND, BIC, EOR) -// ASIMD max/min, basic -// ASIMD max/min, reduce, 4H/4S -// ASIMD max/min, reduce, 8B/8H -// ASIMD max/min, reduce, 16B -// ASIMD multiply, D-form -// ASIMD multiply, Q-form -// ASIMD multiply accumulate long -// ASIMD multiply accumulate saturating long -// ASIMD multiply long -// ASIMD pairwise add and accumulate -// ASIMD shift 
accumulate -// ASIMD shift by immed, basic -// ASIMD shift by immed and insert, basic, D-form -// ASIMD shift by immed and insert, basic, Q-form -// ASIMD shift by immed, complex -// ASIMD shift by register, basic, D-form -// ASIMD shift by register, basic, Q-form -// ASIMD shift by register, complex, D-form -// ASIMD shift by register, complex, Q-form -def : WriteRes { - let Latency = 4; - let ResourceCycles = [1]; -} -def : WriteRes { - let Latency = 4; - let ResourceCycles = [1]; -} - -// ASIMD arith, reduce, 4H/4S -// ASIMD arith, reduce, 8B/8H -// ASIMD arith, reduce, 16B - -// ASIMD logical (MVN (alias for NOT), ORN, ORR) -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>; - -// ASIMD arith, reduce -def : InstRW<[A64FXWrite_ADDLV], - (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; - -// ASIMD polynomial (8x8) multiply long -def : InstRW<[A64FXWrite_MULLE], (instregex "^(S|U|SQD)MULL")>; -def : InstRW<[A64FXWrite_MULLV], - (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>; -def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v8i8|v16i8)")>; -def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v1i64|v2i64)")>; - -// ASIMD absolute diff accum, D-form -def : InstRW<[A64FXWrite_ABA], - (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; -// ASIMD absolute diff accum, Q-form -def : InstRW<[A64FXWrite_ABA], - (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; -// ASIMD absolute diff accum long -def : InstRW<[A64FXWrite_ABAL], - (instregex "^[SU]ABAL")>; -// ASIMD arith, reduce, 4H/4S -def : InstRW<[A64FXWrite_ADDLV1], - (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; -// ASIMD arith, reduce, 8B -def : InstRW<[A64FXWrite_ADDLV1], - (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>; -// ASIMD arith, reduce, 16B/16H -def : InstRW<[A64FXWrite_ADDLV1], - (instregex "^[SU]?ADDL?Vv16i8v$")>; -// ASIMD max/min, reduce, 4H/4S -def : InstRW<[A64FXWrite_MINMAXV], - (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>; -// ASIMD max/min, reduce, 
8B/8H -def : InstRW<[A64FXWrite_MINMAXV], - (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>; -// ASIMD max/min, reduce, 16B/16H -def : InstRW<[A64FXWrite_MINMAXV], - (instregex "^[SU](MIN|MAX)Vv16i8v$")>; -// ASIMD multiply, D-form -def : InstRW<[A64FXWrite_PMUL], - (instregex "^(P?MUL|SQR?DMUL)" # - "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" # - "(_indexed)?$")>; - -// ASIMD multiply, Q-form -def : InstRW<[A64FXWrite_PMUL], - (instregex "^(P?MUL)(v16i8|v8i16|v4i32)(_indexed)?$")>; - -// ASIMD multiply, Q-form -def : InstRW<[A64FXWrite_SQRDMULH], - (instregex "^(SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; - -// ASIMD multiply accumulate, D-form -def : InstRW<[A64FXWrite_9Cyc_GI03], - (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; -// ASIMD multiply accumulate, Q-form -def : InstRW<[A64FXWrite_9Cyc_GI03], - (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; -// ASIMD shift accumulate -def : InstRW<[A64FXWrite_SRSRAV], - (instregex "SRSRAv", "URSRAv")>; -def : InstRW<[A64FXWrite_SSRAV], - (instregex "SSRAv", "USRAv")>; - -// ASIMD shift by immed, basic -def : InstRW<[A64FXWrite_RSHRN], - (instregex "RSHRNv", "SQRSHRNv", "SQRSHRUNv", "UQRSHRNv")>; -def : InstRW<[A64FXWrite_SHRN], - (instregex "SHRNv", "SQSHRNv", "SQSHRUNv", "UQSHRNv")>; - -def : InstRW<[A64FXWrite_6Cyc_GI3], - (instregex "SQXTNv", "SQXTUNv", "UQXTNv")>; - -// ASIMD shift by immed, complex -def : InstRW<[A64FXWrite_ABA], (instregex "^[SU]?(Q|R){1,2}SHR")>; -def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^SQSHLU")>; -// ASIMD shift by register, basic, Q-form -def : InstRW<[A64FXWrite_6Cyc_GI3], - (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; -// ASIMD shift by register, complex, D-form -def : InstRW<[A64FXWrite_6Cyc_GI3], - (instregex "^[SU][QR]{1,2}SHL" # - "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>; -// ASIMD shift by register, complex, Q-form -def : InstRW<[A64FXWrite_6Cyc_GI3], - (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>; - -// ASIMD Arithmetic -def : 
InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>; -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>; -def : InstRW<[A64FXWrite_SHRN], (instregex "(ADD|SUB)HNv.*")>; -def : InstRW<[A64FXWrite_RSHRN], (instregex "(RADD|RSUB)HNv.*")>; -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD", - "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; -def : InstRW<[A64FXWrite_ADDP], - (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>; -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" # - "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>; -def : InstRW<[A64FXWrite_4Cyc_GI0], - (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>; -def : InstRW<[A64FXWrite_SADALP], (instregex "^SADALP", "^UADALP")>; -def : InstRW<[A64FXWrite_SADDLP], (instregex "^SADDLPv", "^UADDLPv")>; -def : InstRW<[A64FXWrite_ADDLV1], (instregex "^SADDLV", "^UADDLV")>; -def : InstRW<[A64FXWrite_MINMAXV], - (instregex "^ADDVv", "^SMAXVv", "^UMAXVv", "^SMINVv", "^UMINVv")>; -def : InstRW<[A64FXWrite_ABA], - (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>; -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "^SQADDv", "^SQSUBv", "^UQADDv", "^UQSUBv")>; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^SUQADDv", "^USQADDv")>; -def : InstRW<[A64FXWrite_SHRN], - (instregex "^ADDHNv", "^SUBHNv")>; -def : InstRW<[A64FXWrite_RSHRN], - (instregex "^RADDHNv", "^RSUBHNv")>; -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "^SQABS", "^SQADD", "^SQNEG", "^SQSUB", - "^SRHADD", "^SUQADD", "^UQADD", "^UQSUB", - "^URHADD", "^USQADD")>; - -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "^CMEQv", "^CMGEv", "^CMGTv", - "^CMLEv", "^CMLTv", "^CMHIv", "^CMHSv")>; -def : InstRW<[A64FXWrite_MINMAXV], - (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>; -def : InstRW<[A64FXWrite_ADDP], - (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>; -def : 
InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "^SABDv", "^UABDv")>; -def : InstRW<[A64FXWrite_TBX1], - (instregex "^SABDLv", "^UABDLv")>; - -//--- -// 3.13 ASIMD Floating-point Instructions -//--- - -// ASIMD FP absolute value -def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FABSv")>; - -// ASIMD FP arith, normal, D-form -// ASIMD FP arith, normal, Q-form -def : InstRW<[A64FXWrite_9Cyc_GI03], - (instregex "^FABDv", "^FADDv", "^FSUBv")>; - -// ASIMD FP arith, pairwise, D-form -// ASIMD FP arith, pairwise, Q-form -def : InstRW<[A64FXWrite_FADDPV], (instregex "^FADDPv")>; - -// ASIMD FP compare, D-form -// ASIMD FP compare, Q-form -def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FACGEv", "^FACGTv")>; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCMEQv", "^FCMGEv", - "^FCMGTv", "^FCMLEv", - "^FCMLTv")>; -// ASIMD FP round, D-form -def : InstRW<[A64FXWrite_9Cyc_GI03], - (instregex "^FRINT[AIMNPXZ](v2f32)")>; -// ASIMD FP round, Q-form -def : InstRW<[A64FXWrite_9Cyc_GI03], - (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>; - -// ASIMD FP convert, long -// ASIMD FP convert, narrow -// ASIMD FP convert, other, D-form -// ASIMD FP convert, other, Q-form - -// ASIMD FP convert, long and narrow -def : InstRW<[A64FXWrite_FCVTXNV], (instregex "^FCVT(L|N|XN)v")>; -// ASIMD FP convert, other, D-form -def : InstRW<[A64FXWrite_FCVTXNV], - (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>; -// ASIMD FP convert, other, Q-form -def : InstRW<[A64FXWrite_FCVTXNV], - (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>; - -// ASIMD FP divide, D-form, F32 -def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVv2f32)>; -def : InstRW<[A64FXXWriteFDivSP], (instregex "FDIVv2f32")>; - -// ASIMD FP divide, Q-form, F32 -def : InstRW<[A64FXXWriteFDiv], (instrs FDIVv4f32)>; -def : InstRW<[A64FXXWriteFDiv], (instregex "FDIVv4f32")>; - -// ASIMD FP divide, Q-form, F64 -def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVv2f64)>; -def : InstRW<[A64FXXWriteFDivDP], 
(instregex "FDIVv2f64")>; - -// ASIMD FP max/min, normal, D-form -// ASIMD FP max/min, normal, Q-form -def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMAXv", "^FMAXNMv", - "^FMINv", "^FMINNMv")>; - -// ASIMD FP max/min, pairwise, D-form -// ASIMD FP max/min, pairwise, Q-form -def : InstRW<[A64FXWrite_ADDP], (instregex "^FMAXPv", "^FMAXNMPv", - "^FMINPv", "^FMINNMPv")>; - -// ASIMD FP max/min, reduce -def : InstRW<[A64FXWrite_FMAXVVH], (instregex "^FMAXVv", "^FMAXNMVv", - "^FMINVv", "^FMINNMVv")>; - -// ASIMD FP multiply, D-form, FZ -// ASIMD FP multiply, D-form, no FZ -// ASIMD FP multiply, Q-form, FZ -// ASIMD FP multiply, Q-form, no FZ -def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMULv", "^FMULXv")>; -def : InstRW<[A64FXWrite_FMULXE], - (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>; -def : InstRW<[A64FXWrite_FMULXE], - (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>; - -// ASIMD FP multiply accumulate, Dform, FZ -// ASIMD FP multiply accumulate, Dform, no FZ -// ASIMD FP multiply accumulate, Qform, FZ -// ASIMD FP multiply accumulate, Qform, no FZ -def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMLAv", "^FMLSv")>; -def : InstRW<[A64FXWrite_FMULXE], - (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>; -def : InstRW<[A64FXWrite_FMULXE], - (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>; - -// ASIMD FP negate -def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FNEGv")>; - -//-- -// 3.14 ASIMD Miscellaneous Instructions -//-- - -// ASIMD bit reverse -def : InstRW<[A64FXWrite_1Cyc_GI2456], (instregex "^RBITv")>; - -// ASIMD bitwise insert, D-form -// ASIMD bitwise insert, Q-form -def : InstRW<[A64FXWrite_BIF], - (instregex "^BIFv", "^BITv", "^BSLv")>; - -// ASIMD count, D-form -// ASIMD count, Q-form -def : InstRW<[A64FXWrite_4Cyc_GI0], - (instregex "^CLSv", "^CLZv", "^CNTv")>; - -// ASIMD duplicate, gen reg -// ASIMD duplicate, element -def : InstRW<[A64FXWrite_DUPGENERAL], (instregex "^DUPv")>; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex 
"^DUP(i8|i16|i32|i64)$")>; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUPv.+gpr")>; - -// ASIMD extract -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^EXTv")>; - -// ASIMD extract narrow -def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^XTNv")>; - -// ASIMD extract narrow, saturating -def : InstRW<[A64FXWrite_6Cyc_GI3], - (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>; - -// ASIMD insert, element to element -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>; - -// ASIMD transfer, element to gen reg -def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>; - -// ASIMD move, integer immed -def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^MOVIv")>; - -// ASIMD move, FP immed -def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMOVv")>; - -// ASIMD table lookup, D-form -def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv8i8One")>; -def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv8i8Two")>; -def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv8i8Three")>; -def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv8i8Four")>; -def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv8i8One")>; -def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv8i8Two")>; -def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv8i8Three")>; -def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv8i8Four")>; - -// ASIMD table lookup, Q-form -def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv16i8One")>; -def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv16i8Two")>; -def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv16i8Three")>; -def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv16i8Four")>; -def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv16i8One")>; -def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv16i8Two")>; -def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv16i8Three")>; -def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv16i8Four")>; - -// ASIMD transpose -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TRN1", "^TRN2")>; - -// ASIMD unzip/zip -def : InstRW<[A64FXWrite_6Cyc_GI0], - (instregex "^UZP1", 
"^UZP2", "^ZIP1", "^ZIP2")>; - -// ASIMD reciprocal estimate, D-form -// ASIMD reciprocal estimate, Q-form -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "^FRECPEv", "^FRECPXv", "^URECPEv", - "^FRSQRTEv", "^URSQRTEv")>; - -// ASIMD reciprocal step, D-form, FZ -// ASIMD reciprocal step, D-form, no FZ -// ASIMD reciprocal step, Q-form, FZ -// ASIMD reciprocal step, Q-form, no FZ -def : InstRW<[A64FXWrite_9Cyc_GI0], (instregex "^FRECPSv", "^FRSQRTSv")>; - -// ASIMD reverse -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "^REV16v", "^REV32v", "^REV64v")>; - -// ASIMD table lookup, D-form -// ASIMD table lookup, Q-form -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TBLv", "^TBXv")>; - -// ASIMD transfer, element to word or word -def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>; - -// ASIMD transfer, element to gen reg -def : InstRW<[A64FXWrite_SMOV], (instregex "(S|U)MOVv.*")>; - -// ASIMD transfer gen reg to element -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>; - -// ASIMD transpose -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TRN1v", "^TRN2v", - "^UZP1v", "^UZP2v")>; - -// ASIMD unzip/zip -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^ZIP1v", "^ZIP2v")>; - -//-- -// 3.15 ASIMD Load Instructions -//-- - -// ASIMD load, 1 element, multiple, 1 reg, D-form -// ASIMD load, 1 element, multiple, 1 reg, Q-form -def : InstRW<[A64FXWrite_8Cyc_GI56], - (instregex "^LD1Onev(8b|4h|2s|1d|2d)$")>; -def : InstRW<[A64FXWrite_11Cyc_GI56], - (instregex "^LD1Onev(16b|8h|4s)$")>; -def : InstRW<[A64FXWrite_LD108, WriteAdr], - (instregex "^LD1Onev(8b|4h|2s|1d|2d)_POST$")>; -def : InstRW<[A64FXWrite_LD109, WriteAdr], - (instregex "^LD1Onev(16b|8h|4s)_POST$")>; - -// ASIMD load, 1 element, multiple, 2 reg, D-form -// ASIMD load, 1 element, multiple, 2 reg, Q-form -def : InstRW<[A64FXWrite_LD102], - (instregex "^LD1Twov(8b|4h|2s|1d|2d)$")>; -def : InstRW<[A64FXWrite_LD103], - (instregex "^LD1Twov(16b|8h|4s)$")>; -def : InstRW<[A64FXWrite_LD110, WriteAdr], 
- (instregex "^LD1Twov(8b|4h|2s|1d|2d)_POST$")>; -def : InstRW<[A64FXWrite_LD111, WriteAdr], - (instregex "^LD1Twov(16b|8h|4s)_POST$")>; - -// ASIMD load, 1 element, multiple, 3 reg, D-form -// ASIMD load, 1 element, multiple, 3 reg, Q-form -def : InstRW<[A64FXWrite_LD104], - (instregex "^LD1Threev(8b|4h|2s|1d|2d)$")>; -def : InstRW<[A64FXWrite_LD105], - (instregex "^LD1Threev(16b|8h|4s)$")>; -def : InstRW<[A64FXWrite_LD112, WriteAdr], - (instregex "^LD1Threev(8b|4h|2s|1d|2d)_POST$")>; -def : InstRW<[A64FXWrite_LD113, WriteAdr], - (instregex "^LD1Threev(16b|8h|4s)_POST$")>; - -// ASIMD load, 1 element, multiple, 4 reg, D-form -// ASIMD load, 1 element, multiple, 4 reg, Q-form -def : InstRW<[A64FXWrite_LD106], - (instregex "^LD1Fourv(8b|4h|2s|1d|2d)$")>; -def : InstRW<[A64FXWrite_LD107], - (instregex "^LD1Fourv(16b|8h|4s)$")>; -def : InstRW<[A64FXWrite_LD114, WriteAdr], - (instregex "^LD1Fourv(8b|4h|2s|1d|2d)_POST$")>; -def : InstRW<[A64FXWrite_LD115, WriteAdr], - (instregex "^LD1Fourv(16b|8h|4s)_POST$")>; - -// ASIMD load, 1 element, one lane, B/H/S -// ASIMD load, 1 element, one lane, D -def : InstRW<[A64FXWrite_LD1I0], (instregex "^LD1i(8|16|32|64)$")>; -def : InstRW<[A64FXWrite_LD1I1, WriteAdr], - (instregex "^LD1i(8|16|32|64)_POST$")>; - -// ASIMD load, 1 element, all lanes, D-form, B/H/S -// ASIMD load, 1 element, all lanes, D-form, D -// ASIMD load, 1 element, all lanes, Q-form -def : InstRW<[A64FXWrite_8Cyc_GI03], - (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_LD108, WriteAdr], - (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 2 element, multiple, D-form, B/H/S -// ASIMD load, 2 element, multiple, Q-form, D -def : InstRW<[A64FXWrite_LD103], - (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_LD111, WriteAdr], - (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 2 element, one lane, B/H -// ASIMD load, 2 element, one lane, S -// ASIMD load, 2 element, one 
lane, D -def : InstRW<[A64FXWrite_LD2I0], (instregex "^LD2i(8|16|32|64)$")>; -def : InstRW<[A64FXWrite_LD2I1, WriteAdr], - (instregex "^LD2i(8|16|32|64)_POST$")>; - -// ASIMD load, 2 element, all lanes, D-form, B/H/S -// ASIMD load, 2 element, all lanes, D-form, D -// ASIMD load, 2 element, all lanes, Q-form -def : InstRW<[A64FXWrite_LD102], - (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_LD110, WriteAdr], - (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 3 element, multiple, D-form, B/H/S -// ASIMD load, 3 element, multiple, Q-form, B/H/S -// ASIMD load, 3 element, multiple, Q-form, D -def : InstRW<[A64FXWrite_LD105], - (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_LD113, WriteAdr], - (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 3 element, one lane, B/H -// ASIMD load, 3 element, one lane, S -// ASIMD load, 3 element, one lane, D -def : InstRW<[A64FXWrite_LD3I0], (instregex "^LD3i(8|16|32|64)$")>; -def : InstRW<[A64FXWrite_LD3I1, WriteAdr], - (instregex "^LD3i(8|16|32|64)_POST$")>; - -// ASIMD load, 3 element, all lanes, D-form, B/H/S -// ASIMD load, 3 element, all lanes, D-form, D -// ASIMD load, 3 element, all lanes, Q-form, B/H/S -// ASIMD load, 3 element, all lanes, Q-form, D -def : InstRW<[A64FXWrite_LD104], - (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_LD112, WriteAdr], - (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 4 element, multiple, D-form, B/H/S -// ASIMD load, 4 element, multiple, Q-form, B/H/S -// ASIMD load, 4 element, multiple, Q-form, D -def : InstRW<[A64FXWrite_LD107], - (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_LD115, WriteAdr], - (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 4 element, one lane, B/H -// ASIMD load, 4 element, one lane, S -// ASIMD load, 4 element, one lane, D -def : InstRW<[A64FXWrite_LD4I0], 
(instregex "^LD4i(8|16|32|64)$")>; -def : InstRW<[A64FXWrite_LD4I1, WriteAdr], - (instregex "^LD4i(8|16|32|64)_POST$")>; - -// ASIMD load, 4 element, all lanes, D-form, B/H/S -// ASIMD load, 4 element, all lanes, D-form, D -// ASIMD load, 4 element, all lanes, Q-form, B/H/S -// ASIMD load, 4 element, all lanes, Q-form, D -def : InstRW<[A64FXWrite_LD106], - (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_LD114, WriteAdr], - (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -//-- -// 3.16 ASIMD Store Instructions -//-- - -// ASIMD store, 1 element, multiple, 1 reg, D-form -// ASIMD store, 1 element, multiple, 1 reg, Q-form -def : InstRW<[A64FXWrite_ST10], - (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_ST14, WriteAdr], - (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, multiple, 2 reg, D-form -// ASIMD store, 1 element, multiple, 2 reg, Q-form -def : InstRW<[A64FXWrite_ST11], - (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_ST15, WriteAdr], - (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, multiple, 3 reg, D-form -// ASIMD store, 1 element, multiple, 3 reg, Q-form -def : InstRW<[A64FXWrite_ST12], - (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_ST16, WriteAdr], - (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, multiple, 4 reg, D-form -// ASIMD store, 1 element, multiple, 4 reg, Q-form -def : InstRW<[A64FXWrite_ST13], - (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_ST17, WriteAdr], - (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, one lane, B/H/S -// ASIMD store, 1 element, one lane, D -def : InstRW<[A64FXWrite_ST10], - (instregex "^ST1i(8|16|32|64)$")>; -def : InstRW<[A64FXWrite_ST14, WriteAdr], - (instregex "^ST1i(8|16|32|64)_POST$")>; - -// ASIMD 
store, 2 element, multiple, D-form, B/H/S -// ASIMD store, 2 element, multiple, Q-form, B/H/S -// ASIMD store, 2 element, multiple, Q-form, D -def : InstRW<[A64FXWrite_ST11], - (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_ST15, WriteAdr], - (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 2 element, one lane, B/H/S -// ASIMD store, 2 element, one lane, D -def : InstRW<[A64FXWrite_ST11], - (instregex "^ST2i(8|16|32|64)$")>; -def : InstRW<[A64FXWrite_ST15, WriteAdr], - (instregex "^ST2i(8|16|32|64)_POST$")>; - -// ASIMD store, 3 element, multiple, D-form, B/H/S -// ASIMD store, 3 element, multiple, Q-form, B/H/S -// ASIMD store, 3 element, multiple, Q-form, D -def : InstRW<[A64FXWrite_ST12], - (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_ST16, WriteAdr], - (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 3 element, one lane, B/H -// ASIMD store, 3 element, one lane, S -// ASIMD store, 3 element, one lane, D -def : InstRW<[A64FXWrite_ST12], (instregex "^ST3i(8|16|32|64)$")>; -def : InstRW<[A64FXWrite_ST16, WriteAdr], - (instregex "^ST3i(8|16|32|64)_POST$")>; - -// ASIMD store, 4 element, multiple, D-form, B/H/S -// ASIMD store, 4 element, multiple, Q-form, B/H/S -// ASIMD store, 4 element, multiple, Q-form, D -def : InstRW<[A64FXWrite_ST13], - (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_ST17, WriteAdr], - (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 4 element, one lane, B/H -// ASIMD store, 4 element, one lane, S -// ASIMD store, 4 element, one lane, D -def : InstRW<[A64FXWrite_ST13], (instregex "^ST4i(8|16|32|64)$")>; -def : InstRW<[A64FXWrite_ST17, WriteAdr], - (instregex "^ST4i(8|16|32|64)_POST$")>; - -// V8.1a Atomics (LSE) -def : InstRW<[A64FXWrite_CAS, WriteAtomic], - (instrs CASB, CASH, CASW, CASX)>; - -def : InstRW<[A64FXWrite_CAS, WriteAtomic], - (instrs CASAB, CASAH, CASAW, CASAX)>; - 
-def : InstRW<[A64FXWrite_CAS, WriteAtomic], - (instrs CASLB, CASLH, CASLW, CASLX)>; - -def : InstRW<[A64FXWrite_CAS, WriteAtomic], - (instrs CASALB, CASALH, CASALW, CASALX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDLARB, LDLARH, LDLARW, LDLARX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDADDB, LDADDH, LDADDW, LDADDX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDEORB, LDEORH, LDEORW, LDEORX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDSETB, LDSETH, LDSETW, LDSETX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX, - LDSMAXAB, LDSMAXAH, 
LDSMAXAW, LDSMAXAX, - LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX, - LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX, - LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX, - LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX, - LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX, - LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX, - LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX, - LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX, - LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX, - LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX, - LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>; - -def : InstRW<[A64FXWrite_SWP, WriteAtomic], - (instrs SWPB, SWPH, SWPW, SWPX)>; - -def : InstRW<[A64FXWrite_SWP, WriteAtomic], - (instrs SWPAB, SWPAH, SWPAW, SWPAX)>; - -def : InstRW<[A64FXWrite_SWP, WriteAtomic], - (instrs SWPLB, SWPLH, SWPLW, SWPLX)>; - -def : InstRW<[A64FXWrite_SWP, WriteAtomic], - (instrs SWPALB, SWPALH, SWPALW, SWPALX)>; - -def : InstRW<[A64FXWrite_STUR, WriteAtomic], - (instrs STLLRB, STLLRH, STLLRW, STLLRX)>; - -// [ 1] "abs $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ABS_ZPmZ_B, ABS_ZPmZ_D, ABS_ZPmZ_H, ABS_ZPmZ_S)>; - -// [ 2] "add $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ADD_ZZZ_B, ADD_ZZZ_D, ADD_ZZZ_H, ADD_ZZZ_S)>; - -// [ 3] "add $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ADD_ZPmZ_B, ADD_ZPmZ_D, ADD_ZPmZ_H, ADD_ZPmZ_S)>; - -// [ 4] "add $Zdn, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ADD_ZI_B, ADD_ZI_D, ADD_ZI_H, ADD_ZI_S)>; - -// [ 5] "addpl $Rd, $Rn, $imm6"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs ADDPL_XXI)>; - -// [ 6] "addvl $Rd, $Rn, $imm6"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs ADDVL_XXI)>; - -// [ 7] "adr $Zd, [$Zn, $Zm]"; -def : 
InstRW<[A64FXWrite_5Cyc_GI0], (instrs ADR_LSL_ZZZ_D_0, ADR_LSL_ZZZ_D_1, ADR_LSL_ZZZ_D_2, ADR_LSL_ZZZ_D_3, ADR_LSL_ZZZ_S_0, ADR_LSL_ZZZ_S_1, ADR_LSL_ZZZ_S_2, ADR_LSL_ZZZ_S_3, ADR_SXTW_ZZZ_D_0, ADR_SXTW_ZZZ_D_1, ADR_SXTW_ZZZ_D_2, ADR_SXTW_ZZZ_D_3, ADR_UXTW_ZZZ_D_0, ADR_UXTW_ZZZ_D_1, ADR_UXTW_ZZZ_D_2, ADR_UXTW_ZZZ_D_3)>; - -// [ 8] "and $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs AND_PPzPP)>; - -// [ 9] "and $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs AND_ZZZ)>; - -// [10] "and $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs AND_ZPmZ_B, AND_ZPmZ_D, AND_ZPmZ_H, AND_ZPmZ_S)>; - -// [11] "and $Zdn, $_Zdn, $imms13"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs AND_ZI)>; - -// [12] "ands $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ANDS_PPzPP)>; - -// [13] "andv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs ANDV_VPZ_B, ANDV_VPZ_D, ANDV_VPZ_H, ANDV_VPZ_S)>; - -// [14] "asr $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_WIDE_ZZZ_B, ASR_WIDE_ZZZ_H, ASR_WIDE_ZZZ_S)>; - -// [15] "asr $Zd, $Zn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_ZZI_B, ASR_ZZI_D, ASR_ZZI_H, ASR_ZZI_S)>; - -// [16] "asr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_WIDE_ZPmZ_B, ASR_WIDE_ZPmZ_H, ASR_WIDE_ZPmZ_S, ASR_ZPmZ_B, ASR_ZPmZ_D, ASR_ZPmZ_H, ASR_ZPmZ_S)>; - -// [17] "asr $Zdn, $Pg/m, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_ZPmI_B, ASR_ZPmI_D, ASR_ZPmI_H, ASR_ZPmI_S)>; - -// [18] "asrd $Zdn, $Pg/m, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASRD_ZPmI_B, ASRD_ZPmI_D, ASRD_ZPmI_H, ASRD_ZPmI_S)>; - -// [19] "asrr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASRR_ZPmZ_B, ASRR_ZPmZ_D, ASRR_ZPmZ_H, ASRR_ZPmZ_S)>; - -// [20] "bic $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BIC_PPzPP)>; - -// [21] "bic $Zd, $Zn, $Zm"; -def : 
InstRW<[A64FXWrite_4Cyc_GI03], (instrs BIC_ZZZ)>; - -// [22] "bic $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs BIC_ZPmZ_B, BIC_ZPmZ_D, BIC_ZPmZ_H, BIC_ZPmZ_S)>; - -// [23] "bics $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BICS_PPzPP)>; - -// [24] "brka $Pd, $Pg/m, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKA_PPmP)>; - -// [25] "brka $Pd, $Pg/z, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKA_PPzP)>; - -// [26] "brkas $Pd, $Pg/z, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKAS_PPzP)>; - -// [27] "brkb $Pd, $Pg/m, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKB_PPmP)>; - -// [28] "brkb $Pd, $Pg/z, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKB_PPzP)>; - -// [29] "brkbs $Pd, $Pg/z, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKBS_PPzP)>; - -// [30] "brkn $Pdm, $Pg/z, $Pn, $_Pdm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKN_PPzP)>; - -// [31] "brkns $Pdm, $Pg/z, $Pn, $_Pdm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKNS_PPzP)>; - -// [32] "brkpa $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPA_PPzPP)>; - -// [33] "brkpas $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPAS_PPzPP)>; - -// [34] "brkpb $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPB_PPzPP)>; - -// [35] "brkpbs $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPBS_PPzPP)>; - -// [36] "clasta $Rdn, $Pg, $_Rdn, $Zm"; -def : InstRW<[A64FXWrite_29Cyc_GI0256], (instrs CLASTA_RPZ_B, CLASTA_RPZ_D, CLASTA_RPZ_H, CLASTA_RPZ_S)>; - -// [37] "clasta $Vdn, $Pg, $_Vdn, $Zm"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTA_VPZ_B, CLASTA_VPZ_D, CLASTA_VPZ_H, CLASTA_VPZ_S)>; - -// [38] "clasta $Zdn, $Pg, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTA_ZPZ_B, CLASTA_ZPZ_D, CLASTA_ZPZ_H, CLASTA_ZPZ_S)>; - -// [39] "clastb $Rdn, $Pg, $_Rdn, $Zm"; -def : InstRW<[A64FXWrite_29Cyc_GI0256], (instrs 
CLASTB_RPZ_B, CLASTB_RPZ_D, CLASTB_RPZ_H, CLASTB_RPZ_S)>; - -// [40] "clastb $Vdn, $Pg, $_Vdn, $Zm"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTB_VPZ_B, CLASTB_VPZ_D, CLASTB_VPZ_H, CLASTB_VPZ_S)>; - -// [41] "clastb $Zdn, $Pg, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTB_ZPZ_B, CLASTB_ZPZ_D, CLASTB_ZPZ_H, CLASTB_ZPZ_S)>; - -// [42] "cls $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs CLS_ZPmZ_B, CLS_ZPmZ_D, CLS_ZPmZ_H, CLS_ZPmZ_S)>; - -// [43] "clz $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs CLZ_ZPmZ_B, CLZ_ZPmZ_D, CLZ_ZPmZ_H, CLZ_ZPmZ_S)>; - -// [44] "cmpeq $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPEQ_PPzZZ_B, CMPEQ_PPzZZ_D, CMPEQ_PPzZZ_H, CMPEQ_PPzZZ_S, CMPEQ_WIDE_PPzZZ_B, CMPEQ_WIDE_PPzZZ_H, CMPEQ_WIDE_PPzZZ_S)>; - -// [45] "cmpeq $Pd, $Pg/z, $Zn, $imm5"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPEQ_PPzZI_B, CMPEQ_PPzZI_D, CMPEQ_PPzZI_H, CMPEQ_PPzZI_S)>; - -// [46] "cmpge $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGE_PPzZZ_B, CMPGE_PPzZZ_D, CMPGE_PPzZZ_H, CMPGE_PPzZZ_S, CMPGE_WIDE_PPzZZ_B, CMPGE_WIDE_PPzZZ_H, CMPGE_WIDE_PPzZZ_S)>; - -// [47] "cmpge $Pd, $Pg/z, $Zn, $imm5"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGE_PPzZI_B, CMPGE_PPzZI_D, CMPGE_PPzZI_H, CMPGE_PPzZI_S)>; - -// [48] "cmpgt $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGT_PPzZZ_B, CMPGT_PPzZZ_D, CMPGT_PPzZZ_H, CMPGT_PPzZZ_S, CMPGT_WIDE_PPzZZ_B, CMPGT_WIDE_PPzZZ_H, CMPGT_WIDE_PPzZZ_S)>; - -// [49] "cmpgt $Pd, $Pg/z, $Zn, $imm5"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGT_PPzZI_B, CMPGT_PPzZI_D, CMPGT_PPzZI_H, CMPGT_PPzZI_S)>; - -// [50] "cmphi $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHI_PPzZZ_B, CMPHI_PPzZZ_D, CMPHI_PPzZZ_H, CMPHI_PPzZZ_S, CMPHI_WIDE_PPzZZ_B, CMPHI_WIDE_PPzZZ_H, CMPHI_WIDE_PPzZZ_S)>; - -// [51] "cmphi $Pd, $Pg/z, $Zn, $imm7"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs 
CMPHI_PPzZI_B, CMPHI_PPzZI_D, CMPHI_PPzZI_H, CMPHI_PPzZI_S)>; - -// [52] "cmphs $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHS_PPzZZ_B, CMPHS_PPzZZ_D, CMPHS_PPzZZ_H, CMPHS_PPzZZ_S, CMPHS_WIDE_PPzZZ_B, CMPHS_WIDE_PPzZZ_H, CMPHS_WIDE_PPzZZ_S)>; - -// [53] "cmphs $Pd, $Pg/z, $Zn, $imm7"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHS_PPzZI_B, CMPHS_PPzZI_D, CMPHS_PPzZI_H, CMPHS_PPzZI_S)>; - -// [54] "cmple $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLE_WIDE_PPzZZ_B, CMPLE_WIDE_PPzZZ_H, CMPLE_WIDE_PPzZZ_S)>; - -// [55] "cmple $Pd, $Pg/z, $Zn, $imm5"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLE_PPzZI_B, CMPLE_PPzZI_D, CMPLE_PPzZI_H, CMPLE_PPzZI_S)>; - -// [56] "cmplo $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLO_WIDE_PPzZZ_B, CMPLO_WIDE_PPzZZ_H, CMPLO_WIDE_PPzZZ_S)>; - -// [57] "cmplo $Pd, $Pg/z, $Zn, $imm7"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLO_PPzZI_B, CMPLO_PPzZI_D, CMPLO_PPzZI_H, CMPLO_PPzZI_S)>; - -// [58] "cmpls $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLS_WIDE_PPzZZ_B, CMPLS_WIDE_PPzZZ_H, CMPLS_WIDE_PPzZZ_S)>; - -// [59] "cmpls $Pd, $Pg/z, $Zn, $imm7"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLS_PPzZI_B, CMPLS_PPzZI_D, CMPLS_PPzZI_H, CMPLS_PPzZI_S)>; - -// [60] "cmplt $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLT_WIDE_PPzZZ_B, CMPLT_WIDE_PPzZZ_H, CMPLT_WIDE_PPzZZ_S)>; - -// [61] "cmplt $Pd, $Pg/z, $Zn, $imm5"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLT_PPzZI_B, CMPLT_PPzZI_D, CMPLT_PPzZI_H, CMPLT_PPzZI_S)>; - -// [62] "cmpne $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPNE_PPzZZ_B, CMPNE_PPzZZ_D, CMPNE_PPzZZ_H, CMPNE_PPzZZ_S, CMPNE_WIDE_PPzZZ_B, CMPNE_WIDE_PPzZZ_H, CMPNE_WIDE_PPzZZ_S)>; - -// [63] "cmpne $Pd, $Pg/z, $Zn, $imm5"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPNE_PPzZI_B, CMPNE_PPzZI_D, CMPNE_PPzZI_H, CMPNE_PPzZI_S)>; - -// [64] "cnot 
$Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs CNOT_ZPmZ_B, CNOT_ZPmZ_D, CNOT_ZPmZ_H, CNOT_ZPmZ_S)>; - -// [65] "cnt $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI3], (instrs CNT_ZPmZ_B, CNT_ZPmZ_D, CNT_ZPmZ_H, CNT_ZPmZ_S)>; - -// [66] "cntb $Rd, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTB_XPiI)>; - -// [67] "cntd $Rd, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTD_XPiI)>; - -// [68] "cnth $Rd, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTH_XPiI)>; - -// [69] "cntp $Rd, $Pg, $Pn"; -def : InstRW<[A64FXWrite_6Cyc_GI01], (instrs CNTP_XPP_B, CNTP_XPP_D, CNTP_XPP_H, CNTP_XPP_S)>; - -// [70] "cntw $Rd, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTW_XPiI)>; - -// [71] "compact $Zd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs COMPACT_ZPZ_D, COMPACT_ZPZ_S)>; - -// [72] "cpy $Zd, $Pg/m, $Rn"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CPY_ZPmR_B, CPY_ZPmR_D, CPY_ZPmR_H, CPY_ZPmR_S)>; - -// [73] "cpy $Zd, $Pg/m, $Vn"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CPY_ZPmV_B, CPY_ZPmV_D, CPY_ZPmV_H, CPY_ZPmV_S)>; - -// [74] "cpy $Zd, $Pg/m, $imm"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CPY_ZPmI_B, CPY_ZPmI_D, CPY_ZPmI_H, CPY_ZPmI_S)>; - -// [75] "cpy $Zd, $Pg/z, $imm"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CPY_ZPzI_B, CPY_ZPzI_D, CPY_ZPzI_H, CPY_ZPzI_S)>; - -// [76] "ctermeq $Rn, $Rm"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs CTERMEQ_WW, CTERMEQ_XX)>; - -// [77] "ctermne $Rn, $Rm"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs CTERMNE_WW, CTERMNE_XX)>; - -// [78] "decb $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECB_XPiI)>; - -// [79] "decd $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECD_XPiI)>; - -// [80] "decd $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs DECD_ZPiI)>; - -// [81] "dech $Rdn, $pattern, mul $imm4"; -def : 
InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECH_XPiI)>; - -// [82] "dech $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs DECH_ZPiI)>; - -// [83] "decp $Rdn, $Pg"; -def : InstRW<[A64FXWrite_6Cyc_GI124], (instrs DECP_XP_B, DECP_XP_D, DECP_XP_H, DECP_XP_S)>; - -// [84] "decp $Zdn, $Pg"; -def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs DECP_ZP_D, DECP_ZP_H, DECP_ZP_S)>; - -// [85] "decw $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECW_XPiI)>; - -// [86] "decw $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs DECW_ZPiI)>; - -// [87] "dup $Zd, $Rn"; -def : InstRW<[A64FXWrite_8Cyc_GI01], (instrs DUP_ZR_B, DUP_ZR_D, DUP_ZR_H, DUP_ZR_S)>; - -// [88] "dup $Zd, $Zn$idx"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs DUP_ZZI_B, DUP_ZZI_D, DUP_ZZI_H, DUP_ZZI_Q, DUP_ZZI_S)>; - -// [89] "dup $Zd, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs DUP_ZI_B, DUP_ZI_D, DUP_ZI_H, DUP_ZI_S)>; - -// [90] "dupm $Zd, $imms"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs DUPM_ZI)>; - -// [91] "eor $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs EOR_PPzPP)>; - -// [92] "eor $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs EOR_ZZZ)>; - -// [93] "eor $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs EOR_ZPmZ_B, EOR_ZPmZ_D, EOR_ZPmZ_H, EOR_ZPmZ_S)>; - -// [94] "eor $Zdn, $_Zdn, $imms13"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs EOR_ZI)>; - -// [95] "eors $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs EORS_PPzPP)>; - -// [96] "eorv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs EORV_VPZ_B, EORV_VPZ_D, EORV_VPZ_H, EORV_VPZ_S)>; - -// [97] "ext $Zdn, $_Zdn, $Zm, $imm8"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs EXT_ZZI)>; - -// [99] "fabd $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FABD_ZPmZ_D, FABD_ZPmZ_H, FABD_ZPmZ_S)>; - -// [100] "fabs $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], 
(instrs FABS_ZPmZ_D, FABS_ZPmZ_H, FABS_ZPmZ_S)>; - -// [101] "facge $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FACGE_PPzZZ_D, FACGE_PPzZZ_H, FACGE_PPzZZ_S)>; - -// [102] "facgt $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FACGT_PPzZZ_D, FACGT_PPzZZ_H, FACGT_PPzZZ_S)>; - -// [103] "fadd $Zd, $Zn, $Zm"; def is line 1638 -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FADD_ZZZ_D, FADD_ZZZ_H, FADD_ZZZ_S)>; - -// [104] "fadd $Zdn, $Pg/m, $_Zdn, $Zm"; def is line 1638 -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FADD_ZPmZ_D, FADD_ZPmZ_H, FADD_ZPmZ_S)>; - -// [105] "fadd $Zdn, $Pg/m, $_Zdn, $i1"; def is line 1638 -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FADD_ZPmI_D, FADD_ZPmI_H, FADD_ZPmI_S)>; - -// [106] "fadda $Vdn, $Pg, $_Vdn, $Zm"; -def : InstRW<[A64FXWrite_18Cyc_GI03], (instrs FADDA_VPZ_D, FADDA_VPZ_H, FADDA_VPZ_S)>; - -// [107] "faddv $Vd, $Pg, $Zn"; -// H : 4 / 6 / ([1,2]9 / [1]6) x 4 / [1,2]9 = 75 cycle -// S : 4 / 6 / ([1,2]9 / [1]6) x 3 / [1,2]9 = 60 cycle -// D : 4 / 6 / ([1,2]9 / [1]6) x 2 / [1,2]9 = 45 cycle -def : InstRW<[A64FXWrite_75Cyc_GI03], (instrs FADDV_VPZ_H)>; -def : InstRW<[A64FXWrite_60Cyc_GI03], (instrs FADDV_VPZ_S)>; -def : InstRW<[A64FXWrite_45Cyc_GI03], (instrs FADDV_VPZ_D)>; - -// [108] "fcadd $Zdn, $Pg/m, $_Zdn, $Zm, $imm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCADD_ZPmZ_D, FCADD_ZPmZ_H, FCADD_ZPmZ_S)>; - -// [109] "fcmeq $Pd, $Pg/z, $Zn, #0.0"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMEQ_PPzZ0_D, FCMEQ_PPzZ0_H, FCMEQ_PPzZ0_S)>; - -// [110] "fcmeq $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMEQ_PPzZZ_D, FCMEQ_PPzZZ_H, FCMEQ_PPzZZ_S)>; - -// [111] "fcmge $Pd, $Pg/z, $Zn, #0.0"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGE_PPzZ0_D, FCMGE_PPzZ0_H, FCMGE_PPzZ0_S)>; - -// [112] "fcmge $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGE_PPzZZ_D, FCMGE_PPzZZ_H, FCMGE_PPzZZ_S)>; - -// [113] "fcmgt $Pd, $Pg/z, $Zn, #0.0"; -def : 
InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGT_PPzZ0_D, FCMGT_PPzZ0_H, FCMGT_PPzZ0_S)>; - -// [114] "fcmgt $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGT_PPzZZ_D, FCMGT_PPzZZ_H, FCMGT_PPzZZ_S)>; - -// [115] "fcmla $Zda, $Pg/m, $Zn, $Zm, $imm"; -def : InstRW<[A64FXWrite_15Cyc_GI03], (instrs FCMLA_ZPmZZ_D, FCMLA_ZPmZZ_H, FCMLA_ZPmZZ_S)>; - -// [116] "fcmla $Zda, $Zn, $Zm$iop, $imm"; -def : InstRW<[A64FXWrite_15Cyc_GI03], (instrs FCMLA_ZZZI_H, FCMLA_ZZZI_S)>; - -// [117] "fcmle $Pd, $Pg/z, $Zn, #0.0"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMLE_PPzZ0_D, FCMLE_PPzZ0_H, FCMLE_PPzZ0_S)>; - -// [118] "fcmlt $Pd, $Pg/z, $Zn, #0.0"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMLT_PPzZ0_D, FCMLT_PPzZ0_H, FCMLT_PPzZ0_S)>; - -// [119] "fcmne $Pd, $Pg/z, $Zn, #0.0"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMNE_PPzZ0_D, FCMNE_PPzZ0_H, FCMNE_PPzZ0_S)>; - -// [120] "fcmne $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMNE_PPzZZ_D, FCMNE_PPzZZ_H, FCMNE_PPzZZ_S)>; - -// [121] "fcmuo $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMUO_PPzZZ_D, FCMUO_PPzZZ_H, FCMUO_PPzZZ_S)>; - -// [122] "fcpy $Zd, $Pg/m, $imm8"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCPY_ZPmI_D, FCPY_ZPmI_H, FCPY_ZPmI_S)>; - -// [123] "fcvt $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCVT_ZPmZ_DtoH, FCVT_ZPmZ_DtoS, FCVT_ZPmZ_HtoD, FCVT_ZPmZ_HtoS, FCVT_ZPmZ_StoD, FCVT_ZPmZ_StoH)>; - -// [124] "fcvtzs $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCVTZS_ZPmZ_DtoD, FCVTZS_ZPmZ_DtoS, FCVTZS_ZPmZ_HtoD, FCVTZS_ZPmZ_HtoH, FCVTZS_ZPmZ_HtoS, FCVTZS_ZPmZ_StoD, FCVTZS_ZPmZ_StoS)>; - -// [125] "fcvtzu $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCVTZU_ZPmZ_DtoD, FCVTZU_ZPmZ_DtoS, FCVTZU_ZPmZ_HtoD, FCVTZU_ZPmZ_HtoH, FCVTZU_ZPmZ_HtoS, FCVTZU_ZPmZ_StoD, FCVTZU_ZPmZ_StoS)>; - -// [126] "fdiv $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_154Cyc_GI0], (instrs FDIV_ZPmZ_D)>; -def : 
InstRW<[A64FXWrite_134Cyc_GI0], (instrs FDIV_ZPmZ_H)>; -def : InstRW<[A64FXWrite_98Cyc_GI0], (instrs FDIV_ZPmZ_S)>; - -// [127] "fdivr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_154Cyc_GI0], (instrs FDIVR_ZPmZ_D)>; -def : InstRW<[A64FXWrite_134Cyc_GI0], (instrs FDIVR_ZPmZ_H)>; -def : InstRW<[A64FXWrite_98Cyc_GI0], (instrs FDIVR_ZPmZ_S)>; - -// [128] "fdup $Zd, $imm8"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FDUP_ZI_D, FDUP_ZI_H, FDUP_ZI_S)>; - -// [129] "fexpa $Zd, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FEXPA_ZZ_D, FEXPA_ZZ_H, FEXPA_ZZ_S)>; - -// [130] "fmad $Zdn, $Pg/m, $Zm, $Za"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMAD_ZPmZZ_D, FMAD_ZPmZZ_H, FMAD_ZPmZZ_S)>; - -// [131] "fmax $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMAX_ZPmZ_D, FMAX_ZPmZ_H, FMAX_ZPmZ_S)>; - -// [132] "fmax $Zdn, $Pg/m, $_Zdn, $i1"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMAX_ZPmI_D, FMAX_ZPmI_H, FMAX_ZPmI_S)>; - -// [133] "fmaxnm $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMAXNM_ZPmZ_D, FMAXNM_ZPmZ_H, FMAXNM_ZPmZ_S)>; - -// [134] "fmaxnm $Zdn, $Pg/m, $_Zdn, $i1"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMAXNM_ZPmI_D, FMAXNM_ZPmI_H, FMAXNM_ZPmI_S)>; - -// [135] "fmaxnmv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMAXNMV_VPZ_D, FMAXNMV_VPZ_H, FMAXNMV_VPZ_S)>; - -// [136] "fmaxv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMAXV_VPZ_D, FMAXV_VPZ_H, FMAXV_VPZ_S)>; - -// [137] "fmin $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMIN_ZPmZ_D, FMIN_ZPmZ_H, FMIN_ZPmZ_S)>; - -// [138] "fmin $Zdn, $Pg/m, $_Zdn, $i1"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMIN_ZPmI_D, FMIN_ZPmI_H, FMIN_ZPmI_S)>; - -// [139] "fminnm $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMINNM_ZPmZ_D, FMINNM_ZPmZ_H, FMINNM_ZPmZ_S)>; - -// [140] "fminnm $Zdn, $Pg/m, $_Zdn, $i1"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs 
FMINNM_ZPmI_D, FMINNM_ZPmI_H, FMINNM_ZPmI_S)>; - -// [141] "fminnmv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMINNMV_VPZ_D, FMINNMV_VPZ_H, FMINNMV_VPZ_S)>; - -// [142] "fminv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMINV_VPZ_D, FMINV_VPZ_H, FMINV_VPZ_S)>; - -// [143] "fmla $Zda, $Pg/m, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLA_ZPmZZ_D, FMLA_ZPmZZ_H, FMLA_ZPmZZ_S)>; - -// [144] "fmla $Zda, $Zn, $Zm$iop"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLA_ZZZI_D, FMLA_ZZZI_H, FMLA_ZZZI_S)>; - -// [145] "fmls $Zda, $Pg/m, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLS_ZPmZZ_D, FMLS_ZPmZZ_H, FMLS_ZPmZZ_S)>; - -// [146] "fmls $Zda, $Zn, $Zm$iop"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLS_ZZZI_D, FMLS_ZZZI_H, FMLS_ZZZI_S)>; - -// [147] "fmsb $Zdn, $Pg/m, $Zm, $Za"; - -// [148] "fmul $Zd, $Zn, $Zm"; - -// [149] "fmul $Zd, $Zn, $Zm$iop"; - -// [150] "fmul $Zdn, $Pg/m, $_Zdn, $Zm"; - -// [151] "fmul $Zdn, $Pg/m, $_Zdn, $i1"; - -// [152] "fmulx $Zdn, $Pg/m, $_Zdn, $Zm"; - -// [153] "fneg $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FNEG_ZPmZ_D, FNEG_ZPmZ_H, FNEG_ZPmZ_S)>; - -// [154] "fnmad $Zdn, $Pg/m, $Zm, $Za"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMAD_ZPmZZ_D, FNMAD_ZPmZZ_H, FNMAD_ZPmZZ_S)>; - -// [155] "fnmla $Zda, $Pg/m, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMLA_ZPmZZ_D, FNMLA_ZPmZZ_H, FNMLA_ZPmZZ_S)>; - -// [156] "fnmls $Zda, $Pg/m, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMLS_ZPmZZ_D, FNMLS_ZPmZZ_H, FNMLS_ZPmZZ_S)>; - -// [157] "fnmsb $Zdn, $Pg/m, $Zm, $Za"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMSB_ZPmZZ_D, FNMSB_ZPmZZ_H, FNMSB_ZPmZZ_S)>; - -// [158] "frecpe $Zd, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FRECPE_ZZ_D, FRECPE_ZZ_H, FRECPE_ZZ_S)>; - -// [159] "frecps $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRECPS_ZZZ_D, FRECPS_ZZZ_H, FRECPS_ZZZ_S)>; - -// [160] "frecpx $Zd, 
$Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FRECPX_ZPmZ_D, FRECPX_ZPmZ_H, FRECPX_ZPmZ_S)>; - -// [161] "frinta $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTA_ZPmZ_D, FRINTA_ZPmZ_H, FRINTA_ZPmZ_S)>; - -// [162] "frinti $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTI_ZPmZ_D, FRINTI_ZPmZ_H, FRINTI_ZPmZ_S)>; - -// [163] "frintm $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTM_ZPmZ_D, FRINTM_ZPmZ_H, FRINTM_ZPmZ_S)>; - -// [164] "frintn $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTN_ZPmZ_D, FRINTN_ZPmZ_H, FRINTN_ZPmZ_S)>; - -// [165] "frintp $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTP_ZPmZ_D, FRINTP_ZPmZ_H, FRINTP_ZPmZ_S)>; - -// [166] "frintx $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTX_ZPmZ_D, FRINTX_ZPmZ_H, FRINTX_ZPmZ_S)>; - -// [167] "frintz $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTZ_ZPmZ_D, FRINTZ_ZPmZ_H, FRINTZ_ZPmZ_S)>; - -// [168] "frsqrte $Zd, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRSQRTE_ZZ_D, FRSQRTE_ZZ_H, FRSQRTE_ZZ_S)>; - -// [169] "frsqrts $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FRSQRTS_ZZZ_D, FRSQRTS_ZZZ_H, FRSQRTS_ZZZ_S)>; - -// [170] "fscale $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSCALE_ZPmZ_D, FSCALE_ZPmZ_H, FSCALE_ZPmZ_S)>; - -// [171] "fsqrt $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_154Cyc_GI0], (instrs FSQRT_ZPmZ_D)>; -def : InstRW<[A64FXWrite_134Cyc_GI0], (instrs FSQRT_ZPmZ_H)>; -def : InstRW<[A64FXWrite_98Cyc_GI0], (instrs FSQRT_ZPmZ_S)>; - -// [172] "fsub $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSUB_ZZZ_D, FSUB_ZZZ_H, FSUB_ZZZ_S)>; - -// [173] "fsub $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSUB_ZPmZ_D, FSUB_ZPmZ_H, FSUB_ZPmZ_S)>; - -// [174] "fsub $Zdn, $Pg/m, $_Zdn, $i1"; -def : InstRW<[A64FXWrite_9Cyc_GI0], (instrs FSUB_ZPmI_D, 
FSUB_ZPmI_H, FSUB_ZPmI_S)>; - -// [175] "fsubr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSUBR_ZPmZ_D, FSUBR_ZPmZ_H, FSUBR_ZPmZ_S)>; - -// [176] "fsubr $Zdn, $Pg/m, $_Zdn, $i1"; -def : InstRW<[A64FXWrite_9Cyc_GI0], (instrs FSUBR_ZPmI_D, FSUBR_ZPmI_H, FSUBR_ZPmI_S)>; - -// [177] "ftmad $Zdn, $_Zdn, $Zm, $imm3"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FTMAD_ZZI_D, FTMAD_ZZI_H, FTMAD_ZZI_S)>; - -// [178] "ftsmul $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FTSMUL_ZZZ_D, FTSMUL_ZZZ_H, FTSMUL_ZZZ_S)>; - -// [180] "incb $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCB_XPiI)>; - -// [181] "incd $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCD_XPiI)>; - -// [182] "incd $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs INCD_ZPiI)>; - -// [183] "inch $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCH_XPiI)>; - -// [184] "inch $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs INCH_ZPiI)>; - -// [185] "incp $Rdn, $Pg"; -def : InstRW<[A64FXWrite_6Cyc_GI124], (instrs INCP_XP_B, INCP_XP_D, INCP_XP_H, INCP_XP_S)>; - -// [186] "incp $Zdn, $Pg"; -def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs INCP_ZP_D, INCP_ZP_H, INCP_ZP_S)>; - -// [187] "incw $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCW_XPiI)>; - -// [188] "incw $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs INCW_ZPiI)>; - -// [189] "index $Zd, $Rn, $Rm"; -def : InstRW<[A64FXWrite_17Cyc_GI02], (instrs INDEX_RR_B, INDEX_RR_D, INDEX_RR_H, INDEX_RR_S)>; - -// [190] "index $Zd, $Rn, $imm5"; -def : InstRW<[A64FXWrite_21Cyc_GI02], (instrs INDEX_RI_B, INDEX_RI_D, INDEX_RI_H, INDEX_RI_S)>; - -// [191] "index $Zd, $imm5, $Rm"; -def : InstRW<[A64FXWrite_21Cyc_GI02], (instrs INDEX_IR_B, INDEX_IR_D, INDEX_IR_H, INDEX_IR_S)>; - -// [192] "index $Zd, $imm5, $imm5b"; -def : 
InstRW<[A64FXWrite_13Cyc_GI0], (instrs INDEX_II_B, INDEX_II_D, INDEX_II_H, INDEX_II_S)>; - -// [193] "insr $Zdn, $Rm"; -def : InstRW<[A64FXWrite_10Cyc_GI02], (instrs INSR_ZR_B, INSR_ZR_D, INSR_ZR_H, INSR_ZR_S)>; - -// [194] "insr $Zdn, $Vm"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs INSR_ZV_B, INSR_ZV_D, INSR_ZV_H, INSR_ZV_S)>; - -// [195] "lasta $Rd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_25Cyc_GI056], (instrs LASTA_RPZ_B, LASTA_RPZ_D, LASTA_RPZ_H, LASTA_RPZ_S)>; - -// [196] "lasta $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs LASTA_VPZ_B, LASTA_VPZ_D, LASTA_VPZ_H, LASTA_VPZ_S)>; - -// [197] "lastb $Rd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_25Cyc_GI056], (instrs LASTB_RPZ_B, LASTB_RPZ_D, LASTB_RPZ_H, LASTB_RPZ_S)>; - -// [198] "lastb $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs LASTB_VPZ_B, LASTB_VPZ_D, LASTB_VPZ_H, LASTB_VPZ_S)>; - -// [199] "ld1b $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1B, LD1B_D, LD1B_H, LD1B_S)>; - -// [200] "ld1b $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1B_D_REAL, GLD1B_D_SXTW_REAL, GLD1B_D_UXTW_REAL, GLD1B_S_SXTW_REAL, GLD1B_S_UXTW_REAL)>; - -// [201] "ld1b $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1B_D_IMM_REAL, LD1B_H_IMM_REAL, LD1B_IMM_REAL, LD1B_S_IMM_REAL)>; - -// [202] "ld1b $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1B_D_IMM_REAL, GLD1B_S_IMM_REAL)>; - -// [203] "ld1d $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1D)>; - -// [204] "ld1d $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1D_REAL, GLD1D_SCALED_REAL, GLD1D_SXTW_REAL, GLD1D_SXTW_SCALED_REAL, GLD1D_UXTW_REAL, GLD1D_UXTW_SCALED_REAL)>; - -// [205] "ld1d $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1D_IMM_REAL)>; - -// [206] "ld1d $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs 
GLD1D_IMM_REAL)>; - -// [207] "ld1h $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1H, LD1H_D, LD1H_S)>; - -// [208] "ld1h $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1H_D_REAL, GLD1H_D_SCALED_REAL, GLD1H_D_SXTW_REAL, GLD1H_D_SXTW_SCALED_REAL, GLD1H_D_UXTW_REAL, GLD1H_D_UXTW_SCALED_REAL, GLD1H_S_SXTW_REAL, GLD1H_S_SXTW_SCALED_REAL, GLD1H_S_UXTW_REAL, GLD1H_S_UXTW_SCALED_REAL)>; - -// [209] "ld1h $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1H_D_IMM_REAL, LD1H_IMM_REAL, LD1H_S_IMM_REAL)>; - -// [210] "ld1h $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1H_D_IMM_REAL, GLD1H_S_IMM_REAL)>; - -// [211] "ld1rb $Zt, $Pg/z, [$Rn, $imm6]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RB_D_IMM, LD1RB_H_IMM, LD1RB_IMM, LD1RB_S_IMM)>; - -// [212] "ld1rd $Zt, $Pg/z, [$Rn, $imm6]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RD_IMM)>; - -// [213] "ld1rh $Zt, $Pg/z, [$Rn, $imm6]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RH_D_IMM, LD1RH_IMM, LD1RH_S_IMM)>; - -// [214] "ld1rqb $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_B)>; - -// [215] "ld1rqb $Zt, $Pg/z, [$Rn, $imm4]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_B_IMM)>; - -// [216] "ld1rqd $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_D)>; - -// [217] "ld1rqd $Zt, $Pg/z, [$Rn, $imm4]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_D_IMM)>; - -// [218] "ld1rqh $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_H)>; - -// [219] "ld1rqh $Zt, $Pg/z, [$Rn, $imm4]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_H_IMM)>; - -// [220] "ld1rqw $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_W)>; - -// [221] "ld1rqw $Zt, $Pg/z, [$Rn, $imm4]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_W_IMM)>; - -// [222] "ld1rsb $Zt, $Pg/z, [$Rn, 
$imm6]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RSB_D_IMM, LD1RSB_H_IMM, LD1RSB_S_IMM)>; - -// [223] "ld1rsh $Zt, $Pg/z, [$Rn, $imm6]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RSH_D_IMM, LD1RSH_S_IMM)>; - -// [224] "ld1rsw $Zt, $Pg/z, [$Rn, $imm6]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RSW_IMM)>; - -// [225] "ld1rw $Zt, $Pg/z, [$Rn, $imm6]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RW_D_IMM, LD1RW_IMM)>; - -// [226] "ld1sb $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SB_D, LD1SB_H, LD1SB_S)>; - -// [227] "ld1sb $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1SB_D_REAL, GLD1SB_D_SXTW_REAL, GLD1SB_D_UXTW_REAL, GLD1SB_S_SXTW_REAL, GLD1SB_S_UXTW_REAL)>; - -// [228] "ld1sb $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SB_D_IMM_REAL, LD1SB_H_IMM_REAL, LD1SB_S_IMM_REAL)>; - -// [229] "ld1sb $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1SB_D_IMM_REAL, GLD1SB_S_IMM_REAL)>; - -// [230] "ld1sh $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SH_D, LD1SH_S)>; - -// [231] "ld1sh $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1SH_D_REAL, GLD1SH_D_SCALED_REAL, GLD1SH_D_SXTW_REAL, GLD1SH_D_SXTW_SCALED_REAL, GLD1SH_D_UXTW_REAL, GLD1SH_D_UXTW_SCALED_REAL, GLD1SH_S_SXTW_REAL, GLD1SH_S_SXTW_SCALED_REAL, GLD1SH_S_UXTW_REAL, GLD1SH_S_UXTW_SCALED_REAL)>; - -// [232] "ld1sh $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SH_D_IMM_REAL, LD1SH_S_IMM_REAL)>; - -// [233] "ld1sh $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1SH_D_IMM_REAL, GLD1SH_S_IMM_REAL)>; - -// [234] "ld1sw $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SW_D)>; - -// [235] "ld1sw $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1SW_D_REAL, GLD1SW_D_SCALED_REAL, 
GLD1SW_D_SXTW_REAL, GLD1SW_D_SXTW_SCALED_REAL, GLD1SW_D_UXTW_REAL, GLD1SW_D_UXTW_SCALED_REAL)>; - -// [236] "ld1sw $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SW_D_IMM_REAL)>; - -// [237] "ld1sw $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1SW_D_IMM_REAL)>; - -// [238] "ld1w $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1W, LD1W_D)>; - -// [239] "ld1w $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1W_D_REAL, GLD1W_D_SCALED_REAL, GLD1W_D_SXTW_REAL, GLD1W_D_SXTW_SCALED_REAL, GLD1W_D_UXTW_REAL, GLD1W_D_UXTW_SCALED_REAL, GLD1W_SXTW_REAL, GLD1W_SXTW_SCALED_REAL, GLD1W_UXTW_REAL, GLD1W_UXTW_SCALED_REAL)>; - -// [240] "ld1w $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1W_D_IMM_REAL, LD1W_IMM_REAL)>; - -// [241] "ld1w $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1W_D_IMM_REAL, GLD1W_IMM_REAL)>; - -// [242] "ld2b $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2B)>; - -// [243] "ld2b $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2B_IMM)>; - -// [244] "ld2d $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2D)>; - -// [245] "ld2d $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2D_IMM)>; - -// [246] "ld2h $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2H)>; - -// [247] "ld2h $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2H_IMM)>; - -// [248] "ld2w $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2W)>; - -// [249] "ld2w $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2W_IMM)>; - -// [250] "ld3b $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3B)>; - -// [251] "ld3b $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : 
InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3B_IMM)>; - -// [252] "ld3d $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3D)>; - -// [253] "ld3d $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3D_IMM)>; - -// [254] "ld3h $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3H)>; - -// [255] "ld3h $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3H_IMM)>; - -// [256] "ld3w $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3W)>; - -// [257] "ld3w $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3W_IMM)>; - -// [258] "ld4b $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD4B)>; - -// [259] "ld4b $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD4B_IMM)>; - -// [260] "ld4d $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4D)>; - -// [261] "ld4d $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4D_IMM)>; - -// [262] "ld4h $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4H)>; - -// [263] "ld4h $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4H_IMM)>; - -// [264] "ld4w $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4W)>; - -// [265] "ld4w $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4W_IMM)>; - -// [266] "ldff1b $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1B_D_REAL, LDFF1B_H_REAL, LDFF1B_REAL, LDFF1B_S_REAL)>; - -// [267] "ldff1b $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1B_D_REAL, GLDFF1B_D_SXTW_REAL, GLDFF1B_D_UXTW_REAL, GLDFF1B_S_SXTW_REAL, GLDFF1B_S_UXTW_REAL)>; - -// [268] "ldff1b $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1B_D_IMM_REAL, 
GLDFF1B_S_IMM_REAL)>; - -// [269] "ldff1d $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1D_REAL)>; - -// [270] "ldff1d $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1D_REAL, GLDFF1D_SCALED_REAL, GLDFF1D_SXTW_REAL, GLDFF1D_SXTW_SCALED_REAL, GLDFF1D_UXTW_REAL, GLDFF1D_UXTW_SCALED_REAL)>; - -// [271] "ldff1d $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1D_IMM_REAL)>; - -// [272] "ldff1h $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1H_D_REAL, LDFF1H_REAL, LDFF1H_S_REAL)>; - -// [273] "ldff1h $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1H_D_REAL, GLDFF1H_D_SCALED_REAL, GLDFF1H_D_SXTW_REAL, GLDFF1H_D_SXTW_SCALED_REAL, GLDFF1H_D_UXTW_REAL, GLDFF1H_D_UXTW_SCALED_REAL, GLDFF1H_S_SXTW_REAL, GLDFF1H_S_SXTW_SCALED_REAL, GLDFF1H_S_UXTW_REAL, GLDFF1H_S_UXTW_SCALED_REAL)>; - -// [274] "ldff1h $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1H_D_IMM_REAL, GLDFF1H_S_IMM_REAL)>; - -// [275] "ldff1sb $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1SB_D_REAL, LDFF1SB_H_REAL, LDFF1SB_S_REAL)>; - -// [276] "ldff1sb $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1SB_D_REAL, GLDFF1SB_D_SXTW_REAL, GLDFF1SB_D_UXTW_REAL, GLDFF1SB_S_SXTW_REAL, GLDFF1SB_S_UXTW_REAL)>; - -// [277] "ldff1sb $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1SB_D_IMM_REAL, GLDFF1SB_S_IMM_REAL)>; - -// [278] "ldff1sh $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1SH_D_REAL, LDFF1SH_S_REAL)>; - -// [279] "ldff1sh $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1SH_D_REAL, GLDFF1SH_D_SCALED_REAL, GLDFF1SH_D_SXTW_REAL, GLDFF1SH_D_SXTW_SCALED_REAL, GLDFF1SH_D_UXTW_REAL, GLDFF1SH_D_UXTW_SCALED_REAL, GLDFF1SH_S_SXTW_REAL, GLDFF1SH_S_SXTW_SCALED_REAL, GLDFF1SH_S_UXTW_REAL, 
GLDFF1SH_S_UXTW_SCALED_REAL)>; - -// [280] "ldff1sh $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1SH_D_IMM_REAL, GLDFF1SH_S_IMM_REAL)>; - -// [281] "ldff1sw $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1SW_D_REAL)>; - -// [282] "ldff1sw $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1SW_D_REAL, GLDFF1SW_D_SCALED_REAL, GLDFF1SW_D_SXTW_REAL, GLDFF1SW_D_SXTW_SCALED_REAL, GLDFF1SW_D_UXTW_REAL, GLDFF1SW_D_UXTW_SCALED_REAL)>; - -// [283] "ldff1sw $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1SW_D_IMM_REAL)>; - -// [284] "ldff1w $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1W_D_REAL, LDFF1W_REAL)>; - -// [285] "ldff1w $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1W_D_REAL, GLDFF1W_D_SCALED_REAL, GLDFF1W_D_SXTW_REAL, GLDFF1W_D_SXTW_SCALED_REAL, GLDFF1W_D_UXTW_REAL, GLDFF1W_D_UXTW_SCALED_REAL, GLDFF1W_SXTW_REAL, GLDFF1W_SXTW_SCALED_REAL, GLDFF1W_UXTW_REAL, GLDFF1W_UXTW_SCALED_REAL)>; - -// [286] "ldff1w $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1W_D_IMM_REAL, GLDFF1W_IMM_REAL)>; - -// [287] "ldnf1b $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1B_D_IMM_REAL, LDNF1B_H_IMM_REAL, LDNF1B_IMM_REAL, LDNF1B_S_IMM_REAL)>; - -// [288] "ldnf1d $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1D_IMM_REAL)>; - -// [289] "ldnf1h $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1H_D_IMM_REAL, LDNF1H_IMM_REAL, LDNF1H_S_IMM_REAL)>; - -// [290] "ldnf1sb $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1SB_D_IMM_REAL, LDNF1SB_H_IMM_REAL, LDNF1SB_S_IMM_REAL)>; - -// [291] "ldnf1sh $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1SH_D_IMM_REAL, LDNF1SH_S_IMM_REAL)>; - -// 
[292] "ldnf1sw $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1SW_D_IMM_REAL)>; - -// [293] "ldnf1w $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1W_D_IMM_REAL, LDNF1W_IMM_REAL)>; - -// [294] "ldnt1b $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1B_ZRR)>; - -// [295] "ldnt1b $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1B_ZRI)>; - -// [296] "ldnt1d $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1D_ZRR)>; - -// [297] "ldnt1d $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1D_ZRI)>; - -// [298] "ldnt1h $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1H_ZRR)>; - -// [299] "ldnt1h $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1H_ZRI)>; - -// [300] "ldnt1w $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1W_ZRR)>; - -// [301] "ldnt1w $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1W_ZRI)>; - -// [302] "ldr $Pt, [$Rn, $imm9, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI5], (instrs LDR_PXI)>; - -// [303] "ldr $Zt, [$Rn, $imm9, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI5], (instrs LDR_ZXI)>; - -// [304] "lsl $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_WIDE_ZZZ_B, LSL_WIDE_ZZZ_H, LSL_WIDE_ZZZ_S)>; - -// [305] "lsl $Zd, $Zn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_ZZI_B, LSL_ZZI_D, LSL_ZZI_H, LSL_ZZI_S)>; - -// [306] "lsl $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_WIDE_ZPmZ_B, LSL_WIDE_ZPmZ_H, LSL_WIDE_ZPmZ_S, LSL_ZPmZ_B, LSL_ZPmZ_D, LSL_ZPmZ_H, LSL_ZPmZ_S)>; - -// [307] "lsl $Zdn, $Pg/m, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_ZPmI_B, LSL_ZPmI_D, LSL_ZPmI_H, LSL_ZPmI_S)>; - -// [308] "lslr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : 
InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSLR_ZPmZ_B, LSLR_ZPmZ_D, LSLR_ZPmZ_H, LSLR_ZPmZ_S)>; - -// [309] "lsr $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_WIDE_ZZZ_B, LSR_WIDE_ZZZ_H, LSR_WIDE_ZZZ_S)>; - -// [310] "lsr $Zd, $Zn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_ZZI_B, LSR_ZZI_D, LSR_ZZI_H, LSR_ZZI_S)>; - -// [311] "lsr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_WIDE_ZPmZ_B, LSR_WIDE_ZPmZ_H, LSR_WIDE_ZPmZ_S, LSR_ZPmZ_B, LSR_ZPmZ_D, LSR_ZPmZ_H, LSR_ZPmZ_S)>; - -// [312] "lsr $Zdn, $Pg/m, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_ZPmI_B, LSR_ZPmI_D, LSR_ZPmI_H, LSR_ZPmI_S)>; - -// [313] "lsrr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSRR_ZPmZ_B, LSRR_ZPmZ_D, LSRR_ZPmZ_H, LSRR_ZPmZ_S)>; - -// [314] "mad $Zdn, $Pg/m, $Zm, $Za"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MAD_ZPmZZ_B, MAD_ZPmZZ_D, MAD_ZPmZZ_H, MAD_ZPmZZ_S)>; - -// [315] "mla $Zda, $Pg/m, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MLA_ZPmZZ_B, MLA_ZPmZZ_D, MLA_ZPmZZ_H, MLA_ZPmZZ_S)>; - -// [316] "mls $Zda, $Pg/m, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MLS_ZPmZZ_B, MLS_ZPmZZ_D, MLS_ZPmZZ_H, MLS_ZPmZZ_S)>; - -// [317] "movprfx $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs MOVPRFX_ZPmZ_B, MOVPRFX_ZPmZ_D, MOVPRFX_ZPmZ_H, MOVPRFX_ZPmZ_S)>; - -// [318] "movprfx $Zd, $Pg/z, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs MOVPRFX_ZPzZ_B, MOVPRFX_ZPzZ_D, MOVPRFX_ZPzZ_H, MOVPRFX_ZPzZ_S)>; - -// [319] "movprfx $Zd, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs MOVPRFX_ZZ)>; - -// [320] "msb $Zdn, $Pg/m, $Zm, $Za"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MSB_ZPmZZ_B, MSB_ZPmZZ_D, MSB_ZPmZZ_H, MSB_ZPmZZ_S)>; - -// [321] "mul $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MUL_ZPmZ_B, MUL_ZPmZ_D, MUL_ZPmZ_H, MUL_ZPmZ_S)>; - -// [322] "mul $Zdn, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_9Cyc_GI0], 
(instrs MUL_ZI_B, MUL_ZI_D, MUL_ZI_H, MUL_ZI_S)>; - -// [323] "nand $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NAND_PPzPP)>; - -// [324] "nands $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NANDS_PPzPP)>; - -// [325] "neg $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs NEG_ZPmZ_B, NEG_ZPmZ_D, NEG_ZPmZ_H, NEG_ZPmZ_S)>; - -// [326] "nor $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NOR_PPzPP)>; - -// [327] "nors $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NORS_PPzPP)>; - -// [328] "not $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs NOT_ZPmZ_B, NOT_ZPmZ_D, NOT_ZPmZ_H, NOT_ZPmZ_S)>; - -// [329] "orn $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORN_PPzPP)>; - -// [330] "orns $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORNS_PPzPP)>; - -// [331] "orr $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORR_PPzPP)>; - -// [332] "orr $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ORR_ZZZ)>; - -// [333] "orr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ORR_ZPmZ_B, ORR_ZPmZ_D, ORR_ZPmZ_H, ORR_ZPmZ_S)>; - -// [334] "orr $Zdn, $_Zdn, $imms13"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs ORR_ZI)>; - -// [335] "orrs $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORRS_PPzPP)>; - -// [336] "orv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs ORV_VPZ_B, ORV_VPZ_D, ORV_VPZ_H, ORV_VPZ_S)>; - -// [337] "pfalse $Pd"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PFALSE)>; - -// [338] "pnext $Pdn, $Pg, $_Pdn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PNEXT_B, PNEXT_D, PNEXT_H, PNEXT_S)>; - -// [339] "prfb $prfop, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFB_PRR)>; - -// [340] "prfb $prfop, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFB_D_SCALED, PRFB_D_SXTW_SCALED, 
PRFB_D_UXTW_SCALED, PRFB_S_SXTW_SCALED, PRFB_S_UXTW_SCALED)>; - -// [341] "prfb $prfop, $Pg, [$Rn, $imm6, mul vl]"; -def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFB_PRI)>; - -// [342] "prfb $prfop, $Pg, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFB_D_PZI, PRFB_S_PZI)>; - -// [343] "prfd $prfop, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFD_PRR)>; - -// [344] "prfd $prfop, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFD_D_SCALED, PRFD_D_SXTW_SCALED, PRFD_D_UXTW_SCALED, PRFD_S_SXTW_SCALED, PRFD_S_UXTW_SCALED)>; - -// [345] "prfd $prfop, $Pg, [$Rn, $imm6, mul vl]"; -def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFD_PRI)>; - -// [346] "prfd $prfop, $Pg, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFD_D_PZI, PRFD_S_PZI)>; - -// [347] "prfh $prfop, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFH_PRR)>; - -// [348] "prfh $prfop, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFH_D_SCALED, PRFH_D_SXTW_SCALED, PRFH_D_UXTW_SCALED, PRFH_S_SXTW_SCALED, PRFH_S_UXTW_SCALED)>; - -// [349] "prfh $prfop, $Pg, [$Rn, $imm6, mul vl]"; -def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFH_PRI)>; - -// [350] "prfh $prfop, $Pg, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFH_D_PZI, PRFH_S_PZI)>; - -// [351] "prfw $prfop, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFS_PRR)>; - -// [352] "prfw $prfop, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFW_D_SCALED, PRFW_D_SXTW_SCALED, PRFW_D_UXTW_SCALED, PRFW_S_SXTW_SCALED, PRFW_S_UXTW_SCALED)>; - -// [353] "prfw $prfop, $Pg, [$Rn, $imm6, mul vl]"; -def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFW_PRI)>; - -// [354] "prfw $prfop, $Pg, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFW_D_PZI, PRFW_S_PZI)>; - -// [355] "ptest $Pg, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PTEST_PP)>; - -// [356] "ptrue $Pd, $pattern"; -def : 
InstRW<[A64FXWrite_3Cyc_GI1], (instrs PTRUE_B, PTRUE_D, PTRUE_H, PTRUE_S)>; - -// [357] "ptrues $Pd, $pattern"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PTRUES_B, PTRUES_D, PTRUES_H, PTRUES_S)>; - -// [358] "punpkhi $Pd, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PUNPKHI_PP)>; - -// [359] "punpklo $Pd, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PUNPKLO_PP)>; - -// [360] "rbit $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBIT_ZPmZ_B, RBIT_ZPmZ_D, RBIT_ZPmZ_H, RBIT_ZPmZ_S)>; - -// [361] "rdffr $Pd"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs RDFFR_P)>; - -// [362] "rdffr $Pd, $Pg/z"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs RDFFR_PPz)>; - -// [363] "rdffrs $Pd, $Pg/z"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs RDFFRS_PPz)>; - -// [364] "rdvl $Rd, $imm6"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs RDVLI_XI)>; - -// [365] "rev $Pd, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs REV_PP_B, REV_PP_D, REV_PP_H, REV_PP_S)>; - -// [366] "rev $Zd, $Zn"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs REV_ZZ_B, REV_ZZ_D, REV_ZZ_H, REV_ZZ_S)>; - -// [367] "revb $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs REVB_ZPmZ_D, REVB_ZPmZ_H, REVB_ZPmZ_S)>; - -// [368] "revh $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs REVH_ZPmZ_D, REVH_ZPmZ_S)>; - -// [369] "revw $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs REVW_ZPmZ_D)>; - -// [370] "sabd $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SABD_ZPmZ_B, SABD_ZPmZ_D, SABD_ZPmZ_H, SABD_ZPmZ_S)>; - -// [371] "saddv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_12Cyc_GI03], (instrs SADDV_VPZ_B, SADDV_VPZ_H, SADDV_VPZ_S)>; - -// [372] "scvtf $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs SCVTF_ZPmZ_DtoD, SCVTF_ZPmZ_DtoH, SCVTF_ZPmZ_DtoS, SCVTF_ZPmZ_HtoH, SCVTF_ZPmZ_StoD, SCVTF_ZPmZ_StoH, SCVTF_ZPmZ_StoS)>; - -// [373] "sdiv $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs 
SDIV_ZPmZ_D, SDIV_ZPmZ_S)>; - -// [374] "sdivr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs SDIVR_ZPmZ_D, SDIVR_ZPmZ_S)>; - -// [375] "sdot $Zda, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs SDOT_ZZZ_D, SDOT_ZZZ_S)>; - -// [376] "sdot $Zda, $Zn, $Zm$iop"; -def : InstRW<[A64FXWrite_15Cyc_NGI03], (instrs SDOT_ZZZI_D, SDOT_ZZZI_S)>; - -// [377] "sel $Pd, $Pg, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs SEL_PPPP)>; - -// [378] "sel $Zd, $Pg, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SEL_ZPZZ_B, SEL_ZPZZ_D, SEL_ZPZZ_H, SEL_ZPZZ_S)>; - -// [379] "setffr"; -def : InstRW<[A64FXWrite_6Cyc], (instrs SETFFR)>; - -// [380] "smax $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SMAX_ZPmZ_B, SMAX_ZPmZ_D, SMAX_ZPmZ_H, SMAX_ZPmZ_S)>; - -// [381] "smax $Zdn, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs SMAX_ZI_B, SMAX_ZI_D, SMAX_ZI_H, SMAX_ZI_S)>; - -// [382] "smaxv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs SMAXV_VPZ_B, SMAXV_VPZ_D, SMAXV_VPZ_H, SMAXV_VPZ_S)>; - -// [383] "smin $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SMIN_ZPmZ_B, SMIN_ZPmZ_D, SMIN_ZPmZ_H, SMIN_ZPmZ_S)>; - -// [384] "smin $Zdn, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs SMIN_ZI_B, SMIN_ZI_D, SMIN_ZI_H, SMIN_ZI_S)>; - -// [385] "sminv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs SMINV_VPZ_B, SMINV_VPZ_D, SMINV_VPZ_H, SMINV_VPZ_S)>; - -// [386] "smulh $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs SMULH_ZPmZ_B, SMULH_ZPmZ_D, SMULH_ZPmZ_H, SMULH_ZPmZ_S)>; - -// [387] "splice $Zdn, $Pg, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs SPLICE_ZPZ_B, SPLICE_ZPZ_D, SPLICE_ZPZ_H, SPLICE_ZPZ_S)>; - -// [388] "sqadd $Zd, $Zn, $Zm"; - -// [389] "sqadd $Zdn, $_Zdn, $imm"; - -// [390] "sqdecb $Rdn, $_Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECB_XPiWdI)>; - -// [391] "sqdecb 
$Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECB_XPiI)>; - -// [392] "sqdecd $Rdn, $_Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECD_XPiWdI)>; - -// [393] "sqdecd $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECD_XPiI)>; - -// [394] "sqdecd $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQDECD_ZPiI)>; - -// [395] "sqdech $Rdn, $_Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECH_XPiWdI)>; - -// [396] "sqdech $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECH_XPiI)>; - -// [397] "sqdech $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQDECH_ZPiI)>; - -// [398] "sqdecp $Rdn, $Pg"; -def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQDECP_XP_B, SQDECP_XP_D, SQDECP_XP_H, SQDECP_XP_S)>; - -// [399] "sqdecp $Rdn, $Pg, $_Rdn"; -def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQDECP_XPWd_B, SQDECP_XPWd_D, SQDECP_XPWd_H, SQDECP_XPWd_S)>; - -// [400] "sqdecp $Zdn, $Pg"; -def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs SQDECP_ZP_D, SQDECP_ZP_H, SQDECP_ZP_S)>; - -// [401] "sqdecw $Rdn, $_Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECW_XPiWdI)>; - -// [402] "sqdecw $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECW_XPiI)>; - -// [403] "sqdecw $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQDECW_ZPiI)>; - -// [404] "sqincb $Rdn, $_Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCB_XPiWdI)>; - -// [405] "sqincb $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCB_XPiI)>; - -// [406] "sqincd $Rdn, $_Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCD_XPiWdI)>; - -// [407] "sqincd $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCD_XPiI)>; - -// [408] "sqincd $Zdn, $pattern, mul $imm4"; 
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCD_ZPiI)>; - -// [409] "sqinch $Rdn, $_Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCH_XPiWdI)>; - -// [410] "sqinch $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCH_XPiI)>; - -// [411] "sqinch $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCH_ZPiI)>; - -// [412] "sqincp $Rdn, $Pg"; -def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQINCP_XP_B, SQINCP_XP_D, SQINCP_XP_H, SQINCP_XP_S)>; - -// [413] "sqincp $Rdn, $Pg, $_Rdn"; -def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQINCP_XPWd_B, SQINCP_XPWd_D, SQINCP_XPWd_H, SQINCP_XPWd_S)>; - -// [414] "sqincp $Zdn, $Pg"; -def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs SQINCP_ZP_D, SQINCP_ZP_H, SQINCP_ZP_S)>; - -// [415] "sqincw $Rdn, $_Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCW_XPiWdI)>; - -// [416] "sqincw $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCW_XPiI)>; - -// [417] "sqincw $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCW_ZPiI)>; - -// [418] "sqsub $Zd, $Zn, $Zm"; - -// [419] "sqsub $Zdn, $_Zdn, $imm"; - -// [420] "st1b $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1B, ST1B_D, ST1B_H, ST1B_S)>; - -// [421] "st1b $Zt, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1B_D_REAL, SST1B_D_SXTW, SST1B_D_UXTW, SST1B_S_SXTW, SST1B_S_UXTW)>; - -// [422] "st1b $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1B_D_IMM, ST1B_H_IMM, ST1B_IMM, ST1B_S_IMM)>; - -// [423] "st1b $Zt, $Pg, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1B_D_IMM, SST1B_S_IMM)>; - -// [424] "st1d $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1D)>; - -// [425] "st1d $Zt, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1D_REAL, SST1D_SCALED_SCALED_REAL, SST1D_SXTW, SST1D_SXTW_SCALED, SST1D_UXTW, 
SST1D_UXTW_SCALED)>; - -// [426] "st1d $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1D_IMM)>; - -// [427] "st1d $Zt, $Pg, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1D_IMM)>; - -// [428] "st1h $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1H, ST1H_D, ST1H_S)>; - -// [429] "st1h $Zt, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1H_D_REAL, SST1H_D_SCALED_SCALED_REAL, SST1H_D_SXTW, SST1H_D_SXTW_SCALED, SST1H_D_UXTW, SST1H_D_UXTW_SCALED, SST1H_S_SXTW, SST1H_S_SXTW_SCALED, SST1H_S_UXTW, SST1H_S_UXTW_SCALED)>; - -// [430] "st1h $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1H_D_IMM, ST1H_IMM, ST1H_S_IMM)>; - -// [431] "st1h $Zt, $Pg, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1H_D_IMM, SST1H_S_IMM)>; - -// [432] "st1w $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1W, ST1W_D)>; - -// [433] "st1w $Zt, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1W_D_REAL, SST1W_D_SCALED_SCALED_REAL, SST1W_D_SXTW, SST1W_D_SXTW_SCALED, SST1W_D_UXTW, SST1W_D_UXTW_SCALED, SST1W_SXTW, SST1W_SXTW_SCALED, SST1W_UXTW, SST1W_UXTW_SCALED)>; - -// [434] "st1w $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1W_D_IMM, ST1W_IMM)>; - -// [435] "st1w $Zt, $Pg, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1W_D_IMM, SST1W_IMM)>; - -// [436] "st2b $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2B)>; - -// [437] "st2b $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2B_IMM)>; - -// [438] "st2d $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2D)>; - -// [439] "st2d $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2D_IMM)>; - -// [440] "st2h $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2H)>; - -// [441] "st2h $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : 
InstRW<[A64FXWrite_ST2W_7], (instrs ST2H_IMM)>; - -// [442] "st2w $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2W)>; - -// [443] "st2w $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2W_IMM)>; - -// [444] "st3b $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3B)>; - -// [445] "st3b $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3B_IMM)>; - -// [446] "st3d $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3D)>; - -// [447] "st3d $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3D_IMM)>; - -// [448] "st3h $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3H)>; - -// [449] "st3h $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3H_IMM)>; - -// [450] "st3w $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3W)>; - -// [451] "st3w $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3W_IMM)>; - -// [452] "st4b $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4B)>; - -// [453] "st4b $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4B_IMM)>; - -// [454] "st4d $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4D)>; - -// [455] "st4d $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4D_IMM)>; - -// [456] "st4h $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4H)>; - -// [457] "st4h $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4H_IMM)>; - -// [458] "st4w $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4W)>; - -// [459] "st4w $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4W_IMM)>; - -// [460] "stnt1b $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1B_ZRR)>; - -// [461] "stnt1b $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : 
InstRW<[A64FXWrite_ST1W_6], (instrs STNT1B_ZRI)>; - -// [462] "stnt1d $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1D_ZRR)>; - -// [463] "stnt1d $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1D_ZRI)>; - -// [464] "stnt1h $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1H_ZRR)>; - -// [465] "stnt1h $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1H_ZRI)>; - -// [466] "stnt1w $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1W_ZRR)>; - -// [467] "stnt1w $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1W_ZRI)>; - -// [468] "str $Pt, [$Rn, $imm9, mul vl]"; -def : InstRW<[A64FXWrite_6Cyc_GI15], (instrs STR_PXI)>; - -// [469] "str $Zt, [$Rn, $imm9, mul vl]"; -def : InstRW<[A64FXWrite_6Cyc_GI05], (instrs STR_ZXI)>; - -// [470] "sub $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUB_ZZZ_B, SUB_ZZZ_D, SUB_ZZZ_H, SUB_ZZZ_S)>; - -// [471] "sub $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUB_ZPmZ_B, SUB_ZPmZ_D, SUB_ZPmZ_H, SUB_ZPmZ_S)>; - -// [472] "sub $Zdn, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUB_ZI_B, SUB_ZI_D, SUB_ZI_H, SUB_ZI_S)>; - -// [473] "subr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUBR_ZPmZ_B, SUBR_ZPmZ_D, SUBR_ZPmZ_H, SUBR_ZPmZ_S)>; - -// [474] "subr $Zdn, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs SUBR_ZI_B, SUBR_ZI_D, SUBR_ZI_H, SUBR_ZI_S)>; - -// [475] "sunpkhi $Zd, $Zn"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs SUNPKHI_ZZ_D, SUNPKHI_ZZ_H, SUNPKHI_ZZ_S)>; - -// [476] "sunpklo $Zd, $Zn"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs SUNPKLO_ZZ_D, SUNPKLO_ZZ_H, SUNPKLO_ZZ_S)>; - -// [477] "sxtb $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SXTB_ZPmZ_D, SXTB_ZPmZ_H, SXTB_ZPmZ_S)>; - -// [478] "sxth $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs 
SXTH_ZPmZ_D, SXTH_ZPmZ_S)>; - -// [479] "sxtw $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SXTW_ZPmZ_D)>; - -// [480] "tbl $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs TBL_ZZZ_B, TBL_ZZZ_D, TBL_ZZZ_H, TBL_ZZZ_S)>; - -// [481] "trn1 $Pd, $Pn, $Pm"; - -// [482] "trn1 $Zd, $Zn, $Zm"; - -// [483] "trn2 $Pd, $Pn, $Pm"; - -// [484] "trn2 $Zd, $Zn, $Zm"; - -// [486] "uabd $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UABD_ZPmZ_B, UABD_ZPmZ_D, UABD_ZPmZ_H, UABD_ZPmZ_S)>; - -// [487] "uaddv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_12Cyc_GI03], (instrs UADDV_VPZ_B, UADDV_VPZ_D, UADDV_VPZ_H, UADDV_VPZ_S)>; - -// [488] "ucvtf $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs UCVTF_ZPmZ_DtoD, UCVTF_ZPmZ_DtoH, UCVTF_ZPmZ_DtoS, UCVTF_ZPmZ_HtoH, UCVTF_ZPmZ_StoD, UCVTF_ZPmZ_StoH, UCVTF_ZPmZ_StoS)>; - -// [489] "udiv $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs UDIV_ZPmZ_D, UDIV_ZPmZ_S)>; - -// [490] "udivr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs UDIVR_ZPmZ_D, UDIVR_ZPmZ_S)>; - -// [491] "udot $Zda, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs UDOT_ZZZ_D, UDOT_ZZZ_S)>; - -// [492] "udot $Zda, $Zn, $Zm$iop"; -def : InstRW<[A64FXWrite_15Cyc_NGI03], (instrs UDOT_ZZZI_D, UDOT_ZZZI_S)>; - -// [493] "umax $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UMAX_ZPmZ_B, UMAX_ZPmZ_D, UMAX_ZPmZ_H, UMAX_ZPmZ_S)>; - -// [494] "umax $Zdn, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs UMAX_ZI_B, UMAX_ZI_D, UMAX_ZI_H, UMAX_ZI_S)>; - -// [495] "umaxv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs UMAXV_VPZ_B, UMAXV_VPZ_D, UMAXV_VPZ_H, UMAXV_VPZ_S)>; - -// [496] "umin $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UMIN_ZPmZ_B, UMIN_ZPmZ_D, UMIN_ZPmZ_H, UMIN_ZPmZ_S)>; - -// [497] "umin $Zdn, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs UMIN_ZI_B, UMIN_ZI_D, UMIN_ZI_H, 
UMIN_ZI_S)>; - -// [498] "uminv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs UMINV_VPZ_B, UMINV_VPZ_D, UMINV_VPZ_H, UMINV_VPZ_S)>; - -// [499] "umulh $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs UMULH_ZPmZ_B, UMULH_ZPmZ_D, UMULH_ZPmZ_H, UMULH_ZPmZ_S)>; - -// [500] "uqadd $Zd, $Zn, $Zm"; - -// [501] "uqadd $Zdn, $_Zdn, $imm"; - -// [502] "uqdecb $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECB_WPiI, UQDECB_XPiI)>; - -// [503] "uqdecd $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECD_WPiI, UQDECD_XPiI)>; - -// [504] "uqdecd $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQDECD_ZPiI)>; - -// [505] "uqdech $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECH_WPiI, UQDECH_XPiI)>; - -// [506] "uqdech $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQDECH_ZPiI)>; - -// [507] "uqdecp $Rdn, $Pg"; -def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs UQDECP_WP_B, UQDECP_WP_D, UQDECP_WP_H, UQDECP_WP_S, UQDECP_XP_B, UQDECP_XP_D, UQDECP_XP_H, UQDECP_XP_S)>; - -// [508] "uqdecp $Zdn, $Pg"; -def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs UQDECP_ZP_D, UQDECP_ZP_H, UQDECP_ZP_S)>; - -// [509] "uqdecw $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECW_WPiI, UQDECW_XPiI)>; - -// [510] "uqdecw $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQDECW_ZPiI)>; - -// [511] "uqincb $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCB_WPiI, UQINCB_XPiI)>; - -// [512] "uqincd $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCD_WPiI, UQINCD_XPiI)>; - -// [513] "uqincd $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQINCD_ZPiI)>; - -// [514] "uqinch $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCH_WPiI, UQINCH_XPiI)>; - -// [515] "uqinch $Zdn, 
$pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQINCH_ZPiI)>; - -// [516] "uqincp $Rdn, $Pg"; -def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs UQINCP_WP_B, UQINCP_WP_D, UQINCP_WP_H, UQINCP_WP_S, UQINCP_XP_B, UQINCP_XP_D, UQINCP_XP_H, UQINCP_XP_S)>; - -// [517] "uqincp $Zdn, $Pg"; -def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs UQINCP_ZP_D, UQINCP_ZP_H, UQINCP_ZP_S)>; - -// [518] "uqincw $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCW_WPiI, UQINCW_XPiI)>; - -// [519] "uqincw $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQINCW_ZPiI)>; - -// [520] "uqsub $Zd, $Zn, $Zm"; -//@@@ def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQSUB_ZZZ_B, UQSUB_ZZZ_D, UQSUB_ZZZ_H, UQSUB_ZZZ_S)>; - -// [521] "uqsub $Zdn, $_Zdn, $imm"; -//@@@ def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQSUB_ZI_B, UQSUB_ZI_D, UQSUB_ZI_H, UQSUB_ZI_S)>; - -// [522] "uunpkhi $Zd, $Zn"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs UUNPKHI_ZZ_D, UUNPKHI_ZZ_H, UUNPKHI_ZZ_S)>; - -// [523] "uunpklo $Zd, $Zn"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs UUNPKLO_ZZ_D, UUNPKLO_ZZ_H, UUNPKLO_ZZ_S)>; - -// [524] "uxtb $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UXTB_ZPmZ_D, UXTB_ZPmZ_H, UXTB_ZPmZ_S)>; - -// [525] "uxth $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UXTH_ZPmZ_D, UXTH_ZPmZ_S)>; - -// [526] "uxtw $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UXTW_ZPmZ_D)>; - -// [527] "uzp1 $Pd, $Pn, $Pm"; - -// [528] "uzp1 $Zd, $Zn, $Zm"; - -// [529] "uzp2 $Pd, $Pn, $Pm"; - -// [530] "uzp2 $Zd, $Zn, $Zm"; - -// [531] "whilele $Pd, $Rn, $Rm"; -def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELE_PWW_B, WHILELE_PWW_D, WHILELE_PWW_H, WHILELE_PWW_S, WHILELE_PXX_B, WHILELE_PXX_D, WHILELE_PXX_H, WHILELE_PXX_S)>; - -// [532] "whilelo $Pd, $Rn, $Rm"; -def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELO_PWW_B, WHILELO_PWW_D, WHILELO_PWW_H, WHILELO_PWW_S, WHILELO_PXX_B, WHILELO_PXX_D, 
WHILELO_PXX_H, WHILELO_PXX_S)>; - -// [533] "whilels $Pd, $Rn, $Rm"; -def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELS_PWW_B, WHILELS_PWW_D, WHILELS_PWW_H, WHILELS_PWW_S, WHILELS_PXX_B, WHILELS_PXX_D, WHILELS_PXX_H, WHILELS_PXX_S)>; - -// [534] "whilelt $Pd, $Rn, $Rm"; -def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELT_PWW_B, WHILELT_PWW_D, WHILELT_PWW_H, WHILELT_PWW_S, WHILELT_PXX_B, WHILELT_PXX_D, WHILELT_PXX_H, WHILELT_PXX_S)>; - -// [535] "wrffr $Pn"; -def : InstRW<[A64FXWrite_6Cyc_NGI1], (instrs WRFFR)>; - -// [536] "zip1 $Pd, $Pn, $Pm"; - -// [537] "zip1 $Zd, $Zn, $Zm"; - -// [538] "zip2 $Pd, $Pn, $Pm"; - -// [539] "zip2 $Zd, $Zn, $Zm"; - -} // SchedModel = A64FXModel diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedAmpere1.td b/suite/synctools/tablegen/AArch64/AArch64SchedAmpere1.td deleted file mode 100644 index 32f7299fbf..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedAmpere1.td +++ /dev/null @@ -1,1136 +0,0 @@ -//=- AArch64SchedAmpere1.td - Ampere-1 scheduling def -----*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for the Ampere Computing Ampere-1 to -// support instruction scheduling and other instruction cost heuristics. -// -//===----------------------------------------------------------------------===// - -// The Ampere-1 core is an out-of-order micro-architecture. The front -// end has branch prediction, with a 10-cycle recovery time from a -// mispredicted branch. Instructions coming out of the front end are -// decoded into internal micro-ops (uops). 
- -def Ampere1Model : SchedMachineModel { - let IssueWidth = 4; // 4-way decode and dispatch - let MicroOpBufferSize = 174; // micro-op re-order buffer size - let LoadLatency = 4; // Optimistic load latency - let MispredictPenalty = 10; // Branch mispredict penalty - let LoopMicroOpBufferSize = 32; // Instruction queue size - let CompleteModel = 1; - - list UnsupportedFeatures = !listconcat(SVEUnsupported.F, - SMEUnsupported.F); -} - -let SchedModel = Ampere1Model in { - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available on Ampere-1. -// Ampere-1 has 12 pipelines that 8 independent scheduler (4 integer, 2 FP, -// and 2 memory) issue into. The integer and FP schedulers can each issue -// one uop per cycle, while the memory schedulers can each issue one load -// and one store address calculation per cycle. - -def Ampere1UnitA : ProcResource<2>; // integer single-cycle, branch, and flags r/w -def Ampere1UnitB : ProcResource<2>; // integer single-cycle, and complex shifts -def Ampere1UnitBS : ProcResource<1>; // integer multi-cycle -def Ampere1UnitL : ProcResource<2>; // load -def Ampere1UnitS : ProcResource<2>; // store address calculation -def Ampere1UnitX : ProcResource<1>; // FP and vector operations, and flag write -def Ampere1UnitY : ProcResource<1>; // FP and vector operations, and crypto -def Ampere1UnitZ : ProcResource<1>; // FP store data and FP-to-integer moves - -def Ampere1UnitAB : ProcResGroup<[Ampere1UnitA, Ampere1UnitB]>; -def Ampere1UnitXY : ProcResGroup<[Ampere1UnitX, Ampere1UnitY]>; - -//===----------------------------------------------------------------------===// -// Define customized scheduler read/write types specific to the Ampere-1. 
- -def Ampere1Write_1cyc_1A : SchedWriteRes<[Ampere1UnitA]> { - let Latency = 1; - let NumMicroOps = 1; -} - -def Ampere1Write_1cyc_2A : SchedWriteRes<[Ampere1UnitA, Ampere1UnitA]> { - let Latency = 1; - let NumMicroOps = 2; -} - -def Ampere1Write_1cyc_1B : SchedWriteRes<[Ampere1UnitB]> { - let Latency = 1; - let NumMicroOps = 1; -} - -def Ampere1Write_1cyc_1AB : SchedWriteRes<[Ampere1UnitAB]> { - let Latency = 1; - let NumMicroOps = 1; -} - -def Ampere1Write_1cyc_1L : SchedWriteRes<[Ampere1UnitL]> { - let Latency = 1; - let NumMicroOps = 1; -} - -def Ampere1Write_1cyc_1S : SchedWriteRes<[Ampere1UnitS]> { - let Latency = 1; - let NumMicroOps = 1; -} - -def Ampere1Write_1cyc_2S : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS]> { - let Latency = 1; - let NumMicroOps = 2; -} - -def Ampere1Write_2cyc_1Y : SchedWriteRes<[Ampere1UnitY]> { - let Latency = 2; - let NumMicroOps = 1; -} - -def Ampere1Write_2cyc_2AB : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitAB]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def Ampere1Write_2cyc_1B_1AB : SchedWriteRes<[Ampere1UnitB, Ampere1UnitAB]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def Ampere1Write_2cyc_1B_1A : SchedWriteRes<[Ampere1UnitB, Ampere1UnitA]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def Ampere1Write_2cyc_1AB_1A : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitA]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def Ampere1Write_2cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def Ampere1Write_2cyc_1AB_2S : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitS, - Ampere1UnitS]> { - let Latency = 2; - let NumMicroOps = 3; -} - -def Ampere1Write_2cyc_1AB_1S_1Z : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitS, - Ampere1UnitZ]> { - let Latency = 2; - let NumMicroOps = 3; -} - -def Ampere1Write_2cyc_1B_1S : SchedWriteRes<[Ampere1UnitB, Ampere1UnitS]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def Ampere1Write_2cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { - let 
Latency = 2; - let NumMicroOps = 1; -} - -def Ampere1Write_2cyc_1S_1Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitZ]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def Ampere1Write_3cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> { - let Latency = 3; - let NumMicroOps = 1; -} - -def Ampere1Write_3cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { - let Latency = 3; - let NumMicroOps = 1; -} - -def Ampere1Write_3cyc_1B_1S_1AB : SchedWriteRes<[Ampere1UnitB, Ampere1UnitS, - Ampere1UnitAB]> { - let Latency = 2; - let NumMicroOps = 3; -} - -def Ampere1Write_3cyc_1S_2Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitZ, Ampere1UnitZ]> { - let Latency = 2; - let NumMicroOps = 3; -} - -def Ampere1Write_3cyc_2S_2Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS, - Ampere1UnitZ, Ampere1UnitZ]> { - let Latency = 2; - let NumMicroOps = 4; -} - -def Ampere1Write_4cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> { - let Latency = 4; - let NumMicroOps = 1; -} - -def Ampere1Write_4cyc_1L : SchedWriteRes<[Ampere1UnitL]> { - let Latency = 4; - let NumMicroOps = 1; -} - -def Ampere1Write_4cyc_1X : SchedWriteRes<[Ampere1UnitX]> { - let Latency = 4; - let NumMicroOps = 1; -} - -def Ampere1Write_4cyc_1Y : SchedWriteRes<[Ampere1UnitY]> { - let Latency = 4; - let NumMicroOps = 1; -} - -def Ampere1Write_4cyc_1Z : SchedWriteRes<[Ampere1UnitZ]> { - let Latency = 4; - let NumMicroOps = 1; -} - -def Ampere1Write_4cyc_2L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL]> { - let Latency = 4; - let NumMicroOps = 2; -} - -def Ampere1Write_4cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { - let Latency = 4; - let NumMicroOps = 1; -} - -def Ampere1Write_4cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 4; - let NumMicroOps = 2; -} - -def Ampere1Write_4cyc_1XY_1S_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitS, Ampere1UnitZ]> { - let Latency = 4; - let NumMicroOps = 3; -} - -def Ampere1Write_4cyc_3S_3Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS, Ampere1UnitS, - Ampere1UnitZ, Ampere1UnitZ, Ampere1UnitZ]> { - let 
Latency = 4; - let NumMicroOps = 6; -} - -def Ampere1Write_5cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> { - let Latency = 5; - let NumMicroOps = 2; -} - -def Ampere1Write_5cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> { - let Latency = 5; - let NumMicroOps = 1; -} - -def Ampere1Write_5cyc_1X : SchedWriteRes<[Ampere1UnitX]> { - let Latency = 5; - let NumMicroOps = 1; -} - -def Ampere1Write_5cyc_1L : SchedWriteRes<[Ampere1UnitL]> { - let Latency = 5; - let NumMicroOps = 1; -} - -def Ampere1Write_5cyc_2L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL]> { - let Latency = 5; - let NumMicroOps = 2; -} - -def Ampere1Write_5cyc_1L_1BS : SchedWriteRes<[Ampere1UnitL, Ampere1UnitBS]> { - let Latency = 5; - let NumMicroOps = 2; -} - -def Ampere1Write_5cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { - let Latency = 5; - let NumMicroOps = 1; -} - -def Ampere1Write_5cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 5; - let NumMicroOps = 2; -} - -def Ampere1Write_5cyc_4S_4Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS, - Ampere1UnitS, Ampere1UnitS, - Ampere1UnitZ, Ampere1UnitZ, - Ampere1UnitZ, Ampere1UnitZ]> { - let Latency = 5; - let NumMicroOps = 8; -} - -def Ampere1Write_5cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitS, Ampere1UnitS, - Ampere1UnitZ, Ampere1UnitZ]> { - let Latency = 5; - let NumMicroOps = 6; -} - -def Ampere1Write_6cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitS, Ampere1UnitS, - Ampere1UnitZ, Ampere1UnitZ]> { - let Latency = 6; - let NumMicroOps = 6; -} - -def Ampere1Write_6cyc_3XY_3S_3Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitS, Ampere1UnitS, Ampere1UnitS, - Ampere1UnitZ, Ampere1UnitZ, Ampere1UnitZ]> { - let Latency = 6; - let NumMicroOps = 9; -} - -def Ampere1Write_6cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> { - let Latency = 6; - let NumMicroOps = 2; -} - -def Ampere1Write_6cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { - let Latency 
= 6; - let NumMicroOps = 1; -} - -def Ampere1Write_6cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 6; - let NumMicroOps = 2; -} - -def Ampere1Write_6cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 6; - let NumMicroOps = 3; -} - -def Ampere1Write_6cyc_3L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL]> { - let Latency = 6; - let NumMicroOps = 3; -} - -def Ampere1Write_6cyc_4L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, - Ampere1UnitL, Ampere1UnitL]> { - let Latency = 6; - let NumMicroOps = 4; -} - -def Ampere1Write_6cyc_1XY_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitZ]> { - let Latency = 6; - let NumMicroOps = 2; -} - -def Ampere1Write_7cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> { - let Latency = 7; - let NumMicroOps = 1; -} - -def Ampere1Write_7cyc_1BS_1XY : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitXY]> { - let Latency = 7; - let NumMicroOps = 2; -} - -def Ampere1Write_7cyc_1L_1XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitXY]> { - let Latency = 7; - let NumMicroOps = 2; -} - -def Ampere1Write_7cyc_2L_2XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, - Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 7; - let NumMicroOps = 4; -} - -def Ampere1Write_7cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 7; - let NumMicroOps = 2; -} - -def Ampere1Write_7cyc_4XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitS, Ampere1UnitS, - Ampere1UnitS, Ampere1UnitS, - Ampere1UnitZ, Ampere1UnitZ, - Ampere1UnitZ, Ampere1UnitZ]> { - let Latency = 7; - let NumMicroOps = 12; -} - -def Ampere1Write_8cyc_1BS_1A : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitA]> { - let Latency = 8; - let NumMicroOps = 2; -} - -def Ampere1Write_8cyc_1BS_2A : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitA, - Ampere1UnitA]> { - let Latency = 8; - let NumMicroOps = 3; -} - -def Ampere1Write_8cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 8; 
- let NumMicroOps = 2; -} - -def Ampere1Write_8cyc_4XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 8; - let NumMicroOps = 4; -} - -def Ampere1Write_8cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL, - Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 8; - let NumMicroOps = 6; -} - -def Ampere1Write_8cyc_4L_4XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, - Ampere1UnitL, Ampere1UnitL, - Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 8; - let NumMicroOps = 8; -} - -def Ampere1Write_9cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL, - Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 9; - let NumMicroOps = 6; -} - -def Ampere1Write_9cyc_4L_4XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, - Ampere1UnitL, Ampere1UnitL, - Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 9; - let NumMicroOps = 8; -} - -def Ampere1Write_9cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 9; - let NumMicroOps = 3; -} - -def Ampere1Write_9cyc_2L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, - Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 9; - let NumMicroOps = 5; -} - -def Ampere1Write_9cyc_6XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitS, Ampere1UnitS, - Ampere1UnitS, Ampere1UnitS, - Ampere1UnitZ, Ampere1UnitZ, - Ampere1UnitZ, Ampere1UnitZ]> { - let Latency = 9; - let NumMicroOps = 14; -} - -def Ampere1Write_9cyc_8XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitS, Ampere1UnitS, - Ampere1UnitS, Ampere1UnitS, - Ampere1UnitZ, Ampere1UnitZ, - Ampere1UnitZ, Ampere1UnitZ]> { - let Latency = 9; - let NumMicroOps = 16; -} - -def Ampere1Write_10cyc_2XY : 
SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 10; - let NumMicroOps = 2; -} - -def Ampere1Write_10cyc_1XY_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitZ]> { - let Latency = 10; - let NumMicroOps = 2; -} - -def Ampere1Write_10cyc_1X_1Z : SchedWriteRes<[Ampere1UnitX, Ampere1UnitZ]> { - let Latency = 10; - let NumMicroOps = 2; -} - -def Ampere1Write_10cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL, - Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 10; - let NumMicroOps = 6; -} - -def Ampere1Write_10cyc_1A_1BS_1X : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitX]> { - let Latency = 10; - let NumMicroOps = 3; -} - -def Ampere1Write_10cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitXY]> { - let Latency = 10; - let NumMicroOps = 3; -} - -def Ampere1Write_11cyc_1BS_1L : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitL]> { - let Latency = 11; - let NumMicroOps = 2; -} - -def Ampere1Write_11cyc_1A_1BS_1X : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitX]> { - let Latency = 11; - let NumMicroOps = 3; -} - -def Ampere1Write_11cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitXY]> { - let Latency = 11; - let NumMicroOps = 3; -} - -def Ampere1Write_11cyc_4L_8XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, - Ampere1UnitL, Ampere1UnitL, - Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 11; - let NumMicroOps = 12; -} - -def Ampere1Write_12cyc_4L_8XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, - Ampere1UnitL, Ampere1UnitL, - Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 12; - let NumMicroOps = 12; -} - -def Ampere1Write_12cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 12; - let NumMicroOps = 3; -} - -def Ampere1Write_12cyc_4XY : 
SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, - Ampere1UnitXY, Ampere1UnitXY]> { - let Latency = 12; - let NumMicroOps = 4; -} - -def Ampere1Write_18cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> { - let Latency = 18; - let NumMicroOps = 1; -} - -def Ampere1Write_19cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { - let Latency = 19; - let NumMicroOps = 1; -} - -def Ampere1Write_25cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { - let Latency = 25; - let NumMicroOps = 1; -} - -def Ampere1Write_32cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { - let Latency = 32; - let NumMicroOps = 1; -} - -def Ampere1Write_34cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> { - let Latency = 34; - let NumMicroOps = 1; -} - -def Ampere1Write_34cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { - let Latency = 34; - let NumMicroOps = 1; -} - -def Ampere1Write_39cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { - let Latency = 39; - let NumMicroOps = 1; -} - -def Ampere1Write_62cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { - let Latency = 62; - let NumMicroOps = 1; -} - -// For basic arithmetic, we have more flexibility for short shifts (LSL shift <= 4), -// which are a single uop, and for extended registers, which have full flexibility -// across Unit A or B for both uops. -def Ampere1Write_Arith : SchedWriteVariant<[ - SchedVar, - SchedVar, - SchedVar]>; - -def Ampere1Write_ArithFlagsetting : SchedWriteVariant<[ - SchedVar, - SchedVar, - SchedVar]>; - -//===----------------------------------------------------------------------===// -// Map the target-defined scheduler read/write resources and latencies for Ampere-1. -// This provides a coarse model, which is then specialised below. 
- -def : WriteRes; // MOVN, MOVZ -def : WriteRes; // ALU -def : WriteRes { - let Latency = 2; - let NumMicroOps = 2; -} // ALU of Shifted-Reg -def : WriteRes { - let Latency = 2; - let NumMicroOps = 2; -} // ALU of Extended-Reg -def : WriteRes; // EXTR shifts a reg pair -def : WriteRes; // Shift/Scale -def : WriteRes { - let Latency = 18; -} // 32-bit Divide -def : WriteRes { - let Latency = 34; -} // 64-bit Divide -def : WriteRes { - let Latency = 3; -} // 32-bit Multiply -def : WriteRes { - let Latency = 3; -} // 32-bit Multiply -def : WriteRes; -def : WriteRes; -def : WriteRes { - let Latency = 4; -} // Load from base addr plus immediate offset -def : WriteRes { - let Latency = 1; -} // Store to base addr plus immediate offset -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} // Store a register pair. -def : WriteRes; -def : WriteRes { - let Latency = 5; - let NumMicroOps = 2; -} // Load from a register index (maybe scaled). -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} // Store to a register index (maybe scaled). -def : WriteRes { - let Latency = 2; -} // General floating-point ops. -def : WriteRes { - let Latency = 5; -} // Floating-point compare. -def : WriteRes { - let Latency = 6; -} // Float conversion. -def : WriteRes { -} // Float-int register copy. -def : WriteRes { - let Latency = 2; -} // Float-int register copy. -def : WriteRes { - let Latency = 5; -} // Floating-point multiply. -def : WriteRes { - let Latency = 34; -} // Floating-point division. -def : WriteRes { - let Latency = 3; -} // 64bit Vector D ops. -def : WriteRes { - let Latency = 3; -} // 128bit Vector Q ops. -def : WriteRes { - let Latency = 5; -} // Vector loads. -def : WriteRes { - let Latency = 2; -} // Vector stores. 
- -def : WriteRes { let Unsupported = 1; } - -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -def : WriteRes { - let Latency = 4; -} // The second register of a load-pair: LDP,LDPSW,LDNP,LDXP,LDAXP - -// Forwarding logic. -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -//===----------------------------------------------------------------------===// -// Specialising the scheduling model further for Ampere-1. - -def : InstRW<[Ampere1Write_1cyc_1AB], (instrs COPY)>; - -// Branch instructions -def : InstRW<[Ampere1Write_1cyc_1A], (instrs Bcc, BL, RET)>; -def : InstRW<[Ampere1Write_1cyc_1A], - (instrs CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>; -def : InstRW<[Ampere1Write_1cyc_2A], (instrs BLR)>; - -// Cryptography instructions -// -- AES encryption/decryption -def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^AES[DE]")>; -def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^AESI?MC")>; -// -- Polynomial multiplication -def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^PMUL", "^PMULL")>; -// -- SHA-256 hash -def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA256(H|H2)")>; -// -- SHA-256 schedule update -def : InstRW<[Ampere1Write_4cyc_1Y], (instregex "^SHA256SU[01]")>; -// -- SHA-3 instructions -def : InstRW<[Ampere1Write_2cyc_1XY], - (instregex "^BCAX", "^EOR3", "^RAX1", "^XAR")>; -// -- SHA-512 hash -def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA512(H|H2)")>; -// -- SHA-512 schedule update -def : InstRW<[Ampere1Write_4cyc_1Y], (instregex "^SHA512SU[01]")>; -// -- SHA1 choose/majority/parity -def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA1[CMP]")>; -// -- SHA1 hash/schedule update -def : InstRW<[Ampere1Write_2cyc_1Y], (instregex "^SHA1SU[01]")>; -def : InstRW<[Ampere1Write_2cyc_1Y], (instregex "^SHA1H")>; - -// FP and vector 
load instructions -// -- Load 1-element structure to one/all lanes -// ---- all lanes -def : InstRW<[Ampere1Write_7cyc_1L_1XY], - (instregex "^LD1Rv(8b|4h|2s|16b|8h|4s|2d)")>; -// ---- one lane -def : InstRW<[Ampere1Write_7cyc_1L_1XY], - (instregex "^LD1i(8|16|32|64)")>; -// -- Load 1-element structure to one/all lanes, 1D size -def : InstRW<[Ampere1Write_5cyc_1L], - (instregex "^LD1Rv1d")>; -// -- Load 1-element structures to 1 register -def : InstRW<[Ampere1Write_5cyc_1L], - (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>; -// -- Load 1-element structures to 2 registers -def : InstRW<[Ampere1Write_5cyc_2L], - (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>; -// -- Load 1-element structures to 3 registers -def : InstRW<[Ampere1Write_6cyc_3L], - (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>; -// -- Load 1-element structures to 4 registers -def : InstRW<[Ampere1Write_6cyc_4L], - (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>; -// -- Load 2-element structure to all lanes of 2 registers, 1D size -def : InstRW<[Ampere1Write_5cyc_2L], - (instregex "^LD2Rv1d")>; -// -- Load 2-element structure to all lanes of 2 registers, other sizes -def : InstRW<[Ampere1Write_7cyc_2L_2XY], - (instregex "^LD2Rv(8b|4h|2s|16b|8h|4s|2d)")>; -// -- Load 2-element structure to one lane of 2 registers -def : InstRW<[Ampere1Write_7cyc_2L_2XY], - (instregex "^LD2i(8|16|32|64)")>; -// -- Load 2-element structures to 2 registers, 16B/8H/4S/2D size -def : InstRW<[Ampere1Write_7cyc_2L_2XY], - (instregex "^LD2Twov(16b|8h|4s|2d)")>; -// -- Load 2-element structures to 2 registers, 8B/4H/2S size -def : InstRW<[Ampere1Write_9cyc_2L_3XY], - (instregex "^LD2Twov(8b|4h|2s)")>; -// -- Load 3-element structure to all lanes of 3 registers, 1D size -def : InstRW<[Ampere1Write_6cyc_3L], - (instregex "^LD3Rv1d")>; -// -- Load 3-element structure to all lanes of 3 registers, other sizes -def : InstRW<[Ampere1Write_8cyc_3L_3XY], - (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s|2d)")>; -// -- Load 3-element 
structure to one lane of 3 registers -def : InstRW<[Ampere1Write_8cyc_3L_3XY], - (instregex "^LD3i(8|16|32|64)")>; -// -- Load 3-element structures to 3 registers, 16B/8H/4S sizes -def : InstRW<[Ampere1Write_9cyc_3L_3XY], - (instregex "^LD3Threev(16b|8h|4s)")>; -// -- Load 3-element structures to 3 registers, 2D size -def : InstRW<[Ampere1Write_8cyc_3L_3XY], - (instregex "^LD3Threev2d")>; -// -- Load 3-element structures to 3 registers, 8B/4H/2S sizes -def : InstRW<[Ampere1Write_10cyc_3L_3XY], - (instregex "^LD3Threev(8b|4h|2s)")>; -// -- Load 4-element structure to all lanes of 4 registers, 1D size -def : InstRW<[Ampere1Write_6cyc_4L], - (instregex "^LD4Rv1d")>; -// -- Load 4-element structure to all lanes of 4 registers, other sizes -def : InstRW<[Ampere1Write_8cyc_4L_4XY], - (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s|2d)")>; -// -- Load 4-element structure to one lane of 4 registers -def : InstRW<[Ampere1Write_6cyc_4L], - (instregex "^LD4i(8|16|32|64)")>; -// -- Load 4-element structures to 4 registers, 2D size -def : InstRW<[Ampere1Write_9cyc_4L_4XY], - (instregex "^LD4Fourv2d")>; -// -- Load 4-element structures to 4 registers, 2S size -def : InstRW<[Ampere1Write_12cyc_4L_8XY], - (instregex "^LD4Fourv2s")>; -// -- Load 4-element structures to 4 registers, other sizes -def : InstRW<[Ampere1Write_11cyc_4L_8XY], - (instregex "^LD4Fourv(8b|4h|16b|8h|4s)")>; -// -- Load pair, Q-form -def : InstRW<[Ampere1Write_5cyc_2L], (instregex "LDN?PQ")>; -// -- Load pair, S/D-form -def : InstRW<[Ampere1Write_5cyc_1L_1BS], (instregex "LDN?P(S|D)")>; -// -- Load register -def : InstRW<[Ampere1Write_5cyc_1L], (instregex "LDU?R[BHSDQ]i")>; -// -- Load register, sign-extended register -def : InstRW<[Ampere1Write_6cyc_1AB_1L], (instregex "LDR[BHSDQ]ro(W|X)")>; - -// FP and vector store instructions -// -- Store 1-element structure from one lane of 1 register -def : InstRW<[Ampere1Write_4cyc_1XY_1S_1Z], - (instregex "^ST1i(8|16|32|64)")>; -// -- Store 1-element structures from 1 register 
-def : InstRW<[Ampere1Write_2cyc_1S_1Z], - (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>; -// -- Store 1-element structures from 2 registers -def : InstRW<[Ampere1Write_3cyc_2S_2Z], - (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>; -// -- Store 1-element structures from 3 registers -def : InstRW<[Ampere1Write_4cyc_3S_3Z], - (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>; -// -- Store 1-element structures from 4 registers -def : InstRW<[Ampere1Write_5cyc_4S_4Z], - (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>; -// -- Store 2-element structure from one lane of 2 registers -def : InstRW<[Ampere1Write_5cyc_2XY_2S_2Z], - (instregex "^ST2i(8|16|32|64)")>; -// -- Store 2-element structures from 2 registers, 16B/8H/4S/2D sizes -def : InstRW<[Ampere1Write_5cyc_2XY_2S_2Z], - (instregex "^ST2Twov(16b|8h|4s|2d)")>; -// -- Store 2-element structures from 2 registers, 8B/4H/2S sizes -def : InstRW<[Ampere1Write_6cyc_2XY_2S_2Z], - (instregex "^ST2Twov(8b|4h|2s)")>; -// -- Store 3-element structure from one lane of 3 registers -def : InstRW<[Ampere1Write_6cyc_3XY_3S_3Z], - (instregex "^ST3i(8|16|32|64)")>; -// -- Store 3-element structures from 3 registers -def : InstRW<[Ampere1Write_6cyc_3XY_3S_3Z], - (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>; -// -- Store 4-element structure from one lane of 4 registers -def : InstRW<[Ampere1Write_7cyc_4XY_4S_4Z], - (instregex "^ST4i(8|16|32|64)")>; -// -- Store 4-element structures from 4 registers, 16B/8H/4S sizes -def : InstRW<[Ampere1Write_9cyc_8XY_4S_4Z], - (instregex "^ST4Fourv(16b|8h|4s)")>; -// -- Store 4-element structures from 4 registers, 2D sizes -def : InstRW<[Ampere1Write_7cyc_4XY_4S_4Z], - (instregex "^ST4Fourv2d")>; -// -- Store 4-element structures from 4 registers, 8B/4H/2S sizes -def : InstRW<[Ampere1Write_9cyc_6XY_4S_4Z], - (instregex "^ST4Fourv(8b|4h|2s)")>; -// -- Store pair, Q-form -def : InstRW<[Ampere1Write_3cyc_2S_2Z], (instregex "^STN?PQ")>; -// -- Store pair, S/D-form -def : 
InstRW<[Ampere1Write_3cyc_1S_2Z], (instregex "^STN?P[SD]")>; -// -- Store register -def : InstRW<[Ampere1Write_2cyc_1S_1Z], (instregex "^STU?R[BHSDQ](ui|i)")>; -// -- Store register, sign-extended register offset -def : InstRW<[Ampere1Write_2cyc_1AB_1S_1Z], (instregex "^STR[BHSDQ]ro[XW]")>; - -// FP data processing, bfloat16 format -def : InstRW<[Ampere1Write_5cyc_1XY], (instrs BFCVT)>; -def : InstRW<[Ampere1Write_7cyc_2XY], (instrs BFCVTN, BFCVTN2)>; -def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^BFDOTv", "^BF16DOT")>; -def : InstRW<[Ampere1Write_4cyc_2XY], (instrs BFMMLA)>; -def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^BFMLAL")>; - -// FP data processing, scalar/vector, half precision -def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(ABD|ABS)v.[fi]16")>; -def : InstRW<[Ampere1Write_4cyc_1XY], - (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi]16")>; -def : InstRW<[Ampere1Write_4cyc_1XY], - (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi]16")>; -def : InstRW<[Ampere1Write_4cyc_1XY], - (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)16")>; -def : InstRW<[Ampere1Write_4cyc_1X], - (instregex "^FCMPE?H")>; -def : InstRW<[Ampere1Write_10cyc_1A_1BS_1X], - (instregex "^FCCMPE?H")>; -def : InstRW<[Ampere1Write_10cyc_1A_1BS_1XY], - (instregex "^FCSELH")>; -def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if]16")>; -def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^[SU]CVTFv.[fi]16")>; -def : InstRW<[Ampere1Write_25cyc_1XY], (instregex "^FDIVv.[if]16", "FDIVH")>; -def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if]16")>; -def : InstRW<[Ampere1Write_8cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv4[if]16")>; -def : InstRW<[Ampere1Write_12cyc_3XY], (instregex "^F(MAX|MIN)(NM)?Vv8[if]16")>; -def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FMULX?v.[if]16")>; -def : InstRW<[Ampere1Write_4cyc_1XY], (instrs FMULX16)>; -def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[H]rrr")>; -def : InstRW<[Ampere1Write_4cyc_1XY], (instregex 
"^FML[AS]v.[if]16")>; -def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FRECPXv.[if]16")>; -def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(RECP|RSQRT)S16")>; -def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if]16")>; -def : InstRW<[Ampere1Write_39cyc_1XY], (instregex "^FSQRTv.f16", "^FSQRTHr")>; - -// FP data processing, scalar/vector, single/double precision -def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^F(ABD|ABS)v.[fi](32|64)")>; -def : InstRW<[Ampere1Write_5cyc_1XY], - (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi](32|64)")>; -def : InstRW<[Ampere1Write_5cyc_1XY], - (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi](32|64)")>; -def : InstRW<[Ampere1Write_5cyc_1XY], - (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(32|64)")>; -def : InstRW<[Ampere1Write_5cyc_1X], - (instregex "^FCMPE?(S|D)")>; -def : InstRW<[Ampere1Write_11cyc_1A_1BS_1X], - (instregex "^FCCMPE?(S|D)")>; -def : InstRW<[Ampere1Write_11cyc_1A_1BS_1XY], - (instregex "^FCSEL(S|D)")>; -def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if](32|64)")>; -def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^[SU]CVTFv.[fi](32|64)")>; -def : InstRW<[Ampere1Write_34cyc_1XY], (instregex "^FDIVv.[if](64)", "FDIVD")>; -def : InstRW<[Ampere1Write_19cyc_1XY], (instregex "^FDIVv.[if](32)", "FDIVS")>; -def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if](32|64)")>; -def : InstRW<[Ampere1Write_10cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv.[if](32|64)")>; -def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FMULX?v.[if](32|64)")>; -def : InstRW<[Ampere1Write_6cyc_1XY], (instrs FMULX32, FMULX64)>; -def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FN?M(ADD|SUB)[SD]rrr")>; -def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FML[AS]v.[if](32|64)")>; -def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FRECPXv.[if](32|64)")>; -def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^F(RECP|RSQRT)S(32|64)")>; -def : InstRW<[Ampere1Write_6cyc_1XY], (instregex 
"^FRINT[AIMNPXZ]v.[if](32|64)")>; -def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FRINT(32|64)")>; -def : InstRW<[Ampere1Write_62cyc_1XY], (instregex "^FSQRTv.f64", "^FSQRTDr")>; -def : InstRW<[Ampere1Write_32cyc_1XY], (instregex "^FSQRTv.f32", "^FSQRTSr")>; - -// FP miscellaneous instructions -def : InstRW<[Ampere1Write_10cyc_1XY_1Z], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>; -def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FCVT[HSD]Hr")>; -def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVT[HSD][SD]r")>; -def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVTLv")>; -def : InstRW<[Ampere1Write_8cyc_2XY], (instregex "^FCVT(N|XN)v")>; -def : InstRW<[Ampere1Write_10cyc_1X_1Z], (instrs FJCVTZS)>; -def : InstRW<[Ampere1Write_5cyc_1BS], (instregex "^FMOV[HSD][WX]r")>; -def : InstRW<[Ampere1Write_7cyc_1BS_1XY], (instregex "^FMOVDXHighr")>; -def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^FMOV[HSD][ri]")>; -def : InstRW<[Ampere1Write_6cyc_1XY_1Z], (instregex "^FMOVXDHighr")>; -def : InstRW<[Ampere1Write_4cyc_1Z], (instregex "^FMOV[WX][HSD]r")>; - -// Integer arithmetic and logical instructions -def : InstRW<[Ampere1Write_1cyc_1A], - (instregex "ADC(W|X)r", "SBC(W|X)r")>; -def : InstRW<[Ampere1Write_Arith], - (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)(W|X)r")>; -def : InstRW<[Ampere1Write_ArithFlagsetting], - (instregex "(ADD|AND|BIC|SUB)S(W|X)r")>; -def : InstRW<[Ampere1Write_1cyc_1A], - (instregex "(ADC|SBC)S(W|X)r")>; -def : InstRW<[Ampere1Write_1cyc_1A], (instrs RMIF)>; -def : InstRW<[Ampere1Write_1cyc_1A], - (instregex "(CCMN|CCMP)(X|W)")>; -def : InstRW<[Ampere1Write_1cyc_1A], - (instregex "(CSEL|CSINC|CSINV|CSNEG)(X|W)")>; -def : InstRW<[Ampere1Write_18cyc_1BS], (instrs SDIVWr, UDIVWr)>; -def : InstRW<[Ampere1Write_34cyc_1BS], (instrs SDIVXr, UDIVXr)>; -def : InstRW<[Ampere1Write_3cyc_1BS], - (instregex "(S|U)MULHr")>; -def : InstRW<[Ampere1Write_4cyc_1BS], - (instregex "(S|U)?M(ADD|SUB)L?r")>; - -// Integer load instructions -def : 
InstRW<[Ampere1Write_4cyc_2L], - (instregex "(LDNP|LDP|LDPSW)(X|W)")>; -def : InstRW<[Ampere1Write_4cyc_1L], - (instregex "LDR(B|D|H|Q|S)ui")>; -def : InstRW<[Ampere1Write_4cyc_1L], - (instregex "LDR(D|Q|W|X)l")>; -def : InstRW<[Ampere1Write_4cyc_1L], - (instregex "LDTR(B|H|W|X)i")>; -def : InstRW<[Ampere1Write_4cyc_1L], - (instregex "LDTRS(BW|BX|HW|HX|W)i")>; -def : InstRW<[Ampere1Write_4cyc_1L], - (instregex "LDUR(BB|HH|X|W)i")>; -def : InstRW<[Ampere1Write_4cyc_1L], - (instregex "LDURS(BW|BX|HW|HX|W)i")>; -def : InstRW<[Ampere1Write_5cyc_1AB_1L], - (instregex "LDR(HH|SHW|SHX|W|X)ro(W|X)")>; -def : InstRW<[Ampere1Write_1cyc_1L], - (instrs PRFMl, PRFUMi, PRFUMi)>; -def : InstRW<[Ampere1Write_2cyc_1AB_1L], - (instrs PRFMroW, PRFMroX)>; - -// Integer miscellaneous instructions -def : InstRW<[Ampere1Write_1cyc_1A], (instrs ADR, ADRP)>; -def : InstRW<[Ampere1Write_1cyc_1B], (instregex "EXTR(W|X)")>; -def : InstRW<[Ampere1Write_1cyc_1B], (instregex "(S|U)?BFM(W|X)")>; -def : InstRW<[Ampere1Write_3cyc_1BS], (instregex "^CRC32C?[BHWX]")>; -def : InstRW<[Ampere1Write_1cyc_1B], (instregex "CLS(W|X)")>; -def : InstRW<[Ampere1Write_1cyc_1A], (instrs SETF8, SETF16)>; -def : InstRW<[Ampere1Write_1cyc_1AB], - (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>; -def : InstRW<[Ampere1Write_1cyc_1B], - (instregex "(RBIT|REV|REV16)(W|X)r", "REV32Xr")>; -def : InstRW<[Ampere1Write_1cyc_1B], - (instregex "(ASR|LSL|LSR|ROR)V(W|X)r")>; - -// Integer store instructions -def : InstRW<[Ampere1Write_1cyc_2S], (instregex "STNP(X|W)i")>; -def : InstRW<[Ampere1Write_2cyc_1B_1S], - (instrs STPWi, STPXi)>; -def : InstRW<[Ampere1Write_3cyc_1B_1S_1AB], - (instregex "STP(W|X)(pre|post)")>; -def : InstRW<[Ampere1Write_1cyc_1S], - (instrs STTRBi, STTRHi, STTRWi, STTRXi)>; -def : InstRW<[Ampere1Write_1cyc_1S], - (instregex "STUR(BB|HH|X|W)i", - "STR(X|W)ui", - "STUR(BB|HH|X|W)i")>; -def : InstRW<[Ampere1Write_1cyc_2S], (instrs STRWroX, STRXroX)>; -def : InstRW<[Ampere1Write_2cyc_1AB_2S], 
(instrs STRWroW, STRXroW)>; - -// Pointer authentication -//def : InstRW<[Ampere1Write_7cyc_1BS], -// (instrs AUTIAZ, AUTIBZ, AUTIASP, AUTIBSP, AUTIA1716, AUTIB1716)>; -def : InstRW<[Ampere1Write_8cyc_1BS_1A], - (instregex "BRA(A|AZ|B|BZ)", "RETA(A|B)", "ERETA(A|B)")>; -def : InstRW<[Ampere1Write_8cyc_1BS_2A], - (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ)>; -//def : InstRW<[Ampere1Write_7cyc_1BS], -// (instrs PACIAZ, PACIBZ, PACIASP, PACIBSP, PACIA1716, PACIB1716)>; -def : InstRW<[Ampere1Write_11cyc_1BS_1L], (instregex "^LDRA(A|B)")>; -def : InstRW<[Ampere1Write_7cyc_1BS], (instrs XPACD, XPACI)>; - -// Vector integer instructions -// -- absolute difference -def : InstRW<[Ampere1Write_3cyc_1XY], - (instregex "^SABAv", "^SABALv", "^SABDv", "^SABDLv", - "^UABAv", "^UABALv", "^UABDv", "^UABDLv")>; -// -- arithmetic -def : InstRW<[Ampere1Write_3cyc_1XY], - (instregex "^ABSv", "^(ADD|SUB)v", "^SADDLv", "^SADDW", "SHADD", - "SHSUB", "^SRHADD", "^URHADD", "SSUBL", "SSUBW", - "^UADDLv", "^UADDW", "UHADD", "UHSUB", "USUBL", "USUBW")>; -// -- arithmetic, horizontal, 16B -def : InstRW<[Ampere1Write_12cyc_4XY], - (instregex "^ADDVv16i8v", "^SADDLVv16i8v", "^UADDLVv16i8v")>; -def : InstRW<[Ampere1Write_12cyc_4XY], - (instregex "^[SU](MIN|MAX)Vv16i8v")>; -// -- arithmetic, horizontal, 4H/4S -def : InstRW<[Ampere1Write_6cyc_2XY], - (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v")>; -def : InstRW<[Ampere1Write_6cyc_2XY], - (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v")>; -// -- arithmetic, horizontal, 8B/8H -def : InstRW<[Ampere1Write_9cyc_3XY], - (instregex "^[SU]?ADDL?V(v8i16|v4i32)v")>; -def : InstRW<[Ampere1Write_9cyc_3XY], - (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v")>; -// -- arithmetic, narrowing -def : InstRW<[Ampere1Write_5cyc_2XY], (instregex "(ADD|SUB)HNv.*")>; -def : InstRW<[Ampere1Write_5cyc_2XY], (instregex "(RADD|RSUB)HNv.*")>; -// -- arithmetic, pairwise -def : InstRW<[Ampere1Write_3cyc_1XY], - (instregex "^ADDPv", "^SADALP", "^UADALP", "^SADDLPv", "^UADDLPv")>; -// -- 
arithmetic, saturating -def : InstRW<[Ampere1Write_3cyc_1XY], - (instregex "^SQADD", "^SQSUB", "^SUQADD", "^UQADD", "^UQSUB", "^USQADD")>; -// -- bit count -def : InstRW<[Ampere1Write_2cyc_1XY], - (instregex "^(CLS|CLZ|CNT)v")>; -// -- compare -def : InstRW<[Ampere1Write_3cyc_1XY], - (instregex "^CMEQv", "^CMGEv", "^CMGTv", "^CMLEv", "^CMLTv", - "^CMHIv", "^CMHSv")>; -// -- compare non-zero -def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^CMTSTv")>; -// -- dot product -def : InstRW<[Ampere1Write_3cyc_1XY], (instregex "^(S|SU|U|US)DOTv")>; -// -- fp reciprocal estimate -def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FRECPEv", "^FRSQRTEv")>; -// -- integer reciprocal estimate -def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^URECPEv", "^URSQRTEv")>; -// -- logical -def : InstRW<[Ampere1Write_2cyc_1XY], - (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>; -// -- logical, narrowing -def : InstRW<[Ampere1Write_5cyc_2XY], - (instregex "RSHRNv", - "SHRNv", "SQSHRNv", "SQSHRUNv", - "UQXTNv")>; -// -- matrix multiply -def : InstRW<[Ampere1Write_6cyc_2XY], - (instrs SMMLA, UMMLA, USMMLA)>; -// -- max/min -def : InstRW<[Ampere1Write_3cyc_1XY], - (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>; -def : InstRW<[Ampere1Write_3cyc_1XY], - (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>; -// -- move immediate -def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^MOVIv", "^MVNIv")>; -// -- multiply -def : InstRW<[Ampere1Write_3cyc_1XY], - (instregex "MULv", "SMULLv", "UMULLv", "SQDMUL(H|L)v", "SQRDMULHv")>; -// -- multiply accumulate -def : InstRW<[Ampere1Write_3cyc_1XY], - (instregex "MLAv", "MLSv", "(S|U|SQD)(MLAL|MLSL)v", "SQRDML(A|S)Hv")>; -// -- negation, saturating -def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^SQABS", "^SQNEG")>; -// -- reverse bits/bytes -def : InstRW<[Ampere1Write_2cyc_1XY], - (instregex "^RBITv", "^REV16v", "^REV32v", "^REV64v")>; -// -- shift -def : InstRW<[Ampere1Write_3cyc_1XY], (instregex 
"^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; -// -- shift and accumulate -def : InstRW<[Ampere1Write_3cyc_1XY], - (instregex "SRSRAv", "SSRAv", "URSRAv", "USRAv")>; -// -- shift, saturating -def : InstRW<[Ampere1Write_3cyc_1XY], - (instregex "^SQRSHLv", "^SQRSHRNv", "^SQRSHRUNv", "^SQSHL", "^SQSHLU", - "^SQXTNv", "^SQXTUNv", "^UQSHRNv", "UQRSHRNv", "^UQRSHL", - "^UQSHL")>; - -// Vector miscellaneous instructions -// -- duplicate element -def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^DUPv.+lane")>; -// -- duplicate from GPR -def : InstRW<[Ampere1Write_5cyc_1BS], (instregex "^DUPv.+gpr")>; -// -- extract narrow -def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^XTNv")>; -// -- insert/extract element -def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^EXTv", "^INSv.+lane")>; -// -- move FP immediate -def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^FMOVv")>; -// -- move element to GPR -def : InstRW<[Ampere1Write_6cyc_1XY_1Z], (instregex "(S|U)MOVv")>; -// -- move from GPR to any element -def : InstRW<[Ampere1Write_7cyc_1BS_1XY], (instregex "^INSv.+gpr")>; -// -- table lookup -def : InstRW<[Ampere1Write_2cyc_1XY], - (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>; -def : InstRW<[Ampere1Write_4cyc_2XY], - (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>; -def : InstRW<[Ampere1Write_6cyc_3XY], - (instrs TBLv8i8Three, TBLv16i8Three, TBXv8i8Three, TBXv16i8Three)>; -def : InstRW<[Ampere1Write_8cyc_4XY], - (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>; -// -- transpose -def : InstRW<[Ampere1Write_2cyc_1XY], - (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>; -// -- zip/unzip -def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^ZIP1v", "^ZIP2v")>; - -} // SchedModel = Ampere1Model diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedCyclone.td b/suite/synctools/tablegen/AArch64/AArch64SchedCyclone.td deleted file mode 100644 index 9fbb469194..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedCyclone.td +++ /dev/null 
@@ -1,874 +0,0 @@ -//=- AArch64SchedCyclone.td - Cyclone Scheduling Definitions -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for AArch64 Cyclone to support -// instruction scheduling and other instruction cost heuristics. -// -//===----------------------------------------------------------------------===// - -def CycloneModel : SchedMachineModel { - let IssueWidth = 6; // 6 micro-ops are dispatched per cycle. - let MicroOpBufferSize = 192; // Based on the reorder buffer. - let LoadLatency = 4; // Optimistic load latency. - let MispredictPenalty = 16; // 14-19 cycles are typical. - let CompleteModel = 1; - - list UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F, - SMEUnsupported.F); -} - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available on Cyclone. - -// 4 integer pipes -def CyUnitI : ProcResource<4> { - let BufferSize = 48; -} - -// 2 branch units: I[0..1] -def CyUnitB : ProcResource<2> { - let Super = CyUnitI; - let BufferSize = 24; -} - -// 1 indirect-branch unit: I[0] -def CyUnitBR : ProcResource<1> { - let Super = CyUnitB; -} - -// 2 shifter pipes: I[2..3] -// When an instruction consumes a CyUnitIS, it also consumes a CyUnitI -def CyUnitIS : ProcResource<2> { - let Super = CyUnitI; - let BufferSize = 24; -} - -// 1 mul pipe: I[0] -def CyUnitIM : ProcResource<1> { - let Super = CyUnitBR; - let BufferSize = 32; -} - -// 1 div pipe: I[1] -def CyUnitID : ProcResource<1> { - let Super = CyUnitB; - let BufferSize = 16; -} - -// 1 integer division unit. 
This is driven by the ID pipe, but only -// consumes the pipe for one cycle at issue and another cycle at writeback. -def CyUnitIntDiv : ProcResource<1>; - -// 2 ld/st pipes. -def CyUnitLS : ProcResource<2> { - let BufferSize = 28; -} - -// 3 fp/vector pipes. -def CyUnitV : ProcResource<3> { - let BufferSize = 48; -} -// 2 fp/vector arithmetic and multiply pipes: V[0-1] -def CyUnitVM : ProcResource<2> { - let Super = CyUnitV; - let BufferSize = 32; -} -// 1 fp/vector division/sqrt pipe: V[2] -def CyUnitVD : ProcResource<1> { - let Super = CyUnitV; - let BufferSize = 16; -} -// 1 fp compare pipe: V[0] -def CyUnitVC : ProcResource<1> { - let Super = CyUnitVM; - let BufferSize = 16; -} - -// 2 fp division/square-root units. These are driven by the VD pipe, -// but only consume the pipe for one cycle at issue and a cycle at writeback. -def CyUnitFloatDiv : ProcResource<2>; - -//===----------------------------------------------------------------------===// -// Define scheduler read/write resources and latency on Cyclone. -// This mirrors sections 7.7-7.9 of the Tuning Guide v1.0.1. - -let SchedModel = CycloneModel in { - -//--- -// 7.8.1. Moves -//--- - -// A single nop micro-op (uX). -def WriteX : SchedWriteRes<[]> { let Latency = 0; } - -// Move zero is a register rename (to machine register zero). -// The move is replaced by a single nop micro-op. -// MOVZ Rd, #0 -// AND Rd, Rzr, #imm -def WriteZPred : SchedPredicate<[{TII->isGPRZero(*MI)}]>; -def WriteImmZ : SchedWriteVariant<[ - SchedVar, - SchedVar]>; -def : InstRW<[WriteImmZ], (instrs MOVZWi,MOVZXi,ANDWri,ANDXri)>; - -// Move GPR is a register rename and single nop micro-op. -// ORR Xd, XZR, Xm -// ADD Xd, Xn, #0 -def WriteIMovPred : SchedPredicate<[{TII->isGPRCopy(*MI)}]>; -def WriteVMovPred : SchedPredicate<[{TII->isFPRCopy(*MI)}]>; -def WriteMov : SchedWriteVariant<[ - SchedVar, - SchedVar, - SchedVar]>; -def : InstRW<[WriteMov], (instrs COPY,ORRXrr,ADDXrr)>; - -// Move non-zero immediate is an integer ALU op. 
-// MOVN,MOVZ,MOVK -def : WriteRes; - -//--- -// 7.8.2-7.8.5. Arithmetic and Logical, Comparison, Conditional, -// Shifts and Bitfield Operations -//--- - -// ADR,ADRP -// ADD(S)ri,SUB(S)ri,AND(S)ri,EORri,ORRri -// ADD(S)rr,SUB(S)rr,AND(S)rr,BIC(S)rr,EONrr,EORrr,ORNrr,ORRrr -// ADC(S),SBC(S) -// Aliases: CMN, CMP, TST -// -// Conditional operations. -// CCMNi,CCMPi,CCMNr,CCMPr, -// CSEL,CSINC,CSINV,CSNEG -// -// Bit counting and reversal operations. -// CLS,CLZ,RBIT,REV,REV16,REV32 -def : WriteRes; - -// ADD with shifted register operand is a single micro-op that -// consumes a shift pipeline for two cycles. -// ADD(S)rs,SUB(S)rs,AND(S)rs,BIC(S)rs,EONrs,EORrs,ORNrs,ORRrs -// EXAMPLE: ADDrs Xn, Xm LSL #imm -def : WriteRes { - let Latency = 2; - let ResourceCycles = [2]; -} - -// ADD with extended register operand is the same as shifted reg operand. -// ADD(S)re,SUB(S)re -// EXAMPLE: ADDXre Xn, Xm, UXTB #1 -def : WriteRes { - let Latency = 2; - let ResourceCycles = [2]; -} - -// Variable shift and bitfield operations. -// ASRV,LSLV,LSRV,RORV,BFM,SBFM,UBFM -def : WriteRes; - -// EXTR Shifts a pair of registers and requires two micro-ops. -// The second micro-op is delayed, as modeled by ReadExtrHi. -// EXTR Xn, Xm, #imm -def : WriteRes { - let Latency = 2; - let NumMicroOps = 2; -} - -// EXTR's first register read is delayed by one cycle, effectively -// shortening its writer's latency. -// EXTR Xn, Xm, #imm -def : ReadAdvance; - -//--- -// 7.8.6. Multiplies -//--- - -// MUL/MNEG are aliases for MADD/MSUB. -// MADDW,MSUBW,SMADDL,SMSUBL,UMADDL,UMSUBL -def : WriteRes { - let Latency = 4; -} -// MADDX,MSUBX,SMULH,UMULH -def : WriteRes { - let Latency = 5; -} - -//--- -// 7.8.7. Divide -//--- - -// 32-bit divide takes 7-13 cycles. 10 cycles covers a 20-bit quotient. -// The ID pipe is consumed for 2 cycles: issue and writeback. -// SDIVW,UDIVW -def : WriteRes { - let Latency = 10; - let ResourceCycles = [2, 10]; -} -// 64-bit divide takes 7-21 cycles. 
13 cycles covers a 32-bit quotient. -// The ID pipe is consumed for 2 cycles: issue and writeback. -// SDIVX,UDIVX -def : WriteRes { - let Latency = 13; - let ResourceCycles = [2, 13]; -} - -//--- -// 7.8.8,7.8.10. Load/Store, single element -//--- - -// Integer loads take 4 cycles and use one LS unit for one cycle. -def : WriteRes { - let Latency = 4; -} - -// Store-load forwarding is 4 cycles. -// -// Note: The store-exclusive sequence incorporates this -// latency. However, general heuristics should not model the -// dependence between a store and subsequent may-alias load because -// hardware speculation works. -def : WriteRes { - let Latency = 4; -} - -// Load from base address plus an optionally scaled register offset. -// Rt latency is latency WriteIS + WriteLD. -// EXAMPLE: LDR Xn, Xm [, lsl 3] -def CyWriteLDIdx : SchedWriteVariant<[ - SchedVar, // Load from scaled register. - SchedVar]>; // Load from register offset. -def : SchedAlias; // Map AArch64->Cyclone type. - -// EXAMPLE: STR Xn, Xm [, lsl 3] -def CyWriteSTIdx : SchedWriteVariant<[ - SchedVar, // Store to scaled register. - SchedVar]>; // Store to register offset. -def : SchedAlias; // Map AArch64->Cyclone type. - -// Read the (unshifted) base register Xn in the second micro-op one cycle later. -// EXAMPLE: LDR Xn, Xm [, lsl 3] -def ReadBaseRS : SchedReadAdvance<1>; -def CyReadAdrBase : SchedReadVariant<[ - SchedVar, // Read base reg after shifting offset. - SchedVar]>; // Read base reg with no shift. -def : SchedAlias; // Map AArch64->Cyclone type. -def : ReadAdvance; - -//--- -// 7.8.9,7.8.11. Load/Store, paired -//--- - -// Address pre/post increment is a simple ALU op with one cycle latency. -def : WriteRes; - -// LDP high register write is fused with the load, but a nop micro-op remains. -def : WriteRes { - let Latency = 4; -} - -// STP is a vector op and store, except for QQ, which is just two stores. -def : SchedAlias; -def : InstRW<[WriteST, WriteST], (instrs STPQi)>; - -//--- -// 7.8.13. 
Branches -//--- - -// Branches take a single micro-op. -// The misprediction penalty is defined as a SchedMachineModel property. -def : WriteRes {let Latency = 0;} -def : WriteRes {let Latency = 0;} - -//--- -// 7.8.14. Never-issued Instructions, Barrier and Hint Operations -//--- - -// NOP,SEV,SEVL,WFE,WFI,YIELD -def : WriteRes {let Latency = 0;} -// ISB -def : InstRW<[WriteI], (instrs ISB)>; -// SLREX,DMB,DSB -def : WriteRes; - -// System instructions get an invalid latency because the latency of -// other operations across them is meaningless. -def : WriteRes {let Latency = -1;} - -//===----------------------------------------------------------------------===// -// 7.9 Vector Unit Instructions - -// Simple vector operations take 2 cycles. -def : WriteRes {let Latency = 2;} -def : WriteRes {let Latency = 2;} - -// Define some longer latency vector op types for Cyclone. -def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;} -def CyWriteV4 : SchedWriteRes<[CyUnitV]> {let Latency = 4;} -def CyWriteV5 : SchedWriteRes<[CyUnitV]> {let Latency = 5;} -def CyWriteV6 : SchedWriteRes<[CyUnitV]> {let Latency = 6;} - -// Simple floating-point operations take 2 cycles. -def : WriteRes {let Latency = 2;} - -//--- -// 7.9.1 Vector Moves -//--- - -// TODO: Add Cyclone-specific zero-cycle zeros. LLVM currently -// generates expensive int-float conversion instead: -// FMOVDi Dd, #0.0 -// FMOVv2f64ns Vd.2d, #0.0 - -// FMOVSi,FMOVDi -def : WriteRes {let Latency = 2;} - -// MOVI,MVNI are WriteV -// FMOVv2f32ns,FMOVv2f64ns,FMOVv4f32ns are WriteV - -// Move FPR is a register rename and single nop micro-op. -// ORR.16b Vd,Vn,Vn -// COPY is handled above in the WriteMov Variant. -def WriteVMov : SchedWriteVariant<[ - SchedVar, - SchedVar]>; -def : InstRW<[WriteVMov], (instrs ORRv16i8)>; - -// FMOVSr,FMOVDr are WriteF. - -// MOV V,V is a WriteV. - -// CPY D,V[x] is a WriteV - -// INS V[x],V[y] is a WriteV. 
- -// FMOVWSr,FMOVXDr,FMOVXDHighr -def : WriteRes { - let Latency = 5; -} - -// FMOVSWr,FMOVDXr -def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>; - -// INS V[x],R -def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteVq]>; -def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>; - -// SMOV,UMOV R,V[x] -def CyWriteCopyToGPR : WriteSequence<[WriteLD, WriteI]>; -def : InstRW<[CyWriteCopyToGPR], (instregex "SMOVv","UMOVv")>; - -// DUP V,R -def : InstRW<[CyWriteCopyToFPR], (instregex "DUPv")>; - -// DUP V,V[x] is a WriteV. - -//--- -// 7.9.2 Integer Arithmetic, Logical, and Comparisons -//--- - -// BIC,ORR V,#imm are WriteV - -def : InstRW<[CyWriteV3], (instregex "ABSv")>; - -// MVN,NEG,NOT are WriteV - -def : InstRW<[CyWriteV3], (instregex "SQABSv","SQNEGv")>; - -// ADDP is a WriteV. -def CyWriteVADDLP : SchedWriteRes<[CyUnitV]> {let Latency = 2;} -def : InstRW<[CyWriteVADDLP], (instregex "SADDLPv","UADDLPv")>; - -def : InstRW<[CyWriteV3], - (instregex "ADDVv","SMAXVv","UMAXVv","SMINVv","UMINVv")>; - -def : InstRW<[CyWriteV3], (instregex "SADDLV","UADDLV")>; - -// ADD,SUB are WriteV - -// Forward declare. -def CyWriteVABD : SchedWriteRes<[CyUnitV]> {let Latency = 3;} - -// Add/Diff and accumulate uses the vector multiply unit. 
-def CyWriteVAccum : SchedWriteRes<[CyUnitVM]> {let Latency = 3;} -def CyReadVAccum : SchedReadAdvance<1, - [CyWriteVAccum, CyWriteVADDLP, CyWriteVABD]>; - -def : InstRW<[CyWriteVAccum, CyReadVAccum], - (instregex "SADALP","UADALP")>; - -def : InstRW<[CyWriteVAccum, CyReadVAccum], - (instregex "SABAv","UABAv","SABALv","UABALv")>; - -def : InstRW<[CyWriteV3], (instregex "SQADDv","SQSUBv","UQADDv","UQSUBv")>; - -def : InstRW<[CyWriteV3], (instregex "SUQADDv","USQADDv")>; - -def : InstRW<[CyWriteV4], (instregex "ADDHNv","RADDHNv", "RSUBHNv", "SUBHNv")>; - -// WriteV includes: -// AND,BIC,CMTST,EOR,ORN,ORR -// ADDP -// SHADD,SHSUB,SRHADD,UHADD,UHSUB,URHADD -// SADDL,SSUBL,UADDL,USUBL -// SADDW,SSUBW,UADDW,USUBW - -def : InstRW<[CyWriteV3], (instregex "CMEQv","CMGEv","CMGTv", - "CMLEv","CMLTv", - "CMHIv","CMHSv")>; - -def : InstRW<[CyWriteV3], (instregex "SMAXv","SMINv","UMAXv","UMINv", - "SMAXPv","SMINPv","UMAXPv","UMINPv")>; - -def : InstRW<[CyWriteVABD], (instregex "SABDv","UABDv", - "SABDLv","UABDLv")>; - -//--- -// 7.9.3 Floating Point Arithmetic and Comparisons -//--- - -// FABS,FNEG are WriteF - -def : InstRW<[CyWriteV4], (instrs FADDPv2i32p)>; -def : InstRW<[CyWriteV5], (instrs FADDPv2i64p)>; - -def : InstRW<[CyWriteV3], (instregex "FMAXPv2i","FMAXNMPv2i", - "FMINPv2i","FMINNMPv2i")>; - -def : InstRW<[CyWriteV4], (instregex "FMAXVv","FMAXNMVv","FMINVv","FMINNMVv")>; - -def : InstRW<[CyWriteV4], (instrs FADDSrr,FADDv2f32,FADDv4f32, - FSUBSrr,FSUBv2f32,FSUBv4f32, - FADDPv2f32,FADDPv4f32, - FABD32,FABDv2f32,FABDv4f32)>; -def : InstRW<[CyWriteV5], (instrs FADDDrr,FADDv2f64, - FSUBDrr,FSUBv2f64, - FADDPv2f64, - FABD64,FABDv2f64)>; - -def : InstRW<[CyWriteV3], (instregex "FCMEQ","FCMGT","FCMLE","FCMLT")>; - -def : InstRW<[CyWriteV3], (instregex "FACGE","FACGT", - "FMAXS","FMAXD","FMAXv", - "FMINS","FMIND","FMINv", - "FMAXNMS","FMAXNMD","FMAXNMv", - "FMINNMS","FMINNMD","FMINNMv", - "FMAXPv2f","FMAXPv4f", - "FMINPv2f","FMINPv4f", - "FMAXNMPv2f","FMAXNMPv4f", - 
"FMINNMPv2f","FMINNMPv4f")>; - -// FCMP,FCMPE,FCCMP,FCCMPE -def : WriteRes {let Latency = 4;} - -// FCSEL is a WriteF. - -//--- -// 7.9.4 Shifts and Bitfield Operations -//--- - -// SHL is a WriteV - -def CyWriteVSHR : SchedWriteRes<[CyUnitV]> {let Latency = 2;} -def : InstRW<[CyWriteVSHR], (instregex "SSHRv","USHRv")>; - -def CyWriteVSRSHR : SchedWriteRes<[CyUnitV]> {let Latency = 3;} -def : InstRW<[CyWriteVSRSHR], (instregex "SRSHRv","URSHRv")>; - -// Shift and accumulate uses the vector multiply unit. -def CyWriteVShiftAcc : SchedWriteRes<[CyUnitVM]> {let Latency = 3;} -def CyReadVShiftAcc : SchedReadAdvance<1, - [CyWriteVShiftAcc, CyWriteVSHR, CyWriteVSRSHR]>; -def : InstRW<[CyWriteVShiftAcc, CyReadVShiftAcc], - (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>; - -// SSHL,USHL are WriteV. - -def : InstRW<[CyWriteV3], (instregex "SRSHLv","URSHLv")>; - -// SQSHL,SQSHLU,UQSHL are WriteV. - -def : InstRW<[CyWriteV3], (instregex "SQRSHLv","UQRSHLv")>; - -// WriteV includes: -// SHLL,SSHLL,USHLL -// SLI,SRI -// BIF,BIT,BSL,BSP -// EXT -// CLS,CLZ,CNT,RBIT,REV16,REV32,REV64,XTN -// XTN2 - -def : InstRW<[CyWriteV4], - (instregex "RSHRNv","SHRNv", - "SQRSHRNv","SQRSHRUNv","SQSHRNv","SQSHRUNv", - "UQRSHRNv","UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>; - -//--- -// 7.9.5 Multiplication -//--- - -def CyWriteVMul : SchedWriteRes<[CyUnitVM]> { let Latency = 4;} -def : InstRW<[CyWriteVMul], (instregex "MULv","SMULLv","UMULLv", - "SQDMULLv","SQDMULHv","SQRDMULHv")>; - -// FMUL,FMULX,FNMUL default to WriteFMul. 
-def : WriteRes { let Latency = 4;} - -def CyWriteV64Mul : SchedWriteRes<[CyUnitVM]> { let Latency = 5;} -def : InstRW<[CyWriteV64Mul], (instrs FMULDrr,FMULv2f64,FMULv2i64_indexed, - FNMULDrr,FMULX64,FMULXv2f64,FMULXv2i64_indexed)>; - -def CyReadVMulAcc : SchedReadAdvance<1, [CyWriteVMul, CyWriteV64Mul]>; -def : InstRW<[CyWriteVMul, CyReadVMulAcc], - (instregex "MLA","MLS","SMLAL","SMLSL","UMLAL","UMLSL", - "SQDMLAL","SQDMLSL")>; - -def CyWriteSMul : SchedWriteRes<[CyUnitVM]> { let Latency = 8;} -def CyWriteDMul : SchedWriteRes<[CyUnitVM]> { let Latency = 10;} -def CyReadSMul : SchedReadAdvance<4, [CyWriteSMul]>; -def CyReadDMul : SchedReadAdvance<5, [CyWriteDMul]>; - -def : InstRW<[CyWriteSMul, CyReadSMul], - (instrs FMADDSrrr,FMSUBSrrr,FNMADDSrrr,FNMSUBSrrr, - FMLAv2f32,FMLAv4f32, - FMLAv1i32_indexed,FMLAv1i64_indexed,FMLAv2i32_indexed)>; -def : InstRW<[CyWriteDMul, CyReadDMul], - (instrs FMADDDrrr,FMSUBDrrr,FNMADDDrrr,FNMSUBDrrr, - FMLAv2f64,FMLAv2i64_indexed, - FMLSv2f64,FMLSv2i64_indexed)>; - -def CyWritePMUL : SchedWriteRes<[CyUnitVD]> { let Latency = 3; } -def : InstRW<[CyWritePMUL], (instregex "PMULv", "PMULLv")>; - -//--- -// 7.9.6 Divide and Square Root -//--- - -// FDIV,FSQRT -// TODO: Add 64-bit variant with 19 cycle latency. -// TODO: Specialize FSQRT for longer latency. 
-def : WriteRes { - let Latency = 17; - let ResourceCycles = [2, 17]; -} - -def : InstRW<[CyWriteV4], (instregex "FRECPEv","FRECPXv","URECPEv","URSQRTEv")>; - -def WriteFRSQRTE : SchedWriteRes<[CyUnitVM]> { let Latency = 4; } -def : InstRW<[WriteFRSQRTE], (instregex "FRSQRTEv")>; - -def WriteFRECPS : SchedWriteRes<[CyUnitVM]> { let Latency = 8; } -def WriteFRSQRTS : SchedWriteRes<[CyUnitVM]> { let Latency = 10; } -def : InstRW<[WriteFRECPS], (instregex "FRECPSv")>; -def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>; - -//--- -// 7.9.7 Integer-FP Conversions -//--- - -// FCVT lengthen f16/s32 -def : InstRW<[WriteVq], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>; - -// FCVT,FCVTN,FCVTXN -// SCVTF,UCVTF V,V -// FRINT(AIMNPXZ) V,V -def : WriteRes {let Latency = 4;} - -// SCVT/UCVT S/D, Rd = VLD5+V4: 9 cycles. -def CyWriteCvtToFPR : WriteSequence<[WriteVLD, CyWriteV4]>; -def : InstRW<[CyWriteCopyToFPR], (instregex "FCVT[AMNPZ][SU][SU][WX][SD]r")>; - -// FCVT Rd, S/D = V6+LD4: 10 cycles -def CyWriteCvtToGPR : WriteSequence<[CyWriteV6, WriteLD]>; -def : InstRW<[CyWriteCvtToGPR], (instregex "[SU]CVTF[SU][WX][SD]r")>; - -// FCVTL is a WriteV - -//--- -// 7.9.8-7.9.10 Cryptography, Data Transposition, Table Lookup -//--- - -def CyWriteCrypto2 : SchedWriteRes<[CyUnitVD]> {let Latency = 2;} -def : InstRW<[CyWriteCrypto2], (instrs AESIMCrr, AESMCrr, SHA1Hrr, - AESDrr, AESErr, SHA1SU1rr, SHA256SU0rr, - SHA1SU0rrr)>; - -def CyWriteCrypto3 : SchedWriteRes<[CyUnitVD]> {let Latency = 3;} -def : InstRW<[CyWriteCrypto3], (instrs SHA256SU1rrr)>; - -def CyWriteCrypto6 : SchedWriteRes<[CyUnitVD]> {let Latency = 6;} -def : InstRW<[CyWriteCrypto6], (instrs SHA1Crrr, SHA1Mrrr, SHA1Prrr, - SHA256Hrrr,SHA256H2rrr)>; - -// TRN,UZP,ZUP are WriteV. - -// TBL,TBX are WriteV. - -//--- -// 7.9.11-7.9.14 Load/Store, single element and paired -//--- - -// Loading into the vector unit takes 5 cycles vs 4 for integer loads. -def : WriteRes { - let Latency = 5; -} - -// Store-load forwarding is 4 cycles. 
-def : WriteRes { - let Latency = 4; -} - -// WriteVLDPair/VSTPair sequences are expanded by the target description. - -//--- -// 7.9.15 Load, element operations -//--- - -// Only the first WriteVLD and WriteAdr for writeback matches def operands. -// Subsequent WriteVLDs consume resources. Since all loaded values have the -// same latency, this is acceptable. - -// Vd is read 5 cycles after issuing the vector load. -def : ReadAdvance; - -def : InstRW<[WriteVLD], - (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, WriteAdr], - (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; - -// Register writes from the load's high half are fused micro-ops. -def : InstRW<[WriteVLD], - (instregex "LD1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[WriteVLD, WriteAdr], - (instregex "LD1Twov(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVLD, WriteVLD], - (instregex "LD1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD], - (instregex "LD1Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLD, WriteVLD], - (instregex "LD1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD], - (instregex "LD1Threev(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVLD, WriteVLD, WriteVLD], - (instregex "LD1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD], - (instregex "LD1Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLD, WriteVLD], - (instregex "LD1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD], - (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVLD, WriteVLD, WriteVLD, WriteVLD], - (instregex "LD1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD, WriteVLD], - (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD], - (instregex "LD1i(8|16|32)$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr], - (instregex "LD1i(8|16|32)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD], (instrs LD1i64)>; -def : 
InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr],(instrs LD1i64_POST)>; - -def : InstRW<[WriteVLDShuffle], - (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr], - (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[WriteVLDShuffle, WriteVq], - (instregex "LD2Twov(8b|4h|2s)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq], - (instregex "LD2Twov(8b|4h|2s)_POST$")>; -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle], - (instregex "LD2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle], - (instregex "LD2Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq], - (instregex "LD2i(8|16|32)$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq], - (instregex "LD2i(8|16|32)_POST")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq], - (instregex "LD2i64$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq], - (instregex "LD2i64_POST")>; - -def : InstRW<[WriteVLDShuffle, WriteVq], - (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq], - (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq], - (instregex "LD3Threev(8b|4h|2s)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq], - (instregex "LD3Threev(8b|4h|2s)_POST")>; -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle], - (instregex "LD3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle], - (instregex "LD3Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq], - (instregex "LD3i(8|16|32)$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq], - (instregex "LD3i(8|16|32)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq], - (instregex "LD3i64$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq], 
- (instregex "LD3i64_POST")>; - -def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq], - (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq], - (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>; - -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq], - (instrs LD3Rv1d,LD3Rv2d)>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq], - (instrs LD3Rv1d_POST,LD3Rv2d_POST)>; - -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq], - (instregex "LD4Fourv(8b|4h|2s)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq], - (instregex "LD4Fourv(8b|4h|2s)_POST")>; -def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle, - WriteVLDPairShuffle, WriteVLDPairShuffle], - (instregex "LD4Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle, - WriteVLDPairShuffle, WriteVLDPairShuffle], - (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq, WriteVq], - (instregex "LD4i(8|16|32)$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq, WriteVq], - (instregex "LD4i(8|16|32)_POST")>; - - -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq, WriteVq], - (instrs LD4i64)>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq], - (instrs LD4i64_POST)>; - -def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq, WriteVq], - (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq, WriteVq], - (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>; - -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq], - (instrs LD4Rv1d,LD4Rv2d)>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq], - (instrs LD4Rv1d_POST,LD4Rv2d_POST)>; - -//--- -// 7.9.16 Store, element operations -//--- - -// Only the WriteAdr for writeback matches a def operands. -// Subsequent WriteVLDs only consume resources. 
- -def : InstRW<[WriteVST], - (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVST], - (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle], - (instregex "ST1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], - (instregex "ST1Twov(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVST, WriteVST], - (instregex "ST1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVST, WriteVST], - (instregex "ST1Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle, WriteVST], - (instregex "ST1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVST], - (instregex "ST1Threev(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVST, WriteVST, WriteVST], - (instregex "ST1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST], - (instregex "ST1Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST1Fourv(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], - (instregex "ST1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST, WriteVST], - (instregex "ST1Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle], (instregex "ST1i(8|16|32)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST1i(8|16|32)_POST")>; - -def : InstRW<[WriteVSTShuffle], (instrs ST1i64)>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST1i64_POST)>; - -def : InstRW<[WriteVSTShuffle], - (instregex "ST2Twov(8b|4h|2s)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], - (instregex "ST2Twov(8b|4h|2s)_POST")>; -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST2Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle], (instregex "ST2i(8|16|32)$")>; -def : InstRW<[WriteAdr, 
WriteVSTShuffle], (instregex "ST2i(8|16|32)_POST")>; -def : InstRW<[WriteVSTShuffle], (instrs ST2i64)>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST2i64_POST)>; - -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST3Threev(8b|4h|2s)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST3Threev(8b|4h|2s)_POST")>; -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST3Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle], (instregex "ST3i(8|16|32)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST3i(8|16|32)_POST")>; - -def :InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64)>; -def :InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64_POST)>; - -def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle], - (instregex "ST4Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle], - (instregex "ST4Fourv(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle, - WriteVSTPairShuffle, WriteVSTPairShuffle], - (instregex "ST4Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle, - WriteVSTPairShuffle, WriteVSTPairShuffle], - (instregex "ST4Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTPairShuffle], (instregex "ST4i(8|16|32)$")>; -def : InstRW<[WriteAdr, WriteVSTPairShuffle], (instregex "ST4i(8|16|32)_POST")>; - -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST4i64)>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],(instrs ST4i64_POST)>; - -// Atomic operations are not supported. 
-def : WriteRes { let Unsupported = 1; } - -//--- -// Unused SchedRead types -//--- - -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -} // SchedModel = CycloneModel diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedExynosM3.td b/suite/synctools/tablegen/AArch64/AArch64SchedExynosM3.td deleted file mode 100644 index d66efb82fc..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedExynosM3.td +++ /dev/null @@ -1,877 +0,0 @@ -//=- AArch64SchedExynosM3.td - Samsung Exynos M3 Sched Defs --*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for the Samsung Exynos M3 to support -// instruction scheduling and other instruction cost heuristics. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// The Exynos-M3 is an advanced superscalar microprocessor with a 6-wide -// in-order stage for decode and dispatch and a wider issue stage. -// The execution units and loads and stores are out-of-order. - -def ExynosM3Model : SchedMachineModel { - let IssueWidth = 6; // Up to 6 uops per cycle. - let MicroOpBufferSize = 228; // ROB size. - let LoopMicroOpBufferSize = 40; // Based on the instruction queue size. - let LoadLatency = 4; // Optimistic load cases. - let MispredictPenalty = 16; // Minimum branch misprediction penalty. - let CompleteModel = 1; // Use the default model otherwise. 
- - list UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F, - SMEUnsupported.F); -} - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available on the Exynos-M3, -// which has 12 pipelines, each with its own queue with out-of-order dispatch. - -let SchedModel = ExynosM3Model in { - -def M3UnitA : ProcResource<2>; // Simple integer -def M3UnitC : ProcResource<2>; // Simple and complex integer -def M3UnitD : ProcResource<1>; // Integer division (inside C0, serialized) -def M3UnitB : ProcResource<2>; // Branch -def M3UnitL : ProcResource<2>; // Load -def M3UnitS : ProcResource<1>; // Store -def M3PipeF0 : ProcResource<1>; // FP #0 -let Super = M3PipeF0 in { - def M3UnitFMAC0 : ProcResource<1>; // FP multiplication - def M3UnitFADD0 : ProcResource<1>; // Simple FP - def M3UnitFCVT0 : ProcResource<1>; // FP conversion - def M3UnitFSQR : ProcResource<2>; // FP square root (serialized) - def M3UnitNALU0 : ProcResource<1>; // Simple vector - def M3UnitNMSC : ProcResource<1>; // FP and vector miscellanea - def M3UnitNSHT0 : ProcResource<1>; // Vector shifting - def M3UnitNSHF0 : ProcResource<1>; // Vector shuffling -} -def M3PipeF1 : ProcResource<1>; // FP #1 -let Super = M3PipeF1 in { - def M3UnitFMAC1 : ProcResource<1>; // FP multiplication - def M3UnitFADD1 : ProcResource<1>; // Simple FP - def M3UnitFDIV0 : ProcResource<2>; // FP division (serialized) - def M3UnitFCVT1 : ProcResource<1>; // FP conversion - def M3UnitFST0 : ProcResource<1>; // FP store - def M3UnitNALU1 : ProcResource<1>; // Simple vector - def M3UnitNCRY0 : ProcResource<1>; // Cryptographic - def M3UnitNMUL : ProcResource<1>; // Vector multiplication - def M3UnitNSHT1 : ProcResource<1>; // Vector shifting - def M3UnitNSHF1 : ProcResource<1>; // Vector shuffling -} -def M3PipeF2 : ProcResource<1>; // FP #2 -let Super = M3PipeF2 in { - def M3UnitFMAC2 : ProcResource<1>; // FP multiplication - def 
M3UnitFADD2 : ProcResource<1>; // Simple FP - def M3UnitFDIV1 : ProcResource<2>; // FP division (serialized) - def M3UnitFST1 : ProcResource<1>; // FP store - def M3UnitNALU2 : ProcResource<1>; // Simple vector - def M3UnitNCRY1 : ProcResource<1>; // Cryptographic - def M3UnitNSHT2 : ProcResource<1>; // Vector shifting - def M3UnitNSHF2 : ProcResource<1>; // Vector shuffling -} - - -def M3UnitALU : ProcResGroup<[M3UnitA, - M3UnitC]>; -def M3UnitFMAC : ProcResGroup<[M3UnitFMAC0, - M3UnitFMAC1, - M3UnitFMAC2]>; -def M3UnitFADD : ProcResGroup<[M3UnitFADD0, - M3UnitFADD1, - M3UnitFADD2]>; -def M3UnitFDIV : ProcResGroup<[M3UnitFDIV0, - M3UnitFDIV1]>; -def M3UnitFCVT : ProcResGroup<[M3UnitFCVT0, - M3UnitFCVT1]>; -def M3UnitFST : ProcResGroup<[M3UnitFST0, - M3UnitFST1]>; -def M3UnitNALU : ProcResGroup<[M3UnitNALU0, - M3UnitNALU1, - M3UnitNALU2]>; -def M3UnitNCRY : ProcResGroup<[M3UnitNCRY0, - M3UnitNCRY1]>; -def M3UnitNSHT : ProcResGroup<[M3UnitNSHT0, - M3UnitNSHT1, - M3UnitNSHT2]>; -def M3UnitNSHF : ProcResGroup<[M3UnitNSHF0, - M3UnitNSHF1, - M3UnitNSHF2]>; - -//===----------------------------------------------------------------------===// -// Coarse scheduling model. 
- -def M3WriteZ0 : SchedWriteRes<[]> { let Latency = 0; - let NumMicroOps = 1; } -def M3WriteZ1 : SchedWriteRes<[]> { let Latency = 1; - let NumMicroOps = 0; } - -def M3WriteA1 : SchedWriteRes<[M3UnitALU]> { let Latency = 1; } -def M3WriteAA : SchedWriteRes<[M3UnitALU]> { let Latency = 2; - let ResourceCycles = [2]; } -def M3WriteAB : SchedWriteRes<[M3UnitALU, - M3UnitC]> { let Latency = 1; - let NumMicroOps = 2; } -def M3WriteAC : SchedWriteRes<[M3UnitALU, - M3UnitALU, - M3UnitC]> { let Latency = 2; - let NumMicroOps = 3; } -def M3WriteAD : SchedWriteRes<[M3UnitALU, - M3UnitC]> { let Latency = 2; - let NumMicroOps = 2; } -def M3WriteC1 : SchedWriteRes<[M3UnitC]> { let Latency = 1; } -def M3WriteC2 : SchedWriteRes<[M3UnitC]> { let Latency = 2; } -def M3WriteAU : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar, - SchedVar]>; -def M3WriteAV : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar]>; -def M3WriteAW : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar]>; -def M3WriteAX : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar]>; -def M3WriteAY : SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M3WriteB1 : SchedWriteRes<[M3UnitB]> { let Latency = 1; } -def M3WriteBX : SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M3WriteL4 : SchedWriteRes<[M3UnitL]> { let Latency = 4; } -def M3WriteL5 : SchedWriteRes<[M3UnitL]> { let Latency = 5; } -def M3WriteLA : SchedWriteRes<[M3UnitL, - M3UnitL]> { let Latency = 5; - let NumMicroOps = 1; } -def M3WriteLB : SchedWriteRes<[M3UnitA, - M3UnitL]> { let Latency = 5; - let NumMicroOps = 2; } -def M3WriteLC : SchedWriteRes<[M3UnitA, - M3UnitL, - M3UnitL]> { let Latency = 5; - let NumMicroOps = 2; } -def M3WriteLD : SchedWriteRes<[M3UnitA, - M3UnitL]> { let Latency = 4; - let NumMicroOps = 2; } -def M3WriteLE : SchedWriteRes<[M3UnitA, - M3UnitL]> { let Latency = 6; - let NumMicroOps = 2; } -def M3WriteLH : SchedWriteRes<[]> { let Latency = 5; - let NumMicroOps = 0; } -def M3WriteLX : SchedWriteVariant<[SchedVar, - 
SchedVar]>; -def M3WriteLY : SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M3WriteS1 : SchedWriteRes<[M3UnitS]> { let Latency = 1; } -def M3WriteSA : SchedWriteRes<[M3UnitA, - M3UnitS, - M3UnitFST]> { let Latency = 3; - let NumMicroOps = 2; } -def M3WriteSB : SchedWriteRes<[M3UnitA, - M3UnitS]> { let Latency = 2; - let NumMicroOps = 2; } -def M3WriteSC : SchedWriteRes<[M3UnitA, - M3UnitS, - M3UnitFST]> { let Latency = 1; - let NumMicroOps = 2; } -def M3WriteSY : SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M3ReadAdrBase : SchedReadVariant<[SchedVar, - SchedVar]>; - -// Branch instructions. -def : SchedAlias; -def : SchedAlias; - -// Arithmetic and logical integer instructions. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - -// Move instructions. -def : SchedAlias; - -// Divide and multiply instructions. -def : WriteRes { let Latency = 12; - let ResourceCycles = [1, 12]; } -def : WriteRes { let Latency = 21; - let ResourceCycles = [1, 21]; } -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 4; - let ResourceCycles = [2]; } - -// Miscellaneous instructions. -def : SchedAlias; - -// Addressing modes. -def : SchedAlias; -def : SchedAlias; - -// Load instructions. -def : SchedAlias; -def : WriteRes { let Latency = 4; - let NumMicroOps = 0; } -def : SchedAlias; - -// Store instructions. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - -// FP data instructions. -def : WriteRes { let Latency = 2; } -def : WriteRes { let Latency = 2; } -def : WriteRes { let Latency = 12; - let ResourceCycles = [12]; } -def : WriteRes { let Latency = 4; } - -// FP miscellaneous instructions. -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -// FP load instructions. -def : SchedAlias; - -// FP store instructions. -def : WriteRes { let Latency = 1; - let NumMicroOps = 1; } - -// ASIMD FP instructions. 
-def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 3; } - -// Other miscellaneous instructions. -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -//===----------------------------------------------------------------------===// -// Generic fast forwarding. - -// TODO: Add FP register forwarding rules. - -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -// TODO: The forwarding for 32 bits actually saves 2 cycles. -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -//===----------------------------------------------------------------------===// -// Finer scheduling model. - -def M3WriteNEONA : SchedWriteRes<[M3UnitNSHF, - M3UnitFADD]> { let Latency = 3; - let NumMicroOps = 2; } -def M3WriteNEONB : SchedWriteRes<[M3UnitNALU, - M3UnitFST]> { let Latency = 10; - let NumMicroOps = 2; } -def M3WriteNEOND : SchedWriteRes<[M3UnitNSHF, - M3UnitFST]> { let Latency = 6; - let NumMicroOps = 2; } -def M3WriteNEONH : SchedWriteRes<[M3UnitNALU, - M3UnitS]> { let Latency = 5; - let NumMicroOps = 2; } -def M3WriteNEONI : SchedWriteRes<[M3UnitNSHF, - M3UnitS]> { let Latency = 5; - let NumMicroOps = 2; } -def M3WriteNEONV : SchedWriteRes<[M3UnitFDIV0, - M3UnitFDIV1]> { let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [8, 8]; } -def M3WriteNEONW : SchedWriteRes<[M3UnitFDIV0, - M3UnitFDIV1]> { let Latency = 12; - let NumMicroOps = 2; - let ResourceCycles = [13, 13]; } -def M3WriteNEONX : SchedWriteRes<[M3UnitFSQR, - M3UnitFSQR]> { let Latency = 18; - let NumMicroOps = 2; - let ResourceCycles = [19, 19]; } -def M3WriteNEONY : SchedWriteRes<[M3UnitFSQR, - M3UnitFSQR]> { let Latency = 25; - let NumMicroOps = 2; - let ResourceCycles = [26, 26]; } -def M3WriteNEONZ : SchedWriteRes<[M3UnitNMSC, - M3UnitNMSC]> { let Latency = 5; - let NumMicroOps = 2; } -def 
M3WriteFADD2 : SchedWriteRes<[M3UnitFADD]> { let Latency = 2; } -def M3WriteFCVT2 : SchedWriteRes<[M3UnitFCVT]> { let Latency = 2; } -def M3WriteFCVT3 : SchedWriteRes<[M3UnitFCVT]> { let Latency = 3; } -def M3WriteFCVT3A : SchedWriteRes<[M3UnitFCVT0]> { let Latency = 3; } -def M3WriteFCVT4A : SchedWriteRes<[M3UnitFCVT0]> { let Latency = 4; } -def M3WriteFCVT4 : SchedWriteRes<[M3UnitFCVT]> { let Latency = 4; } -def M3WriteFDIV10 : SchedWriteRes<[M3UnitFDIV]> { let Latency = 7; - let ResourceCycles = [8]; } -def M3WriteFDIV12 : SchedWriteRes<[M3UnitFDIV]> { let Latency = 12; - let ResourceCycles = [13]; } -def M3WriteFMAC3 : SchedWriteRes<[M3UnitFMAC]> { let Latency = 3; } -def M3WriteFMAC4 : SchedWriteRes<[M3UnitFMAC]> { let Latency = 4; } -def M3WriteFMAC5 : SchedWriteRes<[M3UnitFMAC]> { let Latency = 5; } -def M3WriteFSQR17 : SchedWriteRes<[M3UnitFSQR]> { let Latency = 18; - let ResourceCycles = [19]; } -def M3WriteFSQR25 : SchedWriteRes<[M3UnitFSQR]> { let Latency = 25; - let ResourceCycles = [26]; } -def M3WriteNALU1 : SchedWriteRes<[M3UnitNALU]> { let Latency = 1; } -def M3WriteNCRY1A : SchedWriteRes<[M3UnitNCRY0]> { let Latency = 1; } -def M3WriteNCRY3A : SchedWriteRes<[M3UnitNCRY0]> { let Latency = 3; } -def M3WriteNCRY5A : SchedWriteRes<[M3UnitNCRY]> { let Latency = 5; } -def M3WriteNMSC1 : SchedWriteRes<[M3UnitNMSC]> { let Latency = 1; } -def M3WriteNMSC2 : SchedWriteRes<[M3UnitNMSC]> { let Latency = 2; } -def M3WriteNMSC3 : SchedWriteRes<[M3UnitNMSC]> { let Latency = 3; } -def M3WriteNMUL3 : SchedWriteRes<[M3UnitNMUL]> { let Latency = 3; } -def M3WriteNSHF1 : SchedWriteRes<[M3UnitNSHF]> { let Latency = 1; } -def M3WriteNSHF3 : SchedWriteRes<[M3UnitNSHF]> { let Latency = 3; } -def M3WriteNSHT1 : SchedWriteRes<[M3UnitNSHT]> { let Latency = 1; } -def M3WriteNSHT2 : SchedWriteRes<[M3UnitNSHT]> { let Latency = 2; } -def M3WriteNSHT3 : SchedWriteRes<[M3UnitNSHT]> { let Latency = 3; } -def M3WriteVLDA : SchedWriteRes<[M3UnitL, - M3UnitL]> { let Latency = 5; - let 
NumMicroOps = 2; } -def M3WriteVLDB : SchedWriteRes<[M3UnitL, - M3UnitL, - M3UnitL]> { let Latency = 6; - let NumMicroOps = 3; } -def M3WriteVLDC : SchedWriteRes<[M3UnitL, - M3UnitL, - M3UnitL, - M3UnitL]> { let Latency = 6; - let NumMicroOps = 4; } -def M3WriteVLDD : SchedWriteRes<[M3UnitL, - M3UnitNALU]> { let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [2, 1]; } -def M3WriteVLDE : SchedWriteRes<[M3UnitL, - M3UnitNALU]> { let Latency = 6; - let NumMicroOps = 2; - let ResourceCycles = [2, 1]; } -def M3WriteVLDF : SchedWriteRes<[M3UnitL, - M3UnitL]> { let Latency = 10; - let NumMicroOps = 2; - let ResourceCycles = [5, 5]; } -def M3WriteVLDG : SchedWriteRes<[M3UnitL, - M3UnitNALU, - M3UnitNALU]> { let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [2, 1, 1]; } -def M3WriteVLDH : SchedWriteRes<[M3UnitL, - M3UnitNALU, - M3UnitNALU]> { let Latency = 6; - let NumMicroOps = 3; - let ResourceCycles = [2, 1, 1]; } -def M3WriteVLDI : SchedWriteRes<[M3UnitL, - M3UnitL, - M3UnitL]> { let Latency = 12; - let NumMicroOps = 3; - let ResourceCycles = [6, 6, 6]; } -def M3WriteVLDJ : SchedWriteRes<[M3UnitL, - M3UnitNALU, - M3UnitNALU, - M3UnitNALU]> { let Latency = 7; - let NumMicroOps = 4; - let ResourceCycles = [2, 1, 1, 1]; } -def M3WriteVLDK : SchedWriteRes<[M3UnitL, - M3UnitNALU, - M3UnitNALU, - M3UnitNALU, - M3UnitNALU]> { let Latency = 9; - let NumMicroOps = 5; - let ResourceCycles = [4, 1, 1, 1, 1]; } -def M3WriteVLDL : SchedWriteRes<[M3UnitL, - M3UnitNALU, - M3UnitNALU, - M3UnitL, - M3UnitNALU]> { let Latency = 6; - let NumMicroOps = 5; - let ResourceCycles = [6, 1, 1, 6, 1]; } -def M3WriteVLDM : SchedWriteRes<[M3UnitL, - M3UnitNALU, - M3UnitNALU, - M3UnitL, - M3UnitNALU, - M3UnitNALU]> { let Latency = 7; - let NumMicroOps = 6; - let ResourceCycles = [6, 1, 1, 6, 1, 1]; } -def M3WriteVLDN : SchedWriteRes<[M3UnitL, - M3UnitL, - M3UnitL, - M3UnitL]> { let Latency = 14; - let NumMicroOps = 4; - let ResourceCycles = [6, 6, 6, 6]; } -def M3WriteVSTA : 
WriteSequence<[WriteVST], 2>; -def M3WriteVSTB : WriteSequence<[WriteVST], 3>; -def M3WriteVSTC : WriteSequence<[WriteVST], 4>; -def M3WriteVSTD : SchedWriteRes<[M3UnitS, - M3UnitFST, - M3UnitS, - M3UnitFST]> { let Latency = 7; - let NumMicroOps = 4; - let ResourceCycles = [1, 3, 1, 3]; } -def M3WriteVSTE : SchedWriteRes<[M3UnitS, - M3UnitFST, - M3UnitS, - M3UnitFST, - M3UnitS, - M3UnitFST]> { let Latency = 8; - let NumMicroOps = 6; - let ResourceCycles = [1, 3, 1, 3, 1, 3]; } -def M3WriteVSTF : SchedWriteRes<[M3UnitNALU, - M3UnitFST, - M3UnitFST, - M3UnitS, - M3UnitFST, - M3UnitS, - M3UnitFST]> { let Latency = 15; - let NumMicroOps = 7; - let ResourceCycles = [1, 3, 3, 1, 3, 1, 3]; } -def M3WriteVSTG : SchedWriteRes<[M3UnitNALU, - M3UnitFST, - M3UnitFST, - M3UnitS, - M3UnitFST, - M3UnitS, - M3UnitFST, - M3UnitS, - M3UnitFST]> { let Latency = 16; - let NumMicroOps = 9; - let ResourceCycles = [1, 3, 3, 1, 3, 1, 3, 1, 3]; } -def M3WriteVSTH : SchedWriteRes<[M3UnitNALU, - M3UnitFST, - M3UnitFST, - M3UnitS, - M3UnitFST]> { let Latency = 14; - let NumMicroOps = 5; - let ResourceCycles = [1, 3, 3, 1, 3]; } -def M3WriteVSTI : SchedWriteRes<[M3UnitNALU, - M3UnitFST, - M3UnitFST, - M3UnitS, - M3UnitFST, - M3UnitS, - M3UnitFST, - M3UnitS, - M3UnitFST]> { let Latency = 17; - let NumMicroOps = 9; - let ResourceCycles = [1, 3, 3, 1, 3, 1, 3, 1, 3]; } - -// Special cases. -def M3WriteAES : SchedWriteRes<[M3UnitNCRY]> { let Latency = 1; } -def M3WriteCOPY : SchedWriteVariant<[SchedVar, - SchedVar]>; -def M3WriteMOVI : SchedWriteVariant<[SchedVar, - SchedVar]>; - -// Fast forwarding. 
-def M3ReadAES : SchedReadAdvance<1, [M3WriteAES]>; -def M3ReadFMAC : SchedReadAdvance<1, [M3WriteFMAC4, - M3WriteFMAC5]>; -def M3ReadNMUL : SchedReadAdvance<1, [M3WriteNMUL3]>; - -// Branch instructions -def : InstRW<[M3WriteB1], (instrs Bcc)>; -def : InstRW<[M3WriteA1], (instrs BL)>; -def : InstRW<[M3WriteBX], (instrs BLR)>; -def : InstRW<[M3WriteC1], (instregex "^CBN?Z[WX]")>; -def : InstRW<[M3WriteAD], (instregex "^TBN?Z[WX]")>; - -// Arithmetic and logical integer instructions. -def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)[WX]rs$")>; -def : InstRW<[M3WriteAU], (instrs ORRWrs, ORRXrs)>; -def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?[WX]rx(64)?$")>; -def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|SUB)S[WX]rs$")>; -def : InstRW<[M3WriteAV], (instrs ADDWri, ADDXri)>; -def : InstRW<[M3WriteAW], (instrs ORRWri, ORRXri)>; - -// Move instructions. -def : InstRW<[M3WriteCOPY], (instrs COPY)>; -def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>; -def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>; - -// Divide and multiply instructions. - -// Miscellaneous instructions. - -// Load instructions. -def : InstRW<[M3WriteLD, - WriteLDHi, - WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>; -def : InstRW<[M3WriteLB, - ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>; -def : InstRW<[M3WriteLX, - ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>; -def : InstRW<[M3WriteLB, - ReadAdrBase], (instrs PRFMroW)>; -def : InstRW<[M3WriteLX, - ReadAdrBase], (instrs PRFMroX)>; - -// Store instructions. -def : InstRW<[M3WriteSB, - ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>; -def : InstRW<[WriteST, - ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>; - -// FP data instructions. 
-def : InstRW<[M3WriteNSHF1], (instregex "^FABS[DS]r")>; -def : InstRW<[M3WriteFADD2], (instregex "^F(ADD|SUB)[DS]rr")>; -def : InstRW<[M3WriteFDIV10], (instrs FDIVSrr)>; -def : InstRW<[M3WriteFDIV12], (instrs FDIVDrr)>; -def : InstRW<[M3WriteNMSC1], (instregex "^F(MAX|MIN).+rr")>; -def : InstRW<[M3WriteFMAC3], (instregex "^FN?MUL[DS]rr")>; -def : InstRW<[M3WriteFMAC4, - M3ReadFMAC], (instregex "^FN?M(ADD|SUB)[DS]rrr")>; -def : InstRW<[M3WriteNALU1], (instregex "^FNEG[DS]r")>; -def : InstRW<[M3WriteFCVT3A], (instregex "^FRINT.+r")>; -def : InstRW<[M3WriteNEONH], (instregex "^FCSEL[DS]rrr")>; -def : InstRW<[M3WriteFSQR17], (instrs FSQRTSr)>; -def : InstRW<[M3WriteFSQR25], (instrs FSQRTDr)>; - -// FP miscellaneous instructions. -def : InstRW<[M3WriteFCVT3], (instregex "^FCVT[DHS][DHS]r")>; -def : InstRW<[M3WriteFCVT4A], (instregex "^[SU]CVTF[SU][XW][DHS]ri")>; -def : InstRW<[M3WriteFCVT3A], (instregex "^FCVT[AMNPZ][SU]U[XW][DHS]r")>; -def : InstRW<[M3WriteFCVT3A], (instregex "^FCVTZ[SU][dhs]")>; -def : InstRW<[M3WriteNALU1], (instregex "^FMOV[DS][ir]")>; -def : InstRW<[M3WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev1")>; -def : InstRW<[M3WriteNMSC1], (instregex "^FRECPXv1")>; -def : InstRW<[M3WriteFMAC4, - M3ReadFMAC], (instregex "^F(RECP|RSQRT)S(16|32|64)")>; -def : InstRW<[M3WriteNALU1], (instregex "^FMOV[WX][DS]r")>; -def : InstRW<[M3WriteNALU1], (instregex "^FMOV[DS][WX]r")>; -def : InstRW<[M3WriteNEONI], (instregex "^FMOV(DX|XD)Highr")>; - -// FP load instructions. 
-def : InstRW<[WriteVLD], (instregex "^LDR[DSQ]l")>; -def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>; -def : InstRW<[WriteVLD, - WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>; -def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>; -def : InstRW<[M3WriteLE, - ReadAdrBase], (instregex "^LDR[BDHS]roW")>; -def : InstRW<[WriteVLD, - ReadAdrBase], (instregex "^LDR[BDHS]roX")>; -def : InstRW<[M3WriteLY, - ReadAdrBase], (instregex "^LDRQro[WX]")>; -def : InstRW<[WriteVLD, - M3WriteLH], (instregex "^LDN?P[DS]i")>; -def : InstRW<[M3WriteLA, - M3WriteLH], (instregex "^LDN?PQi")>; -def : InstRW<[M3WriteLB, - M3WriteLH, - WriteAdr], (instregex "^LDP[DS](post|pre)")>; -def : InstRW<[M3WriteLC, - M3WriteLH, - WriteAdr], (instregex "^LDPQ(post|pre)")>; - -// FP store instructions. -def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>; -def : InstRW<[WriteVST, - WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>; -def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>; -def : InstRW<[M3WriteSA, - ReadAdrBase], (instregex "^STR[BDHS]roW")>; -def : InstRW<[M3WriteSA, - ReadAdrBase], (instregex "^STRQroW")>; -def : InstRW<[WriteVST, - ReadAdrBase], (instregex "^STR[BDHS]roX")>; -def : InstRW<[M3WriteSY, - ReadAdrBase], (instregex "^STRQroX")>; -def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>; -def : InstRW<[WriteVST, - WriteAdr], (instregex "^STP[DS](post|pre)")>; -def : InstRW<[M3WriteSC, - WriteAdr], (instregex "^STPQ(post|pre)")>; - -// ASIMD instructions. 
-def : InstRW<[M3WriteNMSC3], (instregex "^[SU]ABAL?v")>; -def : InstRW<[M3WriteNMSC1], (instregex "^[SU]ABDL?v")>; -def : InstRW<[M3WriteNMSC1], (instregex "^((SQ)?ABS|SQNEG)v")>; -def : InstRW<[M3WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>; -def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Pv")>; -def : InstRW<[M3WriteNMSC3], (instregex "^[SU]H(ADD|SUB)v")>; -def : InstRW<[M3WriteNMSC3], (instregex "^[SU](ADD|SUB)[LW]V?v")>; -def : InstRW<[M3WriteNMSC3], (instregex "^R?(ADD|SUB)HN2?v")>; -def : InstRW<[M3WriteNMSC3], (instregex "^[SU]Q(ADD|SUB)v")>; -def : InstRW<[M3WriteNMSC3], (instregex "^(SU|US)QADDv")>; -def : InstRW<[M3WriteNMSC3], (instregex "^[SU]RHADDv")>; -def : InstRW<[M3WriteNMSC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>; -def : InstRW<[M3WriteNALU1], (instregex "^CMTSTv")>; -def : InstRW<[M3WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>; -def : InstRW<[M3WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>; -def : InstRW<[M3WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>; -def : InstRW<[M3WriteNMSC3], (instregex "^[SU](MIN|MAX)Vv")>; -def : InstRW<[M3WriteNMUL3], (instregex "^(MUL|SQR?DMULH)v")>; -def : InstRW<[M3WriteNMUL3, - M3ReadNMUL], (instregex "^ML[AS]v")>; -def : InstRW<[M3WriteNMUL3], (instregex "^[SU]ML[AS]Lv")>; -def : InstRW<[M3WriteNMUL3], (instregex "^SQDML[AS]L")>; -def : InstRW<[M3WriteNMUL3], (instregex "^(S|U|SQD)MULLv")>; -def : InstRW<[M3WriteNMSC3], (instregex "^[SU]ADALPv")>; -def : InstRW<[M3WriteNSHT3], (instregex "^[SU]R?SRAv")>; -def : InstRW<[M3WriteNSHT1], (instregex "^SHL[dv]")>; -def : InstRW<[M3WriteNSHT1], (instregex "^[SU]SH[LR][dv]")>; -def : InstRW<[M3WriteNSHT1], (instregex "^S[RS]I[dv]")>; -def : InstRW<[M3WriteNSHT2], (instregex "^[SU]?SHLLv")>; -def : InstRW<[M3WriteNSHT3], (instregex "^(([SU]Q)?R)?SHRU?N[bhsv]")>; -def : InstRW<[M3WriteNSHT3], (instregex "^[SU]RSH[LR][dv]")>; -def : InstRW<[M3WriteNSHT3], (instregex "^[SU]QR?SHLU?[bdhsv]")>; - -// ASIMD FP instructions. 
-def : InstRW<[M3WriteNSHF1], (instregex "^FABSv")>; -def : InstRW<[M3WriteFADD2], (instregex "^F(ABD|ADD|SUB)v")>; -def : InstRW<[M3WriteNEONA], (instregex "^FADDP")>; -def : InstRW<[M3WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>; -def : InstRW<[M3WriteFCVT3], (instregex "^FCVT(L|N|XN)v")>; -def : InstRW<[M3WriteFCVT2], (instregex "^FCVT[AMNPZ][SU]v")>; -def : InstRW<[M3WriteFCVT2], (instregex "^[SU]CVTFv")>; -def : InstRW<[M3WriteFDIV10], (instrs FDIVv2f32)>; -def : InstRW<[M3WriteNEONV], (instrs FDIVv4f32)>; -def : InstRW<[M3WriteNEONW], (instrs FDIVv2f64)>; -def : InstRW<[M3WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?v")>; -def : InstRW<[M3WriteNMSC2], (instregex "^F(MAX|MIN)(NM)?Pv")>; -def : InstRW<[M3WriteNEONZ], (instregex "^F(MAX|MIN)(NM)?Vv")>; -def : InstRW<[M3WriteFMAC3], (instregex "^FMULX?v.[fi]")>; -def : InstRW<[M3WriteFMAC4, - M3ReadFMAC], (instregex "^FML[AS]v.f")>; -def : InstRW<[M3WriteFMAC5, - M3ReadFMAC], (instregex "^FML[AS]v.i")>; -def : InstRW<[M3WriteNALU1], (instregex "^FNEGv")>; -def : InstRW<[M3WriteFCVT3A], (instregex "^FRINT[AIMNPXZ]v")>; -def : InstRW<[M3WriteFSQR17], (instrs FSQRTv2f32)>; -def : InstRW<[M3WriteNEONX], (instrs FSQRTv4f32)>; -def : InstRW<[M3WriteNEONY], (instrs FSQRTv2f64)>; - -// ASIMD miscellaneous instructions. 
-def : InstRW<[M3WriteNALU1], (instregex "^RBITv")>; -def : InstRW<[M3WriteNALU1], (instregex "^(BIF|BIT|BSL|BSP)v")>; -def : InstRW<[M3WriteNEONB], (instregex "^DUPv.+gpr")>; -def : InstRW<[M3WriteNSHF1], (instregex "^DUPv.+lane")>; -def : InstRW<[M3WriteNSHF1], (instregex "^EXTv")>; -def : InstRW<[M3WriteNSHF1], (instregex "^[SU]?Q?XTU?Nv")>; -def : InstRW<[M3WriteNSHF1], (instregex "^DUP(i8|i16|i32|i64)$")>; -def : InstRW<[M3WriteNSHF1], (instregex "^INSv.+lane")>; -def : InstRW<[M3WriteMOVI], (instregex "^MOVI")>; -def : InstRW<[M3WriteNALU1], (instregex "^FMOVv")>; -def : InstRW<[M3WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev[248]")>; -def : InstRW<[M3WriteFMAC4, - M3ReadFMAC], (instregex "^F(RECP|RSQRT)Sv")>; -def : InstRW<[M3WriteNSHF1], (instregex "^REV(16|32|64)v")>; -def : InstRW<[M3WriteNSHF1], (instregex "^TB[LX]v")>; -def : InstRW<[M3WriteNEOND], (instregex "^[SU]MOVv")>; -def : InstRW<[M3WriteNSHF3], (instregex "^INSv.+gpr")>; -def : InstRW<[M3WriteNSHF1], (instregex "^(TRN|UZP|ZIP)[12]v")>; - -// ASIMD load instructions. 
-def : InstRW<[M3WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>; -def : InstRW<[M3WriteL5, - M3WriteA1], (instregex "LD1Onev(8b|4h|2s|1d)_POST")>; -def : InstRW<[M3WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>; -def : InstRW<[M3WriteL5, - M3WriteA1], (instregex "LD1Onev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[M3WriteVLDA, - M3WriteA1], (instregex "LD1Twov(8b|4h|2s|1d)_POST")>; -def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[M3WriteVLDA, - M3WriteA1], (instregex "LD1Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[M3WriteVLDB, - M3WriteA1], (instregex "LD1Threev(8b|4h|2s|1d)_POST")>; -def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[M3WriteVLDB, - M3WriteA1], (instregex "LD1Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[M3WriteVLDC, - M3WriteA1], (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>; -def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[M3WriteVLDC, - M3WriteA1], (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[M3WriteVLDD], (instregex "LD1i(8|16|32)$")>; -def : InstRW<[M3WriteVLDD, - M3WriteA1], (instregex "LD1i(8|16|32)_POST")>; -def : InstRW<[M3WriteVLDE], (instregex "LD1i(64)$")>; -def : InstRW<[M3WriteVLDE, - M3WriteA1], (instregex "LD1i(64)_POST")>; - -def : InstRW<[M3WriteL5], (instregex "LD1Rv(8b|4h|2s|1d)$")>; -def : InstRW<[M3WriteL5, - M3WriteA1], (instregex "LD1Rv(8b|4h|2s|1d)_POST")>; -def : InstRW<[M3WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>; -def : InstRW<[M3WriteL5, - M3WriteA1], (instregex "LD1Rv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>; -def : InstRW<[M3WriteVLDF, - M3WriteA1], (instregex "LD2Twov(8b|4h|2s)_POST")>; -def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(16b|8h|4s|2d)$")>; 
-def : InstRW<[M3WriteVLDF, - M3WriteA1], (instregex "LD2Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[M3WriteVLDG], (instregex "LD2i(8|16|32)$")>; -def : InstRW<[M3WriteVLDG, - M3WriteA1], (instregex "LD2i(8|16|32)_POST")>; -def : InstRW<[M3WriteVLDH], (instregex "LD2i(64)$")>; -def : InstRW<[M3WriteVLDH, - M3WriteA1], (instregex "LD2i(64)_POST")>; - -def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(8b|4h|2s|1d)$")>; -def : InstRW<[M3WriteVLDA, - M3WriteA1], (instregex "LD2Rv(8b|4h|2s|1d)_POST")>; -def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>; -def : InstRW<[M3WriteVLDA, - M3WriteA1], (instregex "LD2Rv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>; -def : InstRW<[M3WriteVLDI, - M3WriteA1], (instregex "LD3Threev(8b|4h|2s)_POST")>; -def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[M3WriteVLDI, - M3WriteA1], (instregex "LD3Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[M3WriteVLDJ], (instregex "LD3i(8|16|32)$")>; -def : InstRW<[M3WriteVLDJ, - M3WriteA1], (instregex "LD3i(8|16|32)_POST")>; -def : InstRW<[M3WriteVLDL], (instregex "LD3i(64)$")>; -def : InstRW<[M3WriteVLDL, - M3WriteA1], (instregex "LD3i(64)_POST")>; - -def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(8b|4h|2s|1d)$")>; -def : InstRW<[M3WriteVLDB, - M3WriteA1], (instregex "LD3Rv(8b|4h|2s|1d)_POST")>; -def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(16b|8h|4s|2d)$")>; -def : InstRW<[M3WriteVLDB, - M3WriteA1], (instregex "LD3Rv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[M3WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>; -def : InstRW<[M3WriteVLDN, - M3WriteA1], (instregex "LD4Fourv(8b|4h|2s)_POST")>; -def : InstRW<[M3WriteVLDN], (instregex "LD4Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[M3WriteVLDN, - M3WriteA1], (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[M3WriteVLDK], (instregex "LD4i(8|16|32)$")>; -def : InstRW<[M3WriteVLDK, - M3WriteA1], (instregex "LD4i(8|16|32)_POST")>; -def : InstRW<[M3WriteVLDM], 
(instregex "LD4i(64)$")>; -def : InstRW<[M3WriteVLDM, - M3WriteA1], (instregex "LD4i(64)_POST")>; - -def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(8b|4h|2s|1d)$")>; -def : InstRW<[M3WriteVLDC, - M3WriteA1], (instregex "LD4Rv(8b|4h|2s|1d)_POST")>; -def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(16b|8h|4s|2d)$")>; -def : InstRW<[M3WriteVLDC, - M3WriteA1], (instregex "LD4Rv(16b|8h|4s|2d)_POST")>; - -// ASIMD store instructions. -def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>; -def : InstRW<[WriteVST, - WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVST, - WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[M3WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[M3WriteVSTA, - WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST")>; -def : InstRW<[M3WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[M3WriteVSTA, - WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[M3WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[M3WriteVSTB, - WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST")>; -def : InstRW<[M3WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[M3WriteVSTB, - WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[M3WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[M3WriteVSTC, - WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST")>; -def : InstRW<[M3WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[M3WriteVSTC, - WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[M3WriteVSTD], (instregex "ST1i(8|16|32|64)$")>; -def : InstRW<[M3WriteVSTD, - WriteAdr], (instregex "ST1i(8|16|32|64)_POST")>; - -def : InstRW<[M3WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>; -def : InstRW<[M3WriteVSTD, - WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST")>; -def : InstRW<[M3WriteVSTE], (instregex "ST2Twov(16b|8h|4s|2d)$")>; 
-def : InstRW<[M3WriteVSTE, - WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[M3WriteVSTD], (instregex "ST2i(8|16|32)$")>; -def : InstRW<[M3WriteVSTD, - WriteAdr], (instregex "ST2i(8|16|32)_POST")>; -def : InstRW<[M3WriteVSTD], (instregex "ST2i(64)$")>; -def : InstRW<[M3WriteVSTD, - WriteAdr], (instregex "ST2i(64)_POST")>; - -def : InstRW<[M3WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>; -def : InstRW<[M3WriteVSTF, - WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST")>; -def : InstRW<[M3WriteVSTG], (instregex "ST3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[M3WriteVSTG, - WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[M3WriteVSTH], (instregex "ST3i(8|16|32)$")>; -def : InstRW<[M3WriteVSTH, - WriteAdr], (instregex "ST3i(8|16|32)_POST")>; -def : InstRW<[M3WriteVSTF], (instregex "ST3i(64)$")>; -def : InstRW<[M3WriteVSTF, - WriteAdr], (instregex "ST3i(64)_POST")>; - -def : InstRW<[M3WriteVSTF], (instregex "ST4Fourv(8b|4h|2s)$")>; -def : InstRW<[M3WriteVSTF, - WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST")>; -def : InstRW<[M3WriteVSTI], (instregex "ST4Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[M3WriteVSTI, - WriteAdr], (instregex "ST4Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[M3WriteVSTF], (instregex "ST4i(8|16|32|64)$")>; -def : InstRW<[M3WriteVSTF, - WriteAdr], (instregex "ST4i(8|16|32|64)_POST")>; - -// Cryptography instructions. -def : InstRW<[M3WriteAES], (instregex "^AES[DE]")>; -def : InstRW<[M3WriteAES, - M3ReadAES], (instregex "^AESI?MC")>; - -def : InstRW<[M3WriteNCRY3A], (instregex "^PMULL?v")>; - -def : InstRW<[M3WriteNCRY1A], (instregex "^SHA1([CHMP]|SU[01])")>; -def : InstRW<[M3WriteNCRY1A], (instregex "^SHA256SU0")>; -def : InstRW<[M3WriteNCRY5A], (instregex "^SHA256(H2?|SU1)")>; - -// CRC instructions. 
-def : InstRW<[M3WriteC2], (instregex "^CRC32")>; - -} // SchedModel = ExynosM3Model diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedExynosM4.td b/suite/synctools/tablegen/AArch64/AArch64SchedExynosM4.td deleted file mode 100644 index 94e70793e8..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedExynosM4.td +++ /dev/null @@ -1,1017 +0,0 @@ -//=- AArch64SchedExynosM4.td - Samsung Exynos M4 Sched Defs --*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for the Samsung Exynos M4 to support -// instruction scheduling and other instruction cost heuristics. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// The Exynos-M4 is an advanced superscalar microprocessor with a 6-wide -// in-order stage for decode and dispatch and a wider issue stage. -// The execution units and loads and stores are out-of-order. - -def ExynosM4Model : SchedMachineModel { - let IssueWidth = 6; // Up to 6 uops per cycle. - let MicroOpBufferSize = 228; // ROB size. - let LoopMicroOpBufferSize = 48; // Based on the instruction queue size. - let LoadLatency = 4; // Optimistic load cases. - let MispredictPenalty = 16; // Minimum branch misprediction penalty. - let CompleteModel = 1; // Use the default model otherwise. - - list UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F, - SMEUnsupported.F); -} - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available on the Exynos-M4. 
- -let SchedModel = ExynosM4Model in { - -def M4UnitA : ProcResource<2>; // Simple integer -def M4UnitC : ProcResource<2>; // Simple and complex integer -let Super = M4UnitC, BufferSize = 1 in -def M4UnitD : ProcResource<1>; // Integer division (inside C0, serialized) -let Super = M4UnitC in -def M4UnitE : ProcResource<1>; // CRC (inside C0) -def M4UnitB : ProcResource<2>; // Branch -def M4UnitL0 : ProcResource<1>; // Load -def M4UnitS0 : ProcResource<1>; // Store -def M4PipeLS : ProcResource<1>; // Load/Store -let Super = M4PipeLS in { - def M4UnitL1 : ProcResource<1>; - def M4UnitS1 : ProcResource<1>; -} -def M4PipeF0 : ProcResource<1>; // FP #0 -let Super = M4PipeF0 in { - def M4UnitFMAC0 : ProcResource<1>; // FP multiplication - def M4UnitFADD0 : ProcResource<1>; // Simple FP - def M4UnitFCVT0 : ProcResource<1>; // FP conversion - def M4UnitNALU0 : ProcResource<1>; // Simple vector - def M4UnitNHAD : ProcResource<1>; // Horizontal vector - def M4UnitNMSC : ProcResource<1>; // FP and vector miscellanea - def M4UnitNMUL0 : ProcResource<1>; // Vector multiplication - def M4UnitNSHT0 : ProcResource<1>; // Vector shifting - def M4UnitNSHF0 : ProcResource<1>; // Vector shuffling - def M4UnitNCRY0 : ProcResource<1>; // Cryptographic -} -def M4PipeF1 : ProcResource<1>; // FP #1 -let Super = M4PipeF1 in { - def M4UnitFMAC1 : ProcResource<1>; // FP multiplication - def M4UnitFADD1 : ProcResource<1>; // Simple FP - def M4UnitFDIV0 : ProcResource<2>; // FP division (serialized) - def M4UnitFSQR0 : ProcResource<2>; // FP square root (serialized) - def M4UnitFST0 : ProcResource<1>; // FP store - def M4UnitNALU1 : ProcResource<1>; // Simple vector - def M4UnitNSHT1 : ProcResource<1>; // Vector shifting - def M4UnitNSHF1 : ProcResource<1>; // Vector shuffling -} -def M4PipeF2 : ProcResource<1>; // FP #2 -let Super = M4PipeF2 in { - def M4UnitFMAC2 : ProcResource<1>; // FP multiplication - def M4UnitFADD2 : ProcResource<1>; // Simple FP - def M4UnitFCVT1 : ProcResource<1>; // 
FP conversion - def M4UnitFDIV1 : ProcResource<2>; // FP division (serialized) - def M4UnitFSQR1 : ProcResource<2>; // FP square root (serialized) - def M4UnitFST1 : ProcResource<1>; // FP store - def M4UnitNALU2 : ProcResource<1>; // Simple vector - def M4UnitNMUL1 : ProcResource<1>; // Vector multiplication - def M4UnitNSHT2 : ProcResource<1>; // Vector shifting - def M4UnitNCRY1 : ProcResource<1>; // Cryptographic -} - -def M4UnitALU : ProcResGroup<[M4UnitA, - M4UnitC]>; -def M4UnitL : ProcResGroup<[M4UnitL0, - M4UnitL1]>; -def M4UnitS : ProcResGroup<[M4UnitS0, - M4UnitS1]>; -def M4UnitFMAC : ProcResGroup<[M4UnitFMAC0, - M4UnitFMAC1, - M4UnitFMAC2]>; -def M4UnitFMACH : ProcResGroup<[M4UnitFMAC0, - M4UnitFMAC1]>; -def M4UnitFADD : ProcResGroup<[M4UnitFADD0, - M4UnitFADD1, - M4UnitFADD2]>; -def M4UnitFADDH : ProcResGroup<[M4UnitFADD0, - M4UnitFADD1]>; -def M4UnitFCVT : ProcResGroup<[M4UnitFCVT0, - M4UnitFCVT1]>; -def M4UnitFCVTH : ProcResGroup<[M4UnitFCVT0]>; -def M4UnitFDIV : ProcResGroup<[M4UnitFDIV0, - M4UnitFDIV1]>; -def M4UnitFDIVH : ProcResGroup<[M4UnitFDIV0]>; -def M4UnitFSQR : ProcResGroup<[M4UnitFSQR0, - M4UnitFSQR1]>; -def M4UnitFSQRH : ProcResGroup<[M4UnitFSQR0]>; -def M4UnitFST : ProcResGroup<[M4UnitFST0, - M4UnitFST1]>; -def M4UnitNALU : ProcResGroup<[M4UnitNALU0, - M4UnitNALU1, - M4UnitNALU2]>; -def M4UnitNALUH : ProcResGroup<[M4UnitNALU0, - M4UnitNALU1]>; -def M4UnitNMUL : ProcResGroup<[M4UnitNMUL0, - M4UnitNMUL1]>; -def M4UnitNSHT : ProcResGroup<[M4UnitNSHT0, - M4UnitNSHT1, - M4UnitNSHT2]>; -def M4UnitNSHF : ProcResGroup<[M4UnitNSHF0, - M4UnitNSHF1]>; -def M4UnitNSHFH : ProcResGroup<[M4UnitNSHF0]>; -def M4UnitNCRY : ProcResGroup<[M4UnitNCRY0, - M4UnitNCRY1]>; - -//===----------------------------------------------------------------------===// -// Resources details. 
- -def M4WriteZ0 : SchedWriteRes<[]> { let Latency = 0; } -def M4WriteZ1 : SchedWriteRes<[]> { let Latency = 1; - let NumMicroOps = 0; } -def M4WriteZ4 : SchedWriteRes<[]> { let Latency = 4; - let NumMicroOps = 0; } - -def M4WriteA1 : SchedWriteRes<[M4UnitALU]> { let Latency = 1; } -def M4WriteA2 : SchedWriteRes<[M4UnitALU]> { let Latency = 2; } -def M4WriteAA : SchedWriteRes<[M4UnitALU]> { let Latency = 2; - let ResourceCycles = [2]; } -def M4WriteAB : SchedWriteRes<[M4UnitALU, - M4UnitC]> { let Latency = 2; - let NumMicroOps = 2; } -def M4WriteAC : SchedWriteRes<[M4UnitALU, - M4UnitALU, - M4UnitC]> { let Latency = 3; - let NumMicroOps = 3; } -def M4WriteAD : SchedWriteRes<[M4UnitALU, - M4UnitC]> { let Latency = 2; - let NumMicroOps = 2; } -def M4WriteAF : SchedWriteRes<[M4UnitALU]> { let Latency = 2; - let NumMicroOps = 2; } -def M4WriteAU : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar, - SchedVar]>; -def M4WriteAV : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar, - SchedVar]>; -def M4WriteAX : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar]>; -def M4WriteAY : SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M4WriteB1 : SchedWriteRes<[M4UnitB]> { let Latency = 1; } -def M4WriteBX : SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M4WriteC1 : SchedWriteRes<[M4UnitC]> { let Latency = 1; } -def M4WriteC3 : SchedWriteRes<[M4UnitC]> { let Latency = 3; } -def M4WriteCA : SchedWriteRes<[M4UnitC]> { let Latency = 4; - let ResourceCycles = [2]; } - -def M4WriteD12 : SchedWriteRes<[M4UnitD]> { let Latency = 12; - let ResourceCycles = [12]; } -def M4WriteD21 : SchedWriteRes<[M4UnitD]> { let Latency = 21; - let ResourceCycles = [21]; } - -def M4WriteE2 : SchedWriteRes<[M4UnitE]> { let Latency = 2; } - -def M4WriteL4 : SchedWriteRes<[M4UnitL]> { let Latency = 4; } -def M4WriteL5 : SchedWriteRes<[M4UnitL]> { let Latency = 5; } -def M4WriteLA : SchedWriteRes<[M4UnitL, - M4UnitL]> { let Latency = 5; - let NumMicroOps = 1; } -def M4WriteLB : 
SchedWriteRes<[M4UnitA, - M4UnitL]> { let Latency = 5; - let NumMicroOps = 2; } -def M4WriteLC : SchedWriteRes<[M4UnitA, - M4UnitL, - M4UnitL]> { let Latency = 5; - let NumMicroOps = 2; } -def M4WriteLD : SchedWriteRes<[M4UnitA, - M4UnitL]> { let Latency = 4; - let NumMicroOps = 2; } -def M4WriteLE : SchedWriteRes<[M4UnitA, - M4UnitL]> { let Latency = 6; - let NumMicroOps = 2; } -def M4WriteLH : SchedWriteRes<[]> { let Latency = 5; - let NumMicroOps = 0; } -def M4WriteLX : SchedWriteVariant<[SchedVar, - SchedVar]>; -def M4WriteLY : SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M4WriteS1 : SchedWriteRes<[M4UnitS]> { let Latency = 1; } -def M4WriteSA : SchedWriteRes<[M4UnitS0]> { let Latency = 3; } -def M4WriteSB : SchedWriteRes<[M4UnitA, - M4UnitS]> { let Latency = 2; - let NumMicroOps = 1; } -def M4WriteSX : SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M4ReadAdrBase : SchedReadVariant<[SchedVar< - MCSchedPredicate< - CheckAny< - [ScaledIdxFn, - ExynosScaledIdxFn]>>, [ReadDefault]>, - SchedVar]>; - -def M4WriteNEONA : SchedWriteRes<[M4UnitNSHF, - M4UnitFADD]> { let Latency = 3; - let NumMicroOps = 2; } -def M4WriteNEONB : SchedWriteRes<[M4UnitNALU, - M4UnitS0]> { let Latency = 5; - let NumMicroOps = 2; } -def M4WriteNEOND : SchedWriteRes<[M4UnitNSHF, - M4UnitFST]> { let Latency = 6; - let NumMicroOps = 2; } -def M4WriteNEONH : SchedWriteRes<[M4UnitNALU, - M4UnitS0]> { let Latency = 5; - let NumMicroOps = 2; } -def M4WriteNEONI : SchedWriteRes<[M4UnitNSHF, - M4UnitS0]> { let Latency = 2; - let NumMicroOps = 2; } -def M4WriteNEONJ : SchedWriteRes<[M4UnitNMSC, - M4UnitS0]> { let Latency = 4; } -def M4WriteNEONK : SchedWriteRes<[M4UnitNSHF, - M4UnitNMSC, - M4UnitS0]> { let Latency = 5; - let NumMicroOps = 2; } -def M4WriteNEONL : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; } -def M4WriteNEONN : SchedWriteRes<[M4UnitNMSC, - M4UnitNMSC]> { let Latency = 5; - let NumMicroOps = 2; } -def M4WriteNEONO : SchedWriteRes<[M4UnitNMSC, - M4UnitNMSC, - M4UnitNMSC]> { 
let Latency = 8; - let NumMicroOps = 3; } -def M4WriteNEONP : SchedWriteRes<[M4UnitNSHF, - M4UnitNMSC]> { let Latency = 4; - let NumMicroOps = 2; } -def M4WriteNEONQ : SchedWriteRes<[M4UnitNMSC, - M4UnitC]> { let Latency = 3; - let NumMicroOps = 1; } -def M4WriteNEONR : SchedWriteRes<[M4UnitFCVT0, - M4UnitS0]> { let Latency = 4; - let NumMicroOps = 1; } -def M4WriteNEONV : SchedWriteRes<[M4UnitFDIV, - M4UnitFDIV]> { let Latency = 7; - let ResourceCycles = [6, 6]; } -def M4WriteNEONVH : SchedWriteRes<[M4UnitFDIVH, - M4UnitFDIVH]> { let Latency = 7; - let ResourceCycles = [6, 6]; } -def M4WriteNEONW : SchedWriteRes<[M4UnitFDIV, - M4UnitFDIV]> { let Latency = 12; - let ResourceCycles = [9, 9]; } -def M4WriteNEONX : SchedWriteRes<[M4UnitFSQR, - M4UnitFSQR]> { let Latency = 8; - let ResourceCycles = [7, 7]; } -def M4WriteNEONXH : SchedWriteRes<[M4UnitFSQRH, - M4UnitFSQRH]> { let Latency = 7; - let ResourceCycles = [6, 6]; } -def M4WriteNEONY : SchedWriteRes<[M4UnitFSQR, - M4UnitFSQR]> { let Latency = 12; - let ResourceCycles = [9, 9]; } -def M4WriteNEONZ : SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M4WriteFADD2 : SchedWriteRes<[M4UnitFADD]> { let Latency = 2; } -def M4WriteFADD2H : SchedWriteRes<[M4UnitFADDH]> { let Latency = 2; } - -def M4WriteFCVT2 : SchedWriteRes<[M4UnitFCVT]> { let Latency = 2; } -def M4WriteFCVT2A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 2; } -def M4WriteFCVT2H : SchedWriteRes<[M4UnitFCVTH]> { let Latency = 2; } -def M4WriteFCVT3 : SchedWriteRes<[M4UnitFCVT]> { let Latency = 3; } -def M4WriteFCVT3A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 3; } -def M4WriteFCVT3H : SchedWriteRes<[M4UnitFCVTH]> { let Latency = 3; } -def M4WriteFCVT4 : SchedWriteRes<[M4UnitFCVT]> { let Latency = 4; } -def M4WriteFCVT4A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 4; } -def M4WriteFCVT6A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 6; } - -def M4WriteFDIV7 : SchedWriteRes<[M4UnitFDIV]> { let Latency = 7; - let ResourceCycles = [6]; } -def 
M4WriteFDIV7H : SchedWriteRes<[M4UnitFDIVH]> { let Latency = 7; - let ResourceCycles = [6]; } -def M4WriteFDIV12 : SchedWriteRes<[M4UnitFDIV]> { let Latency = 12; - let ResourceCycles = [9]; } - -def M4WriteFMAC2H : SchedWriteRes<[M4UnitFMACH]> { let Latency = 2; } -def M4WriteFMAC3H : SchedWriteRes<[M4UnitFMACH]> { let Latency = 3; } -def M4WriteFMAC3 : SchedWriteRes<[M4UnitFMAC]> { let Latency = 3; } -def M4WriteFMAC4 : SchedWriteRes<[M4UnitFMAC]> { let Latency = 4; } -def M4WriteFMAC4H : SchedWriteRes<[M4UnitFMACH]> { let Latency = 4; } -def M4WriteFMAC5 : SchedWriteRes<[M4UnitFMAC]> { let Latency = 5; } - -def M4WriteFSQR7H : SchedWriteRes<[M4UnitFSQRH]> { let Latency = 7; - let ResourceCycles = [6]; } -def M4WriteFSQR8 : SchedWriteRes<[M4UnitFSQR]> { let Latency = 8; - let ResourceCycles = [7]; } -def M4WriteFSQR12 : SchedWriteRes<[M4UnitFSQR]> { let Latency = 12; - let ResourceCycles = [9]; } - -def M4WriteNALU1 : SchedWriteRes<[M4UnitNALU]> { let Latency = 1; } -def M4WriteNALU1H : SchedWriteRes<[M4UnitNALUH]> { let Latency = 1; } - -def M4WriteNCRY1 : SchedWriteRes<[M4UnitNCRY]> { let Latency = 1; } -def M4WriteNCRY1A : SchedWriteRes<[M4UnitNCRY0]> { let Latency = 1; } -def M4WriteNCRY3A : SchedWriteRes<[M4UnitNCRY0]> { let Latency = 3; } -def M4WriteNCRY5A : SchedWriteRes<[M4UnitNCRY]> { let Latency = 5; } - -def M4WriteNHAD1 : SchedWriteRes<[M4UnitNHAD]> { let Latency = 1; } -def M4WriteNHAD3 : SchedWriteRes<[M4UnitNHAD]> { let Latency = 3; } - -def M4WriteNMSC1 : SchedWriteRes<[M4UnitNMSC]> { let Latency = 1; } -def M4WriteNMSC2 : SchedWriteRes<[M4UnitNMSC]> { let Latency = 2; } -def M4WriteNMSC3 : SchedWriteRes<[M4UnitNMSC]> { let Latency = 3; } - -def M4WriteNMUL3 : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; } - -def M4WriteNSHF1 : SchedWriteRes<[M4UnitNSHF]> { let Latency = 1; } -def M4WriteNSHF1H : SchedWriteRes<[M4UnitNSHFH]> { let Latency = 1; } -def M4WriteNSHF3 : SchedWriteRes<[M4UnitNSHF]> { let Latency = 3; } -def M4WriteNSHFA : 
SchedWriteRes<[M4UnitNSHF]> { let Latency = 1; - let ResourceCycles = [2]; } -def M4WriteNSHFB : SchedWriteRes<[M4UnitNSHF]> { let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [2]; } -def M4WriteNSHFC : SchedWriteRes<[M4UnitNSHF]> { let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [4]; } -def M4WriteNSHFD : SchedWriteRes<[M4UnitNSHF]> { let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [4]; } - -def M4WriteNSHT1 : SchedWriteRes<[M4UnitNSHT]> { let Latency = 1; } -def M4WriteNSHT2 : SchedWriteRes<[M4UnitNSHT]> { let Latency = 2; } -def M4WriteNSHT3 : SchedWriteRes<[M4UnitNSHT]> { let Latency = 3; } -def M4WriteNSHT4A : SchedWriteRes<[M4UnitNSHT1]> { let Latency = 4; } - -def M4WriteVLDA : SchedWriteRes<[M4UnitL, - M4UnitL]> { let Latency = 5; - let NumMicroOps = 2; } -def M4WriteVLDB : SchedWriteRes<[M4UnitL, - M4UnitL, - M4UnitL]> { let Latency = 6; - let NumMicroOps = 3; } -def M4WriteVLDC : SchedWriteRes<[M4UnitL, - M4UnitL, - M4UnitL, - M4UnitL]> { let Latency = 6; - let NumMicroOps = 4; } -def M4WriteVLDD : SchedWriteRes<[M4UnitL, - M4UnitNSHF]> { let Latency = 6; - let NumMicroOps = 2; - let ResourceCycles = [2, 1]; } -def M4WriteVLDF : SchedWriteRes<[M4UnitL, - M4UnitL]> { let Latency = 10; - let NumMicroOps = 2; - let ResourceCycles = [3, 3]; } -def M4WriteVLDG : SchedWriteRes<[M4UnitL, - M4UnitNSHF, - M4UnitNSHF]> { let Latency = 6; - let NumMicroOps = 3; - let ResourceCycles = [2, 1, 1]; } -def M4WriteVLDI : SchedWriteRes<[M4UnitL, - M4UnitL, - M4UnitL]> { let Latency = 12; - let NumMicroOps = 3; - let ResourceCycles = [3, 3, 3]; } -def M4WriteVLDJ : SchedWriteRes<[M4UnitL, - M4UnitNSHF, - M4UnitNSHF, - M4UnitNSHF]> { let Latency = 7; - let NumMicroOps = 4; - let ResourceCycles = [3, 1, 1, 1]; } -def M4WriteVLDK : SchedWriteRes<[M4UnitL, - M4UnitNSHF, - M4UnitNSHF, - M4UnitNSHF, - M4UnitNSHF]> { let Latency = 7; - let NumMicroOps = 5; - let ResourceCycles = [3, 1, 1, 1, 1]; } -def M4WriteVLDL : 
SchedWriteRes<[M4UnitL, - M4UnitNSHF, - M4UnitNSHF, - M4UnitL, - M4UnitNSHF]> { let Latency = 7; - let NumMicroOps = 5; - let ResourceCycles = [3, 1, 1, 6, 1]; } -def M4WriteVLDM : SchedWriteRes<[M4UnitL, - M4UnitNSHF, - M4UnitNSHF, - M4UnitL, - M4UnitNSHF, - M4UnitNSHF]> { let Latency = 7; - let NumMicroOps = 6; - let ResourceCycles = [3, 1, 1, 3, 1, 1]; } -def M4WriteVLDN : SchedWriteRes<[M4UnitL, - M4UnitL, - M4UnitL, - M4UnitL]> { let Latency = 14; - let NumMicroOps = 4; - let ResourceCycles = [3, 3, 3, 3]; } - -def M4WriteVST1 : SchedWriteRes<[M4UnitS, - M4UnitFST]> { let Latency = 1; - let NumMicroOps = 1; } -def M4WriteVSTA : WriteSequence<[WriteVST], 2>; -def M4WriteVSTB : WriteSequence<[WriteVST], 3>; -def M4WriteVSTC : WriteSequence<[WriteVST], 4>; -def M4WriteVSTD : SchedWriteRes<[M4UnitS, - M4UnitFST]> { let Latency = 2; } -def M4WriteVSTE : SchedWriteRes<[M4UnitS, - M4UnitFST, - M4UnitS, - M4UnitFST]> { let Latency = 2; - let NumMicroOps = 2; } -def M4WriteVSTF : SchedWriteRes<[M4UnitNSHF, - M4UnitS, - M4UnitFST, - M4UnitS, - M4UnitFST]> { let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [1, 2, 1, 2, 1]; } -def M4WriteVSTG : SchedWriteRes<[M4UnitNSHF, - M4UnitNSHF, - M4UnitNSHF, - M4UnitS, - M4UnitFST, - M4UnitS, - M4UnitFST, - M4UnitS, - M4UnitFST]> { let Latency = 5; - let NumMicroOps = 6; - let ResourceCycles = [1, 1, 1, 2, 1, 2, 1, 2, 1]; } -def M4WriteVSTI : SchedWriteRes<[M4UnitNSHF, - M4UnitNSHF, - M4UnitNSHF, - M4UnitNSHF, - M4UnitS, - M4UnitFST, - M4UnitS, - M4UnitFST, - M4UnitS, - M4UnitFST, - M4UnitS, - M4UnitFST]> { let Latency = 8; - let NumMicroOps = 5; - let ResourceCycles = [1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1]; } -def M4WriteVSTJ : SchedWriteRes<[M4UnitA, - M4UnitS, - M4UnitFST, - M4UnitS, - M4UnitFST]> { let Latency = 1; - let NumMicroOps = 2; } -def M4WriteVSTK : SchedWriteRes<[M4UnitA, - M4UnitS, - M4UnitFST]> { let Latency = 3; - let NumMicroOps = 2; } -def M4WriteVSTL : SchedWriteRes<[M4UnitNSHF, - M4UnitNSHF, - 
M4UnitS, - M4UnitFST, - M4UnitS, - M4UnitFST]> { let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [1, 1, 2, 1, 2, 1]; } -def M4WriteVSTY : SchedWriteVariant<[SchedVar, - SchedVar]>; - -// Special cases. -def M4WriteCOPY : SchedWriteVariant<[SchedVar, - SchedVar]>; -def M4WriteMOVI : SchedWriteVariant<[SchedVar, - SchedVar]>; - -// Fast forwarding. -def M4ReadAESM1 : SchedReadAdvance<+1, [M4WriteNCRY1]>; -def M4ReadFMACM1 : SchedReadAdvance<+1, [M4WriteFMAC4, - M4WriteFMAC4H, - M4WriteFMAC5]>; -def M4ReadNMULM1 : SchedReadAdvance<+1, [M4WriteNMUL3]>; -def M4ReadNMULP2 : SchedReadAdvance<-2, [M4WriteNMUL3]>; - - -//===----------------------------------------------------------------------===// -// Coarse scheduling model. - -// Branch instructions. -def : SchedAlias; -def : SchedAlias; - -// Arithmetic and logical integer instructions. -def : SchedAlias; -def : SchedAlias; // FIXME: M4WriteAX crashes TableGen. -def : SchedAlias; // FIXME: M4WriteAX crashes TableGen. -def : SchedAlias; - -// Move instructions. -def : SchedAlias; - -// Divide and multiply instructions. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - -// Miscellaneous instructions. -def : SchedAlias; - -// Addressing modes. -def : SchedAlias; -def : SchedAlias; - -// Load instructions. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - -// Store instructions. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - -// FP data instructions. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - -// FP miscellaneous instructions. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - -// FP load instructions. -def : SchedAlias; - -// FP store instructions. -def : SchedAlias; - -// ASIMD FP instructions. -def : SchedAlias; -def : SchedAlias; - -// Other miscellaneous instructions. 
-def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -//===----------------------------------------------------------------------===// -// Generic fast forwarding. - -// TODO: Add FP register forwarding rules. - -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -// TODO: The forwarding for 32 bits actually saves 2 cycles. -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -//===----------------------------------------------------------------------===// -// Finer scheduling model. - -// Branch instructions -def : InstRW<[M4WriteB1], (instrs Bcc)>; -def : InstRW<[M4WriteAF], (instrs BL)>; -def : InstRW<[M4WriteBX], (instrs BLR)>; -def : InstRW<[M4WriteC1], (instregex "^CBN?Z[WX]")>; -def : InstRW<[M4WriteAD], (instregex "^TBN?Z[WX]")>; - -// Arithmetic and logical integer instructions. -def : InstRW<[M4WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)[WX]rs$")>; -def : InstRW<[M4WriteAU], (instrs ORRWrs, ORRXrs)>; -def : InstRW<[M4WriteAX], (instregex "^(ADD|AND|BIC|SUB)S[WX]rs$")>; -def : InstRW<[M4WriteAX], (instregex "^(ADD|SUB)S?[WX]rx(64)?$")>; -def : InstRW<[M4WriteAV], (instrs ADDWri, ADDXri, ORRWri, ORRXri)>; - -// Move instructions. -def : InstRW<[M4WriteCOPY], (instrs COPY)>; -def : InstRW<[M4WriteZ0], (instrs ADR, ADRP)>; -def : InstRW<[M4WriteZ0], (instregex "^MOV[NZ][WX]i")>; - -// Divide and multiply instructions. - -// Miscellaneous instructions. - -// Load instructions. 
-def : InstRW<[M4WriteLD, - WriteLDHi, - WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>; -def : InstRW<[M4WriteL5, - ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>; -def : InstRW<[WriteLDIdx, - ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>; -def : InstRW<[M4WriteL5, - ReadAdrBase], (instrs PRFMroW)>; -def : InstRW<[WriteLDIdx, - ReadAdrBase], (instrs PRFMroX)>; - -// Store instructions. -def : InstRW<[M4WriteSB, - ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>; -def : InstRW<[WriteST, - ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>; - -// FP data instructions. -def : InstRW<[M4WriteNSHF1H], (instrs FABSHr)>; -def : InstRW<[M4WriteNSHF1], (instregex "^FABS[SD]r")>; -def : InstRW<[M4WriteFADD2H], (instregex "^F(ADD|SUB)Hrr")>; -def : InstRW<[M4WriteFADD2], (instregex "^F(ADD|SUB)[SD]rr")>; -def : InstRW<[M4WriteFADD2H], (instregex "^FADDPv.i16")>; -def : InstRW<[M4WriteFADD2], (instregex "^FADDPv.i(32|64)")>; -def : InstRW<[M4WriteNEONQ], (instregex "^FCCMPE?[HSD]rr")>; -def : InstRW<[M4WriteNMSC2], (instregex "^FCMPE?[HSD]r[ir]")>; -def : InstRW<[M4WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(16|32|64|v1)")>; -def : InstRW<[M4WriteFDIV7H], (instrs FDIVHrr)>; -def : InstRW<[M4WriteFDIV7], (instrs FDIVSrr)>; -def : InstRW<[M4WriteFDIV12], (instrs FDIVDrr)>; -def : InstRW<[M4WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?[HSD]rr")>; -def : InstRW<[M4WriteFMAC3H], (instregex "^FN?MULHrr")>; -def : InstRW<[M4WriteFMAC3], (instregex "^FN?MUL[SD]rr")>; -def : InstRW<[M4WriteFMAC3H], (instrs FMULX16)>; -def : InstRW<[M4WriteFMAC3], (instregex "^FMULX(32|64)")>; -def : InstRW<[M4WriteFMAC4H, - M4ReadFMACM1], (instregex "^FN?M(ADD|SUB)Hrrr")>; -def : InstRW<[M4WriteFMAC4, - M4ReadFMACM1], (instregex "^FN?M(ADD|SUB)[SD]rrr")>; -def : InstRW<[M4WriteNALU1H], (instrs FNEGHr)>; -def : InstRW<[M4WriteNALU1], (instregex "^FNEG[SD]r")>; -def : InstRW<[M4WriteFCVT3A], (instregex "^FRINT.+r")>; -def : InstRW<[M4WriteNEONH], (instregex 
"^FCSEL[HSD]rrr")>; -def : InstRW<[M4WriteFSQR7H], (instrs FSQRTHr)>; -def : InstRW<[M4WriteFSQR8], (instrs FSQRTSr)>; -def : InstRW<[M4WriteFSQR12], (instrs FSQRTDr)>; - -// FP miscellaneous instructions. -def : InstRW<[M4WriteFCVT2H], (instregex "^FCVTH[SD]r")>; -def : InstRW<[M4WriteFCVT2H], (instregex "^FCVT[SD]Hr")>; -def : InstRW<[M4WriteFCVT2], (instregex "^FCVT[SD][SD]r")>; -def : InstRW<[M4WriteFCVT6A], (instregex "^[SU]CVTF[SU][XW][HSD]ri")>; -def : InstRW<[M4WriteNEONR], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>; -def : InstRW<[M4WriteNALU1], (instregex "^FMOV[HSD][ir]")>; -def : InstRW<[M4WriteSA], (instregex "^FMOV[WX][HSD]r")>; -def : InstRW<[M4WriteNEONJ], (instregex "^FMOV[HSD][WX]r")>; -def : InstRW<[M4WriteNEONI], (instregex "^FMOVXDHighr")>; -def : InstRW<[M4WriteNEONK], (instregex "^FMOVDXHighr")>; -def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev1f16")>; -def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev1i(32|64)")>; -def : InstRW<[M4WriteNMSC1], (instregex "^FRECPXv1")>; -def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)S16")>; -def : InstRW<[M4WriteFMAC4], (instregex "^F(RECP|RSQRT)S(32|64)")>; - -// FP load instructions. 
-def : InstRW<[WriteVLD], (instregex "^LDR[SDQ]l")>; -def : InstRW<[WriteVLD], (instregex "^LDUR[BHSDQ]i")>; -def : InstRW<[WriteVLD, - WriteAdr], (instregex "^LDR[BHSDQ](post|pre)")>; -def : InstRW<[WriteVLD], (instregex "^LDR[BHSDQ]ui")>; -def : InstRW<[M4WriteLE, - ReadAdrBase], (instregex "^LDR[BHSDQ]roW")>; -def : InstRW<[WriteVLD, - ReadAdrBase], (instregex "^LDR[BHSD]roX")>; -def : InstRW<[M4WriteLY, - ReadAdrBase], (instrs LDRQroX)>; -def : InstRW<[WriteVLD, - M4WriteLH], (instregex "^LDN?P[SD]i")>; -def : InstRW<[M4WriteLA, - M4WriteLH], (instregex "^LDN?PQi")>; -def : InstRW<[M4WriteL5, - M4WriteLH, - WriteAdr], (instregex "^LDP[SD]post")>; -def : InstRW<[M4WriteLB, - M4WriteLH, - WriteAdr], (instrs LDPQpost)>; -def : InstRW<[M4WriteLB, - M4WriteLH, - WriteAdr], (instregex "^LDP[SD]pre")>; -def : InstRW<[M4WriteLC, - M4WriteLH, - WriteAdr], (instrs LDPQpre)>; - -// FP store instructions. -def : InstRW<[WriteVST], (instregex "^STUR[BHSDQ]i")>; -def : InstRW<[WriteVST, - WriteAdr], (instregex "^STR[BHSDQ](post|pre)")>; -def : InstRW<[WriteVST], (instregex "^STR[BHSDQ]ui")>; -def : InstRW<[M4WriteVSTK, - ReadAdrBase], (instregex "^STR[BHSD]roW")>; -def : InstRW<[M4WriteVSTK, - ReadAdrBase], (instrs STRQroW)>; -def : InstRW<[WriteVST, - ReadAdrBase], (instregex "^STR[BHSD]roX")>; -def : InstRW<[M4WriteVSTY, - ReadAdrBase], (instrs STRQroX)>; -def : InstRW<[WriteVST], (instregex "^STN?P[SD]i")>; -def : InstRW<[M4WriteVSTJ], (instregex "^STN?PQi")>; -def : InstRW<[WriteVST, - WriteAdr], (instregex "^STP[SD](post|pre)")>; -def : InstRW<[M4WriteVSTJ, - WriteAdr], (instregex "^STPQ(post|pre)")>; - -// ASIMD instructions. 
-def : InstRW<[M4WriteNHAD1], (instregex "^[SU]ABDL?v")>; -def : InstRW<[M4WriteNHAD3], (instregex "^[SU]ABAL?v")>; -def : InstRW<[M4WriteNMSC1], (instregex "^ABSv")>; -def : InstRW<[M4WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>; -def : InstRW<[M4WriteNHAD3], (instregex "^[SU]?ADDL?Pv")>; -def : InstRW<[M4WriteNHAD3], (instregex "^[SU]H(ADD|SUB)v")>; -def : InstRW<[M4WriteNHAD3], (instregex "^[SU](ADD|SUB)[LW]v")>; -def : InstRW<[M4WriteNHAD3], (instregex "^R?(ADD|SUB)HN2?v")>; -def : InstRW<[M4WriteNHAD3], (instregex "^[SU]Q(ADD|SUB)v")>; -def : InstRW<[M4WriteNHAD3], (instregex "^(SU|US)QADDv")>; -def : InstRW<[M4WriteNHAD3], (instregex "^[SU]RHADDv")>; -def : InstRW<[M4WriteNMSC1], (instregex "^SQ(ABS|NEG)v")>; -def : InstRW<[M4WriteNHAD3], (instregex "^[SU]?ADDL?Vv")>; -def : InstRW<[M4WriteNMSC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>; -def : InstRW<[M4WriteNALU1], (instregex "^CMTSTv")>; -def : InstRW<[M4WriteNALU1], (instregex "^(AND|BIC|EOR|NOT|ORN|ORR)v")>; -def : InstRW<[M4WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>; -def : InstRW<[M4WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>; -def : InstRW<[M4WriteNHAD3], (instregex "^[SU](MIN|MAX)Vv")>; -def : InstRW<[M4WriteNMUL3, - M4ReadNMULM1], (instregex "^ML[AS]v")>; -def : InstRW<[M4WriteNMUL3, - M4ReadNMULM1], (instregex "^(SQR?D)?MULH?v")>; -def : InstRW<[M4WriteNMUL3, - M4ReadNMULM1], (instregex "^SQRDML[AS]H")>; -def : InstRW<[M4WriteNMUL3, - M4ReadNMULM1], (instregex "^(S|U|SQD)ML[AS]L(v1(i32|i64)|v2i32|v4i16|v8i8)")>; -def : InstRW<[M4WriteNMUL3, - M4ReadNMULP2], (instregex "^(S|U|SQD)ML[AS]L(v4i32|v8i16|v16i8)")>; -def : InstRW<[M4WriteNMUL3, - M4ReadNMULM1], (instregex "^(S|U|SQD)MULL(v1(i32|i64)|v2i32|v4i16|v8i8)")>; -def : InstRW<[M4WriteNMUL3, - M4ReadNMULP2], (instregex "^(S|U|SQD)MULL(v4i32|v8i16|v16i8)")>; -def : InstRW<[M4WriteNMUL3], (instregex "^[SU]DOT(lane)?v")>; -def : InstRW<[M4WriteNHAD3], (instregex "^[SU]ADALPv")>; -def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]R?SRA[dv]")>; -def : 
InstRW<[M4WriteNSHT1], (instregex "^SHL[dv]")>; -def : InstRW<[M4WriteNSHT1], (instregex "^S[LR]I[dv]")>; -def : InstRW<[M4WriteNSHT1], (instregex "^[SU]SH[LR][dv]")>; -def : InstRW<[M4WriteNSHT2], (instregex "^[SU]?SHLLv")>; -def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]?Q?R?SHRU?N[bhsv]")>; -def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]RSH[LR][dv]")>; -def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]QR?SHLU?[bhsdv]")>; - -// ASIMD FP instructions. -def : InstRW<[M4WriteNSHF1H], (instregex "^FABSv.f16")>; -def : InstRW<[M4WriteNSHF1], (instregex "^FABSv.f(32|64)")>; -def : InstRW<[M4WriteFADD2H], (instregex "^F(ABD|ADD|SUB)v.f16")>; -def : InstRW<[M4WriteFADD2], (instregex "^F(ABD|ADD|SUB)v.f(32|64)")>; -def : InstRW<[M4WriteFADD2H], (instregex "^FADDPv.f16")>; -def : InstRW<[M4WriteFADD2], (instregex "^FADDPv.f(32|64)")>; -def : InstRW<[M4WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>; -def : InstRW<[M4WriteFCVT2], (instregex "^FCVT(L|N|XN)v")>; -def : InstRW<[M4WriteFCVT2A], (instregex "^FCVT[AMNPZ][SU]v")>; -def : InstRW<[M4WriteFCVT2H], (instregex "^[SU]CVTFv.[fi]16")>; -def : InstRW<[M4WriteFCVT2], (instregex "^[SU]CVTFv.[fi](32|64)")>; -def : InstRW<[M4WriteFDIV7H], (instrs FDIVv4f16)>; -def : InstRW<[M4WriteNEONVH], (instrs FDIVv8f16)>; -def : InstRW<[M4WriteFDIV7], (instrs FDIVv2f32)>; -def : InstRW<[M4WriteNEONV], (instrs FDIVv4f32)>; -def : InstRW<[M4WriteNEONW], (instrs FDIVv2f64)>; -def : InstRW<[M4WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?v")>; -def : InstRW<[M4WriteNMSC2], (instregex "^F(MAX|MIN)(NM)?Pv")>; -def : InstRW<[M4WriteNEONZ], (instregex "^F(MAX|MIN)(NM)?Vv")>; -def : InstRW<[M4WriteFMAC2H], (instregex "^FMULX?v.[fi]16")>; -def : InstRW<[M4WriteFMAC3], (instregex "^FMULX?v.[fi](32|64)")>; -def : InstRW<[M4WriteFMAC4H, - M4ReadFMACM1], (instregex "^FML[AS]v.[fi]16")>; -def : InstRW<[M4WriteFMAC4, - M4ReadFMACM1], (instregex "^FML[AS]v.[fi](32|64)")>; -def : InstRW<[M4WriteNALU1H], (instregex "^FNEGv.f16")>; -def : 
InstRW<[M4WriteNALU1], (instregex "^FNEGv.f(32|64)")>; -def : InstRW<[M4WriteFCVT3A], (instregex "^FRINT[AIMNPXZ]v")>; -def : InstRW<[M4WriteFSQR7H], (instrs FSQRTv4f16)>; -def : InstRW<[M4WriteNEONXH], (instrs FSQRTv8f16)>; -def : InstRW<[M4WriteFSQR8], (instrs FSQRTv2f32)>; -def : InstRW<[M4WriteNEONX], (instrs FSQRTv4f32)>; -def : InstRW<[M4WriteNEONY], (instrs FSQRTv2f64)>; - -// ASIMD miscellaneous instructions. -def : InstRW<[M4WriteNALU1], (instregex "^RBITv")>; -def : InstRW<[M4WriteNALU1], (instregex "^(BIF|BIT|BSL|BSP)v")>; -def : InstRW<[M4WriteNALU1], (instregex "^CL[STZ]v")>; -def : InstRW<[M4WriteNEONB], (instregex "^DUPv.+gpr")>; -def : InstRW<[M4WriteNSHF1], (instregex "^DUP(i8|i16|i32|i64)$")>; -def : InstRW<[M4WriteNSHF1], (instregex "^DUPv.+lane")>; -def : InstRW<[M4WriteNSHF1], (instregex "^EXTv")>; -def : InstRW<[M4WriteNSHT4A], (instregex "^XTNv")>; -def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]?QXTU?Nv")>; -def : InstRW<[M4WriteNEONB], (instregex "^INSv.+gpr")>; -def : InstRW<[M4WriteNSHF1], (instregex "^INSv.+lane")>; -def : InstRW<[M4WriteMOVI], (instregex "^(MOV|MVN)I")>; -def : InstRW<[M4WriteNALU1H], (instregex "^FMOVv.f16")>; -def : InstRW<[M4WriteNALU1], (instregex "^FMOVv.f(32|64)")>; -def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev[248]f16")>; -def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev[248]f(32|64)")>; -def : InstRW<[M4WriteFCVT3], (instregex "^U(RECP|RSQRT)Ev[24]i32")>; -def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)Sv.f16")>; -def : InstRW<[M4WriteFMAC4], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>; -def : InstRW<[M4WriteNSHF1], (instregex "^REV(16|32|64)v")>; -def : InstRW<[M4WriteNSHFA], (instregex "^TB[LX]v(8|16)i8One")>; -def : InstRW<[M4WriteNSHFB], (instregex "^TB[LX]v(8|16)i8Two")>; -def : InstRW<[M4WriteNSHFC], (instregex "^TB[LX]v(8|16)i8Three")>; -def : InstRW<[M4WriteNSHFD], (instregex "^TB[LX]v(8|16)i8Four")>; -def : InstRW<[M4WriteNEONP], (instregex "^[SU]MOVv")>; -def : 
InstRW<[M4WriteNSHF1], (instregex "^(TRN|UZP|ZIP)[12]v")>; - -// ASIMD load instructions. -def : InstRW<[WriteVLD], (instregex "LD1Onev(8b|4h|2s|1d)$")>; -def : InstRW<[WriteVLD, - M4WriteA1], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[WriteVLD], (instregex "LD1Onev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, - M4WriteA1], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[M4WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[M4WriteVLDA, - M4WriteA1], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>; -def : InstRW<[M4WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[M4WriteVLDA, - M4WriteA1], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[M4WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[M4WriteVLDB, - M4WriteA1], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[M4WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[M4WriteVLDB, - M4WriteA1], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[M4WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[M4WriteVLDC, - M4WriteA1], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[M4WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[M4WriteVLDC, - M4WriteA1], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[M4WriteVLDD], (instregex "LD1i(8|16|32|64)$")>; -def : InstRW<[M4WriteVLDD, - M4WriteA1], (instregex "LD1i(8|16|32|64)_POST$")>; - -def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s|1d)$")>; -def : InstRW<[WriteVLD, - M4WriteA1], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[WriteVLD], (instregex "LD1Rv(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, - M4WriteA1], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[M4WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>; -def : InstRW<[M4WriteVLDF, - M4WriteA1], (instregex "LD2Twov(8b|4h|2s)_POST$")>; -def : InstRW<[M4WriteVLDF], (instregex "LD2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[M4WriteVLDF, - 
M4WriteA1], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[M4WriteVLDG], (instregex "LD2i(8|16|32|64)$")>; -def : InstRW<[M4WriteVLDG, - M4WriteA1], (instregex "LD2i(8|16|32|64)_POST$")>; - -def : InstRW<[M4WriteVLDA], (instregex "LD2Rv(8b|4h|2s|1d)$")>; -def : InstRW<[M4WriteVLDA, - M4WriteA1], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[M4WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>; -def : InstRW<[M4WriteVLDA, - M4WriteA1], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[M4WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>; -def : InstRW<[M4WriteVLDI, - M4WriteA1], (instregex "LD3Threev(8b|4h|2s)_POST$")>; -def : InstRW<[M4WriteVLDI], (instregex "LD3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[M4WriteVLDI, - M4WriteA1], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[M4WriteVLDJ], (instregex "LD3i(8|16|32)$")>; -def : InstRW<[M4WriteVLDJ, - M4WriteA1], (instregex "LD3i(8|16|32)_POST$")>; -def : InstRW<[M4WriteVLDL], (instregex "LD3i64$")>; -def : InstRW<[M4WriteVLDL, - M4WriteA1], (instregex "LD3i64_POST$")>; - -def : InstRW<[M4WriteVLDB], (instregex "LD3Rv(8b|4h|2s|1d)$")>; -def : InstRW<[M4WriteVLDB, - M4WriteA1], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[M4WriteVLDB], (instregex "LD3Rv(16b|8h|4s|2d)$")>; -def : InstRW<[M4WriteVLDB, - M4WriteA1], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[M4WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>; -def : InstRW<[M4WriteVLDN, - M4WriteA1], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; -def : InstRW<[M4WriteVLDN], (instregex "LD4Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[M4WriteVLDN, - M4WriteA1], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[M4WriteVLDK], (instregex "LD4i(8|16|32)$")>; -def : InstRW<[M4WriteVLDK, - M4WriteA1], (instregex "LD4i(8|16|32)_POST$")>; -def : InstRW<[M4WriteVLDM], (instregex "LD4i64$")>; -def : InstRW<[M4WriteVLDM, - M4WriteA1], (instregex "LD4i64_POST$")>; - -def : InstRW<[M4WriteVLDC], (instregex 
"LD4Rv(8b|4h|2s|1d)$")>; -def : InstRW<[M4WriteVLDC, - M4WriteA1], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[M4WriteVLDC], (instregex "LD4Rv(16b|8h|4s|2d)$")>; -def : InstRW<[M4WriteVLDC, - M4WriteA1], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>; - -// ASIMD store instructions. -def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>; -def : InstRW<[WriteVST, - M4WriteA1], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVST, - M4WriteA1], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[M4WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[M4WriteVSTA, - M4WriteA1], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; -def : InstRW<[M4WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[M4WriteVSTA, - M4WriteA1], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[M4WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[M4WriteVSTB, - M4WriteA1], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[M4WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[M4WriteVSTB, - M4WriteA1], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[M4WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[M4WriteVSTC, - M4WriteA1], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[M4WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[M4WriteVSTC, - M4WriteA1], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[WriteVST], (instregex "ST1i(8|16|32|64)$")>; -def : InstRW<[WriteVST, - M4WriteA1], (instregex "ST1i(8|16|32|64)_POST$")>; - -def : InstRW<[M4WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>; -def : InstRW<[M4WriteVSTD, - M4WriteA1], (instregex "ST2Twov(8b|4h|2s)_POST$")>; -def : InstRW<[M4WriteVSTE], (instregex "ST2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[M4WriteVSTE, - M4WriteA1], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[M4WriteVSTD], 
(instregex "ST2i(8|16|32|64)$")>; -def : InstRW<[M4WriteVSTD, - M4WriteA1], (instregex "ST2i(8|16|32|64)_POST$")>; - -def : InstRW<[M4WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>; -def : InstRW<[M4WriteVSTF, - M4WriteA1], (instregex "ST3Threev(8b|4h|2s)_POST$")>; -def : InstRW<[M4WriteVSTG], (instregex "ST3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[M4WriteVSTG, - M4WriteA1], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[M4WriteVSTE], (instregex "ST3i(8|16|32|64)$")>; -def : InstRW<[M4WriteVSTE, - M4WriteA1], (instregex "ST3i(8|16|32|64)_POST$")>; - -def : InstRW<[M4WriteVSTL], (instregex "ST4Fourv(8b|4h|2s)$")>; -def : InstRW<[M4WriteVSTL, - M4WriteA1], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; -def : InstRW<[M4WriteVSTI], (instregex "ST4Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[M4WriteVSTI, - M4WriteA1], (instregex "ST4Fourv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[M4WriteVSTE], (instregex "ST4i(8|16|32|64)$")>; -def : InstRW<[M4WriteVSTE, - M4WriteA1], (instregex "ST4i(8|16|32|64)_POST$")>; - -// Cryptography instructions. -def : InstRW<[M4WriteNCRY1], (instregex "^AES[DE]")>; -def : InstRW<[M4WriteNCRY1, - M4ReadAESM1], (instregex "^AESI?MC")>; -def : InstRW<[M4WriteNCRY1A], (instregex "^PMULv")>; -def : InstRW<[M4WriteNCRY1A], (instregex "^PMULLv(1|8)i")>; -def : InstRW<[M4WriteNCRY3A], (instregex "^PMULLv(2|16)i")>; -def : InstRW<[M4WriteNCRY1A], (instregex "^SHA1([CHMP]|SU[01])")>; -def : InstRW<[M4WriteNCRY1A], (instrs SHA256SU0rr)>; -def : InstRW<[M4WriteNCRY5A], (instrs SHA256SU1rrr)>; -def : InstRW<[M4WriteNCRY5A], (instrs SHA256H2rrr)>; - -// CRC instructions. 
-def : InstRW<[M4WriteE2], (instregex "^CRC32C?[BHWX]rr$")>; - -} // SchedModel = ExynosM4Model diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedExynosM5.td b/suite/synctools/tablegen/AArch64/AArch64SchedExynosM5.td deleted file mode 100644 index 1db5f5322a..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedExynosM5.td +++ /dev/null @@ -1,1016 +0,0 @@ -//=- AArch64SchedExynosM5.td - Samsung Exynos M5 Sched Defs --*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for the Samsung Exynos M5 to support -// instruction scheduling and other instruction cost heuristics. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// The Exynos-M5 is an advanced superscalar microprocessor with a 6-wide -// in-order stage for decode and dispatch and a wider issue stage. -// The execution units and loads and stores are out-of-order. - -def ExynosM5Model : SchedMachineModel { - let IssueWidth = 6; // Up to 6 uops per cycle. - let MicroOpBufferSize = 228; // ROB size. - let LoopMicroOpBufferSize = 60; // Based on the instruction queue size. - let LoadLatency = 4; // Optimistic load cases. - let MispredictPenalty = 15; // Minimum branch misprediction penalty. - let CompleteModel = 1; // Use the default model otherwise. - - list UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F, - SMEUnsupported.F); -} - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available on the Exynos-M5. 
- -let SchedModel = ExynosM5Model in { - -def M5UnitA : ProcResource<2>; // Simple integer -def M5UnitC : ProcResource<2>; // Simple and complex integer -let Super = M5UnitC, BufferSize = 1 in -def M5UnitD : ProcResource<1>; // Integer division (inside C0, serialized) -def M5UnitE : ProcResource<2>; // Simple 32-bit integer -let Super = M5UnitC in -def M5UnitF : ProcResource<2>; // CRC (inside C) -def M5UnitB : ProcResource<1>; // Branch -def M5UnitL0 : ProcResource<1>; // Load -def M5UnitS0 : ProcResource<1>; // Store -def M5PipeLS : ProcResource<1>; // Load/Store -let Super = M5PipeLS in { - def M5UnitL1 : ProcResource<1>; - def M5UnitS1 : ProcResource<1>; -} -def M5PipeF0 : ProcResource<1>; // FP #0 -let Super = M5PipeF0 in { - def M5UnitFMAC0 : ProcResource<1>; // FP multiplication - def M5UnitFADD0 : ProcResource<1>; // Simple FP - def M5UnitNALU0 : ProcResource<1>; // Simple vector - def M5UnitNDOT0 : ProcResource<1>; // Dot product vector - def M5UnitNHAD : ProcResource<1>; // Horizontal vector - def M5UnitNMSC : ProcResource<1>; // FP and vector miscellanea - def M5UnitNMUL0 : ProcResource<1>; // Vector multiplication - def M5UnitNSHT0 : ProcResource<1>; // Vector shifting - def M5UnitNSHF0 : ProcResource<1>; // Vector shuffling - def M5UnitNCRY0 : ProcResource<1>; // Cryptographic -} -def M5PipeF1 : ProcResource<1>; // FP #1 -let Super = M5PipeF1 in { - def M5UnitFMAC1 : ProcResource<1>; // FP multiplication - def M5UnitFADD1 : ProcResource<1>; // Simple FP - def M5UnitFCVT0 : ProcResource<1>; // FP conversion - def M5UnitFDIV0 : ProcResource<2>; // FP division (serialized) - def M5UnitFSQR0 : ProcResource<2>; // FP square root (serialized) - def M5UnitFST0 : ProcResource<1>; // FP store - def M5UnitNALU1 : ProcResource<1>; // Simple vector - def M5UnitNDOT1 : ProcResource<1>; // Dot product vector - def M5UnitNSHT1 : ProcResource<1>; // Vector shifting - def M5UnitNSHF1 : ProcResource<1>; // Vector shuffling -} -def M5PipeF2 : ProcResource<1>; // FP #2 
-let Super = M5PipeF2 in { - def M5UnitFMAC2 : ProcResource<1>; // FP multiplication - def M5UnitFADD2 : ProcResource<1>; // Simple FP - def M5UnitFCVT1 : ProcResource<1>; // FP conversion - def M5UnitFDIV1 : ProcResource<2>; // FP division (serialized) - def M5UnitFSQR1 : ProcResource<2>; // FP square root (serialized) - def M5UnitFST1 : ProcResource<1>; // FP store - def M5UnitNALU2 : ProcResource<1>; // Simple vector - def M5UnitNDOT2 : ProcResource<1>; // Dot product vector - def M5UnitNMUL1 : ProcResource<1>; // Vector multiplication - def M5UnitNSHT2 : ProcResource<1>; // Vector shifting - def M5UnitNCRY1 : ProcResource<1>; // Cryptographic -} - -def M5UnitAX : ProcResGroup<[M5UnitA, - M5UnitC]>; -def M5UnitAW : ProcResGroup<[M5UnitA, - M5UnitC, - M5UnitE]>; -def M5UnitL : ProcResGroup<[M5UnitL0, - M5UnitL1]>; -def M5UnitS : ProcResGroup<[M5UnitS0, - M5UnitS1]>; -def M5UnitFMAC : ProcResGroup<[M5UnitFMAC0, - M5UnitFMAC1, - M5UnitFMAC2]>; -def M5UnitFADD : ProcResGroup<[M5UnitFADD0, - M5UnitFADD1, - M5UnitFADD2]>; -def M5UnitFCVT : ProcResGroup<[M5UnitFCVT0, - M5UnitFCVT1]>; -def M5UnitFDIV : ProcResGroup<[M5UnitFDIV0, - M5UnitFDIV1]>; -def M5UnitFSQR : ProcResGroup<[M5UnitFSQR0, - M5UnitFSQR1]>; -def M5UnitFST : ProcResGroup<[M5UnitFST0, - M5UnitFST1]>; -def M5UnitNALU : ProcResGroup<[M5UnitNALU0, - M5UnitNALU1, - M5UnitNALU2]>; -def M5UnitNDOT : ProcResGroup<[M5UnitNDOT0, - M5UnitNDOT1, - M5UnitNDOT2]>; -def M5UnitNMUL : ProcResGroup<[M5UnitNMUL0, - M5UnitNMUL1]>; -def M5UnitNSHT : ProcResGroup<[M5UnitNSHT0, - M5UnitNSHT1, - M5UnitNSHT2]>; -def M5UnitNSHF : ProcResGroup<[M5UnitNSHF0, - M5UnitNSHF1]>; -def M5UnitNCRY : ProcResGroup<[M5UnitNCRY0, - M5UnitNCRY1]>; - -//===----------------------------------------------------------------------===// -// Resources details. 
- -def M5WriteZ0 : SchedWriteRes<[]> { let Latency = 0; } -def M5WriteZ1 : SchedWriteRes<[]> { let Latency = 1; - let NumMicroOps = 0; } -def M5WriteZ4 : SchedWriteRes<[]> { let Latency = 4; - let NumMicroOps = 0; } - -def M5WriteA1W : SchedWriteRes<[M5UnitAW]> { let Latency = 1; } -def M5WriteA1X : SchedWriteRes<[M5UnitAX]> { let Latency = 1; } -def M5WriteAAW : SchedWriteRes<[M5UnitAW]> { let Latency = 2; - let ResourceCycles = [2]; } -def M5WriteAAX : SchedWriteRes<[M5UnitAX]> { let Latency = 2; - let ResourceCycles = [2]; } -def M5WriteAB : SchedWriteRes<[M5UnitAX, - M5UnitC, - M5UnitE]> { let Latency = 2; - let NumMicroOps = 2; } -def M5WriteAC : SchedWriteRes<[M5UnitAX, - M5UnitAX, - M5UnitC]> { let Latency = 3; - let NumMicroOps = 3; } -def M5WriteAD : SchedWriteRes<[M5UnitAW, - M5UnitC]> { let Latency = 2; - let NumMicroOps = 2; } -def M5WriteAFW : SchedWriteRes<[M5UnitAW]> { let Latency = 2; - let NumMicroOps = 2; } -def M5WriteAFX : SchedWriteRes<[M5UnitAX]> { let Latency = 2; - let NumMicroOps = 2; } -def M5WriteAUW : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar, - SchedVar]>; -def M5WriteAUX : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar, - SchedVar]>; -def M5WriteAVW : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar, - SchedVar]>; -def M5WriteAVX : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar, - SchedVar]>; -def M5WriteAXW : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar]>; -def M5WriteAXX : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar]>; -def M5WriteAYW : SchedWriteVariant<[SchedVar, - SchedVar]>; -def M5WriteAYX : SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M5WriteB1 : SchedWriteRes<[M5UnitB]> { let Latency = 1; } -def M5WriteBX : SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M5WriteC1 : SchedWriteRes<[M5UnitC]> { let Latency = 1; } -def M5WriteC2 : SchedWriteRes<[M5UnitC]> { let Latency = 2; } -def M5WriteCA : SchedWriteRes<[M5UnitC]> { let Latency = 3; - let ResourceCycles = [2]; } - -def M5WriteD10 : 
SchedWriteRes<[M5UnitD]> { let Latency = 10; - let ResourceCycles = [10]; } -def M5WriteD16 : SchedWriteRes<[M5UnitD]> { let Latency = 16; - let ResourceCycles = [16]; } - -def M5WriteF2 : SchedWriteRes<[M5UnitF]> { let Latency = 2; } - -def M5WriteL4 : SchedWriteRes<[M5UnitL]> { let Latency = 4; } -def M5WriteL5 : SchedWriteRes<[M5UnitL]> { let Latency = 5; } -def M5WriteL6 : SchedWriteRes<[M5UnitL]> { let Latency = 6; } -def M5WriteLA : SchedWriteRes<[M5UnitL, - M5UnitL]> { let Latency = 6; - let NumMicroOps = 1; } -def M5WriteLB : SchedWriteRes<[M5UnitAX, - M5UnitL]> { let Latency = 6; - let NumMicroOps = 2; } -def M5WriteLC : SchedWriteRes<[M5UnitAX, - M5UnitL, - M5UnitL]> { let Latency = 6; - let NumMicroOps = 2; } -def M5WriteLD : SchedWriteRes<[M5UnitAX, - M5UnitL]> { let Latency = 4; - let NumMicroOps = 2; } -def M5WriteLE : SchedWriteRes<[M5UnitAX, - M5UnitL]> { let Latency = 7; - let NumMicroOps = 2; } -def M5WriteLFW : SchedWriteRes<[M5UnitAW, - M5UnitAW, - M5UnitAW, - M5UnitAW, - M5UnitL]> { let Latency = 15; - let NumMicroOps = 6; - let ResourceCycles = [1, 1, 1, 1, 15]; } -def M5WriteLFX : SchedWriteRes<[M5UnitAX, - M5UnitAX, - M5UnitAX, - M5UnitAX, - M5UnitL]> { let Latency = 15; - let NumMicroOps = 6; - let ResourceCycles = [1, 1, 1, 1, 15]; } -def M5WriteLGW : SchedWriteRes<[M5UnitAW, - M5UnitL]> { let Latency = 13; - let NumMicroOps = 1; - let ResourceCycles = [1, 13]; } -def M5WriteLGX : SchedWriteRes<[M5UnitAX, - M5UnitL]> { let Latency = 13; - let NumMicroOps = 1; - let ResourceCycles = [1, 13]; } -def M5WriteLH : SchedWriteRes<[]> { let Latency = 6; - let NumMicroOps = 0; } -def M5WriteLX : SchedWriteVariant<[SchedVar, - SchedVar]>; -def M5WriteLY : SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M5WriteS1 : SchedWriteRes<[M5UnitS]> { let Latency = 1; } -def M5WriteSA : SchedWriteRes<[M5UnitS0]> { let Latency = 4; } -def M5WriteSB : SchedWriteRes<[M5UnitAX, - M5UnitS]> { let Latency = 2; - let NumMicroOps = 1; } -def M5WriteSX : 
SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M5ReadAdrBase : SchedReadVariant<[SchedVar< - MCSchedPredicate< - CheckAny< - [ScaledIdxFn, - ExynosScaledIdxFn]>>, [ReadDefault]>, - SchedVar]>; - -def M5WriteNEONB : SchedWriteRes<[M5UnitNALU, - M5UnitS0]> { let Latency = 5; - let NumMicroOps = 2; } -def M5WriteNEONH : SchedWriteRes<[M5UnitNALU, - M5UnitS0]> { let Latency = 2; - let NumMicroOps = 2; } -def M5WriteNEONI : SchedWriteRes<[M5UnitS0, - M5UnitNSHF]> { let Latency = 6; - let NumMicroOps = 2; } -def M5WriteNEONK : SchedWriteRes<[M5UnitNSHF, - M5UnitFCVT0, - M5UnitS0]> { let Latency = 5; - let NumMicroOps = 2; } -def M5WriteNEONN : SchedWriteRes<[M5UnitNMSC, - M5UnitNMSC]> { let Latency = 5; - let NumMicroOps = 2; - let ResourceCycles = [7, 7]; } -def M5WriteNEONO : SchedWriteRes<[M5UnitNMSC, - M5UnitNMSC, - M5UnitNMSC]> { let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [10, 10, 10]; } -def M5WriteNEONP : SchedWriteRes<[M5UnitNSHF, - M5UnitS0, - M5UnitFCVT]> { let Latency = 7; - let NumMicroOps = 2; } -def M5WriteNEONQ : SchedWriteRes<[M5UnitNMSC, - M5UnitC]> { let Latency = 3; - let NumMicroOps = 1; } -def M5WriteNEONU : SchedWriteRes<[M5UnitFSQR, - M5UnitFSQR]> { let Latency = 7; - let ResourceCycles = [4, 4]; } -def M5WriteNEONV : SchedWriteRes<[M5UnitFDIV, - M5UnitFDIV]> { let Latency = 7; - let ResourceCycles = [6, 6]; } -def M5WriteNEONW : SchedWriteRes<[M5UnitFDIV, - M5UnitFDIV]> { let Latency = 12; - let ResourceCycles = [9, 9]; } -def M5WriteNEONX : SchedWriteRes<[M5UnitFSQR, - M5UnitFSQR]> { let Latency = 8; - let ResourceCycles = [5, 5]; } -def M5WriteNEONY : SchedWriteRes<[M5UnitFSQR, - M5UnitFSQR]> { let Latency = 12; - let ResourceCycles = [9, 9]; } -def M5WriteNEONZ : SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M5WriteFADD2 : SchedWriteRes<[M5UnitFADD]> { let Latency = 2; } - -def M5WriteFCVT2 : SchedWriteRes<[M5UnitFCVT]> { let Latency = 2; } -def M5WriteFCVT2A : SchedWriteRes<[M5UnitFCVT0]> { let Latency = 2; } -def 
M5WriteFCVT3 : SchedWriteRes<[M5UnitFCVT]> { let Latency = 3; } -def M5WriteFCVT3A : SchedWriteRes<[M5UnitFCVT0]> { let Latency = 3; } -def M5WriteFCVTA : SchedWriteRes<[M5UnitFCVT0, - M5UnitS0]> { let Latency = 3; - let NumMicroOps = 1; } -def M5WriteFCVTB : SchedWriteRes<[M5UnitFCVT, - M5UnitS0]> { let Latency = 4; - let NumMicroOps = 1; } -def M5WriteFCVTC : SchedWriteRes<[M5UnitFCVT, - M5UnitS0]> { let Latency = 6; - let NumMicroOps = 1; } - -def M5WriteFDIV5 : SchedWriteRes<[M5UnitFDIV]> { let Latency = 5; - let ResourceCycles = [2]; } -def M5WriteFDIV7 : SchedWriteRes<[M5UnitFDIV]> { let Latency = 7; - let ResourceCycles = [4]; } -def M5WriteFDIV12 : SchedWriteRes<[M5UnitFDIV]> { let Latency = 12; - let ResourceCycles = [9]; } - -def M5WriteFMAC3 : SchedWriteRes<[M5UnitFMAC]> { let Latency = 3; } -def M5WriteFMAC4 : SchedWriteRes<[M5UnitFMAC]> { let Latency = 4; } -def M5WriteFMAC5 : SchedWriteRes<[M5UnitFMAC]> { let Latency = 5; } - -def M5WriteFSQR5 : SchedWriteRes<[M5UnitFSQR]> { let Latency = 5; - let ResourceCycles = [2]; } -def M5WriteFSQR7 : SchedWriteRes<[M5UnitFSQR]> { let Latency = 7; - let ResourceCycles = [4]; } -def M5WriteFSQR8 : SchedWriteRes<[M5UnitFSQR]> { let Latency = 8; - let ResourceCycles = [5]; } -def M5WriteFSQR12 : SchedWriteRes<[M5UnitFSQR]> { let Latency = 12; - let ResourceCycles = [9]; } - -def M5WriteNALU1 : SchedWriteRes<[M5UnitNALU]> { let Latency = 1; } -def M5WriteNALU2 : SchedWriteRes<[M5UnitNALU]> { let Latency = 2; } - -def M5WriteNDOT2 : SchedWriteRes<[M5UnitNDOT]> { let Latency = 2; } - -def M5WriteNCRY2 : SchedWriteRes<[M5UnitNCRY]> { let Latency = 2; } -def M5WriteNCRY1A : SchedWriteRes<[M5UnitNCRY0]> { let Latency = 1; } -def M5WriteNCRY2A : SchedWriteRes<[M5UnitNCRY0]> { let Latency = 2; } -def M5WriteNCRY3A : SchedWriteRes<[M5UnitNCRY0]> { let Latency = 3; } -def M5WriteNCRY5A : SchedWriteRes<[M5UnitNCRY]> { let Latency = 5; } - -def M5WriteNHAD1 : SchedWriteRes<[M5UnitNHAD]> { let Latency = 1; } -def M5WriteNHAD3 : 
SchedWriteRes<[M5UnitNHAD]> { let Latency = 3; } - -def M5WriteNMSC1 : SchedWriteRes<[M5UnitNMSC]> { let Latency = 1; } -def M5WriteNMSC2 : SchedWriteRes<[M5UnitNMSC]> { let Latency = 2; } - -def M5WriteNMUL3 : SchedWriteRes<[M5UnitNMUL]> { let Latency = 3; } - -def M5WriteNSHF1 : SchedWriteRes<[M5UnitNSHF]> { let Latency = 1; } -def M5WriteNSHF2 : SchedWriteRes<[M5UnitNSHF]> { let Latency = 2; } -def M5WriteNSHFA : SchedWriteRes<[M5UnitNSHF]> { let Latency = 2; } -def M5WriteNSHFB : SchedWriteRes<[M5UnitNSHF]> { let Latency = 4; - let NumMicroOps = 2; } -def M5WriteNSHFC : SchedWriteRes<[M5UnitNSHF]> { let Latency = 6; - let NumMicroOps = 3; } -def M5WriteNSHFD : SchedWriteRes<[M5UnitNSHF]> { let Latency = 8; - let NumMicroOps = 4; } - -def M5WriteNSHT2 : SchedWriteRes<[M5UnitNSHT]> { let Latency = 2; } -def M5WriteNSHT4A : SchedWriteRes<[M5UnitNSHT1]> { let Latency = 4; } - -def M5WriteVLDA : SchedWriteRes<[M5UnitL, - M5UnitL]> { let Latency = 6; - let NumMicroOps = 2; } -def M5WriteVLDB : SchedWriteRes<[M5UnitL, - M5UnitL, - M5UnitL]> { let Latency = 7; - let NumMicroOps = 3; } -def M5WriteVLDC : SchedWriteRes<[M5UnitL, - M5UnitL, - M5UnitL, - M5UnitL]> { let Latency = 7; - let NumMicroOps = 4; } -def M5WriteVLDD : SchedWriteRes<[M5UnitL, - M5UnitNSHF]> { let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [2, 1]; } -def M5WriteVLDF : SchedWriteRes<[M5UnitL, - M5UnitL]> { let Latency = 11; - let NumMicroOps = 2; - let ResourceCycles = [6, 5]; } -def M5WriteVLDG : SchedWriteRes<[M5UnitL, - M5UnitNSHF, - M5UnitNSHF]> { let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [2, 1, 1]; } -def M5WriteVLDI : SchedWriteRes<[M5UnitL, - M5UnitL, - M5UnitL]> { let Latency = 13; - let NumMicroOps = 3; } -def M5WriteVLDJ : SchedWriteRes<[M5UnitL, - M5UnitNSHF, - M5UnitNSHF, - M5UnitNSHF]> { let Latency = 8; - let NumMicroOps = 4; } -def M5WriteVLDK : SchedWriteRes<[M5UnitL, - M5UnitNSHF, - M5UnitNSHF, - M5UnitNSHF, - M5UnitNSHF]> { let Latency = 8; - let 
NumMicroOps = 5; } -def M5WriteVLDL : SchedWriteRes<[M5UnitL, - M5UnitNSHF, - M5UnitNSHF, - M5UnitL, - M5UnitNSHF]> { let Latency = 8; - let NumMicroOps = 5; } -def M5WriteVLDM : SchedWriteRes<[M5UnitL, - M5UnitNSHF, - M5UnitNSHF, - M5UnitL, - M5UnitNSHF, - M5UnitNSHF]> { let Latency = 8; - let NumMicroOps = 6; } -def M5WriteVLDN : SchedWriteRes<[M5UnitL, - M5UnitL, - M5UnitL, - M5UnitL]> { let Latency = 15; - let NumMicroOps = 4; - let ResourceCycles = [2, 2, 2, 2]; } - -def M5WriteVST1 : SchedWriteRes<[M5UnitS, - M5UnitFST]> { let Latency = 1; - let NumMicroOps = 1; } -def M5WriteVSTA : SchedWriteRes<[M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST]> { let Latency = 2; - let NumMicroOps = 2; } -def M5WriteVSTB : SchedWriteRes<[M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST]> { let Latency = 3; - let NumMicroOps = 3; } -def M5WriteVSTC : SchedWriteRes<[M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST]> { let Latency = 4; - let NumMicroOps = 4; } -def M5WriteVSTD : SchedWriteRes<[M5UnitS, - M5UnitFST]> { let Latency = 2; } -def M5WriteVSTE : SchedWriteRes<[M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST]> { let Latency = 2; - let NumMicroOps = 1; } -def M5WriteVSTF : SchedWriteRes<[M5UnitNSHF, - M5UnitNSHF, - M5UnitS, - M5UnitFST]> { let Latency = 4; - let NumMicroOps = 3; } -def M5WriteVSTG : SchedWriteRes<[M5UnitNSHF, - M5UnitNSHF, - M5UnitNSHF, - M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST]> { let Latency = 4; - let NumMicroOps = 5; } -def M5WriteVSTH : SchedWriteRes<[M5UnitS0, - M5UnitFST]> { let Latency = 1; - let NumMicroOps = 1; } -def M5WriteVSTI : SchedWriteRes<[M5UnitNSHF, - M5UnitNSHF, - M5UnitNSHF, - M5UnitNSHF, - M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST]> { let Latency = 8; - let NumMicroOps = 5; - let ResourceCycles = [1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1]; } -def M5WriteVSTJ : SchedWriteRes<[M5UnitA, - M5UnitS0, - M5UnitFST]> { let Latency = 
1; - let NumMicroOps = 1; } -def M5WriteVSTK : SchedWriteRes<[M5UnitAX, - M5UnitS, - M5UnitFST]> { let Latency = 3; - let NumMicroOps = 2; } -def M5WriteVSTL : SchedWriteRes<[M5UnitNSHF, - M5UnitNSHF, - M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST]> { let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [1, 1, 2, 1, 2, 1]; } -def M5WriteVSTY : SchedWriteVariant<[SchedVar, - SchedVar]>; - -// Special cases. -def M5WriteCOPY : SchedWriteVariant<[SchedVar, - SchedVar]>; -def M5WriteMOVI : SchedWriteVariant<[SchedVar, - SchedVar]>; - -// Fast forwarding. -def M5ReadFM1 : SchedReadAdvance<+1, [M5WriteF2]>; -def M5ReadAESM2 : SchedReadAdvance<+2, [M5WriteNCRY2]>; -def M5ReadFMACM1 : SchedReadAdvance<+1, [M5WriteFMAC4, - M5WriteFMAC5]>; -def M5ReadNMULM1 : SchedReadAdvance<+1, [M5WriteNMUL3]>; - -//===----------------------------------------------------------------------===// -// Coarse scheduling model. - -// Branch instructions. -def : SchedAlias; -def : SchedAlias; - -// Arithmetic and logical integer instructions. -def : SchedAlias; -def : SchedAlias; // FIXME: M5WriteAX crashes TableGen. -def : SchedAlias; // FIXME: M5WriteAX crashes TableGen. -def : SchedAlias; - -// Move instructions. -def : SchedAlias; - -// Divide and multiply instructions. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - -// Miscellaneous instructions. -def : SchedAlias; - -// Addressing modes. -def : SchedAlias; -def : SchedAlias; - -// Load instructions. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - -// Store instructions. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - -// Atomic load and store instructions. -def : SchedAlias; - -// FP data instructions. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - -// FP miscellaneous instructions. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - -// FP load instructions. -def : SchedAlias; - -// FP store instructions. 
-def : SchedAlias; - -// ASIMD FP instructions. -def : SchedAlias; -def : SchedAlias; - -// Other miscellaneous instructions. -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -//===----------------------------------------------------------------------===// -// Generic fast forwarding. - -// TODO: Add FP register forwarding rules. - -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -// TODO: The forwarding for 32 bits actually saves 2 cycles. -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -//===----------------------------------------------------------------------===// -// Finer scheduling model. - -// Branch instructions -def : InstRW<[M5WriteB1], (instrs Bcc)>; -def : InstRW<[M5WriteAFX], (instrs BL)>; -def : InstRW<[M5WriteBX], (instrs BLR)>; -def : InstRW<[M5WriteC1], (instregex "^CBN?Z[WX]")>; -def : InstRW<[M5WriteAD], (instregex "^TBN?ZW")>; -def : InstRW<[M5WriteAB], (instregex "^TBN?ZX")>; - -// Arithmetic and logical integer instructions. 
-def : InstRW<[M5WriteA1W], (instregex "^(ADC|SBC)S?Wr$")>; -def : InstRW<[M5WriteA1X], (instregex "^(ADC|SBC)S?Xr$")>; -def : InstRW<[M5WriteAXW], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)Wrs$")>; -def : InstRW<[M5WriteAXX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)Xrs$")>; -def : InstRW<[M5WriteAUW], (instrs ORRWrs)>; -def : InstRW<[M5WriteAUX], (instrs ORRXrs)>; -def : InstRW<[M5WriteAXW], (instregex "^(ADD|AND|BIC|SUB)SWrs$")>; -def : InstRW<[M5WriteAXX], (instregex "^(ADD|AND|BIC|SUB)SXrs$")>; -def : InstRW<[M5WriteAXW], (instregex "^(ADD|SUB)S?Wrx(64)?$")>; -def : InstRW<[M5WriteAXX], (instregex "^(ADD|SUB)S?Xrx(64)?$")>; -def : InstRW<[M5WriteAVW], (instrs ADDWri, ORRWri)>; -def : InstRW<[M5WriteAVX], (instrs ADDXri, ORRXri)>; -def : InstRW<[M5WriteA1W], (instregex "^CCM[NP]W[ir]$")>; -def : InstRW<[M5WriteA1X], (instregex "^CCM[NP]X[ir]$")>; -def : InstRW<[M5WriteA1W], (instrs CSELWr, CSINCWr, CSINVWr, CSNEGWr)>; -def : InstRW<[M5WriteA1X], (instrs CSELXr, CSINCXr, CSINVXr, CSNEGXr)>; - -// Move instructions. -def : InstRW<[M5WriteCOPY], (instrs COPY)>; -def : InstRW<[M5WriteZ0], (instrs ADR, ADRP)>; -def : InstRW<[M5WriteZ0], (instregex "^MOV[NZ][WX]i$")>; - -// Shift instructions. -def : InstRW<[M5WriteA1W], (instrs ASRVWr, LSLVWr, LSRVWr, RORVWr)>; -def : InstRW<[M5WriteA1X], (instrs ASRVXr, LSLVXr, LSRVXr, RORVXr)>; - -// Miscellaneous instructions. -def : InstRW<[M5WriteAYW], (instrs EXTRWrri)>; -def : InstRW<[M5WriteAYX], (instrs EXTRXrri)>; -def : InstRW<[M5WriteA1W], (instrs BFMWri, SBFMWri, UBFMWri)>; -def : InstRW<[M5WriteA1X], (instrs BFMXri, SBFMXri, UBFMXri)>; -def : InstRW<[M5WriteA1W], (instrs CLSWr, CLZWr)>; -def : InstRW<[M5WriteA1X], (instrs CLSXr, CLZXr)>; -def : InstRW<[M5WriteA1W], (instrs RBITWr, REVWr, REV16Wr)>; -def : InstRW<[M5WriteA1X], (instrs RBITXr, REVXr, REV16Xr, REV32Xr)>; - -// Load instructions. 
-def : InstRW<[M5WriteLD, - WriteLDHi, - WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>; -def : InstRW<[M5WriteL5, - ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>; -def : InstRW<[WriteLDIdx, - ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>; -def : InstRW<[M5WriteL5, - ReadAdrBase], (instrs PRFMroW)>; -def : InstRW<[WriteLDIdx, - ReadAdrBase], (instrs PRFMroX)>; - -// Store instructions. -def : InstRW<[M5WriteSB, - ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>; -def : InstRW<[WriteST, - ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>; - -// Atomic load and store instructions. -def : InstRW<[M5WriteLGW], (instregex "^CAS(A|AL|L)?[BHW]$")>; -def : InstRW<[M5WriteLGX], (instregex "^CAS(A|AL|L)?X$")>; -def : InstRW<[M5WriteLFW], (instregex "^CASP(A|AL|L)?W$")>; -def : InstRW<[M5WriteLFX], (instregex "^CASP(A|AL|L)?X$")>; -def : InstRW<[M5WriteLGW], (instregex "^LD(ADD|CLR|EOR|SET|[SU]MAX|[SU]MIN)(A|AL|L)?[BHW]$")>; -def : InstRW<[M5WriteLGX], (instregex "^LD(ADD|CLR|EOR|SET|[SU]MAX|[SU]MIN)(A|AL|L)?X$")>; -def : InstRW<[M5WriteLGW], (instregex "^SWP(A|AL|L)?[BHW]$")>; -def : InstRW<[M5WriteLGX], (instregex "^SWP(A|AL|L)?X$")>; - -// FP data instructions. 
-def : InstRW<[M5WriteNSHF1], (instrs FABSHr, FABSSr,FABSDr)>; -def : InstRW<[M5WriteFADD2], (instregex "^F(ADD|SUB)[HSD]rr")>; -def : InstRW<[M5WriteFADD2], (instregex "^FADDPv.i(16|32|64)")>; -def : InstRW<[M5WriteNEONQ], (instregex "^FCCMPE?[HSD]rr")>; -def : InstRW<[M5WriteNMSC2], (instregex "^FCMPE?[HSD]r[ir]")>; -def : InstRW<[M5WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(16|32|64|v1)")>; -def : InstRW<[M5WriteFDIV5], (instrs FDIVHrr)>; -def : InstRW<[M5WriteFDIV7], (instrs FDIVSrr)>; -def : InstRW<[M5WriteFDIV12], (instrs FDIVDrr)>; -def : InstRW<[M5WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?[HSD]rr")>; -def : InstRW<[M5WriteFMAC3], (instregex "^FN?MUL[HSD]rr")>; -def : InstRW<[M5WriteFMAC3], (instrs FMULX16, FMULX32, FMULX64)>; -def : InstRW<[M5WriteFMAC4, - M5ReadFMACM1], (instregex "^FN?M(ADD|SUB)[HSD]rrr")>; -def : InstRW<[M5WriteNALU2], (instrs FNEGHr, FNEGSr, FNEGDr)>; -def : InstRW<[M5WriteFCVT3A], (instregex "^FRINT.+r")>; -def : InstRW<[M5WriteNEONH], (instregex "^FCSEL[HSD]rrr")>; -def : InstRW<[M5WriteFSQR5], (instrs FSQRTHr)>; -def : InstRW<[M5WriteFSQR8], (instrs FSQRTSr)>; -def : InstRW<[M5WriteFSQR12], (instrs FSQRTDr)>; - -// FP miscellaneous instructions. 
-def : InstRW<[M5WriteFCVT2], (instregex "^FCVT[HSD][HSD]r")>; -def : InstRW<[M5WriteFCVTC], (instregex "^[SU]CVTF[SU][XW][HSD]ri")>; -def : InstRW<[M5WriteFCVTB], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>; -def : InstRW<[M5WriteNALU1], (instregex "^FMOV[HSD]i")>; -def : InstRW<[M5WriteNALU2], (instregex "^FMOV[HSD]r")>; -def : InstRW<[M5WriteSA], (instregex "^FMOV[WX][HSD]r")>; -def : InstRW<[M5WriteFCVTA], (instregex "^FMOV[HSD][WX]r")>; -def : InstRW<[M5WriteNEONI], (instregex "^FMOVXDHighr")>; -def : InstRW<[M5WriteNEONK], (instregex "^FMOVDXHighr")>; -def : InstRW<[M5WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev1(f16|i32|i64)")>; -def : InstRW<[M5WriteNMSC1], (instregex "^FRECPXv1")>; -def : InstRW<[M5WriteFMAC4], (instregex "^F(RECP|RSQRT)S(16|32|64)")>; - -// FP load instructions. -def : InstRW<[WriteVLD], (instregex "^LDR[SDQ]l")>; -def : InstRW<[WriteVLD], (instregex "^LDUR[BHSDQ]i")>; -def : InstRW<[WriteVLD, - WriteAdr], (instregex "^LDR[BHSDQ](post|pre)")>; -def : InstRW<[WriteVLD], (instregex "^LDR[BHSDQ]ui")>; -def : InstRW<[M5WriteLE, - ReadAdrBase], (instregex "^LDR[BHSDQ]roW")>; -def : InstRW<[WriteVLD, - ReadAdrBase], (instregex "^LDR[BHSD]roX")>; -def : InstRW<[M5WriteLY, - ReadAdrBase], (instrs LDRQroX)>; -def : InstRW<[WriteVLD, - M5WriteLH], (instregex "^LDN?P[SD]i")>; -def : InstRW<[M5WriteLA, - M5WriteLH], (instregex "^LDN?PQi")>; -def : InstRW<[M5WriteLB, - M5WriteLH, - WriteAdr], (instregex "^LDP[SD](post|pre)")>; -def : InstRW<[M5WriteLC, - M5WriteLH, - WriteAdr], (instregex "^LDPQ(post|pre)")>; - -// FP store instructions. 
-def : InstRW<[WriteVST], (instregex "^STUR[BHSDQ]i")>; -def : InstRW<[WriteVST, - WriteAdr], (instregex "^STR[BHSDQ](post|pre)")>; -def : InstRW<[WriteVST], (instregex "^STR[BHSDQ]ui")>; -def : InstRW<[WriteVST, - ReadAdrBase], (instregex "^STR[BHSD]ro[WX]")>; -def : InstRW<[M5WriteVSTK, - ReadAdrBase], (instregex "^STRQroW")>; -def : InstRW<[M5WriteVSTY, - ReadAdrBase], (instregex "^STRQroX")>; -def : InstRW<[WriteVST], (instregex "^STN?P[SD]i")>; -def : InstRW<[M5WriteVSTH], (instregex "^STN?PQi")>; -def : InstRW<[WriteVST, - WriteAdr], (instregex "^STP[SD](post|pre)")>; -def : InstRW<[M5WriteVSTJ, - WriteAdr], (instregex "^STPQ(post|pre)")>; - -// ASIMD instructions. -def : InstRW<[M5WriteNHAD1], (instregex "^[SU]ABDL?v")>; -def : InstRW<[M5WriteNHAD3], (instregex "^[SU]ABAL?v")>; -def : InstRW<[M5WriteNMSC1], (instregex "^ABSv")>; -def : InstRW<[M5WriteNALU2], (instregex "^(ADD|NEG|SUB)v")>; -def : InstRW<[M5WriteNHAD3], (instregex "^[SU]?ADDL?Pv")>; -def : InstRW<[M5WriteNHAD3], (instregex "^[SU]H(ADD|SUB)v")>; -def : InstRW<[M5WriteNHAD3], (instregex "^[SU](ADD|SUB)[LW]v")>; -def : InstRW<[M5WriteNHAD3], (instregex "^R?(ADD|SUB)HN2?v")>; -def : InstRW<[M5WriteNHAD3], (instregex "^[SU]Q(ADD|SUB)v")>; -def : InstRW<[M5WriteNHAD3], (instregex "^(SU|US)QADDv")>; -def : InstRW<[M5WriteNHAD3], (instregex "^[SU]RHADDv")>; -def : InstRW<[M5WriteNMSC1], (instregex "^SQ(ABS|NEG)v")>; -def : InstRW<[M5WriteNHAD3], (instregex "^[SU]?ADDL?Vv")>; -def : InstRW<[M5WriteNMSC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>; -def : InstRW<[M5WriteNALU2], (instregex "^CMTSTv")>; -def : InstRW<[M5WriteNALU2], (instregex "^(AND|BIC|EOR|NOT|ORN|ORR)v")>; -def : InstRW<[M5WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>; -def : InstRW<[M5WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>; -def : InstRW<[M5WriteNHAD3], (instregex "^[SU](MIN|MAX)Vv")>; -def : InstRW<[M5WriteNMUL3], (instregex "^(SQR?D)?MULH?v")>; -def : InstRW<[M5WriteNMUL3, - M5ReadNMULM1], (instregex "^ML[AS]v")>; -def : 
InstRW<[M5WriteNMUL3, - M5ReadNMULM1], (instregex "^SQRDML[AS]H")>; -def : InstRW<[M5WriteNMUL3], (instregex "^(S|U|SQD)ML[AS]L(v1(i32|i64)|v2i32|v4i16|v8i8)")>; -def : InstRW<[M5WriteNMUL3, - M5ReadNMULM1], (instregex "^(S|U|SQD)ML[AS]L(v4i32|v8i16|v16i8)")>; -def : InstRW<[M5WriteNMUL3, - M5ReadNMULM1], (instregex "^(S|U|SQD)MULL(v1(i32|i64)|v2i32|v4i16|v8i8)")>; -def : InstRW<[M5WriteNMUL3, - M5ReadNMULM1], (instregex "^(S|U|SQD)MULL(v4i32|v8i16|v16i8)")>; -def : InstRW<[M5WriteNDOT2], (instregex "^[SU]DOT(lane)?v")>; -def : InstRW<[M5WriteNHAD3], (instregex "^[SU]ADALPv")>; -def : InstRW<[M5WriteNSHT4A], (instregex "^[SU]R?SRA[dv]")>; -def : InstRW<[M5WriteNSHT2], (instregex "^SHL[dv]")>; -def : InstRW<[M5WriteNSHT2], (instregex "^S[LR]I[dv]")>; -def : InstRW<[M5WriteNSHT2], (instregex "^[SU]SH[LR][dv]")>; -def : InstRW<[M5WriteNSHT2], (instregex "^[SU]?SHLLv")>; -def : InstRW<[M5WriteNSHT4A], (instregex "^[SU]?Q?R?SHRU?N[bhsv]")>; -def : InstRW<[M5WriteNSHT4A], (instregex "^[SU]RSH[LR][dv]")>; -def : InstRW<[M5WriteNSHT4A], (instregex "^[SU]QR?SHLU?[bhsdv]")>; - -// ASIMD FP instructions. 
-def : InstRW<[M5WriteNSHF2], (instregex "^FABSv.f(16|32|64)")>; -def : InstRW<[M5WriteFADD2], (instregex "^F(ABD|ADD|SUB)v.f(16|32|64)")>; -def : InstRW<[M5WriteFADD2], (instregex "^FADDPv.f(16|32|64)")>; -def : InstRW<[M5WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>; -def : InstRW<[M5WriteFCVT2], (instregex "^FCVT(L|N|XN)v")>; -def : InstRW<[M5WriteFCVT2A], (instregex "^FCVT[AMNPZ][SU]v")>; -def : InstRW<[M5WriteFCVT2], (instregex "^[SU]CVTFv.[fi](16|32|64)")>; -def : InstRW<[M5WriteFDIV7], (instrs FDIVv4f16)>; -def : InstRW<[M5WriteNEONV], (instrs FDIVv8f16)>; -def : InstRW<[M5WriteFDIV7], (instrs FDIVv2f32)>; -def : InstRW<[M5WriteNEONV], (instrs FDIVv4f32)>; -def : InstRW<[M5WriteNEONW], (instrs FDIVv2f64)>; -def : InstRW<[M5WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?v")>; -def : InstRW<[M5WriteNMSC2], (instregex "^F(MAX|MIN)(NM)?Pv")>; -def : InstRW<[M5WriteNEONZ], (instregex "^F(MAX|MIN)(NM)?Vv")>; -def : InstRW<[M5WriteFMAC3], (instregex "^FMULX?v.[fi](16|32|64)")>; -def : InstRW<[M5WriteFMAC4, - M5ReadFMACM1], (instregex "^FML[AS]v.[fi](16|32|64)")>; -def : InstRW<[M5WriteNALU2], (instregex "^FNEGv.f(16|32|64)")>; -def : InstRW<[M5WriteFCVT3A], (instregex "^FRINT[AIMNPXZ]v")>; -def : InstRW<[M5WriteFSQR7], (instrs FSQRTv4f16)>; -def : InstRW<[M5WriteNEONU], (instrs FSQRTv8f16)>; -def : InstRW<[M5WriteFSQR8], (instrs FSQRTv2f32)>; -def : InstRW<[M5WriteNEONX], (instrs FSQRTv4f32)>; -def : InstRW<[M5WriteNEONY], (instrs FSQRTv2f64)>; - -// ASIMD miscellaneous instructions. 
-def : InstRW<[M5WriteNALU2], (instregex "^RBITv")>; -def : InstRW<[M5WriteNALU2], (instregex "^(BIF|BIT|BSL|BSP)v")>; -def : InstRW<[M5WriteNALU2], (instregex "^CL[STZ]v")>; -def : InstRW<[M5WriteNEONB], (instregex "^DUPv.+gpr")>; -def : InstRW<[M5WriteNSHF2], (instregex "^DUP(i8|i16|i32|i64)$")>; -def : InstRW<[M5WriteNSHF2], (instregex "^DUPv.+lane")>; -def : InstRW<[M5WriteNSHF2], (instregex "^EXTv")>; -def : InstRW<[M5WriteNSHT4A], (instregex "^XTNv")>; -def : InstRW<[M5WriteNSHT4A], (instregex "^[SU]?QXTU?Nv")>; -def : InstRW<[M5WriteNEONB], (instregex "^INSv.+gpr")>; -def : InstRW<[M5WriteNSHF2], (instregex "^INSv.+lane")>; -def : InstRW<[M5WriteMOVI], (instregex "^(MOV|MVN)I")>; -def : InstRW<[M5WriteNALU1], (instregex "^FMOVv.f(16|32|64)")>; -def : InstRW<[M5WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev[248]f(16|32|64)")>; -def : InstRW<[M5WriteFCVT3], (instregex "^U(RECP|RSQRT)Ev[24]i32")>; -def : InstRW<[M5WriteFMAC4], (instregex "^F(RECP|RSQRT)Sv.f(16|32|64)")>; -def : InstRW<[M5WriteNSHF2], (instregex "^REV(16|32|64)v")>; -def : InstRW<[M5WriteNSHFA], (instregex "^TB[LX]v(8|16)i8One")>; -def : InstRW<[M5WriteNSHFB], (instregex "^TB[LX]v(8|16)i8Two")>; -def : InstRW<[M5WriteNSHFC], (instregex "^TB[LX]v(8|16)i8Three")>; -def : InstRW<[M5WriteNSHFD], (instregex "^TB[LX]v(8|16)i8Four")>; -def : InstRW<[M5WriteNEONP], (instregex "^[SU]MOVv")>; -def : InstRW<[M5WriteNSHF2], (instregex "^(TRN|UZP|ZIP)[12]v")>; - -// ASIMD load instructions. 
-def : InstRW<[WriteVLD], (instregex "LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[WriteVLD, - M5WriteA1X, - WriteAdr], (instregex "LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[M5WriteVLDA], (instregex "LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[M5WriteVLDA, - M5WriteA1X, - WriteAdr], (instregex "LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[M5WriteVLDB], (instregex "LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[M5WriteVLDB, - M5WriteA1X, - WriteAdr], (instregex "LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[M5WriteVLDC], (instregex "LD1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[M5WriteVLDC, - M5WriteA1X, - WriteAdr], (instregex "LD1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[M5WriteVLDD], (instregex "LD1i(8|16|32|64)$")>; -def : InstRW<[M5WriteVLDD, - M5WriteA1X, - WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; -def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[WriteVLD, - M5WriteA1X, - WriteAdr], (instregex "LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[M5WriteVLDF], (instregex "LD2Twov(8b|16b|4h|8h|2s|4s|2d)$")>; -def : InstRW<[M5WriteVLDF, - M5WriteA1X, - WriteAdr], (instregex "LD2Twov(8b|16b|4h|8h|2s|4s|2d)_POST$")>; -def : InstRW<[M5WriteVLDG], (instregex "LD2i(8|16|32|64)$")>; -def : InstRW<[M5WriteVLDG, - M5WriteA1X, - WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>; -def : InstRW<[M5WriteVLDA], (instregex "LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[M5WriteVLDA, - M5WriteA1X, - WriteAdr], (instregex "LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[M5WriteVLDI], (instregex "LD3Threev(8b|16b|4h|8h|2s|4s|2d)$")>; -def : InstRW<[M5WriteVLDI, - M5WriteA1X, - WriteAdr], (instregex "LD3Threev(8b|16b|4h|8h|2s|4s|2d)_POST$")>; -def : InstRW<[M5WriteVLDJ], (instregex "LD3i(8|16|32)$")>; -def : InstRW<[M5WriteVLDJ, - M5WriteA1X, - WriteAdr], (instregex "LD3i(8|16|32)_POST$")>; -def : InstRW<[M5WriteVLDL], 
(instregex "LD3i64$")>; -def : InstRW<[M5WriteVLDL, - M5WriteA1X, - WriteAdr], (instregex "LD3i64_POST$")>; -def : InstRW<[M5WriteVLDB], (instregex "LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[M5WriteVLDB, - M5WriteA1X], (instregex "LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[M5WriteVLDN], (instregex "LD4Fourv(8b|16b|4h|8h|2s|4s|2d)$")>; -def : InstRW<[M5WriteVLDN, - M5WriteA1X, - WriteAdr], (instregex "LD4Fourv(8b|16b|4h|8h|2s|4s|2d)_POST$")>; -def : InstRW<[M5WriteVLDK], (instregex "LD4i(8|16|32)$")>; -def : InstRW<[M5WriteVLDK, - M5WriteA1X, - WriteAdr], (instregex "LD4i(8|16|32)_POST$")>; -def : InstRW<[M5WriteVLDM], (instregex "LD4i64$")>; -def : InstRW<[M5WriteVLDM, - M5WriteA1X, - WriteAdr], (instregex "LD4i64_POST$")>; -def : InstRW<[M5WriteVLDC], (instregex "LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[M5WriteVLDC, - M5WriteA1X, - WriteAdr], (instregex "LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; - -// ASIMD store instructions. -def : InstRW<[WriteVST], (instregex "ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[WriteVST, - M5WriteA1X, - WriteAdr], (instregex "ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[M5WriteVSTA], (instregex "ST1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[M5WriteVSTA, - M5WriteA1X, - WriteAdr], (instregex "ST1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; - -def : InstRW<[M5WriteVSTB], (instregex "ST1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[M5WriteVSTB, - M5WriteA1X, - WriteAdr], (instregex "ST1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[M5WriteVSTC], (instregex "ST1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[M5WriteVSTC, - M5WriteA1X, - WriteAdr], (instregex "ST1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[WriteVST], (instregex "ST1i(8|16|32|64)$")>; -def : InstRW<[WriteVST, - M5WriteA1X, - WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; -def : InstRW<[M5WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>; -def : InstRW<[M5WriteVSTD, - M5WriteA1X, 
- WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; -def : InstRW<[M5WriteVSTE], (instregex "ST2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[M5WriteVSTE, - M5WriteA1X, - WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; -def : InstRW<[M5WriteVSTD], (instregex "ST2i(8|16|32|64)$")>; -def : InstRW<[M5WriteVSTD, - M5WriteA1X, - WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; -def : InstRW<[M5WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>; -def : InstRW<[M5WriteVSTF, - M5WriteA1X, - WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>; -def : InstRW<[M5WriteVSTG], (instregex "ST3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[M5WriteVSTG, - M5WriteA1X, - WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>; -def : InstRW<[M5WriteVSTA], (instregex "ST3i(8|16|32|64)$")>; -def : InstRW<[M5WriteVSTA, - M5WriteA1X, - WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; -def : InstRW<[M5WriteVSTL], (instregex "ST4Fourv(8b|4h|2s)$")>; -def : InstRW<[M5WriteVSTL, - M5WriteA1X, - WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; -def : InstRW<[M5WriteVSTI], (instregex "ST4Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[M5WriteVSTI, - M5WriteA1X, - WriteAdr], (instregex "ST4Fourv(16b|8h|4s|2d)_POST$")>; -def : InstRW<[M5WriteVSTA], (instregex "ST4i(8|16|32|64)$")>; -def : InstRW<[M5WriteVSTA, - M5WriteA1X, - WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; - -// Cryptography instructions. 
-def : InstRW<[M5WriteNCRY2], (instregex "^AES[DE]")>; -def : InstRW<[M5WriteNCRY2, - M5ReadAESM2], (instregex "^AESI?MC")>; -def : InstRW<[M5WriteNCRY2A], (instregex "^PMULv")>; -def : InstRW<[M5WriteNCRY1A], (instregex "^PMULLv(1|8)i")>; -def : InstRW<[M5WriteNCRY3A], (instregex "^PMULLv(2|16)i")>; -def : InstRW<[M5WriteNCRY2A], (instregex "^SHA1(H|SU[01])")>; -def : InstRW<[M5WriteNCRY5A], (instregex "^SHA1[CMP]")>; -def : InstRW<[M5WriteNCRY2A], (instrs SHA256SU0rr)>; -def : InstRW<[M5WriteNCRY5A], (instrs SHA256SU1rrr)>; -def : InstRW<[M5WriteNCRY5A], (instregex "^SHA256H2?")>; - -// CRC instructions. -def : InstRW<[M5WriteF2, - M5ReadFM1], (instregex "^CRC32C?[BHWX]")>; - -} // SchedModel = ExynosM5Model diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedFalkor.td b/suite/synctools/tablegen/AArch64/AArch64SchedFalkor.td deleted file mode 100644 index 7c9b0afdd1..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedFalkor.td +++ /dev/null @@ -1,121 +0,0 @@ -//==- AArch64SchedFalkor.td - Falkor Scheduling Definitions -*- tablegen -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for Qualcomm Falkor to support -// instruction scheduling and other instruction cost heuristics. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Define the SchedMachineModel and provide basic properties for coarse grained -// instruction cost model. - -def FalkorModel : SchedMachineModel { - let IssueWidth = 8; // 8 uops are dispatched per cycle. - let MicroOpBufferSize = 128; // Out-of-order with temporary unified issue buffer. 
- let LoopMicroOpBufferSize = 16; - let LoadLatency = 3; // Optimistic load latency. - let MispredictPenalty = 11; // Minimum branch misprediction penalty. - let CompleteModel = 1; - - list UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F, - SMEUnsupported.F); - // FIXME: Remove when all errors have been fixed. - let FullInstRWOverlapCheck = 0; -} - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available on Falkor. - -let SchedModel = FalkorModel in { - - def FalkorUnitB : ProcResource<1>; // Branch - def FalkorUnitLD : ProcResource<1>; // Load pipe - def FalkorUnitSD : ProcResource<1>; // Store data - def FalkorUnitST : ProcResource<1>; // Store pipe - def FalkorUnitX : ProcResource<1>; // Complex arithmetic - def FalkorUnitY : ProcResource<1>; // Simple arithmetic - def FalkorUnitZ : ProcResource<1>; // Simple arithmetic - - def FalkorUnitVSD : ProcResource<1>; // Vector store data - def FalkorUnitVX : ProcResource<1>; // Vector X-pipe - def FalkorUnitVY : ProcResource<1>; // Vector Y-pipe - - def FalkorUnitGTOV : ProcResource<1>; // Scalar to Vector - def FalkorUnitVTOG : ProcResource<1>; // Vector to Scalar - - // Define the resource groups. - def FalkorUnitXY : ProcResGroup<[FalkorUnitX, FalkorUnitY]>; - def FalkorUnitXYZ : ProcResGroup<[FalkorUnitX, FalkorUnitY, FalkorUnitZ]>; - def FalkorUnitXYZB : ProcResGroup<[FalkorUnitX, FalkorUnitY, FalkorUnitZ, - FalkorUnitB]>; - def FalkorUnitZB : ProcResGroup<[FalkorUnitZ, FalkorUnitB]>; - def FalkorUnitVXVY : ProcResGroup<[FalkorUnitVX, FalkorUnitVY]>; - -} - -//===----------------------------------------------------------------------===// -// Map the target-defined scheduler read/write resources and latency for -// Falkor. - -let SchedModel = FalkorModel in { - -// These WriteRes entries are not used in the Falkor sched model. 
-def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Unsupported = 1; } - -// These ReadAdvance entries are not used in the Falkor sched model. 
-def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -// Detailed Refinements -// ----------------------------------------------------------------------------- -include "AArch64SchedFalkorDetails.td" - -} diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedFalkorDetails.td b/suite/synctools/tablegen/AArch64/AArch64SchedFalkorDetails.td deleted file mode 100644 index a3a038f869..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedFalkorDetails.td +++ /dev/null @@ -1,1291 +0,0 @@ -//==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the uop and latency details for the machine model for the -// Qualcomm Falkor subtarget. -// -//===----------------------------------------------------------------------===// - -// Contains all of the Falkor specific SchedWriteRes types. The approach -// below is to define a generic SchedWriteRes for every combination of -// latency and microOps. The naming conventions is to use a prefix, one field -// for latency, and one or more microOp count/type designators. -// Prefix: FalkorWr -// MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD) -// Latency: #cyc -// -// e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued -// down one Z pipe, six SD pipes, four VX pipes and the total latency is -// six cycles. -// -// Contains all of the Falkor specific ReadAdvance types for forwarding logic. -// -// Contains all of the Falkor specific WriteVariant types for immediate zero -// and LSLFast. 
-//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Define 0 micro-op types -def FalkorWr_LdInc_none_2cyc : SchedWriteRes<[]> { - let Latency = 2; - let NumMicroOps = 0; -} -def FalkorWr_StInc_none_2cyc : SchedWriteRes<[]> { - let Latency = 2; - let NumMicroOps = 0; -} -def FalkorWr_none_3cyc : SchedWriteRes<[]> { - let Latency = 3; - let NumMicroOps = 0; -} -def FalkorWr_none_4cyc : SchedWriteRes<[]> { - let Latency = 4; - let NumMicroOps = 0; -} - -//===----------------------------------------------------------------------===// -// Define 1 micro-op types - -def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; } -def FalkorWr_IMUL32_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } -def FalkorWr_IMUL64_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } -def FalkorWr_IMUL64_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; } -def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 0; } -def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 0; } -def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; } -def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; } -def FalkorWr_1XYZ_0cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 0; } -def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; } -def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; } -def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; } -def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; } -def FalkorWr_1none_0cyc : SchedWriteRes<[]> { let Latency = 0; } - -def FalkorWr_1VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 0; } -def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; } -def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; } 
-let Latency = 3 in def FalkorWr_1VXVY_3cyc        : SchedWriteRes<[FalkorUnitVXVY]>;
-let Latency = 4 in def FalkorWr_1VXVY_4cyc        : SchedWriteRes<[FalkorUnitVXVY]>;
-let Latency = 4 in def FalkorWr_VMUL32_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>;
-let Latency = 5 in def FalkorWr_1VXVY_5cyc        : SchedWriteRes<[FalkorUnitVXVY]>;
-let Latency = 5 in def FalkorWr_FMUL32_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>;
-let Latency = 6 in def FalkorWr_1VXVY_6cyc        : SchedWriteRes<[FalkorUnitVXVY]>;
-let Latency = 6 in def FalkorWr_FMUL64_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>;
-
-let Latency = 0 in def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]>;
-let Latency = 0 in def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]>;
-let Latency = 3 in def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]>;
-
-let Latency = 0 in def FalkorWr_1GTOV_0cyc : SchedWriteRes<[FalkorUnitGTOV]>;
-let Latency = 1 in def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>;
-let Latency = 4 in def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>;
-let Latency = 1 in def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>;
-
-//===----------------------------------------------------------------------===//
-// Define 2 micro-op types
-//
-// Every record in this scope gets NumMicroOps = 2; the per-record `let`
-// supplies the latency (and, for two records, explicit ResourceCycles).
-
-let NumMicroOps = 2 in {
-  let Latency = 0 in
-  def FalkorWr_2VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]>;
-  let Latency = 1 in
-  def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]>;
-  let Latency = 2 in
-  def FalkorWr_2VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]>;
-  let Latency = 3 in
-  def FalkorWr_2VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]>;
-  let Latency = 4 in
-  def FalkorWr_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]>;
-  let Latency = 4 in
-  def FalkorWr_VMUL32_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]>;
-  let Latency = 5 in
-  def FalkorWr_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]>;
-  let Latency = 5 in
-  def FalkorWr_FMUL32_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]>;
-  let Latency = 6 in
-  def FalkorWr_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]>;
-  let Latency = 6 in
-  def FalkorWr_FMUL64_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]>;
-
-  let Latency = 4 in
-  def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]>;
-  let Latency = 4 in
-  def FalkorWr_1XYZ_1LD_4cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]>;
-  let Latency = 3 in
-  def FalkorWr_2LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]>;
-
-  let Latency = 5 in
-  def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]>;
-  let Latency = 2 in
-  def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]>;
-  let Latency = 4 in
-  def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]>;
-  let Latency = 10 in
-  def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]>;
-  let Latency = 12 in
-  def FalkorWr_1VX_1VY_12cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]>;
-  let Latency = 14 in
-  def FalkorWr_1VX_1VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]>;
-  let Latency = 21 in
-  def FalkorWr_1VX_1VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]>;
-
-  let Latency = 2 in
-  def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]>;
-  let Latency = 1 in
-  def FalkorWr_2GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]>;
-
-  let Latency = 4 in
-  def FalkorWr_1XYZ_1ST_4cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]>;
-  let Latency = 5 in
-  def FalkorWr_1XYZ_1LD_5cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]>;
-  let Latency = 2 in
-  def FalkorWr_2XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]>;
-  let Latency = 0 in
-  def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]>;
-
-  // These two records also pin per-resource cycle counts, one entry per
-  // listed unit (X first, then Z).
-  let Latency = 8, ResourceCycles = [2, 8] in
-  def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]>;
-  let Latency = 11, ResourceCycles = [2, 11] in
-  def FalkorWr_1X_1Z_11cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]>;
-
-  let Latency = 3 in
-  def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]>;
-  // Only one unit is listed for two micro-ops; presumably the "none"
-  // micro-op occupies no resource -- TODO confirm.
-  let Latency = 3 in
-  def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]>;
-
-  let Latency = 0 in
-  def FalkorWr_1SD_1ST_0cyc : SchedWriteRes<[FalkorUnitSD, FalkorUnitST]>;
-  let Latency = 0 in
-  def FalkorWr_1VSD_1ST_0cyc : SchedWriteRes<[FalkorUnitVSD, FalkorUnitST]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Define 3 micro-op types
-
-let NumMicroOps = 3 in {
-  let Latency = 0 in
-  def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD,
-                                                 FalkorUnitLD]>;
-  let Latency = 3 in
-  def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD,
-                                                 FalkorUnitLD]>;
-
-  // NOTE(review): the FalkorWr_3VXVY_* records list two FalkorUnitVXVY
-  // entries while declaring three micro-ops -- confirm that this is intended.
-  let Latency = 3 in
-  def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]>;
-  let Latency = 4 in
-  def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]>;
-  let Latency = 5 in
-  def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]>;
-}
-
-// Remaining 3 micro-op types.
-let NumMicroOps = 3 in {
-  // NOTE(review): FalkorWr_3VXVY_6cyc and FalkorWr_1LD_2VXVY_4cyc list fewer
-  // units than their names and micro-op counts suggest -- confirm intent.
-  let Latency = 6 in
-  def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]>;
-  let Latency = 4 in
-  def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]>;
-
-  let Latency = 3 in
-  def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]>;
-  let Latency = 3 in
-  def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
-                                         FalkorUnitLD]>;
-  let Latency = 3 in
-  def FalkorWr_2LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
-                                            FalkorUnitZ]>;
-
-  let Latency = 0 in
-  def FalkorWr_1XYZ_1SD_1ST_0cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitSD,
-                                                  FalkorUnitST]>;
-  let Latency = 0 in
-  def FalkorWr_1XYZ_1VSD_1ST_0cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitVSD,
-                                                   FalkorUnitST]>;
-}
-//===----------------------------------------------------------------------===//
-// Define 4 micro-op types
-
-let NumMicroOps = 4 in {
-  let Latency = 14 in
-  def FalkorWr_2VX_2VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
-                                              FalkorUnitVX, FalkorUnitVY]>;
-  let Latency = 20 in
-  def FalkorWr_2VX_2VY_20cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
-                                              FalkorUnitVX, FalkorUnitVY]>;
-  let Latency = 21 in
-  def FalkorWr_2VX_2VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
-                                              FalkorUnitVX, FalkorUnitVY]>;
-  let Latency = 24 in
-  def FalkorWr_2VX_2VY_24cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
-                                              FalkorUnitVX, FalkorUnitVY]>;
-
-  let Latency = 2 in
-  def FalkorWr_4VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
-                                           FalkorUnitVXVY, FalkorUnitVXVY]>;
-  let Latency = 3 in
-  def FalkorWr_4VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
-                                           FalkorUnitVXVY, FalkorUnitVXVY]>;
-  let Latency = 4 in
-  def FalkorWr_4VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
-                                           FalkorUnitVXVY, FalkorUnitVXVY]>;
-  let Latency = 6 in
-  def FalkorWr_4VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
-                                           FalkorUnitVXVY, FalkorUnitVXVY]>;
-
-  let Latency = 3 in
-  def FalkorWr_4LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
-                                         FalkorUnitLD, FalkorUnitLD]>;
-  let Latency = 4 in
-  def FalkorWr_1LD_3VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
-                                               FalkorUnitVXVY, FalkorUnitVXVY]>;
-  // Two units listed for four micro-ops; presumably the two "none"
-  // micro-ops occupy no resource -- TODO confirm.
-  let Latency = 3 in
-  def FalkorWr_2LD_2none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]>;
-  let Latency = 3 in
-  def FalkorWr_2LD_1ST_1SD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitST,
-                                                 FalkorUnitSD, FalkorUnitLD]>;
-  let Latency = 0 in
-  def FalkorWr_2VSD_2ST_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
-                                              FalkorUnitST, FalkorUnitVSD]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Define 5 micro-op types
-
-let NumMicroOps = 5 in {
-  let Latency = 4 in
-  def FalkorWr_1LD_4VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
-                                               FalkorUnitVXVY, FalkorUnitVXVY,
-                                               FalkorUnitVXVY]>;
-  let Latency = 4 in
-  def FalkorWr_2LD_2VXVY_1none_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
-                                                     FalkorUnitVXVY,
-                                                     FalkorUnitVXVY]>;
-  let Latency = 7 in
-  def FalkorWr_5VXVY_7cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
-                                           FalkorUnitVXVY, FalkorUnitVXVY,
-                                           FalkorUnitVXVY]>;
-  let Latency = 0 in
-  def FalkorWr_1XYZ_2ST_2VSD_0cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
-                                                   FalkorUnitVSD, FalkorUnitST,
-                                                   FalkorUnitVSD]>;
-  let Latency = 0 in
-  def FalkorWr_1VXVY_2ST_2VSD_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
-                                                    FalkorUnitVSD, FalkorUnitST,
-                                                    FalkorUnitVSD]>;
-}
-//===----------------------------------------------------------------------===//
-// Define 6 micro-op types
-
-def FalkorWr_2LD_2VXVY_2none_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
-                                                   FalkorUnitVXVY, FalkorUnitVXVY]> {
-  let Latency = 4;
-  let NumMicroOps = 6;
-}
-
-def FalkorWr_2XYZ_2ST_2VSD_0cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
-                                                 FalkorUnitVSD, FalkorUnitXYZ,
-                                                 FalkorUnitST, FalkorUnitVSD]> {
-  let Latency = 0;
-  let NumMicroOps = 6;
-}
-
-def FalkorWr_2VXVY_2ST_2VSD_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
-                                                  FalkorUnitVSD, FalkorUnitVXVY,
-                                                  FalkorUnitST, FalkorUnitVSD]> {
-  let Latency = 0;
-  let NumMicroOps = 6;
-}
-
-def FalkorWr_3VSD_3ST_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
-                                            FalkorUnitST, FalkorUnitVSD,
-                                            FalkorUnitST, FalkorUnitVSD]> {
-  let Latency = 0;
-  let NumMicroOps = 6;
-}
-
-//===----------------------------------------------------------------------===//
-// Define 8 micro-op types
-
-def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
-                                                       FalkorUnitVXVY, FalkorUnitVXVY,
-                                                       FalkorUnitLD, FalkorUnitLD,
-                                                       FalkorUnitVXVY, FalkorUnitVXVY]> {
-  let Latency = 4;
-  let NumMicroOps = 8;
-}
-
-def FalkorWr_4VSD_4ST_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
-                                            FalkorUnitST, FalkorUnitVSD,
-                                            FalkorUnitST, FalkorUnitVSD,
-                                            FalkorUnitST, FalkorUnitVSD]> {
-  let Latency = 0;
-  let NumMicroOps = 8;
-}
-
-//===----------------------------------------------------------------------===//
-// Define 9 micro-op types
-
-def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD,
-                                              FalkorUnitLD, FalkorUnitVXVY,
-                                              FalkorUnitVXVY, FalkorUnitLD,
-                                              FalkorUnitLD, FalkorUnitXYZ,
-                                              FalkorUnitVXVY, FalkorUnitVXVY]> {
-  let Latency = 4;
-  let NumMicroOps = 9;
-}
-
-def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD,
-                                              FalkorUnitLD, FalkorUnitVXVY,
-                                              FalkorUnitVXVY, FalkorUnitXYZ,
-                                              FalkorUnitLD, FalkorUnitLD,
-                                              FalkorUnitVXVY, FalkorUnitVXVY]> {
-  let Latency = 4;
-  let NumMicroOps = 9;
-}
-
-//===----------------------------------------------------------------------===//
-// Define 10 micro-op types
-
-def FalkorWr_2VXVY_4ST_4VSD_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
-                                                  FalkorUnitVSD, FalkorUnitVXVY,
-                                                  FalkorUnitST, FalkorUnitVSD,
-                                                  FalkorUnitST, FalkorUnitVSD,
-                                                  FalkorUnitST, FalkorUnitVSD]> {
-  let Latency = 0;
-  let NumMicroOps = 10;
-}
-
-//===----------------------------------------------------------------------===//
-// Define 12 micro-op types
-
-def FalkorWr_4VXVY_4ST_4VSD_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
-                                                  FalkorUnitVSD, FalkorUnitVXVY,
-                                                  FalkorUnitST, FalkorUnitVSD,
-                                                  FalkorUnitVXVY, FalkorUnitST,
-                                                  FalkorUnitVSD, FalkorUnitVXVY,
-                                                  FalkorUnitST, FalkorUnitVSD]> {
-  let Latency = 0;
-  let NumMicroOps = 12;
-}
-
-// Forwarding logic is modeled for multiply add/accumulate and
-// load/store base register increment.
-//
-// Each SchedReadAdvance<N, [writes]> ties an advance of N cycles to the
-// listed write types only.
-// -----------------------------------------------------------------------------
-def FalkorReadIMA32 : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>;
-def FalkorReadIMA64 : SchedReadAdvance<4, [FalkorWr_IMUL64_1X_4cyc, FalkorWr_IMUL64_1X_5cyc]>;
-def FalkorReadVMA   : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc, FalkorWr_VMUL32_2VXVY_4cyc]>;
-def FalkorReadFMA32 : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc, FalkorWr_FMUL32_2VXVY_5cyc]>;
-def FalkorReadFMA64 : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr_FMUL64_2VXVY_6cyc]>;
-
-def FalkorReadIncLd : SchedReadAdvance<1, [FalkorWr_LdInc_none_2cyc]>;
-def FalkorReadIncSt : SchedReadAdvance<1, [FalkorWr_StInc_none_2cyc]>;
-
-// SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast
-//
-// FalkorImmZPred holds when operand 1 is an immediate equal to 0.
-// FalkorOp1ZrReg holds when operand 1 is the WZR or XZR register.
-// FalkorShiftExtFastPred defers to TII->isFalkorShiftExtFast(*MI).
-//
-// Every SchedWriteVariant below pairs one of these predicates with the write
-// used when it holds, followed by a NoSchedPred entry for all other cases.
-// -----------------------------------------------------------------------------
-def FalkorImmZPred    : SchedPredicate<[{MI->getOperand(1).isImm() &&
-                                         MI->getOperand(1).getImm() == 0}]>;
-def FalkorOp1ZrReg    : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR ||
-
-                                         MI->getOperand(1).getReg() == AArch64::XZR}]>;
-def FalkorShiftExtFastPred : SchedPredicate<[{TII->isFalkorShiftExtFast(*MI)}]>;
-
-def FalkorWr_FMOV  : SchedWriteVariant<[
-                       SchedVar<FalkorOp1ZrReg,  [FalkorWr_1none_0cyc]>,
-                       SchedVar<NoSchedPred,     [FalkorWr_1GTOV_1cyc]>]>;
-
-def FalkorWr_MOVZ  : SchedWriteVariant<[
-                       SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
-                       SchedVar<NoSchedPred,    [FalkorWr_1XYZB_0cyc]>]>; // imm fwd
-
-
-def FalkorWr_ADDSUBsx : SchedWriteVariant<[
-                          SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_1cyc]>,
-                          SchedVar<NoSchedPred,            [FalkorWr_2XYZ_2cyc]>]>;
-
-def FalkorWr_LDRro : SchedWriteVariant<[
-                       SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_3cyc]>,
-                       SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1LD_4cyc]>]>;
-
-def FalkorWr_LDRSro : SchedWriteVariant<[
-                        SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_4cyc]>,
-                        SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1LD_5cyc]>]>;
-
-def FalkorWr_ORRi : SchedWriteVariant<[
-                      SchedVar<FalkorImmZPred, [FalkorWr_1XYZ_0cyc]>, // imm fwd
-                      SchedVar<NoSchedPred,    [FalkorWr_1XYZ_1cyc]>]>;
-
-def FalkorWr_PRFMro : SchedWriteVariant<[
-                        SchedVar<FalkorShiftExtFastPred, [FalkorWr_1ST_3cyc]>,
-                        SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1ST_4cyc]>]>;
-
-def FalkorWr_STRVro : SchedWriteVariant<[
-                        SchedVar<FalkorShiftExtFastPred, [FalkorWr_1VSD_1ST_0cyc]>,
-                        SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1VSD_1ST_0cyc]>]>;
-
-def FalkorWr_STRQro : SchedWriteVariant<[
-                        SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_2ST_2VSD_0cyc]>,
-                        SchedVar<NoSchedPred,            [FalkorWr_2XYZ_2ST_2VSD_0cyc]>]>;
-
-def FalkorWr_STRro : SchedWriteVariant<[
-                       SchedVar<FalkorShiftExtFastPred, [FalkorWr_1SD_1ST_0cyc]>,
-                       SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1SD_1ST_0cyc]>]>;
-
-//===----------------------------------------------------------------------===//
-// Specialize the coarse model by associating instruction groups with the
-// subtarget-defined types. As the model is refined, this will override most
-// of the earlier mappings.
-
-// Miscellaneous
-// -----------------------------------------------------------------------------
-
-// FIXME: This could be better modeled by looking at the regclasses of the operands.
-def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs COPY)>; - -// SIMD Floating-point Instructions -// ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)v2f32$")>; - -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v2i32p)$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(32|64)$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(32|64|v2f32|v2i32)$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64|v2i32)rz$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)v2f32$")>; - -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?Vv4i32v$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)v2f32$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i32p|v2i64p|v2f32)$")>; - -def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v2f32)$")>; -def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTXNv1i64)>; -def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)v2i32(_shift)?$")>; - -def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], - (instregex "^(FMUL|FMULX)(v2f32|(v1i32_indexed|v2i32_indexed))$")>; -def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], - (instrs FMULX32)>; - -def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], - (instregex "^(FMUL|FMULX)v1i64_indexed$")>; -def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], - (instrs FMULX64)>; - -def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32)$")>; - -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v2i64p)$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs FCVTLv4i16, FCVTLv2i32)>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)$")>; - -def : 
InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVv2f32)>; -def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTv2f32)>; - -def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32)$")>; - -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32)$")>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs FCVTLv8i16, FCVTLv4i32)>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32)(_shift)?$")>; - -def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], - (instregex "^(FMUL|FMULX)(v2f64|v4f32|v4i32_indexed)$")>; - -def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], - (instregex "^(FMUL|FMULX)v2i64_indexed$")>; - -def : InstRW<[FalkorWr_3VXVY_4cyc], (instrs FCVTNv4i16, FCVTNv2i32, FCVTXNv2f32)>; -def : InstRW<[FalkorWr_3VXVY_5cyc], (instrs FCVTNv8i16, FCVTNv4i32, FCVTXNv4f32)>; - -def : InstRW<[FalkorWr_2VX_2VY_14cyc],(instrs FDIVv2f64)>; -def : InstRW<[FalkorWr_2VX_2VY_20cyc],(instrs FDIVv4f32)>; -def : InstRW<[FalkorWr_2VX_2VY_21cyc],(instrs FSQRTv2f64)>; -def : InstRW<[FalkorWr_2VX_2VY_24cyc],(instrs FSQRTv4f32)>; - -def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], - (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>; -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], - (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>; - -def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32], - (instregex "^FML(A|S)(v2f32|(v1i32_indexed|v2i32_indexed))$")>; -def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64], - (instregex "^FML(A|S)v1i64_indexed$")>; -def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, FalkorReadFMA32], - (instregex "^FML(A|S)(v4f32|v4i32_indexed)$")>; -def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64], - (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>; - -// SIMD Integer Instructions -// ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^ADD(v1i64|v2i32|v4i16|v8i8)$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], 
(instrs ADDPv2i64p)>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(AND|ORR|ORN|BIC|EOR)v8i8$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIC|ORR)(v2i32|v4i16)$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^NEG(v1i64|v2i32|v4i16|v8i8)$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^SUB(v1i64|v2i32|v4i16|v8i8)$")>; - -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHLv1i64$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHRd$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP|CM(EQ|GE|HS|GT|HI))(v1i64|v2i32|v4i16|v8i8)$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs PMULv8i8)>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHLd$")>; - -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)(ABD|ADALP)(v8i8|v4i16|v2i32)(_v.*)?$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)ADDLVv4i16v$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSHLU?(d|s|h|b|(v8i8|v4i16|v2i32)_shift)$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL)(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; -def : 
InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(s|h|b)$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHRd$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^R?SHRN(v2i32|v4i16|v8i8)_shift$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs ADDVv4i16v)>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^S(L|R)I(d|(v8i8|v4i16|v2i32)_shift)$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQABS(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v1i8|v1i16|v1i32|v1i64)$")>; - -def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)ADDLVv8i8v$")>; -def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>; -def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs ADDVv8i8v)>; -def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], - (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>; -def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], - (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>; -def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], - (instregex "^SQDMULL(i16|i32)$")>; -def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], - (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>; - -def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>; - -def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs ADDVv4i32v)>; - -def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs ADDVv8i16v)>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(ADD|SUB)HNv.*$")>; -def : 
InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)ABA(v2i32|v4i16|v8i8)$")>; - -def : InstRW<[FalkorWr_2VXVY_5cyc], (instrs ADDVv16i8v)>; - -def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32)_shift?$")>; -def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^R(ADD|SUB)HNv.*$")>; - -def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^ADD(v16i8|v8i16|v4i32|v2i64)$")>; -def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs ADDPv2i64)>; // sz==11 -def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(AND|ORR|ORN|BIC|EOR)v16i8$")>; -def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIC|ORR)(v8i16|v4i32)$")>; -def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(NEG|SUB)(v16i8|v8i16|v4i32|v2i64)$")>; - -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)ADDLv.*$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v16i8|v2i64|v4i32|v8i16)(_v.*)?$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SHR(v16i8|v8i16|v4i32|v2i64)_shift$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SUBLv.*$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS)(v16i8|v2i64|v4i32|v8i16)$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^ADDP(v4i32|v8i16|v16i8)$")>; // sz!=11 -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL(v8i8|v16i8)$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>; - -def : InstRW<[FalkorWr_2VXVY_3cyc], 
(instregex "^(S|U)R?SRA(v2i64|v4i32|v8i16|v16i8)_shift$")>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)ABD(v16i8|v8i16|v4i32|v2i64)$")>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)ABDLv.*$")>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(ADALP|QADD)(v16i8|v8i16|v4i32|v2i64)(_v.*)?$")>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^R?SHRN(v2i64|v4i32|v8i16|v16i8)_shift$")>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL(v1i64|v2i64)$")>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>; - -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], - (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], - (instregex "^SQDMULLv.*$")>; -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], - (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>; - -def : InstRW<[FalkorWr_3VXVY_3cyc], (instregex "^(S|U)ADDLVv4i32v$")>; - -def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(S|U)ADDLVv8i16v$")>; - -def : InstRW<[FalkorWr_3VXVY_6cyc], (instregex "^(S|U)ADDLVv16i8v$")>; - -def : InstRW<[FalkorWr_4VXVY_2cyc], (instregex "^(S|U)(ADD|SUB)Wv.*$")>; - -def : InstRW<[FalkorWr_4VXVY_3cyc], (instregex "^(S|U)ABALv.*$")>; - -def : InstRW<[FalkorWr_4VXVY_4cyc], (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>; - -def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], - (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>; -def : 
InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], - (instregex "^SQD(MLAL|MLSL)v[248].*$")>; - -// SIMD Load Instructions -// ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], - (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>; -def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], - (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instrs LD2i64)>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], - (instrs LD2i64_POST)>; - -def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd], (instregex "^LD1i(8|16|32)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd], - (instregex "^LD1i(8|16|32)_POST$")>; - -def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], - (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(8b|4h|2s)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], - (instregex "^LD2Twov(8b|4h|2s)_POST$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], - (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>; - -def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], - (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; -def : 
InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], - (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], - (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instrs LD3i64)>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], - (instrs LD3i64_POST)>; -def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instrs LD4i64)>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], - (instrs LD4i64_POST)>; - -def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd], (instregex "^LD2i(8|16|32)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd], - (instregex "^LD2i(8|16|32)_POST$")>; - -def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], - (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], - (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>; - -def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd], - (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instrs LD3Threev2d)>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd], - (instrs LD3Threev2d_POST)>; -def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, 
FalkorReadIncLd], - (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd], (instregex "^LD3i(8|16|32)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd], - (instregex "^LD3i(8|16|32)_POST$")>; - -def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], - (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], - (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>; - -def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd], - (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instrs LD4Fourv2d)>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd], - (instrs LD4Fourv2d_POST)>; -def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd], - (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd], (instregex "^LD4i(8|16|32)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd], - (instregex "^LD4i(8|16|32)_POST$")>; - -def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd], - (instregex "^LD3Threev(8b|4h|2s)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd], - (instregex "^LD3Threev(8b|4h|2s)_POST$")>; - -def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd], - (instregex "^LD4Fourv(8b|4h|2s)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd], - 
(instregex "^LD4Fourv(8b|4h|2s)_POST$")>; - -def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd], - (instregex "^LD3Threev(16b|8h|4s)$")>; - -def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd], - (instregex "^LD4Fourv(16b|8h|4s)$")>; - -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, FalkorReadIncLd], - (instregex "^LD3Threev(16b|8h|4s)_POST$")>; - -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, FalkorReadIncLd], - (instregex "^LD4Fourv(16b|8h|4s)_POST$")>; - -// Arithmetic and Logical Instructions -// ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>; -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADC(S)?(W|X)r$")>; -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADD(S)?(W|X)r(r|i)$")>; -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>; -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^AND(S)?(W|X)r(i|r|s)$")>; -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>; -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>; -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>; -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>; -def : InstRW<[FalkorWr_ORRi], (instregex "^ORR(W|X)ri$")>; -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(r|s)$")>; -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SBC(S)?(W|X)r$")>; -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SUB(S)?(W|X)r(r|i)$")>; -def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^ADD(S)?(W|X)r(s|x|x64)$")>; -def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>; - -// SIMD Miscellaneous Instructions -// ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>; -def : 
InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(i8|i16|i32|i64)$")>; -def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>; -def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL|BSP)v8i8$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs EXTv8i8)>; -def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; // imm fwd -def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs NOTv8i8)>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^REV(16|32|64)v.*$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>; - -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v2i32|v4i16|v8i8)$")>; - -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "(S|U)QXTU?Nv.*$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPXv1i32, FRECPXv1i64)>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs URECPEv2i32, URSQRTEv2i32)>; - -def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], - (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>; - -def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], - (instrs FRECPS64, FRSQRTS64)>; - -def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc], - (instregex "^INSv(i32|i64)(gpr|lane)$")>; -def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>; -def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL|BSP)v16i8$")>; -def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>; -def : InstRW<[FalkorWr_2VXVY_0cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; // imm fwd -def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>; -def : InstRW<[FalkorWr_2VXVY_1cyc], 
(instrs TBLv16i8One)>; - -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs URECPEv4i32, URSQRTEv4i32)>; - -def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs TBLv8i8Two)>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^TBX(v8|v16)i8One$")>; - -def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], - (instrs FRECPSv4f32, FRSQRTSv4f32)>; - -def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], - (instrs FRECPSv2f64, FRSQRTSv2f64)>; - -def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBL(v8i8Three|v16i8Two)$")>; -def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBX(v8i8Two|v16i8Two)$")>; - -def : InstRW<[FalkorWr_4VXVY_6cyc], (instregex "^TBL(v8i8Four|v16i8Three)$")>; -def : InstRW<[FalkorWr_4VXVY_6cyc], (instregex "^TBX(v8i8Three|v16i8Three)$")>; - -def : InstRW<[FalkorWr_5VXVY_7cyc], (instrs TBLv16i8Four)>; -def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>; - -// SIMD Store Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^STR(Q|D|S|H|B)ui$")>; -def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^STR(Q|D|S|H|B)(post|pre)$")>; -def : InstRW<[FalkorWr_STRVro, ReadDefault, FalkorReadIncSt], - (instregex "^STR(D|S|H|B)ro(W|X)$")>; -def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], - (instregex "^STPQi$")>; -def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], - (instregex "^STPQ(post|pre)$")>; -def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], - (instregex "^STP(D|S)(i)$")>; -def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, 
FalkorReadIncSt], - (instregex "^STP(D|S)(post|pre)$")>; -def : InstRW<[FalkorWr_STRQro, ReadDefault, FalkorReadIncSt], - (instregex "^STRQro(W|X)$")>; -def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^STUR(Q|D|S|B|H)i$")>; -def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], - (instrs STNPDi, STNPSi)>; -def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], - (instrs STNPQi)>; - -def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>; -def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST1(One(v8b|v4h|v2s|v1d)_POST|(i8|i16|i32|i64)_POST)$")>; -def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))$")>; -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>; -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))_POST$")>; - -def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>; -def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>; -def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST3(i8|i16|i32|i64)$")>; -def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST4(i8|i16|i32|i64)$")>; -// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). 
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>; -// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>; -// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST3(i8|i16|i32|i64)_POST$")>; -// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST4(i8|i16|i32|i64)_POST$")>; - -def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST3Three(v8b|v4h|v2s)$")>; -// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST3Three(v8b|v4h|v2s)_POST$")>; - -def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>; -def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], - (instrs ST3Threev2d)>; -// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>; -// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). 
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], - (instrs ST3Threev2d_POST)>; - -def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST4Four(v8b|v4h|v2s)$")>; -// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST4Four(v8b|v4h|v2s)_POST$")>; - -def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>; -def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], - (instrs ST4Fourv2d)>; -// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>; -// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], - (instrs ST4Fourv2d_POST)>; - -def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST3Three(v16b|v8h|v4s)$")>; -// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST3Three(v16b|v8h|v4s)_POST$")>; - -def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST4Four(v16b|v8h|v4s)$")>; -// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). 
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^ST4Four(v16b|v8h|v4s)_POST$")>; - -// Branch Instructions -// ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1none_0cyc], (instrs B, TCRETURNdi)>; -def : InstRW<[FalkorWr_1Z_0cyc], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>; -def : InstRW<[FalkorWr_1Z_0cyc], (instrs RET_ReallyLR, TCRETURNri)>; -def : InstRW<[FalkorWr_1ZB_0cyc], (instrs Bcc)>; -def : InstRW<[FalkorWr_1XYZB_0cyc], (instrs BL)>; -def : InstRW<[FalkorWr_1Z_1XY_0cyc], (instrs BLR)>; - -// Cryptography Extensions -// ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs SHA1Hrr)>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs AESIMCrr, AESMCrr)>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs AESDrr, AESErr)>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs SHA1SU0rrr, SHA1SU1rr, SHA256SU0rr)>; -def : InstRW<[FalkorWr_1VX_1VY_4cyc], (instregex "^SHA1(C|M|P)rrr$")>; -def : InstRW<[FalkorWr_1VX_1VY_5cyc], (instrs SHA256H2rrr, SHA256Hrrr)>; -def : InstRW<[FalkorWr_4VXVY_3cyc], (instrs SHA256SU1rrr)>; - -// FP Load Instructions -// ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], - (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], - (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>; -def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], - (instregex "^LDUR(Q|D|S|H|B)i$")>; -def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd], - (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>; -def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], - (instrs LDNPQi)>; -def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], - (instrs LDPQi)>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], - (instregex 
"LDNP(D|S)i$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], - (instregex "LDP(D|S)i$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], - (instregex "LDP(D|S)(pre|post)$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], - (instregex "^LDPQ(pre|post)$")>; - -// FP Data Processing Instructions -// ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCCMP(E)?(S|D)rr$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCMP(E)?(S|D)r(r|i)$")>; -def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(S|D)r$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCSEL(S|D)rrr$")>; - -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?(S|D)rr$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?Pv2i(32|64)p$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs FCVTSHr, FCVTDHr)>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(S|D)r$")>; - -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FABD(32|64)$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FADD|FSUB)(S|D)rr$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTHSr, FCVTHDr)>; - -def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTSDr, FCVTDSr)>; - -def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], - (instregex "^F(N)?MULSrr$")>; - -def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], - (instregex "^F(N)?MULDrr$")>; - -def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVSrr)>; -def : InstRW<[FalkorWr_1VX_1VY_14cyc],(instrs FDIVDrr)>; -def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTSr)>; -def : InstRW<[FalkorWr_1VX_1VY_21cyc],(instrs FSQRTDr)>; - -def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32], - (instregex "^F(N)?M(ADD|SUB)Srrr$")>; 
-def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64], - (instregex "^F(N)?M(ADD|SUB)Drrr$")>; - -// FP Miscellaneous Instructions -// ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(WS|XD|XDHigh)r$")>; -def : InstRW<[FalkorWr_1GTOV_0cyc], (instregex "^FMOV(S|D)i$")>; // imm fwd -def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)S(W|X)(D|S)ri$")>; -def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(d|s)$")>; -def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(SW|DX|DXHigh)r$")>; -def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "^FMOV(Sr|Dr|v.*_ns)$")>; // imm fwd -// FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov wzr/xzr -def : InstRW<[FalkorWr_2VXVY_0cyc], (instrs FMOVD0, FMOVS0)>; // imm fwd - -def : InstRW<[FalkorWr_1GTOV_4cyc], (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>; -def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>; - -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>; - -// Load Instructions -// ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>; -def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFUMi)>; -def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], - (instregex "^LDNP(W|X)i$")>; -def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], - (instregex "^LDP(W|X)i$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], - (instregex "^LDP(W|X)(post|pre)$")>; -def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], - (instregex "^LDR(BB|HH|W|X)ui$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], - (instregex "^LDR(BB|HH|W|X)(post|pre)$")>; -def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd], - (instregex 
"^LDR(BB|HH|W|X)ro(W|X)$")>; -def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], - (instregex "^LDR(W|X)l$")>; -def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], - (instregex "^LDTR(B|H|W|X)i$")>; -def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], - (instregex "^LDUR(BB|HH|W|X)i$")>; -def : InstRW<[FalkorWr_PRFMro], (instregex "^PRFMro(W|X)$")>; -def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd], - (instrs LDPSWi)>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd], - (instregex "^LDPSW(post|pre)$")>; -def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], - (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>; -def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4cyc, FalkorReadIncLd], - (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>; -def : InstRW<[FalkorWr_LDRSro, FalkorReadIncLd], - (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>; -def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], - (instrs LDRSWl)>; -def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], - (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>; -def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], - (instregex "^LDURS(BW|BX|HW|HX|W)i$")>; - -// Miscellaneous Data-Processing Instructions -// ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(S|U)?BFM(W|X)ri$")>; -def : InstRW<[FalkorWr_1X_2cyc], (instregex "^CRC32.*$")>; -def : InstRW<[FalkorWr_1XYZ_2cyc], (instregex "^(CLS|CLZ|RBIT|REV|REV16|REV32)(W|X)r$")>; -def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^EXTR(W|X)rri$")>; - -// Divide and Multiply Instructions -// ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64], - (instregex "^(S|U)M(ADD|SUB)Lrrr$")>; -def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32], - (instregex "^M(ADD|SUB)Wrrr$")>; - -def : InstRW<[FalkorWr_IMUL64_1X_5cyc], (instregex 
"^(S|U)MULHrr$")>; -def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64], - (instregex "^M(ADD|SUB)Xrrr$")>; - -def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>; -def : InstRW<[FalkorWr_1X_1Z_11cyc], (instregex "^(S|U)DIVXr$")>; - -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], - (instregex "^(S|U)MULLv.*$")>; -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], - (instregex "^(S|U)(MLAL|MLSL)v.*$")>; - -// Move and Shift Instructions -// ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV)(W|X)r$")>; -def : InstRW<[FalkorWr_1XYZ_0cyc], (instregex "^MOVK(W|X)i$")>; // imm fwd -def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^ADRP?$")>; // imm fwd -def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^MOVN(W|X)i$")>; // imm fwd -def : InstRW<[FalkorWr_MOVZ], (instregex "^MOVZ(W|X)i$")>; -def : InstRW<[FalkorWr_1XYZ_0cyc], (instrs MOVi32imm, MOVi64imm)>; // imm fwd (approximation) -def : InstRW<[WriteSequence<[FalkorWr_1XYZ_1cyc, FalkorWr_1XYZ_1cyc]>], - (instrs MOVaddr, MOVaddrBA, MOVaddrCP, MOVaddrEXT, MOVaddrJT, MOVaddrTLS)>; -def : InstRW<[WriteSequence<[FalkorWr_1LD_3cyc, FalkorWr_1XYZ_1cyc]>], - (instrs LOADgot)>; - -// Other Instructions -// ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1LD_0cyc], (instrs CLREX, DMB, DSB)>; -def : InstRW<[FalkorWr_1none_0cyc], (instrs BRK, DCPS1, DCPS2, DCPS3, HINT, HLT, HVC, ISB, SMC, SVC)>; -def : InstRW<[FalkorWr_1ST_0cyc], (instrs SYSxt, SYSLxt)>; -def : InstRW<[FalkorWr_1Z_0cyc], (instrs MSRpstateImm1, MSRpstateImm4)>; - -def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], - (instregex "^(LDAR(B|H|W|X)|LDAXR(B|H|W|X)|LDXR(B|H|W|X))$")>; -def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], - (instregex "^(LDAXP(W|X)|LDXP(W|X))$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS, MOVbaseTLS)>; - -def : 
InstRW<[FalkorWr_1LD_1Z_3cyc], (instrs DRPS)>; - -def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>; -def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], - (instrs STNPWi, STNPXi)>; -def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>; - -def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>; -def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^STLR(B|H|W|X)$")>; -def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], - (instregex "^STXP(W|X)$")>; -def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], - (instregex "^STXR(B|H|W|X)$")>; - -def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, ReadDefault, FalkorReadIncSt], - (instregex "^STLXP(W|X)$")>; -def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, FalkorReadIncSt], - (instregex "^STLXR(B|H|W|X)$")>; - -// Store Instructions -// ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], - (instregex "^STP(W|X)i$")>; -def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], - (instregex "^STP(W|X)(post|pre)$")>; -def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^STR(BB|HH|W|X)ui$")>; -def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^STR(BB|HH|W|X)(post|pre)$")>; -def : InstRW<[FalkorWr_STRro, ReadDefault, FalkorReadIncSt], - (instregex "^STR(BB|HH|W|X)ro(W|X)$")>; -def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^STTR(B|H|W|X)i$")>; -def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], - (instregex "^STUR(BB|HH|W|X)i$")>; - diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedKryo.td b/suite/synctools/tablegen/AArch64/AArch64SchedKryo.td deleted file mode 
100644 index cc568a2f2f..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedKryo.td +++ /dev/null @@ -1,140 +0,0 @@ -//==- AArch64SchedKryo.td - Qualcomm Kryo Scheduling Defs ---*- tablegen -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for Qualcomm Kryo to support -// instruction scheduling and other instruction cost heuristics. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// The issue width is set to five, matching the five issue queues for expanded -// uops. Now, the latency spreadsheet has information based on fragmented uops, -// but these do not actually take up an issue queue. - -def KryoModel : SchedMachineModel { - let IssueWidth = 5; // 5-wide issue for expanded uops - let MicroOpBufferSize = 128; // Out-of-order with temporary unified issue buffer - let LoadLatency = 4; // Optimistic load latency - let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch - - // Enable partial & runtime unrolling. The magic number is chosen based on - // experiments and benchmarking data. - let LoopMicroOpBufferSize = 16; - let CompleteModel = 1; - - list UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F, - SMEUnsupported.F); - // FIXME: Remove when all errors have been fixed. - let FullInstRWOverlapCheck = 0; -} - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available on Kryo. 
- -let SchedModel = KryoModel in { - def KryoUnitXA : ProcResource<1>; // Type X(A) micro-ops - def KryoUnitXB : ProcResource<1>; // Type X(B) micro-ops - def KryoUnitYA : ProcResource<1>; // Type Y(A) micro-ops - def KryoUnitYB : ProcResource<1>; // Type Y(B) micro-ops - def KryoUnitX : ProcResGroup<[KryoUnitXA, // Type X micro-ops - KryoUnitXB]>; - def KryoUnitY : ProcResGroup<[KryoUnitYA, // Type Y micro-ops - KryoUnitYB]>; - def KryoUnitXY : ProcResGroup<[KryoUnitXA, // Type XY micro-ops - KryoUnitXB, - KryoUnitYA, - KryoUnitYB]>; - def KryoUnitLSA : ProcResource<1>; // Type LS(A) micro-ops - def KryoUnitLSB : ProcResource<1>; // Type LS(B) micro-ops - def KryoUnitLS : ProcResGroup<[KryoUnitLSA, // Type LS micro-ops - KryoUnitLSB]>; -} - -let SchedModel = KryoModel in { - -//===----------------------------------------------------------------------===// -// Map the target-defined scheduler read/write resources and latency for -// Kryo. - -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes - { let Latency = 2; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 2; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 2; let NumMicroOps = 2; } -def : WriteRes { let Latency = 2; } -def : WriteRes - { let Latency = 8; let NumMicroOps = 1; } // Fragent -1 -def : WriteRes - { let Latency = 8; let NumMicroOps = 1; } // Fragent -1 -def : WriteRes { let Latency = 5; } -def : WriteRes { let Latency = 5; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } -def : WriteRes - { let Latency = 3; let NumMicroOps = 2; } -def : WriteRes { let Latency = 2; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 6; } -def : WriteRes - { let 
Latency = 6; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1 -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } - -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -def : WriteRes { let Latency = 4; } - -def : WriteRes { let Unsupported = 1; } - -// No forwarding logic is modelled yet. -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - - -//===----------------------------------------------------------------------===// -// Specialize the coarse model by associating instruction groups with the -// subtarget-defined types. As the modeled is refined, this will override most -// of the above SchedWriteRes and SchedAlias mappings. - -// Miscellaneous -// ----------------------------------------------------------------------------- - -def : InstRW<[WriteI], (instrs COPY)>; - - -// Detailed Refinedments -// ----------------------------------------------------------------------------- -include "AArch64SchedKryoDetails.td" - - -} // SchedModel = KryoModel diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedKryoDetails.td b/suite/synctools/tablegen/AArch64/AArch64SchedKryoDetails.td deleted file mode 100644 index bc5ad0f8be..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedKryoDetails.td +++ /dev/null @@ -1,2377 +0,0 @@ -//=- AArch64SchedKryoDetails.td - QC Kryo Scheduling Defs ----*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the uop and latency details for the machine model for the -// Qualcomm Kryo subtarget. -// -//===----------------------------------------------------------------------===// - -def KryoWrite_3cyc_X_noRSV_138ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_X_noRSV_138ln], - (instregex "(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)")>; - -def KryoWrite_3cyc_X_X_139ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_X_X_139ln], - (instregex "(S|U)R?SRA(v2i64|v4i32|v8i16|v16i8)_shift")>; - -def KryoWrite_4cyc_XY_XY_noRSV_172ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 4; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_4cyc_XY_XY_noRSV_172ln], - (instregex "(S|U)ABA(v8i8|v4i16|v2i32)")>; -def KryoWrite_4cyc_XY_XY_XY_XY_178ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY, KryoUnitXY]> { - let Latency = 4; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_4cyc_XY_XY_XY_XY_178ln], - (instregex "(S|U)ABA(v16i8|v8i16|v4i32)")>; -def KryoWrite_3cyc_XY_XY_XY_XY_177ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_3cyc_XY_XY_XY_XY_177ln], - (instregex "(S|U)ABALv.*")>; -def KryoWrite_3cyc_XY_XY_166ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_XY_166ln], - (instregex "(S|U)(ABD|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)")>; -def KryoWrite_3cyc_XY_noRSV_159ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_159ln], - (instregex "(S|U)(ABD|RHADD)(v8i8|v4i16|v2i32)")>; -def KryoWrite_3cyc_XY_XY_165ln : - SchedWriteRes<[KryoUnitXY, 
KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_XY_165ln], - (instregex "(S|U)ABDLv.*")>; -def KryoWrite_3cyc_X_noRSV_154ln : - SchedWriteRes<[KryoUnitX]> { -let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_X_noRSV_154ln], - (instregex "(S|U)ADALP(v8i8|v4i16|v2i32)_v.*")>; -def KryoWrite_3cyc_X_X_155ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_X_X_155ln], - (instregex "(S|U)ADALP(v16i8|v8i16|v4i32)_v.*")>; -def KryoWrite_2cyc_XY_XY_151ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_151ln], - (instregex "(S|U)(ADD|SUB)Lv.*")>; -def KryoWrite_2cyc_XY_noRSV_148ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_noRSV_148ln], - (instregex "((S|U)ADDLP|ABS)(v2i32|v4i16|v8i8)(_v.*)?")>; -def KryoWrite_2cyc_XY_XY_150ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_150ln], - (instregex "((S|U)ADDLP|ABS)(v2i64|v4i32|v8i16|v16i8)(_v.*)?")>; -def KryoWrite_3cyc_XY_XY_XY_noRSV_179ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_3cyc_XY_XY_XY_noRSV_179ln], - (instrs SADDLVv4i32v, UADDLVv4i32v)>; -def KryoWrite_5cyc_XY_XY_XY_noRSV_180ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY]> { - let Latency = 5; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_5cyc_XY_XY_XY_noRSV_180ln], - (instrs SADDLVv8i16v, UADDLVv8i16v)>; -def KryoWrite_6cyc_XY_XY_X_noRSV_181ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitX]> { - let Latency = 6; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_6cyc_XY_XY_X_noRSV_181ln], - (instrs SADDLVv16i8v, UADDLVv16i8v)>; -def KryoWrite_3cyc_XY_noRSV_158ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let 
NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_158ln], - (instrs SADDLVv4i16v, UADDLVv4i16v, ADDVv4i16v)>; -def KryoWrite_4cyc_X_noRSV_169ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_noRSV_169ln], - (instrs SADDLVv8i8v, UADDLVv8i8v, ADDVv8i8v)>; -def KryoWrite_2cyc_XY_XY_XY_XY_176ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_XY_XY_176ln], - (instregex "(S|U)(ADDW|SUBW)v.*")>; -def KryoWrite_4cyc_X_noRSV_40ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_noRSV_40ln], - (instregex "(S|U)CVTFS(W|X)(D|S)ri")>; -def KryoWrite_4cyc_X_noRSV_97ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_noRSV_97ln], - (instregex "(S|U)CVTFU(W|X)(D|S)ri")>; -def KryoWrite_4cyc_X_noRSV_110ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_noRSV_110ln], - (instregex "(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>; -def KryoWrite_4cyc_X_X_114ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_X_114ln], - (instregex "(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>; -def KryoWrite_1cyc_XA_Y_98ln : - SchedWriteRes<[KryoUnitXA, KryoUnitY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XA_Y_98ln], - (instregex "(S|U)DIV(_Int)?(W|X)r")>; -def KryoWrite_2cyc_XY_XY_152ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_152ln], - (instregex "(S|U)H(ADD|SUB)(v16i8|v8i16|v4i32)")>; -def KryoWrite_2cyc_XY_noRSV_149ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_noRSV_149ln], - (instregex 
"((S|U)H(ADD|SUB)|ADDP)(v8i8|v4i16|v2i32)")>; -def KryoWrite_4cyc_X_70ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 4; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_4cyc_X_70ln], - (instregex "(S|U)(MADDL|MSUBL)rrr")>; -def KryoWrite_4cyc_X_X_191ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_X_191ln], - (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>; -def KryoWrite_1cyc_XY_195ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_1cyc_XY_195ln], - (instregex "(S|U)MOVv.*")>; -def KryoWrite_5cyc_X_71ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 5; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_5cyc_X_71ln], - (instrs SMULHrr, UMULHrr)>; -def KryoWrite_3cyc_XY_noRSV_186ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_186ln], - (instregex "^(S|U)QADD(v8i8|v4i16|v2i32)")>; -def KryoWrite_3cyc_XY_XY_187ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_XY_187ln], - (instregex "^(S|U)QADD(v16i8|v8i16|v4i32|v2i64)")>; -def KryoWrite_3cyc_XY_noRSV_69ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_69ln], - (instregex "(S|U|SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64)")>; -def KryoWrite_3cyc_XY_noRSV_248ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_248ln], - (instregex "(S|U)QSHLU?(d|s|h|b|(v8i8|v4i16|v2i32)_shift)$")>; -def KryoWrite_3cyc_XY_XY_250ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_XY_250ln], - (instregex "(S|U)(QSHLU?|RSHR)(v16i8|v8i16|v4i32|v2i64)_shift$")>; -def KryoWrite_3cyc_XY_noRSV_246ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} 
-def : InstRW<[KryoWrite_3cyc_XY_noRSV_246ln], - (instregex "(S|U)(QSHL|RSHL|QRSHL)(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32)$")>; -def KryoWrite_3cyc_XY_XY_251ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_XY_251ln], - (instregex "(S|U)(QSHL|RSHL|QRSHL)(v16i8|v8i16|v4i32|v2i64)$")>; -def KryoWrite_6cyc_XY_X_238ln : - SchedWriteRes<[KryoUnitXY, KryoUnitX]> { - let Latency = 6; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_6cyc_XY_X_238ln], - (instregex "((S|U)QR?SHRN|SQR?SHRUN)(v16i8|v8i16|v4i32)_shift$")>; -def KryoWrite_3cyc_XY_noRSV_249ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_249ln], - (instregex "((S|U)QR?SHRN|SQR?SHRUN)(s|h|b)?")>; -def KryoWrite_6cyc_XY_X_noRSV_252ln : - SchedWriteRes<[KryoUnitXY, KryoUnitX]> { - let Latency = 6; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_6cyc_XY_X_noRSV_252ln], - (instregex "((S|U)QR?SHRN|SQR?SHRUN)(v8i8|v4i16|v2i32)_shift?")>; -def KryoWrite_3cyc_XY_noRSV_161ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_161ln], - (instregex "(S|U)QSUB(v8i8|v4i16|v2i32|v1i64|v1i32|v1i16|v1i8)")>; -def KryoWrite_3cyc_XY_noRSV_163ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_163ln], - (instregex "(S|U)QXTU?N(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)")>; -def KryoWrite_3cyc_XY_noRSV_162ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_162ln], - (instregex "(S|U)QXTU?N(v1i8|v1i16|v1i32)")>; -def KryoWrite_3cyc_XY_noRSV_247ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_247ln], - (instregex "(S|U)RSHR(d|(v8i8|v4i16|v2i32)_shift)$")>; -def KryoWrite_2cyc_XY_noRSV_239ln : - SchedWriteRes<[KryoUnitXY]> { - let 
Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_noRSV_239ln], - (instregex "(S|U)SHL(d|v8i8|v4i16|v2i32|v1i64)$")>; -def KryoWrite_2cyc_XY_XY_243ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_243ln], - (instregex "(S|U)SHL(v16i8|v8i16|v4i32|v2i64)$")>; -def KryoWrite_2cyc_XY_XY_241ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_241ln], - (instregex "(S|U)?SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>; -def KryoWrite_2cyc_XY_noRSV_240ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_noRSV_240ln], - (instregex "((S|U)SHR|SHL)(d|(v8i8|v4i16|v2i32)_shift)$")>; -def KryoWrite_2cyc_XY_XY_242ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_242ln], - (instregex "((S|U)SHR|SHL)(v16i8|v8i16|v4i32|v2i64)_shift$")>; -def KryoWrite_2cyc_XY_XY_183ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_183ln], - (instregex "(S|U)(MAX|MIN)P?(v16i8|v8i16|v4i32)")>; -def KryoWrite_2cyc_XY_noRSV_182ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_noRSV_182ln], - (instregex "(S|U)(MAX|MIN)P?(v8i8|v4i16|v2i32)")>; -def KryoWrite_3cyc_XY_noRSV_184ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_184ln], - (instregex "(S|U)(MAX|MIN)V(v4i16v|v8i8v|v4i32)")>; -def KryoWrite_4cyc_X_noRSV_185ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_noRSV_185ln], - (instregex "(S|U)(MAX|MIN)V(v16i8v|v8i16v)")>; -def KryoWrite_2cyc_XY_noRSV_67ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps 
= 2; -} -def : InstRW<[KryoWrite_2cyc_XY_noRSV_67ln], - (instrs ABSv1i64)>; -def KryoWrite_1cyc_XY_63ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_1cyc_XY_63ln, ReadI, ReadI], - (instregex "ADC.*")>; -def KryoWrite_1cyc_XY_63_1ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_1cyc_XY_63_1ln], - (instregex "ADR.*")>; -def KryoWrite_1cyc_XY_62ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_1cyc_XY_62ln, ReadI], - (instregex "ADDS?(W|X)ri")>; -def KryoWrite_2cyc_XY_XY_64ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_64ln, ReadI, ReadI], - (instregex "ADDS?(W|X)r(r|s|x)(64)?")>; -def KryoWrite_1cyc_XY_noRSV_65ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_noRSV_65ln], - (instrs ADDv1i64)>; -def KryoWrite_1cyc_XY_noRSV_144ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_noRSV_144ln], - (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>; -def KryoWrite_1cyc_XY_XY_146ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_XY_146ln], - (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>; -def KryoWrite_4cyc_XY_X_noRSV_171ln : - SchedWriteRes<[KryoUnitXY, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_4cyc_XY_X_noRSV_171ln], - (instregex "(ADD|SUB)HNv.*")>; -def KryoWrite_1cyc_XY_noRSV_66ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_noRSV_66ln], - (instrs ADDPv2i64p)>; -def KryoWrite_2cyc_XY_XY_153ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_153ln], - 
(instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>; -def KryoWrite_3cyc_XY_XY_noRSV_170ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_3cyc_XY_XY_noRSV_170ln], - (instrs ADDVv4i32v)>; -def KryoWrite_4cyc_XY_XY_noRSV_173ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 4; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_4cyc_XY_XY_noRSV_173ln], - (instrs ADDVv8i16v)>; -def KryoWrite_5cyc_XY_X_noRSV_174ln : - SchedWriteRes<[KryoUnitXY, KryoUnitX]> { - let Latency = 5; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_5cyc_XY_X_noRSV_174ln], - (instrs ADDVv16i8v)>; -def KryoWrite_3cyc_XY_XY_X_X_27ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitX, KryoUnitX]> { - let Latency = 3; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_3cyc_XY_XY_X_X_27ln], - (instrs AESDrr, AESErr)>; -def KryoWrite_2cyc_X_X_22ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_X_X_22ln], - (instrs AESIMCrr, AESMCrr)>; -def KryoWrite_1cyc_XY_noRSV_76ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_noRSV_76ln], - (instregex "((AND|ORN|EOR|EON)S?(Wr[rsi]|v8i8|v4i16|v2i32)|(ORR|BIC)S?(Wr[rs]|v8i8|v4i16|v2i32))")>; -def KryoWrite_1cyc_XY_XY_79ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_XY_79ln], - (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>; -def KryoWrite_1cyc_X_72ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 1; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_1cyc_X_72ln], - (instregex "(S|U)?BFM.*")>; -def KryoWrite_1cyc_XY_noRSV_77ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_noRSV_77ln], - (instregex "(BIC|ORR)S?Wri")>; -def KryoWrite_1cyc_XY_XY_78ln : - 
SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_XY_78ln], - (instregex "(BIC|ORR)S?Xri")>; -def KryoWrite_1cyc_X_noRSV_74ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_X_noRSV_74ln], - (instrs BIFv8i8, BITv8i8, BSLv8i8, BSPv8i8)>; -def KryoWrite_1cyc_X_X_75ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_X_X_75ln], - (instrs BIFv16i8, BITv16i8, BSLv16i8, BSPv16i8)>; -def KryoWrite_0cyc_noRSV_11ln : - SchedWriteRes<[]> { - let Latency = 0; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_0cyc_noRSV_11ln], - (instrs BRK, DCPS1, DCPS2, DCPS3, HLT, HVC, ISB, HINT, SMC, SVC)>; -def KryoWrite_0cyc_XY_16ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 0; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_0cyc_XY_16ln, ReadI], - (instregex "(CCMN|CCMP)(W|X)i")>; -def KryoWrite_0cyc_XY_16_1ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 0; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_0cyc_XY_16_1ln, ReadI, ReadI], - (instregex "(CCMN|CCMP)(W|X)r")>; -def KryoWrite_2cyc_XY_3ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_2cyc_XY_3ln, ReadI], - (instregex "(CLS|CLZ)(W|X)r")>; -def KryoWrite_2cyc_XY_noRSV_7ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_noRSV_7ln], - (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>; -def KryoWrite_2cyc_XY_XY_8ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_8ln], - (instregex "(CLS|CLZ|CNT)(v2i32|v4i16|v8i8)")>; -def KryoWrite_2cyc_XY_noRSV_80ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_noRSV_80ln], - (instregex "CM(EQ|GE|HS|GT|HI|TST)(v8i8|v4i16|v2i32|v1i64)$")>; -def 
KryoWrite_2cyc_XY_XY_83ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_83ln], - (instregex "CM(EQ|GE|HS|GT|HI|TST)(v16i8|v8i16|v4i32|v2i64)$")>; -def KryoWrite_2cyc_XY_noRSV_81ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_noRSV_81ln], - (instregex "CM(EQ|LE|GE|GT|LT)(v8i8|v4i16|v2i32|v1i64)rz$")>; -def KryoWrite_2cyc_XY_XY_82ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_82ln], - (instregex "CM(EQ|LE|GE|GT|LT)(v16i8|v8i16|v4i32|v2i64)rz$")>; -def KryoWrite_3cyc_XY_4ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_3cyc_XY_4ln, ReadI, ReadISReg], - (instregex "CRC32.*")>; -def KryoWrite_1cyc_XY_20ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_1cyc_XY_20ln, ReadI, ReadI], - (instregex "CSEL(W|X)r")>; -def KryoWrite_1cyc_X_17ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 1; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_1cyc_X_17ln, ReadI, ReadI], - (instregex "(CSINC|CSNEG)(W|X)r")>; -def KryoWrite_1cyc_XY_18ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_1cyc_XY_18ln, ReadI, ReadI], - (instregex "(CSINV)(W|X)r")>; -def KryoWrite_3cyc_LS_X_13ln : - SchedWriteRes<[KryoUnitLS, KryoUnitX]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_X_13ln], - (instrs DRPS)>; -def KryoWrite_0cyc_LS_10ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 0; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_0cyc_LS_10ln], - (instrs DSB, DMB, CLREX)>; -def KryoWrite_1cyc_X_noRSV_196ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_X_noRSV_196ln], - (instregex "DUP(v8i8|v4i16|v2i32)(gpr|lane)")>; -def 
KryoWrite_1cyc_X_X_197ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_X_X_197ln], - (instregex "DUP(v16i8|v8i16|v4i32|v2i64)(gpr|lane)")>; -def KryoWrite_3cyc_LS_LS_X_15ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX]> { - let Latency = 3; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_X_15ln], - (instrs ERET)>; -def KryoWrite_1cyc_X_noRSV_207ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_X_noRSV_207ln], - (instrs EXTv8i8)>; -def KryoWrite_1cyc_X_X_212ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_X_X_212ln], - (instrs EXTv16i8)>; -def KryoWrite_2cyc_XY_X_136ln : - SchedWriteRes<[KryoUnitXY, KryoUnitX]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_X_136ln], - (instrs EXTRWrri, EXTRXrri)>; -def KryoWrite_2cyc_XY_noRSV_35ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_noRSV_35ln], - (instregex "F(MAX|MIN)(NM)?P?(D|S)rr")>; -def KryoWrite_2cyc_XY_XY_106ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_106ln], - (instregex "(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2i64p|v2f64|v4f32)")>; -def KryoWrite_2cyc_XY_noRSV_104ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_noRSV_104ln], - (instregex "(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f32|v2i32p)")>; -def KryoWrite_3cyc_XY_noRSV_107ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_107ln], - (instregex "F(MAX|MIN)(NM)?Vv4i32v")>; -def KryoWrite_3cyc_XY_noRSV_101ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : 
InstRW<[KryoWrite_3cyc_XY_noRSV_101ln], - (instregex "FABD(32|64|v2f32)")>; -def KryoWrite_3cyc_XY_XY_103ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_XY_103ln], - (instregex "(FABD|FADD|FSUB|FADDP)(v4f32|v2f64)")>; -def KryoWrite_1cyc_XY_noRSV_48ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_noRSV_48ln], - (instregex "F(ABS|NEG)(D|S)r")>; -def KryoWrite_1cyc_XY_noRSV_124ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_noRSV_124ln], - (instregex "F(ABS|NEG)v2f32")>; -def KryoWrite_1cyc_XY_XY_125ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_XY_125ln], - (instregex "F(ABS|NEG)(v2f64|v4f32)")>; -def KryoWrite_2cyc_XY_noRSV_33ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_noRSV_33ln], - (instregex "(FAC(GE|GT)|FCM(EQ|GE|GT))(32|64)")>; -def KryoWrite_3cyc_XY_noRSV_30ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_30ln], - (instregex "(FADD|FSUB)(D|S)rr")>; -def KryoWrite_3cyc_XY_noRSV_100ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_100ln], - (instregex "(FADD|FSUB|FADDP)v2f32")>; -def KryoWrite_3cyc_XY_noRSV_29ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_29ln], - (instregex "FADDP(v2i32p|v2i64p)")>; -def KryoWrite_0cyc_XY_31ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 0; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_0cyc_XY_31ln], - (instregex "FCCMPE?(D|S)rr")>; -def KryoWrite_2cyc_XY_noRSV_34ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : 
InstRW<[KryoWrite_2cyc_XY_noRSV_34ln], - (instregex "FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64)rz")>; -def KryoWrite_2cyc_XY_XY_36ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_36ln], - (instregex "FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz")>; -def KryoWrite_2cyc_XY_noRSV_105ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_noRSV_105ln], - (instregex "FCM(EQ|LE|GE|GT|LT)v2i32rz")>; -def KryoWrite_0cyc_XY_32ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 0; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_0cyc_XY_32ln], - (instregex "FCMPE?(D|S)r(r|i)")>; -def KryoWrite_1cyc_XY_noRSV_49ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_noRSV_49ln], - (instrs FCSELDrrr, FCSELSrrr)>; -def KryoWrite_4cyc_X_noRSV_41ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_noRSV_41ln], - (instrs FCVTDHr, FCVTDSr, FCVTHDr, FCVTHSr, FCVTSDr, FCVTSHr)>; -def KryoWrite_4cyc_X_38ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 4; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_4cyc_X_38ln], - (instregex "FCVT(((A|N|M|P)(S|U)(S|U)|Z(S|U)_Int(S|U))(W|X)(D|S)ri?|Z(S|U)(d|s))$")>; -def KryoWrite_4cyc_X_noRSV_113ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_noRSV_113ln], - (instregex "FCVT((A|N|M|P)(S|U)|Z(S|U)_Int)(v1i32|v1i64|v2f32)$")>; -def KryoWrite_4cyc_X_X_117ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_X_117ln], - (instregex "FCVT((A|N|M|P)(S|U)|Z(S|U)_Int)(v4f32|v2f64)$")>; -def KryoWrite_5cyc_X_X_XY_noRSV_119ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitXY]> { - let Latency = 5; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_5cyc_X_X_XY_noRSV_119ln], - (instregex 
"FCVTX?N(v2f32|v4f32|v2i32|v4i16|v4i32|v8i16)$")>; -def KryoWrite_4cyc_X_X_116ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_X_116ln], - (instregex "FCVTL(v2i32|v4i16|v4i32|v8i16)$")>; -def KryoWrite_4cyc_X_noRSV_112ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_noRSV_112ln], - (instrs FCVTXNv1i64)>; -def KryoWrite_4cyc_X_37ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 4; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_4cyc_X_37ln], - (instregex "FCVTZ(S|U)(S|U)(W|X)(D|S)ri?$")>; -def KryoWrite_4cyc_X_noRSV_111ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_noRSV_111ln], - (instregex "FCVTZ(S|U)(v2f32|v1i32|v1i64|v2i32(_shift)?)$")>; -def KryoWrite_4cyc_X_X_115ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_X_115ln], - (instregex "FCVTZ(S|U)(v2f64|v4f32|(v2i64|v4i32)(_shift)?)$")>; -def KryoWrite_10cyc_XA_Y_noRSV_43ln : - SchedWriteRes<[KryoUnitXA, KryoUnitY]> { - let Latency = 10; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_10cyc_XA_Y_noRSV_43ln], - (instrs FDIVSrr)>; -def KryoWrite_14cyc_XA_Y_noRSV_43ln : - SchedWriteRes<[KryoUnitXA, KryoUnitY]> { - let Latency = 14; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_14cyc_XA_Y_noRSV_43ln], - (instrs FDIVDrr)>; -def KryoWrite_10cyc_XA_Y_noRSV_121ln : - SchedWriteRes<[KryoUnitXA, KryoUnitY]> { - let Latency = 10; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_10cyc_XA_Y_noRSV_121ln], - (instrs FDIVv2f32)>; -def KryoWrite_14cyc_XA_Y_XA_Y_123ln : - SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> { - let Latency = 14; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_14cyc_XA_Y_XA_Y_123ln], - (instrs FDIVv2f64, FDIVv4f32)>; -def KryoWrite_5cyc_X_noRSV_55ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 5; let 
NumMicroOps = 2; -} -def : InstRW<[KryoWrite_5cyc_X_noRSV_55ln], - (instregex "FN?M(ADD|SUB)Srrr")>; -def KryoWrite_6cyc_X_noRSV_57ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 6; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_6cyc_X_noRSV_57ln], - (instregex "FN?M(ADD|SUB)Drrr")>; -def KryoWrite_5cyc_X_noRSV_51ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 5; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_5cyc_X_noRSV_51ln], - (instrs FMLAv2f32, FMLSv2f32, FMLAv1i32_indexed, FMLSv1i32_indexed)>; -def KryoWrite_5cyc_X_X_56ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 5; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_5cyc_X_X_56ln], - (instrs FMLAv4f32, FMLSv4f32)>; -def KryoWrite_6cyc_X_X_61ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 6; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_6cyc_X_X_61ln], - (instrs FMLAv2f64, FMLSv2f64)>; -def KryoWrite_5cyc_X_noRSV_128ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 5; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_5cyc_X_noRSV_128ln], - (instrs FMLAv2i32_indexed, FMLSv2i32_indexed)>; -def KryoWrite_5cyc_X_X_131ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 5; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_5cyc_X_X_131ln], - (instrs FMLAv4i32_indexed, FMLSv4i32_indexed)>; -def KryoWrite_6cyc_X_X_134ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 6; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_6cyc_X_X_134ln], - (instrs FMLAv2i64_indexed, FMLSv2i64_indexed)>; -def KryoWrite_6cyc_X_noRSV_60ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 6; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_6cyc_X_noRSV_60ln], - (instrs FMLAv1i64_indexed, FMLSv1i64_indexed, FMULv1i64_indexed, FMULXv1i64_indexed)>; -def KryoWrite_1cyc_XY_45ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_1cyc_XY_45ln], - (instregex "FMOV(XDHigh|DXHigh|DX)r")>; -def KryoWrite_1cyc_XY_noRSV_47ln : - 
SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_noRSV_47ln], - (instregex "FMOV(Di|Dr|Si|Sr|SWr|WSr|XDr|v.*_ns)")>; -def KryoWrite_5cyc_X_noRSV_53ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 5; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_5cyc_X_noRSV_53ln], - (instrs FMULv1i32_indexed, FMULXv1i32_indexed)>; -def KryoWrite_5cyc_X_noRSV_127ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 5; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_5cyc_X_noRSV_127ln], - (instrs FMULv2f32, FMULXv2f32, FMULv2i32_indexed, FMULXv2i32_indexed)>; -def KryoWrite_5cyc_X_X_130ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 5; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_5cyc_X_X_130ln], - (instrs FMULv4f32, FMULXv4f32, FMULv4i32_indexed, FMULXv4i32_indexed)>; -def KryoWrite_6cyc_X_X_133ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 6; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_6cyc_X_X_133ln], - (instrs FMULv2f64, FMULXv2f64, FMULv2i64_indexed, FMULXv2i64_indexed)>; -def KryoWrite_5cyc_X_noRSV_54ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 5; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_5cyc_X_noRSV_54ln], - (instrs FMULSrr, FNMULSrr, FMULX32)>; -def KryoWrite_6cyc_X_noRSV_59ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 6; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_6cyc_X_noRSV_59ln], - (instrs FMULDrr, FNMULDrr, FMULX64)>; -def KryoWrite_3cyc_XY_noRSV_28ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_28ln], - (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64 )>; -def KryoWrite_3cyc_XY_noRSV_99ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_99ln], - (instrs FRECPEv2f32, FRSQRTEv2f32)>; -def KryoWrite_3cyc_XY_XY_102ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 
2; -} -def : InstRW<[KryoWrite_3cyc_XY_XY_102ln], - (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>; -def KryoWrite_5cyc_X_noRSV_52ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 5; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_5cyc_X_noRSV_52ln], - (instrs FRECPS32, FRSQRTS32)>; -def KryoWrite_6cyc_X_noRSV_58ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 6; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_6cyc_X_noRSV_58ln], - (instrs FRECPS64, FRSQRTS64)>; -def KryoWrite_5cyc_X_noRSV_126ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 5; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_5cyc_X_noRSV_126ln], - (instrs FRECPSv2f32, FRSQRTSv2f32)>; -def KryoWrite_5cyc_X_X_129ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 5; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_5cyc_X_X_129ln], - (instrs FRECPSv4f32, FRSQRTSv4f32)>; -def KryoWrite_6cyc_X_X_132ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 6; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_6cyc_X_X_132ln], - (instrs FRECPSv2f64, FRSQRTSv2f64)>; -def KryoWrite_3cyc_XY_noRSV_50ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_50ln], - (instrs FRECPXv1i32, FRECPXv1i64)>; -def KryoWrite_2cyc_XY_noRSV_39ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_noRSV_39ln], - (instregex "FRINT(A|I|M|N|P|X|Z)(S|D)r")>; -def KryoWrite_2cyc_XY_noRSV_108ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_noRSV_108ln], - (instregex "FRINT(A|I|M|N|P|X|Z)v2f32")>; -def KryoWrite_2cyc_XY_XY_109ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_109ln], - (instregex "FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)")>; -def KryoWrite_12cyc_XA_Y_noRSV_42ln : - SchedWriteRes<[KryoUnitXA, KryoUnitY]> { - let Latency = 12; 
let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_12cyc_XA_Y_noRSV_42ln], - (instrs FSQRTSr)>; -def KryoWrite_21cyc_XA_Y_noRSV_42ln : - SchedWriteRes<[KryoUnitXA, KryoUnitY]> { - let Latency = 21; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_21cyc_XA_Y_noRSV_42ln], - (instrs FSQRTDr)>; -def KryoWrite_12cyc_XA_Y_noRSV_120ln : - SchedWriteRes<[KryoUnitXA, KryoUnitY]> { - let Latency = 12; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_12cyc_XA_Y_noRSV_120ln], - (instrs FSQRTv2f32)>; -def KryoWrite_21cyc_XA_Y_XA_Y_122ln : - SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> { - let Latency = 21; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_21cyc_XA_Y_XA_Y_122ln], - (instrs FSQRTv4f32)>; -def KryoWrite_36cyc_XA_Y_XA_Y_122ln : - SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> { - let Latency = 36; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_36cyc_XA_Y_XA_Y_122ln], - (instrs FSQRTv2f64)>; -def KryoWrite_1cyc_X_201ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 1; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_1cyc_X_201ln], - (instregex "INSv.*")>; -def KryoWrite_3cyc_LS_255ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_3cyc_LS_255ln], - (instregex "LD1(One(v16b|v8h|v4s|v2d)|i64)$")>; -def KryoWrite_4cyc_LS_X_270ln : - SchedWriteRes<[KryoUnitLS, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_LS_X_270ln], - (instregex "LD1(i8|i16|i32)$")>; -def KryoWrite_3cyc_LS_noRSV_285ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_noRSV_285ln], - (instregex "LD1One(v8b|v4h|v2s|v1d)$")>; -def KryoWrite_3cyc_LS_XY_289ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_289ln, WriteAdr], - (instregex "LD1(One(v16b|v8h|v4s|v2d)|i64)_POST$")>; -def KryoWrite_4cyc_LS_XY_X_298ln : - SchedWriteRes<[KryoUnitLS, 
KryoUnitXY, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_4cyc_LS_XY_X_298ln, WriteAdr], - (instregex "LD1(i8|i16|i32)_POST$")>; -def KryoWrite_3cyc_LS_LS_LS_308ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_LS_308ln], - (instregex "LD1Three(v16b|v8h|v4s|v2d)$")>; -def KryoWrite_3cyc_LS_XY_noRSV_317ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_317ln, WriteAdr], - (instregex "LD1One(v8b|v4h|v2s|v1d)_POST$")>; -def KryoWrite_3cyc_LS_LS_LS_LS_328ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_LS_LS_328ln, WriteAdr], - (instregex "LD1Four(v16b|v8h|v4s|v2d)_POST$")>; -def KryoWrite_3cyc_LS_XY_LS_LS_332ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_332ln, WriteAdr], - (instregex "LD1Three(v16b|v8h|v4s|v2d)_POST$")>; -def KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_348ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 5; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_348ln], - (instregex "LD1Three(v8b|v4h|v2s|v1d)$")>; -def KryoWrite_3cyc_LS_XY_LS_LS_LS_351ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 5; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_LS_351ln], - (instregex "LD1Four(v16b|v8h|v4s|v2d)$")>; -def KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_noRSV_358ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 6; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_noRSV_358ln], - (instregex "LD1Four(v8b|v4h|v2s|v1d)$")>; -def KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_360ln : - 
SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 6; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_360ln, WriteAdr], - (instregex "LD1Three(v8b|v4h|v2s|v1d)_POST$")>; -def KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_noRSV_368ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 7; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_noRSV_368ln, WriteAdr], - (instregex "LD1Four(v8b|v4h|v2s|v1d)_POST$")>; -def KryoWrite_3cyc_LS_LS_281ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_281ln], - (instregex "LD(1|2)Two(v16b|v8h|v4s|v2d)$")>; -def KryoWrite_3cyc_LS_noRSV_noRSV_311ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_3cyc_LS_noRSV_noRSV_311ln], - (instregex "LD(1|2)Two(v8b|v4h|v2s|v1d)$")>; -def KryoWrite_3cyc_LS_XY_LS_313ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_LS_313ln, WriteAdr], - (instregex "LD(1|2)Two(v16b|v8h|v4s|v2d)_POST$")>; -def KryoWrite_3cyc_LS_XY_noRSV_noRSV_334ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_noRSV_334ln, WriteAdr], - (instregex "LD(1|2)Two(v8b|v4h|v2s|v1d)_POST$")>; -def KryoWrite_3cyc_LS_256ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_3cyc_LS_256ln], - (instregex "LD1R(v16b|v8h|v4s|v2d)$")>; -def KryoWrite_3cyc_LS_noRSV_286ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_noRSV_286ln], - (instregex "LD1R(v8b|v4h|v2s|v1d)$")>; -def KryoWrite_3cyc_LS_XY_290ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_290ln, 
WriteAdr], - (instregex "LD1R(v16b|v8h|v4s|v2d)_POST$")>; -def KryoWrite_3cyc_LS_XY_noRSV_318ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_318ln, WriteAdr], - (instregex "LD1R(v8b|v4h|v2s|v1d)_POST$")>; -def KryoWrite_3cyc_LS_257ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_3cyc_LS_257ln], - (instregex "LD2i64$")>; -def KryoWrite_3cyc_LS_XY_291ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_291ln, WriteAdr], - (instregex "LD2i64_POST$")>; -def KryoWrite_4cyc_LS_X_X_296ln : - SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_4cyc_LS_X_X_296ln], - (instregex "LD2(i8|i16|i32)$")>; -def KryoWrite_4cyc_LS_XY_X_X_321ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitX, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_4cyc_LS_XY_X_X_321ln, WriteAdr], - (instregex "LD2(i8|i16|i32)_POST$")>; -def KryoWrite_3cyc_LS_LS_282ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_282ln], - (instregex "LD2R(v16b|v8h|v4s|v2d)$")>; -def KryoWrite_3cyc_LS_noRSV_noRSV_312ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_3cyc_LS_noRSV_noRSV_312ln], - (instregex "LD2R(v8b|v4h|v2s|v1d)$")>; -def KryoWrite_3cyc_LS_XY_LS_314ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_LS_314ln, WriteAdr], - (instregex "LD2R(v16b|v8h|v4s|v2d)_POST$")>; -def KryoWrite_3cyc_LS_XY_noRSV_noRSV_335ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_noRSV_335ln, WriteAdr], - (instregex 
"LD2R(v8b|v4h|v2s|v1d)_POST$")>; -def KryoWrite_3cyc_LS_LS_283ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_283ln], - (instregex "LD3i64$")>; -def KryoWrite_3cyc_LS_LS_LS_309ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_LS_309ln], - (instregex "LD3Threev2d$")>; -def KryoWrite_3cyc_LS_XY_LS_315ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_LS_315ln, WriteAdr], - (instregex "LD3i64_POST$")>; -def KryoWrite_4cyc_LS_X_X_X_320ln : - SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_4cyc_LS_X_X_X_320ln], - (instregex "LD3(i8|i16|i32)$")>; -def KryoWrite_3cyc_LS_XY_LS_LS_331ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_331ln, WriteAdr], - (instregex "LD3Threev2d_POST$")>; -def KryoWrite_4cyc_LS_XY_X_X_X_338ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitX, KryoUnitX, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 5; -} -def : InstRW<[KryoWrite_4cyc_LS_XY_X_X_X_338ln, WriteAdr], - (instregex "LD3(i8|i16|i32)_POST$")>; -def KryoWrite_4cyc_LS_LS_X_X_X_noRSV_noRSV_noRSV_373ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 8; -} -def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_noRSV_noRSV_noRSV_373ln], - (instregex "LD3Three(v8b|v4h|v2s)$")>; -def KryoWrite_4cyc_LS_XY_LS_X_X_X_noRSV_noRSV_noRSV_380ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitX, KryoUnitX, - KryoUnitX]> { - let Latency = 4; let NumMicroOps = 9; -} -def : InstRW<[KryoWrite_4cyc_LS_XY_LS_X_X_X_noRSV_noRSV_noRSV_380ln, WriteAdr], - (instregex 
"LD3Three(v8b|v4h|v2s)_POST$")>; -def KryoWrite_4cyc_LS_LS_X_X_X_LS_LS_X_X_X_381ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, - KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 10; -} -def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_LS_LS_X_X_X_381ln], - (instregex "LD3Three(v16b|v8h|v4s)$")>; -def KryoWrite_4cyc_LS_LS_X_X_X_LS_XY_LS_X_X_X_383ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, - KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitX, KryoUnitX, - KryoUnitX]> { - let Latency = 4; let NumMicroOps = 11; -} -def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_LS_XY_LS_X_X_X_383ln, WriteAdr], - (instregex "LD3Three(v16b|v8h|v4s)_POST$")>; -def KryoWrite_3cyc_LS_LS_LS_310ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_LS_310ln], - (instregex "LD3R(v16b|v8h|v4s|v2d)$")>; -def KryoWrite_3cyc_LS_XY_LS_LS_333ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_333ln, WriteAdr], - (instregex "LD3R(v16b|v8h|v4s|v2d)_POST$")>; -def KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_349ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 5; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_349ln], - (instregex "LD3R(v8b|v4h|v2s|v1d)$")>; -def KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_361ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 6; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_361ln, WriteAdr], - (instregex "LD3R(v8b|v4h|v2s|v1d)_POST$")>; -def KryoWrite_3cyc_LS_LS_284ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_284ln], - (instregex "LD4i64$")>; -def KryoWrite_3cyc_LS_XY_LS_316ln : - SchedWriteRes<[KryoUnitLS, 
KryoUnitXY, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_LS_316ln, WriteAdr], - (instregex "LD4i64_POST$")>; -def KryoWrite_3cyc_LS_LS_LS_LS_329ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_LS_LS_329ln], - (instregex "LD4Four(v2d)$")>; -def KryoWrite_4cyc_LS_X_X_X_X_337ln : - SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 5; -} -def : InstRW<[KryoWrite_4cyc_LS_X_X_X_X_337ln], - (instregex "LD4(i8|i16|i32)$")>; -def KryoWrite_3cyc_LS_XY_LS_LS_LS_350ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 5; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_LS_350ln, WriteAdr], - (instregex "LD4Four(v2d)_POST$")>; -def KryoWrite_4cyc_LS_XY_X_X_X_X_355ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitX, KryoUnitX, KryoUnitX, - KryoUnitX]> { - let Latency = 4; let NumMicroOps = 6; -} -def : InstRW<[KryoWrite_4cyc_LS_XY_X_X_X_X_355ln, WriteAdr], - (instregex "LD4(i8|i16|i32)_POST$")>; -def KryoWrite_4cyc_LS_LS_X_X_X_X_noRSV_noRSV_noRSV_noRSV_382ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, - KryoUnitX]> { - let Latency = 4; let NumMicroOps = 10; -} -def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_X_noRSV_noRSV_noRSV_noRSV_382ln], - (instregex "LD4Four(v8b|v4h|v2s)$")>; -def KryoWrite_4cyc_LS_XY_LS_X_X_X_X_noRSV_noRSV_noRSV_noRSV_384ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitX, KryoUnitX, - KryoUnitX, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 11; -} -def : InstRW<[KryoWrite_4cyc_LS_XY_LS_X_X_X_X_noRSV_noRSV_noRSV_noRSV_384ln, WriteAdr], - (instregex "LD4Four(v8b|v4h|v2s)_POST$")>; -def KryoWrite_4cyc_LS_LS_X_X_X_X_LS_LS_X_X_X_X_386ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, - KryoUnitX, KryoUnitLS, 
KryoUnitLS, KryoUnitX, KryoUnitX, - KryoUnitX, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 12; -} -def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_X_LS_LS_X_X_X_X_386ln], - (instregex "LD4Four(v16b|v8h|v4s)$")>; -def KryoWrite_4cyc_LS_LS_X_X_X_X_LS_XY_LS_X_X_X_X_389ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, - KryoUnitX, KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitX, - KryoUnitX, KryoUnitX, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 13; -} -def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_X_LS_XY_LS_X_X_X_X_389ln, WriteAdr], - (instregex "LD4Four(v16b|v8h|v4s)_POST$")>; -def KryoWrite_3cyc_LS_LS_LS_LS_330ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_LS_LS_330ln], - (instregex "LD4R(v16b|v8h|v4s|v2d)$")>; -def KryoWrite_3cyc_LS_XY_LS_LS_LS_352ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 5; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_LS_352ln, WriteAdr], - (instregex "LD4R(v16b|v8h|v4s|v2d)_POST$")>; -def KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_noRSV_359ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 6; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_noRSV_359ln], - (instregex "LD4R(v8b|v4h|v2s|v1d)$")>; -def KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_noRSV_369ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 7; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_noRSV_369ln, WriteAdr], - (instregex "LD4R(v8b|v4h|v2s|v1d)_POST$")>; -def KryoWrite_3cyc_LS_LS_400ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_400ln], - (instregex "LDAX?R(B|H|W|X)")>; -def : InstRW<[KryoWrite_3cyc_LS_LS_400ln, WriteLDHi], - (instregex "LDAXP(W|X)")>; -def KryoWrite_3cyc_LS_LS_401ln : - 
SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_401ln, WriteLDHi], - (instrs LDNPQi)>; -def KryoWrite_3cyc_LS_noRSV_noRSV_408ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_3cyc_LS_noRSV_noRSV_408ln, WriteLDHi], - (instrs LDNPDi, LDNPSi)>; -def KryoWrite_3cyc_LS_394ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_3cyc_LS_394ln, WriteLDHi], - (instrs LDNPWi, LDNPXi)>; -def KryoWrite_3cyc_LS_LS_402ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_402ln, WriteLDHi], - (instrs LDPQi)>; -def KryoWrite_3cyc_LS_noRSV_noRSV_409ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_3cyc_LS_noRSV_noRSV_409ln, WriteLDHi], - (instrs LDPDi, LDPSi)>; -def KryoWrite_3cyc_LS_XY_LS_410ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_LS_410ln, WriteLDHi, WriteAdr], - (instregex "LDPQ(post|pre)")>; -def KryoWrite_3cyc_LS_XY_noRSV_noRSV_411ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_noRSV_411ln, WriteLDHi, WriteAdr], - (instregex "LDP(D|S)(post|pre)")>; -def KryoWrite_3cyc_LS_393ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_3cyc_LS_393ln, WriteLDHi], - (instrs LDPWi, LDPXi)>; -def KryoWrite_3cyc_LS_XY_403ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_403ln, WriteLDHi, WriteAdr], - (instregex "LDP(W|X)(post|pre)")>; -def KryoWrite_4cyc_LS_395ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 4; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_4cyc_LS_395ln, 
WriteLDHi], - (instrs LDPSWi)>; -def KryoWrite_4cyc_LS_XY_405ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_LS_XY_405ln, WriteLDHi, WriteAdr], - (instrs LDPSWpost, LDPSWpre)>; -def KryoWrite_3cyc_LS_264ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_3cyc_LS_264ln], - (instrs LDRQui, LDRQl)>; -def KryoWrite_4cyc_X_LS_271ln : - SchedWriteRes<[KryoUnitX, KryoUnitLS]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_LS_271ln], - (instrs LDRQroW, LDRQroX)>; -def KryoWrite_3cyc_LS_noRSV_287ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_noRSV_287ln], - (instregex "LDR((D|S)l|(D|S|H|B)ui)")>; -def KryoWrite_3cyc_LS_XY_293ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_293ln, WriteAdr], - (instrs LDRQpost, LDRQpre)>; -def KryoWrite_4cyc_X_LS_noRSV_297ln : - SchedWriteRes<[KryoUnitX, KryoUnitLS]> { - let Latency = 4; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_4cyc_X_LS_noRSV_297ln], - (instregex "LDR(D|S|H|B)ro(W|X)")>; -def KryoWrite_3cyc_LS_XY_noRSV_319ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_319ln, WriteAdr], - (instregex "LDR(D|S|H|B)(post|pre)")>; -def KryoWrite_3cyc_LS_261ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_3cyc_LS_261ln], - (instregex "LDR(BB|HH|W|X)ui")>; -def KryoWrite_3cyc_LS_XY_292ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_XY_292ln, WriteAdr], - (instregex "LDR(BB|HH|W|X)(post|pre)")>; -def KryoWrite_4cyc_X_LS_272ln : - SchedWriteRes<[KryoUnitX, KryoUnitLS]> { - let Latency = 4; let NumMicroOps = 2; -} -def : 
InstRW<[KryoWrite_4cyc_X_LS_272ln], - (instregex "(LDR(BB|HH|W|X)ro(W|X)|PRFMro(W|X))")>; -def KryoWrite_3cyc_LS_262ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_3cyc_LS_262ln], - (instrs LDRWl, LDRXl)>; -def KryoWrite_4cyc_LS_268ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 4; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_4cyc_LS_268ln], - (instregex "LDRS(BW|BX|HW|HX|W)ui")>; -def KryoWrite_5cyc_X_LS_273ln : - SchedWriteRes<[KryoUnitX, KryoUnitLS]> { - let Latency = 5; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_5cyc_X_LS_273ln], - (instregex "LDRS(BW|BX|HW|HX|W)ro(W|X)")>; -def KryoWrite_4cyc_LS_XY_294ln : - SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_LS_XY_294ln, WriteAdr], - (instregex "LDRS(BW|BX|HW|HX|W)(post|pre)")>; -def KryoWrite_4cyc_LS_269ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 4; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_4cyc_LS_269ln], - (instrs LDRSWl)>; -def KryoWrite_3cyc_LS_260ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_3cyc_LS_260ln], - (instregex "LDTR(B|H|W|X)i")>; -def KryoWrite_4cyc_LS_267ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 4; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_4cyc_LS_267ln], - (instregex "LDTRS(BW|BX|HW|HX|W)i")>; -def KryoWrite_3cyc_LS_263ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_3cyc_LS_263ln], - (instrs LDURQi)>; -def KryoWrite_3cyc_LS_noRSV_288ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_noRSV_288ln], - (instregex "LDUR(D|S|H|B)i")>; -def KryoWrite_3cyc_LS_259ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_3cyc_LS_259ln], - (instregex "LDUR(BB|HH|W|X)i")>; -def KryoWrite_4cyc_LS_266ln : - 
SchedWriteRes<[KryoUnitLS]> { - let Latency = 4; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_4cyc_LS_266ln], - (instregex "LDURS(B|H)?(W|X)i")>; -def KryoWrite_3cyc_LS_258ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_3cyc_LS_258ln, WriteLDHi], - (instregex "LDXP(W|X)")>; -def KryoWrite_3cyc_LS_258_1ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 3; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_3cyc_LS_258_1ln], - (instregex "LDXR(B|H|W|X)")>; -def KryoWrite_2cyc_XY_XY_137ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_137ln], - (instrs LSLVWr, LSLVXr)>; -def KryoWrite_1cyc_XY_135ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_1cyc_XY_135ln], - (instregex "(LS|AS|RO)RV(W|X)r")>; -def KryoWrite_4cyc_X_84ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 4; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_4cyc_X_84ln], - (instrs MADDWrrr, MSUBWrrr)>; -def KryoWrite_5cyc_X_85ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 5; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_5cyc_X_85ln], - (instrs MADDXrrr, MSUBXrrr)>; -def KryoWrite_4cyc_X_noRSV_188ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_noRSV_188ln], - (instregex "(MLA|MLS|MUL)(v8i8|v4i16|v2i32)(_indexed)?")>; -def KryoWrite_4cyc_X_X_192ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_X_192ln], - (instregex "(MLA|MLS|MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?")>; -def KryoWrite_1cyc_XY_noRSV_198ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_noRSV_198ln], - (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)")>; -def KryoWrite_1cyc_XY_XY_199ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let 
Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_XY_199ln], - (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)")>; -def KryoWrite_1cyc_X_89ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 1; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_1cyc_X_89ln], - (instrs MOVKWi, MOVKXi)>; -def KryoWrite_1cyc_XY_91ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_1cyc_XY_91ln], - (instrs MOVNWi, MOVNXi)>; -def KryoWrite_1cyc_XY_90ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_1cyc_XY_90ln], - (instrs MOVZWi, MOVZXi)>; -def KryoWrite_2cyc_XY_93ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_2cyc_XY_93ln], - (instrs MRS)>; -def KryoWrite_0cyc_X_87ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 0; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_0cyc_X_87ln], - (instrs MSRpstateImm4)>; -def : InstRW<[KryoWrite_0cyc_X_87ln], - (instrs MSRpstateImm1)>; -def KryoWrite_0cyc_XY_88ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 0; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_0cyc_XY_88ln], - (instrs MSR)>; -def KryoWrite_1cyc_XY_noRSV_143ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_noRSV_143ln], - (instregex "NEG(v8i8|v4i16|v2i32|v1i64)")>; -def KryoWrite_1cyc_XY_XY_145ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_XY_145ln], - (instregex "NEG(v16i8|v8i16|v4i32|v2i64)")>; -def KryoWrite_1cyc_XY_noRSV_193ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_noRSV_193ln], - (instrs NOTv8i8)>; -def KryoWrite_1cyc_XY_XY_194ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_XY_194ln], - (instrs 
NOTv16i8)>; -def KryoWrite_2cyc_XY_noRSV_234ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_noRSV_234ln], - (instrs PMULv8i8)>; -def KryoWrite_2cyc_XY_XY_236ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_236ln], - (instrs PMULv16i8)>; -def KryoWrite_2cyc_XY_XY_235ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_235ln], - (instrs PMULLv8i8, PMULLv16i8)>; -def KryoWrite_3cyc_XY_XY_237ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_XY_237ln], - (instrs PMULLv1i64, PMULLv2i64)>; -def KryoWrite_0cyc_LS_254ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 0; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_0cyc_LS_254ln], - (instrs PRFMl, PRFMui)>; -def KryoWrite_0cyc_LS_253ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 0; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_0cyc_LS_253ln], - (instrs PRFUMi)>; -def KryoWrite_6cyc_XY_X_noRSV_175ln : - SchedWriteRes<[KryoUnitXY, KryoUnitX]> { - let Latency = 6; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_6cyc_XY_X_noRSV_175ln], - (instregex "R(ADD|SUB)HNv.*")>; -def KryoWrite_2cyc_XY_204ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_2cyc_XY_204ln], - (instrs RBITWr, RBITXr)>; -def KryoWrite_2cyc_XY_noRSV_218ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_noRSV_218ln], - (instrs RBITv8i8)>; -def KryoWrite_2cyc_XY_XY_219ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_219ln], - (instrs RBITv16i8)>; -def KryoWrite_1cyc_X_202ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 1; let NumMicroOps = 1; -} -def : 
InstRW<[KryoWrite_1cyc_X_202ln], - (instregex "REV(16|32)?(W|X)r")>; -def KryoWrite_1cyc_XY_noRSV_214ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_noRSV_214ln], - (instregex "REV(16|32|64)(v8i8|v4i16|v2i32)")>; -def KryoWrite_1cyc_XY_XY_216ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_XY_216ln], - (instregex "REV(16|32|64)(v16i8|v8i16|v4i32)")>; -def KryoWrite_3cyc_X_noRSV_244ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_X_noRSV_244ln], - (instregex "S(L|R)I(d|(v8i8|v4i16|v2i32)_shift)")>; -def KryoWrite_3cyc_X_X_245ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_X_X_245ln], - (instregex "S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift")>; -def KryoWrite_1cyc_XY_2ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_1cyc_XY_2ln, ReadI, ReadI], - (instregex "SBCS?(W|X)r")>; -def KryoWrite_2cyc_XA_XA_XA_24ln : - SchedWriteRes<[KryoUnitXA, KryoUnitXA, KryoUnitXA]> { - let Latency = 2; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_2cyc_XA_XA_XA_24ln], - (instrs SHA1Crrr, SHA1Mrrr, SHA1Prrr)>; -def KryoWrite_1cyc_XY_noRSV_21ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_noRSV_21ln], - (instrs SHA1Hrr)>; -def KryoWrite_2cyc_X_X_23ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_X_X_23ln], - (instrs SHA1SU0rrr, SHA1SU1rr, SHA256SU0rr)>; -def KryoWrite_4cyc_XA_XA_XA_25ln : - SchedWriteRes<[KryoUnitXA, KryoUnitXA, KryoUnitXA]> { - let Latency = 4; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_4cyc_XA_XA_XA_25ln], - (instrs SHA256Hrrr, SHA256H2rrr)>; -def KryoWrite_3cyc_XY_XY_X_X_26ln : - SchedWriteRes<[KryoUnitXY, 
KryoUnitXY, KryoUnitX, KryoUnitX]> { - let Latency = 3; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_3cyc_XY_XY_X_X_26ln], - (instrs SHA256SU1rrr)>; -def KryoWrite_4cyc_X_noRSV_189ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_noRSV_189ln], - (instregex "SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?")>; -def KryoWrite_3cyc_XY_noRSV_68ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_68ln], - (instregex "SQ(ABS|NEG)(v1i8|v1i16|v1i32|v1i64)")>; -def KryoWrite_3cyc_XY_noRSV_157ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_157ln], - (instregex "SQ(ABS|NEG)(v8i8|v4i16|v2i32)")>; -def KryoWrite_3cyc_XY_XY_164ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_XY_164ln], - (instregex "SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)")>; -def KryoWrite_4cyc_X_noRSV_190ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 4; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_4cyc_X_noRSV_190ln], - (instregex "SQD(MLAL|MLSL|MULL)(i16|i32)")>; -def KryoWrite_0cyc_LS_Y_274ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_0cyc_LS_Y_274ln], - (instregex "ST1(One(v8b|v4h|v2s|v1d|v16b|v8h|v4s|v2d)|(i8|i16|i32|i64)|Two(v8b|v4h|v2s|v1d))$")>; -def KryoWrite_1cyc_LS_Y_X_301ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitX]> { - let Latency = 1; let NumMicroOps = 3; -} -def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_X_301ln], - (instregex "ST1(One(v8b|v4h|v2s|v1d|v16b|v8h|v4s|v2d)|(i8|i16|i32|i64)|Two(v8b|v4h|v2s|v1d))_POST$")>; -def KryoWrite_1cyc_LS_Y_XY_305ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 3; -} -def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_305ln], - (instregex 
"ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>; -def KryoWrite_0cyc_LS_Y_LS_Y_323ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 4; -} -def : InstRW<[WriteAdr, KryoWrite_0cyc_LS_Y_LS_Y_323ln], - (instregex "ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>; -def KryoWrite_1cyc_LS_Y_XY_LS_Y_345ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY]> { - let Latency = 1; let NumMicroOps = 5; -} -def : InstRW<[KryoWrite_1cyc_LS_Y_XY_LS_Y_345ln], - (instregex "ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>; -def KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_356ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitLS, - KryoUnitY]> { - let Latency = 0; let NumMicroOps = 6; -} -def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_356ln], - (instregex "ST1Three(v16b|v8h|v4s|v2d)$")>; -def KryoWrite_1cyc_LS_Y_XY_LS_Y_LS_Y_366ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY, - KryoUnitLS, KryoUnitY]> { - let Latency = 1; let NumMicroOps = 7; -} -def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_LS_Y_366ln], - (instregex "ST1Three(v16b|v8h|v4s|v2d)_POST$")>; -def KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_LS_Y_371ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitLS, - KryoUnitY, KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 8; -} -def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_LS_Y_371ln], - (instregex "ST1Four(v16b|v8h|v4s|v2d)$")>; -def KryoWrite_0cyc_LS_Y_LS_Y_XY_LS_Y_LS_Y_377ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitXY, - KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 9; -} -def : InstRW<[WriteAdr, KryoWrite_0cyc_LS_Y_LS_Y_XY_LS_Y_LS_Y_377ln], - (instregex "ST1Four(v16b|v8h|v4s|v2d)_POST$")>; -def KryoWrite_0cyc_LS_Y_275ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 2; -} -def : 
InstRW<[KryoWrite_0cyc_LS_Y_275ln], - (instregex "ST2(Two(v8b|v4h|v2s|v1d|v16b|v8h|v4s|v2d)|(i8|i16|i32|i64))$")>; -def KryoWrite_1cyc_LS_Y_XY_306ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 3; -} -def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_306ln], - (instregex "ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>; -def KryoWrite_0cyc_LS_Y_LS_Y_322ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_322ln], - (instregex "ST2Two(v16b|v8h|v4s|v2d)$")>; -def KryoWrite_1cyc_LS_Y_XY_LS_Y_344ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY]> { - let Latency = 1; let NumMicroOps = 5; -} -def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_344ln], - (instregex "ST2Two(v16b|v8h|v4s|v2d)_POST$")>; -def KryoWrite_0cyc_LS_Y_LS_Y_324ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_324ln], - (instregex "ST3(Threev1d|(i8|i16|i32|i64))$")>; -def KryoWrite_1cyc_LS_Y_XY_LS_Y_346ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY]> { - let Latency = 1; let NumMicroOps = 5; -} -def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_346ln], - (instregex "ST3(Threev1d|(i8|i16|i32|i64))_POST$")>; -def KryoWrite_1cyc_X_X_LS_Y_LS_Y_353ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitLS, - KryoUnitY]> { - let Latency = 1; let NumMicroOps = 6; -} -def : InstRW<[KryoWrite_1cyc_X_X_LS_Y_LS_Y_353ln], - (instregex "ST3Three(v8b|v4h|v2s)$")>; -def KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_357ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitLS, - KryoUnitY]> { - let Latency = 0; let NumMicroOps = 6; -} -def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_357ln], - (instregex "ST3Threev2d$")>; -def KryoWrite_1cyc_X_X_LS_Y_XY_LS_Y_363ln : - 
SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitXY, - KryoUnitLS, KryoUnitY]> { - let Latency = 1; let NumMicroOps = 7; -} -def : InstRW<[WriteAdr, KryoWrite_1cyc_X_X_LS_Y_XY_LS_Y_363ln], - (instregex "ST3Three(v8b|v4h|v2s)_POST$")>; -def KryoWrite_1cyc_LS_Y_XY_LS_Y_LS_Y_367ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY, - KryoUnitLS, KryoUnitY]> { - let Latency = 1; let NumMicroOps = 7; -} -def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_LS_Y_367ln], - (instregex "ST3Threev2d_POST$")>; -def KryoWrite_1cyc_X_X_LS_Y_LS_Y_X_X_LS_Y_LS_Y_385ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitLS, - KryoUnitY, KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, - KryoUnitLS, KryoUnitY]> { - let Latency = 1; let NumMicroOps = 12; -} -def : InstRW<[KryoWrite_1cyc_X_X_LS_Y_LS_Y_X_X_LS_Y_LS_Y_385ln], - (instregex "ST3Three(v16b|v8h|v4s)$")>; -def KryoWrite_1cyc_X_X_LS_Y_LS_Y_X_X_LS_Y_XY_LS_Y_388ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitLS, - KryoUnitY, KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, - KryoUnitXY, KryoUnitLS, KryoUnitY]> { - let Latency = 1; let NumMicroOps = 13; -} -def : InstRW<[WriteAdr, KryoWrite_1cyc_X_X_LS_Y_LS_Y_X_X_LS_Y_XY_LS_Y_388ln], - (instregex "ST3Three(v16b|v8h|v4s)_POST$")>; -def KryoWrite_0cyc_LS_Y_LS_Y_325ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_325ln], - (instregex "ST4(Fourv1d|(i8|i16|i32|i64))$")>; -def KryoWrite_1cyc_LS_Y_XY_LS_Y_347ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY]> { - let Latency = 1; let NumMicroOps = 5; -} -def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_347ln], - (instregex "ST4(Fourv1d|(i8|i16|i32|i64))_POST$")>; -def KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_370ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX, - KryoUnitX, KryoUnitLS, KryoUnitY]> { - 
let Latency = 1; let NumMicroOps = 8; -} -def : InstRW<[KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_370ln], - (instregex "ST4Four(v8b|v4h|v2s)$")>; -def KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_LS_Y_372ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitLS, - KryoUnitY, KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 8; -} -def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_LS_Y_372ln], - (instregex "ST4Fourv2d$")>; -def KryoWrite_1cyc_X_X_LS_Y_XY_X_X_LS_Y_375ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitXY, - KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY]> { - let Latency = 1; let NumMicroOps = 9; -} -def : InstRW<[WriteAdr, KryoWrite_1cyc_X_X_LS_Y_XY_X_X_LS_Y_375ln], - (instregex "ST4Four(v8b|v4h|v2s)_POST$")>; -def KryoWrite_0cyc_LS_Y_LS_Y_XY_LS_Y_LS_Y_379ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitXY, - KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 9; -} -def : InstRW<[WriteAdr, KryoWrite_0cyc_LS_Y_LS_Y_XY_LS_Y_LS_Y_379ln], - (instregex "ST4Fourv2d_POST$")>; -def KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_390ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX, - KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX, KryoUnitX, - KryoUnitLS, KryoUnitY, KryoUnitX, KryoUnitX, KryoUnitLS, - KryoUnitY]> { - let Latency = 1; let NumMicroOps = 16; -} -def : InstRW<[KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_390ln], - (instregex "ST4Four(v16b|v8h|v4s)$")>; -def KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_XY_X_X_LS_Y_392ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX, - KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX, KryoUnitX, - KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitX, KryoUnitX, - KryoUnitLS, KryoUnitY]> { - let Latency = 1; let NumMicroOps = 17; -} -def : InstRW<[WriteAdr, KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_XY_X_X_LS_Y_392ln], - (instregex "ST4Four(v16b|v8h|v4s)_POST$")>; -def 
KryoWrite_0cyc_LS_LS_Y_299ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_0cyc_LS_LS_Y_299ln], - (instregex "STLR(B|H|W|X)")>; -def KryoWrite_3cyc_LS_LS_Y_307ln : - SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitY]> { - let Latency = 3; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_3cyc_LS_LS_Y_307ln], - (instregex "STLX(P(W|X)|R(B|H|W|X))")>; -def KryoWrite_0cyc_LS_Y_276ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_0cyc_LS_Y_276ln], - (instrs STNPDi, STNPSi)>; -def KryoWrite_0cyc_LS_Y_LS_Y_326ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_326ln], - (instrs STNPQi)>; -def KryoWrite_0cyc_LS_Y_280ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_0cyc_LS_Y_280ln], - (instrs STNPWi, STNPXi)>; -def KryoWrite_0cyc_LS_Y_277ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_0cyc_LS_Y_277ln], - (instregex "STP(D|S)i")>; -def KryoWrite_1cyc_LS_Y_X_303ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitX]> { - let Latency = 1; let NumMicroOps = 3; -} -def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_X_303ln], - (instregex "STP(D|S)(post|pre)")>; -def KryoWrite_0cyc_LS_Y_LS_Y_327ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_327ln], - (instrs STPQi)>; -def KryoWrite_1cyc_LS_Y_X_LS_Y_343ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitX, KryoUnitLS, KryoUnitY]> { - let Latency = 1; let NumMicroOps = 5; -} -def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_X_LS_Y_343ln], - (instrs STPQpost, STPQpre)>; -def KryoWrite_0cyc_LS_Y_279ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY]> { - let Latency = 0; 
let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_0cyc_LS_Y_279ln], - (instregex "STP(W|X)i")>; -def KryoWrite_1cyc_LS_X_Y_300ln : - SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitY]> { - let Latency = 1; let NumMicroOps = 3; -} -def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_X_Y_300ln], - (instregex "STP(W|X)(post|pre)")>; -def KryoWrite_0cyc_LS_Y_278ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_0cyc_LS_Y_278ln], - (instregex "STR(Q|D|S|H|B)ui")>; -def KryoWrite_1cyc_X_LS_Y_295ln : - SchedWriteRes<[KryoUnitX, KryoUnitLS, KryoUnitY]> { - let Latency = 1; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_1cyc_X_LS_Y_295ln], - (instregex "STR(D|S|H|B)ro(W|X)")>; -def KryoWrite_1cyc_LS_Y_X_304ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitX]> { - let Latency = 1; let NumMicroOps = 3; -} -def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_X_304ln], - (instregex "STR(Q|D|S|H|B)(post|pre)")>; -def KryoWrite_2cyc_X_LS_Y_XY_LS_Y_354ln : - SchedWriteRes<[KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, - KryoUnitY]> { - let Latency = 2; let NumMicroOps = 6; -} -def : InstRW<[KryoWrite_2cyc_X_LS_Y_XY_LS_Y_354ln], - (instregex "STRQro(W|X)")>; -def KryoWrite_0cyc_LS_Y_399ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_0cyc_LS_Y_399ln], - (instregex "STR(BB|HH|W|X)ui")>; -def KryoWrite_1cyc_X_LS_Y_406ln : - SchedWriteRes<[KryoUnitX, KryoUnitLS, KryoUnitY]> { - let Latency = 1; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_1cyc_X_LS_Y_406ln], - (instregex "STR(BB|HH|W|X)ro(W|X)")>; -def KryoWrite_1cyc_LS_X_Y_407ln : - SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitY]> { - let Latency = 1; let NumMicroOps = 3; -} -def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_X_Y_407ln], - (instregex "STR(BB|HH|W|X)(post|pre)")>; -def KryoWrite_0cyc_LS_Y_398ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 2; -} -def : 
InstRW<[KryoWrite_0cyc_LS_Y_398ln], - (instregex "STTR(B|H|W|X)i")>; -def KryoWrite_0cyc_LS_Y_396ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_0cyc_LS_Y_396ln], - (instregex "STUR(Q|D|S|H|B)i")>; -def KryoWrite_0cyc_LS_Y_397ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY]> { - let Latency = 0; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_0cyc_LS_Y_397ln], - (instregex "STUR(BB|HH|W|X)i")>; -def KryoWrite_3cyc_LS_Y_404ln : - SchedWriteRes<[KryoUnitLS, KryoUnitY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_LS_Y_404ln], - (instregex "STX(P(W|X)|R(B|H|W|X))")>; -def KryoWrite_3cyc_XY_noRSV_160ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_160ln], - (instregex "^(SU|US)QADD(v8i8|v4i16|v2i32)")>; -def KryoWrite_3cyc_XY_XY_167ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_XY_167ln], - (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)")>; -def KryoWrite_1cyc_XY_1ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 1; -} -def : InstRW<[KryoWrite_1cyc_XY_1ln, ReadI], - (instregex "SUBS?(W|X)ri")>; -def KryoWrite_2cyc_XY_XY_5ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_5ln, ReadI, ReadIEReg], - (instregex "SUBS?(W|X)rx")>; -def KryoWrite_2cyc_XY_XY_5_1ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 2; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_2cyc_XY_XY_5_1ln, ReadI, ReadISReg], - (instregex "SUBS?(W|X)rs")>; -def KryoWrite_1cyc_XY_noRSV_6ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_noRSV_6ln, ReadI, ReadI], - (instregex "SUBS?(W|X)rr")>; -def KryoWrite_0cyc_LS_9ln : - SchedWriteRes<[KryoUnitLS]> { - let Latency = 0; let NumMicroOps = 
1; -} -def : InstRW<[KryoWrite_0cyc_LS_9ln], - (instregex "SYSL?xt")>; -def KryoWrite_1cyc_X_noRSV_205ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_X_noRSV_205ln], - (instrs TBLv8i8One)>; -def KryoWrite_1cyc_X_X_208ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_X_X_208ln], - (instrs TBLv16i8One)>; -def KryoWrite_2cyc_X_X_X_noRSV_222ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX]> { - let Latency = 2; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_2cyc_X_X_X_noRSV_222ln], - (instrs TBLv8i8Two)>; -def KryoWrite_2cyc_X_X_X_X_X_X_224ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, - KryoUnitX]> { - let Latency = 2; let NumMicroOps = 6; -} -def : InstRW<[KryoWrite_2cyc_X_X_X_X_X_X_224ln], - (instrs TBLv16i8Two)>; -def KryoWrite_3cyc_X_X_X_X_X_noRSV_225ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> { - let Latency = 3; let NumMicroOps = 6; -} -def : InstRW<[KryoWrite_3cyc_X_X_X_X_X_noRSV_225ln], - (instrs TBLv8i8Three)>; -def KryoWrite_3cyc_X_X_X_X_X_X_X_noRSV_228ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, - KryoUnitX, KryoUnitX]> { - let Latency = 3; let NumMicroOps = 8; -} -def : InstRW<[KryoWrite_3cyc_X_X_X_X_X_X_X_noRSV_228ln], - (instrs TBLv8i8Four)>; -def KryoWrite_4cyc_X_X_X_X_X_X_X_X_XY_X_X_230ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, - KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitXY, KryoUnitX, - KryoUnitX]> { - let Latency = 4; let NumMicroOps = 11; -} -def : InstRW<[KryoWrite_4cyc_X_X_X_X_X_X_X_X_XY_X_X_230ln], - (instrs TBLv16i8Three)>; -def KryoWrite_4cyc_X_X_X_X_X_X_X_X_X_X_XY_X_X_X_X_232ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, - KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, - KryoUnitXY, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> { - let Latency = 
4; let NumMicroOps = 15; -} -def : InstRW<[KryoWrite_4cyc_X_X_X_X_X_X_X_X_X_X_XY_X_X_X_X_232ln], - (instrs TBLv16i8Four)>; -def KryoWrite_2cyc_X_X_noRSV_220ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 2; let NumMicroOps = 3; -} -def : InstRW<[KryoWrite_2cyc_X_X_noRSV_220ln], - (instrs TBXv8i8One)>; -def KryoWrite_2cyc_X_X_X_X_221ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> { - let Latency = 2; let NumMicroOps = 4; -} -def : InstRW<[KryoWrite_2cyc_X_X_X_X_221ln], - (instrs TBXv16i8One)>; -def KryoWrite_3cyc_X_X_X_X_noRSV_223ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> { - let Latency = 3; let NumMicroOps = 5; -} -def : InstRW<[KryoWrite_3cyc_X_X_X_X_noRSV_223ln], - (instrs TBXv8i8Two)>; -def KryoWrite_4cyc_X_X_X_X_X_X_noRSV_226ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, - KryoUnitX]> { - let Latency = 4; let NumMicroOps = 7; -} -def : InstRW<[KryoWrite_4cyc_X_X_X_X_X_X_noRSV_226ln], - (instrs TBXv8i8Three)>; -def KryoWrite_3cyc_X_X_X_X_X_X_X_X_227ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, - KryoUnitX, KryoUnitX, KryoUnitX]> { - let Latency = 3; let NumMicroOps = 8; -} -def : InstRW<[KryoWrite_3cyc_X_X_X_X_X_X_X_X_227ln], - (instrs TBXv16i8Two)>; -def KryoWrite_4cyc_X_X_X_X_X_X_X_X_noRSV_229ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, - KryoUnitX, KryoUnitX, KryoUnitX]> { - let Latency = 4; let NumMicroOps = 9; -} -def : InstRW<[KryoWrite_4cyc_X_X_X_X_X_X_X_X_noRSV_229ln], - (instrs TBXv8i8Four)>; -def KryoWrite_5cyc_X_X_X_X_X_X_X_X_X_XY_X_X_X_231ln : - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, - KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitXY, - KryoUnitX, KryoUnitX, KryoUnitX]> { - let Latency = 5; let NumMicroOps = 13; -} -def : InstRW<[KryoWrite_5cyc_X_X_X_X_X_X_X_X_X_XY_X_X_X_231ln], - (instrs TBXv16i8Three)>; -def KryoWrite_5cyc_X_X_X_X_X_X_X_X_X_X_X_XY_X_X_X_X_X_233ln 
: - SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, - KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, - KryoUnitX, KryoUnitXY, KryoUnitX, KryoUnitX, KryoUnitX, - KryoUnitX, KryoUnitX]> { - let Latency = 5; let NumMicroOps = 17; -} -def : InstRW<[KryoWrite_5cyc_X_X_X_X_X_X_X_X_X_X_X_XY_X_X_X_X_X_233ln], - (instrs TBXv16i8Four)>; -def KryoWrite_1cyc_XY_XY_217ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_XY_XY_217ln], - (instregex "((TRN1|TRN2|ZIP1|UZP1|UZP2)v2i64|ZIP2(v2i64|v4i32|v8i16|v16i8))")>; -def KryoWrite_1cyc_X_X_211ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_X_X_211ln], - (instregex "(TRN1|TRN2)(v4i32|v8i16|v16i8)")>; -def KryoWrite_1cyc_X_XY_213ln : - SchedWriteRes<[KryoUnitX, KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_X_XY_213ln], - (instregex "(TRN1|TRN2)(v2i32|v4i16|v8i8)")>; -def KryoWrite_3cyc_XY_noRSV_156ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_noRSV_156ln], - (instrs URECPEv2i32, URSQRTEv2i32)>; -def KryoWrite_3cyc_XY_XY_168ln : - SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { - let Latency = 3; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_3cyc_XY_XY_168ln], - (instrs URECPEv4i32, URSQRTEv4i32)>; -def KryoWrite_1cyc_X_X_210ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_X_X_210ln], - (instregex "(UZP1|UZP2)(v4i32|v8i16|v16i8)")>; -def KryoWrite_1cyc_X_noRSV_206ln : - SchedWriteRes<[KryoUnitX]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_X_noRSV_206ln], - (instregex "(UZP1|UZP2|ZIP1|ZIP2)(v2i32|v4i16|v8i8)")>; -def KryoWrite_1cyc_XY_noRSV_215ln : - SchedWriteRes<[KryoUnitXY]> { - let Latency = 1; let NumMicroOps = 2; -} -def : 
InstRW<[KryoWrite_1cyc_XY_noRSV_215ln], - (instregex "XTNv.*")>; -def KryoWrite_1cyc_X_X_209ln : - SchedWriteRes<[KryoUnitX, KryoUnitX]> { - let Latency = 1; let NumMicroOps = 2; -} -def : InstRW<[KryoWrite_1cyc_X_X_209ln], - (instregex "ZIP1(v4i32|v8i16|v16i8)")>; diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedPredAmpere.td b/suite/synctools/tablegen/AArch64/AArch64SchedPredAmpere.td deleted file mode 100644 index 8552c07bda..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedPredAmpere.td +++ /dev/null @@ -1,25 +0,0 @@ -//===- AArch64SchedPredAmpere.td - AArch64 Sched Preds -----*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines scheduling predicate definitions that are used by the -// AArch64 Ampere Computing processors. -// -//===----------------------------------------------------------------------===// - -// Auxiliary predicates. - -// Check for a LSL shift <= 4 -def AmpereCheapLSL : MCSchedPredicate< - CheckAny<[CheckShiftBy0, - CheckAll< - [CheckShiftLSL, - CheckAny< - [CheckShiftBy1, - CheckShiftBy2, - CheckShiftBy3, - CheckShiftBy4]>]>]>>; diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedPredExynos.td b/suite/synctools/tablegen/AArch64/AArch64SchedPredExynos.td deleted file mode 100644 index fcda2394ba..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedPredExynos.td +++ /dev/null @@ -1,157 +0,0 @@ -//===- AArch64SchedPredExynos.td - AArch64 Sched Preds -----*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines scheduling predicate definitions that are used by the -// AArch64 Exynos processors. -// -//===----------------------------------------------------------------------===// - -// Auxiliary predicates. - -// Check the shift in arithmetic and logic instructions. -def ExynosCheckShift : CheckAny<[CheckShiftBy0, - CheckAll< - [CheckShiftLSL, - CheckAny< - [CheckShiftBy1, - CheckShiftBy2, - CheckShiftBy3]>]>]>; - -// Exynos predicates. - -// Identify BLR specifying the LR register as the indirect target register. -def ExynosBranchLinkLRPred : MCSchedPredicate< - CheckAll<[CheckOpcode<[BLR]>, - CheckRegOperand<0, LR>]>>; - -// Identify arithmetic instructions without or with limited extension or shift. -def ExynosArithFn : TIIPredicate< - "isExynosArithFast", - MCOpcodeSwitchStatement< - [MCOpcodeSwitchCase< - IsArithExtOp.ValidOpcodes, - MCReturnStatement< - CheckAny<[CheckExtBy0, - CheckAll< - [CheckAny< - [CheckExtUXTW, - CheckExtUXTX]>, - CheckAny< - [CheckExtBy1, - CheckExtBy2, - CheckExtBy3]>]>]>>>, - MCOpcodeSwitchCase< - IsArithShiftOp.ValidOpcodes, - MCReturnStatement>, - MCOpcodeSwitchCase< - IsArithUnshiftOp.ValidOpcodes, - MCReturnStatement>, - MCOpcodeSwitchCase< - IsArithImmOp.ValidOpcodes, - MCReturnStatement>], - MCReturnStatement>>; -def ExynosArithPred : MCSchedPredicate; - -// Identify logic instructions with limited shift. -def ExynosLogicFn : TIIPredicate< - "isExynosLogicFast", - MCOpcodeSwitchStatement< - [MCOpcodeSwitchCase< - IsLogicShiftOp.ValidOpcodes, - MCReturnStatement>, - MCOpcodeSwitchCase< - IsLogicUnshiftOp.ValidOpcodes, - MCReturnStatement>, - MCOpcodeSwitchCase< - IsLogicImmOp.ValidOpcodes, - MCReturnStatement>], - MCReturnStatement>>; -def ExynosLogicPred : MCSchedPredicate; - -// Identify more logic instructions with limited shift. 
-def ExynosLogicExFn : TIIPredicate< - "isExynosLogicExFast", - MCOpcodeSwitchStatement< - [MCOpcodeSwitchCase< - IsLogicShiftOp.ValidOpcodes, - MCReturnStatement< - CheckAny< - [ExynosCheckShift, - CheckAll< - [CheckShiftLSL, - CheckShiftBy8]>]>>>, - MCOpcodeSwitchCase< - IsLogicUnshiftOp.ValidOpcodes, - MCReturnStatement>, - MCOpcodeSwitchCase< - IsLogicImmOp.ValidOpcodes, - MCReturnStatement>], - MCReturnStatement>>; -def ExynosLogicExPred : MCSchedPredicate; - -// Identify a load or store using the register offset addressing mode -// with a scaled non-extended register. -def ExynosScaledIdxFn : TIIPredicate<"isExynosScaledAddr", - MCOpcodeSwitchStatement< - [MCOpcodeSwitchCase< - IsLoadStoreRegOffsetOp.ValidOpcodes, - MCReturnStatement< - CheckAny< - [CheckMemExtSXTW, - CheckMemExtUXTW, - CheckMemScaled]>>>], - MCReturnStatement>>; -def ExynosScaledIdxPred : MCSchedPredicate; - -// Identify FP instructions. -def ExynosFPPred : MCSchedPredicate>; - -// Identify 128-bit NEON instructions. -def ExynosQFormPred : MCSchedPredicate; - -// Identify instructions that reset a register efficiently. -def ExynosResetFn : TIIPredicate< - "isExynosResetFast", - MCOpcodeSwitchStatement< - [MCOpcodeSwitchCase< - [ADR, ADRP, - MOVNWi, MOVNXi, - MOVZWi, MOVZXi], - MCReturnStatement>, - MCOpcodeSwitchCase< - [ORRWri, ORRXri], - MCReturnStatement< - CheckAll< - [CheckIsRegOperand<1>, - CheckAny< - [CheckRegOperand<1, WZR>, - CheckRegOperand<1, XZR>]>]>>>], - MCReturnStatement< - CheckAny< - [IsCopyIdiomFn, - IsZeroFPIdiomFn]>>>>; -def ExynosResetPred : MCSchedPredicate; - -// Identify EXTR as the alias for ROR (immediate). -def ExynosRotateRightImmPred : MCSchedPredicate< - CheckAll<[CheckOpcode<[EXTRWrri, EXTRXrri]>, - CheckSameRegOperand<1, 2>]>>; - -// Identify cheap arithmetic and logic immediate instructions. 
-def ExynosCheapFn : TIIPredicate< - "isExynosCheapAsMove", - MCOpcodeSwitchStatement< - [MCOpcodeSwitchCase< - IsArithLogicImmOp.ValidOpcodes, - MCReturnStatement>], - MCReturnStatement< - CheckAny< - [ExynosArithFn, ExynosResetFn, ExynosLogicFn]>>>>; diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedPredicates.td b/suite/synctools/tablegen/AArch64/AArch64SchedPredicates.td deleted file mode 100644 index 19a3780c73..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedPredicates.td +++ /dev/null @@ -1,441 +0,0 @@ -//===- AArch64SchedPredicates.td - AArch64 Sched Preds -----*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines scheduling predicate definitions that are used by the -// AArch64 subtargets. -// -//===----------------------------------------------------------------------===// - -// Function mappers. - -// Check the extension type in arithmetic instructions. -let FunctionMapper = "AArch64_AM::getArithExtendType" in { - def CheckExtUXTB : CheckImmOperand_s<3, "AArch64_AM::UXTB">; - def CheckExtUXTH : CheckImmOperand_s<3, "AArch64_AM::UXTH">; - def CheckExtUXTW : CheckImmOperand_s<3, "AArch64_AM::UXTW">; - def CheckExtUXTX : CheckImmOperand_s<3, "AArch64_AM::UXTX">; - def CheckExtSXTB : CheckImmOperand_s<3, "AArch64_AM::SXTB">; - def CheckExtSXTH : CheckImmOperand_s<3, "AArch64_AM::SXTH">; - def CheckExtSXTW : CheckImmOperand_s<3, "AArch64_AM::SXTW">; - def CheckExtSXTX : CheckImmOperand_s<3, "AArch64_AM::SXTX">; -} - -// Check for shifting in extended arithmetic instructions. 
-foreach I = {0-3} in { - let FunctionMapper = "AArch64_AM::getArithShiftValue" in - def CheckExtBy#I : CheckImmOperand<3, I>; -} - -// Check the extension type in the register offset addressing mode. -let FunctionMapper = "AArch64_AM::getMemExtendType" in { - def CheckMemExtUXTW : CheckImmOperand_s<3, "AArch64_AM::UXTW">; - def CheckMemExtLSL : CheckImmOperand_s<3, "AArch64_AM::UXTX">; - def CheckMemExtSXTW : CheckImmOperand_s<3, "AArch64_AM::SXTW">; - def CheckMemExtSXTX : CheckImmOperand_s<3, "AArch64_AM::SXTX">; -} - -// Check for scaling in the register offset addressing mode. -let FunctionMapper = "AArch64_AM::getMemDoShift" in -def CheckMemScaled : CheckImmOperandSimple<4>; - -// Check the shifting type in arithmetic and logic instructions. -let FunctionMapper = "AArch64_AM::getShiftType" in { - def CheckShiftLSL : CheckImmOperand_s<3, "AArch64_AM::LSL">; - def CheckShiftLSR : CheckImmOperand_s<3, "AArch64_AM::LSR">; - def CheckShiftASR : CheckImmOperand_s<3, "AArch64_AM::ASR">; - def CheckShiftROR : CheckImmOperand_s<3, "AArch64_AM::ROR">; - def CheckShiftMSL : CheckImmOperand_s<3, "AArch64_AM::MSL">; -} - -// Check for shifting in arithmetic and logic instructions. -foreach I = {0-4, 8} in { - let FunctionMapper = "AArch64_AM::getShiftValue" in - def CheckShiftBy#I : CheckImmOperand<3, I>; -} - -// Generic predicates. - -// Identify whether an instruction is the 16-bit NEON form based on its result. 
-def CheckHForm : CheckAll<[CheckIsRegOperand<0>, - CheckAny<[CheckRegOperand<0, H0>, - CheckRegOperand<0, H1>, - CheckRegOperand<0, H2>, - CheckRegOperand<0, H3>, - CheckRegOperand<0, H4>, - CheckRegOperand<0, H5>, - CheckRegOperand<0, H6>, - CheckRegOperand<0, H7>, - CheckRegOperand<0, H8>, - CheckRegOperand<0, H9>, - CheckRegOperand<0, H10>, - CheckRegOperand<0, H11>, - CheckRegOperand<0, H12>, - CheckRegOperand<0, H13>, - CheckRegOperand<0, H14>, - CheckRegOperand<0, H15>, - CheckRegOperand<0, H16>, - CheckRegOperand<0, H17>, - CheckRegOperand<0, H18>, - CheckRegOperand<0, H19>, - CheckRegOperand<0, H20>, - CheckRegOperand<0, H21>, - CheckRegOperand<0, H22>, - CheckRegOperand<0, H23>, - CheckRegOperand<0, H24>, - CheckRegOperand<0, H25>, - CheckRegOperand<0, H26>, - CheckRegOperand<0, H27>, - CheckRegOperand<0, H28>, - CheckRegOperand<0, H29>, - CheckRegOperand<0, H30>, - CheckRegOperand<0, H31>]>]>; - -// Identify whether an instruction is the 32-bit NEON form based on its result. -def CheckSForm : CheckAll<[CheckIsRegOperand<0>, - CheckAny<[CheckRegOperand<0, S0>, - CheckRegOperand<0, S1>, - CheckRegOperand<0, S2>, - CheckRegOperand<0, S3>, - CheckRegOperand<0, S4>, - CheckRegOperand<0, S5>, - CheckRegOperand<0, S6>, - CheckRegOperand<0, S7>, - CheckRegOperand<0, S8>, - CheckRegOperand<0, S9>, - CheckRegOperand<0, S10>, - CheckRegOperand<0, S11>, - CheckRegOperand<0, S12>, - CheckRegOperand<0, S13>, - CheckRegOperand<0, S14>, - CheckRegOperand<0, S15>, - CheckRegOperand<0, S16>, - CheckRegOperand<0, S17>, - CheckRegOperand<0, S18>, - CheckRegOperand<0, S19>, - CheckRegOperand<0, S20>, - CheckRegOperand<0, S21>, - CheckRegOperand<0, S22>, - CheckRegOperand<0, S23>, - CheckRegOperand<0, S24>, - CheckRegOperand<0, S25>, - CheckRegOperand<0, S26>, - CheckRegOperand<0, S27>, - CheckRegOperand<0, S28>, - CheckRegOperand<0, S29>, - CheckRegOperand<0, S30>, - CheckRegOperand<0, S31>]>]>; - -// Identify whether an instruction is the 64-bit NEON form based on its 
result. -def CheckDForm : CheckAll<[CheckIsRegOperand<0>, - CheckAny<[CheckRegOperand<0, D0>, - CheckRegOperand<0, D1>, - CheckRegOperand<0, D2>, - CheckRegOperand<0, D3>, - CheckRegOperand<0, D4>, - CheckRegOperand<0, D5>, - CheckRegOperand<0, D6>, - CheckRegOperand<0, D7>, - CheckRegOperand<0, D8>, - CheckRegOperand<0, D9>, - CheckRegOperand<0, D10>, - CheckRegOperand<0, D11>, - CheckRegOperand<0, D12>, - CheckRegOperand<0, D13>, - CheckRegOperand<0, D14>, - CheckRegOperand<0, D15>, - CheckRegOperand<0, D16>, - CheckRegOperand<0, D17>, - CheckRegOperand<0, D18>, - CheckRegOperand<0, D19>, - CheckRegOperand<0, D20>, - CheckRegOperand<0, D21>, - CheckRegOperand<0, D22>, - CheckRegOperand<0, D23>, - CheckRegOperand<0, D24>, - CheckRegOperand<0, D25>, - CheckRegOperand<0, D26>, - CheckRegOperand<0, D27>, - CheckRegOperand<0, D28>, - CheckRegOperand<0, D29>, - CheckRegOperand<0, D30>, - CheckRegOperand<0, D31>]>]>; - -// Identify whether an instruction is the 128-bit NEON form based on its result. -def CheckQForm : CheckAll<[CheckIsRegOperand<0>, - CheckAny<[CheckRegOperand<0, Q0>, - CheckRegOperand<0, Q1>, - CheckRegOperand<0, Q2>, - CheckRegOperand<0, Q3>, - CheckRegOperand<0, Q4>, - CheckRegOperand<0, Q5>, - CheckRegOperand<0, Q6>, - CheckRegOperand<0, Q7>, - CheckRegOperand<0, Q8>, - CheckRegOperand<0, Q9>, - CheckRegOperand<0, Q10>, - CheckRegOperand<0, Q11>, - CheckRegOperand<0, Q12>, - CheckRegOperand<0, Q13>, - CheckRegOperand<0, Q14>, - CheckRegOperand<0, Q15>, - CheckRegOperand<0, Q16>, - CheckRegOperand<0, Q17>, - CheckRegOperand<0, Q18>, - CheckRegOperand<0, Q19>, - CheckRegOperand<0, Q20>, - CheckRegOperand<0, Q21>, - CheckRegOperand<0, Q22>, - CheckRegOperand<0, Q23>, - CheckRegOperand<0, Q24>, - CheckRegOperand<0, Q25>, - CheckRegOperand<0, Q26>, - CheckRegOperand<0, Q27>, - CheckRegOperand<0, Q28>, - CheckRegOperand<0, Q29>, - CheckRegOperand<0, Q30>, - CheckRegOperand<0, Q31>]>]>; - -// Identify arithmetic instructions with extend. 
-def IsArithExtOp : CheckOpcode<[ADDWrx, ADDXrx, ADDSWrx, ADDSXrx, - SUBWrx, SUBXrx, SUBSWrx, SUBSXrx, - ADDXrx64, ADDSXrx64, - SUBXrx64, SUBSXrx64]>; - -// Identify arithmetic immediate instructions. -def IsArithImmOp : CheckOpcode<[ADDWri, ADDXri, ADDSWri, ADDSXri, - SUBWri, SUBXri, SUBSWri, SUBSXri]>; - -// Identify arithmetic instructions with shift. -def IsArithShiftOp : CheckOpcode<[ADDWrs, ADDXrs, ADDSWrs, ADDSXrs, - SUBWrs, SUBXrs, SUBSWrs, SUBSXrs]>; - -// Identify arithmetic instructions without shift. -def IsArithUnshiftOp : CheckOpcode<[ADDWrr, ADDXrr, ADDSWrr, ADDSXrr, - SUBWrr, SUBXrr, SUBSWrr, SUBSXrr]>; - -// Identify logic immediate instructions. -def IsLogicImmOp : CheckOpcode<[ANDWri, ANDXri, - EORWri, EORXri, - ORRWri, ORRXri]>; - -// Identify logic instructions with shift. -def IsLogicShiftOp : CheckOpcode<[ANDWrs, ANDXrs, ANDSWrs, ANDSXrs, - BICWrs, BICXrs, BICSWrs, BICSXrs, - EONWrs, EONXrs, - EORWrs, EORXrs, - ORNWrs, ORNXrs, - ORRWrs, ORRXrs]>; - -// Identify logic instructions without shift. -def IsLogicUnshiftOp : CheckOpcode<[ANDWrr, ANDXrr, ANDSWrr, ANDSXrr, - BICWrr, BICXrr, BICSWrr, BICSXrr, - EONWrr, EONXrr, - EORWrr, EORXrr, - ORNWrr, ORNXrr, - ORRWrr, ORRXrr]>; - -// Identify arithmetic and logic immediate instructions. -def IsArithLogicImmOp : CheckOpcode; - -// Identify arithmetic and logic instructions with shift. -def IsArithLogicShiftOp : CheckOpcode; - -// Identify arithmetic and logic instructions without shift. -def IsArithLogicUnshiftOp : CheckOpcode; - -// Identify whether an instruction is an ASIMD -// load using the post index addressing mode. 
-def IsLoadASIMDPostOp : CheckOpcode<[LD1Onev8b_POST, LD1Onev4h_POST, LD1Onev2s_POST, LD1Onev1d_POST, - LD1Onev16b_POST, LD1Onev8h_POST, LD1Onev4s_POST, LD1Onev2d_POST, - LD1Twov8b_POST, LD1Twov4h_POST, LD1Twov2s_POST, LD1Twov1d_POST, - LD1Twov16b_POST, LD1Twov8h_POST, LD1Twov4s_POST, LD1Twov2d_POST, - LD1Threev8b_POST, LD1Threev4h_POST, LD1Threev2s_POST, LD1Threev1d_POST, - LD1Threev16b_POST, LD1Threev8h_POST, LD1Threev4s_POST, LD1Threev2d_POST, - LD1Fourv8b_POST, LD1Fourv4h_POST, LD1Fourv2s_POST, LD1Fourv1d_POST, - LD1Fourv16b_POST, LD1Fourv8h_POST, LD1Fourv4s_POST, LD1Fourv2d_POST, - LD1i8_POST, LD1i16_POST, LD1i32_POST, LD1i64_POST, - LD1Rv8b_POST, LD1Rv4h_POST, LD1Rv2s_POST, LD1Rv1d_POST, - LD1Rv16b_POST, LD1Rv8h_POST, LD1Rv4s_POST, LD1Rv2d_POST, - LD2Twov8b_POST, LD2Twov4h_POST, LD2Twov2s_POST, - LD2Twov16b_POST, LD2Twov8h_POST, LD2Twov4s_POST, LD2Twov2d_POST, - LD2i8_POST, LD2i16_POST, LD2i32_POST, LD2i64_POST, - LD2Rv8b_POST, LD2Rv4h_POST, LD2Rv2s_POST, LD2Rv1d_POST, - LD2Rv16b_POST, LD2Rv8h_POST, LD2Rv4s_POST, LD2Rv2d_POST, - LD3Threev8b_POST, LD3Threev4h_POST, LD3Threev2s_POST, - LD3Threev16b_POST, LD3Threev8h_POST, LD3Threev4s_POST, LD3Threev2d_POST, - LD3i8_POST, LD3i16_POST, LD3i32_POST, LD3i64_POST, - LD3Rv8b_POST, LD3Rv4h_POST, LD3Rv2s_POST, LD3Rv1d_POST, - LD3Rv16b_POST, LD3Rv8h_POST, LD3Rv4s_POST, LD3Rv2d_POST, - LD4Fourv8b_POST, LD4Fourv4h_POST, LD4Fourv2s_POST, - LD4Fourv16b_POST, LD4Fourv8h_POST, LD4Fourv4s_POST, LD4Fourv2d_POST, - LD4i8_POST, LD4i16_POST, LD4i32_POST, LD4i64_POST, - LD4Rv8b_POST, LD4Rv4h_POST, LD4Rv2s_POST, LD4Rv1d_POST, - LD4Rv16b_POST, LD4Rv8h_POST, LD4Rv4s_POST, LD4Rv2d_POST]>; - -// Identify whether an instruction is an ASIMD -// store using the post index addressing mode. 
-def IsStoreASIMDPostOp : CheckOpcode<[ST1Onev8b_POST, ST1Onev4h_POST, ST1Onev2s_POST, ST1Onev1d_POST, - ST1Onev16b_POST, ST1Onev8h_POST, ST1Onev4s_POST, ST1Onev2d_POST, - ST1Twov8b_POST, ST1Twov4h_POST, ST1Twov2s_POST, ST1Twov1d_POST, - ST1Twov16b_POST, ST1Twov8h_POST, ST1Twov4s_POST, ST1Twov2d_POST, - ST1Threev8b_POST, ST1Threev4h_POST, ST1Threev2s_POST, ST1Threev1d_POST, - ST1Threev16b_POST, ST1Threev8h_POST, ST1Threev4s_POST, ST1Threev2d_POST, - ST1Fourv8b_POST, ST1Fourv4h_POST, ST1Fourv2s_POST, ST1Fourv1d_POST, - ST1Fourv16b_POST, ST1Fourv8h_POST, ST1Fourv4s_POST, ST1Fourv2d_POST, - ST1i8_POST, ST1i16_POST, ST1i32_POST, ST1i64_POST, - ST2Twov8b_POST, ST2Twov4h_POST, ST2Twov2s_POST, - ST2Twov16b_POST, ST2Twov8h_POST, ST2Twov4s_POST, ST2Twov2d_POST, - ST2i8_POST, ST2i16_POST, ST2i32_POST, ST2i64_POST, - ST3Threev8b_POST, ST3Threev4h_POST, ST3Threev2s_POST, - ST3Threev16b_POST, ST3Threev8h_POST, ST3Threev4s_POST, ST3Threev2d_POST, - ST3i8_POST, ST3i16_POST, ST3i32_POST, ST3i64_POST, - ST4Fourv8b_POST, ST4Fourv4h_POST, ST4Fourv2s_POST, - ST4Fourv16b_POST, ST4Fourv8h_POST, ST4Fourv4s_POST, ST4Fourv2d_POST, - ST4i8_POST, ST4i16_POST, ST4i32_POST, ST4i64_POST]>; - -// Identify whether an instruction is an ASIMD load -// or store using the post index addressing mode. -def IsLoadStoreASIMDPostOp : CheckOpcode; - -// Identify whether an instruction is a load -// using the register offset addressing mode. -def IsLoadRegOffsetOp : CheckOpcode<[PRFMroW, PRFMroX, - LDRBBroW, LDRBBroX, - LDRSBWroW, LDRSBWroX, LDRSBXroW, LDRSBXroX, - LDRHHroW, LDRHHroX, - LDRSHWroW, LDRSHWroX, LDRSHXroW, LDRSHXroX, - LDRWroW, LDRWroX, - LDRSWroW, LDRSWroX, - LDRXroW, LDRXroX, - LDRBroW, LDRBroX, - LDRHroW, LDRHroX, - LDRSroW, LDRSroX, - LDRDroW, LDRDroX, - LDRQroW, LDRQroX]>; - -// Identify whether an instruction is a store -// using the register offset addressing mode. 
-def IsStoreRegOffsetOp : CheckOpcode<[STRBBroW, STRBBroX, - STRHHroW, STRHHroX, - STRWroW, STRWroX, - STRXroW, STRXroX, - STRBroW, STRBroX, - STRHroW, STRHroX, - STRSroW, STRSroX, - STRDroW, STRDroX, - STRQroW, STRQroX]>; - -// Identify whether an instruction is a load or -// store using the register offset addressing mode. -def IsLoadStoreRegOffsetOp : CheckOpcode; - -// Target predicates. - -// Identify an instruction that effectively transfers a register to another. -def IsCopyIdiomFn : TIIPredicate<"isCopyIdiom", - MCOpcodeSwitchStatement< - [// MOV {Rd, SP}, {SP, Rn} => - // ADD {Rd, SP}, {SP, Rn}, #0 - MCOpcodeSwitchCase< - [ADDWri, ADDXri], - MCReturnStatement< - CheckAll< - [CheckIsRegOperand<0>, - CheckIsRegOperand<1>, - CheckAny< - [CheckRegOperand<0, WSP>, - CheckRegOperand<0, SP>, - CheckRegOperand<1, WSP>, - CheckRegOperand<1, SP>]>, - CheckZeroOperand<2>]>>>, - // MOV Rd, Rm => - // ORR Rd, ZR, Rm, LSL #0 - MCOpcodeSwitchCase< - [ORRWrs, ORRXrs], - MCReturnStatement< - CheckAll< - [CheckIsRegOperand<1>, - CheckIsRegOperand<2>, - CheckAny< - [CheckRegOperand<1, WZR>, - CheckRegOperand<1, XZR>]>, - CheckShiftBy0]>>>], - MCReturnStatement>>; -def IsCopyIdiomPred : MCSchedPredicate; - -// Identify arithmetic instructions with an extended register. -def RegExtendedFn : TIIPredicate<"hasExtendedReg", - MCOpcodeSwitchStatement< - [MCOpcodeSwitchCase< - IsArithExtOp.ValidOpcodes, - MCReturnStatement< - CheckNot>>>], - MCReturnStatement>>; -def RegExtendedPred : MCSchedPredicate; - -// Identify arithmetic and logic instructions with a shifted register. -def RegShiftedFn : TIIPredicate<"hasShiftedReg", - MCOpcodeSwitchStatement< - [MCOpcodeSwitchCase< - IsArithLogicShiftOp.ValidOpcodes, - MCReturnStatement< - CheckNot>>>], - MCReturnStatement>>; -def RegShiftedPred : MCSchedPredicate; - -// Identify a load or store using the register offset addressing mode -// with an extended or scaled register. 
-def ScaledIdxFn : TIIPredicate<"isScaledAddr", - MCOpcodeSwitchStatement< - [MCOpcodeSwitchCase< - IsLoadStoreRegOffsetOp.ValidOpcodes, - MCReturnStatement< - CheckAny<[CheckNot, - CheckMemScaled]>>>], - MCReturnStatement>>; -def ScaledIdxPred : MCSchedPredicate; - -// Identify an instruction that effectively resets a FP register to zero. -def IsZeroFPIdiomFn : TIIPredicate<"isZeroFPIdiom", - MCOpcodeSwitchStatement< - [// MOVI Vd, #0 - MCOpcodeSwitchCase< - [MOVIv8b_ns, MOVIv16b_ns, - MOVID, MOVIv2d_ns], - MCReturnStatement>>, - // MOVI Vd, #0, LSL #0 - MCOpcodeSwitchCase< - [MOVIv4i16, MOVIv8i16, - MOVIv2i32, MOVIv4i32], - MCReturnStatement< - CheckAll< - [CheckZeroOperand<1>, - CheckZeroOperand<2>]>>>], - MCReturnStatement>>; -def IsZeroFPIdiomPred : MCSchedPredicate; - -// Identify an instruction that effectively resets a GP register to zero. -def IsZeroIdiomFn : TIIPredicate<"isZeroIdiom", - MCOpcodeSwitchStatement< - [// ORR Rd, ZR, #0 - MCOpcodeSwitchCase< - [ORRWri, ORRXri], - MCReturnStatement< - CheckAll< - [CheckIsRegOperand<1>, - CheckAny< - [CheckRegOperand<1, WZR>, - CheckRegOperand<1, XZR>]>, - CheckZeroOperand<2>]>>>], - MCReturnStatement>>; -def IsZeroIdiomPred : MCSchedPredicate; diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedTSV110.td b/suite/synctools/tablegen/AArch64/AArch64SchedTSV110.td deleted file mode 100644 index 77fca22a5f..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedTSV110.td +++ /dev/null @@ -1,747 +0,0 @@ -//==- AArch64SchedTSV110.td - Huawei TSV110 Scheduling Definitions -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for Huawei TSV110 to support -// instruction scheduling and other instruction cost heuristics. -// -//===----------------------------------------------------------------------===// - -// ===---------------------------------------------------------------------===// -// The following definitions describe the simpler per-operand machine model. -// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details. - -// Huawei TSV110 scheduling machine model. -def TSV110Model : SchedMachineModel { - let IssueWidth = 4; // 4 micro-ops dispatched per cycle. - let MicroOpBufferSize = 128; // 128 micro-op re-order buffer - let LoopMicroOpBufferSize = 16; - let LoadLatency = 4; // Optimistic load latency. - let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch - let CompleteModel = 1; - - list UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F); -} - -// Define each kind of processor resource and number available on the TSV110, -// which has 8 pipelines, each with its own queue where micro-ops wait for -// their operands and issue out-of-order to one of eight execution pipelines. 
-let SchedModel = TSV110Model in { - def TSV110UnitALU : ProcResource<1>; // Int ALU - def TSV110UnitAB : ProcResource<2>; // Int ALU/BRU - def TSV110UnitMDU : ProcResource<1>; // Multi-Cycle - def TSV110UnitFSU1 : ProcResource<1>; // FP/ASIMD - def TSV110UnitFSU2 : ProcResource<1>; // FP/ASIMD - def TSV110UnitLdSt : ProcResource<2>; // Load/Store - - def TSV110UnitF : ProcResGroup<[TSV110UnitFSU1, TSV110UnitFSU2]>; - def TSV110UnitALUAB : ProcResGroup<[TSV110UnitALU, TSV110UnitAB]>; - def TSV110UnitFLdSt : ProcResGroup<[TSV110UnitFSU1, TSV110UnitFSU2, TSV110UnitLdSt]>; -} - -let SchedModel = TSV110Model in { - -//===----------------------------------------------------------------------===// -// Map the target-defined scheduler read/write resources and latency for -// TSV110 - -// Integer ALU -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 2; } -def : WriteRes { let Latency = 2; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -// Integer Mul/MAC/Div -def : WriteRes { let Latency = 12; - let ResourceCycles = [12]; } -def : WriteRes { let Latency = 20; - let ResourceCycles = [20]; } -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 4; } - -// Load -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } - -// Pre/Post Indexing -def : WriteRes { let Latency = 1; } - -// Store -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -// FP -def : WriteRes { let Latency = 2; } -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 2; } -def : WriteRes { let Latency = 2; } -def : WriteRes { let Latency = 5; } - -// FP Div, Sqrt -def : WriteRes { let Latency = 18; } - -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 5; } -def : WriteRes { let Latency = 
1; } - -// Branch -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -def : WriteRes { let Unsupported = 1; } - -// Forwarding logic is modeled only for multiply and accumulate. -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -def : InstRW<[WriteI], (instrs COPY)>; - -// Detailed Refinements -//===----------------------------------------------------------------------===// - -// Contains all of the TSV110 specific SchedWriteRes types. The approach -// below is to define a generic SchedWriteRes for every combination of -// latency and microOps. The naming conventions is to use a prefix, one field -// for latency, and one or more microOp count/type designators. -// Prefix: TSV110Wr -// Latency: #cyc -// MicroOp Count/Types: #(ALU|AB|MDU|FSU1|FSU2|LdSt|ALUAB|F|FLdSt) -// -// e.g. TSV110Wr_6cyc_1ALU_6MDU_4LdSt means the total latency is 6 and there are -// 1 micro-ops to be issued down one ALU pipe, six MDU pipes and four LdSt pipes. 
-// - -//===----------------------------------------------------------------------===// -// Define Generic 1 micro-op types - -def TSV110Wr_1cyc_1AB : SchedWriteRes<[TSV110UnitAB]> { let Latency = 1; } -def TSV110Wr_1cyc_1ALU : SchedWriteRes<[TSV110UnitALU]> { let Latency = 1; } -def TSV110Wr_1cyc_1ALUAB : SchedWriteRes<[TSV110UnitALUAB]> { let Latency = 1; } -def TSV110Wr_1cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 1; } - -def TSV110Wr_2cyc_1AB : SchedWriteRes<[TSV110UnitAB]> { let Latency = 2; } -def TSV110Wr_2cyc_1ALU : SchedWriteRes<[TSV110UnitALU]> { let Latency = 2; } -def TSV110Wr_2cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 2; } -def TSV110Wr_2cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 2; } -def TSV110Wr_2cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 2; } -def TSV110Wr_2cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 2; } - -def TSV110Wr_3cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 3; } -def TSV110Wr_3cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 3; } -def TSV110Wr_3cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 3; } - -def TSV110Wr_4cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 4; } -def TSV110Wr_4cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 4; } -def TSV110Wr_4cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 4; } -def TSV110Wr_4cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 4; } - -def TSV110Wr_5cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 5; } -def TSV110Wr_5cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 5; } -def TSV110Wr_5cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 5; } -def TSV110Wr_5cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 5; } - -def TSV110Wr_6cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 6; } - -def TSV110Wr_7cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 7; } - -def TSV110Wr_8cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 8; 
} - -def TSV110Wr_11cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 11; } - -def TSV110Wr_12cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 12; } - -def TSV110Wr_17cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 17; } - -def TSV110Wr_18cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 18; } - -def TSV110Wr_20cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 20; } - -def TSV110Wr_24cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 24; } - -def TSV110Wr_31cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 31; } - -def TSV110Wr_36cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 36; } - -def TSV110Wr_38cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 38; } - -def TSV110Wr_64cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 64; } - -//===----------------------------------------------------------------------===// -// Define Generic 2 micro-op types - -def TSV110Wr_1cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt, - TSV110UnitALUAB]> { - let Latency = 1; - let NumMicroOps = 2; -} - -def TSV110Wr_2cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt, - TSV110UnitALUAB]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def TSV110Wr_2cyc_2LdSt : SchedWriteRes<[TSV110UnitLdSt, - TSV110UnitLdSt]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def TSV110Wr_2cyc_2F : SchedWriteRes<[TSV110UnitF, - TSV110UnitF]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def TSV110Wr_2cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1, - TSV110UnitFSU2]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def TSV110Wr_4cyc_2F : SchedWriteRes<[TSV110UnitF, - TSV110UnitF]> { - let Latency = 4; - let NumMicroOps = 2; -} - -def TSV110Wr_4cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1, - TSV110UnitFSU2]> { - let Latency = 4; - let NumMicroOps = 2; -} - -def TSV110Wr_4cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt, - TSV110UnitALUAB]> { - let Latency = 4; - let NumMicroOps = 2; -} - -def 
TSV110Wr_5cyc_1ALU_1F : SchedWriteRes<[TSV110UnitALU, - TSV110UnitF]> { - let Latency = 5; - let NumMicroOps = 2; -} - -def TSV110Wr_6cyc_2LdSt : SchedWriteRes<[TSV110UnitLdSt, - TSV110UnitLdSt]> { - let Latency = 6; - let NumMicroOps = 2; -} - -def TSV110Wr_6cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt, - TSV110UnitALUAB]> { - let Latency = 6; - let NumMicroOps = 2; -} - -def TSV110Wr_7cyc_1F_1LdSt : SchedWriteRes<[TSV110UnitF, - TSV110UnitLdSt]> { - let Latency = 7; - let NumMicroOps = 2; -} - -def TSV110Wr_8cyc_2FSU1 : SchedWriteRes<[TSV110UnitFSU1, - TSV110UnitFSU1]> { - let Latency = 8; - let NumMicroOps = 2; -} - - -def TSV110Wr_8cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1, - TSV110UnitFSU2]> { - let Latency = 8; - let NumMicroOps = 2; -} - -//===----------------------------------------------------------------------===// -// Define Generic 3 micro-op types - -def TSV110Wr_6cyc_3F : SchedWriteRes<[TSV110UnitF, TSV110UnitF, - TSV110UnitF]> { - let Latency = 6; - let NumMicroOps = 3; -} - -def TSV110Wr_6cyc_3LdSt : SchedWriteRes<[TSV110UnitLdSt, TSV110UnitLdSt, - TSV110UnitLdSt]> { - let Latency = 6; - let NumMicroOps = 3; -} - -def TSV110Wr_7cyc_2F_1LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF, - TSV110UnitLdSt]> { - let Latency = 7; - let NumMicroOps = 3; -} - -//===----------------------------------------------------------------------===// -// Define Generic 4 micro-op types - -def TSV110Wr_8cyc_4F : SchedWriteRes<[TSV110UnitF, TSV110UnitF, - TSV110UnitF, TSV110UnitF]> { - let Latency = 8; - let NumMicroOps = 4; -} - -def TSV110Wr_8cyc_3F_1LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF, - TSV110UnitF, TSV110UnitLdSt]> { - let Latency = 8; - let NumMicroOps = 4; -} - -//===----------------------------------------------------------------------===// -// Define Generic 5 micro-op types - -def TSV110Wr_8cyc_3F_2LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF, TSV110UnitF, - TSV110UnitLdSt, TSV110UnitLdSt]> { - let Latency = 8; - let NumMicroOps = 5; -} - 
-//===----------------------------------------------------------------------===// -// Define Generic 8 micro-op types - -def TSV110Wr_10cyc_4F_4LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF, - TSV110UnitF, TSV110UnitF, - TSV110UnitLdSt, TSV110UnitLdSt, - TSV110UnitLdSt, TSV110UnitLdSt]> { - let Latency = 10; - let NumMicroOps = 8; -} - - -// Branch Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_1cyc_1AB], (instrs B)>; -def : InstRW<[TSV110Wr_1cyc_1AB], (instrs BL)>; -def : InstRW<[TSV110Wr_1cyc_1AB], (instrs BLR)>; -def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ))$")>; - - -// Cryptography Extensions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^AES[DE]")>; -def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^AESI?MC")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^SHA1SU1")>; -def : InstRW<[TSV110Wr_2cyc_2F], (instregex "^SHA1(H|SU0)")>; -def : InstRW<[TSV110Wr_5cyc_1FSU1], (instregex "^SHA1[CMP]")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^SHA256SU0")>; -def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^SHA256SU1")>; -def : InstRW<[TSV110Wr_5cyc_1FSU1], (instregex "^SHA256(H|H2)")>; -def TSV110ReadCRC: SchedReadAdvance<1, [TSV110Wr_2cyc_1MDU]>; -def : InstRW<[TSV110Wr_2cyc_1MDU, TSV110ReadCRC], (instregex "^CRC32.*$")>; - - -// Arithmetic and Logical Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(BIC|EON|ORN)[WX]rr")>; -def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "(BIC)S[WX]rr")>; - -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(ADD|AND|EOR|ORR|SUB)[WX]r(r|i)")>; -def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "(ADD|AND|EOR|ORR|SUB)S[WX]r(r|i)")>; - -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(ADC|SBC|BIC)[WX]r$")>; -def : InstRW<[TSV110Wr_1cyc_1AB], 
(instregex "^(ADC|SBC)S[WX]r$")>; - -def : InstRW<[TSV110Wr_2cyc_1MDU], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>; -def : InstRW<[TSV110Wr_2cyc_1AB], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)S[WX]rs$")>; -def : InstRW<[TSV110Wr_2cyc_1MDU], (instregex "^(ADD|SUB)[WX]r(s|x|x64)$")>; -def : InstRW<[TSV110Wr_2cyc_1AB], (instregex "^(ADD|SUB)S[WX]r(s|x|x64)$")>; - -def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>; -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>; - - -// Move and Shift Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instrs ADR, ADRP)>; -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^MOV[NZK][WX]i")>; -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(LSLV|LSRV|ASRV|RORV)(W|X)r")>; - - -// Divide and Multiply Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_12cyc_1MDU], (instregex "^(S|U)DIVWr$")>; -def : InstRW<[TSV110Wr_20cyc_1MDU], (instregex "^(S|U)DIVXr$")>; - -def TSV110ReadMAW : SchedReadAdvance<2, [TSV110Wr_3cyc_1MDU]>; -def : InstRW<[TSV110Wr_3cyc_1MDU, TSV110ReadMAW], (instrs MADDWrrr, MSUBWrrr)>; -def TSV110ReadMAQ : SchedReadAdvance<3, [TSV110Wr_4cyc_1MDU]>; -def : InstRW<[TSV110Wr_4cyc_1MDU, TSV110ReadMAQ], (instrs MADDXrrr, MSUBXrrr)>; -def : InstRW<[TSV110Wr_3cyc_1MDU, TSV110ReadMAW], (instregex "(S|U)(MADDL|MSUBL)rrr")>; -def : InstRW<[TSV110Wr_4cyc_1MDU], (instregex "^(S|U)MULHrr$")>; - - -// Miscellaneous Data-Processing Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^EXTR(W|X)rri$")>; -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(S|U)?BFM(W|X)ri$")>; -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(CLS|CLZ|RBIT|REV(16|32)?)(W|X)r$")>; - - -// Load Instructions -// 
----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(W|X)l$")>; -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs LDRSWl)>; - -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(BB|HH|W|X)ui$")>; -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>; - -def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDR(BB|HH|W|X)(post|pre)$")>; -def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>; - -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDTR(B|H|W|X)i$")>; -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDUR(BB|HH|W|X)i$")>; -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>; -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>; - -def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDNP(W|X)i$")>; -def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDP(W|X)i$")>; -def : InstRW<[TSV110Wr_4cyc_1LdSt_1ALUAB, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>; - -def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instrs LDPSWi)>; -def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpost)>; -def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpre)>; - -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFMl)>; -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFUMi)>; -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^PRFMui$")>; -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^PRFMro(W|X)$")>; - - -// Store Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STN?P(W|X)i$")>; -def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>; -def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STUR(BB|HH|W|X)i$")>; -def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STTR(B|H|W|X)i$")>; -def : InstRW<[TSV110Wr_1cyc_1LdSt], 
(instregex "^STR(BB|HH|W|X)ui$")>; - -def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)(post|pre)$")>; -def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)ro(W|X)$")>; - - -// FP Data Processing Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "F(ABS|NEG)(D|S)r")>; -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCCMP(E)?(S|D)rr$")>; -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCMP(E)?(S|D)r(r|i)$")>; -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCSEL(S|D)rrr$")>; - -def : InstRW<[TSV110Wr_11cyc_1FSU1], (instrs FDIVSrr)>; -def : InstRW<[TSV110Wr_18cyc_1FSU1], (instrs FDIVDrr)>; -def : InstRW<[TSV110Wr_17cyc_1FSU2], (instrs FSQRTSr)>; -def : InstRW<[TSV110Wr_31cyc_1FSU2], (instrs FSQRTDr)>; - -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN).+rr")>; - -def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^FN?M(ADD|SUB)Hrrr")>; -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FN?M(ADD|SUB)Srrr")>; -def : InstRW<[TSV110Wr_7cyc_1F], (instregex "^FN?M(ADD|SUB)Drrr")>; - -def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(ADD|SUB)Hrr")>; -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(ADD|SUB)Srr")>; -def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(ADD|SUB)Drr")>; - -def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(N)?MULHrr$")>; -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(N)?MULSrr$")>; -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(N)?MULDrr$")>; - -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FRINT.+r")>; - - -// FP Miscellaneous Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_5cyc_1ALU_1F], (instregex "^[SU]CVTF[SU][WX][SD]ri")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>; -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCVT[HSD][HSD]r")>; - -def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex 
"^FMOV(DX|WS|XD|SW|DXHigh|XDHigh)r$")>; -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOV[SD][ir]$")>; - - -// FP Load Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[DSQ]l")>; -def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDUR[BDHSQ]i")>; -def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>; -def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[BDHSQ]ui")>; -def : InstRW<[TSV110Wr_6cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>; -def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi], (instregex "^LDN?P[DQS]i")>; -def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi, WriteAdr], (instregex "^LDP[DQS](post|pre)")>; - - -// FP Store Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STUR[BHSDQ]i")>; -def : InstRW<[TSV110Wr_1cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ](post|pre)")>; -def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR[BHSDQ]ui")>; -def : InstRW<[TSV110Wr_2cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ]ro[WX]")>; -def : InstRW<[TSV110Wr_2cyc_2LdSt], (instregex "^STN?P[SDQ]i")>; -def : InstRW<[TSV110Wr_2cyc_2LdSt, WriteAdr], (instregex "^STP[SDQ](post|pre)")>; - - -// ASIMD Integer Instructions -// ----------------------------------------------------------------------------- - -// Reference for forms in this group -// D form - v8i8, v4i16, v2i32 -// Q form - v16i8, v8i16, v4i32 -// D form - v1i8, v1i16, v1i32, v1i64 -// Q form - v16i8, v8i16, v4i32, v2i64 -// D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64 -// Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64 - -// ASIMD simple arithmetic -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(ABS|ADD(P)?|NEG|SUB)v")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^[SU](ADD(L|LP|W)|SUB(L|W))v")>; - -// ASIMD complex arithmetic -def : 
InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]H(ADD|SUB)v")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^R?(ADD|SUB)HN2?v")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]Q(ADD|SUB)v")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^(SU|US)QADDv")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]RHADDv")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ABAL?v")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ABDL?v")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ADALPv")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^((SQ)(ABS|NEG))v")>; - -// ASIMD compare -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT|TST)v")>; - -// ASIMD max/min -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)P?v")>; - -// ASIMD logical -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(AND|BIC|BIF|BIT|BSL|EOR|MVN|NOT|ORN|ORR)v")>; - -// ASIMD multiply accumulate, D-form -def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^(MUL|ML[AS]|SQR?D(MULH))(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)")>; -// ASIMD multiply accumulate, Q-form -def : InstRW<[TSV110Wr_8cyc_2FSU1], (instregex "^(MUL|ML[AS]|SQR?D(MULH))(v16i8|v8i16|v4i32)")>; - -// ASIMD multiply accumulate long -def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^PMULL(v8i8|v16i8)")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^PMULL(v1i64|v2i64)")>; - -// ASIMD shift -// ASIMD shift accumulate -def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^(S|SR|U|UR)SRA")>; -// ASIMD shift by immed, basic -def : InstRW<[TSV110Wr_4cyc_1FSU1], - (instregex "SHLv","SLIv","SRIv","SHRNv","SQXTNv","SQXTUNv","UQXTNv")>; -// ASIMD shift by immed, complex -def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU]?(Q|R){1,2}SHR")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^SQSHLU")>; -// ASIMD shift by register, 
basic, Q-form -def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; -// ASIMD shift by register, complex, D-form -def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU][QR]{1,2}SHL(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>; -// ASIMD shift by register, complex, Q-form -def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>; - -// ASIMD reduction -// ASIMD arith, reduce, 4H/4S -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; -// ASIMD arith, reduce, 8B/8H -def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>; -// ASIMD arith, reduce, 16B -def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?Vv16i8v$")>; - -// ASIMD max/min, reduce, 4H/4S -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>; -// ASIMD max/min, reduce, 8B/8H -def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>; -// ASIMD max/min, reduce, 16B -def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)Vv16i8v$")>; - - -// Vector - Floating Point -// ----------------------------------------------------------------------------- - -// Reference for forms in this group -// D form - v2f32 -// Q form - v4f32, v2f64 -// D form - 32, 64 -// D form - v1i32, v1i64 -// D form - v2i32 -// Q form - v4i32, v2i64 - -// ASIMD FP sign manipulation -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FABSv")>; -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FNEGv")>; - -// ASIMD FP compare -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v")>; - -// ASIMD FP convert -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FCVT[AMNPZ][SU]v")>; -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCVT(L)v")>; -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FCVT(N|XN)v")>; - -// ASIMD FP divide, D-form, F32 -def : InstRW<[TSV110Wr_11cyc_1FSU1], (instregex "FDIVv2f32")>; -// ASIMD 
FP divide, Q-form, F32 -def : InstRW<[TSV110Wr_24cyc_1FSU1], (instregex "FDIVv4f32")>; -// ASIMD FP divide, Q-form, F64 -def : InstRW<[TSV110Wr_38cyc_1FSU1], (instregex "FDIVv2f64")>; - -// ASIMD FP SQRT -def : InstRW<[TSV110Wr_17cyc_1FSU2], (instrs FSQRTv2f32)>; -def : InstRW<[TSV110Wr_36cyc_1FSU2], (instrs FSQRTv4f32)>; -def : InstRW<[TSV110Wr_64cyc_1FSU2], (instrs FSQRTv2f64)>; - -// ASIMD FP max,min -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN)(NM)?v")>; -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN)(NM)?Pv")>; -def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(MAX|MIN)(NM)?Vv")>; - -// ASIMD FP add -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(ADD|ADDP|SUB)v")>; - -// ASIMD FP multiply -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FMULX?v")>; - - -// ASIMD Miscellaneous Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(CLS|CLZ|CNT)v")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(DUP|INS)v.+lane")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^REV(16|32|64)v")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(UZP|ZIP)[12]v")>; - -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^EXTv")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^XTNv")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^RBITv")>; - -def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^(INS|DUP)v.+gpr")>; - -def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^[SU]MOVv")>; - -// ASIMD table lookup, D-form -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^TB[LX]v8i8One")>; -def : InstRW<[TSV110Wr_4cyc_2F], (instregex "^TB[LX]v8i8Two")>; -def : InstRW<[TSV110Wr_6cyc_3F], (instregex "^TB[LX]v8i8Three")>; -def : InstRW<[TSV110Wr_8cyc_4F], (instregex "^TB[LX]v8i8Four")>; -// ASIMD table lookup, Q-form -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^TB[LX]v16i8One")>; -def : InstRW<[TSV110Wr_4cyc_2F], (instregex 
"^TB[LX]v16i8Two")>; -def : InstRW<[TSV110Wr_6cyc_3F], (instregex "^TB[LX]v16i8Three")>; -def : InstRW<[TSV110Wr_8cyc_4F], (instregex "^TB[LX]v16i8Four")>; - -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOVv")>; - -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FRINT[AIMNPXZ]v")>; -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[SU]CVTFv")>; -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[FU](RECP|RSQRT)(E|X)v")>; - - -// ASIMD Load Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "LD1i(8|16|32|64)$")>; -def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; -def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "LD2i(8|16|32|64)$")>; -def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>; -def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "LD3i(8|16|32|64)$")>; -def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; -def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "LD4i(8|16|32|64)$")>; -def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; - -def : 
InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[TSV110Wr_6cyc_3LdSt], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_6cyc_3LdSt, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[TSV110Wr_6cyc_2LdSt], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_6cyc_2LdSt, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[TSV110Wr_10cyc_4F_4LdSt], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_10cyc_4F_4LdSt, WriteAdr], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - - -// ASIMD Store Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "ST1i(8|16|32|64)$")>; -def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; -def : InstRW<[TSV110Wr_4cyc_1F], (instregex "ST2i(8|16|32|64)$")>; -def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "ST3i(8|16|32|64)$")>; -def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; -def : InstRW<[TSV110Wr_6cyc_1F], (instregex 
"ST4i(8|16|32|64)$")>; -def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; - -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[TSV110Wr_6cyc_1F], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[TSV110Wr_8cyc_1F], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_8cyc_1F, WriteAdr], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -} // SchedModel = TSV110Model diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedThunderX.td b/suite/synctools/tablegen/AArch64/AArch64SchedThunderX.td deleted file mode 100644 index ff34c0ce9a..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedThunderX.td +++ /dev/null @@ -1,359 +0,0 @@ -//==- AArch64SchedThunderX.td - Cavium ThunderX T8X Scheduling Definitions -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the itinerary class data for the ARM ThunderX T8X -// (T88, T81, T83) processors. -// Loosely based on Cortex-A53 which is somewhat similar. -// -//===----------------------------------------------------------------------===// - -// ===---------------------------------------------------------------------===// -// The following definitions describe the simpler per-operand machine model. -// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details. - -// Cavium ThunderX T8X scheduling machine model. -def ThunderXT8XModel : SchedMachineModel { - let IssueWidth = 2; // 2 micro-ops dispatched per cycle. - let MicroOpBufferSize = 0; // ThunderX T88/T81/T83 are in-order. - let LoadLatency = 3; // Optimistic load latency. - let MispredictPenalty = 8; // Branch mispredict penalty. - let PostRAScheduler = 1; // Use PostRA scheduler. - let CompleteModel = 1; - - list UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F, - SMEUnsupported.F); - // FIXME: Remove when all errors have been fixed. - let FullInstRWOverlapCheck = 0; -} - -// Modeling each pipeline with BufferSize == 0 since T8X is in-order. -def THXT8XUnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU -def THXT8XUnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC -def THXT8XUnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division -def THXT8XUnitLdSt : ProcResource<1> { let BufferSize = 0; } // Load/Store -def THXT8XUnitBr : ProcResource<1> { let BufferSize = 0; } // Branch -def THXT8XUnitFPALU : ProcResource<1> { let BufferSize = 0; } // FP ALU -def THXT8XUnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mul/Div/Sqrt - -//===----------------------------------------------------------------------===// -// Subtarget-specific SchedWrite types mapping the ProcResources and -// latencies. 
- -let SchedModel = ThunderXT8XModel in { - -// ALU -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 2; } -def : WriteRes { let Latency = 2; } -def : WriteRes { let Latency = 2; } -def : WriteRes { let Latency = 2; } - -// MAC -def : WriteRes { - let Latency = 4; - let ResourceCycles = [1]; -} - -def : WriteRes { - let Latency = 4; - let ResourceCycles = [1]; -} - -// Div -def : WriteRes { - let Latency = 12; - let ResourceCycles = [6]; -} - -def : WriteRes { - let Latency = 14; - let ResourceCycles = [8]; -} - -// Load -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 3; } - -// Vector Load -def : WriteRes { - let Latency = 8; - let ResourceCycles = [3]; -} - -def THXT8XWriteVLD1 : SchedWriteRes<[THXT8XUnitLdSt]> { - let Latency = 6; - let ResourceCycles = [1]; -} - -def THXT8XWriteVLD2 : SchedWriteRes<[THXT8XUnitLdSt]> { - let Latency = 11; - let ResourceCycles = [7]; -} - -def THXT8XWriteVLD3 : SchedWriteRes<[THXT8XUnitLdSt]> { - let Latency = 12; - let ResourceCycles = [8]; -} - -def THXT8XWriteVLD4 : SchedWriteRes<[THXT8XUnitLdSt]> { - let Latency = 13; - let ResourceCycles = [9]; -} - -def THXT8XWriteVLD5 : SchedWriteRes<[THXT8XUnitLdSt]> { - let Latency = 13; - let ResourceCycles = [9]; -} - -// Pre/Post Indexing -def : WriteRes { let Latency = 0; } - -// Store -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -// Vector Store -def : WriteRes; -def THXT8XWriteVST1 : SchedWriteRes<[THXT8XUnitLdSt]>; - -def THXT8XWriteVST2 : SchedWriteRes<[THXT8XUnitLdSt]> { - let Latency = 10; - let ResourceCycles = [9]; -} - -def THXT8XWriteVST3 : SchedWriteRes<[THXT8XUnitLdSt]> { - let Latency = 11; - let ResourceCycles = [10]; -} - -def : WriteRes { let Unsupported = 1; } - -// Branch -def : WriteRes; -def THXT8XWriteBR : SchedWriteRes<[THXT8XUnitBr]>; -def : 
WriteRes; -def THXT8XWriteBRR : SchedWriteRes<[THXT8XUnitBr]>; -def THXT8XWriteRET : SchedWriteRes<[THXT8XUnitALU]>; -def : WriteRes; -def : WriteRes; -def : WriteRes; - -// FP ALU -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 6; } - -// FP Mul, Div, Sqrt -def : WriteRes { let Latency = 6; } -def : WriteRes { - let Latency = 22; - let ResourceCycles = [19]; -} - -def THXT8XWriteFMAC : SchedWriteRes<[THXT8XUnitFPMDS]> { let Latency = 10; } - -def THXT8XWriteFDivSP : SchedWriteRes<[THXT8XUnitFPMDS]> { - let Latency = 12; - let ResourceCycles = [9]; -} - -def THXT8XWriteFDivDP : SchedWriteRes<[THXT8XUnitFPMDS]> { - let Latency = 22; - let ResourceCycles = [19]; -} - -def THXT8XWriteFSqrtSP : SchedWriteRes<[THXT8XUnitFPMDS]> { - let Latency = 17; - let ResourceCycles = [14]; -} - -def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> { - let Latency = 31; - let ResourceCycles = [28]; -} - -//===----------------------------------------------------------------------===// -// Subtarget-specific SchedRead types. - -// No forwarding for these reads. -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -// FIXME: This needs more targeted benchmarking. -// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable -// operands are needed one cycle later if and only if they are to be -// shifted. Otherwise, they too are needed two cycles later. This same -// ReadAdvance applies to Extended registers as well, even though there is -// a separate SchedPredicate for them. 
-def : ReadAdvance; -def THXT8XReadShifted : SchedReadAdvance<1, [WriteImm, WriteI, - WriteISReg, WriteIEReg, WriteIS, - WriteID32, WriteID64, - WriteIM32, WriteIM64]>; -def THXT8XReadNotShifted : SchedReadAdvance<2, [WriteImm, WriteI, - WriteISReg, WriteIEReg, WriteIS, - WriteID32, WriteID64, - WriteIM32, WriteIM64]>; -def THXT8XReadISReg : SchedReadVariant<[ - SchedVar, - SchedVar]>; -def : SchedAlias; - -def THXT8XReadIEReg : SchedReadVariant<[ - SchedVar, - SchedVar]>; -def : SchedAlias; - -// MAC - Operands are generally needed one cycle later in the MAC pipe. -// Accumulator operands are needed two cycles later. -def : ReadAdvance; -def : ReadAdvance; - -// Div -def : ReadAdvance; - -//===----------------------------------------------------------------------===// -// Subtarget-specific InstRW. - -//--- -// Branch -//--- -def : InstRW<[THXT8XWriteBR], (instregex "^B$")>; -def : InstRW<[THXT8XWriteBR], (instregex "^BL$")>; -def : InstRW<[THXT8XWriteBR], (instregex "^B..$")>; -def : InstRW<[THXT8XWriteBR], (instregex "^CBNZ")>; -def : InstRW<[THXT8XWriteBR], (instregex "^CBZ")>; -def : InstRW<[THXT8XWriteBR], (instregex "^TBNZ")>; -def : InstRW<[THXT8XWriteBR], (instregex "^TBZ")>; -def : InstRW<[THXT8XWriteBRR], (instregex "^BR$")>; -def : InstRW<[THXT8XWriteBRR], (instregex "^BLR$")>; - -//--- -// Ret -//--- -def : InstRW<[THXT8XWriteRET], (instregex "^RET$")>; - -//--- -// Miscellaneous -//--- -def : InstRW<[WriteI], (instrs COPY)>; - -//--- -// Vector Loads -//--- -def : InstRW<[THXT8XWriteVLD1], (instregex "LD1i(8|16|32|64)$")>; -def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THXT8XWriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THXT8XWriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THXT8XWriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : 
InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; -def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[THXT8XWriteVLD1], (instregex "LD2i(8|16|32|64)$")>; -def : InstRW<[THXT8XWriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THXT8XWriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>; -def : InstRW<[THXT8XWriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>; -def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; -def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>; -def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>; - -def : InstRW<[THXT8XWriteVLD2], (instregex "LD3i(8|16|32|64)$")>; -def : InstRW<[THXT8XWriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THXT8XWriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; -def : InstRW<[THXT8XWriteVLD3], (instregex "LD3Threev(2d)$")>; -def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; -def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; -def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD3Threev(2d)_POST$")>; - -def : InstRW<[THXT8XWriteVLD2], (instregex "LD4i(8|16|32|64)$")>; -def : InstRW<[THXT8XWriteVLD2], (instregex 
"LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THXT8XWriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; -def : InstRW<[THXT8XWriteVLD4], (instregex "LD4Fourv(2d)$")>; -def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; -def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[THXT8XWriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; -def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; - -//--- -// Vector Stores -//--- -def : InstRW<[THXT8XWriteVST1], (instregex "ST1i(8|16|32|64)$")>; -def : InstRW<[THXT8XWriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THXT8XWriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THXT8XWriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THXT8XWriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; -def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[THXT8XWriteVST1], (instregex "ST2i(8|16|32|64)$")>; -def : InstRW<[THXT8XWriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>; -def : InstRW<[THXT8XWriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; -def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; -def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[THXT8XWriteVST2], (instregex "ST3i(8|16|32|64)$")>; -def : 
InstRW<[THXT8XWriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; -def : InstRW<[THXT8XWriteVST2], (instregex "ST3Threev(2d)$")>; -def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; -def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; -def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>; - -def : InstRW<[THXT8XWriteVST2], (instregex "ST4i(8|16|32|64)$")>; -def : InstRW<[THXT8XWriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; -def : InstRW<[THXT8XWriteVST2], (instregex "ST4Fourv(2d)$")>; -def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; -def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; -def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; - -//--- -// Floating Point MAC, DIV, SQRT -//--- -def : InstRW<[THXT8XWriteFMAC], (instregex "^FN?M(ADD|SUB).*")>; -def : InstRW<[THXT8XWriteFMAC], (instregex "^FML(A|S).*")>; -def : InstRW<[THXT8XWriteFDivSP], (instrs FDIVSrr)>; -def : InstRW<[THXT8XWriteFDivDP], (instrs FDIVDrr)>; -def : InstRW<[THXT8XWriteFDivSP], (instregex "^FDIVv.*32$")>; -def : InstRW<[THXT8XWriteFDivDP], (instregex "^FDIVv.*64$")>; -def : InstRW<[THXT8XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>; -def : InstRW<[THXT8XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>; - -} diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedThunderX2T99.td b/suite/synctools/tablegen/AArch64/AArch64SchedThunderX2T99.td deleted file mode 100644 index ffa0a5e7d9..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedThunderX2T99.td +++ /dev/null @@ -1,1867 +0,0 @@ -//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 ---*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the scheduling model for Cavium ThunderX2T99 -// processors. -// Based on Broadcom Vulcan. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// 2. Pipeline Description. - -def ThunderX2T99Model : SchedMachineModel { - let IssueWidth = 4; // 4 micro-ops dispatched at a time. - let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer. - let LoadLatency = 4; // Optimistic load latency. - let MispredictPenalty = 12; // Extra cycles for mispredicted branch. - // Determined via a mix of micro-arch details and experimentation. - let LoopMicroOpBufferSize = 128; - let PostRAScheduler = 1; // Using PostRA sched. - let CompleteModel = 1; - - list UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F, - SMEUnsupported.F); - // FIXME: Remove when all errors have been fixed. - let FullInstRWOverlapCheck = 0; -} - -let SchedModel = ThunderX2T99Model in { - -// Define the issue ports. - -// Port 0: ALU, FP/SIMD. -def THX2T99P0 : ProcResource<1>; - -// Port 1: ALU, FP/SIMD, integer mul/div. -def THX2T99P1 : ProcResource<1>; - -// Port 2: ALU, Branch. -def THX2T99P2 : ProcResource<1>; - -// Port 3: Store data. -def THX2T99P3 : ProcResource<1>; - -// Port 4: Load/store. -def THX2T99P4 : ProcResource<1>; - -// Port 5: Load/store. -def THX2T99P5 : ProcResource<1>; - -// Define groups for the functional units on each issue port. Each group -// created will be used by a WriteRes later on. -// -// NOTE: Some groups only contain one member. This is a way to create names for -// the various functional units that share a single issue port. For example, -// "THX2T99I1" for ALU ops on port 1 and "THX2T99F1" for FP ops on port 1. 
- -// Integer divide and multiply micro-ops only on port 1. -def THX2T99I1 : ProcResGroup<[THX2T99P1]>; - -// Branch micro-ops only on port 2. -def THX2T99I2 : ProcResGroup<[THX2T99P2]>; - -// ALU micro-ops on ports 0, 1, and 2. -def THX2T99I012 : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2]>; - -// Crypto FP/SIMD micro-ops only on port 1. -def THX2T99F1 : ProcResGroup<[THX2T99P1]>; - -// FP/SIMD micro-ops on ports 0 and 1. -def THX2T99F01 : ProcResGroup<[THX2T99P0, THX2T99P1]>; - -// Store data micro-ops only on port 3. -def THX2T99SD : ProcResGroup<[THX2T99P3]>; - -// Load/store micro-ops on ports 4 and 5. -def THX2T99LS01 : ProcResGroup<[THX2T99P4, THX2T99P5]>; - -// 60 entry unified scheduler. -def THX2T99Any : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2, - THX2T99P3, THX2T99P4, THX2T99P5]> { - let BufferSize = 60; -} - -// Define commonly used write types for InstRW specializations. -// All definitions follow the format: THX2T99Write_Cyc_. - -// 3 cycles on I1. -def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> { - let Latency = 3; - let NumMicroOps = 2; -} - -// 1 cycles on I2. -def THX2T99Write_1Cyc_I2 : SchedWriteRes<[THX2T99I2]> { - let Latency = 1; - let NumMicroOps = 2; -} - -// 4 cycles on I1. -def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> { - let Latency = 4; - let NumMicroOps = 2; -} - -// 23 cycles on I1. -def THX2T99Write_23Cyc_I1 : SchedWriteRes<[THX2T99I1]> { - let Latency = 23; - let ResourceCycles = [13, 23]; - let NumMicroOps = 4; -} - -// 39 cycles on I1. -def THX2T99Write_39Cyc_I1 : SchedWriteRes<[THX2T99I1]> { - let Latency = 39; - let ResourceCycles = [13, 39]; - let NumMicroOps = 4; -} - -// 1 cycle on I0, I1, or I2. -def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> { - let Latency = 1; - let NumMicroOps = 2; -} - -// 2 cycles on I0, I1, or I2. -def THX2T99Write_2Cyc_I012 : SchedWriteRes<[THX2T99I012]> { - let Latency = 2; - let NumMicroOps = 2; -} - -// 4 cycles on I0, I1, or I2. 
-def THX2T99Write_4Cyc_I012 : SchedWriteRes<[THX2T99I012]> { - let Latency = 2; - let NumMicroOps = 3; -} - -// 5 cycles on I0, I1, or I2. -def THX2T99Write_5Cyc_I012 : SchedWriteRes<[THX2T99I012]> { - let Latency = 2; - let NumMicroOps = 3; -} - -// 5 cycles on F1. -def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> { - let Latency = 5; - let NumMicroOps = 2; -} - -// 7 cycles on F1. -def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> { - let Latency = 7; - let NumMicroOps = 2; -} - -// 4 cycles on F0 or F1. -def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> { - let Latency = 4; - let NumMicroOps = 2; -} - -// 5 cycles on F0 or F1. -def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> { - let Latency = 5; - let NumMicroOps = 2; -} - -// 6 cycles on F0 or F1. -def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> { - let Latency = 6; - let NumMicroOps = 3; -} - -// 7 cycles on F0 or F1. -def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> { - let Latency = 7; - let NumMicroOps = 3; -} - -// 8 cycles on F0 or F1. -def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> { - let Latency = 8; - let NumMicroOps = 3; -} - -// 10 cycles on F0 or F1. -def THX2T99Write_10Cyc_F01 : SchedWriteRes<[THX2T99F01]> { - let Latency = 10; - let NumMicroOps = 3; -} - -// 16 cycles on F0 or F1. -def THX2T99Write_16Cyc_F01 : SchedWriteRes<[THX2T99F01]> { - let Latency = 16; - let NumMicroOps = 3; - let ResourceCycles = [8]; -} - -// 23 cycles on F0 or F1. -def THX2T99Write_23Cyc_F01 : SchedWriteRes<[THX2T99F01]> { - let Latency = 23; - let NumMicroOps = 3; - let ResourceCycles = [11]; -} - -// 1 cycles on LS0 or LS1. -def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { - let Latency = 0; -} - -// 1 cycles on LS0 or LS1 and I0, I1, or I2. -def THX2T99Write_1Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { - let Latency = 0; - let NumMicroOps = 2; -} - -// 1 cycles on LS0 or LS1 and 2 of I0, I1, or I2. 
-def THX2T99Write_1Cyc_LS01_I012_I012 : - SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { - let Latency = 0; - let NumMicroOps = 3; -} - -// 2 cycles on LS0 or LS1. -def THX2T99Write_2Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { - let Latency = 1; - let NumMicroOps = 2; -} - -// 4 cycles on LS0 or LS1. -def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { - let Latency = 4; - let NumMicroOps = 4; -} - -// 5 cycles on LS0 or LS1. -def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { - let Latency = 5; - let NumMicroOps = 3; -} - -// 6 cycles on LS0 or LS1. -def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { - let Latency = 6; - let NumMicroOps = 3; -} - -// 4 cycles on LS0 or LS1 and I0, I1, or I2. -def THX2T99Write_4Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { - let Latency = 4; - let NumMicroOps = 3; -} - -// 4 cycles on LS0 or LS1 and 2 of I0, I1, or I2. -def THX2T99Write_4Cyc_LS01_I012_I012 : - SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { - let Latency = 4; - let NumMicroOps = 3; -} - -// 5 cycles on LS0 or LS1 and I0, I1, or I2. -def THX2T99Write_5Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { - let Latency = 5; - let NumMicroOps = 3; -} - -// 5 cycles on LS0 or LS1 and 2 of I0, I1, or I2. -def THX2T99Write_5Cyc_LS01_I012_I012 : - SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { - let Latency = 5; - let NumMicroOps = 3; -} - -// 6 cycles on LS0 or LS1 and I0, I1, or I2. -def THX2T99Write_6Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { - let Latency = 6; - let NumMicroOps = 4; -} - -// 6 cycles on LS0 or LS1 and 2 of I0, I1, or I2. -def THX2T99Write_6Cyc_LS01_I012_I012 : - SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { - let Latency = 6; - let NumMicroOps = 3; -} - -// 1 cycles on LS0 or LS1 and F0 or F1. 
-def THX2T99Write_1Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { - let Latency = 1; - let NumMicroOps = 2; -} - -// 5 cycles on LS0 or LS1 and F0 or F1. -def THX2T99Write_5Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { - let Latency = 5; - let NumMicroOps = 3; -} - -// 6 cycles on LS0 or LS1 and F0 or F1. -def THX2T99Write_6Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { - let Latency = 6; - let NumMicroOps = 3; -} - -// 7 cycles on LS0 or LS1 and F0 or F1. -def THX2T99Write_7Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { - let Latency = 7; - let NumMicroOps = 3; -} - -// 8 cycles on LS0 or LS1 and F0 or F1. -def THX2T99Write_8Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { - let Latency = 8; - let NumMicroOps = 3; -} - -// 8 cycles on LS0 or LS1 and I0, I1, or I2. -def THX2T99Write_8Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { - let Latency = 8; - let NumMicroOps = 4; -} - -// 12 cycles on LS0 or LS1 and I0, I1, or I2. -def THX2T99Write_12Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { - let Latency = 12; - let NumMicroOps = 6; -} - -// 16 cycles on LS0 or LS1 and I0, I1, or I2. -def THX2T99Write_16Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { - let Latency = 16; - let NumMicroOps = 8; -} - -// 24 cycles on LS0 or LS1 and I0, I1, or I2. -def THX2T99Write_24Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { - let Latency = 24; - let NumMicroOps = 12; -} - -// 32 cycles on LS0 or LS1 and I0, I1, or I2. -def THX2T99Write_32Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { - let Latency = 32; - let NumMicroOps = 16; -} - -// Define commonly used read types. - -// No forwarding is provided for these types. 
-def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -//===----------------------------------------------------------------------===// -// 3. Instruction Tables. - -//--- -// 3.1 Branch Instructions -//--- - -// Branch, immed -// Branch and link, immed -// Compare and branch -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} - -// Branch, register -// Branch and link, register != LR -// Branch and link, register = LR -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} - -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -def : WriteRes { - let Latency = 4; - let NumMicroOps = 2; -} - -//--- -// Branch -//--- -def : InstRW<[THX2T99Write_1Cyc_I2], (instrs B, BL, BR, BLR)>; -def : InstRW<[THX2T99Write_1Cyc_I2], (instrs RET)>; -def : InstRW<[THX2T99Write_1Cyc_I2], (instregex "^B..$")>; -def : InstRW<[THX2T99Write_1Cyc_I2], - (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>; - -//--- -// 3.2 Arithmetic and Logical Instructions -// 3.3 Move and Shift Instructions -//--- - - -// ALU, basic -// Conditional compare -// Conditional select -// Address generation -def : WriteRes { - let Latency = 1; - let ResourceCycles = [1]; - let NumMicroOps = 2; -} - -def : InstRW<[WriteI], - (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", - "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", - "ADC(W|X)r", - "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", - "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", - "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", - "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", - "SBCS(W|X)r", "CCMN(W|X)(i|r)", - "CCMP(W|X)(i|r)", "CSEL(W|X)r", - "CSINC(W|X)r", "CSINV(W|X)r", - "CSNEG(W|X)r")>; - -def : InstRW<[WriteI], (instrs COPY)>; - -// ALU, extend and/or shift -def : WriteRes { - let Latency = 2; - let ResourceCycles = [2]; - let 
NumMicroOps = 2; -} - -def : InstRW<[WriteISReg], - (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", - "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", - "ADC(W|X)r", - "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", - "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", - "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", - "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", - "SBCS(W|X)r", "CCMN(W|X)(i|r)", - "CCMP(W|X)(i|r)", "CSEL(W|X)r", - "CSINC(W|X)r", "CSINV(W|X)r", - "CSNEG(W|X)r")>; - -def : WriteRes { - let Latency = 1; - let ResourceCycles = [1]; - let NumMicroOps = 2; -} - -def : InstRW<[WriteIEReg], - (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", - "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", - "ADC(W|X)r", - "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", - "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", - "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", - "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", - "SBCS(W|X)r", "CCMN(W|X)(i|r)", - "CCMP(W|X)(i|r)", "CSEL(W|X)r", - "CSINC(W|X)r", "CSINV(W|X)r", - "CSNEG(W|X)r")>; - -// Move immed -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} - -def : InstRW<[THX2T99Write_1Cyc_I012], - (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>; - -def : InstRW<[THX2T99Write_1Cyc_I012], - (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>; - -// Variable shift -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} - -//--- -// 3.4 Divide and Multiply Instructions -//--- - -// Divide, W-form -// Latency range of 13-23/13-39. 
-def : WriteRes { - let Latency = 39; - let ResourceCycles = [39]; - let NumMicroOps = 4; -} - -// Divide, X-form -def : WriteRes { - let Latency = 23; - let ResourceCycles = [23]; - let NumMicroOps = 4; -} - -// Multiply accumulate, W-form -def : WriteRes { - let Latency = 5; - let NumMicroOps = 3; -} - -// Multiply accumulate, X-form -def : WriteRes { - let Latency = 5; - let NumMicroOps = 3; -} - -//def : InstRW<[WriteIM32, ReadIM, ReadIM, ReadIMA, THX2T99Write_5Cyc_I012], -// (instrs MADDWrrr, MSUBWrrr)>; -def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>; -def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>; -def : InstRW<[THX2T99Write_5Cyc_I012], - (instregex "(S|U)(MADDL|MSUBL)rrr")>; - -def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>; -def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>; - -// Bitfield extract, two reg -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} - -// Multiply high -def : InstRW<[THX2T99Write_4Cyc_I1], (instrs SMULHrr, UMULHrr)>; - -// Miscellaneous Data-Processing Instructions -// Bitfield extract -def : InstRW<[THX2T99Write_1Cyc_I012], (instrs EXTRWrri, EXTRXrri)>; - -// Bitifield move - basic -def : InstRW<[THX2T99Write_1Cyc_I012], - (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>; - -// Bitfield move, insert -def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "^BFM")>; -def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "(S|U)?BFM.*")>; - -// Count leading -def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$", - "^CLZ(W|X)r$")>; - -// Reverse bits -def : InstRW<[THX2T99Write_1Cyc_I012], (instrs RBITWr, RBITXr)>; - -// Cryptography Extensions -def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES[DE]")>; -def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AESI?MC")>; -def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL")>; -def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1SU0")>; -def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1(H|SU1)")>; -def : InstRW<[THX2T99Write_7Cyc_F1], (instregex 
"^SHA1[CMP]")>; -def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256SU0")>; -def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256(H|H2|SU1)")>; - -// CRC Instructions -// def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32", "^CRC32C")>; -def : InstRW<[THX2T99Write_4Cyc_I1], - (instrs CRC32Brr, CRC32Hrr, CRC32Wrr, CRC32Xrr)>; - -def : InstRW<[THX2T99Write_4Cyc_I1], - (instrs CRC32CBrr, CRC32CHrr, CRC32CWrr, CRC32CXrr)>; - -// Reverse bits/bytes -// NOTE: Handled by WriteI. - -//--- -// 3.6 Load Instructions -// 3.10 FP Load Instructions -//--- - -// Load register, literal -// Load register, unscaled immed -// Load register, immed unprivileged -// Load register, unsigned immed -def : WriteRes { - let Latency = 4; - let NumMicroOps = 4; -} - -// Load register, immed post-index -// NOTE: Handled by WriteLD, WriteI. -// Load register, immed pre-index -// NOTE: Handled by WriteLD, WriteAdr. -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} - -// Load pair, immed offset, normal -// Load pair, immed offset, signed words, base != SP -// Load pair, immed offset signed words, base = SP -// LDP only breaks into *one* LS micro-op. Thus -// the resources are handled by WriteLD. -def : WriteRes { - let Latency = 5; - let NumMicroOps = 5; -} - -// Load register offset, basic -// Load register, register offset, scale by 4/8 -// Load register, register offset, scale by 2 -// Load register offset, extend -// Load register, register offset, extend, scale by 4/8 -// Load register, register offset, extend, scale by 2 -def THX2T99WriteLDIdx : SchedWriteVariant<[ - SchedVar, - SchedVar]>; -def : SchedAlias; - -def THX2T99ReadAdrBase : SchedReadVariant<[ - SchedVar, - SchedVar]>; -def : SchedAlias; - -// Load pair, immed pre-index, normal -// Load pair, immed pre-index, signed words -// Load pair, immed post-index, normal -// Load pair, immed post-index, signed words -// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr. 
- -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPDi)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPQi)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPSi)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPWi)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPXi)>; - -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPDi)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPQi)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSi)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSWi)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPWi)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPXi)>; - -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRBui)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDui)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRHui)>; -def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRQui)>; -def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRSui)>; - -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDl)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRQl)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRWl)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRXl)>; - -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRBi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRHi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRWi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRXi)>; - -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBWi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBXi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHWi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHXi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSWi)>; - -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], - 
(instrs LDPDpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], - (instrs LDPQpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], - (instrs LDPSpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], - (instrs LDPWpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], - (instrs LDPWpre)>; - -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRBpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRDpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRHpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRQpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRSpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRWpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRXpre)>; - -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpre)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpre)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpost)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpost)>; - -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpre)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpre)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpost)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpost)>; - -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpre)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpost)>; - -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpre)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpost)>; - -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], - (instrs LDPDpost)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], - (instrs LDPQpost)>; -def : 
InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], - (instrs LDPSpost)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], - (instrs LDPWpost)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], - (instrs LDPXpost)>; - -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRBpost)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRDpost)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRHpost)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRQpost)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRSpost)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRWpost)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRXpost)>; - -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], - (instrs LDPDpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], - (instrs LDPQpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], - (instrs LDPSpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], - (instrs LDPWpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], - (instrs LDPXpre)>; - -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRBpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRDpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRHpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRQpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRSpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRWpre)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRXpre)>; - -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], - (instrs LDPDpost)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], - (instrs LDPQpost)>; -def : 
InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], - (instrs LDPSpost)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], - (instrs LDPWpost)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], - (instrs LDPXpost)>; - -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRBpost)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRDpost)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRHpost)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRQpost)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRSpost)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRWpost)>; -def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRXpost)>; - -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroW)>; - -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroX)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroX)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroX)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroX)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroX)>; -def : 
InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroX)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroX)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroX)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroX)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroX)>; - -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRBroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRBroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRDroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRHroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRHHroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRQroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRSroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRSHWroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRSHXroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRWroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRXroW)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRBroX)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRDroX)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRHroX)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRHHroX)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRQroX)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRSroX)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRSHWroX)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRSHXroX)>; -def : 
InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRWroX)>; -def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], - (instrs LDRXroX)>; - -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBBi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURDi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHHi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURQi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURXi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBWi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBXi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHWi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHXi)>; -def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSWi)>; - -//--- -// Prefetch -//--- -def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMl)>; -def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFUMi)>; -def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMui)>; -def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroW)>; -def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroX)>; - -//-- -// 3.7 Store Instructions -// 3.11 FP Store Instructions -//-- - -// Store register, unscaled immed -// Store register, immed unprivileged -// Store register, unsigned immed -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} - -// Store register, immed post-index -// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase - -// Store register, immed pre-index -// NOTE: Handled by WriteAdr, WriteST - -// Store register, register offset, basic -// Store register, register offset, scaled by 4/8 -// Store register, register offset, scaled by 2 -// Store register, register offset, extend -// Store register, register offset, extend, scale by 4/8 -// Store register, register offset, extend, scale by 1 -def : 
WriteRes { - let Latency = 1; - let NumMicroOps = 3; -} - -// Store pair, immed offset, W-form -// Store pair, immed offset, X-form -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} - -// Store pair, immed post-index, W-form -// Store pair, immed post-index, X-form -// Store pair, immed pre-index, W-form -// Store pair, immed pre-index, X-form -// NOTE: Handled by WriteAdr, WriteSTP. - -def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBi)>; -def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBBi)>; -def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURDi)>; -def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHi)>; -def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHHi)>; -def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURQi)>; -def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURSi)>; -def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURWi)>; -def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURXi)>; - -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRBi)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRHi)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRWi)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRXi)>; - -def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPDi)>; -def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPQi)>; -def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPXi)>; -def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPWi)>; - -def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPDi)>; -def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPQi)>; -def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPXi)>; -def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPWi)>; - -def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRBui)>; -def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRBui)>; -def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRDui)>; -def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRDui)>; -def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRHui)>; 
-def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRHui)>; -def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRQui)>; -def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRQui)>; -def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRXui)>; -def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRXui)>; -def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRWui)>; -def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRWui)>; - -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], - (instrs STPDpre, STPDpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STPDpre, STPDpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], - (instrs STPDpre, STPDpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STPDpre, STPDpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], - (instrs STPQpre, STPQpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STPQpre, STPQpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], - (instrs STPQpre, STPQpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STPQpre, STPQpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], - (instrs STPSpre, STPSpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STPSpre, STPSpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], - (instrs STPSpre, STPSpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STPSpre, STPSpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], - (instrs STPWpre, STPWpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STPWpre, STPWpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], - (instrs STPWpre, STPWpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STPWpre, STPWpost)>; -def : 
InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], - (instrs STPXpre, STPXpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STPXpre, STPXpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], - (instrs STPXpre, STPXpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STPXpre, STPXpost)>; - -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], - (instrs STRBpre, STRBpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STRBpre, STRBpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], - (instrs STRBpre, STRBpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STRBpre, STRBpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], - (instrs STRBBpre, STRBBpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STRBBpre, STRBBpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], - (instrs STRBBpre, STRBBpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STRBBpre, STRBBpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], - (instrs STRDpre, STRDpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STRDpre, STRDpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], - (instrs STRDpre, STRDpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STRDpre, STRDpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], - (instrs STRHpre, STRHpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STRHpre, STRHpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], - (instrs STRHpre, STRHpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STRHpre, STRHpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], - (instrs STRHHpre, STRHHpost)>; -def : 
InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STRHHpre, STRHHpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], - (instrs STRHHpre, STRHHpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STRHHpre, STRHHpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], - (instrs STRQpre, STRQpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STRQpre, STRQpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], - (instrs STRQpre, STRQpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STRQpre, STRQpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], - (instrs STRSpre, STRSpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STRSpre, STRSpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], - (instrs STRSpre, STRSpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STRSpre, STRSpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], - (instrs STRWpre, STRWpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STRWpre, STRWpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], - (instrs STRWpre, STRWpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STRWpre, STRWpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], - (instrs STRXpre, STRXpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STRXpre, STRXpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], - (instrs STRXpre, STRXpost)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STRXpre, STRXpost)>; - -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STRBroW, STRBroX)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs 
STRBroW, STRBroX)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STRBBroW, STRBBroX)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STRBBroW, STRBBroX)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STRDroW, STRDroX)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STRDroW, STRDroX)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STRHroW, STRHroX)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STRHroW, STRHroX)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STRHHroW, STRHHroX)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STRHHroW, STRHHroX)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STRQroW, STRQroX)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STRQroW, STRQroX)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STRSroW, STRSroX)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STRSroW, STRSroX)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STRWroW, STRWroX)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STRWroW, STRWroX)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], - (instrs STRXroW, STRXroX)>; -def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], - (instrs STRXroW, STRXroX)>; - -//--- -// 3.8 FP Data Processing Instructions -//--- - -// FP absolute value -// FP min/max -// FP negate -def : WriteRes { - let Latency = 5; - let NumMicroOps = 2; -} - -// FP arithmetic -def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>; - -// FP compare -def : WriteRes { - let Latency = 5; - let NumMicroOps = 2; -} - -// FP Mul, Div, 
Sqrt -def : WriteRes { - let Latency = 22; - let ResourceCycles = [19]; -} - -def THX2T99XWriteFDiv : SchedWriteRes<[THX2T99F01]> { - let Latency = 16; - let ResourceCycles = [8]; - let NumMicroOps = 4; -} - -def THX2T99XWriteFDivSP : SchedWriteRes<[THX2T99F01]> { - let Latency = 16; - let ResourceCycles = [8]; - let NumMicroOps = 4; -} - -def THX2T99XWriteFDivDP : SchedWriteRes<[THX2T99F01]> { - let Latency = 23; - let ResourceCycles = [12]; - let NumMicroOps = 4; -} - -def THX2T99XWriteFSqrtSP : SchedWriteRes<[THX2T99F01]> { - let Latency = 16; - let ResourceCycles = [8]; - let NumMicroOps = 4; -} - -def THX2T99XWriteFSqrtDP : SchedWriteRes<[THX2T99F01]> { - let Latency = 23; - let ResourceCycles = [12]; - let NumMicroOps = 4; -} - -// FP divide, S-form -// FP square root, S-form -def : InstRW<[THX2T99XWriteFDivSP], (instrs FDIVSrr)>; -def : InstRW<[THX2T99XWriteFSqrtSP], (instrs FSQRTSr)>; -def : InstRW<[THX2T99XWriteFDivSP], (instregex "^FDIVv.*32$")>; -def : InstRW<[THX2T99XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>; -def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "^FDIVSrr", "^FSQRTSr")>; - -// FP divide, D-form -// FP square root, D-form -def : InstRW<[THX2T99XWriteFDivDP], (instrs FDIVDrr)>; -def : InstRW<[THX2T99XWriteFSqrtDP], (instrs FSQRTDr)>; -def : InstRW<[THX2T99XWriteFDivDP], (instregex "^FDIVv.*64$")>; -def : InstRW<[THX2T99XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>; -def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "^FDIVDrr", "^FSQRTDr")>; - -// FP multiply -// FP multiply accumulate -def : WriteRes { - let Latency = 6; - let ResourceCycles = [2]; - let NumMicroOps = 3; -} - -def THX2T99XWriteFMul : SchedWriteRes<[THX2T99F01]> { - let Latency = 6; - let ResourceCycles = [2]; - let NumMicroOps = 3; -} - -def THX2T99XWriteFMulAcc : SchedWriteRes<[THX2T99F01]> { - let Latency = 6; - let ResourceCycles = [2]; - let NumMicroOps = 3; -} - -def : InstRW<[THX2T99XWriteFMul], (instregex "^FMUL", "^FNMUL")>; -def : InstRW<[THX2T99XWriteFMulAcc], - 
(instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>; - -// FP round to integral -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>; - -// FP select -def : InstRW<[THX2T99Write_4Cyc_F01], (instregex "^FCSEL")>; - -//--- -// 3.9 FP Miscellaneous Instructions -//--- - -// FP convert, from vec to vec reg -// FP convert, from gen to vec reg -// FP convert, from vec to gen reg -def : WriteRes { - let Latency = 7; - let NumMicroOps = 3; -} - -// FP move, immed -// FP move, register -def : WriteRes { - let Latency = 4; - let NumMicroOps = 2; -} - -// FP transfer, from gen to vec reg -// FP transfer, from vec to gen reg -def : WriteRes { - let Latency = 4; - let NumMicroOps = 2; -} - -def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>; - -//--- -// 3.12 ASIMD Integer Instructions -//--- - -// ASIMD absolute diff, D-form -// ASIMD absolute diff, Q-form -// ASIMD absolute diff accum, D-form -// ASIMD absolute diff accum, Q-form -// ASIMD absolute diff accum long -// ASIMD absolute diff long -// ASIMD arith, basic -// ASIMD arith, complex -// ASIMD compare -// ASIMD logical (AND, BIC, EOR) -// ASIMD max/min, basic -// ASIMD max/min, reduce, 4H/4S -// ASIMD max/min, reduce, 8B/8H -// ASIMD max/min, reduce, 16B -// ASIMD multiply, D-form -// ASIMD multiply, Q-form -// ASIMD multiply accumulate long -// ASIMD multiply accumulate saturating long -// ASIMD multiply long -// ASIMD pairwise add and accumulate -// ASIMD shift accumulate -// ASIMD shift by immed, basic -// ASIMD shift by immed and insert, basic, D-form -// ASIMD shift by immed and insert, basic, Q-form -// ASIMD shift by immed, complex -// ASIMD shift by register, basic, D-form -// ASIMD shift by register, basic, Q-form -// ASIMD shift by register, complex, D-form -// ASIMD shift by register, complex, Q-form -def : WriteRes { - let Latency = 7; - let NumMicroOps = 4; - let ResourceCycles = [4]; -} -def : WriteRes { - let Latency = 7; - let NumMicroOps = 4; - let 
ResourceCycles = [4]; -} - -// ASIMD arith, reduce, 4H/4S -// ASIMD arith, reduce, 8B/8H -// ASIMD arith, reduce, 16B - -// ASIMD logical (MVN (alias for NOT), ORN, ORR) -def : InstRW<[THX2T99Write_5Cyc_F01], - (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>; - -// ASIMD arith, reduce -def : InstRW<[THX2T99Write_10Cyc_F01], - (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; - -// ASIMD polynomial (8x8) multiply long -def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^(S|U|SQD)MULL")>; -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>; -def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL(v8i8|v16i8)")>; -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^PMULL(v1i64|v2i64)")>; - -// ASIMD absolute diff accum, D-form -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; -// ASIMD absolute diff accum, Q-form -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; -// ASIMD absolute diff accum long -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^[SU]ABAL")>; -// ASIMD arith, reduce, 4H/4S -def : InstRW<[THX2T99Write_5Cyc_F01], - (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; -// ASIMD arith, reduce, 8B -def : InstRW<[THX2T99Write_5Cyc_F01], - (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>; -// ASIMD arith, reduce, 16B/16H -def : InstRW<[THX2T99Write_10Cyc_F01], - (instregex "^[SU]?ADDL?Vv16i8v$")>; -// ASIMD max/min, reduce, 4H/4S -def : InstRW<[THX2T99Write_10Cyc_F01], - (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>; -// ASIMD max/min, reduce, 8B/8H -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>; -// ASIMD max/min, reduce, 16B/16H -def : InstRW<[THX2T99Write_10Cyc_F01], - (instregex "^[SU](MIN|MAX)Vv16i8v$")>; -// ASIMD multiply, D-form -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^(P?MUL|SQR?DMULH)" # - "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" # - "(_indexed)?$")>; -// ASIMD multiply, Q-form -def : 
InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; -// ASIMD multiply accumulate, D-form -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; -// ASIMD multiply accumulate, Q-form -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; -// ASIMD shift accumulate -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>; - -// ASIMD shift by immed, basic -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "RSHRNv","SHRNv", "SQRSHRNv","SQRSHRUNv", - "SQSHRNv","SQSHRUNv", "UQRSHRNv", - "UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>; -// ASIMD shift by immed, complex -def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^[SU]?(Q|R){1,2}SHR")>; -def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SQSHLU")>; -// ASIMD shift by register, basic, Q-form -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; -// ASIMD shift by register, complex, D-form -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^[SU][QR]{1,2}SHL" # - "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>; -// ASIMD shift by register, complex, Q-form -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>; - -// ASIMD Arithmetic -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>; -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>; -def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(ADD|SUB)HNv.*")>; -def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(RADD|RSUB)HNv.*")>; -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD", - "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>; -def : InstRW<[THX2T99Write_5Cyc_F01], - (instregex 
"((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" # - "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>; -def : InstRW<[THX2T99Write_5Cyc_F01], - (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>; -def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADALP","^UADALP")>; -def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLPv","^UADDLPv")>; -def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLV","^UADDLV")>; -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^ADDVv","^SMAXVv","^UMAXVv","^SMINVv","^UMINVv")>; -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^SABAv","^UABAv","^SABALv","^UABALv")>; -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^SQADDv","^SQSUBv","^UQADDv","^UQSUBv")>; -def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SUQADDv","^USQADDv")>; -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^ADDHNv","^RADDHNv", "^RSUBHNv", - "^SQABS", "^SQADD", "^SQNEG", "^SQSUB", - "^SRHADD", "^SUBHNv", "^SUQADD", - "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^CMEQv","^CMGEv","^CMGTv", - "^CMLEv","^CMLTv", "^CMHIv","^CMHSv")>; -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^SMAXv","^SMINv","^UMAXv","^UMINv", - "^SMAXPv","^SMINPv","^UMAXPv","^UMINPv")>; -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^SABDv","^UABDv", "^SABDLv","^UABDLv")>; - -//--- -// 3.13 ASIMD Floating-point Instructions -//--- - -// ASIMD FP absolute value -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FABSv")>; - -// ASIMD FP arith, normal, D-form -// ASIMD FP arith, normal, Q-form -def : InstRW<[THX2T99Write_6Cyc_F01], - (instregex "^FABDv", "^FADDv", "^FSUBv")>; - -// ASIMD FP arith,pairwise, D-form -// ASIMD FP arith, pairwise, Q-form -def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADDPv")>; - -// ASIMD FP compare, D-form -// ASIMD FP compare, Q-form -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>; -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv", - "^FCMGTv", 
"^FCMLEv", - "^FCMLTv")>; - -// ASIMD FP round, D-form -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^FRINT[AIMNPXZ](v2f32)")>; -// ASIMD FP round, Q-form -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>; - -// ASIMD FP convert, long -// ASIMD FP convert, narrow -// ASIMD FP convert, other, D-form -// ASIMD FP convert, other, Q-form -// NOTE: Handled by WriteV. - -// ASIMD FP convert, long and narrow -def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^FCVT(L|N|XN)v")>; -// ASIMD FP convert, other, D-form -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>; -// ASIMD FP convert, other, Q-form -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>; - -// ASIMD FP divide, D-form, F32 -def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv2f32)>; -def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv2f32")>; - -// ASIMD FP divide, Q-form, F32 -def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv4f32)>; -def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv4f32")>; - -// ASIMD FP divide, Q-form, F64 -def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVv2f64)>; -def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "FDIVv2f64")>; - -// ASIMD FP max/min, normal, D-form -// ASIMD FP max/min, normal, Q-form -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXv", "^FMAXNMv", - "^FMINv", "^FMINNMv")>; - -// ASIMD FP max/min, pairwise, D-form -// ASIMD FP max/min, pairwise, Q-form -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXPv", "^FMAXNMPv", - "^FMINPv", "^FMINNMPv")>; - -// ASIMD FP max/min, reduce -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv", - "^FMINVv", "^FMINNMVv")>; - -// ASIMD FP multiply, D-form, FZ -// ASIMD FP multiply, D-form, no FZ -// ASIMD FP multiply, Q-form, FZ -// ASIMD FP multiply, Q-form, no FZ -def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMULv", 
"^FMULXv")>; -def : InstRW<[THX2T99Write_6Cyc_F01], - (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>; -def : InstRW<[THX2T99Write_6Cyc_F01], - (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>; - -// ASIMD FP multiply accumulate, Dform, FZ -// ASIMD FP multiply accumulate, Dform, no FZ -// ASIMD FP multiply accumulate, Qform, FZ -// ASIMD FP multiply accumulate, Qform, no FZ -def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>; -def : InstRW<[THX2T99Write_6Cyc_F01], - (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>; -def : InstRW<[THX2T99Write_6Cyc_F01], - (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>; - -// ASIMD FP negate -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FNEGv")>; - -//-- -// 3.14 ASIMD Miscellaneous Instructions -//-- - -// ASIMD bit reverse -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>; - -// ASIMD bitwise insert, D-form -// ASIMD bitwise insert, Q-form -def : InstRW<[THX2T99Write_5Cyc_F01], - (instregex "^BIFv", "^BITv", "^BSLv", "^BSPv")>; - -// ASIMD count, D-form -// ASIMD count, Q-form -def : InstRW<[THX2T99Write_5Cyc_F01], - (instregex "^CLSv", "^CLZv", "^CNTv")>; - -// ASIMD duplicate, gen reg -// ASIMD duplicate, element -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>; -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUP(i8|i16|i32|i64)$")>; -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv.+gpr")>; - -// ASIMD extract -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^EXTv")>; - -// ASIMD extract narrow -def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^XTNv")>; - -// ASIMD extract narrow, saturating -def : InstRW<[THX2T99Write_7Cyc_F01], - (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>; - -// ASIMD insert, element to element -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>; - -// ASIMD transfer, element to gen reg -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>; - -// ASIMD move, integer immed -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex 
"^MOVIv")>; - -// ASIMD move, FP immed -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>; - -// ASIMD reciprocal estimate, D-form -// ASIMD reciprocal estimate, Q-form -def : InstRW<[THX2T99Write_5Cyc_F01], - (instregex "^FRECPEv", "^FRECPXv", "^URECPEv", - "^FRSQRTEv", "^URSQRTEv")>; - -// ASIMD reciprocal step, D-form, FZ -// ASIMD reciprocal step, D-form, no FZ -// ASIMD reciprocal step, Q-form, FZ -// ASIMD reciprocal step, Q-form, no FZ -def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>; - -// ASIMD reverse -def : InstRW<[THX2T99Write_5Cyc_F01], - (instregex "^REV16v", "^REV32v", "^REV64v")>; - -// ASIMD table lookup, D-form -// ASIMD table lookup, Q-form -def : InstRW<[THX2T99Write_8Cyc_F01], (instregex "^TBLv", "^TBXv")>; - -// ASIMD transfer, element to word or word -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>; - -// ASIMD transfer, element to gen reg -def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "(S|U)MOVv.*")>; - -// ASIMD transfer gen reg to element -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>; - -// ASIMD transpose -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1v", "^TRN2v", - "^UZP1v", "^UZP2v")>; - -// ASIMD unzip/zip -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>; - -//-- -// 3.15 ASIMD Load Instructions -//-- - -// ASIMD load, 1 element, multiple, 1 reg, D-form -// ASIMD load, 1 element, multiple, 1 reg, Q-form -def : InstRW<[THX2T99Write_4Cyc_LS01], - (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], - (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 1 element, multiple, 2 reg, D-form -// ASIMD load, 1 element, multiple, 2 reg, Q-form -def : InstRW<[THX2T99Write_4Cyc_LS01], - (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], - (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 1 element, 
multiple, 3 reg, D-form -// ASIMD load, 1 element, multiple, 3 reg, Q-form -def : InstRW<[THX2T99Write_5Cyc_LS01], - (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr], - (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 1 element, multiple, 4 reg, D-form -// ASIMD load, 1 element, multiple, 4 reg, Q-form -def : InstRW<[THX2T99Write_6Cyc_LS01], - (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr], - (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 1 element, one lane, B/H/S -// ASIMD load, 1 element, one lane, D -def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], - (instregex "^LD1i(8|16|32|64)_POST$")>; - -// ASIMD load, 1 element, all lanes, D-form, B/H/S -// ASIMD load, 1 element, all lanes, D-form, D -// ASIMD load, 1 element, all lanes, Q-form -def : InstRW<[THX2T99Write_5Cyc_LS01_F01], - (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], - (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 2 element, multiple, D-form, B/H/S -// ASIMD load, 2 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_5Cyc_LS01_F01], - (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], - (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 2 element, one lane, B/H -// ASIMD load, 2 element, one lane, S -// ASIMD load, 2 element, one lane, D -def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], - (instregex "^LD2i(8|16|32|64)_POST$")>; - -// ASIMD load, 2 element, all lanes, D-form, B/H/S -// ASIMD load, 2 element, all lanes, D-form, D -// ASIMD load, 2 element, all lanes, Q-form -def : InstRW<[THX2T99Write_5Cyc_LS01_F01], - 
(instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], - (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 3 element, multiple, D-form, B/H/S -// ASIMD load, 3 element, multiple, Q-form, B/H/S -// ASIMD load, 3 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_8Cyc_LS01_F01], - (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], - (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 3 element, one lone, B/H -// ASIMD load, 3 element, one lane, S -// ASIMD load, 3 element, one lane, D -def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], - (instregex "^LD3i(8|16|32|64)_POST$")>; - -// ASIMD load, 3 element, all lanes, D-form, B/H/S -// ASIMD load, 3 element, all lanes, D-form, D -// ASIMD load, 3 element, all lanes, Q-form, B/H/S -// ASIMD load, 3 element, all lanes, Q-form, D -def : InstRW<[THX2T99Write_7Cyc_LS01_F01], - (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], - (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 4 element, multiple, D-form, B/H/S -// ASIMD load, 4 element, multiple, Q-form, B/H/S -// ASIMD load, 4 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_8Cyc_LS01_F01], - (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], - (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 4 element, one lane, B/H -// ASIMD load, 4 element, one lane, S -// ASIMD load, 4 element, one lane, D -def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], - (instregex "^LD4i(8|16|32|64)_POST$")>; - -// ASIMD load, 4 element, all lanes, D-form, B/H/S -// ASIMD load, 4 element, all lanes, D-form, D -// ASIMD load, 4 
element, all lanes, Q-form, B/H/S -// ASIMD load, 4 element, all lanes, Q-form, D -def : InstRW<[THX2T99Write_6Cyc_LS01_F01], - (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], - (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -//-- -// 3.16 ASIMD Store Instructions -//-- - -// ASIMD store, 1 element, multiple, 1 reg, D-form -// ASIMD store, 1 element, multiple, 1 reg, Q-form -def : InstRW<[THX2T99Write_1Cyc_LS01], - (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], - (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, multiple, 2 reg, D-form -// ASIMD store, 1 element, multiple, 2 reg, Q-form -def : InstRW<[THX2T99Write_1Cyc_LS01], - (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], - (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, multiple, 3 reg, D-form -// ASIMD store, 1 element, multiple, 3 reg, Q-form -def : InstRW<[THX2T99Write_1Cyc_LS01], - (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], - (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, multiple, 4 reg, D-form -// ASIMD store, 1 element, multiple, 4 reg, Q-form -def : InstRW<[THX2T99Write_1Cyc_LS01], - (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], - (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, one lane, B/H/S -// ASIMD store, 1 element, one lane, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], - (instregex "^ST1i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], - (instregex "^ST1i(8|16|32|64)_POST$")>; - -// ASIMD store, 2 element, multiple, D-form, B/H/S -// ASIMD store, 2 element, multiple, Q-form, B/H/S -// ASIMD store, 2 element, multiple, Q-form, D 
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01], - (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], - (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 2 element, one lane, B/H/S -// ASIMD store, 2 element, one lane, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], - (instregex "^ST2i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], - (instregex "^ST2i(8|16|32|64)_POST$")>; - -// ASIMD store, 3 element, multiple, D-form, B/H/S -// ASIMD store, 3 element, multiple, Q-form, B/H/S -// ASIMD store, 3 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], - (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], - (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 3 element, one lane, B/H -// ASIMD store, 3 element, one lane, S -// ASIMD store, 3 element, one lane, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], - (instregex "^ST3i(8|16|32|64)_POST$")>; - -// ASIMD store, 4 element, multiple, D-form, B/H/S -// ASIMD store, 4 element, multiple, Q-form, B/H/S -// ASIMD store, 4 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], - (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], - (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 4 element, one lane, B/H -// ASIMD store, 4 element, one lane, S -// ASIMD store, 4 element, one lane, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], - (instregex "^ST4i(8|16|32|64)_POST$")>; - -// V8.1a Atomics (LSE) -def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], - (instrs CASB, CASH, CASW, CASX)>; - -def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], - (instrs CASAB, CASAH, CASAW, 
CASAX)>; - -def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], - (instrs CASLB, CASLH, CASLW, CASLX)>; - -def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], - (instrs CASALB, CASALH, CASALW, CASALX)>; - -def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], - (instrs LDLARB, LDLARH, LDLARW, LDLARX)>; - -def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], - (instrs LDADDB, LDADDH, LDADDW, LDADDX)>; - -def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], - (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>; - -def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], - (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>; - -def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], - (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>; - -def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], - (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>; - -def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], - (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>; - -def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], - (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>; - -def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], - (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>; - -def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], - (instrs LDEORB, LDEORH, LDEORW, LDEORX)>; - -def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], - (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>; - -def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], - (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>; - -def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], - (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>; - -def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], - (instrs LDSETB, LDSETH, LDSETW, LDSETX)>; - -def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], - (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>; - -def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], - (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>; - -def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], - (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>; - -def : 
InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], - (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX, - LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX, - LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX, - LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>; - -def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], - (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX, - LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX, - LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX, - LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>; - -def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], - (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX, - LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX, - LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX, - LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>; - -def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], - (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX, - LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX, - LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX, - LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>; - -def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], - (instrs SWPB, SWPH, SWPW, SWPX)>; - -def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], - (instrs SWPAB, SWPAH, SWPAW, SWPAX)>; - -def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], - (instrs SWPLB, SWPLH, SWPLW, SWPLX)>; - -def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], - (instrs SWPALB, SWPALH, SWPALW, SWPALX)>; - -def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], - (instrs STLLRB, STLLRH, STLLRW, STLLRX)>; - -} // SchedModel = ThunderX2T99Model - diff --git a/suite/synctools/tablegen/AArch64/AArch64SchedThunderX3T110.td b/suite/synctools/tablegen/AArch64/AArch64SchedThunderX3T110.td deleted file mode 100644 index 46a1c217f9..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SchedThunderX3T110.td +++ /dev/null @@ -1,2003 +0,0 @@ -//=- AArch64SchedThunderX3T110.td - Marvell ThunderX3 T110 ---*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the scheduling model for Marvell ThunderX3T110 -// family of processors. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Pipeline Description. - -def ThunderX3T110Model : SchedMachineModel { - let IssueWidth = 4; // 4 micro-ops dispatched at a time. - let MicroOpBufferSize = 70; // 70 entries in micro-op re-order buffer. - let LoadLatency = 4; // Optimistic load latency. - let MispredictPenalty = 12; // Extra cycles for mispredicted branch. - // Determined via a mix of micro-arch details and experimentation. - let LoopMicroOpBufferSize = 128; // FIXME: might be much bigger in TX3. - let PostRAScheduler = 1; // Using PostRA sched. - let CompleteModel = 1; - - list UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F); - // FIXME: Remove when all errors have been fixed. - let FullInstRWOverlapCheck = 0; -} - -let SchedModel = ThunderX3T110Model in { - -// Issue ports. - -// Port 0: ALU. -def THX3T110P0 : ProcResource<1>; - -// Port 1: ALU. -def THX3T110P1 : ProcResource<1>; - -// Port 2: ALU/Branch. -def THX3T110P2 : ProcResource<1>; - -// Port 3: ALU/Branch. -def THX3T110P3 : ProcResource<1>; - -// Port 4: Load/Store. -def THX3T110P4 : ProcResource<1>; - -// Port 5: Load/store. -def THX3T110P5 : ProcResource<1>; - -// Port 6: FP/Neon/SIMD/Crypto. -def THX3T110P6FP0 : ProcResource<1>; - -// Port 7: FP/Neon/SIMD/Crypto. -def THX3T110P7FP1 : ProcResource<1>; - -// Port 8: FP/Neon/SIMD/Crypto. -def THX3T110P8FP2 : ProcResource<1>; - -// Port 9: FP/Neon/SIMD/Crypto. -def THX3T110P9FP3 : ProcResource<1>; - -// Port 10: Store Data Unit. -def THX3T110SD0 : ProcResource<1>; - -// Define groups for the functional units on each issue port. 
Each group -// created will be used by a WriteRes. - -// Integer divide/mulhi micro-ops only on port I1. -def THX3T110I1 : ProcResGroup<[THX3T110P1]>; - -// Branch micro-ops on ports I2/I3. -def THX3T110I23 : ProcResGroup<[THX3T110P2, THX3T110P3]>; - -// Branch micro-ops on ports I1/I2/I3. -def THX3T110I123 : ProcResGroup<[THX3T110P1, THX3T110P2, THX3T110P3]>; - -// Integer micro-ops on ports I0/I1/I2. -def THX3T110I012 : ProcResGroup<[THX3T110P0, THX3T110P1, THX3T110P2]>; - -// Integer micro-ops on ports I0/I1/I2/I3. -def THX3T110I0123 : ProcResGroup<[THX3T110P0, THX3T110P1, - THX3T110P2, THX3T110P3]>; - -// FP micro-ops on ports FP0/FP1/FP2/FP3. -def THX3T110FP0123 : ProcResGroup<[THX3T110P6FP0, THX3T110P7FP1, - THX3T110P8FP2, THX3T110P9FP3]>; - -// FP micro-ops on ports FP2/FP3. -def THX3T110FP23 : ProcResGroup<[THX3T110P8FP2, THX3T110P9FP3]>; - -// ASIMD micro-ops on ports FP0/FP1/FP2/FP3. -def THX3T110SIMD : ProcResGroup<[THX3T110P6FP0, THX3T110P7FP1, - THX3T110P8FP2, THX3T110P9FP3]>; - -// Store data micro-ops only on port 10. -def THX3T110SD : ProcResGroup<[THX3T110SD0]>; - -// Load/store micro-ops on ports P4/P5. -def THX3T110LS : ProcResGroup<[THX3T110P4, THX3T110P5]>; - -// 70 entry unified scheduler. -def THX3T110ANY: ProcResGroup<[THX3T110P0, THX3T110P1, THX3T110P2, - THX3T110P3, THX3T110P4, THX3T110P5, - THX3T110P6FP0, THX3T110P7FP1, - THX3T110P8FP2, THX3T110P9FP3]> { - let BufferSize = 70; -} - -// Define commonly used write types for InstRW specializations. -// All definitions follow the format: THX3T110Write_Cyc_. - -// 3 cycles on I1. -def THX3T110Write_3Cyc_I1 : SchedWriteRes<[THX3T110I1]> { - let Latency = 3; - let NumMicroOps = 2; -} - -// 4 cycles on I1. -def THX3T110Write_4Cyc_I1 : SchedWriteRes<[THX3T110I1]> { - let Latency = 4; - let NumMicroOps = 2; -} - -// 5 cycles on I1. -def THX3T110Write_5Cyc_I1 : SchedWriteRes<[THX3T110I1]> { - let Latency = 5; - let NumMicroOps = 2; -} - -// 7 cycles on I1. 
-def THX3T110Write_7Cyc_I1 : SchedWriteRes<[THX3T110I1]> { - let Latency = 7; - let NumMicroOps = 3; -} - -// 23 cycles on I1. -def THX3T110Write_23Cyc_I1 : SchedWriteRes<[THX3T110I1]> { - let Latency = 23; - let ResourceCycles = [13, 23]; - let NumMicroOps = 4; -} - -// 39 cycles on I1. -def THX3T110Write_39Cyc_I1 : SchedWriteRes<[THX3T110I1]> { - let Latency = 39; - let ResourceCycles = [13, 39]; - let NumMicroOps = 4; -} - -// 1 cycle on I2/I3 -def THX3T110Write_1Cyc_I23 : SchedWriteRes<[THX3T110I23]> { - let Latency = 1; - let NumMicroOps = 2; -} - -// 8 cycles on I2/I3 -def THX3T110Write_8Cyc_I23 : SchedWriteRes<[THX3T110I23]> { - let Latency = 8; - let NumMicroOps = 3; -} - -// 1 cycle on I1/I2/I3 -def THX3T110Write_1Cyc_I123 : SchedWriteRes<[THX3T110I123]> { - let Latency = 1; - let NumMicroOps = 2; -} - -// 8 cycles on I1/I2/I3 -def THX3T110Write_8Cyc_I123 : SchedWriteRes<[THX3T110I123]> { - let Latency = 8; - let NumMicroOps = 3; -} - -// 1 cycle on I0/I1/I2/I3. -def THX3T110Write_1Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { - let Latency = 1; - let NumMicroOps = 2; -} - -// 2 cycles on I0/I1/I2/I3. -def THX3T110Write_2Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { - let Latency = 2; - let NumMicroOps = 2; -} - -// 3 cycles on I0/I1/I2/I3. -def THX3T110Write_3Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { - let Latency = 3; - let NumMicroOps = 2; -} - -// 4 cycles on I0/I1/I2/I3. -def THX3T110Write_4Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { - let Latency = 4; - let NumMicroOps = 3; -} - -// 5 cycles on I0/I1/I2/I3. -def THX3T110Write_5Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { - let Latency = 5; - let NumMicroOps = 3; -} - -// 6 cycles on I0/I1/I2/I3. -def THX3T110Write_6Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { - let Latency = 6; - let NumMicroOps = 3; -} - -// 8 cycles on I0/I1/I2/I3. -def THX3T110Write_8Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { - let Latency = 8; - let NumMicroOps = 4; -} - -// 13 cycles on I0/I1/I2/I3. 
-def THX3T110Write_13Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { - let Latency = 13; - let NumMicroOps = 3; -} - -// 23 cycles on I0/I1/I2/I3. -def THX3T110Write_23Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { - let Latency = 23; - let NumMicroOps = 3; -} - -// 39 cycles on I0/I1/I2/I3. -def THX3T110Write_39Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { - let Latency = 39; - let NumMicroOps = 3; -} - -// 4 cycles on F2/F3. -def THX3T110Write_4Cyc_F23 : SchedWriteRes<[THX3T110FP23]> { - let Latency = 4; - let NumMicroOps = 2; -} - -// 5 cycles on F0/F1/F2/F3. -def THX3T110Write_5Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 5; - let NumMicroOps = 2; -} - -// 6 cycles on F0/F1/F2/F3. -def THX3T110Write_6Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 6; - let NumMicroOps = 3; -} - -// 7 cycles on F0/F1/F2/F3. -def THX3T110Write_7Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 7; - let NumMicroOps = 3; -} - -// 8 cycles on F0/F1/F2/F3. -def THX3T110Write_8Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 8; - let NumMicroOps = 3; -} - -// 10 cycles on F0/F1/F2/F3. -def THX3T110Write_10Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 10; - let NumMicroOps = 3; -} - -// 16 cycles on F0/F1/F2/F3. -def THX3T110Write_16Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 16; - let NumMicroOps = 3; - let ResourceCycles = [8]; -} - -// 23 cycles on F0/F1/F2/F3. -def THX3T110Write_23Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 23; - let NumMicroOps = 3; - let ResourceCycles = [11]; -} - -// 1 cycle on LS0/LS1. -def THX3T110Write_1Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { - let Latency = 1; - let NumMicroOps = 1; -} - -// 2 cycles on LS0/LS1. -def THX3T110Write_2Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { - let Latency = 2; - let NumMicroOps = 2; -} - -// 4 cycles on LS0/LS1. 
-def THX3T110Write_4Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [2]; -} - -// 5 cycles on LS0/LS1. -def THX3T110Write_5Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { - let Latency = 5; - let NumMicroOps = 3; -} - -// 6 cycles on LS0/LS1. -def THX3T110Write_6Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { - let Latency = 6; - let NumMicroOps = 3; -} - -// 4 + 5 cycles on LS0/LS1. -// First resource is available after 4 cycles. -// Second resource is available after 5 cycles. -// Load vector pair, immed offset, Q-form [LDP/LDNP]. -def THX3T110Write_4_5Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [4, 5]; -} - -// 4 + 8 cycles on LS0/LS1. -// First resource is available after 4 cycles. -// Second resource is available after 8 cycles. -// Load vector pair, immed offset, S/D-form [LDP/LDNP]. -def THX3T110Write_4_8Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [4, 8]; -} - -// 11 cycles on LS0/LS1 and I1. -def THX3T110Write_11Cyc_LS01_I1 : - SchedWriteRes<[THX3T110LS, THX3T110I1]> { - let Latency = 11; - let NumMicroOps = 4; -} - -// 1 cycles on LS0/LS1 and I0/I1/I2/I3. -def THX3T110Write_1Cyc_LS01_I0123 : - SchedWriteRes<[THX3T110LS, THX3T110I0123]> { - let Latency = 1; - let NumMicroOps = 2; -} - -// 1 cycles on LS0/LS1 and 2 of I0/I1/I2/I3. -def THX3T110Write_1Cyc_LS01_I0123_I0123 : - SchedWriteRes<[THX3T110LS, THX3T110I0123, THX3T110I0123]> { - let Latency = 1; - let NumMicroOps = 3; -} - -// 4 cycles on LS0/LS1 and I0/I1/I2/I3. -def THX3T110Write_4Cyc_LS01_I0123 : - SchedWriteRes<[THX3T110LS, THX3T110I0123]> { - let Latency = 4; - let NumMicroOps = 3; -} - -// 4 cycles on LS0/LS1 and 2 of I0/I1/I2/I3. -def THX3T110Write_4Cyc_LS01_I0123_I0123 : - SchedWriteRes<[THX3T110LS, THX3T110I0123, THX3T110I0123]> { - let Latency = 4; - let NumMicroOps = 3; -} - -// 5 cycles on LS0/LS1 and I0/I1/I2/I3. 
-def THX3T110Write_5Cyc_LS01_I0123 : - SchedWriteRes<[THX3T110LS, THX3T110I0123]> { - let Latency = 5; - let NumMicroOps = 3; -} - -// 5 cycles on LS0/LS1 and 2 of I0/I1/I2/I3. -def THX3T110Write_5Cyc_LS01_I0123_I0123 : - SchedWriteRes<[THX3T110LS, THX3T110I0123, THX3T110I0123]> { - let Latency = 5; - let NumMicroOps = 3; -} - -// 6 cycles on LS0/LS1 and I0/I1/I2/I3. -def THX3T110Write_6Cyc_LS01_I012 : - SchedWriteRes<[THX3T110LS, THX3T110I0123]> { - let Latency = 6; - let NumMicroOps = 4; -} - -// 6 cycles on LS0/LS1 and 2 of I0/I1/I2/I3. -def THX3T110Write_6Cyc_LS01_I0123_I0123 : - SchedWriteRes<[THX3T110LS, THX3T110I0123, THX3T110I0123]> { - let Latency = 6; - let NumMicroOps = 3; -} - -// 1 cycle on LS0/LS1 and SD. -def THX3T110Write_1Cyc_LS01_SD : - SchedWriteRes<[THX3T110LS, THX3T110SD]> { - let Latency = 1; - let NumMicroOps = 2; -} - -// 2 cycles on LS0/LS1 and SD. -def THX3T110Write_2Cyc_LS01_SD : - SchedWriteRes<[THX3T110LS, THX3T110SD]> { - let Latency = 2; - let NumMicroOps = 2; -} - -// 4 cycles on LS0/LS1 and SD. -def THX3T110Write_4Cyc_LS01_SD : - SchedWriteRes<[THX3T110LS, THX3T110SD]> { - let Latency = 4; - let NumMicroOps = 3; -} - -// 5 cycles on LS0/LS1 and SD. -def THX3T110Write_5Cyc_LS01_SD : - SchedWriteRes<[THX3T110LS, THX3T110SD]> { - let Latency = 5; - let NumMicroOps = 4; -} - -// 6 cycles on LS0/LS1 and SD. -def THX3T110Write_6Cyc_LS01_SD : - SchedWriteRes<[THX3T110LS, THX3T110SD]> { - let Latency = 6; - let NumMicroOps = 5; -} - -// 1 cycle on LS0/LS1, SD and I0/I1/I2/I3. -def THX3T110Write_1Cyc_LS01_SD_I0123 : - SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> { - let Latency = 1; - let NumMicroOps = 2; -} - -// 2 cycles on LS0/LS1, SD and I0/I1/I2/I3. -def THX3T110Write_2Cyc_LS01_SD_I0123 : - SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> { - let Latency = 2; - let NumMicroOps = 2; -} - -// 4 cycles on LS0/LS1, SD and I0/I1/I2/I3. 
-def THX3T110Write_4Cyc_LS01_SD_I0123 : - SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> { - let Latency = 4; - let NumMicroOps = 3; -} - -// 5 cycles on LS0/LS1, SD and I0/I1/I2/I3. -def THX3T110Write_5Cyc_LS01_SD_I0123 : - SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> { - let Latency = 5; - let NumMicroOps = 4; -} - -// 6 cycles on LS0/LS1, SD and I0/I1/I2/I3. -def THX3T110Write_6Cyc_LS01_SD_I0123 : - SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> { - let Latency = 6; - let NumMicroOps = 5; -} - -// 1 cycles on LS0/LS1 and F0/F1/F2/F3. -def THX3T110Write_1Cyc_LS01_F0123 : - SchedWriteRes<[THX3T110LS, THX3T110FP0123]> { - let Latency = 1; - let NumMicroOps = 2; -} - -// 5 cycles on LS0/LS1 and F0/F1/F2/F3. -def THX3T110Write_5Cyc_LS01_F0123 : - SchedWriteRes<[THX3T110LS, THX3T110FP0123]> { - let Latency = 5; - let NumMicroOps = 3; -} - -// 6 cycles on LS0/LS1 and F0/F1/F2/F3. -def THX3T110Write_6Cyc_LS01_F0123 : - SchedWriteRes<[THX3T110LS, THX3T110FP0123]> { - let Latency = 6; - let NumMicroOps = 3; -} - -// 7 cycles on LS0/LS1 and F0/F1/F2/F3. -def THX3T110Write_7Cyc_LS01_F0123 : - SchedWriteRes<[THX3T110LS, THX3T110FP0123]> { - let Latency = 7; - let NumMicroOps = 3; -} - -// 8 cycles on LS0/LS1 and F0/F1/F2/F3. -def THX3T110Write_8Cyc_LS01_F0123 : - SchedWriteRes<[THX3T110LS, THX3T110FP0123]> { - let Latency = 8; - let NumMicroOps = 3; -} - -// 8 cycles on LS0/LS1 and I0/I1/I2/I3. -def THX3T110Write_8Cyc_LS01_I0123 : - SchedWriteRes<[THX3T110LS, THX3T110I0123]> { - let Latency = 8; - let NumMicroOps = 3; -} - -// 12 cycles on LS0/LS1 and I0/I1/I2/I3. -def THX3T110Write_12Cyc_LS01_I0123 : - SchedWriteRes<[THX3T110LS, THX3T110I0123]> { - let Latency = 12; - let NumMicroOps = 4; -} - -// 16 cycles on LS0/LS1 and I0/I1/I2/I3. -def THX3T110Write_16Cyc_LS01_I0123 : - SchedWriteRes<[THX3T110LS, THX3T110I0123]> { - let Latency = 16; - let NumMicroOps = 5; -} - -// 24 cycles on LS0/LS1 and I0/I1/I2/I3. 
-def THX3T110Write_24Cyc_LS01_I0123 : - SchedWriteRes<[THX3T110LS, THX3T110I0123]> { - let Latency = 24; - let NumMicroOps = 10; -} - -// 32 cycles on LS0/LS1 and I0/I1/I2/I3. -def THX3T110Write_32Cyc_LS01_I0123 : - SchedWriteRes<[THX3T110LS, THX3T110I0123]> { - let Latency = 32; - let NumMicroOps = 14; -} - -// 3 cycles on F0/F1/F2/F3. -def THX3T110Write_3Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 3; - let NumMicroOps = 2; -} - -// 4 cycles on F0/F1/F2/F3. -def THX3T110Write_4Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 4; - let NumMicroOps = 2; -} - -// 5 cycles on F0/F1/F2/F3. -def THX3T110Write_5Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 5; - let NumMicroOps = 2; -} - -// 10 cycles on F0/F1/F2/F3. -def THX3T110Write_10Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 10; - let NumMicroOps = 4; -} - -// 15 cycles on F0/F1/F2/F3. -def THX3T110Write_15Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 15; - let NumMicroOps = 7; -} - -// 16 cycles on F0/F1/F2/F3. -def THX3T110Write_16Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 16; - let NumMicroOps = 3; -} - -// 18 cycles on F0/F1/F2/F3. -def THX3T110Write_18Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 18; - let NumMicroOps = 3; -} - -// 19 cycles on F0/F1/F2/F3. -def THX3T110Write_19Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 19; - let NumMicroOps = 4; -} - -// 20 cycles on F0/F1/F2/F3. -def THX3T110Write_20Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 20; - let NumMicroOps = 4; -} - -// 23 cycles on F0/F1/F2/F3. -def THX3T110Write_23Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 23; - let NumMicroOps = 4; -} - -// 3 cycles on F2/F3 and 4 cycles on F0/F1/F2/F3. 
-def THX3T110Write_3_4Cyc_F23_F0123 : - SchedWriteRes<[THX3T110FP23, THX3T110FP0123]> { - let Latency = 3; - let NumMicroOps = 2; - let ResourceCycles = [3, 4]; -} - - -// Define commonly used read types. - -// No forwarding is provided for these types. -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -//===----------------------------------------------------------------------===// -// 3. Instruction Tables. - -//--- -// 3.1 Branch Instructions -//--- - -// Branch, immed -// Branch and link, immed -// Compare and branch -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} - -// Branch, register -// Branch and link, register != LR -// Branch and link, register = LR -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} - -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -def : WriteRes { - let Latency = 4; - let NumMicroOps = 2; -} - -//--- -// Branch -//--- -def : InstRW<[THX3T110Write_1Cyc_I23], (instrs B, BL, BR, BLR)>; -def : InstRW<[THX3T110Write_1Cyc_I23], (instrs Bcc)>; -def : InstRW<[THX3T110Write_1Cyc_I23], (instrs RET)>; -def : InstRW<[THX3T110Write_1Cyc_I23], - (instrs CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>; - -//--- -// 3.2 Arithmetic and Logical Instructions -// 3.3 Move and Shift Instructions -//--- - - -// ALU, basic -// Conditional compare -// Conditional select -// Address generation -def : WriteRes { - let Latency = 1; - let ResourceCycles = [1]; - let NumMicroOps = 2; -} - -def : InstRW<[WriteI], - (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", - "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", - "ADC(W|X)r", - "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", - "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", - "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", - "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", 
- "SBCS(W|X)r", "CCMN(W|X)(i|r)", - "CCMP(W|X)(i|r)", "CSEL(W|X)r", - "CSINC(W|X)r", "CSINV(W|X)r", - "CSNEG(W|X)r")>; - -def : InstRW<[WriteI], (instrs COPY)>; - -// ALU, extend and/or shift -def : WriteRes { - let Latency = 2; - let ResourceCycles = [2]; - let NumMicroOps = 2; -} - -def : InstRW<[WriteISReg], - (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", - "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", - "ADC(W|X)r", - "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", - "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", - "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", - "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", - "SBCS(W|X)r", "CCMN(W|X)(i|r)", - "CCMP(W|X)(i|r)", "CSEL(W|X)r", - "CSINC(W|X)r", "CSINV(W|X)r", - "CSNEG(W|X)r")>; - -def : WriteRes { - let Latency = 1; - let ResourceCycles = [1]; - let NumMicroOps = 2; -} - -def : InstRW<[WriteIEReg], - (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", - "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", - "ADC(W|X)r", - "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", - "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", - "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", - "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", - "SBCS(W|X)r", "CCMN(W|X)(i|r)", - "CCMP(W|X)(i|r)", "CSEL(W|X)r", - "CSINC(W|X)r", "CSINV(W|X)r", - "CSNEG(W|X)r")>; - -// Move immed -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} - -def : InstRW<[THX3T110Write_1Cyc_I0123], - (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>; - -def : InstRW<[THX3T110Write_1Cyc_I0123], - (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>; - -// Variable shift -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} - -//--- -// 3.4 Divide and Multiply Instructions -//--- - -// Divide, W-form -// Latency range of 13-23/13-39. 
-def : WriteRes { - let Latency = 39; - let ResourceCycles = [39]; - let NumMicroOps = 4; -} - -// Divide, X-form -def : WriteRes { - let Latency = 23; - let ResourceCycles = [23]; - let NumMicroOps = 4; -} - -// Multiply accumulate, W-form -def : WriteRes { - let Latency = 5; - let NumMicroOps = 3; -} - -// Multiply accumulate, X-form -def : WriteRes { - let Latency = 5; - let NumMicroOps = 3; -} - -//def : InstRW<[WriteIM32, ReadIM, ReadIM, ReadIMA, THX3T110Write_5Cyc_I012], -// (instrs MADDWrrr, MSUBWrrr)>; -def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>; -def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>; -def : InstRW<[THX3T110Write_5Cyc_I0123], - (instregex "(S|U)(MADDL|MSUBL)rrr")>; - -def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>; -def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>; - -// Bitfield extract, two reg -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} - -// Multiply high -def : InstRW<[THX3T110Write_4Cyc_I1], (instrs SMULHrr, UMULHrr)>; - -// Miscellaneous Data-Processing Instructions -// Bitfield extract -def : InstRW<[THX3T110Write_1Cyc_I0123], (instrs EXTRWrri, EXTRXrri)>; - -// Bitfield move - basic -def : InstRW<[THX3T110Write_1Cyc_I0123], - (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>; - -// Bitfield move, insert -def : InstRW<[THX3T110Write_1Cyc_I0123], (instregex "^BFM")>; -def : InstRW<[THX3T110Write_1Cyc_I0123], (instregex "(S|U)?BFM.*")>; - -// Count leading -def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], - (instregex "^CLS(W|X)r$", "^CLZ(W|X)r$")>; - -// Reverse bits -def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instrs RBITWr, RBITXr)>; - -// Cryptography Extensions -def : InstRW<[THX3T110Write_4Cyc_F0123], (instregex "^AES[DE]")>; -def : InstRW<[THX3T110Write_4Cyc_F0123], (instregex "^AESI?MC")>; -def : InstRW<[THX3T110Write_4Cyc_F0123], (instregex "^PMULL")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA1SU0")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA1(H|SU1)")>; 
-def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA1[CMP]")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA256SU0")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA256(H|H2|SU1)")>; - -// CRC Instructions -// def : InstRW<[THX3T110Write_4Cyc_I1], (instregex "^CRC32", "^CRC32C")>; -def : InstRW<[THX3T110Write_4Cyc_I1], - (instrs CRC32Brr, CRC32Hrr, CRC32Wrr, CRC32Xrr)>; - -def : InstRW<[THX3T110Write_4Cyc_I1], - (instrs CRC32CBrr, CRC32CHrr, CRC32CWrr, CRC32CXrr)>; - -// Reverse bits/bytes -// NOTE: Handled by WriteI. - -//--- -// 3.6 Load Instructions -// 3.10 FP Load Instructions -//--- - -// Load register, literal -// Load register, unscaled immed -// Load register, immed unprivileged -// Load register, unsigned immed -def : WriteRes { - let Latency = 4; - let NumMicroOps = 4; -} - -// Load register, immed post-index -// NOTE: Handled by WriteLD, WriteI. -// Load register, immed pre-index -// NOTE: Handled by WriteLD, WriteAdr. -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} - -// Load pair, immed offset, normal -// Load pair, immed offset, signed words, base != SP -// Load pair, immed offset signed words, base = SP -// LDP only breaks into *one* LS micro-op. Thus -// the resources are handled by WriteLD. 
-def : WriteRes { - let Latency = 4; - let NumMicroOps = 4; -} - -// Load register offset, basic -// Load register, register offset, scale by 4/8 -// Load register, register offset, scale by 2 -// Load register offset, extend -// Load register, register offset, extend, scale by 4/8 -// Load register, register offset, extend, scale by 2 -def THX3T110WriteLDIdx : SchedWriteVariant<[ - SchedVar, - SchedVar]>; -def : SchedAlias; - -def THX3T110ReadAdrBase : SchedReadVariant<[ - SchedVar, - SchedVar]>; -def : SchedAlias; - -// Load pair, immed pre-index, normal -// Load pair, immed pre-index, signed words -// Load pair, immed post-index, normal -// Load pair, immed post-index, signed words -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPDi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPQi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPSi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPWi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPXi)>; - -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPDi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPQi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPSi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPSWi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPWi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPXi)>; - -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRBui)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRDui)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRHui)>; -def : InstRW<[THX3T110Write_5Cyc_LS01], (instrs LDRQui)>; -def : InstRW<[THX3T110Write_5Cyc_LS01], (instrs LDRSui)>; - -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRDl)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRQl)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRWl)>; 
-def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRXl)>; - -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRBi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRHi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRWi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRXi)>; - -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSBWi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSBXi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSHWi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSHXi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSWi)>; - -// Load pair, immed pre-index: one InstRW per D/Q/S/W/X form, mirroring the -// LDP*post list further down. -def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr], - (instrs LDPDpre)>; -def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr], - (instrs LDPQpre)>; -def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr], - (instrs LDPSpre)>; -def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr], - (instrs LDPWpre)>; -def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr], - (instrs LDPXpre)>; - -def : InstRW<[THX3T110Write_4Cyc_LS01, WriteAdr], - (instrs LDRBpre, LDRDpre, LDRHpre, LDRQpre, - LDRSpre, LDRWpre, LDRXpre, - LDRSBWpre, LDRSBXpre, LDRSBWpost, LDRSBXpost, - LDRSHWpre, LDRSHXpre, LDRSHWpost, LDRSHXpost, - LDRBBpre, LDRBBpost, LDRHHpre, LDRHHpost)>; - -def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr], - (instrs LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost)>; - -def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteI], - (instrs LDRBpost, LDRDpost, LDRHpost, - LDRQpost, LDRSpost, LDRWpost, LDRXpost)>; - -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123_I0123, WriteLDHi, WriteAdr], - (instrs LDPDpre, LDPQpre, LDPSpre, LDPWpre, LDPXpre)>; - -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123_I0123, WriteAdr], - (instrs LDRBpre, LDRDpre, LDRHpre, LDRQpre, - LDRSpre, LDRWpre, LDRXpre)>; - -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123_I0123, WriteLDHi, WriteAdr], - (instrs LDPDpost, LDPQpost, LDPSpost, LDPWpost, 
LDPXpost)>; - -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123_I0123, WriteI], - (instrs LDRBpost, LDRDpost, LDRHpost, LDRQpost, - LDRSpost, LDRWpost, LDRXpost)>; - -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRBroW)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRDroW)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRHroW)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRHHroW)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRQroW)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSroW)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSHWroW)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSHXroW)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRWroW)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRXroW)>; - -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRBroX)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRDroX)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRHHroX)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRHroX)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRQroX)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSroX)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSHWroX)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSHXroX)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRWroX)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRXroX)>; - -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURBi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURBBi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURDi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURHi)>; -def : 
InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURHHi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURQi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURXi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSBWi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSBXi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSHWi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSHXi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSWi)>; - -// Load exclusive -def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDAR(B|H|W|X)$")>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDAXR(B|H|W|X)$")>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDXR(B|H|W|X)$")>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDAXP(W|X)$")>; -def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDXP(W|X)$")>; - -//--- -// Prefetch -//--- -def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFMl)>; -def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFUMi)>; -def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFMui)>; -def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFMroW)>; -def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFMroX)>; - -//-- -// 3.7 Store Instructions -// 3.11 FP Store Instructions -//-- - -// Store register, unscaled immed -// Store register, immed unprivileged -// Store register, unsigned immed -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} - -// Store register, immed post-index -// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase - -// Store register, immed pre-index -// NOTE: Handled by WriteAdr, WriteST - -// Store register, register offset, basic -// Store register, register offset, scaled by 4/8 -// Store register, register offset, scaled by 2 -// Store register, register offset, extend -// Store register, register offset, extend, scale by 4/8 -// Store register, register offset, extend, scale by 1 -def : WriteRes { 
- let Latency = 1; - let NumMicroOps = 2; -} - -// Store pair, immed offset, W-form -// Store pair, immed offset, X-form -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} - -// Store pair, immed post-index, W-form -// Store pair, immed post-index, X-form -// Store pair, immed pre-index, W-form -// Store pair, immed pre-index, X-form -// NOTE: Handled by WriteAdr, WriteSTP. -def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURBi)>; -def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURBBi)>; -def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURDi)>; -def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURHi)>; -def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURHHi)>; -def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURQi)>; -def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURSi)>; -def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURWi)>; -def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURXi)>; - -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_SD], (instrs STTRBi)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_SD], (instrs STTRHi)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_SD], (instrs STTRWi)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_SD], (instrs STTRXi)>; - -def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STNPDi)>; -def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STNPQi)>; -def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STNPXi)>; -def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STNPWi)>; - -def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STPDi)>; -def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STPQi)>; -def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STPXi)>; -def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STPWi)>; - -def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRBui)>; -def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRDui)>; -def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRHui)>; -def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRQui)>; 
-def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRXui)>; -def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRWui)>; - -def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRBui)>; -def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRDui)>; -def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRHui)>; -def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRQui)>; -def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRXui)>; -def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRWui)>; - -def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRBui)>; -def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRDui)>; -def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRHui)>; -def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRQui)>; -def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRXui)>; -def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRWui)>; - -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], - (instrs STPDpre, STPDpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STPDpre, STPDpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], - (instrs STPQpre, STPQpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STPQpre, STPQpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], - (instrs STPSpre, STPSpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STPSpre, STPSpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], - (instrs STPWpre, STPWpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STPWpre, STPWpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], - (instrs STPXpre, STPXpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STPXpre, STPXpost)>; -def : 
InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], - (instrs STRBpre, STRBpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STRBpre, STRBpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], - (instrs STRBBpre, STRBBpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STRBBpre, STRBBpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], - (instrs STRDpre, STRDpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STRDpre, STRDpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], - (instrs STRHpre, STRHpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STRHpre, STRHpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], - (instrs STRHHpre, STRHHpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STRHHpre, STRHHpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], - (instrs STRQpre, STRQpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STRQpre, STRQpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], - (instrs STRSpre, STRSpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STRSpre, STRSpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], - (instrs STRWpre, STRWpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STRWpre, STRWpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], - (instrs STRXpre, STRXpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STRXpre, STRXpost)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STRBroW, STRBroX)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STRBBroW, STRBBroX)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STRDroW, 
STRDroX)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STRHroW, STRHroX)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STRHHroW, STRHHroX)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STRQroW, STRQroX)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STRSroW, STRSroX)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STRWroW, STRWroX)>; -def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], - (instrs STRXroW, STRXroX)>; - -// Store exclusive -def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instrs STNPWi, STNPXi)>; -def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STLR(B|H|W|X)$")>; -def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STXP(W|X)$")>; -def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STXR(B|H|W|X)$")>; -def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STLXP(W|X)$")>; -def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STLXR(B|H|W|X)$")>; - -//--- -// 3.8 FP Data Processing Instructions -//--- - -// FP absolute value -// FP min/max -// FP negate -def : WriteRes { - let Latency = 5; - let NumMicroOps = 2; -} - -// FP arithmetic -def : InstRW<[THX3T110Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>; - -// FP compare -def : WriteRes { - let Latency = 5; - let NumMicroOps = 2; -} - -// FP Mul, Div, Sqrt -def : WriteRes { - let Latency = 22; - let ResourceCycles = [19]; -} - -def THX3T110XWriteFDiv : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 16; - let ResourceCycles = [8]; - let NumMicroOps = 4; -} - -def THX3T110XWriteFDivSP : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 16; - let ResourceCycles = [8]; - let NumMicroOps = 4; -} - -def THX3T110XWriteFDivDP : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 23; - let ResourceCycles = [12]; - let NumMicroOps = 4; -} - -def THX3T110XWriteFSqrtSP : SchedWriteRes<[THX3T110FP0123]> { - let 
Latency = 16; - let ResourceCycles = [8]; - let NumMicroOps = 4; -} - -def THX3T110XWriteFSqrtDP : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 23; - let ResourceCycles = [12]; - let NumMicroOps = 4; -} - -// FP divide, S-form -// FP square root, S-form -def : InstRW<[THX3T110XWriteFDivSP], (instrs FDIVSrr)>; -def : InstRW<[THX3T110XWriteFSqrtSP], (instrs FSQRTSr)>; -def : InstRW<[THX3T110XWriteFDivSP], (instregex "^FDIVv.*32$")>; -def : InstRW<[THX3T110XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>; -def : InstRW<[THX3T110Write_16Cyc_F01], (instregex "^FDIVSrr", "^FSQRTSr")>; - -// FP divide, D-form -// FP square root, D-form -def : InstRW<[THX3T110XWriteFDivDP], (instrs FDIVDrr)>; -def : InstRW<[THX3T110XWriteFSqrtDP], (instrs FSQRTDr)>; -def : InstRW<[THX3T110XWriteFDivDP], (instregex "^FDIVv.*64$")>; -def : InstRW<[THX3T110XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>; -def : InstRW<[THX3T110Write_23Cyc_F01], (instregex "^FDIVDrr", "^FSQRTDr")>; - -// FP multiply -// FP multiply accumulate -def : WriteRes { - let Latency = 6; - let ResourceCycles = [2]; - let NumMicroOps = 3; -} - -def THX3T110XWriteFMul : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 6; - let ResourceCycles = [2]; - let NumMicroOps = 3; -} - -def THX3T110XWriteFMulAcc : SchedWriteRes<[THX3T110FP0123]> { - let Latency = 6; - let ResourceCycles = [2]; - let NumMicroOps = 3; -} - -def : InstRW<[THX3T110XWriteFMul], (instregex "^FMUL", "^FNMUL")>; -def : InstRW<[THX3T110XWriteFMulAcc], - (instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>; - -// FP round to integral -def : InstRW<[THX3T110Write_7Cyc_F01], - (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>; - -// FP select -def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instregex "^FCSEL")>; - -//--- -// 3.9 FP Miscellaneous Instructions -//--- - -// FP convert, from vec to vec reg -// FP convert, from gen to vec reg -// FP convert, from vec to gen reg -def : WriteRes { - let Latency = 7; - let NumMicroOps = 3; -} - -// FP move, immed -// FP 
move, register -def : WriteRes { - let Latency = 4; - let NumMicroOps = 2; -} - -// FP transfer, from gen to vec reg -// FP transfer, from vec to gen reg -def : WriteRes { - let Latency = 4; - let NumMicroOps = 2; -} - -def : InstRW<[THX3T110Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>; - -//--- -// 3.12 ASIMD Integer Instructions -//--- - -// ASIMD absolute diff, D-form -// ASIMD absolute diff, Q-form -// ASIMD absolute diff accum, D-form -// ASIMD absolute diff accum, Q-form -// ASIMD absolute diff accum long -// ASIMD absolute diff long -// ASIMD arith, basic -// ASIMD arith, complex -// ASIMD compare -// ASIMD logical (AND, BIC, EOR) -// ASIMD max/min, basic -// ASIMD max/min, reduce, 4H/4S -// ASIMD max/min, reduce, 8B/8H -// ASIMD max/min, reduce, 16B -// ASIMD multiply, D-form -// ASIMD multiply, Q-form -// ASIMD multiply accumulate long -// ASIMD multiply accumulate saturating long -// ASIMD multiply long -// ASIMD pairwise add and accumulate -// ASIMD shift accumulate -// ASIMD shift by immed, basic -// ASIMD shift by immed and insert, basic, D-form -// ASIMD shift by immed and insert, basic, Q-form -// ASIMD shift by immed, complex -// ASIMD shift by register, basic, D-form -// ASIMD shift by register, basic, Q-form -// ASIMD shift by register, complex, D-form -// ASIMD shift by register, complex, Q-form -def : WriteRes { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [4]; -} -def : WriteRes { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [4]; -} - -// ASIMD arith, reduce, 4H/4S -// ASIMD arith, reduce, 8B/8H -// ASIMD arith, reduce, 16B - -// ASIMD logical (MVN (alias for NOT), ORN, ORR) -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>; - -// ASIMD arith, reduce -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; - -// ASIMD polynomial (8x8) multiply long -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex 
"^(S|U|SQD)MULL")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^PMULL(v8i8|v16i8)")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^PMULL(v1i64|v2i64)")>; - -// ASIMD absolute diff accum, D-form -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; -// ASIMD absolute diff accum, Q-form -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; -// ASIMD absolute diff accum long -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^[SU]ABAL")>; -// ASIMD arith, reduce, 4H/4S -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; -// ASIMD arith, reduce, 8B/8H -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>; -// ASIMD arith, reduce, 16B -def : InstRW<[THX3T110Write_10Cyc_F0123], - (instregex "^[SU]?ADDL?Vv16i8v$")>; -// ASIMD max/min, reduce, 4H/4S -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>; -// ASIMD max/min, reduce, 8B/8H -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>; -// ASIMD max/min, reduce, 16B -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^[SU](MIN|MAX)Vv16i8v$")>; -// ASIMD multiply, D-form -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^(P?MUL|SQR?DMULH)" # - "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" # - "(_indexed)?$")>; -// ASIMD multiply, Q-form -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; -// ASIMD multiply accumulate, D-form -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; -// ASIMD multiply accumulate, Q-form -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; -// ASIMD shift accumulate -def : InstRW<[THX3T110Write_5Cyc_F0123], - 
(instregex "SRSRAv","SSRAv","URSRAv","USRAv")>; - -// ASIMD shift by immed, basic -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "RSHRNv","SHRNv", "SQRSHRNv","SQRSHRUNv", - "SQSHRNv","SQSHRUNv", "UQRSHRNv", - "UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>; -// ASIMD shift by immed, complex -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^[SU]?(Q|R){1,2}SHR")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SQSHLU")>; -// ASIMD shift by register, basic, Q-form -def : InstRW<[THX3T110Write_5Cyc_F01], - (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; -// ASIMD shift by register, complex, D-form -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^[SU][QR]{1,2}SHL" # - "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>; -// ASIMD shift by register, complex, Q-form -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>; - -// ASIMD Arithmetic -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "(ADD|SUB)HNv.*")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "(RADD|RSUB)HNv.*")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD", - "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" # - "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SADALP","^UADALP")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SADDLPv","^UADDLPv")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SADDLV","^UADDLV")>; -def : 
InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^ADDVv","^SMAXVv","^UMAXVv","^SMINVv","^UMINVv")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^SABAv","^UABAv","^SABALv","^UABALv")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^SQADDv","^SQSUBv","^UQADDv","^UQSUBv")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SUQADDv","^USQADDv")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^ADDHNv","^RADDHNv", "^RSUBHNv", - "^SQABS", "^SQADD", "^SQNEG", "^SQSUB", - "^SRHADD", "^SUBHNv", "^SUQADD", - "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^CMEQv","^CMGEv","^CMGTv", - "^CMLEv","^CMLTv", "^CMHIv","^CMHSv")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^SMAXv","^SMINv","^UMAXv","^UMINv", - "^SMAXPv","^SMINPv","^UMAXPv","^UMINPv")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^SABDv","^UABDv", "^SABDLv","^UABDLv")>; - -//--- -// 3.13 ASIMD Floating-point Instructions -//--- - -// ASIMD FP absolute value -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FABSv")>; - -// ASIMD FP arith, normal, D-form -// ASIMD FP arith, normal, Q-form -def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], - (instregex "^FABDv", "^FADDv", "^FSUBv")>; - -// ASIMD FP arith,pairwise, D-form -// ASIMD FP arith, pairwise, Q-form -def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instregex "^FADDPv")>; - -// ASIMD FP compare, D-form -// ASIMD FP compare, Q-form -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FACGEv", "^FACGTv")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FCMEQv", "^FCMGEv", - "^FCMGTv", "^FCMLEv", - "^FCMLTv")>; - -// ASIMD FP round, D-form -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^FRINT[AIMNPXZ](v2f32)")>; -// ASIMD FP round, Q-form -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>; - -// ASIMD FP convert, long -// ASIMD FP convert, narrow -// ASIMD FP convert, other, D-form -// ASIMD FP 
convert, other, Q-form -// NOTE: Handled by WriteV. - -// ASIMD FP convert, long and narrow -def : InstRW<[THX3T110Write_5Cyc_F01], (instregex "^FCVT(L|N|XN)v")>; -// ASIMD FP convert, other, D-form -def : InstRW<[THX3T110Write_5Cyc_F01], - (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>; -// ASIMD FP convert, other, Q-form -def : InstRW<[THX3T110Write_5Cyc_F01], - (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>; - -// ASIMD FP divide, D-form, F32 -def : InstRW<[THX3T110Write_16Cyc_F0123], (instrs FDIVv2f32)>; -def : InstRW<[THX3T110Write_16Cyc_F0123], (instregex "FDIVv2f32")>; - -// ASIMD FP divide, Q-form, F32 -def : InstRW<[THX3T110Write_16Cyc_F0123], (instrs FDIVv4f32)>; -def : InstRW<[THX3T110Write_16Cyc_F0123], (instregex "FDIVv4f32")>; - -// ASIMD FP divide, Q-form, F64 -def : InstRW<[THX3T110Write_23Cyc_F0123], (instrs FDIVv2f64)>; -def : InstRW<[THX3T110Write_23Cyc_F0123], (instregex "FDIVv2f64")>; - -// ASIMD FP max/min, normal, D-form -// ASIMD FP max/min, normal, Q-form -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FMAXv", "^FMAXNMv", - "^FMINv", "^FMINNMv")>; - -// ASIMD FP max/min, pairwise, D-form -// ASIMD FP max/min, pairwise, Q-form -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FMAXPv", "^FMAXNMPv", - "^FMINPv", "^FMINNMPv")>; - -// ASIMD FP max/min, reduce -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FMAXVv", "^FMAXNMVv", - "^FMINVv", "^FMINNMVv")>; - -// ASIMD FP multiply, D-form, FZ -// ASIMD FP multiply, D-form, no FZ -// ASIMD FP multiply, Q-form, FZ -// ASIMD FP multiply, Q-form, no FZ -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^FMULv", "^FMULXv")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>; - -// ASIMD FP multiply accumulate, Dform, FZ -// ASIMD FP multiply accumulate, Dform, no FZ -// ASIMD FP multiply 
accumulate, Qform, FZ -// ASIMD FP multiply accumulate, Qform, no FZ -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^FMLAv", "^FMLSv")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>; - -// ASIMD FP negate -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FNEGv")>; - -//-- -// 3.14 ASIMD Miscellaneous Instructions -//-- - -// ASIMD bit reverse -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^RBITv")>; - -// ASIMD bitwise insert, D-form -// ASIMD bitwise insert, Q-form -def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], - (instregex "^BIFv", "^BITv", "^BSLv")>; - -// ASIMD count, D-form -// ASIMD count, Q-form -def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], - (instregex "^CLSv", "^CLZv", "^CNTv")>; - -// ASIMD duplicate, gen reg -// ASIMD duplicate, element -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^DUPv")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^DUP(i8|i16|i32|i64)$")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^DUPv.+gpr")>; - -// ASIMD extract -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^EXTv")>; - -// ASIMD extract narrow -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^XTNv")>; - -// ASIMD extract narrow, saturating -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>; - -// ASIMD insert, element to element -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^INSv")>; - -// ASIMD transfer, element to gen reg -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^[SU]MOVv")>; - -// ASIMD move, integer immed -def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instregex "^MOVIv")>; - -// ASIMD move, FP immed -def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instregex "^FMOVv")>; - -// ASIMD transpose -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^TRN1", "^TRN2")>; - -// ASIMD unzip/zip -def : 
InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>; - -// ASIMD reciprocal estimate, D-form -// ASIMD reciprocal estimate, Q-form -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^FRECPEv", "^FRECPXv", "^URECPEv", - "^FRSQRTEv", "^URSQRTEv")>; - -// ASIMD reciprocal step, D-form, FZ -// ASIMD reciprocal step, D-form, no FZ -// ASIMD reciprocal step, Q-form, FZ -// ASIMD reciprocal step, Q-form, no FZ -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^FRECPSv", "^FRSQRTSv")>; - -// ASIMD reverse -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^REV16v", "^REV32v", "^REV64v")>; - -// ASIMD table lookup, D-form -// ASIMD table lookup, Q-form -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>; -def : InstRW<[THX3T110Write_10Cyc_F0123], - (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>; -def : InstRW<[THX3T110Write_15Cyc_F0123], - (instrs TBLv8i8Three, TBLv16i8Three, TBXv8i8Three, TBXv16i8Three)>; -def : InstRW<[THX3T110Write_20Cyc_F0123], - (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>; - -// ASIMD transfer, element to word or word -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^[SU]MOVv")>; - -// ASIMD transfer, element to gen reg -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "(S|U)MOVv.*")>; - -// ASIMD transfer gen reg to element -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^INSv")>; - -// ASIMD transpose -def : InstRW<[THX3T110Write_5Cyc_F0123], - (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>; - -// ASIMD unzip/zip -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^ZIP1v", "^ZIP2v")>; - -//-- -// 3.15 ASIMD Load Instructions -//-- - -// ASIMD load, 1 element, multiple, 1 reg, D-form -// ASIMD load, 1 element, multiple, 1 reg, Q-form -def : InstRW<[THX3T110Write_4Cyc_LS01], - (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX3T110Write_4Cyc_LS01, WriteAdr], - (instregex 
"^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 1 element, multiple, 2 reg, D-form -// ASIMD load, 1 element, multiple, 2 reg, Q-form -def : InstRW<[THX3T110Write_4Cyc_LS01], - (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX3T110Write_4Cyc_LS01, WriteAdr], - (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 1 element, multiple, 3 reg, D-form -// ASIMD load, 1 element, multiple, 3 reg, Q-form -def : InstRW<[THX3T110Write_5Cyc_LS01], - (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX3T110Write_5Cyc_LS01, WriteAdr], - (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 1 element, multiple, 4 reg, D-form -// ASIMD load, 1 element, multiple, 4 reg, Q-form -def : InstRW<[THX3T110Write_6Cyc_LS01], - (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX3T110Write_6Cyc_LS01, WriteAdr], - (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 1 element, one lane, B/H/S -// ASIMD load, 1 element, one lane, D -def : InstRW<[THX3T110Write_5Cyc_LS01_F0123], - (instregex "^LD1i(8|16|32|64)$")>; -def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr], - (instregex "^LD1i(8|16|32|64)_POST$")>; - -// ASIMD load, 1 element, all lanes, D-form, B/H/S -// ASIMD load, 1 element, all lanes, D-form, D -// ASIMD load, 1 element, all lanes, Q-form -def : InstRW<[THX3T110Write_5Cyc_LS01_F0123], - (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr], - (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 2 element, multiple, D-form, B/H/S -// ASIMD load, 2 element, multiple, Q-form, D -def : InstRW<[THX3T110Write_5Cyc_LS01_F0123], - (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr], - (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 2 element, one lane, B/H -// ASIMD load, 2 element, one lane, S 
-// ASIMD load, 2 element, one lane, D -def : InstRW<[THX3T110Write_5Cyc_LS01_F0123], - (instregex "^LD2i(8|16|32|64)$")>; -def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr], - (instregex "^LD2i(8|16|32|64)_POST$")>; - -// ASIMD load, 2 element, all lanes, D-form, B/H/S -// ASIMD load, 2 element, all lanes, D-form, D -// ASIMD load, 2 element, all lanes, Q-form -def : InstRW<[THX3T110Write_5Cyc_LS01_F0123], - (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr], - (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 3 element, multiple, D-form, B/H/S -// ASIMD load, 3 element, multiple, Q-form, B/H/S -// ASIMD load, 3 element, multiple, Q-form, D -def : InstRW<[THX3T110Write_8Cyc_LS01_F0123], - (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX3T110Write_8Cyc_LS01_F0123, WriteAdr], - (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 3 element, one lone, B/H -// ASIMD load, 3 element, one lane, S -// ASIMD load, 3 element, one lane, D -def : InstRW<[THX3T110Write_7Cyc_LS01_F0123], - (instregex "^LD3i(8|16|32|64)$")>; -def : InstRW<[THX3T110Write_7Cyc_LS01_F0123, WriteAdr], - (instregex "^LD3i(8|16|32|64)_POST$")>; - -// ASIMD load, 3 element, all lanes, D-form, B/H/S -// ASIMD load, 3 element, all lanes, D-form, D -// ASIMD load, 3 element, all lanes, Q-form, B/H/S -// ASIMD load, 3 element, all lanes, Q-form, D -def : InstRW<[THX3T110Write_7Cyc_LS01_F0123], - (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX3T110Write_7Cyc_LS01_F0123, WriteAdr], - (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 4 element, multiple, D-form, B/H/S -// ASIMD load, 4 element, multiple, Q-form, B/H/S -// ASIMD load, 4 element, multiple, Q-form, D -def : InstRW<[THX3T110Write_8Cyc_LS01_F0123], - (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX3T110Write_8Cyc_LS01_F0123, WriteAdr], - (instregex 
"^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 4 element, one lane, B/H -// ASIMD load, 4 element, one lane, S -// ASIMD load, 4 element, one lane, D -def : InstRW<[THX3T110Write_6Cyc_LS01_F0123], - (instregex "^LD4i(8|16|32|64)$")>; -def : InstRW<[THX3T110Write_6Cyc_LS01_F0123, WriteAdr], - (instregex "^LD4i(8|16|32|64)_POST$")>; - -// ASIMD load, 4 element, all lanes, D-form, B/H/S -// ASIMD load, 4 element, all lanes, D-form, D -// ASIMD load, 4 element, all lanes, Q-form, B/H/S -// ASIMD load, 4 element, all lanes, Q-form, D -def : InstRW<[THX3T110Write_6Cyc_LS01_F0123], - (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX3T110Write_6Cyc_LS01_F0123, WriteAdr], - (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -//-- -// 3.16 ASIMD Store Instructions -//-- - -// ASIMD store, 1 element, multiple, 1 reg, D-form -// ASIMD store, 1 element, multiple, 1 reg, Q-form -def : InstRW<[THX3T110Write_1Cyc_LS01], - (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX3T110Write_1Cyc_LS01, WriteAdr], - (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, multiple, 2 reg, D-form -// ASIMD store, 1 element, multiple, 2 reg, Q-form -def : InstRW<[THX3T110Write_1Cyc_LS01], - (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX3T110Write_1Cyc_LS01, WriteAdr], - (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, multiple, 3 reg, D-form -// ASIMD store, 1 element, multiple, 3 reg, Q-form -def : InstRW<[THX3T110Write_1Cyc_LS01], - (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX3T110Write_1Cyc_LS01, WriteAdr], - (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, multiple, 4 reg, D-form -// ASIMD store, 1 element, multiple, 4 reg, Q-form -def : InstRW<[THX3T110Write_1Cyc_LS01], - (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX3T110Write_1Cyc_LS01, 
WriteAdr], - (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, one lane, B/H/S -// ASIMD store, 1 element, one lane, D -def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], - (instregex "^ST1i(8|16|32|64)$")>; -def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], - (instregex "^ST1i(8|16|32|64)_POST$")>; - -// ASIMD store, 2 element, multiple, D-form, B/H/S -// ASIMD store, 2 element, multiple, Q-form, B/H/S -// ASIMD store, 2 element, multiple, Q-form, D -def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], - (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], - (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 2 element, one lane, B/H/S -// ASIMD store, 2 element, one lane, D -def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], - (instregex "^ST2i(8|16|32|64)$")>; -def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], - (instregex "^ST2i(8|16|32|64)_POST$")>; - -// ASIMD store, 3 element, multiple, D-form, B/H/S -// ASIMD store, 3 element, multiple, Q-form, B/H/S -// ASIMD store, 3 element, multiple, Q-form, D -def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], - (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], - (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 3 element, one lane, B/H -// ASIMD store, 3 element, one lane, S -// ASIMD store, 3 element, one lane, D -def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], - (instregex "^ST3i(8|16|32|64)$")>; -def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], - (instregex "^ST3i(8|16|32|64)_POST$")>; - -// ASIMD store, 4 element, multiple, D-form, B/H/S -// ASIMD store, 4 element, multiple, Q-form, B/H/S -// ASIMD store, 4 element, multiple, Q-form, D -def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], - (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], - (instregex 
"^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 4 element, one lane, B/H -// ASIMD store, 4 element, one lane, S -// ASIMD store, 4 element, one lane, D -def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], - (instregex "^ST4i(8|16|32|64)$")>; -def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], - (instregex "^ST4i(8|16|32|64)_POST$")>; - -// V8.1a Atomics (LSE) -def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], - (instrs CASB, CASH, CASW, CASX)>; - -def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], - (instrs CASAB, CASAH, CASAW, CASAX)>; - -def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], - (instrs CASLB, CASLH, CASLW, CASLX)>; - -def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic], - (instrs CASALB, CASALH, CASALW, CASALX)>; - -def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], - (instrs LDLARB, LDLARH, LDLARW, LDLARX)>; - -def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], - (instrs LDADDB, LDADDH, LDADDW, LDADDX)>; - -def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], - (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>; - -def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], - (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>; - -def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic], - (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>; - -def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], - (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>; - -def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], - (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>; - -def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], - (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>; - -def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic], - (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>; - -def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], - (instrs LDEORB, LDEORH, LDEORW, LDEORX)>; - -def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], - (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>; - -def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], - (instrs LDEORLB, LDEORLH, 
LDEORLW, LDEORLX)>; - -def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic], - (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>; - -def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], - (instrs LDSETB, LDSETH, LDSETW, LDSETX)>; - -def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], - (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>; - -def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], - (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>; - -def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic], - (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>; - -def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], - (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX, - LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX, - LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX, - LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>; - -def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], - (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX, - LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX, - LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX, - LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>; - -def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], - (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX, - LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX, - LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX, - LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>; - -def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], - (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX, - LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX, - LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX, - LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>; - -def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], - (instrs SWPB, SWPH, SWPW, SWPX)>; - -def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], - (instrs SWPAB, SWPAH, SWPAW, SWPAX)>; - -def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], - (instrs SWPLB, SWPLH, SWPLW, SWPLX)>; - -def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic], - (instrs SWPALB, SWPALH, SWPALW, SWPALX)>; - -def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], - (instrs STLLRB, STLLRH, STLLRW, STLLRX)>; - -// V8.3a PAC -def 
: InstRW<[THX3T110Write_11Cyc_LS01_I1], (instregex "^LDRAA", "^LDRAB")>; -def : InstRW<[THX3T110Write_8Cyc_I123], - (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, - BRAA, BRAAZ, BRAB, BRABZ)>; -def : InstRW<[THX3T110Write_8Cyc_I123], (instrs RETAA, RETAB)>; - -} // SchedModel = ThunderX3T110Model diff --git a/suite/synctools/tablegen/AArch64/AArch64Schedule.td b/suite/synctools/tablegen/AArch64/AArch64Schedule.td deleted file mode 100644 index b8572c9b45..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64Schedule.td +++ /dev/null @@ -1,96 +0,0 @@ -//==-- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// Define TII for use in SchedVariant Predicates. -// const MachineInstr *MI and const TargetSchedModel *SchedModel -// are defined by default. -def : PredicateProlog<[{ - const AArch64InstrInfo *TII = - static_cast(SchedModel->getInstrInfo()); - (void)TII; -}]>; - -// AArch64 Scheduler Definitions - -def WriteImm : SchedWrite; // MOVN, MOVZ -// TODO: Provide variants for MOV32/64imm Pseudos that dynamically -// select the correct sequence of WriteImms. 
- -def WriteI : SchedWrite; // ALU -def WriteISReg : SchedWrite; // ALU of Shifted-Reg -def WriteIEReg : SchedWrite; // ALU of Extended-Reg -def ReadI : SchedRead; // ALU -def ReadISReg : SchedRead; // ALU of Shifted-Reg -def ReadIEReg : SchedRead; // ALU of Extended-Reg -def WriteExtr : SchedWrite; // EXTR shifts a reg pair -def ReadExtrHi : SchedRead; // Read the high reg of the EXTR pair -def WriteIS : SchedWrite; // Shift/Scale -def WriteID32 : SchedWrite; // 32-bit Divide -def WriteID64 : SchedWrite; // 64-bit Divide -def ReadID : SchedRead; // 32/64-bit Divide -def WriteIM32 : SchedWrite; // 32-bit Multiply -def WriteIM64 : SchedWrite; // 64-bit Multiply -def ReadIM : SchedRead; // 32/64-bit Multiply -def ReadIMA : SchedRead; // 32/64-bit Multiply Accumulate -def WriteBr : SchedWrite; // Branch -def WriteBrReg : SchedWrite; // Indirect Branch - -def WriteLD : SchedWrite; // Load from base addr plus immediate offset -def WriteST : SchedWrite; // Store to base addr plus immediate offset -def WriteSTP : SchedWrite; // Store a register pair. -def WriteAdr : SchedWrite; // Address pre/post increment. - -def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled). -def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled). -def ReadST : SchedRead; // Read the stored value. -def ReadAdrBase : SchedRead; // Read the base resister of a reg-offset LD/ST. - -// Serialized two-level address load. -// EXAMPLE: LOADGot -def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>; - -// Serialized two-level address lookup. -// EXAMPLE: MOVaddr... -def WriteAdrAdr : WriteSequence<[WriteAdr, WriteAdr]>; - -// The second register of a load-pair. -// LDP,LDPSW,LDNP,LDXP,LDAXP -def WriteLDHi : SchedWrite; - -// Store-exclusive is a store followed by a dependent load. -def WriteSTX : WriteSequence<[WriteST, WriteLD]>; - -def WriteSys : SchedWrite; // Long, variable latency system ops. -def WriteBarrier : SchedWrite; // Memory barrier. 
-def WriteHint : SchedWrite; // Hint instruction. - -def WriteF : SchedWrite; // General floating-point ops. -def WriteFCmp : SchedWrite; // Floating-point compare. -def WriteFCvt : SchedWrite; // Float conversion. -def WriteFCopy : SchedWrite; // Float-int register copy. -def WriteFImm : SchedWrite; // Floating-point immediate. -def WriteFMul : SchedWrite; // Floating-point multiply. -def WriteFDiv : SchedWrite; // Floating-point division. - -def WriteVd : SchedWrite; // 64bit Vector D ops. -def WriteVq : SchedWrite; // 128bit Vector Q ops. -def WriteVLD : SchedWrite; // Vector loads. -def WriteVST : SchedWrite; // Vector stores. - -def WriteAtomic : SchedWrite; // Atomic memory operations (CAS, Swap, LDOP) - -// Read the unwritten lanes of the VLD's destination registers. -def ReadVLD : SchedRead; - -// Sequential vector load and shuffle. -def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteVq]>; -def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteVq, WriteVq]>; - -// Store a shuffled vector. -def WriteVSTShuffle : WriteSequence<[WriteVq, WriteVST]>; -def WriteVSTPairShuffle : WriteSequence<[WriteVq, WriteVq, WriteVST]>; diff --git a/suite/synctools/tablegen/AArch64/AArch64SystemOperands.td b/suite/synctools/tablegen/AArch64/AArch64SystemOperands.td deleted file mode 100644 index cce5813fe6..0000000000 --- a/suite/synctools/tablegen/AArch64/AArch64SystemOperands.td +++ /dev/null @@ -1,1719 +0,0 @@ -//===- AArch64SystemOperands.td ----------------------------*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the symbolic operands permitted for various kinds of -// AArch64 system instruction. 
-// -//===----------------------------------------------------------------------===// - -include "llvm/TableGen/SearchableTable.td" - -//===----------------------------------------------------------------------===// -// Features that, for the compiler, only enable system operands and PStates -//===----------------------------------------------------------------------===// - -def HasCCPP : Predicate<"Subtarget->hasCCPP()">, - AssemblerPredicate<(all_of FeatureCCPP), "ccpp">; - -def HasPAN : Predicate<"Subtarget->hasPAN()">, - AssemblerPredicate<(all_of FeaturePAN), - "ARM v8.1 Privileged Access-Never extension">; - -def HasPsUAO : Predicate<"Subtarget->hasPsUAO()">, - AssemblerPredicate<(all_of FeaturePsUAO), - "ARM v8.2 UAO PState extension (psuao)">; - -def HasPAN_RWV : Predicate<"Subtarget->hasPAN_RWV()">, - AssemblerPredicate<(all_of FeaturePAN_RWV), - "ARM v8.2 PAN AT S1E1R and AT S1E1W Variation">; - -def HasCONTEXTIDREL2 - : Predicate<"Subtarget->hasCONTEXTIDREL2()">, - AssemblerPredicate<(all_of FeatureCONTEXTIDREL2), - "Target contains CONTEXTIDR_EL2 RW operand">; - -//===----------------------------------------------------------------------===// -// AT (address translate) instruction options. 
-//===----------------------------------------------------------------------===// - -class AT op1, bits<4> crn, bits<4> crm, - bits<3> op2> : SearchableTable { - let SearchableFields = ["Name", "Encoding"]; - let EnumValueField = "Encoding"; - - string Name = name; - bits<14> Encoding; - let Encoding{13-11} = op1; - let Encoding{10-7} = crn; - let Encoding{6-3} = crm; - let Encoding{2-0} = op2; - code Requires = [{ {} }]; -} - -def : AT<"S1E1R", 0b000, 0b0111, 0b1000, 0b000>; -def : AT<"S1E2R", 0b100, 0b0111, 0b1000, 0b000>; -def : AT<"S1E3R", 0b110, 0b0111, 0b1000, 0b000>; -def : AT<"S1E1W", 0b000, 0b0111, 0b1000, 0b001>; -def : AT<"S1E2W", 0b100, 0b0111, 0b1000, 0b001>; -def : AT<"S1E3W", 0b110, 0b0111, 0b1000, 0b001>; -def : AT<"S1E0R", 0b000, 0b0111, 0b1000, 0b010>; -def : AT<"S1E0W", 0b000, 0b0111, 0b1000, 0b011>; -def : AT<"S12E1R", 0b100, 0b0111, 0b1000, 0b100>; -def : AT<"S12E1W", 0b100, 0b0111, 0b1000, 0b101>; -def : AT<"S12E0R", 0b100, 0b0111, 0b1000, 0b110>; -def : AT<"S12E0W", 0b100, 0b0111, 0b1000, 0b111>; - -let Requires = [{ {AArch64::FeaturePAN_RWV} }] in { -def : AT<"S1E1RP", 0b000, 0b0111, 0b1001, 0b000>; -def : AT<"S1E1WP", 0b000, 0b0111, 0b1001, 0b001>; -} - -//===----------------------------------------------------------------------===// -// DMB/DSB (data barrier) instruction options. 
-//===----------------------------------------------------------------------===// - -class DB encoding> : SearchableTable { - let SearchableFields = ["Name", "Encoding"]; - let EnumValueField = "Encoding"; - - string Name = name; - bits<4> Encoding = encoding; -} - -def : DB<"oshld", 0x1>; -def : DB<"oshst", 0x2>; -def : DB<"osh", 0x3>; -def : DB<"nshld", 0x5>; -def : DB<"nshst", 0x6>; -def : DB<"nsh", 0x7>; -def : DB<"ishld", 0x9>; -def : DB<"ishst", 0xa>; -def : DB<"ish", 0xb>; -def : DB<"ld", 0xd>; -def : DB<"st", 0xe>; -def : DB<"sy", 0xf>; - -class DBnXS encoding, bits<5> immValue> : SearchableTable { - let SearchableFields = ["Name", "Encoding", "ImmValue"]; - let EnumValueField = "Encoding"; - - string Name = name; - bits<4> Encoding = encoding; - bits<5> ImmValue = immValue; - code Requires = [{ {AArch64::FeatureXS} }]; -} - -def : DBnXS<"oshnxs", 0x3, 0x10>; -def : DBnXS<"nshnxs", 0x7, 0x14>; -def : DBnXS<"ishnxs", 0xb, 0x18>; -def : DBnXS<"synxs", 0xf, 0x1c>; - -//===----------------------------------------------------------------------===// -// DC (data cache maintenance) instruction options. 
-//===----------------------------------------------------------------------===// - -class DC op1, bits<4> crn, bits<4> crm, - bits<3> op2> : SearchableTable { - let SearchableFields = ["Name", "Encoding"]; - let EnumValueField = "Encoding"; - - string Name = name; - bits<14> Encoding; - let Encoding{13-11} = op1; - let Encoding{10-7} = crn; - let Encoding{6-3} = crm; - let Encoding{2-0} = op2; - code Requires = [{ {} }]; -} - -def : DC<"ZVA", 0b011, 0b0111, 0b0100, 0b001>; -def : DC<"IVAC", 0b000, 0b0111, 0b0110, 0b001>; -def : DC<"ISW", 0b000, 0b0111, 0b0110, 0b010>; -def : DC<"CVAC", 0b011, 0b0111, 0b1010, 0b001>; -def : DC<"CSW", 0b000, 0b0111, 0b1010, 0b010>; -def : DC<"CVAU", 0b011, 0b0111, 0b1011, 0b001>; -def : DC<"CIVAC", 0b011, 0b0111, 0b1110, 0b001>; -def : DC<"CISW", 0b000, 0b0111, 0b1110, 0b010>; - -let Requires = [{ {AArch64::FeatureCCPP} }] in -def : DC<"CVAP", 0b011, 0b0111, 0b1100, 0b001>; - -let Requires = [{ {AArch64::FeatureCacheDeepPersist} }] in -def : DC<"CVADP", 0b011, 0b0111, 0b1101, 0b001>; - -let Requires = [{ {AArch64::FeatureMTE} }] in { -def : DC<"IGVAC", 0b000, 0b0111, 0b0110, 0b011>; -def : DC<"IGSW", 0b000, 0b0111, 0b0110, 0b100>; -def : DC<"CGSW", 0b000, 0b0111, 0b1010, 0b100>; -def : DC<"CIGSW", 0b000, 0b0111, 0b1110, 0b100>; -def : DC<"CGVAC", 0b011, 0b0111, 0b1010, 0b011>; -def : DC<"CGVAP", 0b011, 0b0111, 0b1100, 0b011>; -def : DC<"CGVADP", 0b011, 0b0111, 0b1101, 0b011>; -def : DC<"CIGVAC", 0b011, 0b0111, 0b1110, 0b011>; -def : DC<"GVA", 0b011, 0b0111, 0b0100, 0b011>; -def : DC<"IGDVAC", 0b000, 0b0111, 0b0110, 0b101>; -def : DC<"IGDSW", 0b000, 0b0111, 0b0110, 0b110>; -def : DC<"CGDSW", 0b000, 0b0111, 0b1010, 0b110>; -def : DC<"CIGDSW", 0b000, 0b0111, 0b1110, 0b110>; -def : DC<"CGDVAC", 0b011, 0b0111, 0b1010, 0b101>; -def : DC<"CGDVAP", 0b011, 0b0111, 0b1100, 0b101>; -def : DC<"CGDVADP", 0b011, 0b0111, 0b1101, 0b101>; -def : DC<"CIGDVAC", 0b011, 0b0111, 0b1110, 0b101>; -def : DC<"GZVA", 0b011, 0b0111, 0b0100, 0b100>; -} - 
-//===----------------------------------------------------------------------===// -// IC (instruction cache maintenance) instruction options. -//===----------------------------------------------------------------------===// - -class IC op1, bits<4> crn, bits<4> crm, bits<3> op2, - bit needsreg> : SearchableTable { - let SearchableFields = ["Name", "Encoding"]; - let EnumValueField = "Encoding"; - - string Name = name; - bits<14> Encoding; - let Encoding{13-11} = op1; - let Encoding{10-7} = crn; - let Encoding{6-3} = crm; - let Encoding{2-0} = op2; - bit NeedsReg = needsreg; -} - -def : IC<"IALLUIS", 0b000, 0b0111, 0b0001, 0b000, 0>; -def : IC<"IALLU", 0b000, 0b0111, 0b0101, 0b000, 0>; -def : IC<"IVAU", 0b011, 0b0111, 0b0101, 0b001, 1>; - -//===----------------------------------------------------------------------===// -// ISB (instruction-fetch barrier) instruction options. -//===----------------------------------------------------------------------===// - -class ISB encoding> : SearchableTable{ - let SearchableFields = ["Name", "Encoding"]; - let EnumValueField = "Encoding"; - - string Name = name; - bits<4> Encoding; - let Encoding = encoding; -} - -def : ISB<"sy", 0xf>; - -//===----------------------------------------------------------------------===// -// TSB (Trace synchronization barrier) instruction options. -//===----------------------------------------------------------------------===// - -class TSB encoding> : SearchableTable{ - let SearchableFields = ["Name", "Encoding"]; - let EnumValueField = "Encoding"; - - string Name = name; - bits<4> Encoding; - let Encoding = encoding; - - code Requires = [{ {AArch64::FeatureTRACEV8_4} }]; -} - -def : TSB<"csync", 0>; - -//===----------------------------------------------------------------------===// -// PRFM (prefetch) instruction options. 
-//===----------------------------------------------------------------------===// - -class PRFM encoding> : SearchableTable { - let SearchableFields = ["Name", "Encoding"]; - let EnumValueField = "Encoding"; - - string Name = name; - bits<5> Encoding; - let Encoding = encoding; -} - -def : PRFM<"pldl1keep", 0x00>; -def : PRFM<"pldl1strm", 0x01>; -def : PRFM<"pldl2keep", 0x02>; -def : PRFM<"pldl2strm", 0x03>; -def : PRFM<"pldl3keep", 0x04>; -def : PRFM<"pldl3strm", 0x05>; -def : PRFM<"plil1keep", 0x08>; -def : PRFM<"plil1strm", 0x09>; -def : PRFM<"plil2keep", 0x0a>; -def : PRFM<"plil2strm", 0x0b>; -def : PRFM<"plil3keep", 0x0c>; -def : PRFM<"plil3strm", 0x0d>; -def : PRFM<"pstl1keep", 0x10>; -def : PRFM<"pstl1strm", 0x11>; -def : PRFM<"pstl2keep", 0x12>; -def : PRFM<"pstl2strm", 0x13>; -def : PRFM<"pstl3keep", 0x14>; -def : PRFM<"pstl3strm", 0x15>; - -//===----------------------------------------------------------------------===// -// SVE Prefetch instruction options. -//===----------------------------------------------------------------------===// - -class SVEPRFM encoding> : SearchableTable { - let SearchableFields = ["Name", "Encoding"]; - let EnumValueField = "Encoding"; - - string Name = name; - bits<4> Encoding; - let Encoding = encoding; - code Requires = [{ {} }]; -} - -let Requires = [{ {AArch64::FeatureSVE} }] in { -def : SVEPRFM<"pldl1keep", 0x00>; -def : SVEPRFM<"pldl1strm", 0x01>; -def : SVEPRFM<"pldl2keep", 0x02>; -def : SVEPRFM<"pldl2strm", 0x03>; -def : SVEPRFM<"pldl3keep", 0x04>; -def : SVEPRFM<"pldl3strm", 0x05>; -def : SVEPRFM<"pstl1keep", 0x08>; -def : SVEPRFM<"pstl1strm", 0x09>; -def : SVEPRFM<"pstl2keep", 0x0a>; -def : SVEPRFM<"pstl2strm", 0x0b>; -def : SVEPRFM<"pstl3keep", 0x0c>; -def : SVEPRFM<"pstl3strm", 0x0d>; -} - -//===----------------------------------------------------------------------===// -// SVE Predicate patterns -//===----------------------------------------------------------------------===// - -class SVEPREDPAT encoding> : 
SearchableTable { - let SearchableFields = ["Name", "Encoding"]; - let EnumValueField = "Encoding"; - - string Name = name; - bits<5> Encoding; - let Encoding = encoding; -} - -def : SVEPREDPAT<"pow2", 0x00>; -def : SVEPREDPAT<"vl1", 0x01>; -def : SVEPREDPAT<"vl2", 0x02>; -def : SVEPREDPAT<"vl3", 0x03>; -def : SVEPREDPAT<"vl4", 0x04>; -def : SVEPREDPAT<"vl5", 0x05>; -def : SVEPREDPAT<"vl6", 0x06>; -def : SVEPREDPAT<"vl7", 0x07>; -def : SVEPREDPAT<"vl8", 0x08>; -def : SVEPREDPAT<"vl16", 0x09>; -def : SVEPREDPAT<"vl32", 0x0a>; -def : SVEPREDPAT<"vl64", 0x0b>; -def : SVEPREDPAT<"vl128", 0x0c>; -def : SVEPREDPAT<"vl256", 0x0d>; -def : SVEPREDPAT<"mul4", 0x1d>; -def : SVEPREDPAT<"mul3", 0x1e>; -def : SVEPREDPAT<"all", 0x1f>; - -//===----------------------------------------------------------------------===// -// Exact FP Immediates. -// -// These definitions are used to create a lookup table with FP Immediates that -// is used for a few instructions that only accept a limited set of exact FP -// immediates values. -//===----------------------------------------------------------------------===// -class ExactFPImm enum > : SearchableTable { - let SearchableFields = ["Enum", "Repr"]; - let EnumValueField = "Enum"; - - string Name = name; - bits<4> Enum = enum; - string Repr = repr; -} - -def : ExactFPImm<"zero", "0.0", 0x0>; -def : ExactFPImm<"half", "0.5", 0x1>; -def : ExactFPImm<"one", "1.0", 0x2>; -def : ExactFPImm<"two", "2.0", 0x3>; - -//===----------------------------------------------------------------------===// -// PState instruction options. 
-//===----------------------------------------------------------------------===// - -class PState encoding> : SearchableTable { - let SearchableFields = ["Name", "Encoding"]; - let EnumValueField = "Encoding"; - - string Name = name; - bits<5> Encoding; - let Encoding = encoding; - code Requires = [{ {} }]; -} - -def : PState<"SPSel", 0b00101>; -def : PState<"DAIFSet", 0b11110>; -def : PState<"DAIFClr", 0b11111>; -// v8.1a "Privileged Access Never" extension-specific PStates -let Requires = [{ {AArch64::FeaturePAN} }] in -def : PState<"PAN", 0b00100>; - -// v8.2a "User Access Override" extension-specific PStates -let Requires = [{ {AArch64::FeaturePsUAO} }] in -def : PState<"UAO", 0b00011>; -// v8.4a timing insensitivity of data processing instructions -let Requires = [{ {AArch64::FeatureDIT} }] in -def : PState<"DIT", 0b11010>; -// v8.5a Spectre Mitigation -let Requires = [{ {AArch64::FeatureSSBS} }] in -def : PState<"SSBS", 0b11001>; -// v8.5a Memory Tagging Extension -let Requires = [{ {AArch64::FeatureMTE} }] in -def : PState<"TCO", 0b11100>; - -//===----------------------------------------------------------------------===// -// SVCR instruction options. -//===----------------------------------------------------------------------===// - -class SVCR encoding> : SearchableTable { - let SearchableFields = ["Name", "Encoding"]; - let EnumValueField = "Encoding"; - - string Name = name; - bits<3> Encoding; - let Encoding = encoding; - code Requires = [{ {} }]; -} - -let Requires = [{ {AArch64::FeatureSME} }] in { -def : SVCR<"SVCRSM", 0b001>; -def : SVCR<"SVCRZA", 0b010>; -def : SVCR<"SVCRSMZA", 0b011>; -} - -//===----------------------------------------------------------------------===// -// PSB instruction options. 
-//===----------------------------------------------------------------------===// - -class PSB encoding> : SearchableTable { - let SearchableFields = ["Name", "Encoding"]; - let EnumValueField = "Encoding"; - - string Name = name; - bits<5> Encoding; - let Encoding = encoding; -} - -def : PSB<"csync", 0x11>; - -//===----------------------------------------------------------------------===// -// BTI instruction options. -//===----------------------------------------------------------------------===// - -class BTI encoding> : SearchableTable { - let SearchableFields = ["Name", "Encoding"]; - let EnumValueField = "Encoding"; - - string Name = name; - bits<3> Encoding; - let Encoding = encoding; -} - -def : BTI<"c", 0b010>; -def : BTI<"j", 0b100>; -def : BTI<"jc", 0b110>; - -//===----------------------------------------------------------------------===// -// TLBI (translation lookaside buffer invalidate) instruction options. -//===----------------------------------------------------------------------===// - -class TLBIEntry op1, bits<4> crn, bits<4> crm, - bits<3> op2, bit needsreg> { - string Name = name; - bits<14> Encoding; - let Encoding{13-11} = op1; - let Encoding{10-7} = crn; - let Encoding{6-3} = crm; - let Encoding{2-0} = op2; - bit NeedsReg = needsreg; - list Requires = []; - list ExtraRequires = []; - code RequiresStr = [{ { }] # !interleave(Requires # ExtraRequires, [{, }]) # [{ } }]; -} - -def TLBITable : GenericTable { - let FilterClass = "TLBIEntry"; - let CppTypeName = "TLBI"; - let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"]; -} - -def lookupTLBIByName : SearchIndex { - let Table = TLBITable; - let Key = ["Name"]; -} - -def lookupTLBIByEncoding : SearchIndex { - let Table = TLBITable; - let Key = ["Encoding"]; -} - -multiclass TLBI op1, bits<4> crn, bits<4> crm, - bits<3> op2, bit needsreg = 1> { - def : TLBIEntry; - def : TLBIEntry { - let Encoding{7} = 1; - let ExtraRequires = ["AArch64::FeatureXS"]; - } -} - -defm : TLBI<"IPAS2E1IS", 
0b100, 0b1000, 0b0000, 0b001>; -defm : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>; -defm : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>; -defm : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>; -defm : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>; -defm : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>; -defm : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>; -defm : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>; -defm : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>; -defm : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>; -defm : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>; -defm : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>; -defm : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>; -defm : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>; -defm : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>; -defm : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>; -defm : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>; -defm : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>; -defm : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>; -defm : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>; -defm : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>; -defm : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>; -defm : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>; -defm : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>; -defm : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>; -defm : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>; -defm : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>; -defm : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>; -defm : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>; -defm : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>; -defm : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>; -defm : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>; - -// Armv8.4-A Translation Lookaside Buffer Instructions (TLBI) -let Requires = ["AArch64::FeatureTLB_RMI"] in { -// Armv8.4-A Outer Sharable TLB Maintenance instructions: -// op1 CRn CRm op2 -defm : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 
0b000, 0>; -defm : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>; -defm : TLBI<"ASIDE1OS", 0b000, 0b1000, 0b0001, 0b010>; -defm : TLBI<"VAAE1OS", 0b000, 0b1000, 0b0001, 0b011>; -defm : TLBI<"VALE1OS", 0b000, 0b1000, 0b0001, 0b101>; -defm : TLBI<"VAALE1OS", 0b000, 0b1000, 0b0001, 0b111>; -defm : TLBI<"IPAS2E1OS", 0b100, 0b1000, 0b0100, 0b000>; -defm : TLBI<"IPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b100>; -defm : TLBI<"VAE2OS", 0b100, 0b1000, 0b0001, 0b001>; -defm : TLBI<"VALE2OS", 0b100, 0b1000, 0b0001, 0b101>; -defm : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>; -defm : TLBI<"VAE3OS", 0b110, 0b1000, 0b0001, 0b001>; -defm : TLBI<"VALE3OS", 0b110, 0b1000, 0b0001, 0b101>; -defm : TLBI<"ALLE2OS", 0b100, 0b1000, 0b0001, 0b000, 0>; -defm : TLBI<"ALLE1OS", 0b100, 0b1000, 0b0001, 0b100, 0>; -defm : TLBI<"ALLE3OS", 0b110, 0b1000, 0b0001, 0b000, 0>; - -// Armv8.4-A TLB Range Maintenance instructions: -// op1 CRn CRm op2 -defm : TLBI<"RVAE1", 0b000, 0b1000, 0b0110, 0b001>; -defm : TLBI<"RVAAE1", 0b000, 0b1000, 0b0110, 0b011>; -defm : TLBI<"RVALE1", 0b000, 0b1000, 0b0110, 0b101>; -defm : TLBI<"RVAALE1", 0b000, 0b1000, 0b0110, 0b111>; -defm : TLBI<"RVAE1IS", 0b000, 0b1000, 0b0010, 0b001>; -defm : TLBI<"RVAAE1IS", 0b000, 0b1000, 0b0010, 0b011>; -defm : TLBI<"RVALE1IS", 0b000, 0b1000, 0b0010, 0b101>; -defm : TLBI<"RVAALE1IS", 0b000, 0b1000, 0b0010, 0b111>; -defm : TLBI<"RVAE1OS", 0b000, 0b1000, 0b0101, 0b001>; -defm : TLBI<"RVAAE1OS", 0b000, 0b1000, 0b0101, 0b011>; -defm : TLBI<"RVALE1OS", 0b000, 0b1000, 0b0101, 0b101>; -defm : TLBI<"RVAALE1OS", 0b000, 0b1000, 0b0101, 0b111>; -defm : TLBI<"RIPAS2E1IS", 0b100, 0b1000, 0b0000, 0b010>; -defm : TLBI<"RIPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b110>; -defm : TLBI<"RIPAS2E1", 0b100, 0b1000, 0b0100, 0b010>; -defm : TLBI<"RIPAS2LE1", 0b100, 0b1000, 0b0100, 0b110>; -defm : TLBI<"RIPAS2E1OS", 0b100, 0b1000, 0b0100, 0b011>; -defm : TLBI<"RIPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b111>; -defm : TLBI<"RVAE2", 0b100, 0b1000, 0b0110, 0b001>; -defm 
: TLBI<"RVALE2", 0b100, 0b1000, 0b0110, 0b101>; -defm : TLBI<"RVAE2IS", 0b100, 0b1000, 0b0010, 0b001>; -defm : TLBI<"RVALE2IS", 0b100, 0b1000, 0b0010, 0b101>; -defm : TLBI<"RVAE2OS", 0b100, 0b1000, 0b0101, 0b001>; -defm : TLBI<"RVALE2OS", 0b100, 0b1000, 0b0101, 0b101>; -defm : TLBI<"RVAE3", 0b110, 0b1000, 0b0110, 0b001>; -defm : TLBI<"RVALE3", 0b110, 0b1000, 0b0110, 0b101>; -defm : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>; -defm : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>; -defm : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>; -defm : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>; -} //FeatureTLB_RMI - -// Armv9-A Realm Management Extention TLBI Instructions -let Requires = ["AArch64::FeatureRME"] in { -defm : TLBI<"RPAOS", 0b110, 0b1000, 0b0100, 0b011>; -defm : TLBI<"RPALOS", 0b110, 0b1000, 0b0100, 0b111>; -defm : TLBI<"PAALLOS", 0b110, 0b1000, 0b0001, 0b100, 0>; -defm : TLBI<"PAALL", 0b110, 0b1000, 0b0111, 0b100, 0>; -} - -// Armv8.5-A Prediction Restriction by Context instruction options: -class PRCTX crm> : SearchableTable { - let SearchableFields = ["Name", "Encoding"]; - let EnumValueField = "Encoding"; - - string Name = name; - bits<11> Encoding; - let Encoding{10-4} = 0b0110111; - let Encoding{3-0} = crm; - bit NeedsReg = 1; - code Requires = [{ {} }]; -} - -let Requires = [{ {AArch64::FeaturePredRes} }] in { -def : PRCTX<"RCTX", 0b0011>; -} - -//===----------------------------------------------------------------------===// -// MRS/MSR (system register read/write) instruction options. 
-//===----------------------------------------------------------------------===// - -class SysReg op0, bits<3> op1, bits<4> crn, bits<4> crm, - bits<3> op2> : SearchableTable { - let SearchableFields = ["Name", "Encoding"]; - let EnumValueField = "Encoding"; - - string Name = name; - string AltName = name; - bits<16> Encoding; - let Encoding{15-14} = op0; - let Encoding{13-11} = op1; - let Encoding{10-7} = crn; - let Encoding{6-3} = crm; - let Encoding{2-0} = op2; - bit Readable = ?; - bit Writeable = ?; - code Requires = [{ {} }]; -} - -class RWSysReg op0, bits<3> op1, bits<4> crn, bits<4> crm, - bits<3> op2> - : SysReg { - let Readable = 1; - let Writeable = 1; -} - -class ROSysReg op0, bits<3> op1, bits<4> crn, bits<4> crm, - bits<3> op2> - : SysReg { - let Readable = 1; - let Writeable = 0; -} - -class WOSysReg op0, bits<3> op1, bits<4> crn, bits<4> crm, - bits<3> op2> - : SysReg { - let Readable = 0; - let Writeable = 1; -} - -//===---------------------- -// Read-only regs -//===---------------------- - -// Op0 Op1 CRn CRm Op2 -def : ROSysReg<"MDCCSR_EL0", 0b10, 0b011, 0b0000, 0b0001, 0b000>; -def : ROSysReg<"DBGDTRRX_EL0", 0b10, 0b011, 0b0000, 0b0101, 0b000>; -def : ROSysReg<"MDRAR_EL1", 0b10, 0b000, 0b0001, 0b0000, 0b000>; -def : ROSysReg<"OSLSR_EL1", 0b10, 0b000, 0b0001, 0b0001, 0b100>; -def : ROSysReg<"DBGAUTHSTATUS_EL1", 0b10, 0b000, 0b0111, 0b1110, 0b110>; -def : ROSysReg<"PMCEID0_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b110>; -def : ROSysReg<"PMCEID1_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b111>; -def : ROSysReg<"MIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b000>; -def : ROSysReg<"CCSIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b000>; - -//v8.3 CCIDX - extending the CCsIDr number of sets -def : ROSysReg<"CCSIDR2_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b010> { - let Requires = [{ {AArch64::FeatureCCIDX} }]; -} -def : ROSysReg<"CLIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b001>; -def : ROSysReg<"CTR_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b001>; -def : ROSysReg<"MPIDR_EL1", 0b11, 
0b000, 0b0000, 0b0000, 0b101>; -def : ROSysReg<"REVIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b110>; -def : ROSysReg<"AIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b111>; -def : ROSysReg<"DCZID_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b111>; -def : ROSysReg<"ID_PFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b000>; -def : ROSysReg<"ID_PFR1_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b001>; -def : ROSysReg<"ID_PFR2_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b100> { - let Requires = [{ {AArch64::FeatureSpecRestrict} }]; -} -def : ROSysReg<"ID_DFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b010>; -def : ROSysReg<"ID_AFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b011>; -def : ROSysReg<"ID_MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b100>; -def : ROSysReg<"ID_MMFR1_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b101>; -def : ROSysReg<"ID_MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b110>; -def : ROSysReg<"ID_MMFR3_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b111>; -def : ROSysReg<"ID_ISAR0_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b000>; -def : ROSysReg<"ID_ISAR1_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b001>; -def : ROSysReg<"ID_ISAR2_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b010>; -def : ROSysReg<"ID_ISAR3_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b011>; -def : ROSysReg<"ID_ISAR4_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b100>; -def : ROSysReg<"ID_ISAR5_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b101>; -def : ROSysReg<"ID_ISAR6_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b111> { - let Requires = [{ {AArch64::HasV8_2aOps} }]; -} -def : ROSysReg<"ID_AA64PFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b000>; -def : ROSysReg<"ID_AA64PFR1_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b001>; -def : ROSysReg<"ID_AA64DFR0_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b000>; -def : ROSysReg<"ID_AA64DFR1_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b001>; -def : ROSysReg<"ID_AA64AFR0_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b100>; -def : ROSysReg<"ID_AA64AFR1_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b101>; -def : ROSysReg<"ID_AA64ISAR0_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b000>; -def : ROSysReg<"ID_AA64ISAR1_EL1", 0b11, 0b000, 0b0000, 
0b0110, 0b001>; -def : ROSysReg<"ID_AA64ISAR2_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b010>; -def : ROSysReg<"ID_AA64MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b000>; -def : ROSysReg<"ID_AA64MMFR1_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b001>; -def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010>; -def : ROSysReg<"MVFR0_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b000>; -def : ROSysReg<"MVFR1_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b001>; -def : ROSysReg<"MVFR2_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b010>; -def : ROSysReg<"RVBAR_EL1", 0b11, 0b000, 0b1100, 0b0000, 0b001>; -def : ROSysReg<"RVBAR_EL2", 0b11, 0b100, 0b1100, 0b0000, 0b001>; -def : ROSysReg<"RVBAR_EL3", 0b11, 0b110, 0b1100, 0b0000, 0b001>; -def : ROSysReg<"ISR_EL1", 0b11, 0b000, 0b1100, 0b0001, 0b000>; -def : ROSysReg<"CNTPCT_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b001>; -def : ROSysReg<"CNTVCT_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b010>; -def : ROSysReg<"ID_MMFR4_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b110>; -def : ROSysReg<"ID_MMFR5_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b110>; - -// Trace registers -// Op0 Op1 CRn CRm Op2 -def : ROSysReg<"TRCSTATR", 0b10, 0b001, 0b0000, 0b0011, 0b000>; -def : ROSysReg<"TRCIDR8", 0b10, 0b001, 0b0000, 0b0000, 0b110>; -def : ROSysReg<"TRCIDR9", 0b10, 0b001, 0b0000, 0b0001, 0b110>; -def : ROSysReg<"TRCIDR10", 0b10, 0b001, 0b0000, 0b0010, 0b110>; -def : ROSysReg<"TRCIDR11", 0b10, 0b001, 0b0000, 0b0011, 0b110>; -def : ROSysReg<"TRCIDR12", 0b10, 0b001, 0b0000, 0b0100, 0b110>; -def : ROSysReg<"TRCIDR13", 0b10, 0b001, 0b0000, 0b0101, 0b110>; -def : ROSysReg<"TRCIDR0", 0b10, 0b001, 0b0000, 0b1000, 0b111>; -def : ROSysReg<"TRCIDR1", 0b10, 0b001, 0b0000, 0b1001, 0b111>; -def : ROSysReg<"TRCIDR2", 0b10, 0b001, 0b0000, 0b1010, 0b111>; -def : ROSysReg<"TRCIDR3", 0b10, 0b001, 0b0000, 0b1011, 0b111>; -def : ROSysReg<"TRCIDR4", 0b10, 0b001, 0b0000, 0b1100, 0b111>; -def : ROSysReg<"TRCIDR5", 0b10, 0b001, 0b0000, 0b1101, 0b111>; -def : ROSysReg<"TRCIDR6", 0b10, 0b001, 0b0000, 0b1110, 0b111>; -def : 
ROSysReg<"TRCIDR7", 0b10, 0b001, 0b0000, 0b1111, 0b111>; -def : ROSysReg<"TRCOSLSR", 0b10, 0b001, 0b0001, 0b0001, 0b100>; -def : ROSysReg<"TRCPDSR", 0b10, 0b001, 0b0001, 0b0101, 0b100>; -def : ROSysReg<"TRCDEVAFF0", 0b10, 0b001, 0b0111, 0b1010, 0b110>; -def : ROSysReg<"TRCDEVAFF1", 0b10, 0b001, 0b0111, 0b1011, 0b110>; -def : ROSysReg<"TRCLSR", 0b10, 0b001, 0b0111, 0b1101, 0b110>; -def : ROSysReg<"TRCAUTHSTATUS", 0b10, 0b001, 0b0111, 0b1110, 0b110>; -def : ROSysReg<"TRCDEVARCH", 0b10, 0b001, 0b0111, 0b1111, 0b110>; -def : ROSysReg<"TRCDEVID", 0b10, 0b001, 0b0111, 0b0010, 0b111>; -def : ROSysReg<"TRCDEVTYPE", 0b10, 0b001, 0b0111, 0b0011, 0b111>; -def : ROSysReg<"TRCPIDR4", 0b10, 0b001, 0b0111, 0b0100, 0b111>; -def : ROSysReg<"TRCPIDR5", 0b10, 0b001, 0b0111, 0b0101, 0b111>; -def : ROSysReg<"TRCPIDR6", 0b10, 0b001, 0b0111, 0b0110, 0b111>; -def : ROSysReg<"TRCPIDR7", 0b10, 0b001, 0b0111, 0b0111, 0b111>; -def : ROSysReg<"TRCPIDR0", 0b10, 0b001, 0b0111, 0b1000, 0b111>; -def : ROSysReg<"TRCPIDR1", 0b10, 0b001, 0b0111, 0b1001, 0b111>; -def : ROSysReg<"TRCPIDR2", 0b10, 0b001, 0b0111, 0b1010, 0b111>; -def : ROSysReg<"TRCPIDR3", 0b10, 0b001, 0b0111, 0b1011, 0b111>; -def : ROSysReg<"TRCCIDR0", 0b10, 0b001, 0b0111, 0b1100, 0b111>; -def : ROSysReg<"TRCCIDR1", 0b10, 0b001, 0b0111, 0b1101, 0b111>; -def : ROSysReg<"TRCCIDR2", 0b10, 0b001, 0b0111, 0b1110, 0b111>; -def : ROSysReg<"TRCCIDR3", 0b10, 0b001, 0b0111, 0b1111, 0b111>; - -// GICv3 registers -// Op0 Op1 CRn CRm Op2 -def : ROSysReg<"ICC_IAR1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b000>; -def : ROSysReg<"ICC_IAR0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b000>; -def : ROSysReg<"ICC_HPPIR1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b010>; -def : ROSysReg<"ICC_HPPIR0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b010>; -def : ROSysReg<"ICC_RPR_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b011>; -def : ROSysReg<"ICH_VTR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b001>; -def : ROSysReg<"ICH_EISR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b011>; -def : ROSysReg<"ICH_ELRSR_EL2", 
0b11, 0b100, 0b1100, 0b1011, 0b101>; - -// SVE control registers -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureSVE} }] in { -def : ROSysReg<"ID_AA64ZFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b100>; -} - -// v8.1a "Limited Ordering Regions" extension-specific system register -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureLOR} }] in -def : ROSysReg<"LORID_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b111>; - -// v8.2a "RAS extension" registers -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureRAS} }] in { -def : ROSysReg<"ERRIDR_EL1", 0b11, 0b000, 0b0101, 0b0011, 0b000>; -def : ROSysReg<"ERXFR_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b000>; -} - -// v8.5a "random number" registers -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureRandGen} }] in { -def : ROSysReg<"RNDR", 0b11, 0b011, 0b0010, 0b0100, 0b000>; -def : ROSysReg<"RNDRRS", 0b11, 0b011, 0b0010, 0b0100, 0b001>; -} - -// v8.5a Software Context Number registers -let Requires = [{ {AArch64::FeatureSpecRestrict} }] in { -def : RWSysReg<"SCXTNUM_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b111>; -def : RWSysReg<"SCXTNUM_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b111>; -def : RWSysReg<"SCXTNUM_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b111>; -def : RWSysReg<"SCXTNUM_EL3", 0b11, 0b110, 0b1101, 0b0000, 0b111>; -def : RWSysReg<"SCXTNUM_EL12", 0b11, 0b101, 0b1101, 0b0000, 0b111>; -} - -// v9a Realm Management Extension registers -let Requires = [{ {AArch64::FeatureRME} }] in { -def : RWSysReg<"MFAR_EL3", 0b11, 0b110, 0b0110, 0b0000, 0b101>; -def : RWSysReg<"GPCCR_EL3", 0b11, 0b110, 0b0010, 0b0001, 0b110>; -def : RWSysReg<"GPTBR_EL3", 0b11, 0b110, 0b0010, 0b0001, 0b100>; -} - -// v9-a Scalable Matrix Extension (SME) registers -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureSME} }] in { -def : ROSysReg<"ID_AA64SMFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b101>; -} - -//===---------------------- -// Write-only regs -//===---------------------- - -// Op0 Op1 CRn CRm Op2 -def : WOSysReg<"DBGDTRTX_EL0", 
0b10, 0b011, 0b0000, 0b0101, 0b000>; -def : WOSysReg<"OSLAR_EL1", 0b10, 0b000, 0b0001, 0b0000, 0b100>; -def : WOSysReg<"PMSWINC_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b100>; - -// Trace Registers -// Op0 Op1 CRn CRm Op2 -def : WOSysReg<"TRCOSLAR", 0b10, 0b001, 0b0001, 0b0000, 0b100>; -def : WOSysReg<"TRCLAR", 0b10, 0b001, 0b0111, 0b1100, 0b110>; - -// GICv3 registers -// Op0 Op1 CRn CRm Op2 -def : WOSysReg<"ICC_EOIR1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b001>; -def : WOSysReg<"ICC_EOIR0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b001>; -def : WOSysReg<"ICC_DIR_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b001>; -def : WOSysReg<"ICC_SGI1R_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b101>; -def : WOSysReg<"ICC_ASGI1R_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b110>; -def : WOSysReg<"ICC_SGI0R_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b111>; - -//===---------------------- -// Read-write regs -//===---------------------- - -// Op0 Op1 CRn CRm Op2 -def : RWSysReg<"OSDTRRX_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b010>; -def : RWSysReg<"OSDTRTX_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b010>; -def : RWSysReg<"TEECR32_EL1", 0b10, 0b010, 0b0000, 0b0000, 0b000>; -def : RWSysReg<"MDCCINT_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b000>; -def : RWSysReg<"MDSCR_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b010>; -def : RWSysReg<"DBGDTR_EL0", 0b10, 0b011, 0b0000, 0b0100, 0b000>; -def : RWSysReg<"OSECCR_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b010>; -def : RWSysReg<"DBGVCR32_EL2", 0b10, 0b100, 0b0000, 0b0111, 0b000>; -def : RWSysReg<"DBGBVR0_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b100>; -def : RWSysReg<"DBGBVR1_EL1", 0b10, 0b000, 0b0000, 0b0001, 0b100>; -def : RWSysReg<"DBGBVR2_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b100>; -def : RWSysReg<"DBGBVR3_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b100>; -def : RWSysReg<"DBGBVR4_EL1", 0b10, 0b000, 0b0000, 0b0100, 0b100>; -def : RWSysReg<"DBGBVR5_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b100>; -def : RWSysReg<"DBGBVR6_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b100>; -def : RWSysReg<"DBGBVR7_EL1", 0b10, 0b000, 0b0000, 0b0111, 
0b100>; -def : RWSysReg<"DBGBVR8_EL1", 0b10, 0b000, 0b0000, 0b1000, 0b100>; -def : RWSysReg<"DBGBVR9_EL1", 0b10, 0b000, 0b0000, 0b1001, 0b100>; -def : RWSysReg<"DBGBVR10_EL1", 0b10, 0b000, 0b0000, 0b1010, 0b100>; -def : RWSysReg<"DBGBVR11_EL1", 0b10, 0b000, 0b0000, 0b1011, 0b100>; -def : RWSysReg<"DBGBVR12_EL1", 0b10, 0b000, 0b0000, 0b1100, 0b100>; -def : RWSysReg<"DBGBVR13_EL1", 0b10, 0b000, 0b0000, 0b1101, 0b100>; -def : RWSysReg<"DBGBVR14_EL1", 0b10, 0b000, 0b0000, 0b1110, 0b100>; -def : RWSysReg<"DBGBVR15_EL1", 0b10, 0b000, 0b0000, 0b1111, 0b100>; -def : RWSysReg<"DBGBCR0_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b101>; -def : RWSysReg<"DBGBCR1_EL1", 0b10, 0b000, 0b0000, 0b0001, 0b101>; -def : RWSysReg<"DBGBCR2_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b101>; -def : RWSysReg<"DBGBCR3_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b101>; -def : RWSysReg<"DBGBCR4_EL1", 0b10, 0b000, 0b0000, 0b0100, 0b101>; -def : RWSysReg<"DBGBCR5_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b101>; -def : RWSysReg<"DBGBCR6_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b101>; -def : RWSysReg<"DBGBCR7_EL1", 0b10, 0b000, 0b0000, 0b0111, 0b101>; -def : RWSysReg<"DBGBCR8_EL1", 0b10, 0b000, 0b0000, 0b1000, 0b101>; -def : RWSysReg<"DBGBCR9_EL1", 0b10, 0b000, 0b0000, 0b1001, 0b101>; -def : RWSysReg<"DBGBCR10_EL1", 0b10, 0b000, 0b0000, 0b1010, 0b101>; -def : RWSysReg<"DBGBCR11_EL1", 0b10, 0b000, 0b0000, 0b1011, 0b101>; -def : RWSysReg<"DBGBCR12_EL1", 0b10, 0b000, 0b0000, 0b1100, 0b101>; -def : RWSysReg<"DBGBCR13_EL1", 0b10, 0b000, 0b0000, 0b1101, 0b101>; -def : RWSysReg<"DBGBCR14_EL1", 0b10, 0b000, 0b0000, 0b1110, 0b101>; -def : RWSysReg<"DBGBCR15_EL1", 0b10, 0b000, 0b0000, 0b1111, 0b101>; -def : RWSysReg<"DBGWVR0_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b110>; -def : RWSysReg<"DBGWVR1_EL1", 0b10, 0b000, 0b0000, 0b0001, 0b110>; -def : RWSysReg<"DBGWVR2_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b110>; -def : RWSysReg<"DBGWVR3_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b110>; -def : RWSysReg<"DBGWVR4_EL1", 0b10, 0b000, 0b0000, 0b0100, 0b110>; -def : 
RWSysReg<"DBGWVR5_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b110>; -def : RWSysReg<"DBGWVR6_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b110>; -def : RWSysReg<"DBGWVR7_EL1", 0b10, 0b000, 0b0000, 0b0111, 0b110>; -def : RWSysReg<"DBGWVR8_EL1", 0b10, 0b000, 0b0000, 0b1000, 0b110>; -def : RWSysReg<"DBGWVR9_EL1", 0b10, 0b000, 0b0000, 0b1001, 0b110>; -def : RWSysReg<"DBGWVR10_EL1", 0b10, 0b000, 0b0000, 0b1010, 0b110>; -def : RWSysReg<"DBGWVR11_EL1", 0b10, 0b000, 0b0000, 0b1011, 0b110>; -def : RWSysReg<"DBGWVR12_EL1", 0b10, 0b000, 0b0000, 0b1100, 0b110>; -def : RWSysReg<"DBGWVR13_EL1", 0b10, 0b000, 0b0000, 0b1101, 0b110>; -def : RWSysReg<"DBGWVR14_EL1", 0b10, 0b000, 0b0000, 0b1110, 0b110>; -def : RWSysReg<"DBGWVR15_EL1", 0b10, 0b000, 0b0000, 0b1111, 0b110>; -def : RWSysReg<"DBGWCR0_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b111>; -def : RWSysReg<"DBGWCR1_EL1", 0b10, 0b000, 0b0000, 0b0001, 0b111>; -def : RWSysReg<"DBGWCR2_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b111>; -def : RWSysReg<"DBGWCR3_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b111>; -def : RWSysReg<"DBGWCR4_EL1", 0b10, 0b000, 0b0000, 0b0100, 0b111>; -def : RWSysReg<"DBGWCR5_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b111>; -def : RWSysReg<"DBGWCR6_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b111>; -def : RWSysReg<"DBGWCR7_EL1", 0b10, 0b000, 0b0000, 0b0111, 0b111>; -def : RWSysReg<"DBGWCR8_EL1", 0b10, 0b000, 0b0000, 0b1000, 0b111>; -def : RWSysReg<"DBGWCR9_EL1", 0b10, 0b000, 0b0000, 0b1001, 0b111>; -def : RWSysReg<"DBGWCR10_EL1", 0b10, 0b000, 0b0000, 0b1010, 0b111>; -def : RWSysReg<"DBGWCR11_EL1", 0b10, 0b000, 0b0000, 0b1011, 0b111>; -def : RWSysReg<"DBGWCR12_EL1", 0b10, 0b000, 0b0000, 0b1100, 0b111>; -def : RWSysReg<"DBGWCR13_EL1", 0b10, 0b000, 0b0000, 0b1101, 0b111>; -def : RWSysReg<"DBGWCR14_EL1", 0b10, 0b000, 0b0000, 0b1110, 0b111>; -def : RWSysReg<"DBGWCR15_EL1", 0b10, 0b000, 0b0000, 0b1111, 0b111>; -def : RWSysReg<"TEEHBR32_EL1", 0b10, 0b010, 0b0001, 0b0000, 0b000>; -def : RWSysReg<"OSDLR_EL1", 0b10, 0b000, 0b0001, 0b0011, 0b100>; -def : 
RWSysReg<"DBGPRCR_EL1", 0b10, 0b000, 0b0001, 0b0100, 0b100>; -def : RWSysReg<"DBGCLAIMSET_EL1", 0b10, 0b000, 0b0111, 0b1000, 0b110>; -def : RWSysReg<"DBGCLAIMCLR_EL1", 0b10, 0b000, 0b0111, 0b1001, 0b110>; -def : RWSysReg<"CSSELR_EL1", 0b11, 0b010, 0b0000, 0b0000, 0b000>; -def : RWSysReg<"VPIDR_EL2", 0b11, 0b100, 0b0000, 0b0000, 0b000>; -def : RWSysReg<"VMPIDR_EL2", 0b11, 0b100, 0b0000, 0b0000, 0b101>; -def : RWSysReg<"CPACR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b010>; -def : RWSysReg<"SCTLR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b000>; -def : RWSysReg<"SCTLR_EL2", 0b11, 0b100, 0b0001, 0b0000, 0b000>; -def : RWSysReg<"SCTLR_EL3", 0b11, 0b110, 0b0001, 0b0000, 0b000>; -def : RWSysReg<"ACTLR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b001>; -def : RWSysReg<"ACTLR_EL2", 0b11, 0b100, 0b0001, 0b0000, 0b001>; -def : RWSysReg<"ACTLR_EL3", 0b11, 0b110, 0b0001, 0b0000, 0b001>; -def : RWSysReg<"HCR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b000>; -def : RWSysReg<"HCRX_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b010> { - let Requires = [{ {AArch64::FeatureHCX} }]; -} -def : RWSysReg<"SCR_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b000>; -def : RWSysReg<"MDCR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b001>; -def : RWSysReg<"SDER32_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b001>; -def : RWSysReg<"CPTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b010>; -def : RWSysReg<"CPTR_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b010>; -def : RWSysReg<"HSTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b011>; -def : RWSysReg<"HACR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b111>; -def : RWSysReg<"MDCR_EL3", 0b11, 0b110, 0b0001, 0b0011, 0b001>; -def : RWSysReg<"TTBR0_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b000>; -def : RWSysReg<"TTBR0_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b000>; - -let Requires = [{ {AArch64::FeatureEL2VMSA} }] in { -def : RWSysReg<"TTBR0_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000> { - let AltName = "VSCTLR_EL2"; -} -def : RWSysReg<"VTTBR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b000>; -} - -def : RWSysReg<"TTBR1_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b001>; -def : 
RWSysReg<"TCR_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b010>; -def : RWSysReg<"TCR_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b010>; -def : RWSysReg<"TCR_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b010>; -def : RWSysReg<"VTCR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b010>; -def : RWSysReg<"DACR32_EL2", 0b11, 0b100, 0b0011, 0b0000, 0b000>; -def : RWSysReg<"SPSR_EL1", 0b11, 0b000, 0b0100, 0b0000, 0b000>; -def : RWSysReg<"SPSR_EL2", 0b11, 0b100, 0b0100, 0b0000, 0b000>; -def : RWSysReg<"SPSR_EL3", 0b11, 0b110, 0b0100, 0b0000, 0b000>; -def : RWSysReg<"ELR_EL1", 0b11, 0b000, 0b0100, 0b0000, 0b001>; -def : RWSysReg<"ELR_EL2", 0b11, 0b100, 0b0100, 0b0000, 0b001>; -def : RWSysReg<"ELR_EL3", 0b11, 0b110, 0b0100, 0b0000, 0b001>; -def : RWSysReg<"SP_EL0", 0b11, 0b000, 0b0100, 0b0001, 0b000>; -def : RWSysReg<"SP_EL1", 0b11, 0b100, 0b0100, 0b0001, 0b000>; -def : RWSysReg<"SP_EL2", 0b11, 0b110, 0b0100, 0b0001, 0b000>; -def : RWSysReg<"SPSel", 0b11, 0b000, 0b0100, 0b0010, 0b000>; -def : RWSysReg<"NZCV", 0b11, 0b011, 0b0100, 0b0010, 0b000>; -def : RWSysReg<"DAIF", 0b11, 0b011, 0b0100, 0b0010, 0b001>; -def : ROSysReg<"CurrentEL", 0b11, 0b000, 0b0100, 0b0010, 0b010>; -def : RWSysReg<"SPSR_irq", 0b11, 0b100, 0b0100, 0b0011, 0b000>; -def : RWSysReg<"SPSR_abt", 0b11, 0b100, 0b0100, 0b0011, 0b001>; -def : RWSysReg<"SPSR_und", 0b11, 0b100, 0b0100, 0b0011, 0b010>; -def : RWSysReg<"SPSR_fiq", 0b11, 0b100, 0b0100, 0b0011, 0b011>; -def : RWSysReg<"FPCR", 0b11, 0b011, 0b0100, 0b0100, 0b000>; -def : RWSysReg<"FPSR", 0b11, 0b011, 0b0100, 0b0100, 0b001>; -def : RWSysReg<"DSPSR_EL0", 0b11, 0b011, 0b0100, 0b0101, 0b000>; -def : RWSysReg<"DLR_EL0", 0b11, 0b011, 0b0100, 0b0101, 0b001>; -def : RWSysReg<"IFSR32_EL2", 0b11, 0b100, 0b0101, 0b0000, 0b001>; -def : RWSysReg<"AFSR0_EL1", 0b11, 0b000, 0b0101, 0b0001, 0b000>; -def : RWSysReg<"AFSR0_EL2", 0b11, 0b100, 0b0101, 0b0001, 0b000>; -def : RWSysReg<"AFSR0_EL3", 0b11, 0b110, 0b0101, 0b0001, 0b000>; -def : RWSysReg<"AFSR1_EL1", 0b11, 0b000, 0b0101, 0b0001, 0b001>; -def : 
RWSysReg<"AFSR1_EL2", 0b11, 0b100, 0b0101, 0b0001, 0b001>; -def : RWSysReg<"AFSR1_EL3", 0b11, 0b110, 0b0101, 0b0001, 0b001>; -def : RWSysReg<"ESR_EL1", 0b11, 0b000, 0b0101, 0b0010, 0b000>; -def : RWSysReg<"ESR_EL2", 0b11, 0b100, 0b0101, 0b0010, 0b000>; -def : RWSysReg<"ESR_EL3", 0b11, 0b110, 0b0101, 0b0010, 0b000>; -def : RWSysReg<"FPEXC32_EL2", 0b11, 0b100, 0b0101, 0b0011, 0b000>; -def : RWSysReg<"FAR_EL1", 0b11, 0b000, 0b0110, 0b0000, 0b000>; -def : RWSysReg<"FAR_EL2", 0b11, 0b100, 0b0110, 0b0000, 0b000>; -def : RWSysReg<"FAR_EL3", 0b11, 0b110, 0b0110, 0b0000, 0b000>; -def : RWSysReg<"HPFAR_EL2", 0b11, 0b100, 0b0110, 0b0000, 0b100>; -def : RWSysReg<"PAR_EL1", 0b11, 0b000, 0b0111, 0b0100, 0b000>; -def : RWSysReg<"PMCR_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b000>; -def : RWSysReg<"PMCNTENSET_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b001>; -def : RWSysReg<"PMCNTENCLR_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b010>; -def : RWSysReg<"PMOVSCLR_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b011>; -def : RWSysReg<"PMSELR_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b101>; -def : RWSysReg<"PMCCNTR_EL0", 0b11, 0b011, 0b1001, 0b1101, 0b000>; -def : RWSysReg<"PMXEVTYPER_EL0", 0b11, 0b011, 0b1001, 0b1101, 0b001>; -def : RWSysReg<"PMXEVCNTR_EL0", 0b11, 0b011, 0b1001, 0b1101, 0b010>; -def : RWSysReg<"PMUSERENR_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b000>; -def : RWSysReg<"PMINTENSET_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b001>; -def : RWSysReg<"PMINTENCLR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b010>; -def : RWSysReg<"PMOVSSET_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b011>; -def : RWSysReg<"PMMIR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b110>; -def : RWSysReg<"MAIR_EL1", 0b11, 0b000, 0b1010, 0b0010, 0b000>; -def : RWSysReg<"MAIR_EL2", 0b11, 0b100, 0b1010, 0b0010, 0b000>; -def : RWSysReg<"MAIR_EL3", 0b11, 0b110, 0b1010, 0b0010, 0b000>; -def : RWSysReg<"AMAIR_EL1", 0b11, 0b000, 0b1010, 0b0011, 0b000>; -def : RWSysReg<"AMAIR_EL2", 0b11, 0b100, 0b1010, 0b0011, 0b000>; -def : RWSysReg<"AMAIR_EL3", 0b11, 0b110, 0b1010, 0b0011, 0b000>; 
-def : RWSysReg<"VBAR_EL1", 0b11, 0b000, 0b1100, 0b0000, 0b000>; -def : RWSysReg<"VBAR_EL2", 0b11, 0b100, 0b1100, 0b0000, 0b000>; -def : RWSysReg<"VBAR_EL3", 0b11, 0b110, 0b1100, 0b0000, 0b000>; -def : RWSysReg<"RMR_EL1", 0b11, 0b000, 0b1100, 0b0000, 0b010>; -def : RWSysReg<"RMR_EL2", 0b11, 0b100, 0b1100, 0b0000, 0b010>; -def : RWSysReg<"RMR_EL3", 0b11, 0b110, 0b1100, 0b0000, 0b010>; -def : RWSysReg<"CONTEXTIDR_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b001>; -def : RWSysReg<"TPIDR_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b010>; -def : RWSysReg<"TPIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b010>; -def : RWSysReg<"TPIDR_EL3", 0b11, 0b110, 0b1101, 0b0000, 0b010>; -def : RWSysReg<"TPIDRRO_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b011>; -def : RWSysReg<"TPIDR_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b100>; -def : RWSysReg<"CNTFRQ_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b000>; -def : RWSysReg<"CNTVOFF_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b011>; -def : RWSysReg<"CNTKCTL_EL1", 0b11, 0b000, 0b1110, 0b0001, 0b000>; -def : RWSysReg<"CNTHCTL_EL2", 0b11, 0b100, 0b1110, 0b0001, 0b000>; -def : RWSysReg<"CNTP_TVAL_EL0", 0b11, 0b011, 0b1110, 0b0010, 0b000>; -def : RWSysReg<"CNTHP_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0010, 0b000>; -def : RWSysReg<"CNTPS_TVAL_EL1", 0b11, 0b111, 0b1110, 0b0010, 0b000>; -def : RWSysReg<"CNTP_CTL_EL0", 0b11, 0b011, 0b1110, 0b0010, 0b001>; -def : RWSysReg<"CNTHP_CTL_EL2", 0b11, 0b100, 0b1110, 0b0010, 0b001>; -def : RWSysReg<"CNTPS_CTL_EL1", 0b11, 0b111, 0b1110, 0b0010, 0b001>; -def : RWSysReg<"CNTP_CVAL_EL0", 0b11, 0b011, 0b1110, 0b0010, 0b010>; -def : RWSysReg<"CNTHP_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0010, 0b010>; -def : RWSysReg<"CNTPS_CVAL_EL1", 0b11, 0b111, 0b1110, 0b0010, 0b010>; -def : RWSysReg<"CNTV_TVAL_EL0", 0b11, 0b011, 0b1110, 0b0011, 0b000>; -def : RWSysReg<"CNTV_CTL_EL0", 0b11, 0b011, 0b1110, 0b0011, 0b001>; -def : RWSysReg<"CNTV_CVAL_EL0", 0b11, 0b011, 0b1110, 0b0011, 0b010>; -def : RWSysReg<"PMEVCNTR0_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b000>; -def : 
RWSysReg<"PMEVCNTR1_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b001>; -def : RWSysReg<"PMEVCNTR2_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b010>; -def : RWSysReg<"PMEVCNTR3_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b011>; -def : RWSysReg<"PMEVCNTR4_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b100>; -def : RWSysReg<"PMEVCNTR5_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b101>; -def : RWSysReg<"PMEVCNTR6_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b110>; -def : RWSysReg<"PMEVCNTR7_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b111>; -def : RWSysReg<"PMEVCNTR8_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b000>; -def : RWSysReg<"PMEVCNTR9_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b001>; -def : RWSysReg<"PMEVCNTR10_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b010>; -def : RWSysReg<"PMEVCNTR11_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b011>; -def : RWSysReg<"PMEVCNTR12_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b100>; -def : RWSysReg<"PMEVCNTR13_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b101>; -def : RWSysReg<"PMEVCNTR14_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b110>; -def : RWSysReg<"PMEVCNTR15_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b111>; -def : RWSysReg<"PMEVCNTR16_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b000>; -def : RWSysReg<"PMEVCNTR17_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b001>; -def : RWSysReg<"PMEVCNTR18_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b010>; -def : RWSysReg<"PMEVCNTR19_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b011>; -def : RWSysReg<"PMEVCNTR20_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b100>; -def : RWSysReg<"PMEVCNTR21_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b101>; -def : RWSysReg<"PMEVCNTR22_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b110>; -def : RWSysReg<"PMEVCNTR23_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b111>; -def : RWSysReg<"PMEVCNTR24_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b000>; -def : RWSysReg<"PMEVCNTR25_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b001>; -def : RWSysReg<"PMEVCNTR26_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b010>; -def : RWSysReg<"PMEVCNTR27_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b011>; -def : RWSysReg<"PMEVCNTR28_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b100>; -def : 
RWSysReg<"PMEVCNTR29_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b101>; -def : RWSysReg<"PMEVCNTR30_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b110>; -def : RWSysReg<"PMCCFILTR_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b111>; -def : RWSysReg<"PMEVTYPER0_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b000>; -def : RWSysReg<"PMEVTYPER1_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b001>; -def : RWSysReg<"PMEVTYPER2_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b010>; -def : RWSysReg<"PMEVTYPER3_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b011>; -def : RWSysReg<"PMEVTYPER4_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b100>; -def : RWSysReg<"PMEVTYPER5_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b101>; -def : RWSysReg<"PMEVTYPER6_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b110>; -def : RWSysReg<"PMEVTYPER7_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b111>; -def : RWSysReg<"PMEVTYPER8_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b000>; -def : RWSysReg<"PMEVTYPER9_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b001>; -def : RWSysReg<"PMEVTYPER10_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b010>; -def : RWSysReg<"PMEVTYPER11_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b011>; -def : RWSysReg<"PMEVTYPER12_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b100>; -def : RWSysReg<"PMEVTYPER13_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b101>; -def : RWSysReg<"PMEVTYPER14_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b110>; -def : RWSysReg<"PMEVTYPER15_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b111>; -def : RWSysReg<"PMEVTYPER16_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b000>; -def : RWSysReg<"PMEVTYPER17_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b001>; -def : RWSysReg<"PMEVTYPER18_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b010>; -def : RWSysReg<"PMEVTYPER19_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b011>; -def : RWSysReg<"PMEVTYPER20_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b100>; -def : RWSysReg<"PMEVTYPER21_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b101>; -def : RWSysReg<"PMEVTYPER22_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b110>; -def : RWSysReg<"PMEVTYPER23_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b111>; -def : RWSysReg<"PMEVTYPER24_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b000>; -def 
: RWSysReg<"PMEVTYPER25_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b001>; -def : RWSysReg<"PMEVTYPER26_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b010>; -def : RWSysReg<"PMEVTYPER27_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b011>; -def : RWSysReg<"PMEVTYPER28_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b100>; -def : RWSysReg<"PMEVTYPER29_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b101>; -def : RWSysReg<"PMEVTYPER30_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b110>; - -// Trace registers -// Op0 Op1 CRn CRm Op2 -def : RWSysReg<"TRCPRGCTLR", 0b10, 0b001, 0b0000, 0b0001, 0b000>; -def : RWSysReg<"TRCPROCSELR", 0b10, 0b001, 0b0000, 0b0010, 0b000>; -def : RWSysReg<"TRCCONFIGR", 0b10, 0b001, 0b0000, 0b0100, 0b000>; -def : RWSysReg<"TRCAUXCTLR", 0b10, 0b001, 0b0000, 0b0110, 0b000>; -def : RWSysReg<"TRCEVENTCTL0R", 0b10, 0b001, 0b0000, 0b1000, 0b000>; -def : RWSysReg<"TRCEVENTCTL1R", 0b10, 0b001, 0b0000, 0b1001, 0b000>; -def : RWSysReg<"TRCSTALLCTLR", 0b10, 0b001, 0b0000, 0b1011, 0b000>; -def : RWSysReg<"TRCTSCTLR", 0b10, 0b001, 0b0000, 0b1100, 0b000>; -def : RWSysReg<"TRCSYNCPR", 0b10, 0b001, 0b0000, 0b1101, 0b000>; -def : RWSysReg<"TRCCCCTLR", 0b10, 0b001, 0b0000, 0b1110, 0b000>; -def : RWSysReg<"TRCBBCTLR", 0b10, 0b001, 0b0000, 0b1111, 0b000>; -def : RWSysReg<"TRCTRACEIDR", 0b10, 0b001, 0b0000, 0b0000, 0b001>; -def : RWSysReg<"TRCQCTLR", 0b10, 0b001, 0b0000, 0b0001, 0b001>; -def : RWSysReg<"TRCVICTLR", 0b10, 0b001, 0b0000, 0b0000, 0b010>; -def : RWSysReg<"TRCVIIECTLR", 0b10, 0b001, 0b0000, 0b0001, 0b010>; -def : RWSysReg<"TRCVISSCTLR", 0b10, 0b001, 0b0000, 0b0010, 0b010>; -def : RWSysReg<"TRCVIPCSSCTLR", 0b10, 0b001, 0b0000, 0b0011, 0b010>; -def : RWSysReg<"TRCVDCTLR", 0b10, 0b001, 0b0000, 0b1000, 0b010>; -def : RWSysReg<"TRCVDSACCTLR", 0b10, 0b001, 0b0000, 0b1001, 0b010>; -def : RWSysReg<"TRCVDARCCTLR", 0b10, 0b001, 0b0000, 0b1010, 0b010>; -def : RWSysReg<"TRCSEQEVR0", 0b10, 0b001, 0b0000, 0b0000, 0b100>; -def : RWSysReg<"TRCSEQEVR1", 0b10, 0b001, 0b0000, 0b0001, 0b100>; -def : RWSysReg<"TRCSEQEVR2", 0b10, 
0b001, 0b0000, 0b0010, 0b100>; -def : RWSysReg<"TRCSEQRSTEVR", 0b10, 0b001, 0b0000, 0b0110, 0b100>; -def : RWSysReg<"TRCSEQSTR", 0b10, 0b001, 0b0000, 0b0111, 0b100>; -def : RWSysReg<"TRCEXTINSELR", 0b10, 0b001, 0b0000, 0b1000, 0b100>; -def : RWSysReg<"TRCCNTRLDVR0", 0b10, 0b001, 0b0000, 0b0000, 0b101>; -def : RWSysReg<"TRCCNTRLDVR1", 0b10, 0b001, 0b0000, 0b0001, 0b101>; -def : RWSysReg<"TRCCNTRLDVR2", 0b10, 0b001, 0b0000, 0b0010, 0b101>; -def : RWSysReg<"TRCCNTRLDVR3", 0b10, 0b001, 0b0000, 0b0011, 0b101>; -def : RWSysReg<"TRCCNTCTLR0", 0b10, 0b001, 0b0000, 0b0100, 0b101>; -def : RWSysReg<"TRCCNTCTLR1", 0b10, 0b001, 0b0000, 0b0101, 0b101>; -def : RWSysReg<"TRCCNTCTLR2", 0b10, 0b001, 0b0000, 0b0110, 0b101>; -def : RWSysReg<"TRCCNTCTLR3", 0b10, 0b001, 0b0000, 0b0111, 0b101>; -def : RWSysReg<"TRCCNTVR0", 0b10, 0b001, 0b0000, 0b1000, 0b101>; -def : RWSysReg<"TRCCNTVR1", 0b10, 0b001, 0b0000, 0b1001, 0b101>; -def : RWSysReg<"TRCCNTVR2", 0b10, 0b001, 0b0000, 0b1010, 0b101>; -def : RWSysReg<"TRCCNTVR3", 0b10, 0b001, 0b0000, 0b1011, 0b101>; -def : RWSysReg<"TRCIMSPEC0", 0b10, 0b001, 0b0000, 0b0000, 0b111>; -def : RWSysReg<"TRCIMSPEC1", 0b10, 0b001, 0b0000, 0b0001, 0b111>; -def : RWSysReg<"TRCIMSPEC2", 0b10, 0b001, 0b0000, 0b0010, 0b111>; -def : RWSysReg<"TRCIMSPEC3", 0b10, 0b001, 0b0000, 0b0011, 0b111>; -def : RWSysReg<"TRCIMSPEC4", 0b10, 0b001, 0b0000, 0b0100, 0b111>; -def : RWSysReg<"TRCIMSPEC5", 0b10, 0b001, 0b0000, 0b0101, 0b111>; -def : RWSysReg<"TRCIMSPEC6", 0b10, 0b001, 0b0000, 0b0110, 0b111>; -def : RWSysReg<"TRCIMSPEC7", 0b10, 0b001, 0b0000, 0b0111, 0b111>; -def : RWSysReg<"TRCRSCTLR2", 0b10, 0b001, 0b0001, 0b0010, 0b000>; -def : RWSysReg<"TRCRSCTLR3", 0b10, 0b001, 0b0001, 0b0011, 0b000>; -def : RWSysReg<"TRCRSCTLR4", 0b10, 0b001, 0b0001, 0b0100, 0b000>; -def : RWSysReg<"TRCRSCTLR5", 0b10, 0b001, 0b0001, 0b0101, 0b000>; -def : RWSysReg<"TRCRSCTLR6", 0b10, 0b001, 0b0001, 0b0110, 0b000>; -def : RWSysReg<"TRCRSCTLR7", 0b10, 0b001, 0b0001, 0b0111, 0b000>; -def : 
RWSysReg<"TRCRSCTLR8", 0b10, 0b001, 0b0001, 0b1000, 0b000>; -def : RWSysReg<"TRCRSCTLR9", 0b10, 0b001, 0b0001, 0b1001, 0b000>; -def : RWSysReg<"TRCRSCTLR10", 0b10, 0b001, 0b0001, 0b1010, 0b000>; -def : RWSysReg<"TRCRSCTLR11", 0b10, 0b001, 0b0001, 0b1011, 0b000>; -def : RWSysReg<"TRCRSCTLR12", 0b10, 0b001, 0b0001, 0b1100, 0b000>; -def : RWSysReg<"TRCRSCTLR13", 0b10, 0b001, 0b0001, 0b1101, 0b000>; -def : RWSysReg<"TRCRSCTLR14", 0b10, 0b001, 0b0001, 0b1110, 0b000>; -def : RWSysReg<"TRCRSCTLR15", 0b10, 0b001, 0b0001, 0b1111, 0b000>; -def : RWSysReg<"TRCRSCTLR16", 0b10, 0b001, 0b0001, 0b0000, 0b001>; -def : RWSysReg<"TRCRSCTLR17", 0b10, 0b001, 0b0001, 0b0001, 0b001>; -def : RWSysReg<"TRCRSCTLR18", 0b10, 0b001, 0b0001, 0b0010, 0b001>; -def : RWSysReg<"TRCRSCTLR19", 0b10, 0b001, 0b0001, 0b0011, 0b001>; -def : RWSysReg<"TRCRSCTLR20", 0b10, 0b001, 0b0001, 0b0100, 0b001>; -def : RWSysReg<"TRCRSCTLR21", 0b10, 0b001, 0b0001, 0b0101, 0b001>; -def : RWSysReg<"TRCRSCTLR22", 0b10, 0b001, 0b0001, 0b0110, 0b001>; -def : RWSysReg<"TRCRSCTLR23", 0b10, 0b001, 0b0001, 0b0111, 0b001>; -def : RWSysReg<"TRCRSCTLR24", 0b10, 0b001, 0b0001, 0b1000, 0b001>; -def : RWSysReg<"TRCRSCTLR25", 0b10, 0b001, 0b0001, 0b1001, 0b001>; -def : RWSysReg<"TRCRSCTLR26", 0b10, 0b001, 0b0001, 0b1010, 0b001>; -def : RWSysReg<"TRCRSCTLR27", 0b10, 0b001, 0b0001, 0b1011, 0b001>; -def : RWSysReg<"TRCRSCTLR28", 0b10, 0b001, 0b0001, 0b1100, 0b001>; -def : RWSysReg<"TRCRSCTLR29", 0b10, 0b001, 0b0001, 0b1101, 0b001>; -def : RWSysReg<"TRCRSCTLR30", 0b10, 0b001, 0b0001, 0b1110, 0b001>; -def : RWSysReg<"TRCRSCTLR31", 0b10, 0b001, 0b0001, 0b1111, 0b001>; -def : RWSysReg<"TRCSSCCR0", 0b10, 0b001, 0b0001, 0b0000, 0b010>; -def : RWSysReg<"TRCSSCCR1", 0b10, 0b001, 0b0001, 0b0001, 0b010>; -def : RWSysReg<"TRCSSCCR2", 0b10, 0b001, 0b0001, 0b0010, 0b010>; -def : RWSysReg<"TRCSSCCR3", 0b10, 0b001, 0b0001, 0b0011, 0b010>; -def : RWSysReg<"TRCSSCCR4", 0b10, 0b001, 0b0001, 0b0100, 0b010>; -def : RWSysReg<"TRCSSCCR5", 0b10, 0b001, 
0b0001, 0b0101, 0b010>; -def : RWSysReg<"TRCSSCCR6", 0b10, 0b001, 0b0001, 0b0110, 0b010>; -def : RWSysReg<"TRCSSCCR7", 0b10, 0b001, 0b0001, 0b0111, 0b010>; -def : RWSysReg<"TRCSSCSR0", 0b10, 0b001, 0b0001, 0b1000, 0b010>; -def : RWSysReg<"TRCSSCSR1", 0b10, 0b001, 0b0001, 0b1001, 0b010>; -def : RWSysReg<"TRCSSCSR2", 0b10, 0b001, 0b0001, 0b1010, 0b010>; -def : RWSysReg<"TRCSSCSR3", 0b10, 0b001, 0b0001, 0b1011, 0b010>; -def : RWSysReg<"TRCSSCSR4", 0b10, 0b001, 0b0001, 0b1100, 0b010>; -def : RWSysReg<"TRCSSCSR5", 0b10, 0b001, 0b0001, 0b1101, 0b010>; -def : RWSysReg<"TRCSSCSR6", 0b10, 0b001, 0b0001, 0b1110, 0b010>; -def : RWSysReg<"TRCSSCSR7", 0b10, 0b001, 0b0001, 0b1111, 0b010>; -def : RWSysReg<"TRCSSPCICR0", 0b10, 0b001, 0b0001, 0b0000, 0b011>; -def : RWSysReg<"TRCSSPCICR1", 0b10, 0b001, 0b0001, 0b0001, 0b011>; -def : RWSysReg<"TRCSSPCICR2", 0b10, 0b001, 0b0001, 0b0010, 0b011>; -def : RWSysReg<"TRCSSPCICR3", 0b10, 0b001, 0b0001, 0b0011, 0b011>; -def : RWSysReg<"TRCSSPCICR4", 0b10, 0b001, 0b0001, 0b0100, 0b011>; -def : RWSysReg<"TRCSSPCICR5", 0b10, 0b001, 0b0001, 0b0101, 0b011>; -def : RWSysReg<"TRCSSPCICR6", 0b10, 0b001, 0b0001, 0b0110, 0b011>; -def : RWSysReg<"TRCSSPCICR7", 0b10, 0b001, 0b0001, 0b0111, 0b011>; -def : RWSysReg<"TRCPDCR", 0b10, 0b001, 0b0001, 0b0100, 0b100>; -def : RWSysReg<"TRCACVR0", 0b10, 0b001, 0b0010, 0b0000, 0b000>; -def : RWSysReg<"TRCACVR1", 0b10, 0b001, 0b0010, 0b0010, 0b000>; -def : RWSysReg<"TRCACVR2", 0b10, 0b001, 0b0010, 0b0100, 0b000>; -def : RWSysReg<"TRCACVR3", 0b10, 0b001, 0b0010, 0b0110, 0b000>; -def : RWSysReg<"TRCACVR4", 0b10, 0b001, 0b0010, 0b1000, 0b000>; -def : RWSysReg<"TRCACVR5", 0b10, 0b001, 0b0010, 0b1010, 0b000>; -def : RWSysReg<"TRCACVR6", 0b10, 0b001, 0b0010, 0b1100, 0b000>; -def : RWSysReg<"TRCACVR7", 0b10, 0b001, 0b0010, 0b1110, 0b000>; -def : RWSysReg<"TRCACVR8", 0b10, 0b001, 0b0010, 0b0000, 0b001>; -def : RWSysReg<"TRCACVR9", 0b10, 0b001, 0b0010, 0b0010, 0b001>; -def : RWSysReg<"TRCACVR10", 0b10, 0b001, 0b0010, 0b0100, 
0b001>; -def : RWSysReg<"TRCACVR11", 0b10, 0b001, 0b0010, 0b0110, 0b001>; -def : RWSysReg<"TRCACVR12", 0b10, 0b001, 0b0010, 0b1000, 0b001>; -def : RWSysReg<"TRCACVR13", 0b10, 0b001, 0b0010, 0b1010, 0b001>; -def : RWSysReg<"TRCACVR14", 0b10, 0b001, 0b0010, 0b1100, 0b001>; -def : RWSysReg<"TRCACVR15", 0b10, 0b001, 0b0010, 0b1110, 0b001>; -def : RWSysReg<"TRCACATR0", 0b10, 0b001, 0b0010, 0b0000, 0b010>; -def : RWSysReg<"TRCACATR1", 0b10, 0b001, 0b0010, 0b0010, 0b010>; -def : RWSysReg<"TRCACATR2", 0b10, 0b001, 0b0010, 0b0100, 0b010>; -def : RWSysReg<"TRCACATR3", 0b10, 0b001, 0b0010, 0b0110, 0b010>; -def : RWSysReg<"TRCACATR4", 0b10, 0b001, 0b0010, 0b1000, 0b010>; -def : RWSysReg<"TRCACATR5", 0b10, 0b001, 0b0010, 0b1010, 0b010>; -def : RWSysReg<"TRCACATR6", 0b10, 0b001, 0b0010, 0b1100, 0b010>; -def : RWSysReg<"TRCACATR7", 0b10, 0b001, 0b0010, 0b1110, 0b010>; -def : RWSysReg<"TRCACATR8", 0b10, 0b001, 0b0010, 0b0000, 0b011>; -def : RWSysReg<"TRCACATR9", 0b10, 0b001, 0b0010, 0b0010, 0b011>; -def : RWSysReg<"TRCACATR10", 0b10, 0b001, 0b0010, 0b0100, 0b011>; -def : RWSysReg<"TRCACATR11", 0b10, 0b001, 0b0010, 0b0110, 0b011>; -def : RWSysReg<"TRCACATR12", 0b10, 0b001, 0b0010, 0b1000, 0b011>; -def : RWSysReg<"TRCACATR13", 0b10, 0b001, 0b0010, 0b1010, 0b011>; -def : RWSysReg<"TRCACATR14", 0b10, 0b001, 0b0010, 0b1100, 0b011>; -def : RWSysReg<"TRCACATR15", 0b10, 0b001, 0b0010, 0b1110, 0b011>; -def : RWSysReg<"TRCDVCVR0", 0b10, 0b001, 0b0010, 0b0000, 0b100>; -def : RWSysReg<"TRCDVCVR1", 0b10, 0b001, 0b0010, 0b0100, 0b100>; -def : RWSysReg<"TRCDVCVR2", 0b10, 0b001, 0b0010, 0b1000, 0b100>; -def : RWSysReg<"TRCDVCVR3", 0b10, 0b001, 0b0010, 0b1100, 0b100>; -def : RWSysReg<"TRCDVCVR4", 0b10, 0b001, 0b0010, 0b0000, 0b101>; -def : RWSysReg<"TRCDVCVR5", 0b10, 0b001, 0b0010, 0b0100, 0b101>; -def : RWSysReg<"TRCDVCVR6", 0b10, 0b001, 0b0010, 0b1000, 0b101>; -def : RWSysReg<"TRCDVCVR7", 0b10, 0b001, 0b0010, 0b1100, 0b101>; -def : RWSysReg<"TRCDVCMR0", 0b10, 0b001, 0b0010, 0b0000, 0b110>; -def 
: RWSysReg<"TRCDVCMR1", 0b10, 0b001, 0b0010, 0b0100, 0b110>; -def : RWSysReg<"TRCDVCMR2", 0b10, 0b001, 0b0010, 0b1000, 0b110>; -def : RWSysReg<"TRCDVCMR3", 0b10, 0b001, 0b0010, 0b1100, 0b110>; -def : RWSysReg<"TRCDVCMR4", 0b10, 0b001, 0b0010, 0b0000, 0b111>; -def : RWSysReg<"TRCDVCMR5", 0b10, 0b001, 0b0010, 0b0100, 0b111>; -def : RWSysReg<"TRCDVCMR6", 0b10, 0b001, 0b0010, 0b1000, 0b111>; -def : RWSysReg<"TRCDVCMR7", 0b10, 0b001, 0b0010, 0b1100, 0b111>; -def : RWSysReg<"TRCCIDCVR0", 0b10, 0b001, 0b0011, 0b0000, 0b000>; -def : RWSysReg<"TRCCIDCVR1", 0b10, 0b001, 0b0011, 0b0010, 0b000>; -def : RWSysReg<"TRCCIDCVR2", 0b10, 0b001, 0b0011, 0b0100, 0b000>; -def : RWSysReg<"TRCCIDCVR3", 0b10, 0b001, 0b0011, 0b0110, 0b000>; -def : RWSysReg<"TRCCIDCVR4", 0b10, 0b001, 0b0011, 0b1000, 0b000>; -def : RWSysReg<"TRCCIDCVR5", 0b10, 0b001, 0b0011, 0b1010, 0b000>; -def : RWSysReg<"TRCCIDCVR6", 0b10, 0b001, 0b0011, 0b1100, 0b000>; -def : RWSysReg<"TRCCIDCVR7", 0b10, 0b001, 0b0011, 0b1110, 0b000>; -def : RWSysReg<"TRCVMIDCVR0", 0b10, 0b001, 0b0011, 0b0000, 0b001>; -def : RWSysReg<"TRCVMIDCVR1", 0b10, 0b001, 0b0011, 0b0010, 0b001>; -def : RWSysReg<"TRCVMIDCVR2", 0b10, 0b001, 0b0011, 0b0100, 0b001>; -def : RWSysReg<"TRCVMIDCVR3", 0b10, 0b001, 0b0011, 0b0110, 0b001>; -def : RWSysReg<"TRCVMIDCVR4", 0b10, 0b001, 0b0011, 0b1000, 0b001>; -def : RWSysReg<"TRCVMIDCVR5", 0b10, 0b001, 0b0011, 0b1010, 0b001>; -def : RWSysReg<"TRCVMIDCVR6", 0b10, 0b001, 0b0011, 0b1100, 0b001>; -def : RWSysReg<"TRCVMIDCVR7", 0b10, 0b001, 0b0011, 0b1110, 0b001>; -def : RWSysReg<"TRCCIDCCTLR0", 0b10, 0b001, 0b0011, 0b0000, 0b010>; -def : RWSysReg<"TRCCIDCCTLR1", 0b10, 0b001, 0b0011, 0b0001, 0b010>; -def : RWSysReg<"TRCVMIDCCTLR0", 0b10, 0b001, 0b0011, 0b0010, 0b010>; -def : RWSysReg<"TRCVMIDCCTLR1", 0b10, 0b001, 0b0011, 0b0011, 0b010>; -def : RWSysReg<"TRCITCTRL", 0b10, 0b001, 0b0111, 0b0000, 0b100>; -def : RWSysReg<"TRCCLAIMSET", 0b10, 0b001, 0b0111, 0b1000, 0b110>; -def : RWSysReg<"TRCCLAIMCLR", 0b10, 0b001, 
0b0111, 0b1001, 0b110>; - -// GICv3 registers -// Op0 Op1 CRn CRm Op2 -def : RWSysReg<"ICC_BPR1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b011>; -def : RWSysReg<"ICC_BPR0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b011>; -def : RWSysReg<"ICC_PMR_EL1", 0b11, 0b000, 0b0100, 0b0110, 0b000>; -def : RWSysReg<"ICC_CTLR_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b100>; -def : RWSysReg<"ICC_CTLR_EL3", 0b11, 0b110, 0b1100, 0b1100, 0b100>; -def : RWSysReg<"ICC_SRE_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b101>; -def : RWSysReg<"ICC_SRE_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b101>; -def : RWSysReg<"ICC_SRE_EL3", 0b11, 0b110, 0b1100, 0b1100, 0b101>; -def : RWSysReg<"ICC_IGRPEN0_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b110>; -def : RWSysReg<"ICC_IGRPEN1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b111>; -def : RWSysReg<"ICC_IGRPEN1_EL3", 0b11, 0b110, 0b1100, 0b1100, 0b111>; -def : RWSysReg<"ICC_AP0R0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b100>; -def : RWSysReg<"ICC_AP0R1_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b101>; -def : RWSysReg<"ICC_AP0R2_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b110>; -def : RWSysReg<"ICC_AP0R3_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b111>; -def : RWSysReg<"ICC_AP1R0_EL1", 0b11, 0b000, 0b1100, 0b1001, 0b000>; -def : RWSysReg<"ICC_AP1R1_EL1", 0b11, 0b000, 0b1100, 0b1001, 0b001>; -def : RWSysReg<"ICC_AP1R2_EL1", 0b11, 0b000, 0b1100, 0b1001, 0b010>; -def : RWSysReg<"ICC_AP1R3_EL1", 0b11, 0b000, 0b1100, 0b1001, 0b011>; -def : RWSysReg<"ICH_AP0R0_EL2", 0b11, 0b100, 0b1100, 0b1000, 0b000>; -def : RWSysReg<"ICH_AP0R1_EL2", 0b11, 0b100, 0b1100, 0b1000, 0b001>; -def : RWSysReg<"ICH_AP0R2_EL2", 0b11, 0b100, 0b1100, 0b1000, 0b010>; -def : RWSysReg<"ICH_AP0R3_EL2", 0b11, 0b100, 0b1100, 0b1000, 0b011>; -def : RWSysReg<"ICH_AP1R0_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b000>; -def : RWSysReg<"ICH_AP1R1_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b001>; -def : RWSysReg<"ICH_AP1R2_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b010>; -def : RWSysReg<"ICH_AP1R3_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b011>; -def : RWSysReg<"ICH_HCR_EL2", 0b11, 0b100, 
0b1100, 0b1011, 0b000>; -def : ROSysReg<"ICH_MISR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b010>; -def : RWSysReg<"ICH_VMCR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b111>; -def : RWSysReg<"ICH_LR0_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b000>; -def : RWSysReg<"ICH_LR1_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b001>; -def : RWSysReg<"ICH_LR2_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b010>; -def : RWSysReg<"ICH_LR3_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b011>; -def : RWSysReg<"ICH_LR4_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b100>; -def : RWSysReg<"ICH_LR5_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b101>; -def : RWSysReg<"ICH_LR6_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b110>; -def : RWSysReg<"ICH_LR7_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b111>; -def : RWSysReg<"ICH_LR8_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b000>; -def : RWSysReg<"ICH_LR9_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b001>; -def : RWSysReg<"ICH_LR10_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b010>; -def : RWSysReg<"ICH_LR11_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b011>; -def : RWSysReg<"ICH_LR12_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b100>; -def : RWSysReg<"ICH_LR13_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b101>; -def : RWSysReg<"ICH_LR14_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b110>; -def : RWSysReg<"ICH_LR15_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b111>; - -// v8r system registers -let Requires = [{ {AArch64::HasV8_0rOps} }] in { -//Virtualization System Control Register -// Op0 Op1 CRn CRm Op2 -def : RWSysReg<"VSCTLR_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000> { - let AltName = "TTBR0_EL2"; -} - -//MPU Type Register -// Op0 Op1 CRn CRm Op2 -def : RWSysReg<"MPUIR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b100>; -def : RWSysReg<"MPUIR_EL2", 0b11, 0b100, 0b0000, 0b0000, 0b100>; - -//Protection Region Enable Register -// Op0 Op1 CRn CRm Op2 -def : RWSysReg<"PRENR_EL1", 0b11, 0b000, 0b0110, 0b0001, 0b001>; -def : RWSysReg<"PRENR_EL2", 0b11, 0b100, 0b0110, 0b0001, 0b001>; - -//Protection Region Selection Register -// Op0 Op1 CRn CRm Op2 -def : RWSysReg<"PRSELR_EL1", 0b11, 0b000, 0b0110, 0b0010, 
0b001>; -def : RWSysReg<"PRSELR_EL2", 0b11, 0b100, 0b0110, 0b0010, 0b001>; - -//Protection Region Base Address Register -// Op0 Op1 CRn CRm Op2 -def : RWSysReg<"PRBAR_EL1", 0b11, 0b000, 0b0110, 0b1000, 0b000>; -def : RWSysReg<"PRBAR_EL2", 0b11, 0b100, 0b0110, 0b1000, 0b000>; - -//Protection Region Limit Address Register -// Op0 Op1 CRn CRm Op2 -def : RWSysReg<"PRLAR_EL1", 0b11, 0b000, 0b0110, 0b1000, 0b001>; -def : RWSysReg<"PRLAR_EL2", 0b11, 0b100, 0b0110, 0b1000, 0b001>; - -foreach n = 1-15 in { -foreach x = 1-2 in { -//Direct acces to Protection Region Base Address Register for n th MPU region - def : RWSysReg{ - let Encoding{5-2} = n; - let Encoding{13} = !add(x,-1); - } - - def : RWSysReg{ - let Encoding{5-2} = n; - let Encoding{13} = !add(x,-1); - } -} //foreach x = 1-2 in -} //foreach n = 1-15 in -} //let Requires = [{ {AArch64::HasV8_0rOps} }] in - -// v8.1a "Privileged Access Never" extension-specific system registers -let Requires = [{ {AArch64::FeaturePAN} }] in -def : RWSysReg<"PAN", 0b11, 0b000, 0b0100, 0b0010, 0b011>; - -// v8.1a "Limited Ordering Regions" extension-specific system registers -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureLOR} }] in { -def : RWSysReg<"LORSA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b000>; -def : RWSysReg<"LOREA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b001>; -def : RWSysReg<"LORN_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b010>; -def : RWSysReg<"LORC_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b011>; -} - -// v8.1a "Virtualization Host extensions" system registers -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureVH} }] in { -def : RWSysReg<"TTBR1_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b001>; -def : RWSysReg<"CNTHV_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b000>; -def : RWSysReg<"CNTHV_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b010>; -def : RWSysReg<"CNTHV_CTL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b001>; -def : RWSysReg<"SCTLR_EL12", 0b11, 0b101, 0b0001, 0b0000, 0b000>; -def : RWSysReg<"CPACR_EL12", 0b11, 0b101, 0b0001, 0b0000, 
0b010>; -def : RWSysReg<"TTBR0_EL12", 0b11, 0b101, 0b0010, 0b0000, 0b000>; -def : RWSysReg<"TTBR1_EL12", 0b11, 0b101, 0b0010, 0b0000, 0b001>; -def : RWSysReg<"TCR_EL12", 0b11, 0b101, 0b0010, 0b0000, 0b010>; -def : RWSysReg<"AFSR0_EL12", 0b11, 0b101, 0b0101, 0b0001, 0b000>; -def : RWSysReg<"AFSR1_EL12", 0b11, 0b101, 0b0101, 0b0001, 0b001>; -def : RWSysReg<"ESR_EL12", 0b11, 0b101, 0b0101, 0b0010, 0b000>; -def : RWSysReg<"FAR_EL12", 0b11, 0b101, 0b0110, 0b0000, 0b000>; -def : RWSysReg<"MAIR_EL12", 0b11, 0b101, 0b1010, 0b0010, 0b000>; -def : RWSysReg<"AMAIR_EL12", 0b11, 0b101, 0b1010, 0b0011, 0b000>; -def : RWSysReg<"VBAR_EL12", 0b11, 0b101, 0b1100, 0b0000, 0b000>; -def : RWSysReg<"CONTEXTIDR_EL12", 0b11, 0b101, 0b1101, 0b0000, 0b001>; -def : RWSysReg<"CNTKCTL_EL12", 0b11, 0b101, 0b1110, 0b0001, 0b000>; -def : RWSysReg<"CNTP_TVAL_EL02", 0b11, 0b101, 0b1110, 0b0010, 0b000>; -def : RWSysReg<"CNTP_CTL_EL02", 0b11, 0b101, 0b1110, 0b0010, 0b001>; -def : RWSysReg<"CNTP_CVAL_EL02", 0b11, 0b101, 0b1110, 0b0010, 0b010>; -def : RWSysReg<"CNTV_TVAL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b000>; -def : RWSysReg<"CNTV_CTL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b001>; -def : RWSysReg<"CNTV_CVAL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b010>; -def : RWSysReg<"SPSR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b000>; -def : RWSysReg<"ELR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b001>; -let Requires = [{ {AArch64::FeatureCONTEXTIDREL2} }] in { - def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>; -} -} -// v8.2a registers -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeaturePsUAO} }] in -def : RWSysReg<"UAO", 0b11, 0b000, 0b0100, 0b0010, 0b100>; - -// v8.2a "Statistical Profiling extension" registers -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureSPE} }] in { -def : RWSysReg<"PMBLIMITR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b000>; -def : RWSysReg<"PMBPTR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b001>; -def : RWSysReg<"PMBSR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b011>; -def : 
ROSysReg<"PMBIDR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b111>; -def : RWSysReg<"PMSCR_EL2", 0b11, 0b100, 0b1001, 0b1001, 0b000>; -def : RWSysReg<"PMSCR_EL12", 0b11, 0b101, 0b1001, 0b1001, 0b000>; -def : RWSysReg<"PMSCR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b000>; -def : RWSysReg<"PMSICR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b010>; -def : RWSysReg<"PMSIRR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b011>; -def : RWSysReg<"PMSFCR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b100>; -def : RWSysReg<"PMSEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b101>; -def : RWSysReg<"PMSLATFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b110>; -def : ROSysReg<"PMSIDR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b111>; -} - -// v8.2a "RAS extension" registers -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureRAS} }] in { -def : RWSysReg<"ERRSELR_EL1", 0b11, 0b000, 0b0101, 0b0011, 0b001>; -def : RWSysReg<"ERXCTLR_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b001>; -def : RWSysReg<"ERXSTATUS_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b010>; -def : RWSysReg<"ERXADDR_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b011>; -def : RWSysReg<"ERXMISC0_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b000>; -def : RWSysReg<"ERXMISC1_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b001>; -def : RWSysReg<"DISR_EL1", 0b11, 0b000, 0b1100, 0b0001, 0b001>; -def : RWSysReg<"VDISR_EL2", 0b11, 0b100, 0b1100, 0b0001, 0b001>; -def : RWSysReg<"VSESR_EL2", 0b11, 0b100, 0b0101, 0b0010, 0b011>; -} - -// v8.3a "Pointer authentication extension" registers -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeaturePAuth} }] in { -def : RWSysReg<"APIAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b000>; -def : RWSysReg<"APIAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b001>; -def : RWSysReg<"APIBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b010>; -def : RWSysReg<"APIBKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b011>; -def : RWSysReg<"APDAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b000>; -def : RWSysReg<"APDAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b001>; -def : RWSysReg<"APDBKeyLo_EL1", 0b11, 0b000, 0b0010, 
0b0010, 0b010>; -def : RWSysReg<"APDBKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b011>; -def : RWSysReg<"APGAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b000>; -def : RWSysReg<"APGAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b001>; -} - -// v8.4 "Secure Exception Level 2 extension" -let Requires = [{ {AArch64::FeatureSEL2} }] in { -// v8.4a "Virtualization secure second stage translation" registers -// Op0 Op1 CRn CRm Op2 -def : RWSysReg<"VSTCR_EL2" , 0b11, 0b100, 0b0010, 0b0110, 0b010>; -def : RWSysReg<"VSTTBR_EL2", 0b11, 0b100, 0b0010, 0b0110, 0b000> { - let Requires = [{ {AArch64::HasV8_0aOps} }]; -} - -// v8.4a "Virtualization timer" registers -// Op0 Op1 CRn CRm Op2 -def : RWSysReg<"CNTHVS_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0100, 0b000>; -def : RWSysReg<"CNTHVS_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0100, 0b010>; -def : RWSysReg<"CNTHVS_CTL_EL2", 0b11, 0b100, 0b1110, 0b0100, 0b001>; -def : RWSysReg<"CNTHPS_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b000>; -def : RWSysReg<"CNTHPS_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b010>; -def : RWSysReg<"CNTHPS_CTL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b001>; - -// v8.4a "Virtualization debug state" registers -// Op0 Op1 CRn CRm Op2 -def : RWSysReg<"SDER32_EL2", 0b11, 0b100, 0b0001, 0b0011, 0b001>; -} // FeatureSEL2 - -// v8.4a RAS registers -// Op0 Op1 CRn CRm Op2 -def : RWSysReg<"ERXPFGCTL_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b101>; -def : RWSysReg<"ERXPFGCDN_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b110>; -def : RWSysReg<"ERXMISC2_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b010>; -def : RWSysReg<"ERXMISC3_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b011>; -def : ROSysReg<"ERXPFGF_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b100>; - -// v8.4a MPAM registers -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureMPAM} }] in { -def : RWSysReg<"MPAM0_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b001>; -def : RWSysReg<"MPAM1_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b000>; -def : RWSysReg<"MPAM2_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b000>; -def : RWSysReg<"MPAM3_EL3", 0b11, 
0b110, 0b1010, 0b0101, 0b000>; -def : RWSysReg<"MPAM1_EL12", 0b11, 0b101, 0b1010, 0b0101, 0b000>; -def : RWSysReg<"MPAMHCR_EL2", 0b11, 0b100, 0b1010, 0b0100, 0b000>; -def : RWSysReg<"MPAMVPMV_EL2", 0b11, 0b100, 0b1010, 0b0100, 0b001>; -def : RWSysReg<"MPAMVPM0_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b000>; -def : RWSysReg<"MPAMVPM1_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b001>; -def : RWSysReg<"MPAMVPM2_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b010>; -def : RWSysReg<"MPAMVPM3_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b011>; -def : RWSysReg<"MPAMVPM4_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b100>; -def : RWSysReg<"MPAMVPM5_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b101>; -def : RWSysReg<"MPAMVPM6_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b110>; -def : RWSysReg<"MPAMVPM7_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b111>; -def : ROSysReg<"MPAMIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b100>; -} //FeatureMPAM - -// v8.4a Activity Monitor registers -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureAM} }] in { -def : RWSysReg<"AMCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b000>; -def : ROSysReg<"AMCFGR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b001>; -def : ROSysReg<"AMCGCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b010>; -def : RWSysReg<"AMUSERENR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b011>; -def : RWSysReg<"AMCNTENCLR0_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b100>; -def : RWSysReg<"AMCNTENSET0_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b101>; -def : RWSysReg<"AMEVCNTR00_EL0", 0b11, 0b011, 0b1101, 0b0100, 0b000>; -def : RWSysReg<"AMEVCNTR01_EL0", 0b11, 0b011, 0b1101, 0b0100, 0b001>; -def : RWSysReg<"AMEVCNTR02_EL0", 0b11, 0b011, 0b1101, 0b0100, 0b010>; -def : RWSysReg<"AMEVCNTR03_EL0", 0b11, 0b011, 0b1101, 0b0100, 0b011>; -def : ROSysReg<"AMEVTYPER00_EL0", 0b11, 0b011, 0b1101, 0b0110, 0b000>; -def : ROSysReg<"AMEVTYPER01_EL0", 0b11, 0b011, 0b1101, 0b0110, 0b001>; -def : ROSysReg<"AMEVTYPER02_EL0", 0b11, 0b011, 0b1101, 0b0110, 0b010>; -def : ROSysReg<"AMEVTYPER03_EL0", 0b11, 0b011, 0b1101, 0b0110, 0b011>; -def : 
RWSysReg<"AMCNTENCLR1_EL0", 0b11, 0b011, 0b1101, 0b0011, 0b000>; -def : RWSysReg<"AMCNTENSET1_EL0", 0b11, 0b011, 0b1101, 0b0011, 0b001>; -def : RWSysReg<"AMEVCNTR10_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b000>; -def : RWSysReg<"AMEVCNTR11_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b001>; -def : RWSysReg<"AMEVCNTR12_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b010>; -def : RWSysReg<"AMEVCNTR13_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b011>; -def : RWSysReg<"AMEVCNTR14_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b100>; -def : RWSysReg<"AMEVCNTR15_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b101>; -def : RWSysReg<"AMEVCNTR16_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b110>; -def : RWSysReg<"AMEVCNTR17_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b111>; -def : RWSysReg<"AMEVCNTR18_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b000>; -def : RWSysReg<"AMEVCNTR19_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b001>; -def : RWSysReg<"AMEVCNTR110_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b010>; -def : RWSysReg<"AMEVCNTR111_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b011>; -def : RWSysReg<"AMEVCNTR112_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b100>; -def : RWSysReg<"AMEVCNTR113_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b101>; -def : RWSysReg<"AMEVCNTR114_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b110>; -def : RWSysReg<"AMEVCNTR115_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b111>; -def : RWSysReg<"AMEVTYPER10_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b000>; -def : RWSysReg<"AMEVTYPER11_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b001>; -def : RWSysReg<"AMEVTYPER12_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b010>; -def : RWSysReg<"AMEVTYPER13_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b011>; -def : RWSysReg<"AMEVTYPER14_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b100>; -def : RWSysReg<"AMEVTYPER15_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b101>; -def : RWSysReg<"AMEVTYPER16_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b110>; -def : RWSysReg<"AMEVTYPER17_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b111>; -def : RWSysReg<"AMEVTYPER18_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b000>; -def : RWSysReg<"AMEVTYPER19_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b001>; 
-def : RWSysReg<"AMEVTYPER110_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b010>; -def : RWSysReg<"AMEVTYPER111_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b011>; -def : RWSysReg<"AMEVTYPER112_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b100>; -def : RWSysReg<"AMEVTYPER113_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b101>; -def : RWSysReg<"AMEVTYPER114_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b110>; -def : RWSysReg<"AMEVTYPER115_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b111>; -} //FeatureAM - -// v8.4a Trace Extension registers -// -// Please note that the 8.4 spec also defines these registers: -// TRCIDR1, ID_DFR0_EL1, ID_AA64DFR0_EL1, MDSCR_EL1, MDCR_EL2, and MDCR_EL3, -// but they are already defined above. -// -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureTRACEV8_4} }] in { -def : RWSysReg<"TRFCR_EL1", 0b11, 0b000, 0b0001, 0b0010, 0b001>; -def : RWSysReg<"TRFCR_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b001>; -def : RWSysReg<"TRFCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b001>; -} //FeatureTRACEV8_4 - -// v8.4a Timing insensitivity of data processing instructions -// DIT: Data Independent Timing instructions -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureDIT} }] in { -def : RWSysReg<"DIT", 0b11, 0b011, 0b0100, 0b0010, 0b101>; -} //FeatureDIT - -// v8.4a Enhanced Support for Nested Virtualization -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureNV} }] in { -def : RWSysReg<"VNCR_EL2", 0b11, 0b100, 0b0010, 0b0010, 0b000>; -} //FeatureNV - -// SVE control registers -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureSVE} }] in { -def : RWSysReg<"ZCR_EL1", 0b11, 0b000, 0b0001, 0b0010, 0b000>; -def : RWSysReg<"ZCR_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b000>; -def : RWSysReg<"ZCR_EL3", 0b11, 0b110, 0b0001, 0b0010, 0b000>; -def : RWSysReg<"ZCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b000>; -} - -// V8.5a Spectre mitigation SSBS register -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureSSBS} }] in -def : RWSysReg<"SSBS", 0b11, 0b011, 0b0100, 0b0010, 0b110>; - -// v8.5a 
Memory Tagging Extension -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureMTE} }] in { -def : RWSysReg<"TCO", 0b11, 0b011, 0b0100, 0b0010, 0b111>; -def : RWSysReg<"GCR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b110>; -def : RWSysReg<"RGSR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b101>; -def : RWSysReg<"TFSR_EL1", 0b11, 0b000, 0b0101, 0b0110, 0b000>; -def : RWSysReg<"TFSR_EL2", 0b11, 0b100, 0b0101, 0b0110, 0b000>; -def : RWSysReg<"TFSR_EL3", 0b11, 0b110, 0b0101, 0b0110, 0b000>; -def : RWSysReg<"TFSR_EL12", 0b11, 0b101, 0b0101, 0b0110, 0b000>; -def : RWSysReg<"TFSRE0_EL1", 0b11, 0b000, 0b0101, 0b0110, 0b001>; -def : ROSysReg<"GMID_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b100>; -} // HasMTE - -// Embedded Trace Extension R/W System registers -let Requires = [{ {AArch64::FeatureETE} }] in { -// Name Op0 Op1 CRn CRm Op2 -def : RWSysReg<"TRCRSR", 0b10, 0b001, 0b0000, 0b1010, 0b000>; -// TRCEXTINSELR0 has the same encoding as ETM TRCEXTINSELR -def : RWSysReg<"TRCEXTINSELR0", 0b10, 0b001, 0b0000, 0b1000, 0b100>; -def : RWSysReg<"TRCEXTINSELR1", 0b10, 0b001, 0b0000, 0b1001, 0b100>; -def : RWSysReg<"TRCEXTINSELR2", 0b10, 0b001, 0b0000, 0b1010, 0b100>; -def : RWSysReg<"TRCEXTINSELR3", 0b10, 0b001, 0b0000, 0b1011, 0b100>; -} // FeatureETE - -// Trace Buffer Extension System registers -let Requires = [{ {AArch64::FeatureTRBE} }] in { -// Name Op0 Op1 CRn CRm Op2 -def : RWSysReg<"TRBLIMITR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b000>; -def : RWSysReg<"TRBPTR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b001>; -def : RWSysReg<"TRBBASER_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b010>; -def : RWSysReg<"TRBSR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b011>; -def : RWSysReg<"TRBMAR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b100>; -def : RWSysReg<"TRBTRG_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b110>; -def : ROSysReg<"TRBIDR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b111>; -} // FeatureTRBE - - -// v8.6a Activity Monitors Virtualization Support -let Requires = [{ {AArch64::FeatureAMVS} }] in { -foreach n = 0-15 in { - foreach x 
= 0-1 in { - def : RWSysReg<"AMEVCNTVOFF"#x#n#"_EL2", - 0b11, 0b100, 0b1101, 0b1000, 0b000>{ - let Encoding{4} = x; - let Encoding{3-0} = n; - } - } -} -} - -// v8.6a Fine Grained Virtualization Traps -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureFineGrainedTraps} }] in { -def : RWSysReg<"HFGRTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b100>; -def : RWSysReg<"HFGWTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b101>; -def : RWSysReg<"HFGITR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b110>; -def : RWSysReg<"HDFGRTR_EL2", 0b11, 0b100, 0b0011, 0b0001, 0b100>; -def : RWSysReg<"HDFGWTR_EL2", 0b11, 0b100, 0b0011, 0b0001, 0b101>; -} - -// v8.6a Enhanced Counter Virtualization -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureEnhancedCounterVirtualization} }] in { -def : RWSysReg<"CNTSCALE_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b100>; -def : RWSysReg<"CNTISCALE_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b101>; -def : RWSysReg<"CNTPOFF_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b110>; -def : RWSysReg<"CNTVFRQ_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b111>; -def : RWSysReg<"CNTPCTSS_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b101>; -def : RWSysReg<"CNTVCTSS_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b110>; -} - -// v8.7a LD64B/ST64B Accelerator Extension system register -let Requires = [{ {AArch64::FeatureLS64} }] in -def : RWSysReg<"ACCDATA_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b101>; - -// Branch Record Buffer system registers -let Requires = [{ {AArch64::FeatureBRBE} }] in { -def : RWSysReg<"BRBCR_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b000>; -def : RWSysReg<"BRBCR_EL12", 0b10, 0b101, 0b1001, 0b0000, 0b000>; -def : RWSysReg<"BRBCR_EL2", 0b10, 0b100, 0b1001, 0b0000, 0b000>; -def : RWSysReg<"BRBFCR_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b001>; -def : ROSysReg<"BRBIDR0_EL1", 0b10, 0b001, 0b1001, 0b0010, 0b000>; -def : RWSysReg<"BRBINFINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b000>; -def : RWSysReg<"BRBSRCINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b001>; -def : RWSysReg<"BRBTGTINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 
0b010>; -def : RWSysReg<"BRBTS_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b010>; -foreach n = 0-31 in { - defvar nb = !cast>(n); - def : ROSysReg<"BRBINF"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b00}>; - def : ROSysReg<"BRBSRC"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b01}>; - def : ROSysReg<"BRBTGT"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b10}>; -} -} - -// Statistical Profiling Extension system register -let Requires = [{ {AArch64::FeatureSPE_EEF} }] in -def : RWSysReg<"PMSNEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b001>; - -// Cyclone specific system registers -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureAppleA7SysReg} }] in -def : RWSysReg<"CPM_IOACC_CTL_EL3", 0b11, 0b111, 0b1111, 0b0010, 0b000>; - -// Scalable Matrix Extension (SME) -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureSME} }] in { -def : RWSysReg<"SMCR_EL1", 0b11, 0b000, 0b0001, 0b0010, 0b110>; -def : RWSysReg<"SMCR_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b110>; -def : RWSysReg<"SMCR_EL3", 0b11, 0b110, 0b0001, 0b0010, 0b110>; -def : RWSysReg<"SMCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b110>; -def : RWSysReg<"SVCR", 0b11, 0b011, 0b0100, 0b0010, 0b010>; -def : RWSysReg<"SMPRI_EL1", 0b11, 0b000, 0b0001, 0b0010, 0b100>; -def : RWSysReg<"SMPRIMAP_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b101>; -def : ROSysReg<"SMIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b110>; -def : RWSysReg<"TPIDR2_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b101>; -} // HasSME - -// v8.4a MPAM and SME registers -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureMPAM, AArch64::FeatureSME} }] in { -def : RWSysReg<"MPAMSM_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b011>; -} // HasMPAM, HasSME diff --git a/suite/synctools/tablegen/AArch64/SMEInstrFormats.td b/suite/synctools/tablegen/AArch64/SMEInstrFormats.td deleted file mode 100644 index 41f2cead4c..0000000000 --- a/suite/synctools/tablegen/AArch64/SMEInstrFormats.td +++ /dev/null @@ -1,726 +0,0 @@ -//=-- SMEInstrFormats.td - AArch64 SME Instruction classes -*- 
tablegen -*--=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// SME Outer Products -//===----------------------------------------------------------------------===// - -class sme_fp_outer_product_inst - : I<(outs za_ty:$ZAda), - (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm), - mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm", - "", []>, - Sched<[]> { - bits<5> Zm; - bits<3> Pm; - bits<3> Pn; - bits<5> Zn; - let Inst{31-23} = 0b100000001; - let Inst{22} = sz; - let Inst{21} = 0b0; - let Inst{20-16} = Zm; - let Inst{15-13} = Pm; - let Inst{12-10} = Pn; - let Inst{9-5} = Zn; - let Inst{4} = S; - let Inst{3} = 0b0; -} - -class sme_outer_product_fp32 - : sme_fp_outer_product_inst { - bits<2> ZAda; - let Inst{1-0} = ZAda; - let Inst{2} = 0b0; -} - -class sme_outer_product_fp64 - : sme_fp_outer_product_inst { - bits<3> ZAda; - let Inst{2-0} = ZAda; -} - -class sme_int_outer_product_inst - : I<(outs za_ty:$ZAda), - (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm), - mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm", - "", []>, - Sched<[]> { - bits<5> Zm; - bits<3> Pm; - bits<3> Pn; - bits<5> Zn; - let Inst{31-25} = 0b1010000; - let Inst{24} = u0; - let Inst{23} = 0b1; - let Inst{22} = sz; - let Inst{21} = u1; - let Inst{20-16} = Zm; - let Inst{15-13} = Pm; - let Inst{12-10} = Pn; - let Inst{9-5} = Zn; - let Inst{4} = S; - let Inst{3} = 0b0; -} - -class sme_int_outer_product_i32 opc, string mnemonic> - : sme_int_outer_product_inst { - bits<2> ZAda; - let Inst{1-0} = ZAda; - 
let Inst{2} = 0b0; -} - -class sme_int_outer_product_i64 opc, string mnemonic> - : sme_int_outer_product_inst { - bits<3> ZAda; - let Inst{2-0} = ZAda; -} - -class sme_outer_product_widening_inst - : I<(outs TileOp32:$ZAda), - (ins PPR3bAny:$Pn, PPR3bAny:$Pm, ZPR16:$Zn, ZPR16:$Zm), - mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm", - "", []>, - Sched<[]> { - bits<5> Zm; - bits<3> Pm; - bits<3> Pn; - bits<5> Zn; - bits<2> ZAda; - let Inst{31-22} = 0b1000000110; - let Inst{21} = op; - let Inst{20-16} = Zm; - let Inst{15-13} = Pm; - let Inst{12-10} = Pn; - let Inst{9-5} = Zn; - let Inst{4} = S; - let Inst{3-2} = 0b00; - let Inst{1-0} = ZAda; -} - -multiclass sme_bf16_outer_product { - def : sme_outer_product_widening_inst<0b0, S, mnemonic>; -} - -multiclass sme_f16_outer_product { - def : sme_outer_product_widening_inst<0b1, S, mnemonic>; -} - -//===----------------------------------------------------------------------===// -// SME Add Vector to Tile -//===----------------------------------------------------------------------===// - -class sme_add_vector_to_tile_inst - : I<(outs tile_ty:$ZAda), - (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), - mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn", - "", []>, Sched<[]> { - bits<3> Pm; - bits<3> Pn; - bits<5> Zn; - let Inst{31-23} = 0b110000001; - let Inst{22} = op; - let Inst{21-17} = 0b01000; - let Inst{16} = V; - let Inst{15-13} = Pm; - let Inst{12-10} = Pn; - let Inst{9-5} = Zn; - let Inst{4-3} = 0b00; -} - -class sme_add_vector_to_tile_u32 - : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic> { - bits<2> ZAda; - let Inst{2} = 0b0; - let Inst{1-0} = ZAda; -} - -class sme_add_vector_to_tile_u64 - : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic> { - bits<3> ZAda; - let Inst{2-0} = ZAda; -} - -//===----------------------------------------------------------------------===// -// SME Contiguous Loads -//===----------------------------------------------------------------------===// - -class sme_mem_ld_ss_base 
msz, dag outs, dag ins, - string mnemonic, string argstr> - : I, Sched<[]> { - bits<5> Rm; - bits<2> Rv; - bits<3> Pg; - bits<5> Rn; - let Inst{31-25} = 0b1110000; - let Inst{24} = Q; - let Inst{23-22} = msz; - let Inst{21} = 0b0; - let Inst{20-16} = Rm; - let Inst{15} = V; - let Inst{14-13} = Rv; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4} = 0b0; - - let mayLoad = 1; -} - -class sme_mem_ld_ss_inst msz, string mnemonic, - MatrixTileVectorOperand tile_ty, bit is_col, - Operand imm_ty, RegisterOperand gpr_ty> - : sme_mem_ld_ss_base< - Q, is_col, msz, (outs tile_ty:$ZAt), - (ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, - gpr_ty:$Rm), - mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg/z, [$Rn, $Rm]">; - -multiclass sme_mem_ss_aliases_base { - def : InstAlias; - // Default XZR offset aliases - def : InstAlias; - def : InstAlias; -} - -multiclass sme_mem_ss_aliases { - defm : sme_mem_ss_aliases_base(inst # _B), - !if(is_col, TileVectorOpV8, TileVectorOpH8), - sme_elm_idx0_15, GPR64shifted8, pg_suffix>; - defm : sme_mem_ss_aliases_base(inst # _H), - !if(is_col, TileVectorOpV16, TileVectorOpH16), - sme_elm_idx0_7, GPR64shifted16, pg_suffix>; - defm : sme_mem_ss_aliases_base(inst # _S), - !if(is_col, TileVectorOpV32, TileVectorOpH32), - sme_elm_idx0_3, GPR64shifted32, pg_suffix>; - defm : sme_mem_ss_aliases_base(inst # _D), - !if(is_col, TileVectorOpV64, TileVectorOpH64), - sme_elm_idx0_1, GPR64shifted64, pg_suffix>; - defm : sme_mem_ss_aliases_base(inst # _Q), - !if(is_col, TileVectorOpV128, TileVectorOpH128), - sme_elm_idx0_0, GPR64shifted128, pg_suffix>; -} - -multiclass sme_mem_ld_ss_aliases { - defm NAME : sme_mem_ss_aliases<"ld1", inst, is_col, "/z">; -} - -multiclass sme_mem_ld_v_ss { - def _B : sme_mem_ld_ss_inst<0b0, 0b00, mnemonic # "b", - !if(is_col, TileVectorOpV8, TileVectorOpH8), - is_col, sme_elm_idx0_15, GPR64shifted8> { - bits<4> imm; - let Inst{3-0} = imm; - } - def _H : sme_mem_ld_ss_inst<0b0, 0b01, mnemonic # "h", - 
!if(is_col, TileVectorOpV16, TileVectorOpH16), - is_col, sme_elm_idx0_7, GPR64shifted16> { - bits<1> ZAt; - bits<3> imm; - let Inst{3} = ZAt; - let Inst{2-0} = imm; - } - def _S : sme_mem_ld_ss_inst<0b0, 0b10, mnemonic # "w", - !if(is_col, TileVectorOpV32, TileVectorOpH32), - is_col, sme_elm_idx0_3, GPR64shifted32> { - bits<2> ZAt; - bits<2> imm; - let Inst{3-2} = ZAt; - let Inst{1-0} = imm; - } - def _D : sme_mem_ld_ss_inst<0b0, 0b11, mnemonic # "d", - !if(is_col, TileVectorOpV64, TileVectorOpH64), - is_col, sme_elm_idx0_1, GPR64shifted64> { - bits<3> ZAt; - bits<1> imm; - let Inst{3-1} = ZAt; - let Inst{0} = imm; - } - def _Q : sme_mem_ld_ss_inst<0b1, 0b11, mnemonic # "q", - !if(is_col, TileVectorOpV128, TileVectorOpH128), - is_col, sme_elm_idx0_0, GPR64shifted128> { - bits<4> ZAt; - let Inst{3-0} = ZAt; - } - - defm : sme_mem_ld_ss_aliases; -} - -multiclass sme_mem_ld_ss { - defm _H : sme_mem_ld_v_ss; - defm _V : sme_mem_ld_v_ss; -} - -//===----------------------------------------------------------------------===// -// SME Contiguous Stores -//===----------------------------------------------------------------------===// - -class sme_mem_st_ss_base msz, dag ins, - string mnemonic, string argstr> - : I<(outs), ins, mnemonic, argstr, "", []>, Sched<[]> { - bits<5> Rm; - bits<2> Rv; - bits<3> Pg; - bits<5> Rn; - let Inst{31-25} = 0b1110000; - let Inst{24} = Q; - let Inst{23-22} = msz; - let Inst{21} = 0b1; - let Inst{20-16} = Rm; - let Inst{15} = V; - let Inst{14-13} = Rv; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4} = 0b0; - - let mayStore = 1; - let hasSideEffects = 1; -} - -class sme_mem_st_ss_inst msz, string mnemonic, - MatrixTileVectorOperand tile_ty, bit is_col, - Operand imm_ty, RegisterOperand gpr_ty> - : sme_mem_st_ss_base< - Q, is_col, msz, - (ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, - GPR64sp:$Rn, gpr_ty:$Rm), - mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg, [$Rn, $Rm]">; - -multiclass sme_mem_st_ss_aliases { - 
defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>; -} - -multiclass sme_mem_st_v_ss { - def _B : sme_mem_st_ss_inst<0b0, 0b00, mnemonic # "b", - !if(is_col, TileVectorOpV8, TileVectorOpH8), - is_col, sme_elm_idx0_15, GPR64shifted8> { - bits<4> imm; - let Inst{3-0} = imm; - } - def _H : sme_mem_st_ss_inst<0b0, 0b01, mnemonic # "h", - !if(is_col, TileVectorOpV16, TileVectorOpH16), - is_col, sme_elm_idx0_7, GPR64shifted16> { - bits<1> ZAt; - bits<3> imm; - let Inst{3} = ZAt; - let Inst{2-0} = imm; - } - def _S : sme_mem_st_ss_inst<0b0, 0b10, mnemonic # "w", - !if(is_col, TileVectorOpV32, TileVectorOpH32), - is_col, sme_elm_idx0_3, GPR64shifted32> { - bits<2> ZAt; - bits<2> imm; - let Inst{3-2} = ZAt; - let Inst{1-0} = imm; - } - def _D : sme_mem_st_ss_inst<0b0, 0b11, mnemonic # "d", - !if(is_col, TileVectorOpV64, TileVectorOpH64), - is_col, sme_elm_idx0_1, GPR64shifted64> { - bits<3> ZAt; - bits<1> imm; - let Inst{3-1} = ZAt; - let Inst{0} = imm; - } - def _Q : sme_mem_st_ss_inst<0b1, 0b11, mnemonic # "q", - !if(is_col, TileVectorOpV128, TileVectorOpH128), - is_col, sme_elm_idx0_0, GPR64shifted128> { - bits<4> ZAt; - let Inst{3-0} = ZAt; - } - - defm : sme_mem_st_ss_aliases; -} - -multiclass sme_mem_st_ss { - defm _H : sme_mem_st_v_ss; - defm _V : sme_mem_st_v_ss; -} - -//===----------------------------------------------------------------------===// -// SME Save and Restore Array -//===----------------------------------------------------------------------===// - -class sme_spill_fill_inst - : I, - Sched<[]> { - bits<2> Rv; - bits<5> Rn; - bits<4> imm4; - let Inst{31-22} = 0b1110000100; - let Inst{21} = isStore; - let Inst{20-15} = 0b000000; - let Inst{14-13} = Rv; - let Inst{12-10} = 0b000; - let Inst{9-5} = Rn; - let Inst{4} = 0b0; - let Inst{3-0} = imm4; - - let mayLoad = !not(isStore); - let mayStore = isStore; -} - -multiclass sme_spill_fill { - def NAME : sme_spill_fill_inst; - - def : InstAlias(NAME) MatrixOp:$ZAt, - MatrixIndexGPR32Op12_15:$Rv, 
sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>; -} - -multiclass sme_spill { - defm NAME : sme_spill_fill<0b1, (outs), - (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv, - sme_elm_idx0_15:$imm4, GPR64sp:$Rn, - imm0_15:$offset), - opcodestr>; -} - -multiclass sme_fill { - defm NAME : sme_spill_fill<0b0, (outs MatrixOp:$ZAt), - (ins MatrixIndexGPR32Op12_15:$Rv, - sme_elm_idx0_15:$imm4, GPR64sp:$Rn, - imm0_15:$offset), - opcodestr>; -} - -//===----------------------------------------------------------------------===// -// Move instructions -//===----------------------------------------------------------------------===// - -class sme_vector_to_tile_base sz, dag outs, dag ins, - string mnemonic, string argstr> - : I, Sched<[]> { - bits<2> Rv; - bits<3> Pg; - bits<5> Zn; - let Inst{31-24} = 0b11000000; - let Inst{23-22} = sz; - let Inst{21-17} = 0b00000; - let Inst{16} = Q; - let Inst{15} = V; - let Inst{14-13} = Rv; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4} = 0b0; -} - -class sme_vector_to_tile_inst sz, MatrixTileVectorOperand tile_ty, - bit is_col, Operand imm_ty, ZPRRegOp zpr_ty, - string mnemonic> - : sme_vector_to_tile_base; - -multiclass sme_vector_to_tile_aliases { - def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn", - (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>; -} - -multiclass sme_vector_v_to_tile { - def _B : sme_vector_to_tile_inst<0b0, 0b00, !if(is_col, TileVectorOpV8, - TileVectorOpH8), - is_col, sme_elm_idx0_15, ZPR8, mnemonic> { - bits<4> imm; - let Inst{3-0} = imm; - } - def _H : sme_vector_to_tile_inst<0b0, 0b01, !if(is_col, TileVectorOpV16, - TileVectorOpH16), - is_col, sme_elm_idx0_7, ZPR16, mnemonic> { - bits<1> ZAd; - bits<3> imm; - let Inst{3} = ZAd; - let Inst{2-0} = imm; - } - def _S : sme_vector_to_tile_inst<0b0, 0b10, !if(is_col, TileVectorOpV32, - TileVectorOpH32), - is_col, sme_elm_idx0_3, ZPR32, mnemonic> { - bits<2> ZAd; - bits<2> imm; - let Inst{3-2} = ZAd; - let Inst{1-0} = 
imm; - } - def _D : sme_vector_to_tile_inst<0b0, 0b11, !if(is_col, TileVectorOpV64, - TileVectorOpH64), - is_col, sme_elm_idx0_1, ZPR64, mnemonic> { - bits<3> ZAd; - bits<1> imm; - let Inst{3-1} = ZAd; - let Inst{0} = imm; - } - def _Q : sme_vector_to_tile_inst<0b1, 0b11, !if(is_col, TileVectorOpV128, - TileVectorOpH128), - is_col, sme_elm_idx0_0, ZPR128, mnemonic> { - bits<4> ZAd; - bits<1> imm; - let Inst{3-0} = ZAd; - } - - defm : sme_vector_to_tile_aliases(NAME # _B), - !if(is_col, TileVectorOpV8, - TileVectorOpH8), - ZPR8, sme_elm_idx0_15>; - defm : sme_vector_to_tile_aliases(NAME # _H), - !if(is_col, TileVectorOpV16, - TileVectorOpH16), - ZPR16, sme_elm_idx0_7>; - defm : sme_vector_to_tile_aliases(NAME # _S), - !if(is_col, TileVectorOpV32, - TileVectorOpH32), - ZPR32, sme_elm_idx0_3>; - defm : sme_vector_to_tile_aliases(NAME # _D), - !if(is_col, TileVectorOpV64, - TileVectorOpH64), - ZPR64, sme_elm_idx0_1>; - defm : sme_vector_to_tile_aliases(NAME # _Q), - !if(is_col, TileVectorOpV128, - TileVectorOpH128), - ZPR128, sme_elm_idx0_0>; -} - -multiclass sme_vector_to_tile { - defm _H : sme_vector_v_to_tile; - defm _V : sme_vector_v_to_tile; -} - -class sme_tile_to_vector_base sz, dag outs, dag ins, - string mnemonic, string argstr> - : I, Sched<[]> { - bits<2> Rv; - bits<3> Pg; - bits<5> Zd; - let Inst{31-24} = 0b11000000; - let Inst{23-22} = sz; - let Inst{21-17} = 0b00001; - let Inst{16} = Q; - let Inst{15} = V; - let Inst{14-13} = Rv; - let Inst{12-10} = Pg; - let Inst{9} = 0b0; - let Inst{4-0} = Zd; -} - -class sme_tile_to_vector_inst sz, ZPRRegOp zpr_ty, - MatrixTileVectorOperand tile_ty, - bit is_col, Operand imm_ty, string mnemonic> - : sme_tile_to_vector_base; - -multiclass sme_tile_to_vector_aliases { - def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv, $imm]", - (inst zpr_ty:$Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), 1>; -} - -multiclass sme_tile_to_vector_v { - def _B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8, !if(is_col, 
TileVectorOpV8, - TileVectorOpH8), - is_col, sme_elm_idx0_15, mnemonic> { - bits<4> imm; - let Inst{8-5} = imm; - } - def _H : sme_tile_to_vector_inst<0b0, 0b01, ZPR16, !if(is_col, TileVectorOpV16, - TileVectorOpH16), - is_col, sme_elm_idx0_7, mnemonic> { - bits<1> ZAn; - bits<3> imm; - let Inst{8} = ZAn; - let Inst{7-5} = imm; - } - def _S : sme_tile_to_vector_inst<0b0, 0b10, ZPR32, !if(is_col, TileVectorOpV32, - TileVectorOpH32), - is_col, sme_elm_idx0_3, mnemonic> { - bits<2> ZAn; - bits<2> imm; - let Inst{8-7} = ZAn; - let Inst{6-5} = imm; - } - def _D : sme_tile_to_vector_inst<0b0, 0b11, ZPR64, !if(is_col, TileVectorOpV64, - TileVectorOpH64), - is_col, sme_elm_idx0_1, mnemonic> { - bits<3> ZAn; - bits<1> imm; - let Inst{8-6} = ZAn; - let Inst{5} = imm; - } - def _Q : sme_tile_to_vector_inst<0b1, 0b11, ZPR128, !if(is_col, TileVectorOpV128, - TileVectorOpH128), - is_col, sme_elm_idx0_0, mnemonic> { - bits<4> ZAn; - let Inst{8-5} = ZAn; - } - - defm : sme_tile_to_vector_aliases(NAME # _B), ZPR8, - !if(is_col, TileVectorOpV8, - TileVectorOpH8), sme_elm_idx0_15>; - defm : sme_tile_to_vector_aliases(NAME # _H), ZPR16, - !if(is_col, TileVectorOpV16, - TileVectorOpH16), sme_elm_idx0_7>; - defm : sme_tile_to_vector_aliases(NAME # _S), ZPR32, - !if(is_col, TileVectorOpV32, - TileVectorOpH32), sme_elm_idx0_3>; - defm : sme_tile_to_vector_aliases(NAME # _D), ZPR64, - !if(is_col, TileVectorOpV64, - TileVectorOpH64), sme_elm_idx0_1>; - defm : sme_tile_to_vector_aliases(NAME # _Q), ZPR128, - !if(is_col, TileVectorOpV128, - TileVectorOpH128), sme_elm_idx0_0>; -} - -multiclass sme_tile_to_vector { - defm _H : sme_tile_to_vector_v; - defm _V : sme_tile_to_vector_v; -} - -//===----------------------------------------------------------------------===// -// SME Zero -//===----------------------------------------------------------------------===// - -class sme_zero_inst - : I<(outs MatrixTileList:$imm), (ins), - mnemonic, "\t$imm", "", []>, Sched<[]> { - bits<8> imm; - let 
Inst{31-8} = 0b110000000000100000000000; - let Inst{7-0} = imm; -} - -multiclass sme_zero { - def NAME : sme_zero_inst; - - def : InstAlias<"zero\t\\{za\\}", (!cast(NAME) 0b11111111), 1>; - def : InstAlias<"zero\t\\{za0.h\\}", (!cast(NAME) 0b01010101), 1>; - def : InstAlias<"zero\t\\{za1.h\\}", (!cast(NAME) 0b10101010), 1>; - def : InstAlias<"zero\t\\{za0.s\\}", (!cast(NAME) 0b00010001), 1>; - def : InstAlias<"zero\t\\{za1.s\\}", (!cast(NAME) 0b00100010), 1>; - def : InstAlias<"zero\t\\{za2.s\\}", (!cast(NAME) 0b01000100), 1>; - def : InstAlias<"zero\t\\{za3.s\\}", (!cast(NAME) 0b10001000), 1>; - def : InstAlias<"zero\t\\{za0.s,za1.s\\}", (!cast(NAME) 0b00110011), 1>; - def : InstAlias<"zero\t\\{za0.s,za3.s\\}", (!cast(NAME) 0b10011001), 1>; - def : InstAlias<"zero\t\\{za1.s,za2.s\\}", (!cast(NAME) 0b01100110), 1>; - def : InstAlias<"zero\t\\{za2.s,za3.s\\}", (!cast(NAME) 0b11001100), 1>; - def : InstAlias<"zero\t\\{za0.s,za1.s,za2.s\\}", (!cast(NAME) 0b01110111), 1>; - def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}", (!cast(NAME) 0b10111011), 1>; - def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (!cast(NAME) 0b11011101), 1>; - def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (!cast(NAME) 0b11101110), 1>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Instructions -//===----------------------------------------------------------------------===// - -class sve2_int_perm_revd - : I<(outs ZPR128:$Zd), (ins ZPR128:$_Zd, PPR3bAny:$Pg, ZPR128:$Zn), - asm, "\t$Zd, $Pg/m, $Zn", "", []>, - Sched<[]> { - bits<5> Zd; - bits<3> Pg; - bits<5> Zn; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = 0b00; // size - let Inst{21-13} = 0b101110100; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; - - let Constraints = "$Zd = $_Zd"; - let DestructiveInstType = DestructiveUnary; - let ElementSize = ZPR128.ElementSize; -} - -class sve2_clamp sz, bit U, ZPRRegOp zpr_ty> - : I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, 
zpr_ty:$Zm, zpr_ty:$_Zd), - asm, "\t$Zd, $Zn, $Zm", "", []>, - Sched<[]> { - bits<5> Zm; - bits<5> Zn; - bits<5> Zd; - let Inst{31-24} = 0b01000100; - let Inst{23-22} = sz; - let Inst{21} = 0b0; - let Inst{20-16} = Zm; - let Inst{15-11} = 0b11000; - let Inst{10} = U; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; - - let Constraints = "$Zd = $_Zd"; - let DestructiveInstType = DestructiveOther; - let ElementSize = zpr_ty.ElementSize; -} - -multiclass sve2_clamp { - def _B : sve2_clamp; - def _H : sve2_clamp; - def _S : sve2_clamp; - def _D : sve2_clamp; -} - -class sve2_int_perm_sel_p - : I<(outs PPRAny:$Pd), (ins PPRAny:$Pn, ppr_ty:$Pm, - MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), - asm, "\t$Pd, $Pn, $Pm[$Rv, $imm]", "", []>, - Sched<[]> { - bits<2> Rv; - bits<4> Pn; - bits<4> Pm; - bits<4> Pd; - let Inst{31-24} = 0b00100101; - let Inst{21} = 0b1; - let Inst{17-16} = Rv; - let Inst{15-14} = 0b01; - let Inst{13-10} = Pn; - let Inst{9} = 0b0; - let Inst{8-5} = Pm; - let Inst{4} = 0b0; - let Inst{3-0} = Pd; -} - -multiclass sve2_int_perm_sel_p { - def _B : sve2_int_perm_sel_p { - bits<4> imm; - let Inst{23-22} = imm{3-2}; - let Inst{20-19} = imm{1-0}; - let Inst{18} = 0b1; - } - def _H : sve2_int_perm_sel_p { - bits<3> imm; - let Inst{23-22} = imm{2-1}; - let Inst{20} = imm{0}; - let Inst{19-18} = 0b10; - } - def _S : sve2_int_perm_sel_p { - bits<2> imm; - let Inst{23-22} = imm{1-0}; - let Inst{20-18} = 0b100; - } - def _D : sve2_int_perm_sel_p { - bits<1> imm; - let Inst{23} = imm; - let Inst{22} = 0b1; - let Inst{20-18} = 0b000; - } -} diff --git a/suite/synctools/tablegen/AArch64/SVEInstrFormats.td b/suite/synctools/tablegen/AArch64/SVEInstrFormats.td deleted file mode 100644 index 37b2ac4d87..0000000000 --- a/suite/synctools/tablegen/AArch64/SVEInstrFormats.td +++ /dev/null @@ -1,8515 +0,0 @@ -//=-- SVEInstrFormats.td - AArch64 SVE Instruction classes -*- tablegen -*--=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// AArch64 Scalable Vector Extension (SVE) Instruction Class Definitions. -// -//===----------------------------------------------------------------------===// - -def SDT_AArch64Setcc : SDTypeProfile<1, 4, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, - SDTCVecEltisVT<0, i1>, SDTCVecEltisVT<1, i1>, SDTCisSameAs<2, 3>, - SDTCisVT<4, OtherVT> -]>; - -def AArch64setcc_z : SDNode<"AArch64ISD::SETCC_MERGE_ZERO", SDT_AArch64Setcc>; - -def SVEPatternOperand : AsmOperandClass { - let Name = "SVEPattern"; - let ParserMethod = "tryParseSVEPattern"; - let PredicateMethod = "isSVEPattern"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "InvalidSVEPattern"; -} - -def sve_pred_enum : Operand, TImmLeaf { - - let PrintMethod = "printSVEPattern"; - let ParserMatchClass = SVEPatternOperand; -} - -def SVEPrefetchOperand : AsmOperandClass { - let Name = "SVEPrefetch"; - let ParserMethod = "tryParsePrefetch"; - let PredicateMethod = "isPrefetch"; - let RenderMethod = "addPrefetchOperands"; -} - -def sve_prfop : Operand, TImmLeaf { - let PrintMethod = "printPrefetchOp"; - let ParserMatchClass = SVEPrefetchOperand; -} - -class SVELogicalImmOperand : AsmOperandClass { - let Name = "SVELogicalImm" # Width; - let DiagnosticType = "LogicalSecondSource"; - let PredicateMethod = "isLogicalImm"; - let RenderMethod = "addLogicalImmOperands"; -} - -def sve_logical_imm8 : Operand { - let ParserMatchClass = SVELogicalImmOperand<8>; - let PrintMethod = "printLogicalImm"; - - let MCOperandPredicate = [{ - if (!MCOp.isImm()) - return false; - int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64); - return AArch64_AM::isSVEMaskOfIdenticalElements(Val); - }]; -} - -def sve_logical_imm16 : Operand { - let ParserMatchClass = SVELogicalImmOperand<16>; - 
let PrintMethod = "printLogicalImm"; - - let MCOperandPredicate = [{ - if (!MCOp.isImm()) - return false; - int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64); - return AArch64_AM::isSVEMaskOfIdenticalElements(Val); - }]; -} - -def sve_logical_imm32 : Operand { - let ParserMatchClass = SVELogicalImmOperand<32>; - let PrintMethod = "printLogicalImm"; - - let MCOperandPredicate = [{ - if (!MCOp.isImm()) - return false; - int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64); - return AArch64_AM::isSVEMaskOfIdenticalElements(Val); - }]; -} - -class SVEPreferredLogicalImmOperand : AsmOperandClass { - let Name = "SVEPreferredLogicalImm" # Width; - let PredicateMethod = "isSVEPreferredLogicalImm"; - let RenderMethod = "addLogicalImmOperands"; -} - -def sve_preferred_logical_imm16 : Operand { - let ParserMatchClass = SVEPreferredLogicalImmOperand<16>; - let PrintMethod = "printSVELogicalImm"; - - let MCOperandPredicate = [{ - if (!MCOp.isImm()) - return false; - int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64); - return AArch64_AM::isSVEMaskOfIdenticalElements(Val) && - AArch64_AM::isSVEMoveMaskPreferredLogicalImmediate(Val); - }]; -} - -def sve_preferred_logical_imm32 : Operand { - let ParserMatchClass = SVEPreferredLogicalImmOperand<32>; - let PrintMethod = "printSVELogicalImm"; - - let MCOperandPredicate = [{ - if (!MCOp.isImm()) - return false; - int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64); - return AArch64_AM::isSVEMaskOfIdenticalElements(Val) && - AArch64_AM::isSVEMoveMaskPreferredLogicalImmediate(Val); - }]; -} - -def sve_preferred_logical_imm64 : Operand { - let ParserMatchClass = SVEPreferredLogicalImmOperand<64>; - let PrintMethod = "printSVELogicalImm"; - - let MCOperandPredicate = [{ - if (!MCOp.isImm()) - return false; - int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64); - return AArch64_AM::isSVEMaskOfIdenticalElements(Val) && - 
AArch64_AM::isSVEMoveMaskPreferredLogicalImmediate(Val); - }]; -} - -class SVELogicalImmNotOperand : AsmOperandClass { - let Name = "SVELogicalImm" # Width # "Not"; - let DiagnosticType = "LogicalSecondSource"; - let PredicateMethod = "isLogicalImm"; - let RenderMethod = "addLogicalImmNotOperands"; -} - -def sve_logical_imm8_not : Operand { - let ParserMatchClass = SVELogicalImmNotOperand<8>; -} - -def sve_logical_imm16_not : Operand { - let ParserMatchClass = SVELogicalImmNotOperand<16>; -} - -def sve_logical_imm32_not : Operand { - let ParserMatchClass = SVELogicalImmNotOperand<32>; -} - -class SVEShiftedImmOperand - : AsmOperandClass { - let Name = "SVE" # Infix # "Imm" # ElementWidth; - let DiagnosticType = "Invalid" # Name; - let RenderMethod = "addImmWithOptionalShiftOperands<8>"; - let ParserMethod = "tryParseImmWithOptionalShift"; - let PredicateMethod = Predicate; -} - -def SVECpyImmOperand8 : SVEShiftedImmOperand<8, "Cpy", "isSVECpyImm">; -def SVECpyImmOperand16 : SVEShiftedImmOperand<16, "Cpy", "isSVECpyImm">; -def SVECpyImmOperand32 : SVEShiftedImmOperand<32, "Cpy", "isSVECpyImm">; -def SVECpyImmOperand64 : SVEShiftedImmOperand<64, "Cpy", "isSVECpyImm">; - -def SVEAddSubImmOperand8 : SVEShiftedImmOperand<8, "AddSub", "isSVEAddSubImm">; -def SVEAddSubImmOperand16 : SVEShiftedImmOperand<16, "AddSub", "isSVEAddSubImm">; -def SVEAddSubImmOperand32 : SVEShiftedImmOperand<32, "AddSub", "isSVEAddSubImm">; -def SVEAddSubImmOperand64 : SVEShiftedImmOperand<64, "AddSub", "isSVEAddSubImm">; - -class imm8_opt_lsl - : Operand { - let EncoderMethod = "getImm8OptLsl"; - let DecoderMethod = "DecodeImm8OptLsl<" # ElementWidth # ">"; - let PrintMethod = "printImm8OptLsl<" # printType # ">"; - let ParserMatchClass = OpndClass; - let MIOperandInfo = (ops i32imm, i32imm); -} - -def cpy_imm8_opt_lsl_i8 : imm8_opt_lsl<8, "int8_t", SVECpyImmOperand8>; -def cpy_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "int16_t", SVECpyImmOperand16>; -def cpy_imm8_opt_lsl_i32 : imm8_opt_lsl<32, 
"int32_t", SVECpyImmOperand32>; -def cpy_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "int64_t", SVECpyImmOperand64>; - -def addsub_imm8_opt_lsl_i8 : imm8_opt_lsl<8, "uint8_t", SVEAddSubImmOperand8>; -def addsub_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "uint16_t", SVEAddSubImmOperand16>; -def addsub_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "uint32_t", SVEAddSubImmOperand32>; -def addsub_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "uint64_t", SVEAddSubImmOperand64>; - -def SVEAddSubImm8Pat : ComplexPattern", []>; -def SVEAddSubImm16Pat : ComplexPattern", []>; -def SVEAddSubImm32Pat : ComplexPattern", []>; -def SVEAddSubImm64Pat : ComplexPattern", []>; - -def SVELogicalImm8Pat : ComplexPattern", []>; -def SVELogicalImm16Pat : ComplexPattern", []>; -def SVELogicalImm32Pat : ComplexPattern", []>; -def SVELogicalImm64Pat : ComplexPattern", []>; - -def SVELogicalImm8NotPat : ComplexPattern", []>; -def SVELogicalImm16NotPat : ComplexPattern", []>; -def SVELogicalImm32NotPat : ComplexPattern", []>; -def SVELogicalImm64NotPat : ComplexPattern", []>; - -def SVE8BitLslImm32 : ComplexPattern; -def SVE8BitLslImm64 : ComplexPattern; -class SVE8BitLslImm { - ComplexPattern Pat = !cond( - !eq(ty, i32): SVE8BitLslImm32, - !eq(ty, i64): SVE8BitLslImm64); -} - -def SVEArithUImm8Pat : ComplexPattern", []>; -def SVEArithUImm16Pat : ComplexPattern", []>; -def SVEArithUImm32Pat : ComplexPattern", []>; -def SVEArithUImm64Pat : ComplexPattern", []>; - -def SVEArithSImmPat32 : ComplexPattern; -def SVEArithSImmPat64 : ComplexPattern; - -def SVEShiftImmL8 : ComplexPattern", []>; -def SVEShiftImmL16 : ComplexPattern", []>; -def SVEShiftImmL32 : ComplexPattern", []>; -def SVEShiftImmL64 : ComplexPattern", []>; -def SVEShiftImmR8 : ComplexPattern", []>; -def SVEShiftImmR16 : ComplexPattern", []>; -def SVEShiftImmR32 : ComplexPattern", []>; -def SVEShiftImmR64 : ComplexPattern", []>; - -def SVEAllActive : ComplexPattern; - -class SVEExactFPImm : AsmOperandClass { - let Name = "SVEExactFPImmOperand" # Suffix; - let 
DiagnosticType = "Invalid" # Name; - let ParserMethod = "tryParseFPImm"; - let PredicateMethod = "isExactFPImm<" # ValA # ", " # ValB # ">"; - let RenderMethod = "addExactFPImmOperands<" # ValA # ", " # ValB # ">"; -} - -class SVEExactFPImmOperand : Operand { - let PrintMethod = "printExactFPImm<" # ValA # ", " # ValB # ">"; - let ParserMatchClass = SVEExactFPImm; -} - -def sve_fpimm_half_one - : SVEExactFPImmOperand<"HalfOne", "AArch64ExactFPImm::half", - "AArch64ExactFPImm::one">; -def sve_fpimm_half_two - : SVEExactFPImmOperand<"HalfTwo", "AArch64ExactFPImm::half", - "AArch64ExactFPImm::two">; -def sve_fpimm_zero_one - : SVEExactFPImmOperand<"ZeroOne", "AArch64ExactFPImm::zero", - "AArch64ExactFPImm::one">; - -def sve_incdec_imm : Operand, TImmLeaf 0) && (((uint32_t)Imm) < 17); -}]> { - let ParserMatchClass = Imm1_16Operand; - let EncoderMethod = "getSVEIncDecImm"; - let DecoderMethod = "DecodeSVEIncDecImm"; -} - -// This allows i32 immediate extraction from i64 based arithmetic. -def sve_cnt_mul_imm_i32 : ComplexPattern">; -def sve_cnt_mul_imm_i64 : ComplexPattern">; -def sve_cnt_shl_imm : ComplexPattern">; - -def sve_ext_imm_0_31 : ComplexPattern">; -def sve_ext_imm_0_63 : ComplexPattern">; -def sve_ext_imm_0_127 : ComplexPattern">; -def sve_ext_imm_0_255 : ComplexPattern">; - -def int_aarch64_sve_cntp_oneuse : PatFrag<(ops node:$pred, node:$src2), - (int_aarch64_sve_cntp node:$pred, node:$src2), [{ - return N->hasOneUse(); -}]>; - -def step_vector_oneuse : PatFrag<(ops node:$idx), - (step_vector node:$idx), [{ - return N->hasOneUse(); -}]>; - - -//===----------------------------------------------------------------------===// -// SVE PTrue - These are used extensively throughout the pattern matching so -// it's important we define them first. 
-//===----------------------------------------------------------------------===// - -class sve_int_ptrue sz8_64, bits<3> opc, string asm, PPRRegOp pprty, - ValueType vt, SDPatternOperator op> -: I<(outs pprty:$Pd), (ins sve_pred_enum:$pattern), - asm, "\t$Pd, $pattern", - "", - [(set (vt pprty:$Pd), (op sve_pred_enum:$pattern))]>, Sched<[]> { - bits<4> Pd; - bits<5> pattern; - let Inst{31-24} = 0b00100101; - let Inst{23-22} = sz8_64; - let Inst{21-19} = 0b011; - let Inst{18-17} = opc{2-1}; - let Inst{16} = opc{0}; - let Inst{15-10} = 0b111000; - let Inst{9-5} = pattern; - let Inst{4} = 0b0; - let Inst{3-0} = Pd; - - let Defs = !if(!eq (opc{0}, 1), [NZCV], []); - let ElementSize = pprty.ElementSize; - let isReMaterializable = 1; -} - -multiclass sve_int_ptrue opc, string asm, SDPatternOperator op> { - def _B : sve_int_ptrue<0b00, opc, asm, PPR8, nxv16i1, op>; - def _H : sve_int_ptrue<0b01, opc, asm, PPR16, nxv8i1, op>; - def _S : sve_int_ptrue<0b10, opc, asm, PPR32, nxv4i1, op>; - def _D : sve_int_ptrue<0b11, opc, asm, PPR64, nxv2i1, op>; - - def : InstAlias(NAME # _B) PPR8:$Pd, 0b11111), 1>; - def : InstAlias(NAME # _H) PPR16:$Pd, 0b11111), 1>; - def : InstAlias(NAME # _S) PPR32:$Pd, 0b11111), 1>; - def : InstAlias(NAME # _D) PPR64:$Pd, 0b11111), 1>; -} - -def SDT_AArch64PTrue : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; -def AArch64ptrue : SDNode<"AArch64ISD::PTRUE", SDT_AArch64PTrue>; - -let Predicates = [HasSVEorStreamingSVE] in { - defm PTRUE : sve_int_ptrue<0b000, "ptrue", AArch64ptrue>; - defm PTRUES : sve_int_ptrue<0b001, "ptrues", null_frag>; -} - -//===----------------------------------------------------------------------===// -// SVE pattern match helpers. 
-//===----------------------------------------------------------------------===// - -class SVE_1_Op_Pat -: Pat<(vtd (op vt1:$Op1)), - (inst $Op1)>; - -class SVE_1_Op_Passthru_Pat -: Pat<(vtd (op pg:$Op1, vts:$Op2, vtd:$Op3)), - (inst $Op3, $Op1, $Op2)>; - - -multiclass SVE_1_Op_PassthruUndef_Pat { - def : Pat<(vtd (op pg:$Op1, vts:$Op2, (vtd undef))), - (inst (IMPLICIT_DEF), $Op1, $Op2)>; - def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, vtd:$Op3)), - (inst $Op3, $Op1, $Op2)>; -} - -// Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the -// type of rounding. This is matched by timm0_1 in pattern below and ignored. -class SVE_1_Op_Passthru_Round_Pat -: Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), vtd:$Op3)), - (inst $Op3, $Op1, $Op2)>; - -multiclass SVE_1_Op_PassthruUndef_Round_Pat{ - def : Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), (vtd undef))), - (inst (IMPLICIT_DEF), $Op1, $Op2)>; - def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, (i64 timm0_1), vtd:$Op3)), - (inst $Op3, $Op1, $Op2)>; -} - -class SVE_1_Op_Imm_OptLsl_Reverse_Pat - : Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))), - (inst $Op1, i32:$imm, i32:$shift)>; - -class SVE_1_Op_Imm_OptLsl_Pat - : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))))), - (inst $Op1, i32:$imm, i32:$shift)>; - -class SVE_1_Op_Imm_Arith_All_Active - : Pat<(vt (op (pt (SVEAllActive)), (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm)))))), - (inst $Op1, i32:$imm)>; - -class SVE_1_Op_Imm_Log_Pat - : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i64:$imm)))))), - (inst $Op1, i64:$imm)>; - -class SVE_2_Op_Pat -: Pat<(vtd (op vt1:$Op1, vt2:$Op2)), - (inst $Op1, $Op2)>; - -class SVE_2_Op_Pred_All_Active -: Pat<(vtd (op (pt (SVEAllActive)), vt1:$Op1, vt2:$Op2)), - (inst $Op1, $Op2)>; - -class SVE_2_Op_Pred_All_Active_Pt -: Pat<(vtd (op (pt (SVEAllActive:$Op1)), vt1:$Op2, vt2:$Op3)), - (inst $Op1, $Op2, $Op3)>; - -class 
SVE_3_Op_Pat -: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)), - (inst $Op1, $Op2, $Op3)>; - -multiclass SVE_3_Op_Undef_Pat { - def : Pat<(vtd (op (vt1 undef), vt2:$Op1, vt3:$Op2)), - (inst (IMPLICIT_DEF), $Op1, $Op2)>; - def : Pat<(vtd (op vt1:$Op1, (vt2 (SVEAllActive:$Op2)), vt3:$Op3)), - (inst $Op1, $Op2, $Op3)>; -} - -class SVE_4_Op_Pat -: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, vt4:$Op4)), - (inst $Op1, $Op2, $Op3, $Op4)>; - -class SVE_2_Op_Imm_Pat -: Pat<(vtd (op vt1:$Op1, (vt2 ImmTy:$Op2))), - (inst $Op1, ImmTy:$Op2)>; - -class SVE_3_Op_Imm_Pat -: Pat<(vtd (op vt1:$Op1, vt2:$Op2, (vt3 ImmTy:$Op3))), - (inst $Op1, $Op2, ImmTy:$Op3)>; - -class SVE_4_Op_Imm_Pat -: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, (vt4 ImmTy:$Op4))), - (inst $Op1, $Op2, $Op3, ImmTy:$Op4)>; - -def SVEDup0 : ComplexPattern; -def SVEDup0Undef : ComplexPattern; - -let AddedComplexity = 1 in { -class SVE_3_Op_Pat_SelZero -: Pat<(vtd (vtd (op vt1:$Op1, (vselect vt1:$Op1, vt2:$Op2, (SVEDup0)), vt3:$Op3))), - (inst $Op1, $Op2, $Op3)>; - -class SVE_3_Op_Pat_Shift_Imm_SelZero -: Pat<(vtd (op vt1:$Op1, (vselect vt1:$Op1, vt2:$Op2, (SVEDup0)), (i32 (vt3:$Op3)))), - (inst $Op1, $Op2, vt3:$Op3)>; -} - -// -// Common but less generic patterns. 
-// - -class SVE_1_Op_AllActive_Pat -: Pat<(vtd (op vt1:$Op1)), - (inst (IMPLICIT_DEF), (ptrue 31), $Op1)>; - -class SVE_2_Op_AllActive_Pat -: Pat<(vtd (op vt1:$Op1, vt2:$Op2)), - (inst (ptrue 31), $Op1, $Op2)>; - -class SVE_InReg_Extend -: Pat<(vt (op pt:$Pg, vt:$Src, inreg_vt, vt:$PassThru)), - (inst $PassThru, $Pg, $Src)>; - -multiclass SVE_InReg_Extend_PassthruUndef { - def : Pat<(vt (op pt:$Pg, vt:$Src, inreg_vt, (vt undef))), - (inst (IMPLICIT_DEF), $Pg, $Src)>; - def : Pat<(vt (op (pt (SVEAllActive:$Pg)), vt:$Src, inreg_vt, vt:$PassThru)), - (inst $PassThru, $Pg, $Src)>; -} - -class SVE_Shift_DupImm_Pred_Pat -: Pat<(vt (op pt:$Pg, vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))), - (inst $Pg, $Rn, i32:$imm)>; - -class SVE_Shift_DupImm_All_Active_Pat -: Pat<(vt (op (pt (SVEAllActive)), vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))), - (inst $Rn, i32:$imm)>; - -class SVE_2_Op_Fp_Imm_Pat -: Pat<(vt (op (pt PPR_3b:$Pg), (vt ZPR:$Zs1), (vt (AArch64dup (it immL))))), - (inst $Pg, $Zs1, imm)>; - -class SVE_2_Op_Fp_Imm_Pat_Zero -: Pat<(vt (op pt:$Pg, (vselect pt:$Pg, vt:$Zs1, (SVEDup0)), - (vt (AArch64dup (it immL))))), - (inst $Pg, $Zs1, imm)>; - -// -// Pseudo -> Instruction mappings -// -def getSVEPseudoMap : InstrMapping { - let FilterClass = "SVEPseudo2Instr"; - let RowFields = ["PseudoName"]; - let ColFields = ["IsInstr"]; - let KeyCol = ["0"]; - let ValueCols = [["1"]]; -} - -class SVEPseudo2Instr { - string PseudoName = name; - bit IsInstr = instr; -} - -// Lookup e.g. DIV -> DIVR -def getSVERevInstr : InstrMapping { - let FilterClass = "SVEInstr2Rev"; - let RowFields = ["InstrName"]; - let ColFields = ["isReverseInstr"]; - let KeyCol = ["0"]; - let ValueCols = [["1"]]; -} - -// Lookup e.g. 
DIVR -> DIV -def getSVENonRevInstr : InstrMapping { - let FilterClass = "SVEInstr2Rev"; - let RowFields = ["InstrName"]; - let ColFields = ["isReverseInstr"]; - let KeyCol = ["1"]; - let ValueCols = [["0"]]; -} - -class SVEInstr2Rev { - string InstrName = !if(name1IsReverseInstr, name1, name2); - bit isReverseInstr = name1IsReverseInstr; -} - -// -// Pseudos for destructive operands -// -let hasNoSchedulingInfo = 1 in { - class PredTwoOpPseudo - : SVEPseudo2Instr, - Pseudo<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zs1, zprty:$Zs2), []> { - let FalseLanes = flags; - } - - class PredTwoOpImmPseudo - : SVEPseudo2Instr, - Pseudo<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zs1, immty:$imm), []> { - let FalseLanes = flags; - } - - class PredThreeOpPseudo - : SVEPseudo2Instr, - Pseudo<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zs1, zprty:$Zs2, zprty:$Zs3), []> { - let FalseLanes = flags; - } -} - -// -// Pseudos for passthru operands -// -let hasNoSchedulingInfo = 1 in { - class PredOneOpPassthruPseudo - : SVEPseudo2Instr, - Pseudo<(outs zprty:$Zd), (ins zprty:$Passthru, PPR3bAny:$Pg, zprty:$Zs), []>; -} - -//===----------------------------------------------------------------------===// -// SVE Predicate Misc Group -//===----------------------------------------------------------------------===// - -class sve_int_pfalse opc, string asm> -: I<(outs PPR8:$Pd), (ins), - asm, "\t$Pd", - "", - []>, Sched<[]> { - bits<4> Pd; - let Inst{31-24} = 0b00100101; - let Inst{23-22} = opc{5-4}; - let Inst{21-19} = 0b011; - let Inst{18-16} = opc{3-1}; - let Inst{15-10} = 0b111001; - let Inst{9} = opc{0}; - let Inst{8-4} = 0b00000; - let Inst{3-0} = Pd; - - let isReMaterializable = 1; -} - -multiclass sve_int_pfalse opc, string asm> { - def NAME : sve_int_pfalse; - - def : Pat<(nxv16i1 (splat_vector (i32 0))), (!cast(NAME))>; - def : Pat<(nxv8i1 (splat_vector (i32 0))), (!cast(NAME))>; - def : Pat<(nxv4i1 (splat_vector (i32 0))), (!cast(NAME))>; - def : Pat<(nxv2i1 (splat_vector (i32 
0))), (!cast(NAME))>; -} - -class sve_int_ptest opc, string asm> -: I<(outs), (ins PPRAny:$Pg, PPR8:$Pn), - asm, "\t$Pg, $Pn", - "", - []>, Sched<[]> { - bits<4> Pg; - bits<4> Pn; - let Inst{31-24} = 0b00100101; - let Inst{23-22} = opc{5-4}; - let Inst{21-19} = 0b010; - let Inst{18-16} = opc{3-1}; - let Inst{15-14} = 0b11; - let Inst{13-10} = Pg; - let Inst{9} = opc{0}; - let Inst{8-5} = Pn; - let Inst{4-0} = 0b00000; - - let Defs = [NZCV]; - let isCompare = 1; -} - -class sve_int_pfirst_next sz8_64, bits<5> opc, string asm, - PPRRegOp pprty> -: I<(outs pprty:$Pdn), (ins PPRAny:$Pg, pprty:$_Pdn), - asm, "\t$Pdn, $Pg, $_Pdn", - "", - []>, Sched<[]> { - bits<4> Pdn; - bits<4> Pg; - let Inst{31-24} = 0b00100101; - let Inst{23-22} = sz8_64; - let Inst{21-19} = 0b011; - let Inst{18-16} = opc{4-2}; - let Inst{15-11} = 0b11000; - let Inst{10-9} = opc{1-0}; - let Inst{8-5} = Pg; - let Inst{4} = 0; - let Inst{3-0} = Pdn; - - let Constraints = "$Pdn = $_Pdn"; - let Defs = [NZCV]; - let isPTestLike = 1; - let ElementSize = pprty.ElementSize; -} - -multiclass sve_int_pfirst opc, string asm, SDPatternOperator op> { - def _B : sve_int_pfirst_next<0b01, opc, asm, PPR8>; - - def : SVE_2_Op_Pat(NAME # _B)>; -} - -multiclass sve_int_pnext opc, string asm, SDPatternOperator op> { - def _B : sve_int_pfirst_next<0b00, opc, asm, PPR8>; - def _H : sve_int_pfirst_next<0b01, opc, asm, PPR16>; - def _S : sve_int_pfirst_next<0b10, opc, asm, PPR32>; - def _D : sve_int_pfirst_next<0b11, opc, asm, PPR64>; - - def : SVE_2_Op_Pat(NAME # _B)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE Predicate Count Group -//===----------------------------------------------------------------------===// - -class sve_int_count_r sz8_64, bits<5> opc, string asm, - RegisterOperand dty, PPRRegOp pprty, RegisterOperand sty> -: I<(outs dty:$Rdn), (ins pprty:$Pg, 
sty:$_Rdn), - asm, "\t$Rdn, $Pg", - "", - []>, Sched<[]> { - bits<5> Rdn; - bits<4> Pg; - let Inst{31-24} = 0b00100101; - let Inst{23-22} = sz8_64; - let Inst{21-19} = 0b101; - let Inst{18-16} = opc{4-2}; - let Inst{15-11} = 0b10001; - let Inst{10-9} = opc{1-0}; - let Inst{8-5} = Pg; - let Inst{4-0} = Rdn; - - // Signed 32bit forms require their GPR operand printed. - let AsmString = !if(!eq(opc{4,2-0}, 0b0000), - !strconcat(asm, "\t$Rdn, $Pg, $_Rdn"), - !strconcat(asm, "\t$Rdn, $Pg")); - let Constraints = "$Rdn = $_Rdn"; -} - -multiclass sve_int_count_r_s32 opc, string asm, - SDPatternOperator op> { - def _B : sve_int_count_r<0b00, opc, asm, GPR64z, PPR8, GPR64as32>; - def _H : sve_int_count_r<0b01, opc, asm, GPR64z, PPR16, GPR64as32>; - def _S : sve_int_count_r<0b10, opc, asm, GPR64z, PPR32, GPR64as32>; - def _D : sve_int_count_r<0b11, opc, asm, GPR64z, PPR64, GPR64as32>; - - def : Pat<(i32 (op GPR32:$Rn, (nxv16i1 PPRAny:$Pg))), - (EXTRACT_SUBREG (!cast(NAME # _B) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32)), sub_32)>; - def : Pat<(i64 (sext (i32 (op GPR32:$Rn, (nxv16i1 PPRAny:$Pg))))), - (!cast(NAME # _B) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32))>; - - def : Pat<(i32 (op GPR32:$Rn, (nxv8i1 PPRAny:$Pg))), - (EXTRACT_SUBREG (!cast(NAME # _H) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32)), sub_32)>; - def : Pat<(i64 (sext (i32 (op GPR32:$Rn, (nxv8i1 PPRAny:$Pg))))), - (!cast(NAME # _H) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32))>; - - def : Pat<(i32 (op GPR32:$Rn, (nxv4i1 PPRAny:$Pg))), - (EXTRACT_SUBREG (!cast(NAME # _S) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32)), sub_32)>; - def : Pat<(i64 (sext (i32 (op GPR32:$Rn, (nxv4i1 PPRAny:$Pg))))), - (!cast(NAME # _S) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32))>; - - def : Pat<(i32 (op GPR32:$Rn, (nxv2i1 PPRAny:$Pg))), - (EXTRACT_SUBREG (!cast(NAME # _D) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32)), sub_32)>; - def : Pat<(i64 (sext 
(i32 (op GPR32:$Rn, (nxv2i1 PPRAny:$Pg))))), - (!cast(NAME # _D) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32))>; -} - -multiclass sve_int_count_r_u32 opc, string asm, - SDPatternOperator op> { - def _B : sve_int_count_r<0b00, opc, asm, GPR32z, PPR8, GPR32z>; - def _H : sve_int_count_r<0b01, opc, asm, GPR32z, PPR16, GPR32z>; - def _S : sve_int_count_r<0b10, opc, asm, GPR32z, PPR32, GPR32z>; - def _D : sve_int_count_r<0b11, opc, asm, GPR32z, PPR64, GPR32z>; - - def : Pat<(i32 (op GPR32:$Rn, (nxv16i1 PPRAny:$Pg))), - (!cast(NAME # _B) PPRAny:$Pg, $Rn)>; - def : Pat<(i32 (op GPR32:$Rn, (nxv8i1 PPRAny:$Pg))), - (!cast(NAME # _H) PPRAny:$Pg, $Rn)>; - def : Pat<(i32 (op GPR32:$Rn, (nxv4i1 PPRAny:$Pg))), - (!cast(NAME # _S) PPRAny:$Pg, $Rn)>; - def : Pat<(i32 (op GPR32:$Rn, (nxv2i1 PPRAny:$Pg))), - (!cast(NAME # _D) PPRAny:$Pg, $Rn)>; -} - -multiclass sve_int_count_r_x64 opc, string asm, - SDPatternOperator op, - SDPatternOperator combine_op = null_frag> { - def _B : sve_int_count_r<0b00, opc, asm, GPR64z, PPR8, GPR64z>; - def _H : sve_int_count_r<0b01, opc, asm, GPR64z, PPR16, GPR64z>; - def _S : sve_int_count_r<0b10, opc, asm, GPR64z, PPR32, GPR64z>; - def _D : sve_int_count_r<0b11, opc, asm, GPR64z, PPR64, GPR64z>; - - def : Pat<(i64 (op GPR64:$Rn, (nxv16i1 PPRAny:$Pg))), - (!cast(NAME # _B) PPRAny:$Pg, $Rn)>; - def : Pat<(i64 (op GPR64:$Rn, (nxv8i1 PPRAny:$Pg))), - (!cast(NAME # _H) PPRAny:$Pg, $Rn)>; - def : Pat<(i64 (op GPR64:$Rn, (nxv4i1 PPRAny:$Pg))), - (!cast(NAME # _S) PPRAny:$Pg, $Rn)>; - def : Pat<(i64 (op GPR64:$Rn, (nxv2i1 PPRAny:$Pg))), - (!cast(NAME # _D) PPRAny:$Pg, $Rn)>; - - // combine_op(x, cntp(all_active, p)) ==> inst p, x - def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv16i1 (SVEAllActive)), (nxv16i1 PPRAny:$pred)))), - (!cast(NAME # _B) PPRAny:$pred, $Rn)>; - def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv8i1 (SVEAllActive)), (nxv8i1 PPRAny:$pred)))), - (!cast(NAME # _H) PPRAny:$pred, $Rn)>; 
- def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv4i1 (SVEAllActive)), (nxv4i1 PPRAny:$pred)))), - (!cast(NAME # _S) PPRAny:$pred, $Rn)>; - def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv2i1 (SVEAllActive)), (nxv2i1 PPRAny:$pred)))), - (!cast(NAME # _D) PPRAny:$pred, $Rn)>; - - // combine_op(x, cntp(p, p)) ==> inst p, x - def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv16i1 PPRAny:$pred), (nxv16i1 PPRAny:$pred)))), - (!cast(NAME # _B) PPRAny:$pred, $Rn)>; - def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv8i1 PPRAny:$pred), (nxv8i1 PPRAny:$pred)))), - (!cast(NAME # _H) PPRAny:$pred, $Rn)>; - def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv4i1 PPRAny:$pred), (nxv4i1 PPRAny:$pred)))), - (!cast(NAME # _S) PPRAny:$pred, $Rn)>; - def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv2i1 PPRAny:$pred), (nxv2i1 PPRAny:$pred)))), - (!cast(NAME # _D) PPRAny:$pred, $Rn)>; -} - -class sve_int_count_v sz8_64, bits<5> opc, string asm, - ZPRRegOp zprty, PPRRegOp pprty> -: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, pprty:$Pm), - asm, "\t$Zdn, $Pm", - "", - []>, Sched<[]> { - bits<4> Pm; - bits<5> Zdn; - let Inst{31-24} = 0b00100101; - let Inst{23-22} = sz8_64; - let Inst{21-19} = 0b101; - let Inst{18-16} = opc{4-2}; - let Inst{15-11} = 0b10000; - let Inst{10-9} = opc{1-0}; - let Inst{8-5} = Pm; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve_int_count_v opc, string asm, - SDPatternOperator op = null_frag> { - def _H : sve_int_count_v<0b01, opc, asm, ZPR16, PPR16>; - def _S : sve_int_count_v<0b10, opc, asm, ZPR32, PPR32>; - def _D : sve_int_count_v<0b11, opc, asm, ZPR64, PPR64>; - - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; - - def : InstAlias(NAME # "_H") ZPR16:$Zdn, PPRAny:$Pm), 
0>; - def : InstAlias(NAME # "_S") ZPR32:$Zdn, PPRAny:$Pm), 0>; - def : InstAlias(NAME # "_D") ZPR64:$Zdn, PPRAny:$Pm), 0>; -} - -class sve_int_pcount_pred sz8_64, bits<4> opc, string asm, - PPRRegOp pprty> -: I<(outs GPR64:$Rd), (ins PPRAny:$Pg, pprty:$Pn), - asm, "\t$Rd, $Pg, $Pn", - "", - []>, Sched<[]> { - bits<4> Pg; - bits<4> Pn; - bits<5> Rd; - let Inst{31-24} = 0b00100101; - let Inst{23-22} = sz8_64; - let Inst{21-19} = 0b100; - let Inst{18-16} = opc{3-1}; - let Inst{15-14} = 0b10; - let Inst{13-10} = Pg; - let Inst{9} = opc{0}; - let Inst{8-5} = Pn; - let Inst{4-0} = Rd; -} - -multiclass sve_int_pcount_pred opc, string asm, - SDPatternOperator int_op> { - def _B : sve_int_pcount_pred<0b00, opc, asm, PPR8>; - def _H : sve_int_pcount_pred<0b01, opc, asm, PPR16>; - def _S : sve_int_pcount_pred<0b10, opc, asm, PPR32>; - def _D : sve_int_pcount_pred<0b11, opc, asm, PPR64>; - - def : SVE_2_Op_Pat(NAME # _B)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE Element Count Group -//===----------------------------------------------------------------------===// - -class sve_int_count opc, string asm> -: I<(outs GPR64:$Rd), (ins sve_pred_enum:$pattern, sve_incdec_imm:$imm4), - asm, "\t$Rd, $pattern, mul $imm4", - "", - []>, Sched<[]> { - bits<5> Rd; - bits<4> imm4; - bits<5> pattern; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = opc{2-1}; - let Inst{21-20} = 0b10; - let Inst{19-16} = imm4; - let Inst{15-11} = 0b11100; - let Inst{10} = opc{0}; - let Inst{9-5} = pattern; - let Inst{4-0} = Rd; -} - -multiclass sve_int_count opc, string asm, SDPatternOperator op> { - def NAME : sve_int_count; - - def : InstAlias(NAME) GPR64:$Rd, sve_pred_enum:$pattern, 1), 1>; - def : InstAlias(NAME) GPR64:$Rd, 0b11111, 1), 2>; - - def : Pat<(i64 (mul (op sve_pred_enum:$pattern), (sve_cnt_mul_imm_i64 i32:$imm))), - (!cast(NAME) 
sve_pred_enum:$pattern, sve_incdec_imm:$imm)>; - - def : Pat<(i64 (shl (op sve_pred_enum:$pattern), (sve_cnt_shl_imm i32:$imm))), - (!cast(NAME) sve_pred_enum:$pattern, sve_incdec_imm:$imm)>; - - def : Pat<(i64 (op sve_pred_enum:$pattern)), - (!cast(NAME) sve_pred_enum:$pattern, 1)>; -} - -class sve_int_countvlv opc, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4), - asm, "\t$Zdn, $pattern, mul $imm4", - "", - []>, Sched<[]> { - bits<5> Zdn; - bits<5> pattern; - bits<4> imm4; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = opc{4-3}; - let Inst{21} = 0b1; - let Inst{20} = opc{2}; - let Inst{19-16} = imm4; - let Inst{15-12} = 0b1100; - let Inst{11-10} = opc{1-0}; - let Inst{9-5} = pattern; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve_int_countvlv opc, string asm, ZPRRegOp zprty, - SDPatternOperator op = null_frag, - ValueType vt = OtherVT> { - def NAME : sve_int_countvlv; - - def : InstAlias(NAME) zprty:$Zdn, sve_pred_enum:$pattern, 1), 1>; - def : InstAlias(NAME) zprty:$Zdn, 0b11111, 1), 2>; - - def : Pat<(vt (op (vt zprty:$Zn), (sve_pred_enum:$pattern), (sve_incdec_imm:$imm4))), - (!cast(NAME) $Zn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4)>; -} - -class sve_int_pred_pattern_a opc, string asm> -: I<(outs GPR64:$Rdn), (ins GPR64:$_Rdn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4), - asm, "\t$Rdn, $pattern, mul $imm4", - "", - []>, Sched<[]> { - bits<5> Rdn; - bits<5> pattern; - bits<4> imm4; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = opc{2-1}; - let Inst{21-20} = 0b11; - let Inst{19-16} = imm4; - let Inst{15-11} = 0b11100; - let Inst{10} = opc{0}; - let Inst{9-5} = pattern; - let Inst{4-0} = Rdn; - - let Constraints = "$Rdn = $_Rdn"; -} - -multiclass sve_int_pred_pattern_a opc, string asm, - SDPatternOperator op, - SDPatternOperator opcnt> { - let Predicates = 
[HasSVEorStreamingSVE] in { - def NAME : sve_int_pred_pattern_a; - - def : InstAlias(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1), 1>; - def : InstAlias(NAME) GPR64:$Rdn, 0b11111, 1), 2>; - } - - let Predicates = [HasSVEorStreamingSVE, UseScalarIncVL] in { - def : Pat<(i64 (op GPR64:$Rdn, (opcnt sve_pred_enum:$pattern))), - (!cast(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1)>; - - def : Pat<(i64 (op GPR64:$Rdn, (mul (opcnt sve_pred_enum:$pattern), (sve_cnt_mul_imm_i64 i32:$imm)))), - (!cast(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, $imm)>; - - def : Pat<(i64 (op GPR64:$Rdn, (shl (opcnt sve_pred_enum:$pattern), (sve_cnt_shl_imm i32:$imm)))), - (!cast(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, $imm)>; - - def : Pat<(i32 (op GPR32:$Rdn, (i32 (trunc (opcnt (sve_pred_enum:$pattern)))))), - (i32 (EXTRACT_SUBREG (!cast(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, 1), - sub_32))>; - - def : Pat<(i32 (op GPR32:$Rdn, (mul (i32 (trunc (opcnt (sve_pred_enum:$pattern)))), (sve_cnt_mul_imm_i32 i32:$imm)))), - (i32 (EXTRACT_SUBREG (!cast(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, $imm), - sub_32))>; - - def : Pat<(i32 (op GPR32:$Rdn, (shl (i32 (trunc (opcnt (sve_pred_enum:$pattern)))), (sve_cnt_shl_imm i32:$imm)))), - (i32 (EXTRACT_SUBREG (!cast(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, $imm), - sub_32))>; - } -} - -class sve_int_pred_pattern_b opc, string asm, RegisterOperand dt, - RegisterOperand st> -: I<(outs dt:$Rdn), (ins st:$_Rdn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4), - asm, "\t$Rdn, $pattern, mul $imm4", - "", - []>, Sched<[]> { - bits<5> Rdn; - bits<5> pattern; - bits<4> imm4; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = opc{4-3}; - let Inst{21} = 0b1; - let Inst{20} = opc{2}; - let Inst{19-16} = imm4; - let Inst{15-12} = 0b1111; - let Inst{11-10} = opc{1-0}; - let Inst{9-5} = pattern; - let Inst{4-0} = Rdn; - - // 
Signed 32bit forms require their GPR operand printed. - let AsmString = !if(!eq(opc{2,0}, 0b00), - !strconcat(asm, "\t$Rdn, $_Rdn, $pattern, mul $imm4"), - !strconcat(asm, "\t$Rdn, $pattern, mul $imm4")); - - let Constraints = "$Rdn = $_Rdn"; -} - -multiclass sve_int_pred_pattern_b_s32 opc, string asm, - SDPatternOperator op> { - def NAME : sve_int_pred_pattern_b; - - def : InstAlias(NAME) GPR64z:$Rd, GPR64as32:$Rn, sve_pred_enum:$pattern, 1), 1>; - def : InstAlias(NAME) GPR64z:$Rd, GPR64as32:$Rn, 0b11111, 1), 2>; - - // NOTE: Register allocation doesn't like tied operands of differing register - // class, hence the extra INSERT_SUBREG complication. - - def : Pat<(i32 (op GPR32:$Rn, (sve_pred_enum:$pattern), (sve_incdec_imm:$imm4))), - (EXTRACT_SUBREG (!cast(NAME) (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32), sve_pred_enum:$pattern, sve_incdec_imm:$imm4), sub_32)>; - def : Pat<(i64 (sext (i32 (op GPR32:$Rn, (sve_pred_enum:$pattern), (sve_incdec_imm:$imm4))))), - (!cast(NAME) (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32), sve_pred_enum:$pattern, sve_incdec_imm:$imm4)>; -} - -multiclass sve_int_pred_pattern_b_u32 opc, string asm, - SDPatternOperator op> { - def NAME : sve_int_pred_pattern_b; - - def : InstAlias(NAME) GPR32z:$Rdn, sve_pred_enum:$pattern, 1), 1>; - def : InstAlias(NAME) GPR32z:$Rdn, 0b11111, 1), 2>; - - def : Pat<(i32 (op GPR32:$Rn, (sve_pred_enum:$pattern), (sve_incdec_imm:$imm4))), - (!cast(NAME) $Rn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4)>; -} - -multiclass sve_int_pred_pattern_b_x64 opc, string asm, - SDPatternOperator op> { - def NAME : sve_int_pred_pattern_b; - - def : InstAlias(NAME) GPR64z:$Rdn, sve_pred_enum:$pattern, 1), 1>; - def : InstAlias(NAME) GPR64z:$Rdn, 0b11111, 1), 2>; - - def : Pat<(i64 (op GPR64:$Rn, (sve_pred_enum:$pattern), (sve_incdec_imm:$imm4))), - (!cast(NAME) $Rn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4)>; -} - - -//===----------------------------------------------------------------------===// -// SVE Permute - 
Cross Lane Group -//===----------------------------------------------------------------------===// - -class sve_int_perm_dup_r sz8_64, string asm, ZPRRegOp zprty, - ValueType vt, RegisterClass srcRegType, - SDPatternOperator op> -: I<(outs zprty:$Zd), (ins srcRegType:$Rn), - asm, "\t$Zd, $Rn", - "", - [(set (vt zprty:$Zd), (op srcRegType:$Rn))]>, Sched<[]> { - bits<5> Rn; - bits<5> Zd; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21-10} = 0b100000001110; - let Inst{9-5} = Rn; - let Inst{4-0} = Zd; -} - -multiclass sve_int_perm_dup_r { - def _B : sve_int_perm_dup_r<0b00, asm, ZPR8, nxv16i8, GPR32sp, op>; - def _H : sve_int_perm_dup_r<0b01, asm, ZPR16, nxv8i16, GPR32sp, op>; - def _S : sve_int_perm_dup_r<0b10, asm, ZPR32, nxv4i32, GPR32sp, op>; - def _D : sve_int_perm_dup_r<0b11, asm, ZPR64, nxv2i64, GPR64sp, op>; - - def : InstAlias<"mov $Zd, $Rn", - (!cast(NAME # _B) ZPR8:$Zd, GPR32sp:$Rn), 1>; - def : InstAlias<"mov $Zd, $Rn", - (!cast(NAME # _H) ZPR16:$Zd, GPR32sp:$Rn), 1>; - def : InstAlias<"mov $Zd, $Rn", - (!cast(NAME # _S) ZPR32:$Zd, GPR32sp:$Rn), 1>; - def : InstAlias<"mov $Zd, $Rn", - (!cast(NAME # _D) ZPR64:$Zd, GPR64sp:$Rn), 1>; -} - -class sve_int_perm_dup_i tsz, Operand immtype, string asm, - ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$idx), - asm, "\t$Zd, $Zn$idx", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - bits<7> idx; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = {?,?}; // imm3h - let Inst{21} = 0b1; - let Inst{20-16} = tsz; - let Inst{15-10} = 0b001000; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve_int_perm_dup_i { - def _B : sve_int_perm_dup_i<{?,?,?,?,1}, sve_elm_idx_extdup_b, asm, ZPR8> { - let Inst{23-22} = idx{5-4}; - let Inst{20-17} = idx{3-0}; - } - def _H : sve_int_perm_dup_i<{?,?,?,1,0}, sve_elm_idx_extdup_h, asm, ZPR16> { - let Inst{23-22} = idx{4-3}; - let Inst{20-18} = idx{2-0}; - } - def _S : sve_int_perm_dup_i<{?,?,1,0,0}, sve_elm_idx_extdup_s, asm, 
ZPR32> { - let Inst{23-22} = idx{3-2}; - let Inst{20-19} = idx{1-0}; - } - def _D : sve_int_perm_dup_i<{?,1,0,0,0}, sve_elm_idx_extdup_d, asm, ZPR64> { - let Inst{23-22} = idx{2-1}; - let Inst{20} = idx{0}; - } - def _Q : sve_int_perm_dup_i<{1,0,0,0,0}, sve_elm_idx_extdup_q, asm, ZPR128> { - let Inst{23-22} = idx{1-0}; - } - - def : InstAlias<"mov $Zd, $Zn$idx", - (!cast(NAME # _B) ZPR8:$Zd, ZPR8:$Zn, sve_elm_idx_extdup_b:$idx), 1>; - def : InstAlias<"mov $Zd, $Zn$idx", - (!cast(NAME # _H) ZPR16:$Zd, ZPR16:$Zn, sve_elm_idx_extdup_h:$idx), 1>; - def : InstAlias<"mov $Zd, $Zn$idx", - (!cast(NAME # _S) ZPR32:$Zd, ZPR32:$Zn, sve_elm_idx_extdup_s:$idx), 1>; - def : InstAlias<"mov $Zd, $Zn$idx", - (!cast(NAME # _D) ZPR64:$Zd, ZPR64:$Zn, sve_elm_idx_extdup_d:$idx), 1>; - def : InstAlias<"mov $Zd, $Zn$idx", - (!cast(NAME # _Q) ZPR128:$Zd, ZPR128:$Zn, sve_elm_idx_extdup_q:$idx), 1>; - def : InstAlias<"mov $Zd, $Bn", - (!cast(NAME # _B) ZPR8:$Zd, FPR8asZPR:$Bn, 0), 2>; - def : InstAlias<"mov $Zd, $Hn", - (!cast(NAME # _H) ZPR16:$Zd, FPR16asZPR:$Hn, 0), 2>; - def : InstAlias<"mov $Zd, $Sn", - (!cast(NAME # _S) ZPR32:$Zd, FPR32asZPR:$Sn, 0), 2>; - def : InstAlias<"mov $Zd, $Dn", - (!cast(NAME # _D) ZPR64:$Zd, FPR64asZPR:$Dn, 0), 2>; - def : InstAlias<"mov $Zd, $Qn", - (!cast(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>; - - // Duplicate extracted element of vector into all vector elements - def : Pat<(nxv16i8 (AArch64dup (i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)))), - (!cast(NAME # _B) ZPR:$vec, sve_elm_idx_extdup_b:$index)>; - def : Pat<(nxv8i16 (AArch64dup (i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))), - (!cast(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>; - def : Pat<(nxv4i32 (AArch64dup (i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))), - (!cast(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>; - def : Pat<(nxv2i64 (AArch64dup (i64 (vector_extract (nxv2i64 ZPR:$vec), 
sve_elm_idx_extdup_d:$index)))), - (!cast(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>; - def : Pat<(nxv8f16 (AArch64dup (f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))), - (!cast(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>; - def : Pat<(nxv8bf16 (AArch64dup (bf16 (vector_extract (nxv8bf16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))), - (!cast(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>; - def : Pat<(nxv4f16 (AArch64dup (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))), - (!cast(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>; - def : Pat<(nxv2f16 (AArch64dup (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), - (!cast(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>; - def : Pat<(nxv4f32 (AArch64dup (f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))), - (!cast(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>; - def : Pat<(nxv2f32 (AArch64dup (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), - (!cast(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>; - def : Pat<(nxv2f64 (AArch64dup (f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), - (!cast(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>; -} - -class sve_int_perm_tbl sz8_64, bits<2> opc, string asm, ZPRRegOp zprty, - RegisterOperand VecList> -: I<(outs zprty:$Zd), (ins VecList:$Zn, zprty:$Zm), - asm, "\t$Zd, $Zn, $Zm", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<5> Zm; - bits<5> Zn; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21} = 0b1; - let Inst{20-16} = Zm; - let Inst{15-13} = 0b001; - let Inst{12-11} = opc; - let Inst{10} = 0b0; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve_int_perm_tbl { - def _B : sve_int_perm_tbl<0b00, 0b10, asm, ZPR8, Z_b>; - def _H : sve_int_perm_tbl<0b01, 0b10, asm, ZPR16, Z_h>; - def _S : sve_int_perm_tbl<0b10, 0b10, asm, ZPR32, Z_s>; - def _D : sve_int_perm_tbl<0b11, 0b10, 
asm, ZPR64, Z_d>; - - def : InstAlias(NAME # _B) ZPR8:$Zd, ZPR8:$Zn, ZPR8:$Zm), 0>; - def : InstAlias(NAME # _H) ZPR16:$Zd, ZPR16:$Zn, ZPR16:$Zm), 0>; - def : InstAlias(NAME # _S) ZPR32:$Zd, ZPR32:$Zn, ZPR32:$Zm), 0>; - def : InstAlias(NAME # _D) ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zm), 0>; - - def : SVE_2_Op_Pat(NAME # _B)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; - - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; - - def : SVE_2_Op_Pat(NAME # _H)>; -} - -multiclass sve2_int_perm_tbl { - def _B : sve_int_perm_tbl<0b00, 0b01, asm, ZPR8, ZZ_b>; - def _H : sve_int_perm_tbl<0b01, 0b01, asm, ZPR16, ZZ_h>; - def _S : sve_int_perm_tbl<0b10, 0b01, asm, ZPR32, ZZ_s>; - def _D : sve_int_perm_tbl<0b11, 0b01, asm, ZPR64, ZZ_d>; - - def : Pat<(nxv16i8 (op nxv16i8:$Op1, nxv16i8:$Op2, nxv16i8:$Op3)), - (nxv16i8 (!cast(NAME # _B) (REG_SEQUENCE ZPR2, nxv16i8:$Op1, zsub0, - nxv16i8:$Op2, zsub1), - nxv16i8:$Op3))>; - - def : Pat<(nxv8i16 (op nxv8i16:$Op1, nxv8i16:$Op2, nxv8i16:$Op3)), - (nxv8i16 (!cast(NAME # _H) (REG_SEQUENCE ZPR2, nxv8i16:$Op1, zsub0, - nxv8i16:$Op2, zsub1), - nxv8i16:$Op3))>; - - def : Pat<(nxv4i32 (op nxv4i32:$Op1, nxv4i32:$Op2, nxv4i32:$Op3)), - (nxv4i32 (!cast(NAME # _S) (REG_SEQUENCE ZPR2, nxv4i32:$Op1, zsub0, - nxv4i32:$Op2, zsub1), - nxv4i32:$Op3))>; - - def : Pat<(nxv2i64 (op nxv2i64:$Op1, nxv2i64:$Op2, nxv2i64:$Op3)), - (nxv2i64 (!cast(NAME # _D) (REG_SEQUENCE ZPR2, nxv2i64:$Op1, zsub0, - nxv2i64:$Op2, zsub1), - nxv2i64:$Op3))>; - - def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8i16:$Op3)), - (nxv8f16 (!cast(NAME # _H) (REG_SEQUENCE ZPR2, nxv8f16:$Op1, zsub0, - nxv8f16:$Op2, zsub1), - nxv8i16:$Op3))>; - - def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4i32:$Op3)), - (nxv4f32 (!cast(NAME # _S) (REG_SEQUENCE ZPR2, nxv4f32:$Op1, zsub0, - nxv4f32:$Op2, zsub1), - nxv4i32:$Op3))>; - - def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, 
nxv2i64:$Op3)), - (nxv2f64 (!cast(NAME # _D) (REG_SEQUENCE ZPR2, nxv2f64:$Op1, zsub0, - nxv2f64:$Op2, zsub1), - nxv2i64:$Op3))>; - - def : Pat<(nxv8bf16 (op nxv8bf16:$Op1, nxv8bf16:$Op2, nxv8i16:$Op3)), - (nxv8bf16 (!cast(NAME # _H) (REG_SEQUENCE ZPR2, nxv8bf16:$Op1, zsub0, - nxv8bf16:$Op2, zsub1), - nxv8i16:$Op3))>; -} - -class sve2_int_perm_tbx sz8_64, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, zprty:$Zm), - asm, "\t$Zd, $Zn, $Zm", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<5> Zm; - bits<5> Zn; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21} = 0b1; - let Inst{20-16} = Zm; - let Inst{15-10} = 0b001011; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; - - let Constraints = "$Zd = $_Zd"; -} - -multiclass sve2_int_perm_tbx { - def _B : sve2_int_perm_tbx<0b00, asm, ZPR8>; - def _H : sve2_int_perm_tbx<0b01, asm, ZPR16>; - def _S : sve2_int_perm_tbx<0b10, asm, ZPR32>; - def _D : sve2_int_perm_tbx<0b11, asm, ZPR64>; - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; - - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; - - def : SVE_3_Op_Pat(NAME # _H)>; -} - -class sve_int_perm_reverse_z sz8_64, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$Zn), - asm, "\t$Zd, $Zn", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21-10} = 0b111000001110; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve_int_perm_reverse_z { - def _B : sve_int_perm_reverse_z<0b00, asm, ZPR8>; - def _H : sve_int_perm_reverse_z<0b01, asm, ZPR16>; - def _S : sve_int_perm_reverse_z<0b10, asm, ZPR32>; - def _D : sve_int_perm_reverse_z<0b11, asm, ZPR64>; - - def : SVE_1_Op_Pat(NAME # _B)>; - def : SVE_1_Op_Pat(NAME # _H)>; - def : SVE_1_Op_Pat(NAME # _S)>; - def : SVE_1_Op_Pat(NAME # _D)>; - - def : 
SVE_1_Op_Pat(NAME # _D)>; - def : SVE_1_Op_Pat(NAME # _S)>; - def : SVE_1_Op_Pat(NAME # _H)>; - def : SVE_1_Op_Pat(NAME # _D)>; - def : SVE_1_Op_Pat(NAME # _S)>; - def : SVE_1_Op_Pat(NAME # _D)>; - - def : SVE_1_Op_Pat(NAME # _D)>; - def : SVE_1_Op_Pat(NAME # _S)>; - def : SVE_1_Op_Pat(NAME # _H)>; -} - -class sve_int_perm_reverse_p sz8_64, string asm, PPRRegOp pprty> -: I<(outs pprty:$Pd), (ins pprty:$Pn), - asm, "\t$Pd, $Pn", - "", - []>, Sched<[]> { - bits<4> Pd; - bits<4> Pn; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21-9} = 0b1101000100000; - let Inst{8-5} = Pn; - let Inst{4} = 0b0; - let Inst{3-0} = Pd; -} - -multiclass sve_int_perm_reverse_p { - def _B : sve_int_perm_reverse_p<0b00, asm, PPR8>; - def _H : sve_int_perm_reverse_p<0b01, asm, PPR16>; - def _S : sve_int_perm_reverse_p<0b10, asm, PPR32>; - def _D : sve_int_perm_reverse_p<0b11, asm, PPR64>; - - def : SVE_1_Op_Pat(NAME # _B)>; - def : SVE_1_Op_Pat(NAME # _H)>; - def : SVE_1_Op_Pat(NAME # _S)>; - def : SVE_1_Op_Pat(NAME # _D)>; -} - -class sve_int_perm_unpk sz16_64, bits<2> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> -: I<(outs zprty1:$Zd), (ins zprty2:$Zn), - asm, "\t$Zd, $Zn", - "", []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz16_64; - let Inst{21-18} = 0b1100; - let Inst{17-16} = opc; - let Inst{15-10} = 0b001110; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve_int_perm_unpk opc, string asm, SDPatternOperator op> { - def _H : sve_int_perm_unpk<0b01, opc, asm, ZPR16, ZPR8>; - def _S : sve_int_perm_unpk<0b10, opc, asm, ZPR32, ZPR16>; - def _D : sve_int_perm_unpk<0b11, opc, asm, ZPR64, ZPR32>; - - def : SVE_1_Op_Pat(NAME # _H)>; - def : SVE_1_Op_Pat(NAME # _S)>; - def : SVE_1_Op_Pat(NAME # _D)>; -} - -class sve_int_perm_insrs sz8_64, string asm, ZPRRegOp zprty, - RegisterClass srcRegType> -: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, srcRegType:$Rm), - asm, "\t$Zdn, $Rm", - "", - []>, Sched<[]> { 
- bits<5> Rm; - bits<5> Zdn; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21-10} = 0b100100001110; - let Inst{9-5} = Rm; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; -} - -multiclass sve_int_perm_insrs { - def _B : sve_int_perm_insrs<0b00, asm, ZPR8, GPR32>; - def _H : sve_int_perm_insrs<0b01, asm, ZPR16, GPR32>; - def _S : sve_int_perm_insrs<0b10, asm, ZPR32, GPR32>; - def _D : sve_int_perm_insrs<0b11, asm, ZPR64, GPR64>; - - def : SVE_2_Op_Pat(NAME # _B)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -class sve_int_perm_insrv sz8_64, string asm, ZPRRegOp zprty, - FPRasZPROperand srcOpType> -: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, srcOpType:$Vm), - asm, "\t$Zdn, $Vm", - "", - []>, Sched<[]> { - bits<5> Vm; - bits<5> Zdn; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21-10} = 0b110100001110; - let Inst{9-5} = Vm; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; -} - -multiclass sve_int_perm_insrv { - def _B : sve_int_perm_insrv<0b00, asm, ZPR8, FPR8asZPR>; - def _H : sve_int_perm_insrv<0b01, asm, ZPR16, FPR16asZPR>; - def _S : sve_int_perm_insrv<0b10, asm, ZPR32, FPR32asZPR>; - def _D : sve_int_perm_insrv<0b11, asm, ZPR64, FPR64asZPR>; - - def : Pat<(nxv8f16 (op nxv8f16:$Zn, f16:$Vm)), - (!cast(NAME # _H) $Zn, (INSERT_SUBREG (IMPLICIT_DEF), $Vm, hsub))>; - def : Pat<(nxv4f32 (op nxv4f32:$Zn, f32:$Vm)), - (!cast(NAME # _S) $Zn, (INSERT_SUBREG (IMPLICIT_DEF), $Vm, ssub))>; - def : Pat<(nxv2f64 (op nxv2f64:$Zn, f64:$Vm)), - (!cast(NAME # _D) $Zn, (INSERT_SUBREG (IMPLICIT_DEF), $Vm, dsub))>; - - def : Pat<(nxv8bf16 (op nxv8bf16:$Zn, bf16:$Vm)), - (!cast(NAME # _H) $Zn, (INSERT_SUBREG (IMPLICIT_DEF), $Vm, hsub))>; - - // Keep integer insertions within the vector unit. 
- def : Pat<(nxv16i8 (op (nxv16i8 ZPR:$Zn), (i32 (vector_extract (nxv16i8 ZPR:$Vm), 0)))), - (!cast(NAME # _B) $Zn, ZPR:$Vm)>; - def : Pat<(nxv8i16 (op (nxv8i16 ZPR:$Zn), (i32 (vector_extract (nxv8i16 ZPR:$Vm), 0)))), - (!cast(NAME # _H) $Zn, ZPR:$Vm)>; - def : Pat<(nxv4i32 (op (nxv4i32 ZPR:$Zn), (i32 (vector_extract (nxv4i32 ZPR:$Vm), 0)))), - (!cast(NAME # _S) $Zn, ZPR: $Vm)>; - def : Pat<(nxv2i64 (op (nxv2i64 ZPR:$Zn), (i64 (vector_extract (nxv2i64 ZPR:$Vm), 0)))), - (!cast(NAME # _D) $Zn, ZPR:$Vm)>; - -} - -//===----------------------------------------------------------------------===// -// SVE Permute - Extract Group -//===----------------------------------------------------------------------===// - -class sve_int_perm_extract_i -: I<(outs ZPR8:$Zdn), (ins ZPR8:$_Zdn, ZPR8:$Zm, imm0_255:$imm8), - asm, "\t$Zdn, $_Zdn, $Zm, $imm8", - "", []>, Sched<[]> { - bits<5> Zdn; - bits<5> Zm; - bits<8> imm8; - let Inst{31-21} = 0b00000101001; - let Inst{20-16} = imm8{7-3}; - let Inst{15-13} = 0b000; - let Inst{12-10} = imm8{2-0}; - let Inst{9-5} = Zm; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve_int_perm_extract_i { - def NAME : sve_int_perm_extract_i; - - def : SVE_3_Op_Imm_Pat(NAME)>; -} - -class sve2_int_perm_extract_i_cons -: I<(outs ZPR8:$Zd), (ins ZZ_b:$Zn, imm0_255:$imm8), - asm, "\t$Zd, $Zn, $imm8", - "", []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - bits<8> imm8; - let Inst{31-21} = 0b00000101011; - let Inst{20-16} = imm8{7-3}; - let Inst{15-13} = 0b000; - let Inst{12-10} = imm8{2-0}; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -//===----------------------------------------------------------------------===// -// SVE Vector Select Group -//===----------------------------------------------------------------------===// - -class sve_int_sel_vvv sz8_64, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins PPRAny:$Pg, zprty:$Zn, zprty:$Zm), - 
asm, "\t$Zd, $Pg, $Zn, $Zm", - "", - []>, Sched<[]> { - bits<4> Pg; - bits<5> Zd; - bits<5> Zm; - bits<5> Zn; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21} = 0b1; - let Inst{20-16} = Zm; - let Inst{15-14} = 0b11; - let Inst{13-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve_int_sel_vvv { - def _B : sve_int_sel_vvv<0b00, asm, ZPR8>; - def _H : sve_int_sel_vvv<0b01, asm, ZPR16>; - def _S : sve_int_sel_vvv<0b10, asm, ZPR32>; - def _D : sve_int_sel_vvv<0b11, asm, ZPR64>; - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; - - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; - def : SVE_3_Op_Pat(NAME # _D)>; - def : SVE_3_Op_Pat(NAME # _D)>; - - def : SVE_3_Op_Pat(NAME # _H)>; - - def : InstAlias<"mov $Zd, $Pg/m, $Zn", - (!cast(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, ZPR8:$Zn, ZPR8:$Zd), 1>; - def : InstAlias<"mov $Zd, $Pg/m, $Zn", - (!cast(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, ZPR16:$Zn, ZPR16:$Zd), 1>; - def : InstAlias<"mov $Zd, $Pg/m, $Zn", - (!cast(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, ZPR32:$Zn, ZPR32:$Zd), 1>; - def : InstAlias<"mov $Zd, $Pg/m, $Zn", - (!cast(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, ZPR64:$Zn, ZPR64:$Zd), 1>; -} - - -//===----------------------------------------------------------------------===// -// SVE Predicate Logical Operations Group -//===----------------------------------------------------------------------===// - -class sve_int_pred_log opc, string asm> -: I<(outs PPR8:$Pd), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$Pm), - asm, "\t$Pd, $Pg/z, $Pn, $Pm", - "", - []>, Sched<[]> { - bits<4> Pd; - bits<4> Pg; - bits<4> Pm; - bits<4> Pn; - let Inst{31-24} = 0b00100101; - let Inst{23-22} = opc{3-2}; - let Inst{21-20} = 0b00; - let Inst{19-16} = Pm; - let Inst{15-14} = 0b01; - let Inst{13-10} = Pg; - let Inst{9} = opc{1}; - let Inst{8-5} = Pn; - let 
Inst{4} = opc{0}; - let Inst{3-0} = Pd; - - // SEL has no predication qualifier. - let AsmString = !if(!eq(opc, 0b0011), - !strconcat(asm, "\t$Pd, $Pg, $Pn, $Pm"), - !strconcat(asm, "\t$Pd, $Pg/z, $Pn, $Pm")); - - let Defs = !if(!eq (opc{2}, 1), [NZCV], []); - -} - -multiclass sve_int_pred_log opc, string asm, SDPatternOperator op, - SDPatternOperator op_nopred = null_frag> { - def NAME : sve_int_pred_log; - - def : SVE_3_Op_Pat(NAME)>; - def : SVE_3_Op_Pat(NAME)>; - def : SVE_3_Op_Pat(NAME)>; - def : SVE_3_Op_Pat(NAME)>; - def : SVE_2_Op_AllActive_Pat(NAME), PTRUE_B>; - def : SVE_2_Op_AllActive_Pat(NAME), PTRUE_H>; - def : SVE_2_Op_AllActive_Pat(NAME), PTRUE_S>; - def : SVE_2_Op_AllActive_Pat(NAME), PTRUE_D>; -} - -// An instance of sve_int_pred_log_and but uses op_nopred's first operand as the -// general predicate. -multiclass sve_int_pred_log_v2 opc, string asm, SDPatternOperator op, - SDPatternOperator op_nopred> : - sve_int_pred_log { - def : Pat<(nxv16i1 (op_nopred nxv16i1:$Op1, nxv16i1:$Op2)), - (!cast(NAME) $Op1, $Op1, $Op2)>; - def : Pat<(nxv8i1 (op_nopred nxv8i1:$Op1, nxv8i1:$Op2)), - (!cast(NAME) $Op1, $Op1, $Op2)>; - def : Pat<(nxv4i1 (op_nopred nxv4i1:$Op1, nxv4i1:$Op2)), - (!cast(NAME) $Op1, $Op1, $Op2)>; - def : Pat<(nxv2i1 (op_nopred nxv2i1:$Op1, nxv2i1:$Op2)), - (!cast(NAME) $Op1, $Op1, $Op2)>; -} - -//===----------------------------------------------------------------------===// -// SVE Logical Mask Immediate Group -//===----------------------------------------------------------------------===// - -class sve_int_log_imm opc, string asm> -: I<(outs ZPR64:$Zdn), (ins ZPR64:$_Zdn, logical_imm64:$imms13), - asm, "\t$Zdn, $_Zdn, $imms13", - "", []>, Sched<[]> { - bits<5> Zdn; - bits<13> imms13; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = opc; - let Inst{21-18} = 0b0000; - let Inst{17-5} = imms13; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DecoderMethod = "DecodeSVELogicalImmInstruction"; - let DestructiveInstType = 
DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve_int_log_imm opc, string asm, string alias, SDPatternOperator op> { - def NAME : sve_int_log_imm; - - def : SVE_1_Op_Imm_Log_Pat(NAME)>; - def : SVE_1_Op_Imm_Log_Pat(NAME)>; - def : SVE_1_Op_Imm_Log_Pat(NAME)>; - def : SVE_1_Op_Imm_Log_Pat(NAME)>; - - def : InstAlias(NAME) ZPR8:$Zdn, sve_logical_imm8:$imm), 4>; - def : InstAlias(NAME) ZPR16:$Zdn, sve_logical_imm16:$imm), 3>; - def : InstAlias(NAME) ZPR32:$Zdn, sve_logical_imm32:$imm), 2>; - - def : InstAlias(NAME) ZPR8:$Zdn, sve_logical_imm8_not:$imm), 0>; - def : InstAlias(NAME) ZPR16:$Zdn, sve_logical_imm16_not:$imm), 0>; - def : InstAlias(NAME) ZPR32:$Zdn, sve_logical_imm32_not:$imm), 0>; - def : InstAlias(NAME) ZPR64:$Zdn, logical_imm64_not:$imm), 0>; -} - -multiclass sve_int_log_imm_bic { - def : SVE_1_Op_Imm_Log_Pat("AND_ZI")>; - def : SVE_1_Op_Imm_Log_Pat("AND_ZI")>; - def : SVE_1_Op_Imm_Log_Pat("AND_ZI")>; - def : SVE_1_Op_Imm_Log_Pat("AND_ZI")>; -} - -class sve_int_dup_mask_imm -: I<(outs ZPR64:$Zd), (ins logical_imm64:$imms), - asm, "\t$Zd, $imms", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<13> imms; - let Inst{31-18} = 0b00000101110000; - let Inst{17-5} = imms; - let Inst{4-0} = Zd; - - let isReMaterializable = 1; - let DecoderMethod = "DecodeSVELogicalImmInstruction"; -} - -multiclass sve_int_dup_mask_imm { - def NAME : sve_int_dup_mask_imm; - - def : InstAlias<"dupm $Zd, $imm", - (!cast(NAME) ZPR8:$Zd, sve_logical_imm8:$imm), 4>; - def : InstAlias<"dupm $Zd, $imm", - (!cast(NAME) ZPR16:$Zd, sve_logical_imm16:$imm), 3>; - def : InstAlias<"dupm $Zd, $imm", - (!cast(NAME) ZPR32:$Zd, sve_logical_imm32:$imm), 2>; - - // All Zd.b forms have a CPY/DUP equivalent, hence no byte alias here. 
- def : InstAlias<"mov $Zd, $imm", - (!cast(NAME) ZPR16:$Zd, sve_preferred_logical_imm16:$imm), 7>; - def : InstAlias<"mov $Zd, $imm", - (!cast(NAME) ZPR32:$Zd, sve_preferred_logical_imm32:$imm), 6>; - def : InstAlias<"mov $Zd, $imm", - (!cast(NAME) ZPR64:$Zd, sve_preferred_logical_imm64:$imm), 5>; - - def : Pat<(nxv2i64 (AArch64dup (i64 logical_imm64:$imm))), - (!cast(NAME) logical_imm64:$imm)>; -} - -//===----------------------------------------------------------------------===// -// SVE Integer Arithmetic - Unpredicated Group. -//===----------------------------------------------------------------------===// - -class sve_int_bin_cons_arit_0 sz8_64, bits<3> opc, string asm, - ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), - asm, "\t$Zd, $Zn, $Zm", - "", []>, Sched<[]> { - bits<5> Zd; - bits<5> Zm; - bits<5> Zn; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = sz8_64; - let Inst{21} = 0b1; - let Inst{20-16} = Zm; - let Inst{15-13} = 0b000; - let Inst{12-10} = opc; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve_int_bin_cons_arit_0 opc, string asm, SDPatternOperator op> { - def _B : sve_int_bin_cons_arit_0<0b00, opc, asm, ZPR8>; - def _H : sve_int_bin_cons_arit_0<0b01, opc, asm, ZPR16>; - def _S : sve_int_bin_cons_arit_0<0b10, opc, asm, ZPR32>; - def _D : sve_int_bin_cons_arit_0<0b11, opc, asm, ZPR64>; - - def : SVE_2_Op_Pat(NAME # _B)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE Floating Point Arithmetic - Predicated Group -//===----------------------------------------------------------------------===// - -class sve_fp_2op_i_p_zds sz, bits<3> opc, string asm, - ZPRRegOp zprty, - Operand imm_ty> -: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, imm_ty:$i1), - asm, "\t$Zdn, $Pg/m, $_Zdn, $i1", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zdn; - bit i1; - let 
Inst{31-24} = 0b01100101; - let Inst{23-22} = sz; - let Inst{21-19} = 0b011; - let Inst{18-16} = opc; - let Inst{15-13} = 0b100; - let Inst{12-10} = Pg; - let Inst{9-6} = 0b0000; - let Inst{5} = i1; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = zprty.ElementSize; -} - -multiclass sve_fp_2op_i_p_zds opc, string asm, string Ps, Operand imm_ty, FPImmLeaf A, FPImmLeaf B, SDPatternOperator op> { - let DestructiveInstType = DestructiveBinaryImm in { - def _H : SVEPseudo2Instr, sve_fp_2op_i_p_zds<0b01, opc, asm, ZPR16, imm_ty>; - def _S : SVEPseudo2Instr, sve_fp_2op_i_p_zds<0b10, opc, asm, ZPR32, imm_ty>; - def _D : SVEPseudo2Instr, sve_fp_2op_i_p_zds<0b11, opc, asm, ZPR64, imm_ty>; - } - - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_H")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_H")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_S")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_S")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_D")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_D")>; -} - -class sve_fp_2op_p_zds sz, bits<4> opc, string asm, - ZPRRegOp zprty> -: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm), - asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zdn; - bits<5> Zm; - let Inst{31-24} = 0b01100101; - let Inst{23-22} = sz; - let Inst{21-20} = 0b00; - let Inst{19-16} = opc; - let Inst{15-13} = 0b100; - let Inst{12-10} = Pg; - let Inst{9-5} = Zm; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = zprty.ElementSize; -} - -multiclass sve_fp_2op_p_zds opc, string asm, string Ps, - SDPatternOperator op, DestructiveInstTypeEnum flags, - string revname="", bit isReverseInstr=0> { - let DestructiveInstType = flags in { - def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>, - SVEPseudo2Instr, SVEInstr2Rev; - def _S : sve_fp_2op_p_zds<0b10, opc, asm, ZPR32>, - SVEPseudo2Instr, SVEInstr2Rev; - def _D : 
sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>, - SVEPseudo2Instr, SVEInstr2Rev; - } - - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -multiclass sve_fp_2op_p_zds_fscale opc, string asm, - SDPatternOperator op> { - def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>; - def _S : sve_fp_2op_p_zds<0b10, opc, asm, ZPR32>; - def _D : sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>; - - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -multiclass sve_fp_2op_p_zds_zeroing_hsd { - def _ZERO_H : PredTwoOpPseudo; - def _ZERO_S : PredTwoOpPseudo; - def _ZERO_D : PredTwoOpPseudo; - - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_H)>; - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_S)>; - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_D)>; -} - -class sve_fp_ftmad sz, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, timm32_0_7:$imm3), - asm, "\t$Zdn, $_Zdn, $Zm, $imm3", - "", - []>, Sched<[]> { - bits<5> Zdn; - bits<5> Zm; - bits<3> imm3; - let Inst{31-24} = 0b01100101; - let Inst{23-22} = sz; - let Inst{21-19} = 0b010; - let Inst{18-16} = imm3; - let Inst{15-10} = 0b100000; - let Inst{9-5} = Zm; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve_fp_ftmad { - def _H : sve_fp_ftmad<0b01, asm, ZPR16>; - def _S : sve_fp_ftmad<0b10, asm, ZPR32>; - def _D : sve_fp_ftmad<0b11, asm, ZPR64>; - - def : Pat<(nxv8f16 (op (nxv8f16 ZPR16:$Zn), (nxv8f16 ZPR16:$Zm), (i32 timm32_0_7:$imm))), - (!cast(NAME # _H) ZPR16:$Zn, ZPR16:$Zm, timm32_0_7:$imm)>; - def : Pat<(nxv4f32 (op (nxv4f32 ZPR32:$Zn), (nxv4f32 ZPR32:$Zm), (i32 timm32_0_7:$imm))), - (!cast(NAME # _S) ZPR32:$Zn, ZPR32:$Zm, timm32_0_7:$imm)>; - def : Pat<(nxv2f64 (op (nxv2f64 ZPR64:$Zn), (nxv2f64 ZPR64:$Zm), (i32 timm32_0_7:$imm))), - (!cast(NAME # _D) ZPR64:$Zn, ZPR64:$Zm, timm32_0_7:$imm)>; -} - -multiclass 
sve_fp_2op_i_p_zds_hfd { - def _UNDEF_H : PredTwoOpImmPseudo; - def _UNDEF_S : PredTwoOpImmPseudo; - def _UNDEF_D : PredTwoOpImmPseudo; - - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_H")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_H")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_H")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_H")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_H")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_H")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_S")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_S")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_S")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_S")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_D")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_D")>; -} - -multiclass sve_fp_2op_i_p_zds_zeroing_hfd { - def _ZERO_H : PredTwoOpImmPseudo; - def _ZERO_S : PredTwoOpImmPseudo; - def _ZERO_D : PredTwoOpImmPseudo; - - let AddedComplexity = 2 in { - def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_ZERO_H")>; - def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_ZERO_H")>; - def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_ZERO_S")>; - def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_ZERO_S")>; - def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_ZERO_D")>; - def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_ZERO_D")>; - } -} - -//===----------------------------------------------------------------------===// -// SVE Floating Point Arithmetic - Unpredicated Group -//===----------------------------------------------------------------------===// - -class sve_fp_3op_u_zd sz, bits<3> opc, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), - asm, "\t$Zd, $Zn, $Zm", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<5> Zm; - bits<5> Zn; - let Inst{31-24} = 0b01100101; - let Inst{23-22} = sz; - let Inst{21} = 0b0; - let Inst{20-16} = Zm; - let Inst{15-13} = 0b000; - let Inst{12-10} = opc; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve_fp_3op_u_zd opc, string asm, SDPatternOperator op, - SDPatternOperator predicated_op = null_frag> { - 
def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>; - def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>; - def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>; - - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; - - def : SVE_2_Op_Pred_All_Active(NAME # _H)>; - def : SVE_2_Op_Pred_All_Active(NAME # _S)>; - def : SVE_2_Op_Pred_All_Active(NAME # _D)>; -} - -multiclass sve_fp_3op_u_zd_ftsmul opc, string asm, SDPatternOperator op> { - def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>; - def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>; - def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>; - - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE Floating Point Fused Multiply-Add Group -//===----------------------------------------------------------------------===// - -class sve_fp_3op_p_zds_a sz, bits<2> opc, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zda), (ins PPR3bAny:$Pg, zprty:$_Zda, zprty:$Zn, zprty:$Zm), - asm, "\t$Zda, $Pg/m, $Zn, $Zm", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zda; - bits<5> Zm; - bits<5> Zn; - let Inst{31-24} = 0b01100101; - let Inst{23-22} = sz; - let Inst{21} = 0b1; - let Inst{20-16} = Zm; - let Inst{15} = 0b0; - let Inst{14-13} = opc; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let ElementSize = zprty.ElementSize; -} - -multiclass sve_fp_3op_p_zds_a opc, string asm, string Ps, - SDPatternOperator op, string revname, - bit isReverseInstr=0> { - let DestructiveInstType = DestructiveTernaryCommWithRev in { - def _H : sve_fp_3op_p_zds_a<0b01, opc, asm, ZPR16>, - SVEPseudo2Instr, SVEInstr2Rev; - def _S : sve_fp_3op_p_zds_a<0b10, opc, asm, ZPR32>, - SVEPseudo2Instr, SVEInstr2Rev; - def _D : sve_fp_3op_p_zds_a<0b11, opc, asm, ZPR64>, - SVEPseudo2Instr, SVEInstr2Rev; - } - - def : SVE_4_Op_Pat(NAME 
# _H)>; - def : SVE_4_Op_Pat(NAME # _S)>; - def : SVE_4_Op_Pat(NAME # _D)>; -} - -class sve_fp_3op_p_zds_b sz, bits<2> opc, string asm, - ZPRRegOp zprty> -: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm, zprty:$Za), - asm, "\t$Zdn, $Pg/m, $Zm, $Za", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Za; - bits<5> Zdn; - bits<5> Zm; - let Inst{31-24} = 0b01100101; - let Inst{23-22} = sz; - let Inst{21} = 0b1; - let Inst{20-16} = Za; - let Inst{15} = 0b1; - let Inst{14-13} = opc; - let Inst{12-10} = Pg; - let Inst{9-5} = Zm; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = zprty.ElementSize; -} - -multiclass sve_fp_3op_p_zds_b opc, string asm, SDPatternOperator op, - string revname, bit isReverseInstr> { - def _H : sve_fp_3op_p_zds_b<0b01, opc, asm, ZPR16>, - SVEInstr2Rev; - def _S : sve_fp_3op_p_zds_b<0b10, opc, asm, ZPR32>, - SVEInstr2Rev; - def _D : sve_fp_3op_p_zds_b<0b11, opc, asm, ZPR64>, - SVEInstr2Rev; - - def : SVE_4_Op_Pat(NAME # _H)>; - def : SVE_4_Op_Pat(NAME # _S)>; - def : SVE_4_Op_Pat(NAME # _D)>; -} - -multiclass sve_fp_3op_p_zds_zx { - def _UNDEF_H : PredThreeOpPseudo; - def _UNDEF_S : PredThreeOpPseudo; - def _UNDEF_D : PredThreeOpPseudo; -} - -//===----------------------------------------------------------------------===// -// SVE Floating Point Multiply-Add - Indexed Group -//===----------------------------------------------------------------------===// - -class sve_fp_fma_by_indexed_elem sz, bit opc, string asm, - ZPRRegOp zprty1, - ZPRRegOp zprty2, Operand itype> -: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty1:$Zn, zprty2:$Zm, itype:$iop), - asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> { - bits<5> Zda; - bits<5> Zn; - let Inst{31-24} = 0b01100100; - let Inst{23-22} = sz; - let Inst{21} = 0b1; - let Inst{15-11} = 0; - let Inst{10} = opc; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = 
DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve_fp_fma_by_indexed_elem { - def _H : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR3b16, VectorIndexH32b> { - bits<3> Zm; - bits<3> iop; - let Inst{22} = iop{2}; - let Inst{20-19} = iop{1-0}; - let Inst{18-16} = Zm; - } - def _S : sve_fp_fma_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR3b32, VectorIndexS32b> { - bits<3> Zm; - bits<2> iop; - let Inst{20-19} = iop; - let Inst{18-16} = Zm; - } - def _D : sve_fp_fma_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR4b64, VectorIndexD32b> { - bits<4> Zm; - bit iop; - let Inst{20} = iop; - let Inst{19-16} = Zm; - } - - def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexH32b_timm:$idx))), - (!cast(NAME # _H) $Op1, $Op2, $Op3, VectorIndexH32b_timm:$idx)>; - def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexS32b_timm:$idx))), - (!cast(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b_timm:$idx)>; - def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 VectorIndexD32b_timm:$idx))), - (!cast(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b_timm:$idx)>; -} - - -//===----------------------------------------------------------------------===// -// SVE Floating Point Multiply - Indexed Group -//===----------------------------------------------------------------------===// - -class sve_fp_fmul_by_indexed_elem sz, string asm, ZPRRegOp zprty, - ZPRRegOp zprty2, Operand itype> -: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty2:$Zm, itype:$iop), - asm, "\t$Zd, $Zn, $Zm$iop", "", []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - let Inst{31-24} = 0b01100100; - let Inst{23-22} = sz; - let Inst{21} = 0b1; - let Inst{15-10} = 0b001000; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve_fp_fmul_by_indexed_elem { - def _H : sve_fp_fmul_by_indexed_elem<{0, ?}, asm, ZPR16, ZPR3b16, VectorIndexH32b> { - bits<3> Zm; - bits<3> iop; - let Inst{22} = iop{2}; - let Inst{20-19} = iop{1-0}; - let 
Inst{18-16} = Zm; - } - def _S : sve_fp_fmul_by_indexed_elem<0b10, asm, ZPR32, ZPR3b32, VectorIndexS32b> { - bits<3> Zm; - bits<2> iop; - let Inst{20-19} = iop; - let Inst{18-16} = Zm; - } - def _D : sve_fp_fmul_by_indexed_elem<0b11, asm, ZPR64, ZPR4b64, VectorIndexD32b> { - bits<4> Zm; - bit iop; - let Inst{20} = iop; - let Inst{19-16} = Zm; - } - - def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, (i32 VectorIndexH32b_timm:$idx))), - (!cast(NAME # _H) $Op1, $Op2, VectorIndexH32b_timm:$idx)>; - def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, (i32 VectorIndexS32b_timm:$idx))), - (!cast(NAME # _S) $Op1, $Op2, VectorIndexS32b_timm:$idx)>; - def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, (i32 VectorIndexD32b_timm:$idx))), - (!cast(NAME # _D) $Op1, $Op2, VectorIndexD32b_timm:$idx)>; -} - -//===----------------------------------------------------------------------===// -// SVE Floating Point Complex Multiply-Add Group -//===----------------------------------------------------------------------===// - -class sve_fp_fcmla sz, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zda), (ins PPR3bAny:$Pg, zprty:$_Zda, zprty:$Zn, zprty:$Zm, - complexrotateop:$imm), - asm, "\t$Zda, $Pg/m, $Zn, $Zm, $imm", - "", []>, Sched<[]> { - bits<5> Zda; - bits<3> Pg; - bits<5> Zn; - bits<5> Zm; - bits<2> imm; - let Inst{31-24} = 0b01100100; - let Inst{23-22} = sz; - let Inst{21} = 0; - let Inst{20-16} = Zm; - let Inst{15} = 0; - let Inst{14-13} = imm; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; - let ElementSize = zprty.ElementSize; -} - -multiclass sve_fp_fcmla { - def _H : sve_fp_fcmla<0b01, asm, ZPR16>; - def _S : sve_fp_fcmla<0b10, asm, ZPR32>; - def _D : sve_fp_fcmla<0b11, asm, ZPR64>; - - def : Pat<(nxv8f16 (op nxv8i1:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, nxv8f16:$Op4, (i32 complexrotateop:$imm))), - (!cast(NAME # _H) $Op1, $Op2, $Op3, $Op4, complexrotateop:$imm)>; - def : 
Pat<(nxv4f32 (op nxv4i1:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, nxv4f32:$Op4, (i32 complexrotateop:$imm))), - (!cast(NAME # _S) $Op1, $Op2, $Op3, $Op4, complexrotateop:$imm)>; - def : Pat<(nxv2f64 (op nxv2i1:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, nxv2f64:$Op4, (i32 complexrotateop:$imm))), - (!cast(NAME # _D) $Op1, $Op2, $Op3, $Op4, complexrotateop:$imm)>; -} - -//===----------------------------------------------------------------------===// -// SVE Floating Point Complex Multiply-Add - Indexed Group -//===----------------------------------------------------------------------===// - -class sve_fp_fcmla_by_indexed_elem sz, string asm, - ZPRRegOp zprty, - ZPRRegOp zprty2, Operand itype> -: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, zprty2:$Zm, itype:$iop, - complexrotateop:$imm), - asm, "\t$Zda, $Zn, $Zm$iop, $imm", - "", []>, Sched<[]> { - bits<5> Zda; - bits<5> Zn; - bits<2> imm; - let Inst{31-24} = 0b01100100; - let Inst{23-22} = sz; - let Inst{21} = 0b1; - let Inst{15-12} = 0b0001; - let Inst{11-10} = imm; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve_fp_fcmla_by_indexed_elem { - def _H : sve_fp_fcmla_by_indexed_elem<0b10, asm, ZPR16, ZPR3b16, VectorIndexS32b> { - bits<3> Zm; - bits<2> iop; - let Inst{20-19} = iop; - let Inst{18-16} = Zm; - } - def _S : sve_fp_fcmla_by_indexed_elem<0b11, asm, ZPR32, ZPR4b32, VectorIndexD32b> { - bits<4> Zm; - bits<1> iop; - let Inst{20} = iop; - let Inst{19-16} = Zm; - } - - def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexS32b_timm:$idx), (i32 complexrotateop:$imm))), - (!cast(NAME # _H) $Op1, $Op2, $Op3, VectorIndexS32b_timm:$idx, complexrotateop:$imm)>; - def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexD32b_timm:$idx), (i32 complexrotateop:$imm))), - (!cast(NAME # _S) $Op1, $Op2, $Op3, VectorIndexD32b_timm:$idx, 
complexrotateop:$imm)>; -} - -//===----------------------------------------------------------------------===// -// SVE Floating Point Complex Addition Group -//===----------------------------------------------------------------------===// - -class sve_fp_fcadd sz, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm, - complexrotateopodd:$imm), - asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm, $imm", - "", - []>, Sched<[]> { - bits<5> Zdn; - bits<5> Zm; - bits<3> Pg; - bit imm; - let Inst{31-24} = 0b01100100; - let Inst{23-22} = sz; - let Inst{21-17} = 0; - let Inst{16} = imm; - let Inst{15-13} = 0b100; - let Inst{12-10} = Pg; - let Inst{9-5} = Zm; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = zprty.ElementSize; -} - -multiclass sve_fp_fcadd { - def _H : sve_fp_fcadd<0b01, asm, ZPR16>; - def _S : sve_fp_fcadd<0b10, asm, ZPR32>; - def _D : sve_fp_fcadd<0b11, asm, ZPR64>; - - def : Pat<(nxv8f16 (op nxv8i1:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 complexrotateopodd:$imm))), - (!cast(NAME # _H) $Op1, $Op2, $Op3, complexrotateopodd:$imm)>; - def : Pat<(nxv4f32 (op nxv4i1:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 complexrotateopodd:$imm))), - (!cast(NAME # _S) $Op1, $Op2, $Op3, complexrotateopodd:$imm)>; - def : Pat<(nxv2f64 (op nxv2i1:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 complexrotateopodd:$imm))), - (!cast(NAME # _D) $Op1, $Op2, $Op3, complexrotateopodd:$imm)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Floating Point Convert Group -//===----------------------------------------------------------------------===// - -class sve2_fp_convert_precision opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> -: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn), - asm, "\t$Zd, $Pg/m, $Zn", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - bits<3> Pg; - let Inst{31-24} = 0b01100100; - let Inst{23-22} = 
opc{3-2}; - let Inst{21-18} = 0b0010; - let Inst{17-16} = opc{1-0}; - let Inst{15-13} = 0b101; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; - - let Constraints = "$Zd = $_Zd"; -} - -multiclass sve2_fp_convert_down_narrow { - def _StoH : sve2_fp_convert_precision<0b1000, asm, ZPR16, ZPR32>; - def _DtoS : sve2_fp_convert_precision<0b1110, asm, ZPR32, ZPR64>; - - def : SVE_3_Op_Pat(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast(NAME # _StoH)>; - def : SVE_3_Op_Pat(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast(NAME # _DtoS)>; -} - -multiclass sve2_fp_convert_up_long { - def _HtoS : sve2_fp_convert_precision<0b1001, asm, ZPR32, ZPR16>; - def _StoD : sve2_fp_convert_precision<0b1111, asm, ZPR64, ZPR32>; - - def : SVE_3_Op_Pat(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast(NAME # _HtoS)>; - def : SVE_3_Op_Pat(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast(NAME # _StoD)>; -} - -multiclass sve2_fp_convert_down_odd_rounding_top { - def _DtoS : sve2_fp_convert_precision<0b0010, asm, ZPR32, ZPR64>; - - def : SVE_3_Op_Pat(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast(NAME # _DtoS)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Floating Point Pairwise Group -//===----------------------------------------------------------------------===// - -class sve2_fp_pairwise_pred sz, bits<3> opc, string asm, - ZPRRegOp zprty> -: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm), - asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zm; - bits<5> Zdn; - let Inst{31-24} = 0b01100100; - let Inst{23-22} = sz; - let Inst{21-19} = 0b010; - let Inst{18-16} = opc; - let Inst{15-13} = 0b100; - let Inst{12-10} = Pg; - let Inst{9-5} = Zm; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = zprty.ElementSize; -} - -multiclass sve2_fp_pairwise_pred opc, string asm, - SDPatternOperator op> { - def _H : 
sve2_fp_pairwise_pred<0b01, opc, asm, ZPR16>; - def _S : sve2_fp_pairwise_pred<0b10, opc, asm, ZPR32>; - def _D : sve2_fp_pairwise_pred<0b11, opc, asm, ZPR64>; - - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Floating Point Widening Multiply-Add - Indexed Group -//===----------------------------------------------------------------------===// - -class sve2_fp_mla_long_by_indexed_elem opc, string asm> -: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, - VectorIndexH32b:$iop), - asm, "\t$Zda, $Zn, $Zm$iop", - "", - []>, Sched<[]> { - bits<5> Zda; - bits<5> Zn; - bits<3> Zm; - bits<3> iop; - let Inst{31-21} = 0b01100100101; - let Inst{20-19} = iop{2-1}; - let Inst{18-16} = Zm; - let Inst{15-14} = 0b01; - let Inst{13} = opc{1}; - let Inst{12} = 0b0; - let Inst{11} = iop{0}; - let Inst{10} = opc{0}; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve2_fp_mla_long_by_indexed_elem opc, string asm, - SDPatternOperator op> { - def NAME : sve2_fp_mla_long_by_indexed_elem; - def : SVE_4_Op_Imm_Pat(NAME)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Floating Point Widening Multiply-Add Group -//===----------------------------------------------------------------------===// - -class sve2_fp_mla_long opc, string asm> -: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm), - asm, "\t$Zda, $Zn, $Zm", - "", - []>, Sched<[]> { - bits<5> Zda; - bits<5> Zn; - bits<5> Zm; - let Inst{31-21} = 0b01100100101; - let Inst{20-16} = Zm; - let Inst{15-14} = 0b10; - let Inst{13} = opc{1}; - let Inst{12-11} = 0b00; - let Inst{10} = opc{0}; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let 
DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve2_fp_mla_long opc, string asm, SDPatternOperator op> { - def NAME : sve2_fp_mla_long; - def : SVE_3_Op_Pat(NAME)>; -} - -//===----------------------------------------------------------------------===// -// SVE Stack Allocation Group -//===----------------------------------------------------------------------===// - -class sve_int_arith_vl -: I<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, simm6_32b:$imm6), - asm, "\t$Rd, $Rn, $imm6", - "", - []>, Sched<[]> { - bits<5> Rd; - bits<5> Rn; - bits<6> imm6; - let Inst{31-23} = 0b000001000; - let Inst{22} = opc; - let Inst{21} = 0b1; - let Inst{20-16} = Rn; - let Inst{15-11} = 0b01010; - let Inst{10-5} = imm6; - let Inst{4-0} = Rd; -} - -class sve_int_read_vl_a opc2, string asm> -: I<(outs GPR64:$Rd), (ins simm6_32b:$imm6), - asm, "\t$Rd, $imm6", - "", - []>, Sched<[]> { - bits<5> Rd; - bits<6> imm6; - let Inst{31-23} = 0b000001001; - let Inst{22} = op; - let Inst{21} = 0b1; - let Inst{20-16} = opc2{4-0}; - let Inst{15-11} = 0b01010; - let Inst{10-5} = imm6; - let Inst{4-0} = Rd; -} - -//===----------------------------------------------------------------------===// -// SVE Permute - In Lane Group -//===----------------------------------------------------------------------===// - -class sve_int_perm_bin_perm_zz opc, bits<2> sz8_64, string asm, - ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), - asm, "\t$Zd, $Zn, $Zm", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<5> Zm; - bits<5> Zn; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21} = 0b1; - let Inst{20-16} = Zm; - let Inst{15-13} = 0b011; - let Inst{12-10} = opc; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve_int_perm_bin_perm_zz opc, string asm, - SDPatternOperator op> { - def _B : sve_int_perm_bin_perm_zz; - def _H : sve_int_perm_bin_perm_zz; - def _S : sve_int_perm_bin_perm_zz; - def _D : sve_int_perm_bin_perm_zz; - - 
def : SVE_2_Op_Pat(NAME # _B)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; - - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; - def : SVE_2_Op_Pat(NAME # _D)>; - def : SVE_2_Op_Pat(NAME # _D)>; - - def : SVE_2_Op_Pat(NAME # _H)>; -} - -//===----------------------------------------------------------------------===// -// SVE Floating Point Unary Operations Group -//===----------------------------------------------------------------------===// - -class sve_fp_2op_p_zd opc, string asm, RegisterOperand i_zprtype, - RegisterOperand o_zprtype, ElementSizeEnum Sz> -: I<(outs o_zprtype:$Zd), (ins i_zprtype:$_Zd, PPR3bAny:$Pg, i_zprtype:$Zn), - asm, "\t$Zd, $Pg/m, $Zn", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zd; - bits<5> Zn; - let Inst{31-24} = 0b01100101; - let Inst{23-22} = opc{6-5}; - let Inst{21} = 0b0; - let Inst{20-16} = opc{4-0}; - let Inst{15-13} = 0b101; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; - - let Constraints = "$Zd = $_Zd"; - let DestructiveInstType = DestructiveUnaryPassthru; - let ElementSize = Sz; -} - -multiclass sve_fp_2op_p_zd opc, string asm, - RegisterOperand i_zprtype, - RegisterOperand o_zprtype, - SDPatternOperator int_op, - SDPatternOperator ir_op, ValueType vt1, - ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { - def NAME : sve_fp_2op_p_zd, - SVEPseudo2Instr; - // convert vt1 to a packed type for the intrinsic patterns - defvar packedvt1 = !cond(!eq(!cast(vt1), "nxv2f16"): nxv8f16, - !eq(!cast(vt1), "nxv4f16"): nxv8f16, - !eq(!cast(vt1), "nxv2f32"): nxv4f32, - 1 : vt1); - - // convert vt3 to a packed type for the intrinsic patterns - defvar packedvt3 = !cond(!eq(!cast(vt3), "nxv2f16"): nxv8f16, - !eq(!cast(vt3), "nxv4f16"): nxv8f16, - !eq(!cast(vt3), "nxv2f32"): nxv4f32, - 1 : vt3); - - def : SVE_3_Op_Pat(NAME)>; - def : SVE_1_Op_Passthru_Pat(NAME)>; - - def 
_UNDEF : PredOneOpPassthruPseudo(i_zprtype)>; - - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF)>; -} - -multiclass sve_fp_2op_p_zdr opc, string asm, - RegisterOperand i_zprtype, - RegisterOperand o_zprtype, - SDPatternOperator int_op, - SDPatternOperator ir_op, ValueType vt1, - ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { - def NAME : sve_fp_2op_p_zd, - SVEPseudo2Instr; - - // convert vt1 to a packed type for the intrinsic patterns - defvar packedvt1 = !cond(!eq(!cast(vt1), "nxv2f16"): nxv8f16, - !eq(!cast(vt1), "nxv4f16"): nxv8f16, - !eq(!cast(vt1), "nxv2f32"): nxv4f32, - 1 : vt1); - - def : SVE_3_Op_Pat(NAME)>; - def : SVE_1_Op_Passthru_Round_Pat(NAME)>; - - def _UNDEF : PredOneOpPassthruPseudo(i_zprtype)>; - - defm : SVE_1_Op_PassthruUndef_Round_Pat(NAME # _UNDEF)>; -} - -multiclass sve_fp_2op_p_zd_HSD opc, string asm, SDPatternOperator op> { - def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>, - SVEPseudo2Instr; - def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>, - SVEPseudo2Instr; - def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>, - SVEPseudo2Instr; - - def : SVE_1_Op_Passthru_Pat(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat(NAME # _S)>; - def : SVE_1_Op_Passthru_Pat(NAME # _S)>; - def : SVE_1_Op_Passthru_Pat(NAME # _D)>; - - def _UNDEF_H : PredOneOpPassthruPseudo; - def _UNDEF_S : PredOneOpPassthruPseudo; - def _UNDEF_D : PredOneOpPassthruPseudo; - - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_S)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_S)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_D)>; -} - -multiclass sve2_fp_flogb { - def _H : sve_fp_2op_p_zd<0b0011010, asm, ZPR16, ZPR16, ElementSizeH>; - def _S : 
sve_fp_2op_p_zd<0b0011100, asm, ZPR32, ZPR32, ElementSizeS>; - def _D : sve_fp_2op_p_zd<0b0011110, asm, ZPR64, ZPR64, ElementSizeD>; - - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -multiclass sve2_fp_convert_down_odd_rounding { - def _DtoS : sve_fp_2op_p_zd<0b0001010, asm, ZPR64, ZPR32, ElementSizeD>; - def : SVE_3_Op_Pat(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast(NAME # _DtoS)>; -} - -//===----------------------------------------------------------------------===// -// SVE Floating Point Unary Operations - Unpredicated Group -//===----------------------------------------------------------------------===// - -class sve_fp_2op_u_zd sz, bits<3> opc, string asm, - ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$Zn), - asm, "\t$Zd, $Zn", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - let Inst{31-24} = 0b01100101; - let Inst{23-22} = sz; - let Inst{21-19} = 0b001; - let Inst{18-16} = opc; - let Inst{15-10} = 0b001100; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve_fp_2op_u_zd opc, string asm, SDPatternOperator op> { - def _H : sve_fp_2op_u_zd<0b01, opc, asm, ZPR16>; - def _S : sve_fp_2op_u_zd<0b10, opc, asm, ZPR32>; - def _D : sve_fp_2op_u_zd<0b11, opc, asm, ZPR64>; - - def : SVE_1_Op_Pat(NAME # _H)>; - def : SVE_1_Op_Pat(NAME # _S)>; - def : SVE_1_Op_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE Integer Arithmetic - Binary Predicated Group -//===----------------------------------------------------------------------===// - -class sve_int_bin_pred_arit_log sz8_64, bits<2> fmt, bits<3> opc, - string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm), - asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm", "", []>, Sched<[]> { - bits<3> Pg; - bits<5> Zdn; - bits<5> Zm; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = sz8_64; - let Inst{21} = 0b0; - let Inst{20-19} = fmt; - let Inst{18-16} = opc; 
- let Inst{15-13} = 0b000; - let Inst{12-10} = Pg; - let Inst{9-5} = Zm; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = zprty.ElementSize; -} - -multiclass sve_int_bin_pred_log opc, string asm, SDPatternOperator op> { - def _B : sve_int_bin_pred_arit_log<0b00, 0b11, opc, asm, ZPR8>; - def _H : sve_int_bin_pred_arit_log<0b01, 0b11, opc, asm, ZPR16>; - def _S : sve_int_bin_pred_arit_log<0b10, 0b11, opc, asm, ZPR32>; - def _D : sve_int_bin_pred_arit_log<0b11, 0b11, opc, asm, ZPR64>; - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -multiclass sve_int_bin_pred_arit_0 opc, string asm, string Ps, - SDPatternOperator op, - DestructiveInstTypeEnum flags, - string revname="", bit isReverseInstr=0> { - let DestructiveInstType = flags in { - def _B : sve_int_bin_pred_arit_log<0b00, 0b00, opc, asm, ZPR8>, - SVEPseudo2Instr, SVEInstr2Rev; - def _H : sve_int_bin_pred_arit_log<0b01, 0b00, opc, asm, ZPR16>, - SVEPseudo2Instr, SVEInstr2Rev; - def _S : sve_int_bin_pred_arit_log<0b10, 0b00, opc, asm, ZPR32>, - SVEPseudo2Instr, SVEInstr2Rev; - def _D : sve_int_bin_pred_arit_log<0b11, 0b00, opc, asm, ZPR64>, - SVEPseudo2Instr, SVEInstr2Rev; - } - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -multiclass sve_int_bin_pred_arit_1 opc, string asm, string Ps, - SDPatternOperator op, - DestructiveInstTypeEnum flags> { - let DestructiveInstType = flags in { - def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>, - SVEPseudo2Instr; - def _H : sve_int_bin_pred_arit_log<0b01, 0b01, opc, asm, ZPR16>, - SVEPseudo2Instr; - def _S : sve_int_bin_pred_arit_log<0b10, 0b01, opc, asm, ZPR32>, - SVEPseudo2Instr; - def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>, - SVEPseudo2Instr; - } - - def : SVE_3_Op_Pat(NAME # 
_B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -multiclass sve_int_bin_pred_arit_2 opc, string asm, string Ps, - SDPatternOperator op, - DestructiveInstTypeEnum flags> { - let DestructiveInstType = flags in { - def _B : sve_int_bin_pred_arit_log<0b00, 0b10, opc, asm, ZPR8>, - SVEPseudo2Instr; - def _H : sve_int_bin_pred_arit_log<0b01, 0b10, opc, asm, ZPR16>, - SVEPseudo2Instr; - def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>, - SVEPseudo2Instr; - def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>, - SVEPseudo2Instr; - } - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -// Special case for divides which are not defined for 8b/16b elements. -multiclass sve_int_bin_pred_arit_2_div opc, string asm, string Ps, - SDPatternOperator op, - DestructiveInstTypeEnum flags, - string revname="", bit isReverseInstr=0> { - let DestructiveInstType = flags in { - def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>, - SVEPseudo2Instr, SVEInstr2Rev; - def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>, - SVEPseudo2Instr, SVEInstr2Rev; - } - - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE Integer Multiply-Add Group -//===----------------------------------------------------------------------===// - -class sve_int_mladdsub_vvv_pred sz8_64, bits<1> opc, string asm, - ZPRRegOp zprty> -: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm, zprty:$Za), - asm, "\t$Zdn, $Pg/m, $Zm, $Za", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zdn; - bits<5> Za; - bits<5> Zm; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = sz8_64; - let Inst{21} = 0b0; - let Inst{20-16} = Zm; - let Inst{15-14} = 0b11; - let Inst{13} = opc; - let Inst{12-10} = Pg; - let Inst{9-5} 
= Za; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = zprty.ElementSize; -} - -multiclass sve_int_mladdsub_vvv_pred opc, string asm, SDPatternOperator op> { - def _B : sve_int_mladdsub_vvv_pred<0b00, opc, asm, ZPR8>; - def _H : sve_int_mladdsub_vvv_pred<0b01, opc, asm, ZPR16>; - def _S : sve_int_mladdsub_vvv_pred<0b10, opc, asm, ZPR32>; - def _D : sve_int_mladdsub_vvv_pred<0b11, opc, asm, ZPR64>; - - def : SVE_4_Op_Pat(NAME # _B)>; - def : SVE_4_Op_Pat(NAME # _H)>; - def : SVE_4_Op_Pat(NAME # _S)>; - def : SVE_4_Op_Pat(NAME # _D)>; -} - -class sve_int_mlas_vvv_pred sz8_64, bits<1> opc, string asm, - ZPRRegOp zprty> -: I<(outs zprty:$Zda), (ins PPR3bAny:$Pg, zprty:$_Zda, zprty:$Zn, zprty:$Zm), - asm, "\t$Zda, $Pg/m, $Zn, $Zm", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zda; - bits<5> Zm; - bits<5> Zn; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = sz8_64; - let Inst{21} = 0b0; - let Inst{20-16} = Zm; - let Inst{15-14} = 0b01; - let Inst{13} = opc; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; - let ElementSize = zprty.ElementSize; -} - -multiclass sve_int_mlas_vvv_pred opc, string asm, SDPatternOperator op, - SDPatternOperator outerop, SDPatternOperator mulop> { - def _B : sve_int_mlas_vvv_pred<0b00, opc, asm, ZPR8>; - def _H : sve_int_mlas_vvv_pred<0b01, opc, asm, ZPR16>; - def _S : sve_int_mlas_vvv_pred<0b10, opc, asm, ZPR32>; - def _D : sve_int_mlas_vvv_pred<0b11, opc, asm, ZPR64>; - - def : SVE_4_Op_Pat(NAME # _B)>; - def : SVE_4_Op_Pat(NAME # _H)>; - def : SVE_4_Op_Pat(NAME # _S)>; - def : SVE_4_Op_Pat(NAME # _D)>; - - def : Pat<(outerop nxv16i8:$Op1, (mulop nxv16i1:$pred, nxv16i8:$Op2, nxv16i8:$Op3)), - (!cast(NAME # _B) $pred, $Op1, $Op2, $Op3)>; - def : Pat<(outerop nxv8i16:$Op1, (mulop nxv8i1:$pred, nxv8i16:$Op2, nxv8i16:$Op3)), - (!cast(NAME # _H) $pred, $Op1, 
$Op2, $Op3)>; - def : Pat<(outerop nxv4i32:$Op1, (mulop nxv4i1:$pred, nxv4i32:$Op2, nxv4i32:$Op3)), - (!cast(NAME # _S) $pred, $Op1, $Op2, $Op3)>; - def : Pat<(outerop nxv2i64:$Op1, (mulop nxv2i1:$pred, nxv2i64:$Op2, nxv2i64:$Op3)), - (!cast(NAME # _D) $pred, $Op1, $Op2, $Op3)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Integer Multiply-Add - Unpredicated Group -//===----------------------------------------------------------------------===// - -class sve2_int_mla sz, bits<5> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> -: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm), - asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { - bits<5> Zda; - bits<5> Zn; - bits<5> Zm; - let Inst{31-24} = 0b01000100; - let Inst{23-22} = sz; - let Inst{21} = 0b0; - let Inst{20-16} = Zm; - let Inst{15} = 0b0; - let Inst{14-10} = opc; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve2_int_mla { - def _B : sve2_int_mla<0b00, { 0b1110, S }, asm, ZPR8, ZPR8>; - def _H : sve2_int_mla<0b01, { 0b1110, S }, asm, ZPR16, ZPR16>; - def _S : sve2_int_mla<0b10, { 0b1110, S }, asm, ZPR32, ZPR32>; - def _D : sve2_int_mla<0b11, { 0b1110, S }, asm, ZPR64, ZPR64>; - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -multiclass sve2_int_mla_long opc, string asm, SDPatternOperator op> { - def _H : sve2_int_mla<0b01, opc, asm, ZPR16, ZPR8>; - def _S : sve2_int_mla<0b10, opc, asm, ZPR32, ZPR16>; - def _D : sve2_int_mla<0b11, opc, asm, ZPR64, ZPR32>; - - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Integer Multiply-Add - Indexed Group 
-//===----------------------------------------------------------------------===// - -class sve2_int_mla_by_indexed_elem sz, bits<6> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2, - ZPRRegOp zprty3, Operand itype> -: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop), - asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> { - bits<5> Zda; - bits<5> Zn; - let Inst{31-24} = 0b01000100; - let Inst{23-22} = sz; - let Inst{21} = 0b1; - let Inst{15-10} = opc; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve2_int_mla_by_indexed_elem opc, bit S, string asm, - SDPatternOperator op> { - def _H : sve2_int_mla_by_indexed_elem<{0, ?}, { 0b000, opc, S }, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexH32b> { - bits<3> Zm; - bits<3> iop; - let Inst{22} = iop{2}; - let Inst{20-19} = iop{1-0}; - let Inst{18-16} = Zm; - } - def _S : sve2_int_mla_by_indexed_elem<0b10, { 0b000, opc, S }, asm, ZPR32, ZPR32, ZPR3b32, VectorIndexS32b> { - bits<3> Zm; - bits<2> iop; - let Inst{20-19} = iop; - let Inst{18-16} = Zm; - } - def _D : sve2_int_mla_by_indexed_elem<0b11, { 0b000, opc, S }, asm, ZPR64, ZPR64, ZPR4b64, VectorIndexD32b> { - bits<4> Zm; - bit iop; - let Inst{20} = iop; - let Inst{19-16} = Zm; - } - - def : SVE_4_Op_Imm_Pat(NAME # _H)>; - def : SVE_4_Op_Imm_Pat(NAME # _S)>; - def : SVE_4_Op_Imm_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Integer Multiply-Add Long - Indexed Group -//===----------------------------------------------------------------------===// - -multiclass sve2_int_mla_long_by_indexed_elem opc, string asm, - SDPatternOperator op> { - def _S : sve2_int_mla_by_indexed_elem<0b10, { opc{3}, 0b0, opc{2-1}, ?, opc{0} }, - asm, ZPR32, ZPR16, ZPR3b16, VectorIndexH32b> { - bits<3> Zm; - bits<3> iop; - let Inst{20-19} = iop{2-1}; - let Inst{18-16} = Zm; - 
let Inst{11} = iop{0}; - } - def _D : sve2_int_mla_by_indexed_elem<0b11, { opc{3}, 0b0, opc{2-1}, ?, opc{0} }, - asm, ZPR64, ZPR32, ZPR4b32, VectorIndexS32b> { - bits<4> Zm; - bits<2> iop; - let Inst{20} = iop{1}; - let Inst{19-16} = Zm; - let Inst{11} = iop{0}; - } - - def : SVE_4_Op_Imm_Pat(NAME # _S)>; - def : SVE_4_Op_Imm_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE Integer Dot Product Group -//===----------------------------------------------------------------------===// - -class sve_intx_dot -: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm), asm, - "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { - bits<5> Zda; - bits<5> Zn; - bits<5> Zm; - let Inst{31-23} = 0b010001001; - let Inst{22} = sz; - let Inst{21} = 0; - let Inst{20-16} = Zm; - let Inst{15-11} = 0; - let Inst{10} = U; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; -} - -multiclass sve_intx_dot { - def _S : sve_intx_dot<0b0, opc, asm, ZPR32, ZPR8>; - def _D : sve_intx_dot<0b1, opc, asm, ZPR64, ZPR16>; - - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE Integer Dot Product Group - Indexed Group -//===----------------------------------------------------------------------===// - -class sve_intx_dot_by_indexed_elem -: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop), - asm, "\t$Zda, $Zn, $Zm$iop", - "", []>, Sched<[]> { - bits<5> Zda; - bits<5> Zn; - let Inst{31-23} = 0b010001001; - let Inst{22} = sz; - let Inst{21} = 0b1; - let Inst{15-11} = 0; - let Inst{10} = U; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; -} - -multiclass sve_intx_dot_by_indexed_elem { - def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, 
ZPR3b8, VectorIndexS32b_timm> { - bits<2> iop; - bits<3> Zm; - let Inst{20-19} = iop; - let Inst{18-16} = Zm; - } - def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b_timm> { - bits<1> iop; - bits<4> Zm; - let Inst{20} = iop; - let Inst{19-16} = Zm; - } - - def : SVE_4_Op_Imm_Pat(NAME # _S)>; - def : SVE_4_Op_Imm_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Complex Integer Dot Product Group -//===----------------------------------------------------------------------===// - -class sve2_complex_int_arith sz, bits<4> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> -: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm, - complexrotateop:$rot), - asm, "\t$Zda, $Zn, $Zm, $rot", "", []>, Sched<[]> { - bits<5> Zda; - bits<5> Zn; - bits<5> Zm; - bits<2> rot; - let Inst{31-24} = 0b01000100; - let Inst{23-22} = sz; - let Inst{21} = 0b0; - let Inst{20-16} = Zm; - let Inst{15-12} = opc; - let Inst{11-10} = rot; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve2_cintx_dot { - def _S : sve2_complex_int_arith<0b10, 0b0001, asm, ZPR32, ZPR8>; - def _D : sve2_complex_int_arith<0b11, 0b0001, asm, ZPR64, ZPR16>; - - def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv16i8 ZPR8:$Op2), (nxv16i8 ZPR8:$Op3), - (i32 complexrotateop:$imm))), - (!cast(NAME # "_S") ZPR32:$Op1, ZPR8:$Op2, ZPR8:$Op3, complexrotateop:$imm)>; - def : Pat<(nxv2i64 (op (nxv2i64 ZPR64:$Op1), (nxv8i16 ZPR16:$Op2), (nxv8i16 ZPR16:$Op3), - (i32 complexrotateop:$imm))), - (!cast(NAME # "_D") ZPR64:$Op1, ZPR16:$Op2, ZPR16:$Op3, complexrotateop:$imm)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Complex Multiply-Add Group -//===----------------------------------------------------------------------===// - 
-multiclass sve2_int_cmla { - def _B : sve2_complex_int_arith<0b00, { 0b001, opc }, asm, ZPR8, ZPR8>; - def _H : sve2_complex_int_arith<0b01, { 0b001, opc }, asm, ZPR16, ZPR16>; - def _S : sve2_complex_int_arith<0b10, { 0b001, opc }, asm, ZPR32, ZPR32>; - def _D : sve2_complex_int_arith<0b11, { 0b001, opc }, asm, ZPR64, ZPR64>; - - def : SVE_4_Op_Imm_Pat(NAME # _B)>; - def : SVE_4_Op_Imm_Pat(NAME # _H)>; - def : SVE_4_Op_Imm_Pat(NAME # _S)>; - def : SVE_4_Op_Imm_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Complex Integer Dot Product - Indexed Group -//===----------------------------------------------------------------------===// - -class sve2_complex_int_arith_indexed sz, bits<4> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2, - ZPRRegOp zprty3, Operand itype> -: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop, - complexrotateop:$rot), - asm, "\t$Zda, $Zn, $Zm$iop, $rot", "", []>, Sched<[]> { - bits<5> Zda; - bits<5> Zn; - bits<2> rot; - let Inst{31-24} = 0b01000100; - let Inst{23-22} = sz; - let Inst{21} = 0b1; - let Inst{15-12} = opc; - let Inst{11-10} = rot; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve2_cintx_dot_by_indexed_elem { - def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b> { - bits<2> iop; - bits<3> Zm; - let Inst{20-19} = iop; - let Inst{18-16} = Zm; - } - def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b> { - bit iop; - bits<4> Zm; - let Inst{20} = iop; - let Inst{19-16} = Zm; - } - - def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv16i8 ZPR8:$Op2), (nxv16i8 ZPR8:$Op3), - (i32 VectorIndexS32b_timm:$idx), (i32 complexrotateop:$imm))), - (!cast(NAME # "_S") ZPR32:$Op1, ZPR8:$Op2, ZPR8:$Op3, VectorIndexS32b_timm:$idx, 
complexrotateop:$imm)>; - def : Pat<(nxv2i64 (op (nxv2i64 ZPR64:$Op1), (nxv8i16 ZPR16:$Op2), (nxv8i16 ZPR16:$Op3), - (i32 VectorIndexD32b_timm:$idx), (i32 complexrotateop:$imm))), - (!cast(NAME # "_D") ZPR64:$Op1, ZPR16:$Op2, ZPR16:$Op3, VectorIndexD32b_timm:$idx, complexrotateop:$imm)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Complex Multiply-Add - Indexed Group -//===----------------------------------------------------------------------===// - -multiclass sve2_cmla_by_indexed_elem { - def _H : sve2_complex_int_arith_indexed<0b10, { 0b011, opc }, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexS32b> { - bits<2> iop; - bits<3> Zm; - let Inst{20-19} = iop; - let Inst{18-16} = Zm; - } - def _S : sve2_complex_int_arith_indexed<0b11, { 0b011, opc }, asm, ZPR32, ZPR32, ZPR4b32, VectorIndexD32b> { - bit iop; - bits<4> Zm; - let Inst{20} = iop; - let Inst{19-16} = Zm; - } - - def : Pat<(nxv8i16 (op (nxv8i16 ZPR16:$Op1), (nxv8i16 ZPR16:$Op2), (nxv8i16 ZPR16:$Op3), - (i32 VectorIndexS32b_timm:$idx), (i32 complexrotateop:$imm))), - (!cast(NAME # "_H") ZPR16:$Op1, ZPR16:$Op2, ZPR16:$Op3, VectorIndexS32b_timm:$idx, complexrotateop:$imm)>; - - def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv4i32 ZPR32:$Op2), (nxv4i32 ZPR32:$Op3), - (i32 VectorIndexD32b_timm:$idx), (i32 complexrotateop:$imm))), - (!cast(NAME # "_S") ZPR32:$Op1, ZPR32:$Op2, ZPR32:$Op3, VectorIndexD32b_timm:$idx, complexrotateop:$imm)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Integer Multiply - Unpredicated Group -//===----------------------------------------------------------------------===// - -class sve2_int_mul sz, bits<3> opc, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), - asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { - bits<5> Zd; - bits<5> Zm; - bits<5> Zn; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = sz; - let Inst{21} = 0b1; - let Inst{20-16} = Zm; - let 
Inst{15-13} = 0b011; - let Inst{12-10} = opc; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve2_int_mul opc, string asm, SDPatternOperator op, - SDPatternOperator op_pred = null_frag> { - def _B : sve2_int_mul<0b00, opc, asm, ZPR8>; - def _H : sve2_int_mul<0b01, opc, asm, ZPR16>; - def _S : sve2_int_mul<0b10, opc, asm, ZPR32>; - def _D : sve2_int_mul<0b11, opc, asm, ZPR64>; - - def : SVE_2_Op_Pat(NAME # _B)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; - - def : SVE_2_Op_Pred_All_Active(NAME # _B)>; - def : SVE_2_Op_Pred_All_Active(NAME # _H)>; - def : SVE_2_Op_Pred_All_Active(NAME # _S)>; - def : SVE_2_Op_Pred_All_Active(NAME # _D)>; -} - -multiclass sve2_int_mul_single opc, string asm, SDPatternOperator op> { - def _B : sve2_int_mul<0b00, opc, asm, ZPR8>; - - def : SVE_2_Op_Pat(NAME # _B)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Integer Multiply - Indexed Group -//===----------------------------------------------------------------------===// - -class sve2_int_mul_by_indexed_elem sz, bits<4> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2, - ZPRRegOp zprty3, Operand itype> -: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty3:$Zm, itype:$iop), - asm, "\t$Zd, $Zn, $Zm$iop", "", []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - let Inst{31-24} = 0b01000100; - let Inst{23-22} = sz; - let Inst{21} = 0b1; - let Inst{15-14} = 0b11; - let Inst{13-10} = opc; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve2_int_mul_by_indexed_elem opc, string asm, - SDPatternOperator op> { - def _H : sve2_int_mul_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexH32b> { - bits<3> Zm; - bits<3> iop; - let Inst{22} = iop{2}; - let Inst{20-19} = iop{1-0}; - let Inst{18-16} = Zm; - } - def _S : sve2_int_mul_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR32, ZPR3b32, VectorIndexS32b> { - bits<3> Zm; - bits<2> iop; - let Inst{20-19} = iop; - 
let Inst{18-16} = Zm; - } - def _D : sve2_int_mul_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR64, ZPR4b64, VectorIndexD32b> { - bits<4> Zm; - bit iop; - let Inst{20} = iop; - let Inst{19-16} = Zm; - } - - def : SVE_3_Op_Imm_Pat(NAME # _H)>; - def : SVE_3_Op_Imm_Pat(NAME # _S)>; - def : SVE_3_Op_Imm_Pat(NAME # _D)>; -} - -multiclass sve2_int_mul_long_by_indexed_elem opc, string asm, - SDPatternOperator op> { - def _S : sve2_int_mul_by_indexed_elem<0b10, { opc{2-1}, ?, opc{0} }, asm, - ZPR32, ZPR16, ZPR3b16, VectorIndexH32b> { - bits<3> Zm; - bits<3> iop; - let Inst{20-19} = iop{2-1}; - let Inst{18-16} = Zm; - let Inst{11} = iop{0}; - } - def _D : sve2_int_mul_by_indexed_elem<0b11, { opc{2-1}, ?, opc{0} }, asm, - ZPR64, ZPR32, ZPR4b32, VectorIndexS32b> { - bits<4> Zm; - bits<2> iop; - let Inst{20} = iop{1}; - let Inst{19-16} = Zm; - let Inst{11} = iop{0}; - } - - def : SVE_3_Op_Imm_Pat(NAME # _S)>; - def : SVE_3_Op_Imm_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Integer - Predicated Group -//===----------------------------------------------------------------------===// - -class sve2_int_arith_pred sz, bits<6> opc, string asm, - ZPRRegOp zprty> -: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm), - asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm", "", []>, Sched<[]> { - bits<3> Pg; - bits<5> Zm; - bits<5> Zdn; - let Inst{31-24} = 0b01000100; - let Inst{23-22} = sz; - let Inst{21-20} = 0b01; - let Inst{20-16} = opc{5-1}; - let Inst{15-14} = 0b10; - let Inst{13} = opc{0}; - let Inst{12-10} = Pg; - let Inst{9-5} = Zm; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = zprty.ElementSize; -} - -multiclass sve2_int_arith_pred opc, string asm, SDPatternOperator op, - string Ps = "", - DestructiveInstTypeEnum flags=DestructiveOther, - string revname="", bit isReverseInstr=0> { - let DestructiveInstType = flags in { - def _B : 
sve2_int_arith_pred<0b00, opc, asm, ZPR8>, - SVEPseudo2Instr, SVEInstr2Rev; - def _H : sve2_int_arith_pred<0b01, opc, asm, ZPR16>, - SVEPseudo2Instr, SVEInstr2Rev; - def _S : sve2_int_arith_pred<0b10, opc, asm, ZPR32>, - SVEPseudo2Instr, SVEInstr2Rev; - def _D : sve2_int_arith_pred<0b11, opc, asm, ZPR64>, - SVEPseudo2Instr, SVEInstr2Rev; - } - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -class sve2_int_sadd_long_accum_pairwise sz, bit U, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> -: I<(outs zprty1:$Zda), (ins PPR3bAny:$Pg, zprty1:$_Zda, zprty2:$Zn), - asm, "\t$Zda, $Pg/m, $Zn", "", []>, Sched<[]> { - bits<3> Pg; - bits<5> Zn; - bits<5> Zda; - let Inst{31-24} = 0b01000100; - let Inst{23-22} = sz; - let Inst{21-17} = 0b00010; - let Inst{16} = U; - let Inst{15-13} = 0b101; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; - let ElementSize = zprty1.ElementSize; -} - -multiclass sve2_int_sadd_long_accum_pairwise { - def _H : sve2_int_sadd_long_accum_pairwise<0b01, U, asm, ZPR16, ZPR8>; - def _S : sve2_int_sadd_long_accum_pairwise<0b10, U, asm, ZPR32, ZPR16>; - def _D : sve2_int_sadd_long_accum_pairwise<0b11, U, asm, ZPR64, ZPR32>; - - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -class sve2_int_un_pred_arit sz, bit Q, bits<2> opc, - string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, zprty:$Zn), - asm, "\t$Zd, $Pg/m, $Zn", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zd; - bits<5> Zn; - let Inst{31-24} = 0b01000100; - let Inst{23-22} = sz; - let Inst{21-20} = 0b00; - let Inst{19} = Q; - let Inst{18} = 0b0; - let Inst{17-16} = opc; - let Inst{15-13} = 0b101; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; - - let Constraints = "$Zd = $_Zd"; - let 
DestructiveInstType = DestructiveUnaryPassthru; - let ElementSize = zprty.ElementSize; -} - -multiclass sve2_int_un_pred_arit_s opc, string asm, - SDPatternOperator op> { - def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>, - SVEPseudo2Instr; - - def : SVE_3_Op_Pat(NAME # _S)>; - - def _UNDEF_S : PredOneOpPassthruPseudo; - - defm : SVE_3_Op_Undef_Pat(NAME # _UNDEF_S)>; -} - -multiclass sve2_int_un_pred_arit opc, string asm, SDPatternOperator op> { - def _B : sve2_int_un_pred_arit<0b00, opc{2}, opc{1-0}, asm, ZPR8>, - SVEPseudo2Instr; - def _H : sve2_int_un_pred_arit<0b01, opc{2}, opc{1-0}, asm, ZPR16>, - SVEPseudo2Instr; - def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>, - SVEPseudo2Instr; - def _D : sve2_int_un_pred_arit<0b11, opc{2}, opc{1-0}, asm, ZPR64>, - SVEPseudo2Instr; - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; - - def _UNDEF_B : PredOneOpPassthruPseudo; - def _UNDEF_H : PredOneOpPassthruPseudo; - def _UNDEF_S : PredOneOpPassthruPseudo; - def _UNDEF_D : PredOneOpPassthruPseudo; - - defm : SVE_3_Op_Undef_Pat(NAME # _UNDEF_B)>; - defm : SVE_3_Op_Undef_Pat(NAME # _UNDEF_H)>; - defm : SVE_3_Op_Undef_Pat(NAME # _UNDEF_S)>; - defm : SVE_3_Op_Undef_Pat(NAME # _UNDEF_D)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Widening Integer Arithmetic Group -//===----------------------------------------------------------------------===// - -class sve2_wide_int_arith sz, bits<5> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2, ZPRRegOp zprty3> -: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty3:$Zm), - asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - bits<5> Zm; - let Inst{31-24} = 0b01000101; - let Inst{23-22} = sz; - let Inst{21} = 0b0; - let Inst{20-16} = Zm; - let Inst{15} = 0b0; - let Inst{14-10} = opc; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass 
sve2_wide_int_arith_long opc, string asm, - SDPatternOperator op> { - def _H : sve2_wide_int_arith<0b01, opc, asm, ZPR16, ZPR8, ZPR8>; - def _S : sve2_wide_int_arith<0b10, opc, asm, ZPR32, ZPR16, ZPR16>; - def _D : sve2_wide_int_arith<0b11, opc, asm, ZPR64, ZPR32, ZPR32>; - - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -multiclass sve2_wide_int_arith_wide opc, string asm, - SDPatternOperator op> { - def _H : sve2_wide_int_arith<0b01, { 0b10, opc }, asm, ZPR16, ZPR16, ZPR8>; - def _S : sve2_wide_int_arith<0b10, { 0b10, opc }, asm, ZPR32, ZPR32, ZPR16>; - def _D : sve2_wide_int_arith<0b11, { 0b10, opc }, asm, ZPR64, ZPR64, ZPR32>; - - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -multiclass sve2_wide_int_arith_pmul sz, bits<5> opc, string asm, - SDPatternOperator op> { - def NAME : sve2_wide_int_arith; - - // To avoid using 128 bit elements in the IR, the pattern below works with - // llvm intrinsics with the _pair suffix, to reflect that - // _Q is implemented as a pair of _D. - def : SVE_2_Op_Pat(NAME)>; -} - -multiclass sve2_pmul_long opc, string asm, SDPatternOperator op> { - def _H : sve2_wide_int_arith<0b01, {0b1101, opc}, asm, ZPR16, ZPR8, ZPR8>; - def _D : sve2_wide_int_arith<0b11, {0b1101, opc}, asm, ZPR64, ZPR32, ZPR32>; - - // To avoid using 128 bit elements in the IR, the patterns below work with - // llvm intrinsics with the _pair suffix, to reflect that - // _H is implemented as a pair of _B and _D is implemented as a pair of _S. 
- def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Misc Group -//===----------------------------------------------------------------------===// - -class sve2_misc sz, bits<4> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> -: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty2:$Zm), - asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - bits<5> Zm; - let Inst{31-24} = 0b01000101; - let Inst{23-22} = sz; - let Inst{21} = 0b0; - let Inst{20-16} = Zm; - let Inst{15-14} = 0b10; - let Inst{13-10} = opc; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve2_misc_bitwise opc, string asm, SDPatternOperator op> { - def _B : sve2_misc<0b00, opc, asm, ZPR8, ZPR8>; - def _H : sve2_misc<0b01, opc, asm, ZPR16, ZPR16>; - def _S : sve2_misc<0b10, opc, asm, ZPR32, ZPR32>; - def _D : sve2_misc<0b11, opc, asm, ZPR64, ZPR64>; - - def : SVE_2_Op_Pat(NAME # _B)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -multiclass sve2_misc_int_addsub_long_interleaved opc, string asm, - SDPatternOperator op> { - def _H : sve2_misc<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>; - def _S : sve2_misc<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>; - def _D : sve2_misc<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>; - - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -class sve2_bitwise_xor_interleaved sz, bits<1> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> -: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm), - asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - bits<5> Zm; - let Inst{31-24} = 0b01000101; - let Inst{23-22} = sz; - let Inst{21} = 0b0; - let Inst{20-16} = Zm; - let Inst{15-11} = 0b10010; - let Inst{10} = opc; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; - - let Constraints = "$Zd = $_Zd"; - let 
DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve2_bitwise_xor_interleaved { - def _B : sve2_bitwise_xor_interleaved<0b00, opc, asm, ZPR8, ZPR8>; - def _H : sve2_bitwise_xor_interleaved<0b01, opc, asm, ZPR16, ZPR16>; - def _S : sve2_bitwise_xor_interleaved<0b10, opc, asm, ZPR32, ZPR32>; - def _D : sve2_bitwise_xor_interleaved<0b11, opc, asm, ZPR64, ZPR64>; - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -class sve2_bitwise_shift_left_long tsz8_64, bits<2> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2, - Operand immtype> -: I<(outs zprty1:$Zd), (ins zprty2:$Zn, immtype:$imm), - asm, "\t$Zd, $Zn, $imm", - "", []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - bits<5> imm; - let Inst{31-23} = 0b010001010; - let Inst{22} = tsz8_64{2}; - let Inst{21} = 0b0; - let Inst{20-19} = tsz8_64{1-0}; - let Inst{18-16} = imm{2-0}; // imm3 - let Inst{15-12} = 0b1010; - let Inst{11-10} = opc; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve2_bitwise_shift_left_long opc, string asm, - SDPatternOperator op> { - def _H : sve2_bitwise_shift_left_long<{0,0,1}, opc, asm, - ZPR16, ZPR8, vecshiftL8>; - def _S : sve2_bitwise_shift_left_long<{0,1,?}, opc, asm, - ZPR32, ZPR16, vecshiftL16> { - let Inst{19} = imm{3}; - } - def _D : sve2_bitwise_shift_left_long<{1,?,?}, opc, asm, - ZPR64, ZPR32, vecshiftL32> { - let Inst{20-19} = imm{4-3}; - } - def : SVE_2_Op_Imm_Pat(NAME # _H)>; - def : SVE_2_Op_Imm_Pat(NAME # _S)>; - def : SVE_2_Op_Imm_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Accumulate Group -//===----------------------------------------------------------------------===// - -class sve2_int_bin_shift_imm tsz8_64, bit opc, string asm, - ZPRRegOp zprty, Operand immtype> -: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, immtype:$imm), - asm, "\t$Zd, $Zn, $imm", - 
"", []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - bits<6> imm; - let Inst{31-24} = 0b01000101; - let Inst{23-22} = tsz8_64{3-2}; - let Inst{21} = 0b0; - let Inst{20-19} = tsz8_64{1-0}; - let Inst{18-16} = imm{2-0}; // imm3 - let Inst{15-11} = 0b11110; - let Inst{10} = opc; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; - - let Constraints = "$Zd = $_Zd"; -} - -multiclass sve2_int_bin_shift_imm_left { - def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; - def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { - let Inst{19} = imm{3}; - } - def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { - let Inst{20-19} = imm{4-3}; - } - def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { - let Inst{22} = imm{5}; - let Inst{20-19} = imm{4-3}; - } - - def : SVE_3_Op_Imm_Pat(NAME # _B)>; - def : SVE_3_Op_Imm_Pat(NAME # _H)>; - def : SVE_3_Op_Imm_Pat(NAME # _S)>; - def : SVE_3_Op_Imm_Pat(NAME # _D)>; -} - -multiclass sve2_int_bin_shift_imm_right { - def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; - def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { - let Inst{19} = imm{3}; - } - def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { - let Inst{20-19} = imm{4-3}; - } - def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { - let Inst{22} = imm{5}; - let Inst{20-19} = imm{4-3}; - } - - def : SVE_3_Op_Imm_Pat(NAME # _B)>; - def : SVE_3_Op_Imm_Pat(NAME # _H)>; - def : SVE_3_Op_Imm_Pat(NAME # _S)>; - def : SVE_3_Op_Imm_Pat(NAME # _D)>; -} - -class sve2_int_bin_accum_shift_imm tsz8_64, bits<2> opc, string asm, - ZPRRegOp zprty, Operand immtype> -: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, immtype:$imm), - asm, "\t$Zda, $Zn, $imm", - "", []>, Sched<[]> { - bits<5> Zda; - bits<5> Zn; - bits<6> imm; - let Inst{31-24} = 0b01000101; - let Inst{23-22} = tsz8_64{3-2}; - let Inst{21} = 0b0; - let Inst{20-19} = 
tsz8_64{1-0}; - let Inst{18-16} = imm{2-0}; // imm3 - let Inst{15-12} = 0b1110; - let Inst{11-10} = opc; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve2_int_bin_accum_shift_imm_right opc, string asm, - SDPatternOperator op> { - def _B : sve2_int_bin_accum_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; - def _H : sve2_int_bin_accum_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { - let Inst{19} = imm{3}; - } - def _S : sve2_int_bin_accum_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { - let Inst{20-19} = imm{4-3}; - } - def _D : sve2_int_bin_accum_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { - let Inst{22} = imm{5}; - let Inst{20-19} = imm{4-3}; - } - - def : SVE_3_Op_Imm_Pat(NAME # _B)>; - def : SVE_3_Op_Imm_Pat(NAME # _H)>; - def : SVE_3_Op_Imm_Pat(NAME # _S)>; - def : SVE_3_Op_Imm_Pat(NAME # _D)>; -} - -class sve2_int_cadd sz, bit opc, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, complexrotateopodd:$rot), - asm, "\t$Zdn, $_Zdn, $Zm, $rot", "", []>, Sched<[]> { - bits<5> Zdn; - bits<5> Zm; - bit rot; - let Inst{31-24} = 0b01000101; - let Inst{23-22} = sz; - let Inst{21-17} = 0b00000; - let Inst{16} = opc; - let Inst{15-11} = 0b11011; - let Inst{10} = rot; - let Inst{9-5} = Zm; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve2_int_cadd { - def _B : sve2_int_cadd<0b00, opc, asm, ZPR8>; - def _H : sve2_int_cadd<0b01, opc, asm, ZPR16>; - def _S : sve2_int_cadd<0b10, opc, asm, ZPR32>; - def _D : sve2_int_cadd<0b11, opc, asm, ZPR64>; - - def : SVE_3_Op_Imm_Pat(NAME # _B)>; - def : SVE_3_Op_Imm_Pat(NAME # _H)>; - def : SVE_3_Op_Imm_Pat(NAME # _S)>; - def : SVE_3_Op_Imm_Pat(NAME # _D)>; -} - -class sve2_int_absdiff_accum sz, bits<4> opc, string asm, - ZPRRegOp 
zprty1, ZPRRegOp zprty2> -: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm), - asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { - bits<5> Zda; - bits<5> Zn; - bits<5> Zm; - let Inst{31-24} = 0b01000101; - let Inst{23-22} = sz; - let Inst{21} = 0b0; - let Inst{20-16} = Zm; - let Inst{15-14} = 0b11; - let Inst{13-10} = opc; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve2_int_absdiff_accum { - def _B : sve2_int_absdiff_accum<0b00, { 0b111, opc }, asm, ZPR8, ZPR8>; - def _H : sve2_int_absdiff_accum<0b01, { 0b111, opc }, asm, ZPR16, ZPR16>; - def _S : sve2_int_absdiff_accum<0b10, { 0b111, opc }, asm, ZPR32, ZPR32>; - def _D : sve2_int_absdiff_accum<0b11, { 0b111, opc }, asm, ZPR64, ZPR64>; - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -multiclass sve2_int_absdiff_accum_long opc, string asm, - SDPatternOperator op> { - def _H : sve2_int_absdiff_accum<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>; - def _S : sve2_int_absdiff_accum<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>; - def _D : sve2_int_absdiff_accum<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>; - - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -multiclass sve2_int_addsub_long_carry opc, string asm, - SDPatternOperator op> { - def _S : sve2_int_absdiff_accum<{ opc{1}, 0b0 }, { 0b010, opc{0} }, asm, - ZPR32, ZPR32>; - def _D : sve2_int_absdiff_accum<{ opc{1}, 0b1 }, { 0b010, opc{0} }, asm, - ZPR64, ZPR64>; - - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Narrowing Group -//===----------------------------------------------------------------------===// - -class sve2_int_bin_shift_imm_narrow_bottom tsz8_64, bits<3> 
opc, - string asm, ZPRRegOp zprty1, - ZPRRegOp zprty2, Operand immtype> -: I<(outs zprty1:$Zd), (ins zprty2:$Zn, immtype:$imm), - asm, "\t$Zd, $Zn, $imm", - "", []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - bits<5> imm; - let Inst{31-23} = 0b010001010; - let Inst{22} = tsz8_64{2}; - let Inst{21} = 0b1; - let Inst{20-19} = tsz8_64{1-0}; - let Inst{18-16} = imm{2-0}; // imm3 - let Inst{15-14} = 0b00; - let Inst{13-11} = opc; - let Inst{10} = 0b0; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve2_int_bin_shift_imm_right_narrow_bottom opc, string asm, - SDPatternOperator op> { - def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16, - tvecshiftR8>; - def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32, - tvecshiftR16> { - let Inst{19} = imm{3}; - } - def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64, - tvecshiftR32> { - let Inst{20-19} = imm{4-3}; - } - def : SVE_2_Op_Imm_Pat(NAME # _B)>; - def : SVE_2_Op_Imm_Pat(NAME # _H)>; - def : SVE_2_Op_Imm_Pat(NAME # _S)>; -} - -class sve2_int_bin_shift_imm_narrow_top tsz8_64, bits<3> opc, - string asm, ZPRRegOp zprty1, - ZPRRegOp zprty2, Operand immtype> -: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, immtype:$imm), - asm, "\t$Zd, $Zn, $imm", - "", []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - bits<5> imm; - let Inst{31-23} = 0b010001010; - let Inst{22} = tsz8_64{2}; - let Inst{21} = 0b1; - let Inst{20-19} = tsz8_64{1-0}; - let Inst{18-16} = imm{2-0}; // imm3 - let Inst{15-14} = 0b00; - let Inst{13-11} = opc; - let Inst{10} = 0b1; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; - - let Constraints = "$Zd = $_Zd"; -} - -multiclass sve2_int_bin_shift_imm_right_narrow_top opc, string asm, - SDPatternOperator op> { - def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16, - tvecshiftR8>; - def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32, - tvecshiftR16> { - let Inst{19} = imm{3}; - } - def 
_S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64, - tvecshiftR32> { - let Inst{20-19} = imm{4-3}; - } - def : SVE_3_Op_Imm_Pat(NAME # _B)>; - def : SVE_3_Op_Imm_Pat(NAME # _H)>; - def : SVE_3_Op_Imm_Pat(NAME # _S)>; -} - -class sve2_int_addsub_narrow_high_bottom sz, bits<2> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> -: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty2:$Zm), - asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - bits<5> Zm; - let Inst{31-24} = 0b01000101; - let Inst{23-22} = sz; - let Inst{21} = 0b1; - let Inst{20-16} = Zm; - let Inst{15-13} = 0b011; - let Inst{12-11} = opc; // S, R - let Inst{10} = 0b0; // Top - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve2_int_addsub_narrow_high_bottom opc, string asm, - SDPatternOperator op> { - def _B : sve2_int_addsub_narrow_high_bottom<0b01, opc, asm, ZPR8, ZPR16>; - def _H : sve2_int_addsub_narrow_high_bottom<0b10, opc, asm, ZPR16, ZPR32>; - def _S : sve2_int_addsub_narrow_high_bottom<0b11, opc, asm, ZPR32, ZPR64>; - - def : SVE_2_Op_Pat(NAME # _B)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; -} - -class sve2_int_addsub_narrow_high_top sz, bits<2> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> -: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm), - asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - bits<5> Zm; - let Inst{31-24} = 0b01000101; - let Inst{23-22} = sz; - let Inst{21} = 0b1; - let Inst{20-16} = Zm; - let Inst{15-13} = 0b011; - let Inst{12-11} = opc; // S, R - let Inst{10} = 0b1; // Top - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; - - let Constraints = "$Zd = $_Zd"; -} - -multiclass sve2_int_addsub_narrow_high_top opc, string asm, - SDPatternOperator op> { - def _B : sve2_int_addsub_narrow_high_top<0b01, opc, asm, ZPR8, ZPR16>; - def _H : sve2_int_addsub_narrow_high_top<0b10, opc, asm, ZPR16, ZPR32>; - def _S : sve2_int_addsub_narrow_high_top<0b11, opc, asm, ZPR32, 
ZPR64>; - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; -} - -class sve2_int_sat_extract_narrow_bottom tsz8_64, bits<2> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> -: I<(outs zprty1:$Zd), (ins zprty2:$Zn), - asm, "\t$Zd, $Zn", "", []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - let Inst{31-23} = 0b010001010; - let Inst{22} = tsz8_64{2}; - let Inst{21} = 0b1; - let Inst{20-19} = tsz8_64{1-0}; - let Inst{18-13} = 0b000010; - let Inst{12-11} = opc; - let Inst{10} = 0b0; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve2_int_sat_extract_narrow_bottom opc, string asm, - SDPatternOperator op> { - def _B : sve2_int_sat_extract_narrow_bottom<0b001, opc, asm, ZPR8, ZPR16>; - def _H : sve2_int_sat_extract_narrow_bottom<0b010, opc, asm, ZPR16, ZPR32>; - def _S : sve2_int_sat_extract_narrow_bottom<0b100, opc, asm, ZPR32, ZPR64>; - - def : SVE_1_Op_Pat(NAME # _B)>; - def : SVE_1_Op_Pat(NAME # _H)>; - def : SVE_1_Op_Pat(NAME # _S)>; -} - -class sve2_int_sat_extract_narrow_top tsz8_64, bits<2> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> -: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn), - asm, "\t$Zd, $Zn", "", []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - let Inst{31-23} = 0b010001010; - let Inst{22} = tsz8_64{2}; - let Inst{21} = 0b1; - let Inst{20-19} = tsz8_64{1-0}; - let Inst{18-13} = 0b000010; - let Inst{12-11} = opc; - let Inst{10} = 0b1; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; - - let Constraints = "$Zd = $_Zd"; -} - -multiclass sve2_int_sat_extract_narrow_top opc, string asm, - SDPatternOperator op> { - def _B : sve2_int_sat_extract_narrow_top<0b001, opc, asm, ZPR8, ZPR16>; - def _H : sve2_int_sat_extract_narrow_top<0b010, opc, asm, ZPR16, ZPR32>; - def _S : sve2_int_sat_extract_narrow_top<0b100, opc, asm, ZPR32, ZPR64>; - - def : SVE_2_Op_Pat(NAME # _B)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; -} - 
-//===----------------------------------------------------------------------===// -// SVE Integer Arithmetic - Unary Predicated Group -//===----------------------------------------------------------------------===// - -class sve_int_un_pred_arit sz8_64, bits<4> opc, - string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, zprty:$Zn), - asm, "\t$Zd, $Pg/m, $Zn", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zd; - bits<5> Zn; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = sz8_64; - let Inst{21-20} = 0b01; - let Inst{19} = opc{0}; - let Inst{18-16} = opc{3-1}; - let Inst{15-13} = 0b101; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; - - let Constraints = "$Zd = $_Zd"; - let DestructiveInstType = DestructiveUnaryPassthru; - let ElementSize = zprty.ElementSize; -} - -multiclass sve_int_un_pred_arit_0 opc, string asm, - SDPatternOperator op> { - def _B : sve_int_un_pred_arit<0b00, { opc, 0b0 }, asm, ZPR8>, - SVEPseudo2Instr; - def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>, - SVEPseudo2Instr; - def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>, - SVEPseudo2Instr; - def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>, - SVEPseudo2Instr; - - def : SVE_1_Op_Passthru_Pat(NAME # _B)>; - def : SVE_1_Op_Passthru_Pat(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat(NAME # _S)>; - def : SVE_1_Op_Passthru_Pat(NAME # _D)>; - - def _UNDEF_B : PredOneOpPassthruPseudo; - def _UNDEF_H : PredOneOpPassthruPseudo; - def _UNDEF_S : PredOneOpPassthruPseudo; - def _UNDEF_D : PredOneOpPassthruPseudo; - - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_B)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_S)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_D)>; -} - -multiclass sve_int_un_pred_arit_0_h opc, string asm, - SDPatternOperator op> { - def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>, - SVEPseudo2Instr; - def _S : 
sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>, - SVEPseudo2Instr; - def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>, - SVEPseudo2Instr; - - def : SVE_InReg_Extend(NAME # _H)>; - def : SVE_InReg_Extend(NAME # _S)>; - def : SVE_InReg_Extend(NAME # _D)>; - - def _UNDEF_H : PredOneOpPassthruPseudo; - def _UNDEF_S : PredOneOpPassthruPseudo; - def _UNDEF_D : PredOneOpPassthruPseudo; - - defm : SVE_InReg_Extend_PassthruUndef(NAME # _UNDEF_H)>; - defm : SVE_InReg_Extend_PassthruUndef(NAME # _UNDEF_S)>; - defm : SVE_InReg_Extend_PassthruUndef(NAME # _UNDEF_D)>; -} - -multiclass sve_int_un_pred_arit_0_w opc, string asm, - SDPatternOperator op> { - def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>, - SVEPseudo2Instr; - def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>, - SVEPseudo2Instr; - - def : SVE_InReg_Extend(NAME # _S)>; - def : SVE_InReg_Extend(NAME # _D)>; - - def _UNDEF_S : PredOneOpPassthruPseudo; - def _UNDEF_D : PredOneOpPassthruPseudo; - - defm : SVE_InReg_Extend_PassthruUndef(NAME # _UNDEF_S)>; - defm : SVE_InReg_Extend_PassthruUndef(NAME # _UNDEF_D)>; -} - -multiclass sve_int_un_pred_arit_0_d opc, string asm, - SDPatternOperator op> { - def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>, - SVEPseudo2Instr; - - def : SVE_InReg_Extend(NAME # _D)>; - - def _UNDEF_D : PredOneOpPassthruPseudo; - - defm : SVE_InReg_Extend_PassthruUndef(NAME # _UNDEF_D)>; -} - -multiclass sve_int_un_pred_arit_1 opc, string asm, - SDPatternOperator op> { - def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>, - SVEPseudo2Instr; - def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>, - SVEPseudo2Instr; - def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>, - SVEPseudo2Instr; - def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>, - SVEPseudo2Instr; - - def : SVE_1_Op_Passthru_Pat(NAME # _B)>; - def : SVE_1_Op_Passthru_Pat(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat(NAME # _S)>; - def : 
SVE_1_Op_Passthru_Pat(NAME # _D)>; - - def _UNDEF_B : PredOneOpPassthruPseudo; - def _UNDEF_H : PredOneOpPassthruPseudo; - def _UNDEF_S : PredOneOpPassthruPseudo; - def _UNDEF_D : PredOneOpPassthruPseudo; - - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_B)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_S)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_D)>; -} - -multiclass sve_int_un_pred_arit_1_fp opc, string asm, SDPatternOperator op> { - def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>, - SVEPseudo2Instr; - def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>, - SVEPseudo2Instr; - def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>, - SVEPseudo2Instr; - - def : SVE_1_Op_Passthru_Pat(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat(NAME # _S)>; - def : SVE_1_Op_Passthru_Pat(NAME # _S)>; - def : SVE_1_Op_Passthru_Pat(NAME # _D)>; - - def _UNDEF_H : PredOneOpPassthruPseudo; - def _UNDEF_S : PredOneOpPassthruPseudo; - def _UNDEF_D : PredOneOpPassthruPseudo; - - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_S)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_S)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_D)>; -} - -//===----------------------------------------------------------------------===// -// SVE Integer Wide Immediate - Unpredicated Group -//===----------------------------------------------------------------------===// -class sve_int_dup_imm sz8_64, string asm, - ZPRRegOp zprty, Operand immtype> -: I<(outs zprty:$Zd), (ins immtype:$imm), - asm, "\t$Zd, $imm", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<9> imm; - let Inst{31-24} = 0b00100101; - let Inst{23-22} = sz8_64; - let Inst{21-14} = 0b11100011; - let 
Inst{13} = imm{8}; // sh - let Inst{12-5} = imm{7-0}; // imm8 - let Inst{4-0} = Zd; - - let isReMaterializable = 1; -} - -multiclass sve_int_dup_imm { - def _B : sve_int_dup_imm<0b00, asm, ZPR8, cpy_imm8_opt_lsl_i8>; - def _H : sve_int_dup_imm<0b01, asm, ZPR16, cpy_imm8_opt_lsl_i16>; - def _S : sve_int_dup_imm<0b10, asm, ZPR32, cpy_imm8_opt_lsl_i32>; - def _D : sve_int_dup_imm<0b11, asm, ZPR64, cpy_imm8_opt_lsl_i64>; - - def : InstAlias<"mov $Zd, $imm", - (!cast(NAME # _B) ZPR8:$Zd, cpy_imm8_opt_lsl_i8:$imm), 1>; - def : InstAlias<"mov $Zd, $imm", - (!cast(NAME # _H) ZPR16:$Zd, cpy_imm8_opt_lsl_i16:$imm), 1>; - def : InstAlias<"mov $Zd, $imm", - (!cast(NAME # _S) ZPR32:$Zd, cpy_imm8_opt_lsl_i32:$imm), 1>; - def : InstAlias<"mov $Zd, $imm", - (!cast(NAME # _D) ZPR64:$Zd, cpy_imm8_opt_lsl_i64:$imm), 1>; - - def : InstAlias<"fmov $Zd, #0.0", - (!cast(NAME # _H) ZPR16:$Zd, 0, 0), 1>; - def : InstAlias<"fmov $Zd, #0.0", - (!cast(NAME # _S) ZPR32:$Zd, 0, 0), 1>; - def : InstAlias<"fmov $Zd, #0.0", - (!cast(NAME # _D) ZPR64:$Zd, 0, 0), 1>; -} - -class sve_int_dup_fpimm sz8_64, Operand fpimmtype, - string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins fpimmtype:$imm8), - asm, "\t$Zd, $imm8", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<8> imm8; - let Inst{31-24} = 0b00100101; - let Inst{23-22} = sz8_64; - let Inst{21-14} = 0b11100111; - let Inst{13} = 0b0; - let Inst{12-5} = imm8; - let Inst{4-0} = Zd; - - let isReMaterializable = 1; -} - -multiclass sve_int_dup_fpimm { - def _H : sve_int_dup_fpimm<0b01, fpimm16, asm, ZPR16>; - def _S : sve_int_dup_fpimm<0b10, fpimm32, asm, ZPR32>; - def _D : sve_int_dup_fpimm<0b11, fpimm64, asm, ZPR64>; - - def : InstAlias<"fmov $Zd, $imm8", - (!cast(NAME # _H) ZPR16:$Zd, fpimm16:$imm8), 1>; - def : InstAlias<"fmov $Zd, $imm8", - (!cast(NAME # _S) ZPR32:$Zd, fpimm32:$imm8), 1>; - def : InstAlias<"fmov $Zd, $imm8", - (!cast(NAME # _D) ZPR64:$Zd, fpimm64:$imm8), 1>; -} - -class sve_int_arith_imm0 sz8_64, bits<3> opc, string asm, - 
ZPRRegOp zprty, Operand immtype> -: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, immtype:$imm), - asm, "\t$Zdn, $_Zdn, $imm", - "", - []>, Sched<[]> { - bits<5> Zdn; - bits<9> imm; - let Inst{31-24} = 0b00100101; - let Inst{23-22} = sz8_64; - let Inst{21-19} = 0b100; - let Inst{18-16} = opc; - let Inst{15-14} = 0b11; - let Inst{13} = imm{8}; // sh - let Inst{12-5} = imm{7-0}; // imm8 - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve_int_arith_imm0 opc, string asm, SDPatternOperator op> { - def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8, addsub_imm8_opt_lsl_i8>; - def _H : sve_int_arith_imm0<0b01, opc, asm, ZPR16, addsub_imm8_opt_lsl_i16>; - def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>; - def _D : sve_int_arith_imm0<0b11, opc, asm, ZPR64, addsub_imm8_opt_lsl_i64>; - - def : SVE_1_Op_Imm_OptLsl_Pat(NAME # _B)>; - def : SVE_1_Op_Imm_OptLsl_Pat(NAME # _H)>; - def : SVE_1_Op_Imm_OptLsl_Pat(NAME # _S)>; - def : SVE_1_Op_Imm_OptLsl_Pat(NAME # _D)>; -} - -multiclass sve_int_arith_imm0_subr opc, string asm, SDPatternOperator op> { - def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8, addsub_imm8_opt_lsl_i8>; - def _H : sve_int_arith_imm0<0b01, opc, asm, ZPR16, addsub_imm8_opt_lsl_i16>; - def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>; - def _D : sve_int_arith_imm0<0b11, opc, asm, ZPR64, addsub_imm8_opt_lsl_i64>; - - def : SVE_1_Op_Imm_OptLsl_Reverse_Pat(NAME # _B)>; - def : SVE_1_Op_Imm_OptLsl_Reverse_Pat(NAME # _H)>; - def : SVE_1_Op_Imm_OptLsl_Reverse_Pat(NAME # _S)>; - def : SVE_1_Op_Imm_OptLsl_Reverse_Pat(NAME # _D)>; -} - -class sve_int_arith_imm sz8_64, bits<6> opc, string asm, - ZPRRegOp zprty, Operand immtype> -: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, immtype:$imm), - asm, "\t$Zdn, $_Zdn, $imm", - "", - []>, Sched<[]> { - bits<5> Zdn; - bits<8> imm; - let Inst{31-24} = 0b00100101; - let Inst{23-22} = 
sz8_64; - let Inst{21-16} = opc; - let Inst{15-13} = 0b110; - let Inst{12-5} = imm; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve_int_arith_imm1 opc, string asm, SDPatternOperator op> { - def _B : sve_int_arith_imm<0b00, { 0b1010, opc }, asm, ZPR8, simm8>; - def _H : sve_int_arith_imm<0b01, { 0b1010, opc }, asm, ZPR16, simm8>; - def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, simm8>; - def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, simm8>; - - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _B)>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _H)>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _S)>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _D)>; -} - -multiclass sve_int_arith_imm1_unsigned opc, string asm, SDPatternOperator op> { - def _B : sve_int_arith_imm<0b00, { 0b1010, opc }, asm, ZPR8, imm0_255>; - def _H : sve_int_arith_imm<0b01, { 0b1010, opc }, asm, ZPR16, imm0_255>; - def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, imm0_255>; - def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, imm0_255>; - - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _B)>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _H)>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _S)>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _D)>; -} - -multiclass sve_int_arith_imm2 { - def _B : sve_int_arith_imm<0b00, 0b110000, asm, ZPR8, simm8>; - def _H : sve_int_arith_imm<0b01, 0b110000, asm, ZPR16, simm8>; - def _S : sve_int_arith_imm<0b10, 0b110000, asm, ZPR32, simm8>; - def _D : sve_int_arith_imm<0b11, 0b110000, asm, ZPR64, simm8>; - - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _B)>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _H)>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _S)>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE Bitwise 
Logical - Unpredicated Group -//===----------------------------------------------------------------------===// - -class sve_int_bin_cons_log opc, string asm> -: I<(outs ZPR64:$Zd), (ins ZPR64:$Zn, ZPR64:$Zm), - asm, "\t$Zd, $Zn, $Zm", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<5> Zm; - bits<5> Zn; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = opc{1-0}; - let Inst{21} = 0b1; - let Inst{20-16} = Zm; - let Inst{15-10} = 0b001100; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve_int_bin_cons_log opc, string asm, SDPatternOperator op> { - def NAME : sve_int_bin_cons_log; - - def : SVE_2_Op_Pat(NAME)>; - def : SVE_2_Op_Pat(NAME)>; - def : SVE_2_Op_Pat(NAME)>; - def : SVE_2_Op_Pat(NAME)>; - - def : InstAlias(NAME) ZPR8:$Zd, ZPR8:$Zn, ZPR8:$Zm), 1>; - def : InstAlias(NAME) ZPR16:$Zd, ZPR16:$Zn, ZPR16:$Zm), 1>; - def : InstAlias(NAME) ZPR32:$Zd, ZPR32:$Zn, ZPR32:$Zm), 1>; -} - -class sve2_int_bitwise_ternary_op_d opc, string asm> -: I<(outs ZPR64:$Zdn), (ins ZPR64:$_Zdn, ZPR64:$Zm, ZPR64:$Zk), - asm, "\t$Zdn, $_Zdn, $Zm, $Zk", - "", - []>, Sched<[]> { - bits<5> Zdn; - bits<5> Zk; - bits<5> Zm; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = opc{2-1}; - let Inst{21} = 0b1; - let Inst{20-16} = Zm; - let Inst{15-11} = 0b00111; - let Inst{10} = opc{0}; - let Inst{9-5} = Zk; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve2_int_bitwise_ternary_op opc, string asm, SDPatternOperator op> { - def NAME : sve2_int_bitwise_ternary_op_d; - - def : InstAlias(NAME) ZPR8:$Zdn, ZPR8:$Zm, ZPR8:$Zk), 1>; - def : InstAlias(NAME) ZPR16:$Zdn, ZPR16:$Zm, ZPR16:$Zk), 1>; - def : InstAlias(NAME) ZPR32:$Zdn, ZPR32:$Zm, ZPR32:$Zk), 1>; - - def : SVE_3_Op_Pat(NAME)>; - def : SVE_3_Op_Pat(NAME)>; - def : SVE_3_Op_Pat(NAME)>; - def : SVE_3_Op_Pat(NAME)>; -} - -class sve2_int_rotate_right_imm tsz8_64, string asm, - ZPRRegOp zprty, Operand immtype> -: I<(outs 
zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, immtype:$imm), - asm, "\t$Zdn, $_Zdn, $Zm, $imm", - "", - []>, Sched<[]> { - bits<5> Zdn; - bits<5> Zm; - bits<6> imm; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = tsz8_64{3-2}; - let Inst{21} = 0b1; - let Inst{20-19} = tsz8_64{1-0}; - let Inst{18-16} = imm{2-0}; // imm3 - let Inst{15-10} = 0b001101; - let Inst{9-5} = Zm; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve2_int_rotate_right_imm { - def _B : sve2_int_rotate_right_imm<{0,0,0,1}, asm, ZPR8, vecshiftR8>; - def _H : sve2_int_rotate_right_imm<{0,0,1,?}, asm, ZPR16, vecshiftR16> { - let Inst{19} = imm{3}; - } - def _S : sve2_int_rotate_right_imm<{0,1,?,?}, asm, ZPR32, vecshiftR32> { - let Inst{20-19} = imm{4-3}; - } - def _D : sve2_int_rotate_right_imm<{1,?,?,?}, asm, ZPR64, vecshiftR64> { - let Inst{22} = imm{5}; - let Inst{20-19} = imm{4-3}; - } - - def : SVE_3_Op_Imm_Pat(NAME # _B)>; - def : SVE_3_Op_Imm_Pat(NAME # _H)>; - def : SVE_3_Op_Imm_Pat(NAME # _S)>; - def : SVE_3_Op_Imm_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE Integer Wide Immediate - Predicated Group -//===----------------------------------------------------------------------===// - -class sve_int_dup_fpimm_pred sz, Operand fpimmtype, - string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPRAny:$Pg, fpimmtype:$imm8), - asm, "\t$Zd, $Pg/m, $imm8", - "", - []>, Sched<[]> { - bits<4> Pg; - bits<5> Zd; - bits<8> imm8; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz; - let Inst{21-20} = 0b01; - let Inst{19-16} = Pg; - let Inst{15-13} = 0b110; - let Inst{12-5} = imm8; - let Inst{4-0} = Zd; - - let Constraints = "$Zd = $_Zd"; - let DestructiveInstType = DestructiveOther; - let ElementSize = zprty.ElementSize; -} - -multiclass sve_int_dup_fpimm_pred { - def _H : sve_int_dup_fpimm_pred<0b01, fpimm16, 
asm, ZPR16>; - def _S : sve_int_dup_fpimm_pred<0b10, fpimm32, asm, ZPR32>; - def _D : sve_int_dup_fpimm_pred<0b11, fpimm64, asm, ZPR64>; - - def : InstAlias<"fmov $Zd, $Pg/m, $imm8", - (!cast(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, fpimm16:$imm8), 1>; - def : InstAlias<"fmov $Zd, $Pg/m, $imm8", - (!cast(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, fpimm32:$imm8), 1>; - def : InstAlias<"fmov $Zd, $Pg/m, $imm8", - (!cast(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, fpimm64:$imm8), 1>; -} - -class sve_int_dup_imm_pred sz8_64, bit m, string asm, - ZPRRegOp zprty, string pred_qual, dag iops> -: I<(outs zprty:$Zd), iops, - asm, "\t$Zd, $Pg"#pred_qual#", $imm", - "", []>, Sched<[]> { - bits<5> Zd; - bits<4> Pg; - bits<9> imm; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21-20} = 0b01; - let Inst{19-16} = Pg; - let Inst{15} = 0b0; - let Inst{14} = m; - let Inst{13} = imm{8}; // sh - let Inst{12-5} = imm{7-0}; // imm8 - let Inst{4-0} = Zd; - - let DestructiveInstType = DestructiveOther; - let ElementSize = zprty.ElementSize; -} - -multiclass sve_int_dup_imm_pred_merge_inst< - bits<2> sz8_64, string asm, ZPRRegOp zprty, ValueType intty, - ValueType predty, ValueType scalarty, imm8_opt_lsl cpyimm> { - let Constraints = "$Zd = $_Zd" in - def NAME : sve_int_dup_imm_pred; - def : InstAlias<"mov $Zd, $Pg/m, $imm", - (!cast(NAME) zprty:$Zd, PPRAny:$Pg, cpyimm:$imm), 1>; - def : Pat<(intty - (vselect predty:$Pg, - (intty (AArch64dup (scalarty (SVE8BitLslImm.Pat i32:$imm, i32:$shift)))), - intty:$Zd)), - (!cast(NAME) zprty:$Zd, $Pg, i32:$imm, i32:$shift)>; -} - -multiclass sve_int_dup_imm_pred_merge { - defm _B : sve_int_dup_imm_pred_merge_inst<0b00, asm, ZPR8, nxv16i8, nxv16i1, - i32, cpy_imm8_opt_lsl_i8>; - defm _H : sve_int_dup_imm_pred_merge_inst<0b01, asm, ZPR16, nxv8i16, nxv8i1, - i32, cpy_imm8_opt_lsl_i16>; - defm _S : sve_int_dup_imm_pred_merge_inst<0b10, asm, ZPR32, nxv4i32, nxv4i1, - i32, cpy_imm8_opt_lsl_i32>; - defm _D : sve_int_dup_imm_pred_merge_inst<0b11, asm, ZPR64, 
nxv2i64, nxv2i1, - i64, cpy_imm8_opt_lsl_i64>; - - def : InstAlias<"fmov $Zd, $Pg/m, #0.0", - (!cast(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, 0, 0), 0>; - def : InstAlias<"fmov $Zd, $Pg/m, #0.0", - (!cast(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, 0, 0), 0>; - def : InstAlias<"fmov $Zd, $Pg/m, #0.0", - (!cast(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, 0, 0), 0>; -} - -multiclass sve_int_dup_imm_pred_zero_inst< - bits<2> sz8_64, string asm, ZPRRegOp zprty, ValueType intty, - ValueType predty, ValueType scalarty, imm8_opt_lsl cpyimm> { - def NAME : sve_int_dup_imm_pred; - def : InstAlias<"mov $Zd, $Pg/z, $imm", - (!cast(NAME) zprty:$Zd, PPRAny:$Pg, cpyimm:$imm), 1>; - def : Pat<(intty (zext (predty PPRAny:$Ps1))), - (!cast(NAME) PPRAny:$Ps1, 1, 0)>; - def : Pat<(intty (sext (predty PPRAny:$Ps1))), - (!cast(NAME) PPRAny:$Ps1, -1, 0)>; - def : Pat<(intty (anyext (predty PPRAny:$Ps1))), - (!cast(NAME) PPRAny:$Ps1, 1, 0)>; - def : Pat<(intty - (vselect predty:$Pg, - (intty (AArch64dup (scalarty (SVE8BitLslImm.Pat i32:$imm, i32:$shift)))), - (intty (AArch64dup (scalarty 0))))), - (!cast(NAME) $Pg, i32:$imm, i32:$shift)>; -} - -multiclass sve_int_dup_imm_pred_zero { - defm _B : sve_int_dup_imm_pred_zero_inst<0b00, asm, ZPR8, nxv16i8, nxv16i1, - i32, cpy_imm8_opt_lsl_i8>; - defm _H : sve_int_dup_imm_pred_zero_inst<0b01, asm, ZPR16, nxv8i16, nxv8i1, - i32, cpy_imm8_opt_lsl_i16>; - defm _S : sve_int_dup_imm_pred_zero_inst<0b10, asm, ZPR32, nxv4i32, nxv4i1, - i32, cpy_imm8_opt_lsl_i32>; - defm _D : sve_int_dup_imm_pred_zero_inst<0b11, asm, ZPR64, nxv2i64, nxv2i1, - i64, cpy_imm8_opt_lsl_i64>; -} - -//===----------------------------------------------------------------------===// -// SVE Integer Compare - Vectors Group -//===----------------------------------------------------------------------===// - -class sve_int_cmp sz8_64, bits<3> opc, string asm, - PPRRegOp pprty, ZPRRegOp zprty1, ZPRRegOp zprty2> -: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty1:$Zn, zprty2:$Zm), - asm, "\t$Pd, $Pg/z, $Zn, $Zm", - 
"", - []>, Sched<[]> { - bits<4> Pd; - bits<3> Pg; - bits<5> Zm; - bits<5> Zn; - let Inst{31-24} = 0b00100100; - let Inst{23-22} = sz8_64; - let Inst{21} = 0b0; - let Inst{20-16} = Zm; - let Inst{15} = opc{2}; - let Inst{14} = cmp_1; - let Inst{13} = opc{1}; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4} = opc{0}; - let Inst{3-0} = Pd; - - let Defs = [NZCV]; - let ElementSize = pprty.ElementSize; - let isPTestLike = 1; -} - -multiclass SVE_SETCC_Pat { - def : Pat<(predvt (AArch64setcc_z predvt:$Op1, intvt:$Op2, intvt:$Op3, cc)), - (cmp $Op1, $Op2, $Op3)>; - def : Pat<(predvt (AArch64setcc_z predvt:$Op1, intvt:$Op2, intvt:$Op3, invcc)), - (cmp $Op1, $Op3, $Op2)>; - def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, cc))), - (cmp $Pg, $Op2, $Op3)>; - def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, invcc))), - (cmp $Pg, $Op3, $Op2)>; -} - -multiclass SVE_SETCC_Pat_With_Zero { - def : Pat<(predvt (AArch64setcc_z predvt:$Op1, intvt:$Op2, (SVEDup0), cc)), - (cmp $Op1, $Op2)>; - def : Pat<(predvt (AArch64setcc_z predvt:$Op1, (SVEDup0), intvt:$Op2, invcc)), - (cmp $Op1, $Op2)>; -} - -multiclass sve_int_cmp_0 opc, string asm, CondCode cc, CondCode invcc> { - def _B : sve_int_cmp<0b0, 0b00, opc, asm, PPR8, ZPR8, ZPR8>; - def _H : sve_int_cmp<0b0, 0b01, opc, asm, PPR16, ZPR16, ZPR16>; - def _S : sve_int_cmp<0b0, 0b10, opc, asm, PPR32, ZPR32, ZPR32>; - def _D : sve_int_cmp<0b0, 0b11, opc, asm, PPR64, ZPR64, ZPR64>; - - defm : SVE_SETCC_Pat(NAME # _B)>; - defm : SVE_SETCC_Pat(NAME # _H)>; - defm : SVE_SETCC_Pat(NAME # _S)>; - defm : SVE_SETCC_Pat(NAME # _D)>; -} - -multiclass sve_int_cmp_0_wide opc, string asm, SDPatternOperator op> { - def _B : sve_int_cmp<0b0, 0b00, opc, asm, PPR8, ZPR8, ZPR64>; - def _H : sve_int_cmp<0b0, 0b01, opc, asm, PPR16, ZPR16, ZPR64>; - def _S : sve_int_cmp<0b0, 0b10, opc, asm, PPR32, ZPR32, ZPR64>; - - def : SVE_3_Op_Pat(NAME # 
_B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; -} - -multiclass sve_int_cmp_1_wide opc, string asm, SDPatternOperator op> { - def _B : sve_int_cmp<0b1, 0b00, opc, asm, PPR8, ZPR8, ZPR64>; - def _H : sve_int_cmp<0b1, 0b01, opc, asm, PPR16, ZPR16, ZPR64>; - def _S : sve_int_cmp<0b1, 0b10, opc, asm, PPR32, ZPR32, ZPR64>; - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; -} - - -//===----------------------------------------------------------------------===// -// SVE Integer Compare - Signed Immediate Group -//===----------------------------------------------------------------------===// - -class sve_int_scmp_vi sz8_64, bits<3> opc, string asm, PPRRegOp pprty, - ZPRRegOp zprty, - Operand immtype> -: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn, immtype:$imm5), - asm, "\t$Pd, $Pg/z, $Zn, $imm5", - "", - []>, Sched<[]> { - bits<4> Pd; - bits<3> Pg; - bits<5> Zn; - bits<5> imm5; - let Inst{31-24} = 0b00100101; - let Inst{23-22} = sz8_64; - let Inst{21} = 0b0; - let Inst{20-16} = imm5; - let Inst{15} = opc{2}; - let Inst{14} = 0b0; - let Inst{13} = opc{1}; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4} = opc{0}; - let Inst{3-0} = Pd; - - let Defs = [NZCV]; - let ElementSize = pprty.ElementSize; - let isPTestLike = 1; -} - -multiclass SVE_SETCC_Imm_Pat { - def : Pat<(predvt (AArch64setcc_z (predvt PPR_3b:$Pg), - (intvt ZPR:$Zs1), - (intvt (AArch64dup (immtype:$imm))), - cc)), - (cmp $Pg, $Zs1, immtype:$imm)>; - def : Pat<(predvt (AArch64setcc_z (predvt PPR_3b:$Pg), - (intvt (AArch64dup (immtype:$imm))), - (intvt ZPR:$Zs1), - commuted_cc)), - (cmp $Pg, $Zs1, immtype:$imm)>; -} - -multiclass sve_int_scmp_vi opc, string asm, CondCode cc, CondCode commuted_cc> { - def _B : sve_int_scmp_vi<0b00, opc, asm, PPR8, ZPR8, simm5_32b>; - def _H : sve_int_scmp_vi<0b01, opc, asm, PPR16, ZPR16, simm5_32b>; - def _S : sve_int_scmp_vi<0b10, opc, asm, PPR32, ZPR32, simm5_32b>; - def _D : 
sve_int_scmp_vi<0b11, opc, asm, PPR64, ZPR64, simm5_64b>; - - defm : SVE_SETCC_Imm_Pat(NAME # _B)>; - defm : SVE_SETCC_Imm_Pat(NAME # _H)>; - defm : SVE_SETCC_Imm_Pat(NAME # _S)>; - defm : SVE_SETCC_Imm_Pat(NAME # _D)>; -} - - -//===----------------------------------------------------------------------===// -// SVE Integer Compare - Unsigned Immediate Group -//===----------------------------------------------------------------------===// - -class sve_int_ucmp_vi sz8_64, bits<2> opc, string asm, PPRRegOp pprty, - ZPRRegOp zprty, Operand immtype> -: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn, immtype:$imm7), - asm, "\t$Pd, $Pg/z, $Zn, $imm7", - "", - []>, Sched<[]> { - bits<4> Pd; - bits<3> Pg; - bits<5> Zn; - bits<7> imm7; - let Inst{31-24} = 0b00100100; - let Inst{23-22} = sz8_64; - let Inst{21} = 1; - let Inst{20-14} = imm7; - let Inst{13} = opc{1}; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4} = opc{0}; - let Inst{3-0} = Pd; - - let Defs = [NZCV]; - let ElementSize = pprty.ElementSize; - let isPTestLike = 1; -} - -multiclass sve_int_ucmp_vi opc, string asm, CondCode cc, - CondCode commuted_cc> { - def _B : sve_int_ucmp_vi<0b00, opc, asm, PPR8, ZPR8, imm0_127>; - def _H : sve_int_ucmp_vi<0b01, opc, asm, PPR16, ZPR16, imm0_127>; - def _S : sve_int_ucmp_vi<0b10, opc, asm, PPR32, ZPR32, imm0_127>; - def _D : sve_int_ucmp_vi<0b11, opc, asm, PPR64, ZPR64, imm0_127_64b>; - - defm : SVE_SETCC_Imm_Pat(NAME # _B)>; - defm : SVE_SETCC_Imm_Pat(NAME # _H)>; - defm : SVE_SETCC_Imm_Pat(NAME # _S)>; - defm : SVE_SETCC_Imm_Pat(NAME # _D)>; -} - - -//===----------------------------------------------------------------------===// -// SVE Integer Compare - Scalars Group -//===----------------------------------------------------------------------===// - -class sve_int_cterm -: I<(outs), (ins rt:$Rn, rt:$Rm), - asm, "\t$Rn, $Rm", - "", - []>, Sched<[]> { - bits<5> Rm; - bits<5> Rn; - let Inst{31-23} = 0b001001011; - let Inst{22} = sz; - let Inst{21} = 0b1; - let 
Inst{20-16} = Rm; - let Inst{15-10} = 0b001000; - let Inst{9-5} = Rn; - let Inst{4} = opc; - let Inst{3-0} = 0b0000; - - let Defs = [NZCV]; -} - -class sve_int_while_rr sz8_64, bits<4> opc, string asm, - RegisterClass gprty, PPRRegOp pprty> -: I<(outs pprty:$Pd), (ins gprty:$Rn, gprty:$Rm), - asm, "\t$Pd, $Rn, $Rm", - "", []>, Sched<[]> { - bits<4> Pd; - bits<5> Rm; - bits<5> Rn; - let Inst{31-24} = 0b00100101; - let Inst{23-22} = sz8_64; - let Inst{21} = 0b1; - let Inst{20-16} = Rm; - let Inst{15-13} = 0b000; - let Inst{12-10} = opc{3-1}; - let Inst{9-5} = Rn; - let Inst{4} = opc{0}; - let Inst{3-0} = Pd; - - let Defs = [NZCV]; - let ElementSize = pprty.ElementSize; - let isWhile = 1; -} - -multiclass sve_int_while4_rr opc, string asm, SDPatternOperator op> { - def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>; - def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>; - def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>; - def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64>; - - def : SVE_2_Op_Pat(NAME # _B)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -multiclass sve_int_while8_rr opc, string asm, SDPatternOperator op> { - def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>; - def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>; - def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>; - def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>; - - def : SVE_2_Op_Pat(NAME # _B)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -class sve2_int_while_rr sz8_64, bits<1> rw, string asm, - PPRRegOp pprty> -: I<(outs pprty:$Pd), (ins GPR64:$Rn, GPR64:$Rm), - asm, "\t$Pd, $Rn, $Rm", - "", []>, Sched<[]> { - bits<4> Pd; - bits<5> Rm; - bits<5> Rn; - let Inst{31-24} = 0b00100101; - let Inst{23-22} = sz8_64; - let Inst{21} = 0b1; - let Inst{20-16} = Rm; - let Inst{15-10} = 
0b001100; - let Inst{9-5} = Rn; - let Inst{4} = rw; - let Inst{3-0} = Pd; - - let Defs = [NZCV]; - let ElementSize = pprty.ElementSize; - let isWhile = 1; -} - -multiclass sve2_int_while_rr rw, string asm, string op> { - def _B : sve2_int_while_rr<0b00, rw, asm, PPR8>; - def _H : sve2_int_while_rr<0b01, rw, asm, PPR16>; - def _S : sve2_int_while_rr<0b10, rw, asm, PPR32>; - def _D : sve2_int_while_rr<0b11, rw, asm, PPR64>; - - def : SVE_2_Op_Pat(op # _b), i64, i64, !cast(NAME # _B)>; - def : SVE_2_Op_Pat(op # _h), i64, i64, !cast(NAME # _H)>; - def : SVE_2_Op_Pat(op # _s), i64, i64, !cast(NAME # _S)>; - def : SVE_2_Op_Pat(op # _d), i64, i64, !cast(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE Floating Point Fast Reduction Group -//===----------------------------------------------------------------------===// - -class sve_fp_fast_red sz, bits<3> opc, string asm, - ZPRRegOp zprty, FPRasZPROperand dstOpType> -: I<(outs dstOpType:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn), - asm, "\t$Vd, $Pg, $Zn", - "", - []>, Sched<[]> { - bits<5> Zn; - bits<5> Vd; - bits<3> Pg; - let Inst{31-24} = 0b01100101; - let Inst{23-22} = sz; - let Inst{21-19} = 0b000; - let Inst{18-16} = opc; - let Inst{15-13} = 0b001; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Vd; -} - -multiclass sve_fp_fast_red opc, string asm, SDPatternOperator op> { - def _H : sve_fp_fast_red<0b01, opc, asm, ZPR16, FPR16asZPR>; - def _S : sve_fp_fast_red<0b10, opc, asm, ZPR32, FPR32asZPR>; - def _D : sve_fp_fast_red<0b11, opc, asm, ZPR64, FPR64asZPR>; - - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE Floating Point Accumulating Reduction Group 
-//===----------------------------------------------------------------------===// - -class sve_fp_2op_p_vd sz, bits<3> opc, string asm, - ZPRRegOp zprty, FPRasZPROperand dstOpType> -: I<(outs dstOpType:$Vdn), (ins PPR3bAny:$Pg, dstOpType:$_Vdn, zprty:$Zm), - asm, "\t$Vdn, $Pg, $_Vdn, $Zm", - "", - []>, - Sched<[]> { - bits<3> Pg; - bits<5> Vdn; - bits<5> Zm; - let Inst{31-24} = 0b01100101; - let Inst{23-22} = sz; - let Inst{21-19} = 0b011; - let Inst{18-16} = opc; - let Inst{15-13} = 0b001; - let Inst{12-10} = Pg; - let Inst{9-5} = Zm; - let Inst{4-0} = Vdn; - - let Constraints = "$Vdn = $_Vdn"; -} - -multiclass sve_fp_2op_p_vd opc, string asm, SDPatternOperator op> { - def _H : sve_fp_2op_p_vd<0b01, opc, asm, ZPR16, FPR16asZPR>; - def _S : sve_fp_2op_p_vd<0b10, opc, asm, ZPR32, FPR32asZPR>; - def _D : sve_fp_2op_p_vd<0b11, opc, asm, ZPR64, FPR64asZPR>; - - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE Floating Point Compare - Vectors Group -//===----------------------------------------------------------------------===// - -class sve_fp_3op_p_pd sz, bits<3> opc, string asm, PPRRegOp pprty, - ZPRRegOp zprty> -: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn, zprty:$Zm), - asm, "\t$Pd, $Pg/z, $Zn, $Zm", - "", - []>, Sched<[]> { - bits<4> Pd; - bits<3> Pg; - bits<5> Zm; - bits<5> Zn; - let Inst{31-24} = 0b01100101; - let Inst{23-22} = sz; - let Inst{21} = 0b0; - let Inst{20-16} = Zm; - let Inst{15} = opc{2}; - let Inst{14} = 0b1; - let Inst{13} = opc{1}; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4} = opc{0}; - let Inst{3-0} = Pd; -} - -multiclass sve_fp_3op_p_pd opc, string asm, SDPatternOperator op> { - def _H : sve_fp_3op_p_pd<0b01, opc, asm, PPR16, ZPR16>; - def _S : sve_fp_3op_p_pd<0b10, opc, asm, 
PPR32, ZPR32>; - def _D : sve_fp_3op_p_pd<0b11, opc, asm, PPR64, ZPR64>; - - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -multiclass sve_fp_3op_p_pd_cc opc, string asm, - CondCode cc1, CondCode cc2, - CondCode invcc1, CondCode invcc2> { - def _H : sve_fp_3op_p_pd<0b01, opc, asm, PPR16, ZPR16>; - def _S : sve_fp_3op_p_pd<0b10, opc, asm, PPR32, ZPR32>; - def _D : sve_fp_3op_p_pd<0b11, opc, asm, PPR64, ZPR64>; - - defm : SVE_SETCC_Pat(NAME # _H)>; - defm : SVE_SETCC_Pat(NAME # _H)>; - defm : SVE_SETCC_Pat(NAME # _H)>; - defm : SVE_SETCC_Pat(NAME # _S)>; - defm : SVE_SETCC_Pat(NAME # _S)>; - defm : SVE_SETCC_Pat(NAME # _D)>; - - defm : SVE_SETCC_Pat(NAME # _H)>; - defm : SVE_SETCC_Pat(NAME # _H)>; - defm : SVE_SETCC_Pat(NAME # _H)>; - defm : SVE_SETCC_Pat(NAME # _S)>; - defm : SVE_SETCC_Pat(NAME # _S)>; - defm : SVE_SETCC_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE Floating Point Compare - with Zero Group -//===----------------------------------------------------------------------===// - -class sve_fp_2op_p_pd sz, bits<3> opc, string asm, PPRRegOp pprty, - ZPRRegOp zprty> -: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn), - asm, "\t$Pd, $Pg/z, $Zn, #0.0", - "", - []>, Sched<[]> { - bits<4> Pd; - bits<3> Pg; - bits<5> Zn; - let Inst{31-24} = 0b01100101; - let Inst{23-22} = sz; - let Inst{21-18} = 0b0100; - let Inst{17-16} = opc{2-1}; - let Inst{15-13} = 0b001; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4} = opc{0}; - let Inst{3-0} = Pd; -} - -multiclass sve_fp_2op_p_pd opc, string asm, - CondCode cc1, CondCode cc2, - CondCode invcc1, CondCode invcc2> { - def _H : sve_fp_2op_p_pd<0b01, opc, asm, PPR16, ZPR16>; - def _S : sve_fp_2op_p_pd<0b10, opc, asm, PPR32, ZPR32>; - def _D : sve_fp_2op_p_pd<0b11, opc, asm, PPR64, ZPR64>; - - defm : SVE_SETCC_Pat_With_Zero(NAME # _H)>; - defm : SVE_SETCC_Pat_With_Zero(NAME # _H)>; - defm : 
SVE_SETCC_Pat_With_Zero(NAME # _H)>; - defm : SVE_SETCC_Pat_With_Zero(NAME # _S)>; - defm : SVE_SETCC_Pat_With_Zero(NAME # _S)>; - defm : SVE_SETCC_Pat_With_Zero(NAME # _D)>; - - defm : SVE_SETCC_Pat_With_Zero(NAME # _H)>; - defm : SVE_SETCC_Pat_With_Zero(NAME # _H)>; - defm : SVE_SETCC_Pat_With_Zero(NAME # _H)>; - defm : SVE_SETCC_Pat_With_Zero(NAME # _S)>; - defm : SVE_SETCC_Pat_With_Zero(NAME # _S)>; - defm : SVE_SETCC_Pat_With_Zero(NAME # _D)>; -} - - -//===----------------------------------------------------------------------===// -//SVE Index Generation Group -//===----------------------------------------------------------------------===// - -def simm5_8b_tgt : TImmLeaf= -16 && (int8_t)Imm < 16; }]>; -def simm5_16b_tgt : TImmLeaf= -16 && (int16_t)Imm < 16; }]>; -def simm5_32b_tgt : TImmLeaf= -16 && (int32_t)Imm < 16; }]>; -def simm5_64b_tgt : TImmLeaf= -16 && (int64_t)Imm < 16; }]>; -def i64imm_32bit_tgt : TImmLeaf(Imm); -}]>; - -class sve_int_index_ii sz8_64, string asm, ZPRRegOp zprty, - Operand imm_ty> -: I<(outs zprty:$Zd), (ins imm_ty:$imm5, imm_ty:$imm5b), - asm, "\t$Zd, $imm5, $imm5b", - "", []>, Sched<[]> { - bits<5> Zd; - bits<5> imm5; - bits<5> imm5b; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = sz8_64; - let Inst{21} = 0b1; - let Inst{20-16} = imm5b; - let Inst{15-10} = 0b010000; - let Inst{9-5} = imm5; - let Inst{4-0} = Zd; -} - -multiclass sve_int_index_ii { - def _B : sve_int_index_ii<0b00, asm, ZPR8, simm5_8b>; - def _H : sve_int_index_ii<0b01, asm, ZPR16, simm5_16b>; - def _S : sve_int_index_ii<0b10, asm, ZPR32, simm5_32b>; - def _D : sve_int_index_ii<0b11, asm, ZPR64, simm5_64b>; - - def : Pat<(nxv16i8 (step_vector simm5_8b_tgt:$imm5b)), - (!cast(NAME # "_B") (i32 0), (!cast("trunc_imm") $imm5b))>; - def : Pat<(nxv8i16 (step_vector simm5_16b_tgt:$imm5b)), - (!cast(NAME # "_H") (i32 0), (!cast("trunc_imm") $imm5b))>; - def : Pat<(nxv4i32 (step_vector simm5_32b_tgt:$imm5b)), - (!cast(NAME # "_S") (i32 0), simm5_32b:$imm5b)>; - def : 
Pat<(nxv2i64 (step_vector simm5_64b_tgt:$imm5b)), - (!cast(NAME # "_D") (i64 0), simm5_64b:$imm5b)>; - - // add(step_vector(step), dup(X)) -> index(X, step). - def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5b)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), - (!cast(NAME # "_B") simm5_8b:$imm5, (!cast("trunc_imm") $imm5b))>; - def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5b)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), - (!cast(NAME # "_H") simm5_16b:$imm5, (!cast("trunc_imm") $imm5b))>; - def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5b)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), - (!cast(NAME # "_S") simm5_32b:$imm5, simm5_32b:$imm5b)>; - def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5b)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), - (!cast(NAME # "_D") simm5_64b:$imm5, simm5_64b:$imm5b)>; -} - -class sve_int_index_ir sz8_64, string asm, ZPRRegOp zprty, - RegisterClass srcRegType, Operand imm_ty> -: I<(outs zprty:$Zd), (ins imm_ty:$imm5, srcRegType:$Rm), - asm, "\t$Zd, $imm5, $Rm", - "", []>, Sched<[]> { - bits<5> Rm; - bits<5> Zd; - bits<5> imm5; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = sz8_64; - let Inst{21} = 0b1; - let Inst{20-16} = Rm; - let Inst{15-10} = 0b010010; - let Inst{9-5} = imm5; - let Inst{4-0} = Zd; -} - -multiclass sve_int_index_ir { - def _B : sve_int_index_ir<0b00, asm, ZPR8, GPR32, simm5_8b>; - def _H : sve_int_index_ir<0b01, asm, ZPR16, GPR32, simm5_16b>; - def _S : sve_int_index_ir<0b10, asm, ZPR32, GPR32, simm5_32b>; - def _D : sve_int_index_ir<0b11, asm, ZPR64, GPR64, simm5_64b>; - - def : Pat<(nxv16i8 (step_vector i8:$imm)), - (!cast(NAME # "_B") (i32 0), (!cast("MOVi32imm") (!cast("trunc_imm") $imm)))>; - def : Pat<(nxv8i16 (step_vector i16:$imm)), - (!cast(NAME # "_H") (i32 0), (!cast("MOVi32imm") (!cast("trunc_imm") $imm)))>; - def : Pat<(nxv4i32 (step_vector i32:$imm)), - (!cast(NAME # "_S") (i32 0), (!cast("MOVi32imm") $imm))>; - def : Pat<(nxv2i64 (step_vector 
i64:$imm)), - (!cast(NAME # "_D") (i64 0), (!cast("MOVi64imm") $imm))>; - def : Pat<(nxv2i64 (step_vector i64imm_32bit_tgt:$imm)), - (!cast(NAME # "_D") (i64 0), (SUBREG_TO_REG (i64 0), (!cast("MOVi32imm") (!cast("trunc_imm") $imm)), sub_32))>; - - // add(step_vector(step), dup(X)) -> index(X, step). - def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$imm)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), - (!cast(NAME # "_B") simm5_8b:$imm5, (!cast("MOVi32imm") (!cast("trunc_imm") $imm)))>; - def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$imm)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), - (!cast(NAME # "_H") simm5_16b:$imm5, (!cast("MOVi32imm") (!cast("trunc_imm") $imm)))>; - def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$imm)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), - (!cast(NAME # "_S") simm5_32b:$imm5, (!cast("MOVi32imm") $imm))>; - def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$imm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), - (!cast(NAME # "_D") simm5_64b:$imm5, (!cast("MOVi64imm") $imm))>; - def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$imm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), - (!cast(NAME # "_D") simm5_64b:$imm5, (SUBREG_TO_REG (i64 0), (!cast("MOVi32imm") (!cast("trunc_imm") $imm)), sub_32))>; - - // mul(step_vector(1), dup(Y)) -> index(0, Y). 
- def : Pat<(mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), - (!cast(NAME # "_B") (i32 0), GPR32:$Rm)>; - def : Pat<(mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), - (!cast(NAME # "_H") (i32 0), GPR32:$Rm)>; - def : Pat<(mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), - (!cast(NAME # "_S") (i32 0), GPR32:$Rm)>; - def : Pat<(mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), - (!cast(NAME # "_D") (i64 0), GPR64:$Rm)>; - - // add(mul(step_vector(1), dup(Y)), dup(X)) -> index(X, Y). - def : Pat<(add (muloneuseop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), - (!cast(NAME # "_B") simm5_8b:$imm5, GPR32:$Rm)>; - def : Pat<(add (muloneuseop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), - (!cast(NAME # "_H") simm5_16b:$imm5, GPR32:$Rm)>; - def : Pat<(add (muloneuseop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), - (!cast(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>; - def : Pat<(add (muloneuseop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), - (!cast(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>; -} - -class sve_int_index_ri sz8_64, string asm, ZPRRegOp zprty, - RegisterClass srcRegType, Operand imm_ty> -: I<(outs zprty:$Zd), (ins srcRegType:$Rn, imm_ty:$imm5), - asm, "\t$Zd, $Rn, $imm5", - "", []>, Sched<[]> { - bits<5> Rn; - bits<5> Zd; - bits<5> imm5; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = sz8_64; - 
let Inst{21} = 0b1; - let Inst{20-16} = imm5; - let Inst{15-10} = 0b010001; - let Inst{9-5} = Rn; - let Inst{4-0} = Zd; -} - -multiclass sve_int_index_ri { - def _B : sve_int_index_ri<0b00, asm, ZPR8, GPR32, simm5_8b>; - def _H : sve_int_index_ri<0b01, asm, ZPR16, GPR32, simm5_16b>; - def _S : sve_int_index_ri<0b10, asm, ZPR32, GPR32, simm5_32b>; - def _D : sve_int_index_ri<0b11, asm, ZPR64, GPR64, simm5_64b>; - - // add(step_vector(step), dup(X)) -> index(X, step). - def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5)), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), - (!cast(NAME # "_B") GPR32:$Rm, (!cast("trunc_imm") $imm5))>; - def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5)), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), - (!cast(NAME # "_H") GPR32:$Rm, (!cast("trunc_imm") $imm5))>; - def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5)), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), - (!cast(NAME # "_S") GPR32:$Rm, simm5_32b:$imm5)>; - def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5)), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), - (!cast(NAME # "_D") GPR64:$Rm, simm5_64b:$imm5)>; -} - -class sve_int_index_rr sz8_64, string asm, ZPRRegOp zprty, - RegisterClass srcRegType> -: I<(outs zprty:$Zd), (ins srcRegType:$Rn, srcRegType:$Rm), - asm, "\t$Zd, $Rn, $Rm", - "", []>, Sched<[]> { - bits<5> Zd; - bits<5> Rm; - bits<5> Rn; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = sz8_64; - let Inst{21} = 0b1; - let Inst{20-16} = Rm; - let Inst{15-10} = 0b010011; - let Inst{9-5} = Rn; - let Inst{4-0} = Zd; -} - -multiclass sve_int_index_rr { - def _B : sve_int_index_rr<0b00, asm, ZPR8, GPR32>; - def _H : sve_int_index_rr<0b01, asm, ZPR16, GPR32>; - def _S : sve_int_index_rr<0b10, asm, ZPR32, GPR32>; - def _D : sve_int_index_rr<0b11, asm, ZPR64, GPR64>; - - // add(step_vector(step), dup(X)) -> index(X, step). 
- def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$imm)), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))), - (!cast(NAME # "_B") GPR32:$Rn, (!cast("MOVi32imm") (!cast("trunc_imm") $imm)))>; - def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$imm)), (nxv8i16 (AArch64dup(i32 GPR32:$Rn)))), - (!cast(NAME # "_H") GPR32:$Rn, (!cast("MOVi32imm") (!cast("trunc_imm") $imm)))>; - def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$imm)), (nxv4i32 (AArch64dup(i32 GPR32:$Rn)))), - (!cast(NAME # "_S") GPR32:$Rn, (!cast("MOVi32imm") $imm))>; - def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$imm)), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))), - (!cast(NAME # "_D") GPR64:$Rn, (!cast("MOVi64imm") $imm))>; - def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$imm)), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))), - (!cast(NAME # "_D") GPR64:$Rn, (SUBREG_TO_REG (i64 0), (!cast("MOVi32imm") (!cast("trunc_imm") $imm)), sub_32))>; - - // add(mul(step_vector(1), dup(Y)), dup(X)) -> index(X, Y). - def : Pat<(add (mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))), - (!cast(NAME # "_B") GPR32:$Rn, GPR32:$Rm)>; - def : Pat<(add (mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),(nxv8i16 (AArch64dup(i32 GPR32:$Rn)))), - (!cast(NAME # "_H") GPR32:$Rn, GPR32:$Rm)>; - def : Pat<(add (mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),(nxv4i32 (AArch64dup(i32 GPR32:$Rn)))), - (!cast(NAME # "_S") GPR32:$Rn, GPR32:$Rm)>; - def : Pat<(add (mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),(nxv2i64 (AArch64dup(i64 GPR64:$Rn)))), - (!cast(NAME # "_D") GPR64:$Rn, GPR64:$Rm)>; -} - -//===----------------------------------------------------------------------===// -// SVE Bitwise Shift - Predicated Group 
-//===----------------------------------------------------------------------===// - -class sve_int_bin_pred_shift_imm tsz8_64, bits<4> opc, string asm, - ZPRRegOp zprty, Operand immtype> -: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm), - asm, "\t$Zdn, $Pg/m, $_Zdn, $imm", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zdn; - bits<6> imm; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = tsz8_64{3-2}; - let Inst{21-20} = 0b00; - let Inst{19-16} = opc; - let Inst{15-13} = 0b100; - let Inst{12-10} = Pg; - let Inst{9-8} = tsz8_64{1-0}; - let Inst{7-5} = imm{2-0}; // imm3 - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveBinaryImm; - let ElementSize = zprty.ElementSize; -} - -multiclass sve_int_bin_pred_shift_imm_left opc, string asm, string Ps, - SDPatternOperator op = null_frag> { - def _B : SVEPseudo2Instr, - sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; - def _H : SVEPseudo2Instr, - sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { - let Inst{8} = imm{3}; - } - def _S : SVEPseudo2Instr, - sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { - let Inst{9-8} = imm{4-3}; - } - def _D : SVEPseudo2Instr, - sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { - let Inst{22} = imm{5}; - let Inst{9-8} = imm{4-3}; - } - - def : SVE_3_Op_Imm_Pat(NAME # _B)>; - def : SVE_3_Op_Imm_Pat(NAME # _H)>; - def : SVE_3_Op_Imm_Pat(NAME # _S)>; - def : SVE_3_Op_Imm_Pat(NAME # _D)>; -} - -// As above but shift amount takes the form of a "vector immediate". 
-multiclass sve_int_bin_pred_shift_imm_left_dup opc, string asm, - string Ps, SDPatternOperator op> -: sve_int_bin_pred_shift_imm_left { - def : SVE_Shift_DupImm_Pred_Pat(NAME # _B)>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _H)>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _S)>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _D)>; -} - -multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd { - def _ZERO_B : PredTwoOpImmPseudo; - def _ZERO_H : PredTwoOpImmPseudo; - def _ZERO_S : PredTwoOpImmPseudo; - def _ZERO_D : PredTwoOpImmPseudo; - - def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _ZERO_B)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _ZERO_H)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _ZERO_S)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _ZERO_D)>; -} - -multiclass sve_int_bin_pred_shift_imm_right opc, string asm, string Ps, - SDPatternOperator op = null_frag> { - def _B : SVEPseudo2Instr, - sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; - def _H : SVEPseudo2Instr, - sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { - let Inst{8} = imm{3}; - } - def _S : SVEPseudo2Instr, - sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { - let Inst{9-8} = imm{4-3}; - } - def _D : SVEPseudo2Instr, - sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { - let Inst{22} = imm{5}; - let Inst{9-8} = imm{4-3}; - } - - def : SVE_3_Op_Imm_Pat(NAME # _B)>; - def : SVE_3_Op_Imm_Pat(NAME # _H)>; - def : SVE_3_Op_Imm_Pat(NAME # _S)>; - def : SVE_3_Op_Imm_Pat(NAME # _D)>; -} - -// As above but shift amount takes the form of a "vector immediate". 
-multiclass sve_int_bin_pred_shift_imm_right_dup opc, string asm, - string Ps, SDPatternOperator op> -: sve_int_bin_pred_shift_imm_right { - def : SVE_Shift_DupImm_Pred_Pat(NAME # _B)>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _H)>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _S)>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _D)>; -} - -multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd { - def _ZERO_B : PredTwoOpImmPseudo; - def _ZERO_H : PredTwoOpImmPseudo; - def _ZERO_S : PredTwoOpImmPseudo; - def _ZERO_D : PredTwoOpImmPseudo; - - def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _ZERO_B)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _ZERO_H)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _ZERO_S)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _ZERO_D)>; -} - -class sve_int_bin_pred_shift sz8_64, bit wide, bits<3> opc, - string asm, ZPRRegOp zprty, ZPRRegOp zprty2> -: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty2:$Zm), - asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zdn; - bits<5> Zm; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = sz8_64; - let Inst{21-20} = 0b01; - let Inst{19} = wide; - let Inst{18-16} = opc; - let Inst{15-13} = 0b100; - let Inst{12-10} = Pg; - let Inst{9-5} = Zm; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = zprty.ElementSize; -} - -multiclass sve_int_bin_pred_shift opc, string asm, string Ps, - SDPatternOperator op, string revname, bit isReverseInstr = 0> { - let DestructiveInstType = DestructiveBinaryCommWithRev in { - def _B : sve_int_bin_pred_shift<0b00, 0b0, opc, asm, ZPR8, ZPR8>, - SVEPseudo2Instr, SVEInstr2Rev; - def _H : sve_int_bin_pred_shift<0b01, 0b0, opc, asm, ZPR16, ZPR16>, - SVEPseudo2Instr, SVEInstr2Rev; - def _S : sve_int_bin_pred_shift<0b10, 0b0, opc, asm, ZPR32, ZPR32>, - SVEPseudo2Instr, SVEInstr2Rev; - def _D : sve_int_bin_pred_shift<0b11, 0b0, opc, asm, ZPR64, ZPR64>, - SVEPseudo2Instr, 
SVEInstr2Rev; - } - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -multiclass sve_int_bin_pred_zeroing_bhsd { - def _ZERO_B : PredTwoOpPseudo; - def _ZERO_H : PredTwoOpPseudo; - def _ZERO_S : PredTwoOpPseudo; - def _ZERO_D : PredTwoOpPseudo; - - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_B)>; - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_H)>; - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_S)>; - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_D)>; -} - -multiclass sve_int_bin_pred_shift_wide opc, string asm, - SDPatternOperator op> { - def _B : sve_int_bin_pred_shift<0b00, 0b1, opc, asm, ZPR8, ZPR64>; - def _H : sve_int_bin_pred_shift<0b01, 0b1, opc, asm, ZPR16, ZPR64>; - def _S : sve_int_bin_pred_shift<0b10, 0b1, opc, asm, ZPR32, ZPR64>; - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; -} - -//===----------------------------------------------------------------------===// -// SVE Shift - Unpredicated Group -//===----------------------------------------------------------------------===// - -class sve_int_bin_cons_shift_wide sz8_64, bits<2> opc, string asm, - ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$Zn, ZPR64:$Zm), - asm, "\t$Zd, $Zn, $Zm", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<5> Zm; - bits<5> Zn; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = sz8_64; - let Inst{21} = 0b1; - let Inst{20-16} = Zm; - let Inst{15-12} = 0b1000; - let Inst{11-10} = opc; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve_int_bin_cons_shift_wide opc, string asm, SDPatternOperator op> { - def _B : sve_int_bin_cons_shift_wide<0b00, opc, asm, ZPR8>; - def _H : sve_int_bin_cons_shift_wide<0b01, opc, asm, ZPR16>; - def _S : sve_int_bin_cons_shift_wide<0b10, opc, asm, ZPR32>; - - def : SVE_2_Op_Pred_All_Active(NAME # _B)>; - def : SVE_2_Op_Pred_All_Active(NAME # _H)>; - def : SVE_2_Op_Pred_All_Active(NAME # _S)>; -} - -class 
sve_int_bin_cons_shift_imm tsz8_64, bits<2> opc, string asm, - ZPRRegOp zprty, Operand immtype> -: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm), - asm, "\t$Zd, $Zn, $imm", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - bits<6> imm; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = tsz8_64{3-2}; - let Inst{21} = 0b1; - let Inst{20-19} = tsz8_64{1-0}; - let Inst{18-16} = imm{2-0}; // imm3 - let Inst{15-12} = 0b1001; - let Inst{11-10} = opc; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve_int_bin_cons_shift_imm_left opc, string asm, - SDPatternOperator op> { - def _B : sve_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; - def _H : sve_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { - let Inst{19} = imm{3}; - } - def _S : sve_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { - let Inst{20-19} = imm{4-3}; - } - def _D : sve_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { - let Inst{22} = imm{5}; - let Inst{20-19} = imm{4-3}; - } - - def : SVE_Shift_DupImm_All_Active_Pat(NAME # _B)>; - def : SVE_Shift_DupImm_All_Active_Pat(NAME # _H)>; - def : SVE_Shift_DupImm_All_Active_Pat(NAME # _S)>; - def : SVE_Shift_DupImm_All_Active_Pat(NAME # _D)>; -} - -multiclass sve_int_bin_cons_shift_imm_right opc, string asm, - SDPatternOperator op> { - def _B : sve_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; - def _H : sve_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { - let Inst{19} = imm{3}; - } - def _S : sve_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { - let Inst{20-19} = imm{4-3}; - } - def _D : sve_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { - let Inst{22} = imm{5}; - let Inst{20-19} = imm{4-3}; - } - - def : SVE_Shift_DupImm_All_Active_Pat(NAME # _B)>; - def : SVE_Shift_DupImm_All_Active_Pat(NAME # _H)>; - def : SVE_Shift_DupImm_All_Active_Pat(NAME # _S)>; - def : SVE_Shift_DupImm_All_Active_Pat(NAME # 
_D)>; -} - -//===----------------------------------------------------------------------===// -// SVE Memory - Store Group -//===----------------------------------------------------------------------===// - -class sve_mem_cst_si msz, bits<2> esz, string asm, - RegisterOperand VecList> -: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), - asm, "\t$Zt, $Pg, [$Rn, $imm4, mul vl]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rn; - bits<5> Zt; - bits<4> imm4; - let Inst{31-25} = 0b1110010; - let Inst{24-23} = msz; - let Inst{22-21} = esz; - let Inst{20} = 0; - let Inst{19-16} = imm4; - let Inst{15-13} = 0b111; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayStore = 1; -} - -multiclass sve_mem_cst_si msz, bits<2> esz, string asm, - RegisterOperand listty, ZPRRegOp zprty> -{ - def NAME : sve_mem_cst_si; - - def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>; - def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; - def : InstAlias(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; -} - -class sve_mem_est_si sz, bits<2> nregs, RegisterOperand VecList, - string asm, Operand immtype> -: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, immtype:$imm4), - asm, "\t$Zt, $Pg, [$Rn, $imm4, mul vl]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rn; - bits<5> Zt; - bits<4> imm4; - let Inst{31-25} = 0b1110010; - let Inst{24-23} = sz; - let Inst{22-21} = nregs; - let Inst{20} = 1; - let Inst{19-16} = imm4; - let Inst{15-13} = 0b111; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayStore = 1; -} - -multiclass sve_mem_est_si sz, bits<2> nregs, RegisterOperand VecList, - string asm, Operand immtype> { - def NAME : sve_mem_est_si; - - def : InstAlias(NAME) VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; -} - -class sve_mem_est_ss sz, bits<2> nregs, RegisterOperand VecList, - string asm, RegisterOperand gprty> -: I<(outs), (ins VecList:$Zt, 
PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), - asm, "\t$Zt, $Pg, [$Rn, $Rm]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rm; - bits<5> Rn; - bits<5> Zt; - let Inst{31-25} = 0b1110010; - let Inst{24-23} = sz; - let Inst{22-21} = nregs; - let Inst{20-16} = Rm; - let Inst{15-13} = 0b011; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayStore = 1; -} - -class sve_mem_cst_ss_base dtype, string asm, - RegisterOperand listty, RegisterOperand gprty> -: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), - asm, "\t$Zt, $Pg, [$Rn, $Rm]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rm; - bits<5> Rn; - bits<5> Zt; - let Inst{31-25} = 0b1110010; - let Inst{24-21} = dtype; - let Inst{20-16} = Rm; - let Inst{15-13} = 0b010; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayStore = 1; -} - -multiclass sve_mem_cst_ss dtype, string asm, - RegisterOperand listty, ZPRRegOp zprty, - RegisterOperand gprty> { - def NAME : sve_mem_cst_ss_base; - - def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; -} - -class sve_mem_cstnt_si msz, string asm, RegisterOperand VecList> -: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), - asm, "\t$Zt, $Pg, [$Rn, $imm4, mul vl]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rn; - bits<5> Zt; - bits<4> imm4; - let Inst{31-25} = 0b1110010; - let Inst{24-23} = msz; - let Inst{22-20} = 0b001; - let Inst{19-16} = imm4; - let Inst{15-13} = 0b111; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayStore = 1; -} - -multiclass sve_mem_cstnt_si msz, string asm, RegisterOperand listty, - ZPRRegOp zprty> { - def NAME : sve_mem_cstnt_si; - - def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; - def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>; - def : InstAlias(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; -} - -class sve_mem_cstnt_ss_base msz, string asm, 
RegisterOperand listty, - RegisterOperand gprty> -: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), - asm, "\t$Zt, $Pg, [$Rn, $Rm]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rm; - bits<5> Rn; - bits<5> Zt; - let Inst{31-25} = 0b1110010; - let Inst{24-23} = msz; - let Inst{22-21} = 0b00; - let Inst{20-16} = Rm; - let Inst{15-13} = 0b011; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayStore = 1; -} - -multiclass sve_mem_cstnt_ss msz, string asm, RegisterOperand listty, - ZPRRegOp zprty, RegisterOperand gprty> { - def NAME : sve_mem_cstnt_ss_base; - - def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; -} - -class sve2_mem_sstnt_vs_base opc, string asm, - RegisterOperand listty, ZPRRegOp zprty> -: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), - asm, "\t$Zt, $Pg, [$Zn, $Rm]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rm; - bits<5> Zn; - bits<5> Zt; - let Inst{31-25} = 0b1110010; - let Inst{24-22} = opc; - let Inst{21} = 0b0; - let Inst{20-16} = Rm; - let Inst{15-13} = 0b001; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zt; - - let mayStore = 1; -} - -multiclass sve2_mem_sstnt_vs_32_ptrs opc, string asm, - SDPatternOperator op, - ValueType vt> { - def _REAL : sve2_mem_sstnt_vs_base; - - def : InstAlias(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), 0>; - def : InstAlias(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 0>; - def : InstAlias(NAME # _REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 1>; - - def : Pat <(op (nxv4i32 ZPR32:$Zt), (nxv4i1 PPR3bAny:$Pg), (nxv4i32 ZPR32:$Zn), (i64 GPR64:$Rm), vt), - (!cast(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm)>; -} - -multiclass sve2_mem_sstnt_vs_64_ptrs opc, string asm, - SDPatternOperator op, - ValueType vt> { - def _REAL : sve2_mem_sstnt_vs_base; - - def : InstAlias(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), 0>; - def : InstAlias(NAME # _REAL) 
ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 0>; - def : InstAlias(NAME # _REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>; - - def : Pat <(op (nxv2i64 ZPR64:$Zt), (nxv2i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64:$Rm), vt), - (!cast(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>; -} - -class sve_mem_sst_sv opc, bit xs, bit scaled, string asm, - RegisterOperand VecList, RegisterOperand zprext> -: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), - asm, "\t$Zt, $Pg, [$Rn, $Zm]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rn; - bits<5> Zm; - bits<5> Zt; - let Inst{31-25} = 0b1110010; - let Inst{24-22} = opc; - let Inst{21} = scaled; - let Inst{20-16} = Zm; - let Inst{15} = 0b1; - let Inst{14} = xs; - let Inst{13} = 0; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayStore = 1; -} - -multiclass sve_mem_32b_sst_sv_32_scaled opc, string asm, - SDPatternOperator sxtw_op, - SDPatternOperator uxtw_op, - RegisterOperand sxtw_opnd, - RegisterOperand uxtw_opnd, - ValueType vt > { - def _UXTW_SCALED : sve_mem_sst_sv; - def _SXTW_SCALED : sve_mem_sst_sv; - - def : InstAlias(NAME # _UXTW_SCALED) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; - def : InstAlias(NAME # _SXTW_SCALED) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; - - def : Pat<(uxtw_op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt), - (!cast(NAME # _UXTW_SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; - def : Pat<(sxtw_op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt), - (!cast(NAME # _SXTW_SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; -} - -multiclass sve_mem_64b_sst_sv_32_scaled opc, string asm, - SDPatternOperator sxtw_op, - SDPatternOperator uxtw_op, - RegisterOperand sxtw_opnd, - RegisterOperand uxtw_opnd, - ValueType vt > { - def _UXTW_SCALED : sve_mem_sst_sv; - def _SXTW_SCALED : sve_mem_sst_sv; - - def : InstAlias(NAME # 
_UXTW_SCALED) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; - def : InstAlias(NAME # _SXTW_SCALED) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; - - def : Pat<(uxtw_op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt), - (!cast(NAME # _UXTW_SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; - def : Pat<(sxtw_op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt), - (!cast(NAME # _SXTW_SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; -} - -multiclass sve_mem_64b_sst_sv_32_unscaled opc, string asm, - SDPatternOperator sxtw_op, - SDPatternOperator uxtw_op, - RegisterOperand sxtw_opnd, - RegisterOperand uxtw_opnd, - ValueType vt> { - def _UXTW : sve_mem_sst_sv; - def _SXTW : sve_mem_sst_sv; - - def : InstAlias(NAME # _UXTW) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; - def : InstAlias(NAME # _SXTW) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; - - def : Pat<(uxtw_op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt), - (!cast(NAME # _UXTW) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; - def : Pat<(sxtw_op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt), - (!cast(NAME # _SXTW) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; -} - -multiclass sve_mem_32b_sst_sv_32_unscaled opc, string asm, - SDPatternOperator sxtw_op, - SDPatternOperator uxtw_op, - RegisterOperand sxtw_opnd, - RegisterOperand uxtw_opnd, - ValueType vt> { - def _UXTW : sve_mem_sst_sv; - def _SXTW : sve_mem_sst_sv; - - def : InstAlias(NAME # _UXTW) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; - def : InstAlias(NAME # _SXTW) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; - - def : Pat<(uxtw_op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt), - (!cast(NAME # _UXTW) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; - def : Pat<(sxtw_op (nxv4i32 
ZPR:$data), (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt), - (!cast(NAME # _SXTW) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; -} - -class sve_mem_sst_sv2 msz, bit scaled, string asm, - RegisterOperand zprext> -: I<(outs), (ins Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), - asm, "\t$Zt, $Pg, [$Rn, $Zm]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rn; - bits<5> Zm; - bits<5> Zt; - let Inst{31-25} = 0b1110010; - let Inst{24-23} = msz; - let Inst{22} = 0b0; - let Inst{21} = scaled; - let Inst{20-16} = Zm; - let Inst{15-13} = 0b101; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayStore = 1; -} - -multiclass sve_mem_sst_sv_64_scaled msz, string asm, - SDPatternOperator op, - RegisterOperand zprext, - ValueType vt> { - def _SCALED_REAL : sve_mem_sst_sv2; - - def : InstAlias(NAME # _SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>; - - def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt), - (!cast(NAME # _SCALED_REAL) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$indices)>; -} - -multiclass sve_mem_sst_sv_64_unscaled msz, string asm, - SDPatternOperator op, - ValueType vt> { - def _REAL : sve_mem_sst_sv2; - - def : InstAlias(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>; - - def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt), - (!cast(NAME # _REAL) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; -} - -class sve_mem_sst_vi opc, string asm, ZPRRegOp zprty, - RegisterOperand VecList, Operand imm_ty> -: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, zprty:$Zn, imm_ty:$imm5), - asm, "\t$Zt, $Pg, [$Zn, $imm5]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> imm5; - bits<5> Zn; - bits<5> Zt; - let Inst{31-25} = 0b1110010; - let Inst{24-23} = opc{2-1}; - let Inst{22} = 0b1; - let Inst{21} = opc{0}; - let Inst{20-16} = imm5; - let Inst{15-13} = 0b101; - let Inst{12-10} = Pg; - let Inst{9-5} = 
Zn; - let Inst{4-0} = Zt; - - let mayStore = 1; -} - -multiclass sve_mem_32b_sst_vi_ptrs opc, string asm, - Operand imm_ty, - SDPatternOperator op, - ValueType vt> { - def _IMM : sve_mem_sst_vi; - - def : InstAlias(NAME # _IMM) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 0>; - def : InstAlias(NAME # _IMM) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), 0>; - def : InstAlias(NAME # _IMM) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>; - - def : Pat<(op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), (nxv4i32 ZPR:$ptrs), imm_ty:$index, vt), - (!cast(NAME # _IMM) ZPR:$data, PPR:$gp, ZPR:$ptrs, imm_ty:$index)>; -} - -multiclass sve_mem_64b_sst_vi_ptrs opc, string asm, - Operand imm_ty, - SDPatternOperator op, - ValueType vt> { - def _IMM : sve_mem_sst_vi; - - def : InstAlias(NAME # _IMM) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 0>; - def : InstAlias(NAME # _IMM) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), 0>; - def : InstAlias(NAME # _IMM) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>; - - def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), (nxv2i64 ZPR:$ptrs), imm_ty:$index, vt), - (!cast(NAME # _IMM) ZPR:$data, PPR:$gp, ZPR:$ptrs, imm_ty:$index)>; -} - -class sve_mem_z_spill -: I<(outs), (ins ZPRAny:$Zt, GPR64sp:$Rn, simm9:$imm9), - asm, "\t$Zt, [$Rn, $imm9, mul vl]", - "", - []>, Sched<[]> { - bits<5> Rn; - bits<5> Zt; - bits<9> imm9; - let Inst{31-22} = 0b1110010110; - let Inst{21-16} = imm9{8-3}; - let Inst{15-13} = 0b010; - let Inst{12-10} = imm9{2-0}; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayStore = 1; -} - -multiclass sve_mem_z_spill { - def NAME : sve_mem_z_spill; - - def : InstAlias(NAME) ZPRAny:$Zt, GPR64sp:$Rn, 0), 1>; -} - -class sve_mem_p_spill -: I<(outs), (ins PPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), - asm, "\t$Pt, [$Rn, $imm9, mul vl]", - "", - []>, Sched<[]> { - bits<4> Pt; - bits<5> Rn; - bits<9> imm9; - let Inst{31-22} = 0b1110010110; - let Inst{21-16} = imm9{8-3}; - let Inst{15-13} = 0b000; - let Inst{12-10} = imm9{2-0}; - let Inst{9-5} = Rn; - let 
Inst{4} = 0b0; - let Inst{3-0} = Pt; - - let mayStore = 1; -} - -multiclass sve_mem_p_spill { - def NAME : sve_mem_p_spill; - - def : InstAlias(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>; -} - -//===----------------------------------------------------------------------===// -// SVE Permute - Predicates Group -//===----------------------------------------------------------------------===// - -class sve_int_perm_bin_perm_pp opc, bits<2> sz8_64, string asm, - PPRRegOp pprty> -: I<(outs pprty:$Pd), (ins pprty:$Pn, pprty:$Pm), - asm, "\t$Pd, $Pn, $Pm", - "", []>, Sched<[]> { - bits<4> Pd; - bits<4> Pm; - bits<4> Pn; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21-20} = 0b10; - let Inst{19-16} = Pm; - let Inst{15-13} = 0b010; - let Inst{12-10} = opc; - let Inst{9} = 0b0; - let Inst{8-5} = Pn; - let Inst{4} = 0b0; - let Inst{3-0} = Pd; -} - -multiclass sve_int_perm_bin_perm_pp opc, string asm, - SDPatternOperator op> { - def _B : sve_int_perm_bin_perm_pp; - def _H : sve_int_perm_bin_perm_pp; - def _S : sve_int_perm_bin_perm_pp; - def _D : sve_int_perm_bin_perm_pp; - - def : SVE_2_Op_Pat(NAME # _B)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -class sve_int_perm_punpk -: I<(outs PPR16:$Pd), (ins PPR8:$Pn), - asm, "\t$Pd, $Pn", - "", - []>, Sched<[]> { - bits<4> Pd; - bits<4> Pn; - let Inst{31-17} = 0b000001010011000; - let Inst{16} = opc; - let Inst{15-9} = 0b0100000; - let Inst{8-5} = Pn; - let Inst{4} = 0b0; - let Inst{3-0} = Pd; -} - -multiclass sve_int_perm_punpk { - def NAME : sve_int_perm_punpk; - - def : SVE_1_Op_Pat(NAME)>; - def : SVE_1_Op_Pat(NAME)>; - def : SVE_1_Op_Pat(NAME)>; -} - -class sve_int_rdffr_pred -: I<(outs PPR8:$Pd), (ins PPRAny:$Pg), - asm, "\t$Pd, $Pg/z", - "", - []>, Sched<[]> { - bits<4> Pd; - bits<4> Pg; - let Inst{31-23} = 0b001001010; - let Inst{22} = s; - let Inst{21-9} = 0b0110001111000; - let Inst{8-5} = Pg; - let Inst{4} = 0; - let Inst{3-0} = Pd; - - let 
Defs = !if(s, [NZCV], []); - let Uses = [FFR]; -} - -multiclass sve_int_rdffr_pred { - def _REAL : sve_int_rdffr_pred; - - // We need a layer of indirection because early machine code passes balk at - // physical register (i.e. FFR) uses that have no previous definition. - let hasSideEffects = 1, hasNoSchedulingInfo = 1 in { - def "" : Pseudo<(outs PPR8:$Pd), (ins PPRAny:$Pg), [(set (nxv16i1 PPR8:$Pd), (op (nxv16i1 PPRAny:$Pg)))]>, - PseudoInstExpansion<(!cast(NAME # _REAL) PPR8:$Pd, PPRAny:$Pg)>; - } -} - -class sve_int_rdffr_unpred : I< - (outs PPR8:$Pd), (ins), - asm, "\t$Pd", - "", - []>, Sched<[]> { - bits<4> Pd; - let Inst{31-4} = 0b0010010100011001111100000000; - let Inst{3-0} = Pd; - - let Uses = [FFR]; -} - -multiclass sve_int_rdffr_unpred { - def _REAL : sve_int_rdffr_unpred; - - // We need a layer of indirection because early machine code passes balk at - // physical register (i.e. FFR) uses that have no previous definition. - let hasSideEffects = 1, hasNoSchedulingInfo = 1 in { - def "" : Pseudo<(outs PPR8:$Pd), (ins), [(set (nxv16i1 PPR8:$Pd), (op))]>, - PseudoInstExpansion<(!cast(NAME # _REAL) PPR8:$Pd)>; - } -} - -class sve_int_wrffr -: I<(outs), (ins PPR8:$Pn), - asm, "\t$Pn", - "", - [(op (nxv16i1 PPR8:$Pn))]>, Sched<[]> { - bits<4> Pn; - let Inst{31-9} = 0b00100101001010001001000; - let Inst{8-5} = Pn; - let Inst{4-0} = 0b00000; - - let hasSideEffects = 1; - let Defs = [FFR]; -} - -class sve_int_setffr -: I<(outs), (ins), - asm, "", - "", - [(op)]>, Sched<[]> { - let Inst{31-0} = 0b00100101001011001001000000000000; - - let hasSideEffects = 1; - let Defs = [FFR]; -} - -//===----------------------------------------------------------------------===// -// SVE Permute Vector - Predicated Group -//===----------------------------------------------------------------------===// - -class sve_int_perm_clast_rz sz8_64, bit ab, string asm, - ZPRRegOp zprty, RegisterClass rt> -: I<(outs rt:$Rdn), (ins PPR3bAny:$Pg, rt:$_Rdn, zprty:$Zm), - asm, "\t$Rdn, $Pg, 
$_Rdn, $Zm", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rdn; - bits<5> Zm; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21-17} = 0b11000; - let Inst{16} = ab; - let Inst{15-13} = 0b101; - let Inst{12-10} = Pg; - let Inst{9-5} = Zm; - let Inst{4-0} = Rdn; - - let Constraints = "$Rdn = $_Rdn"; -} - -multiclass sve_int_perm_clast_rz { - def _B : sve_int_perm_clast_rz<0b00, ab, asm, ZPR8, GPR32>; - def _H : sve_int_perm_clast_rz<0b01, ab, asm, ZPR16, GPR32>; - def _S : sve_int_perm_clast_rz<0b10, ab, asm, ZPR32, GPR32>; - def _D : sve_int_perm_clast_rz<0b11, ab, asm, ZPR64, GPR64>; - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -class sve_int_perm_clast_vz sz8_64, bit ab, string asm, - ZPRRegOp zprty, RegisterClass rt> -: I<(outs rt:$Vdn), (ins PPR3bAny:$Pg, rt:$_Vdn, zprty:$Zm), - asm, "\t$Vdn, $Pg, $_Vdn, $Zm", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Vdn; - bits<5> Zm; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21-17} = 0b10101; - let Inst{16} = ab; - let Inst{15-13} = 0b100; - let Inst{12-10} = Pg; - let Inst{9-5} = Zm; - let Inst{4-0} = Vdn; - - let Constraints = "$Vdn = $_Vdn"; -} - -multiclass sve_int_perm_clast_vz { - def _B : sve_int_perm_clast_vz<0b00, ab, asm, ZPR8, FPR8>; - def _H : sve_int_perm_clast_vz<0b01, ab, asm, ZPR16, FPR16>; - def _S : sve_int_perm_clast_vz<0b10, ab, asm, ZPR32, FPR32>; - def _D : sve_int_perm_clast_vz<0b11, ab, asm, ZPR64, FPR64>; - - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; - - def : SVE_3_Op_Pat(NAME # _H)>; -} - -class sve_int_perm_clast_zz sz8_64, bit ab, string asm, - ZPRRegOp zprty> -: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm), - asm, "\t$Zdn, $Pg, $_Zdn, $Zm", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zdn; - bits<5> Zm; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = 
sz8_64; - let Inst{21-17} = 0b10100; - let Inst{16} = ab; - let Inst{15-13} = 0b100; - let Inst{12-10} = Pg; - let Inst{9-5} = Zm; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve_int_perm_clast_zz { - def _B : sve_int_perm_clast_zz<0b00, ab, asm, ZPR8>; - def _H : sve_int_perm_clast_zz<0b01, ab, asm, ZPR16>; - def _S : sve_int_perm_clast_zz<0b10, ab, asm, ZPR32>; - def _D : sve_int_perm_clast_zz<0b11, ab, asm, ZPR64>; - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; - - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; - - def : SVE_3_Op_Pat(NAME # _H)>; -} - -class sve_int_perm_last_r sz8_64, bit ab, string asm, - ZPRRegOp zprty, RegisterClass resultRegType> -: I<(outs resultRegType:$Rd), (ins PPR3bAny:$Pg, zprty:$Zn), - asm, "\t$Rd, $Pg, $Zn", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rd; - bits<5> Zn; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21-17} = 0b10000; - let Inst{16} = ab; - let Inst{15-13} = 0b101; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Rd; -} - -multiclass sve_int_perm_last_r { - def _B : sve_int_perm_last_r<0b00, ab, asm, ZPR8, GPR32>; - def _H : sve_int_perm_last_r<0b01, ab, asm, ZPR16, GPR32>; - def _S : sve_int_perm_last_r<0b10, ab, asm, ZPR32, GPR32>; - def _D : sve_int_perm_last_r<0b11, ab, asm, ZPR64, GPR64>; - - def : SVE_2_Op_Pat(NAME # _B)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -class sve_int_perm_last_v sz8_64, bit ab, string asm, - ZPRRegOp zprty, RegisterClass dstRegtype> -: I<(outs dstRegtype:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn), - asm, "\t$Vd, $Pg, $Zn", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Vd; - bits<5> Zn; - let Inst{31-24} = 0b00000101; - let 
Inst{23-22} = sz8_64; - let Inst{21-17} = 0b10001; - let Inst{16} = ab; - let Inst{15-13} = 0b100; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Vd; -} - -multiclass sve_int_perm_last_v { - def _B : sve_int_perm_last_v<0b00, ab, asm, ZPR8, FPR8>; - def _H : sve_int_perm_last_v<0b01, ab, asm, ZPR16, FPR16>; - def _S : sve_int_perm_last_v<0b10, ab, asm, ZPR32, FPR32>; - def _D : sve_int_perm_last_v<0b11, ab, asm, ZPR64, FPR64>; - - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; - - def : SVE_2_Op_Pat(NAME # _H)>; -} - -class sve_int_perm_splice sz8_64, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm), - asm, "\t$Zdn, $Pg, $_Zdn, $Zm", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zdn; - bits<5> Zm; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21-13} = 0b101100100; - let Inst{12-10} = Pg; - let Inst{9-5} = Zm; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeNone; -} - -multiclass sve_int_perm_splice { - def _B : sve_int_perm_splice<0b00, asm, ZPR8>; - def _H : sve_int_perm_splice<0b01, asm, ZPR16>; - def _S : sve_int_perm_splice<0b10, asm, ZPR32>; - def _D : sve_int_perm_splice<0b11, asm, ZPR64>; - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; - - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; - - def : SVE_3_Op_Pat(NAME # _H)>; -} - -class sve2_int_perm_splice_cons sz8_64, string asm, - ZPRRegOp zprty, RegisterOperand VecList> -: I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, VecList:$Zn), - asm, "\t$Zd, $Pg, $Zn", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zn; - bits<5> Zd; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21-13} = 
0b101101100; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve2_int_perm_splice_cons { - def _B : sve2_int_perm_splice_cons<0b00, asm, ZPR8, ZZ_b>; - def _H : sve2_int_perm_splice_cons<0b01, asm, ZPR16, ZZ_h>; - def _S : sve2_int_perm_splice_cons<0b10, asm, ZPR32, ZZ_s>; - def _D : sve2_int_perm_splice_cons<0b11, asm, ZPR64, ZZ_d>; -} - -class sve_int_perm_rev sz8_64, bits<2> opc, string asm, - ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, zprty:$Zn), - asm, "\t$Zd, $Pg/m, $Zn", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<3> Pg; - bits<5> Zn; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21-18} = 0b1001; - let Inst{17-16} = opc; - let Inst{15-13} = 0b100; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; - - let Constraints = "$Zd = $_Zd"; - let DestructiveInstType = DestructiveOther; - let ElementSize = zprty.ElementSize; -} - -multiclass sve_int_perm_rev_rbit { - def _B : sve_int_perm_rev<0b00, 0b11, asm, ZPR8>; - def _H : sve_int_perm_rev<0b01, 0b11, asm, ZPR16>; - def _S : sve_int_perm_rev<0b10, 0b11, asm, ZPR32>; - def _D : sve_int_perm_rev<0b11, 0b11, asm, ZPR64>; - - def : SVE_1_Op_Passthru_Pat(NAME # _B)>; - def : SVE_1_Op_Passthru_Pat(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat(NAME # _S)>; - def : SVE_1_Op_Passthru_Pat(NAME # _D)>; -} - -multiclass sve_int_perm_rev_revb { - def _H : sve_int_perm_rev<0b01, 0b00, asm, ZPR16>; - def _S : sve_int_perm_rev<0b10, 0b00, asm, ZPR32>; - def _D : sve_int_perm_rev<0b11, 0b00, asm, ZPR64>; - - def : SVE_1_Op_Passthru_Pat(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat(NAME # _S)>; - def : SVE_1_Op_Passthru_Pat(NAME # _D)>; -} - -multiclass sve_int_perm_rev_revh { - def _S : sve_int_perm_rev<0b10, 0b01, asm, ZPR32>; - def _D : sve_int_perm_rev<0b11, 0b01, asm, ZPR64>; - - def : SVE_1_Op_Passthru_Pat(NAME # _S)>; - def : SVE_1_Op_Passthru_Pat(NAME # _D)>; -} - -multiclass sve_int_perm_rev_revw { - def _D : 
sve_int_perm_rev<0b11, 0b10, asm, ZPR64>; - - def : SVE_1_Op_Passthru_Pat(NAME # _D)>; -} - -class sve_int_perm_cpy_r sz8_64, string asm, ZPRRegOp zprty, - RegisterClass srcRegType> -: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, srcRegType:$Rn), - asm, "\t$Zd, $Pg/m, $Rn", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rn; - bits<5> Zd; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21-13} = 0b101000101; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zd; - - let Constraints = "$Zd = $_Zd"; - let DestructiveInstType = DestructiveOther; - let ElementSize = zprty.ElementSize; -} - -multiclass sve_int_perm_cpy_r { - def _B : sve_int_perm_cpy_r<0b00, asm, ZPR8, GPR32sp>; - def _H : sve_int_perm_cpy_r<0b01, asm, ZPR16, GPR32sp>; - def _S : sve_int_perm_cpy_r<0b10, asm, ZPR32, GPR32sp>; - def _D : sve_int_perm_cpy_r<0b11, asm, ZPR64, GPR64sp>; - - def : InstAlias<"mov $Zd, $Pg/m, $Rn", - (!cast(NAME # _B) ZPR8:$Zd, PPR3bAny:$Pg, GPR32sp:$Rn), 1>; - def : InstAlias<"mov $Zd, $Pg/m, $Rn", - (!cast(NAME # _H) ZPR16:$Zd, PPR3bAny:$Pg, GPR32sp:$Rn), 1>; - def : InstAlias<"mov $Zd, $Pg/m, $Rn", - (!cast(NAME # _S) ZPR32:$Zd, PPR3bAny:$Pg, GPR32sp:$Rn), 1>; - def : InstAlias<"mov $Zd, $Pg/m, $Rn", - (!cast(NAME # _D) ZPR64:$Zd, PPR3bAny:$Pg, GPR64sp:$Rn), 1>; - - def : Pat<(nxv16i8 (op nxv16i1:$pg, i32:$splat, nxv16i8:$passthru)), - (!cast(NAME # _B) $passthru, $pg, $splat)>; - def : Pat<(nxv8i16 (op nxv8i1:$pg, i32:$splat, nxv8i16:$passthru)), - (!cast(NAME # _H) $passthru, $pg, $splat)>; - def : Pat<(nxv4i32 (op nxv4i1:$pg, i32:$splat, nxv4i32:$passthru)), - (!cast(NAME # _S) $passthru, $pg, $splat)>; - def : Pat<(nxv2i64 (op nxv2i1:$pg, i64:$splat, nxv2i64:$passthru)), - (!cast(NAME # _D) $passthru, $pg, $splat)>; -} - -class sve_int_perm_cpy_v sz8_64, string asm, ZPRRegOp zprty, - RegisterClass srcRegtype> -: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, srcRegtype:$Vn), - asm, "\t$Zd, $Pg/m, $Vn", - "", - []>, Sched<[]> 
{ - bits<3> Pg; - bits<5> Vn; - bits<5> Zd; - let Inst{31-24} = 0b00000101; - let Inst{23-22} = sz8_64; - let Inst{21-13} = 0b100000100; - let Inst{12-10} = Pg; - let Inst{9-5} = Vn; - let Inst{4-0} = Zd; - - let Constraints = "$Zd = $_Zd"; - let DestructiveInstType = DestructiveOther; - let ElementSize = zprty.ElementSize; -} - -multiclass sve_int_perm_cpy_v { - def _B : sve_int_perm_cpy_v<0b00, asm, ZPR8, FPR8>; - def _H : sve_int_perm_cpy_v<0b01, asm, ZPR16, FPR16>; - def _S : sve_int_perm_cpy_v<0b10, asm, ZPR32, FPR32>; - def _D : sve_int_perm_cpy_v<0b11, asm, ZPR64, FPR64>; - - def : InstAlias<"mov $Zd, $Pg/m, $Vn", - (!cast(NAME # _B) ZPR8:$Zd, PPR3bAny:$Pg, FPR8:$Vn), 1>; - def : InstAlias<"mov $Zd, $Pg/m, $Vn", - (!cast(NAME # _H) ZPR16:$Zd, PPR3bAny:$Pg, FPR16:$Vn), 1>; - def : InstAlias<"mov $Zd, $Pg/m, $Vn", - (!cast(NAME # _S) ZPR32:$Zd, PPR3bAny:$Pg, FPR32:$Vn), 1>; - def : InstAlias<"mov $Zd, $Pg/m, $Vn", - (!cast(NAME # _D) ZPR64:$Zd, PPR3bAny:$Pg, FPR64:$Vn), 1>; - - def : Pat<(nxv8f16 (op nxv8i1:$pg, f16:$splat, nxv8f16:$passthru)), - (!cast(NAME # _H) $passthru, $pg, $splat)>; - def : Pat<(nxv2f32 (op nxv2i1:$pg, f32:$splat, nxv2f32:$passthru)), - (!cast(NAME # _S) $passthru, $pg, $splat)>; - def : Pat<(nxv4f32 (op nxv4i1:$pg, f32:$splat, nxv4f32:$passthru)), - (!cast(NAME # _S) $passthru, $pg, $splat)>; - def : Pat<(nxv2f64 (op nxv2i1:$pg, f64:$splat, nxv2f64:$passthru)), - (!cast(NAME # _D) $passthru, $pg, $splat)>; - - def : Pat<(nxv8bf16 (op nxv8i1:$pg, bf16:$splat, nxv8bf16:$passthru)), - (!cast(NAME # _H) $passthru, $pg, $splat)>; -} - -class sve_int_perm_compact -: I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zn), - asm, "\t$Zd, $Pg, $Zn", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zd; - bits<5> Zn; - let Inst{31-23} = 0b000001011; - let Inst{22} = sz; - let Inst{21-13} = 0b100001100; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve_int_perm_compact { - def _S : sve_int_perm_compact<0b0, asm, 
ZPR32>; - def _D : sve_int_perm_compact<0b1, asm, ZPR64>; - - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE Memory - Contiguous Load Group -//===----------------------------------------------------------------------===// - -class sve_mem_cld_si_base dtype, bit nf, string asm, - RegisterOperand VecList> -: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), - asm, "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rn; - bits<5> Zt; - bits<4> imm4; - let Inst{31-25} = 0b1010010; - let Inst{24-21} = dtype; - let Inst{20} = nf; - let Inst{19-16} = imm4; - let Inst{15-13} = 0b101; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayLoad = 1; - let Uses = !if(nf, [FFR], []); - let Defs = !if(nf, [FFR], []); -} - -multiclass sve_mem_cld_si_base dtype, bit nf, string asm, - RegisterOperand listty, ZPRRegOp zprty> { - def _REAL : sve_mem_cld_si_base; - - def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; - def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>; - def : InstAlias(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; - - // We need a layer of indirection because early machine code passes balk at - // physical register (i.e. FFR) uses that have no previous definition. 
- let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in { - def "" : Pseudo<(outs listty:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), []>, - PseudoInstExpansion<(!cast(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4)>; - } -} - -multiclass sve_mem_cld_si dtype, string asm, RegisterOperand listty, - ZPRRegOp zprty> -: sve_mem_cld_si_base; - -class sve_mem_cldnt_si_base msz, string asm, RegisterOperand VecList> -: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), - asm, "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]", - "", - []>, Sched<[]> { - bits<5> Zt; - bits<3> Pg; - bits<5> Rn; - bits<4> imm4; - let Inst{31-25} = 0b1010010; - let Inst{24-23} = msz; - let Inst{22-20} = 0b000; - let Inst{19-16} = imm4; - let Inst{15-13} = 0b111; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayLoad = 1; -} - -multiclass sve_mem_cldnt_si msz, string asm, RegisterOperand listty, - ZPRRegOp zprty> { - def NAME : sve_mem_cldnt_si_base; - - def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; - def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>; - def : InstAlias(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; -} - -class sve_mem_cldnt_ss_base msz, string asm, RegisterOperand VecList, - RegisterOperand gprty> -: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), - asm, "\t$Zt, $Pg/z, [$Rn, $Rm]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rm; - bits<5> Rn; - bits<5> Zt; - let Inst{31-25} = 0b1010010; - let Inst{24-23} = msz; - let Inst{22-21} = 0b00; - let Inst{20-16} = Rm; - let Inst{15-13} = 0b110; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayLoad = 1; -} - -multiclass sve_mem_cldnt_ss msz, string asm, RegisterOperand listty, - ZPRRegOp zprty, RegisterOperand gprty> { - def NAME : sve_mem_cldnt_ss_base; - - def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; -} - -class 
sve_mem_ldqr_si sz, string asm, RegisterOperand VecList> -: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s16:$imm4), - asm, "\t$Zt, $Pg/z, [$Rn, $imm4]", "", []>, Sched<[]> { - bits<5> Zt; - bits<5> Rn; - bits<3> Pg; - bits<4> imm4; - let Inst{31-25} = 0b1010010; - let Inst{24-23} = sz; - let Inst{22-20} = 0; - let Inst{19-16} = imm4; - let Inst{15-13} = 0b001; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayLoad = 1; -} - -multiclass sve_mem_ldqr_si sz, string asm, RegisterOperand listty, - ZPRRegOp zprty> { - def NAME : sve_mem_ldqr_si; - def : InstAlias(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; - def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; - def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s16:$imm4), 0>; -} - -class sve_mem_ldqr_ss sz, string asm, RegisterOperand VecList, - RegisterOperand gprty> -: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), - asm, "\t$Zt, $Pg/z, [$Rn, $Rm]", "", []>, Sched<[]> { - bits<5> Zt; - bits<3> Pg; - bits<5> Rn; - bits<5> Rm; - let Inst{31-25} = 0b1010010; - let Inst{24-23} = sz; - let Inst{22-21} = 0; - let Inst{20-16} = Rm; - let Inst{15-13} = 0; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayLoad = 1; -} - -multiclass sve_mem_ldqr_ss sz, string asm, RegisterOperand listty, - ZPRRegOp zprty, RegisterOperand gprty> { - def NAME : sve_mem_ldqr_ss; - - def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; -} - -class sve_mem_ld_dup dtypeh, bits<2> dtypel, string asm, - RegisterOperand VecList, Operand immtype> -: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, immtype:$imm6), - asm, "\t$Zt, $Pg/z, [$Rn, $imm6]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rn; - bits<5> Zt; - bits<6> imm6; - let Inst{31-25} = 0b1000010; - let Inst{24-23} = dtypeh; - let Inst{22} = 1; - let Inst{21-16} = imm6; - let Inst{15} = 0b1; - let Inst{14-13} = dtypel; - let 
Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayLoad = 1; -} - -multiclass sve_mem_ld_dup dtypeh, bits<2> dtypel, string asm, - RegisterOperand zlistty, ZPRRegOp zprty, Operand immtype> { - def NAME : sve_mem_ld_dup; - - def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; - def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, immtype:$imm6), 0>; - def : InstAlias(NAME) zlistty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; -} - -class sve_mem_cld_ss_base dtype, bit ff, dag iops, string asm, - RegisterOperand VecList> -: I<(outs VecList:$Zt), iops, - asm, "\t$Zt, $Pg/z, [$Rn, $Rm]", - "", - []>, Sched<[]> { - bits<5> Zt; - bits<3> Pg; - bits<5> Rm; - bits<5> Rn; - let Inst{31-25} = 0b1010010; - let Inst{24-21} = dtype; - let Inst{20-16} = Rm; - let Inst{15-14} = 0b01; - let Inst{13} = ff; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayLoad = 1; - let Uses = !if(ff, [FFR], []); - let Defs = !if(ff, [FFR], []); -} - -multiclass sve_mem_cld_ss dtype, string asm, RegisterOperand listty, - ZPRRegOp zprty, RegisterOperand gprty> { - def "" : sve_mem_cld_ss_base; - - def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; -} - -multiclass sve_mem_cldff_ss dtype, string asm, RegisterOperand listty, - ZPRRegOp zprty, RegisterOperand gprty> { - def _REAL : sve_mem_cld_ss_base; - - def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; - - def : InstAlias(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>; - - def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>; - - // We need a layer of indirection because early machine code passes balk at - // physical register (i.e. FFR) uses that have no previous definition. 
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in { - def "" : Pseudo<(outs listty:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), []>, - PseudoInstExpansion<(!cast(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm)>; - } -} - -multiclass sve_mem_cldnf_si dtype, string asm, RegisterOperand listty, - ZPRRegOp zprty> -: sve_mem_cld_si_base; - -class sve_mem_eld_si sz, bits<2> nregs, RegisterOperand VecList, - string asm, Operand immtype> -: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, immtype:$imm4), - asm, "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]", - "", - []>, Sched<[]> { - bits<5> Zt; - bits<3> Pg; - bits<5> Rn; - bits<4> imm4; - let Inst{31-25} = 0b1010010; - let Inst{24-23} = sz; - let Inst{22-21} = nregs; - let Inst{20} = 0; - let Inst{19-16} = imm4; - let Inst{15-13} = 0b111; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayLoad = 1; -} - -multiclass sve_mem_eld_si sz, bits<2> nregs, RegisterOperand VecList, - string asm, Operand immtype> { - def NAME : sve_mem_eld_si; - - def : InstAlias(NAME) VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; -} - -class sve_mem_eld_ss sz, bits<2> nregs, RegisterOperand VecList, - string asm, RegisterOperand gprty> -: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), - asm, "\t$Zt, $Pg/z, [$Rn, $Rm]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rm; - bits<5> Rn; - bits<5> Zt; - let Inst{31-25} = 0b1010010; - let Inst{24-23} = sz; - let Inst{22-21} = nregs; - let Inst{20-16} = Rm; - let Inst{15-13} = 0b110; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayLoad = 1; -} - -//===----------------------------------------------------------------------===// -// SVE Memory - 32-bit Gather and Unsized Contiguous Group -//===----------------------------------------------------------------------===// - -// bit xs is '1' if offsets are signed -// bit scaled is '1' if the offsets are scaled -class sve_mem_32b_gld_sv opc, bit xs, bit scaled, 
string asm, - RegisterOperand zprext> -: I<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), - asm, "\t$Zt, $Pg/z, [$Rn, $Zm]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rn; - bits<5> Zm; - bits<5> Zt; - let Inst{31-25} = 0b1000010; - let Inst{24-23} = opc{3-2}; - let Inst{22} = xs; - let Inst{21} = scaled; - let Inst{20-16} = Zm; - let Inst{15} = 0b0; - let Inst{14-13} = opc{1-0}; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayLoad = 1; - let Defs = !if(!eq(opc{0}, 1), [FFR], []); - let Uses = !if(!eq(opc{0}, 1), [FFR], []); -} - -multiclass sve_mem_32b_gld_sv_32_scaled opc, string asm, - SDPatternOperator sxtw_op, - SDPatternOperator uxtw_op, - RegisterOperand sxtw_opnd, - RegisterOperand uxtw_opnd, - ValueType vt> { - def _UXTW_SCALED_REAL : sve_mem_32b_gld_sv; - def _SXTW_SCALED_REAL : sve_mem_32b_gld_sv; - - def : InstAlias(NAME # _UXTW_SCALED_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; - def : InstAlias(NAME # _SXTW_SCALED_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; - - // We need a layer of indirection because early machine code passes balk at - // physical register (i.e. FFR) uses that have no previous definition. 
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in { - def _UXTW_SCALED : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>, - PseudoInstExpansion<(!cast(NAME # _UXTW_SCALED_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>; - def _SXTW_SCALED : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>, - PseudoInstExpansion<(!cast(NAME # _SXTW_SCALED_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>; - } - - def : Pat<(nxv4i32 (uxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$indices), vt)), - (!cast(NAME # _UXTW_SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>; - def : Pat<(nxv4i32 (sxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$indices), vt)), - (!cast(NAME # _SXTW_SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>; -} - -multiclass sve_mem_32b_gld_vs_32_unscaled opc, string asm, - SDPatternOperator sxtw_op, - SDPatternOperator uxtw_op, - RegisterOperand sxtw_opnd, - RegisterOperand uxtw_opnd, - ValueType vt> { - def _UXTW_REAL : sve_mem_32b_gld_sv; - def _SXTW_REAL : sve_mem_32b_gld_sv; - - def : InstAlias(NAME # _UXTW_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; - def : InstAlias(NAME # _SXTW_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; - - // We need a layer of indirection because early machine code passes balk at - // physical register (i.e. FFR) uses that have no previous definition. 
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in { - def _UXTW : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>, - PseudoInstExpansion<(!cast(NAME # _UXTW_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>; - def _SXTW : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>, - PseudoInstExpansion<(!cast(NAME # _SXTW_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>; - } - - def : Pat<(nxv4i32 (uxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt)), - (!cast(NAME # _UXTW) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; - def : Pat<(nxv4i32 (sxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt)), - (!cast(NAME # _SXTW) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; -} - - -class sve_mem_32b_gld_vi opc, string asm, Operand imm_ty> -: I<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), - asm, "\t$Zt, $Pg/z, [$Zn, $imm5]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zn; - bits<5> Zt; - bits<5> imm5; - let Inst{31-25} = 0b1000010; - let Inst{24-23} = opc{3-2}; - let Inst{22-21} = 0b01; - let Inst{20-16} = imm5; - let Inst{15} = 0b1; - let Inst{14-13} = opc{1-0}; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zt; - - let mayLoad = 1; - let Defs = !if(!eq(opc{0}, 1), [FFR], []); - let Uses = !if(!eq(opc{0}, 1), [FFR], []); -} - -multiclass sve_mem_32b_gld_vi_32_ptrs opc, string asm, Operand imm_ty, - SDPatternOperator op, ValueType vt> { - def _IMM_REAL : sve_mem_32b_gld_vi; - - def : InstAlias(NAME # _IMM_REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 0>; - def : InstAlias(NAME # _IMM_REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), 0>; - def : InstAlias(NAME # _IMM_REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>; - - // We need a layer of indirection because early machine code passes balk at - // physical register (i.e. FFR) uses that have no previous definition. 
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in { - def _IMM : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), []>, - PseudoInstExpansion<(!cast(NAME # _IMM_REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5)>; - } - - def : Pat<(nxv4i32 (op (nxv4i1 PPR:$gp), (nxv4i32 ZPR:$ptrs), imm_ty:$index, vt)), - (!cast(NAME # _IMM) PPR:$gp, ZPR:$ptrs, imm_ty:$index)>; -} - -class sve_mem_prfm_si msz, string asm> -: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, simm6s1:$imm6), - asm, "\t$prfop, $Pg, [$Rn, $imm6, mul vl]", - "", - []>, Sched<[]> { - bits<5> Rn; - bits<3> Pg; - bits<6> imm6; - bits<4> prfop; - let Inst{31-22} = 0b1000010111; - let Inst{21-16} = imm6; - let Inst{15} = 0b0; - let Inst{14-13} = msz; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4} = 0b0; - let Inst{3-0} = prfop; - - let hasSideEffects = 1; -} - -multiclass sve_mem_prfm_si msz, string asm> { - def NAME : sve_mem_prfm_si; - - def : InstAlias(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; -} - -class sve_mem_prfm_ss opc, string asm, RegisterOperand gprty> -: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), - asm, "\t$prfop, $Pg, [$Rn, $Rm]", - "", - []>, Sched<[]> { - bits<5> Rm; - bits<5> Rn; - bits<3> Pg; - bits<4> prfop; - let Inst{31-25} = 0b1000010; - let Inst{24-23} = opc{2-1}; - let Inst{22-21} = 0b00; - let Inst{20-16} = Rm; - let Inst{15} = 0b1; - let Inst{14} = opc{0}; - let Inst{13} = 0b0; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4} = 0b0; - let Inst{3-0} = prfop; - - let hasSideEffects = 1; -} - -class sve_mem_32b_prfm_sv msz, bit xs, string asm, - RegisterOperand zprext> -: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), - asm, "\t$prfop, $Pg, [$Rn, $Zm]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rn; - bits<5> Zm; - bits<4> prfop; - let Inst{31-23} = 0b100001000; - let Inst{22} = xs; - let Inst{21} = 0b1; - let Inst{20-16} = Zm; - let Inst{15} = 0b0; - 
let Inst{14-13} = msz; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4} = 0b0; - let Inst{3-0} = prfop; - - let hasSideEffects = 1; -} - -multiclass sve_mem_32b_prfm_sv_scaled msz, string asm, - RegisterOperand sxtw_opnd, - RegisterOperand uxtw_opnd, - SDPatternOperator op_sxtw, - SDPatternOperator op_uxtw> { - def _UXTW_SCALED : sve_mem_32b_prfm_sv; - def _SXTW_SCALED : sve_mem_32b_prfm_sv; - - def : Pat<(op_uxtw (nxv4i1 PPR3bAny:$Pg), (i64 GPR64sp:$Rn), (nxv4i32 uxtw_opnd:$Zm), (i32 sve_prfop:$prfop)), - (!cast(NAME # _UXTW_SCALED) sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>; - - def : Pat<(op_sxtw (nxv4i1 PPR3bAny:$Pg), (i64 GPR64sp:$Rn), (nxv4i32 sxtw_opnd:$Zm), (i32 sve_prfop:$prfop)), - (!cast(NAME # _SXTW_SCALED) sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>; -} - -class sve_mem_32b_prfm_vi msz, string asm, Operand imm_ty> -: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), - asm, "\t$prfop, $Pg, [$Zn, $imm5]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zn; - bits<5> imm5; - bits<4> prfop; - let Inst{31-25} = 0b1000010; - let Inst{24-23} = msz; - let Inst{22-21} = 0b00; - let Inst{20-16} = imm5; - let Inst{15-13} = 0b111; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4} = 0b0; - let Inst{3-0} = prfop; -} - -multiclass sve_mem_32b_prfm_vi msz, string asm, Operand imm_ty, SDPatternOperator op> { - def NAME : sve_mem_32b_prfm_vi; - - def : InstAlias(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>; - - def : Pat<(op (nxv4i1 PPR_3b:$Pg), (nxv4i32 ZPR32:$Zn), (i64 imm_ty:$imm), (i32 sve_prfop:$prfop)), - (!cast(NAME) sve_prfop:$prfop, PPR_3b:$Pg, ZPR32:$Zn, imm_ty:$imm)>; -} - -class sve_mem_z_fill -: I<(outs ZPRAny:$Zt), (ins GPR64sp:$Rn, simm9:$imm9), - asm, "\t$Zt, [$Rn, $imm9, mul vl]", - "", - []>, Sched<[]> { - bits<5> Rn; - bits<5> Zt; - bits<9> imm9; - let Inst{31-22} = 0b1000010110; - let Inst{21-16} = imm9{8-3}; - let Inst{15-13} = 0b010; - let Inst{12-10} = 
imm9{2-0}; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayLoad = 1; -} - -multiclass sve_mem_z_fill { - def NAME : sve_mem_z_fill; - - def : InstAlias(NAME) ZPRAny:$Zt, GPR64sp:$Rn, 0), 1>; -} - -class sve_mem_p_fill -: I<(outs PPRAny:$Pt), (ins GPR64sp:$Rn, simm9:$imm9), - asm, "\t$Pt, [$Rn, $imm9, mul vl]", - "", - []>, Sched<[]> { - bits<4> Pt; - bits<5> Rn; - bits<9> imm9; - let Inst{31-22} = 0b1000010110; - let Inst{21-16} = imm9{8-3}; - let Inst{15-13} = 0b000; - let Inst{12-10} = imm9{2-0}; - let Inst{9-5} = Rn; - let Inst{4} = 0b0; - let Inst{3-0} = Pt; - - let mayLoad = 1; -} - -multiclass sve_mem_p_fill { - def NAME : sve_mem_p_fill; - - def : InstAlias(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>; -} - -class sve2_mem_gldnt_vs_base opc, dag iops, string asm, - RegisterOperand VecList> -: I<(outs VecList:$Zt), iops, - asm, "\t$Zt, $Pg/z, [$Zn, $Rm]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rm; - bits<5> Zn; - bits<5> Zt; - let Inst{31} = 0b1; - let Inst{30} = opc{4}; - let Inst{29-25} = 0b00010; - let Inst{24-23} = opc{3-2}; - let Inst{22-21} = 0b00; - let Inst{20-16} = Rm; - let Inst{15} = 0b1; - let Inst{14-13} = opc{1-0}; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zt; - - let mayLoad = 1; -} - -multiclass sve2_mem_gldnt_vs_32_ptrs opc, string asm, - SDPatternOperator op, - ValueType vt> { - def _REAL : sve2_mem_gldnt_vs_base; - - def : InstAlias(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), 0>; - def : InstAlias(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 0>; - def : InstAlias(NAME # _REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 1>; - - def : Pat <(nxv4i32 (op (nxv4i1 PPR3bAny:$Pg), (nxv4i32 ZPR32:$Zd), (i64 GPR64:$Rm), vt)), - (!cast(NAME # _REAL) PPR3bAny:$Pg, ZPR32:$Zd, GPR64:$Rm)>; -} - -multiclass sve2_mem_gldnt_vs_64_ptrs opc, string asm, - SDPatternOperator op, - ValueType vt> { - def _REAL : sve2_mem_gldnt_vs_base; - - def : InstAlias(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 
GPR64:$Rm), 0>; - def : InstAlias(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 0>; - def : InstAlias(NAME # _REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>; - - def : Pat <(nxv2i64 (op (nxv2i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zd), (i64 GPR64:$Rm), vt)), - (!cast(NAME # _REAL) PPR3bAny:$Pg, ZPR64:$Zd, GPR64:$Rm)>; -} - -//===----------------------------------------------------------------------===// -// SVE Memory - 64-bit Gather Group -//===----------------------------------------------------------------------===// - -// bit xs is '1' if offsets are signed -// bit scaled is '1' if the offsets are scaled -// bit lsl is '0' if the offsets are extended (uxtw/sxtw), '1' if shifted (lsl) -class sve_mem_64b_gld_sv opc, bit xs, bit scaled, bit lsl, string asm, - RegisterOperand zprext> -: I<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), - asm, "\t$Zt, $Pg/z, [$Rn, $Zm]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rn; - bits<5> Zm; - bits<5> Zt; - let Inst{31-25} = 0b1100010; - let Inst{24-23} = opc{3-2}; - let Inst{22} = xs; - let Inst{21} = scaled; - let Inst{20-16} = Zm; - let Inst{15} = lsl; - let Inst{14-13} = opc{1-0}; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayLoad = 1; - let Defs = !if(!eq(opc{0}, 1), [FFR], []); - let Uses = !if(!eq(opc{0}, 1), [FFR], []); -} - -multiclass sve_mem_64b_gld_sv_32_scaled opc, string asm, - SDPatternOperator sxtw_op, - SDPatternOperator uxtw_op, - RegisterOperand sxtw_opnd, - RegisterOperand uxtw_opnd, - ValueType vt> { - def _UXTW_SCALED_REAL : sve_mem_64b_gld_sv; - def _SXTW_SCALED_REAL : sve_mem_64b_gld_sv; - - def : InstAlias(NAME # _UXTW_SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; - def : InstAlias(NAME # _SXTW_SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; - - // We need a layer of indirection because early machine code passes balk at - // physical register (i.e. FFR) uses that have no previous definition. 
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in { - def _UXTW_SCALED : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>, - PseudoInstExpansion<(!cast(NAME # _UXTW_SCALED_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>; - def _SXTW_SCALED : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>, - PseudoInstExpansion<(!cast(NAME # _SXTW_SCALED_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>; - } - - def : Pat<(nxv2i64 (uxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)), - (!cast(NAME # _UXTW_SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>; - def : Pat<(nxv2i64 (sxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)), - (!cast(NAME # _SXTW_SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>; -} - -multiclass sve_mem_64b_gld_vs_32_unscaled opc, string asm, - SDPatternOperator sxtw_op, - SDPatternOperator uxtw_op, - RegisterOperand sxtw_opnd, - RegisterOperand uxtw_opnd, - ValueType vt> { - def _UXTW_REAL : sve_mem_64b_gld_sv; - def _SXTW_REAL : sve_mem_64b_gld_sv; - - def : InstAlias(NAME # _UXTW_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; - def : InstAlias(NAME # _SXTW_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; - - // We need a layer of indirection because early machine code passes balk at - // physical register (i.e. FFR) uses that have no previous definition. 
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in { - def _UXTW : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>, - PseudoInstExpansion<(!cast(NAME # _UXTW_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>; - def _SXTW : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>, - PseudoInstExpansion<(!cast(NAME # _SXTW_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>; - } - - def : Pat<(nxv2i64 (uxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)), - (!cast(NAME # _UXTW) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; - def : Pat<(nxv2i64 (sxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)), - (!cast(NAME # _SXTW) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; -} - -multiclass sve_mem_64b_gld_sv2_64_scaled opc, string asm, - SDPatternOperator op, - RegisterOperand zprext, ValueType vt> { - def _SCALED_REAL : sve_mem_64b_gld_sv; - - def : InstAlias(NAME # _SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>; - - // We need a layer of indirection because early machine code passes balk at - // physical register (i.e. FFR) uses that have no previous definition. - let hasSideEffects = 1, hasNoSchedulingInfo = 1 in { - def _SCALED : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), []>, - PseudoInstExpansion<(!cast(NAME # _SCALED_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm)>; - } - - def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)), - (!cast(NAME # _SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>; -} - -multiclass sve_mem_64b_gld_vs2_64_unscaled opc, string asm, - SDPatternOperator op, ValueType vt> { - def _REAL : sve_mem_64b_gld_sv; - - def : InstAlias(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>; - - // We need a layer of indirection because early machine code passes balk at - // physical register (i.e. FFR) uses that have no previous definition. 
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in { - def "" : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), []>, - PseudoInstExpansion<(!cast(NAME # _REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm)>; - } - - def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)), - (!cast(NAME) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; -} - -class sve_mem_64b_gld_vi opc, string asm, Operand imm_ty> -: I<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), - asm, "\t$Zt, $Pg/z, [$Zn, $imm5]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zn; - bits<5> Zt; - bits<5> imm5; - let Inst{31-25} = 0b1100010; - let Inst{24-23} = opc{3-2}; - let Inst{22-21} = 0b01; - let Inst{20-16} = imm5; - let Inst{15} = 0b1; - let Inst{14-13} = opc{1-0}; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zt; - - let mayLoad = 1; - let Defs = !if(!eq(opc{0}, 1), [FFR], []); - let Uses = !if(!eq(opc{0}, 1), [FFR], []); -} - -multiclass sve_mem_64b_gld_vi_64_ptrs opc, string asm, Operand imm_ty, - SDPatternOperator op, ValueType vt> { - def _IMM_REAL : sve_mem_64b_gld_vi; - - def : InstAlias(NAME # _IMM_REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 0>; - def : InstAlias(NAME # _IMM_REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), 0>; - def : InstAlias(NAME # _IMM_REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>; - - // We need a layer of indirection because early machine code passes balk at - // physical register (i.e. FFR) uses that have no previous definition. 
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in { - def _IMM : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), []>, - PseudoInstExpansion<(!cast(NAME # _IMM_REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5)>; - } - - def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), (nxv2i64 ZPR:$ptrs), imm_ty:$index, vt)), - (!cast(NAME # _IMM) PPR:$gp, ZPR:$ptrs, imm_ty:$index)>; -} - -// bit lsl is '0' if the offsets are extended (uxtw/sxtw), '1' if shifted (lsl) -class sve_mem_64b_prfm_sv msz, bit xs, bit lsl, string asm, - RegisterOperand zprext> -: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), - asm, "\t$prfop, $Pg, [$Rn, $Zm]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Rn; - bits<5> Zm; - bits<4> prfop; - let Inst{31-23} = 0b110001000; - let Inst{22} = xs; - let Inst{21} = 0b1; - let Inst{20-16} = Zm; - let Inst{15} = lsl; - let Inst{14-13} = msz; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4} = 0b0; - let Inst{3-0} = prfop; - - let hasSideEffects = 1; -} - -multiclass sve_mem_64b_prfm_sv_ext_scaled msz, string asm, - RegisterOperand sxtw_opnd, - RegisterOperand uxtw_opnd, - SDPatternOperator op_sxtw, - SDPatternOperator op_uxtw> { - def _UXTW_SCALED : sve_mem_64b_prfm_sv; - def _SXTW_SCALED : sve_mem_64b_prfm_sv; - - def : Pat<(op_uxtw (nxv2i1 PPR3bAny:$Pg), (i64 GPR64sp:$Rn), (nxv2i64 uxtw_opnd:$Zm), (i32 sve_prfop:$prfop)), - (!cast(NAME # _UXTW_SCALED) sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>; - - def : Pat<(op_sxtw (nxv2i1 PPR3bAny:$Pg), (i64 GPR64sp:$Rn), (nxv2i64 sxtw_opnd:$Zm), (i32 sve_prfop:$prfop)), - (!cast(NAME # _SXTW_SCALED) sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>; - -} - -multiclass sve_mem_64b_prfm_sv_lsl_scaled msz, string asm, - RegisterOperand zprext, SDPatternOperator frag> { - def NAME : sve_mem_64b_prfm_sv; - - def : Pat<(frag (nxv2i1 PPR3bAny:$Pg), (i64 GPR64sp:$Rn), (nxv2i64 zprext:$Zm), (i32 sve_prfop:$prfop)), - (!cast(NAME) sve_prfop:$prfop, 
PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm)>; - -} - -class sve_mem_64b_prfm_vi msz, string asm, Operand imm_ty> -: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), - asm, "\t$prfop, $Pg, [$Zn, $imm5]", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zn; - bits<5> imm5; - bits<4> prfop; - let Inst{31-25} = 0b1100010; - let Inst{24-23} = msz; - let Inst{22-21} = 0b00; - let Inst{20-16} = imm5; - let Inst{15-13} = 0b111; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4} = 0b0; - let Inst{3-0} = prfop; - - let hasSideEffects = 1; -} - -multiclass sve_mem_64b_prfm_vi msz, string asm, Operand imm_ty, SDPatternOperator op> { - def NAME : sve_mem_64b_prfm_vi; - - def : InstAlias(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>; - - def : Pat<(op (nxv2i1 PPR_3b:$Pg), (nxv2i64 ZPR32:$Zn), (i64 imm_ty:$imm), (i32 sve_prfop:$prfop)), - (!cast(NAME) sve_prfop:$prfop, PPR_3b:$Pg, ZPR32:$Zn, imm_ty:$imm)>; -} - -//===----------------------------------------------------------------------===// -// SVE Compute Vector Address Group -//===----------------------------------------------------------------------===// - -class sve_int_bin_cons_misc_0_a opc, bits<2> msz, string asm, - ZPRRegOp zprty, RegisterOperand zprext> -: I<(outs zprty:$Zd), (ins zprty:$Zn, zprext:$Zm), - asm, "\t$Zd, [$Zn, $Zm]", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - bits<5> Zm; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = opc; - let Inst{21} = 0b1; - let Inst{20-16} = Zm; - let Inst{15-12} = 0b1010; - let Inst{11-10} = msz; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve_int_bin_cons_misc_0_a_uxtw opc, string asm> { - def _0 : sve_int_bin_cons_misc_0_a; - def _1 : sve_int_bin_cons_misc_0_a; - def _2 : sve_int_bin_cons_misc_0_a; - def _3 : sve_int_bin_cons_misc_0_a; -} - -multiclass sve_int_bin_cons_misc_0_a_sxtw opc, string asm> { - def _0 : sve_int_bin_cons_misc_0_a; - def _1 : sve_int_bin_cons_misc_0_a; - def _2 : 
sve_int_bin_cons_misc_0_a; - def _3 : sve_int_bin_cons_misc_0_a; -} - -multiclass sve_int_bin_cons_misc_0_a_32_lsl opc, string asm> { - def _0 : sve_int_bin_cons_misc_0_a; - def _1 : sve_int_bin_cons_misc_0_a; - def _2 : sve_int_bin_cons_misc_0_a; - def _3 : sve_int_bin_cons_misc_0_a; -} - -multiclass sve_int_bin_cons_misc_0_a_64_lsl opc, string asm> { - def _0 : sve_int_bin_cons_misc_0_a; - def _1 : sve_int_bin_cons_misc_0_a; - def _2 : sve_int_bin_cons_misc_0_a; - def _3 : sve_int_bin_cons_misc_0_a; -} - -//===----------------------------------------------------------------------===// -// SVE Integer Misc - Unpredicated Group -//===----------------------------------------------------------------------===// - -class sve_int_bin_cons_misc_0_b sz, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), - asm, "\t$Zd, $Zn, $Zm", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<5> Zm; - bits<5> Zn; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = sz; - let Inst{21} = 0b1; - let Inst{20-16} = Zm; - let Inst{15-10} = 0b101100; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve_int_bin_cons_misc_0_b { - def _H : sve_int_bin_cons_misc_0_b<0b01, asm, ZPR16>; - def _S : sve_int_bin_cons_misc_0_b<0b10, asm, ZPR32>; - def _D : sve_int_bin_cons_misc_0_b<0b11, asm, ZPR64>; - - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -class sve_int_bin_cons_misc_0_c opc, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$Zn), - asm, "\t$Zd, $Zn", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = opc{7-6}; - let Inst{21} = 0b1; - let Inst{20-16} = opc{5-1}; - let Inst{15-11} = 0b10111; - let Inst{10} = opc{0}; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve_int_bin_cons_misc_0_c_fexpa { - def _H : sve_int_bin_cons_misc_0_c<0b01000000, asm, ZPR16>; - def _S : sve_int_bin_cons_misc_0_c<0b10000000, asm, ZPR32>; - def 
_D : sve_int_bin_cons_misc_0_c<0b11000000, asm, ZPR64>; - - def : SVE_1_Op_Pat(NAME # _H)>; - def : SVE_1_Op_Pat(NAME # _S)>; - def : SVE_1_Op_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE Integer Reduction Group -//===----------------------------------------------------------------------===// - -class sve_int_reduce sz8_32, bits<2> fmt, bits<3> opc, string asm, - ZPRRegOp zprty, FPRasZPROperand dstOpType> -: I<(outs dstOpType:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn), - asm, "\t$Vd, $Pg, $Zn", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Vd; - bits<5> Zn; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = sz8_32; - let Inst{21} = 0b0; - let Inst{20-19} = fmt; - let Inst{18-16} = opc; - let Inst{15-13} = 0b001; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Vd; -} - -multiclass sve_int_reduce_0_saddv opc, string asm, - SDPatternOperator op> { - def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64asZPR>; - def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64asZPR>; - def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64asZPR>; - - def : SVE_2_Op_Pat(NAME # _B)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; -} - -multiclass sve_int_reduce_0_uaddv opc, string asm, - SDPatternOperator op> { - def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64asZPR>; - def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64asZPR>; - def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64asZPR>; - def _D : sve_int_reduce<0b11, 0b00, opc, asm, ZPR64, FPR64asZPR>; - - def : SVE_2_Op_Pat(NAME # _B)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -multiclass sve_int_reduce_1 opc, string asm, - SDPatternOperator op> { - def _B : sve_int_reduce<0b00, 0b01, opc, asm, ZPR8, FPR8asZPR>; - def _H : sve_int_reduce<0b01, 0b01, opc, asm, ZPR16, FPR16asZPR>; - def _S : sve_int_reduce<0b10, 0b01, opc, asm, 
ZPR32, FPR32asZPR>; - def _D : sve_int_reduce<0b11, 0b01, opc, asm, ZPR64, FPR64asZPR>; - - def : SVE_2_Op_Pat(NAME # _B)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -multiclass sve_int_reduce_2 opc, string asm, - SDPatternOperator op> { - def _B : sve_int_reduce<0b00, 0b11, opc, asm, ZPR8, FPR8asZPR>; - def _H : sve_int_reduce<0b01, 0b11, opc, asm, ZPR16, FPR16asZPR>; - def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32asZPR>; - def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64asZPR>; - - def : SVE_2_Op_Pat(NAME # _B)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; -} - -class sve_int_movprfx_pred sz8_32, bits<3> opc, string asm, - ZPRRegOp zprty, string pg_suffix, dag iops> -: I<(outs zprty:$Zd), iops, - asm, "\t$Zd, $Pg"#pg_suffix#", $Zn", - "", - []>, Sched<[]> { - bits<3> Pg; - bits<5> Zd; - bits<5> Zn; - let Inst{31-24} = 0b00000100; - let Inst{23-22} = sz8_32; - let Inst{21-19} = 0b010; - let Inst{18-16} = opc; - let Inst{15-13} = 0b001; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; - - let ElementSize = zprty.ElementSize; -} - -multiclass sve_int_movprfx_pred_merge opc, string asm> { -let Constraints = "$Zd = $_Zd" in { - def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/m", - (ins ZPR8:$_Zd, PPR3bAny:$Pg, ZPR8:$Zn)>; - def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/m", - (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR16:$Zn)>; - def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/m", - (ins ZPR32:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn)>; - def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/m", - (ins ZPR64:$_Zd, PPR3bAny:$Pg, ZPR64:$Zn)>; -} -} - -multiclass sve_int_movprfx_pred_zero opc, string asm> { - def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/z", - (ins PPR3bAny:$Pg, ZPR8:$Zn)>; - def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/z", - (ins PPR3bAny:$Pg, ZPR16:$Zn)>; - def _S : 
sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/z", - (ins PPR3bAny:$Pg, ZPR32:$Zn)>; - def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/z", - (ins PPR3bAny:$Pg, ZPR64:$Zn)>; -} - -//===----------------------------------------------------------------------===// -// SVE Propagate Break Group -//===----------------------------------------------------------------------===// - -class sve_int_brkp opc, string asm> -: I<(outs PPR8:$Pd), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$Pm), - asm, "\t$Pd, $Pg/z, $Pn, $Pm", - "", - []>, Sched<[]> { - bits<4> Pd; - bits<4> Pg; - bits<4> Pm; - bits<4> Pn; - let Inst{31-24} = 0b00100101; - let Inst{23} = 0b0; - let Inst{22} = opc{1}; - let Inst{21-20} = 0b00; - let Inst{19-16} = Pm; - let Inst{15-14} = 0b11; - let Inst{13-10} = Pg; - let Inst{9} = 0b0; - let Inst{8-5} = Pn; - let Inst{4} = opc{0}; - let Inst{3-0} = Pd; - - let Defs = !if(!eq (opc{1}, 1), [NZCV], []); -} - -multiclass sve_int_brkp opc, string asm, SDPatternOperator op> { - def NAME : sve_int_brkp; - - def : SVE_3_Op_Pat(NAME)>; -} - - -//===----------------------------------------------------------------------===// -// SVE Partition Break Group -//===----------------------------------------------------------------------===// - -class sve_int_brkn -: I<(outs PPR8:$Pdm), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$_Pdm), - asm, "\t$Pdm, $Pg/z, $Pn, $_Pdm", - "", - []>, Sched<[]> { - bits<4> Pdm; - bits<4> Pg; - bits<4> Pn; - let Inst{31-23} = 0b001001010; - let Inst{22} = S; - let Inst{21-14} = 0b01100001; - let Inst{13-10} = Pg; - let Inst{9} = 0b0; - let Inst{8-5} = Pn; - let Inst{4} = 0b0; - let Inst{3-0} = Pdm; - - let Constraints = "$Pdm = $_Pdm"; - let Defs = !if(S, [NZCV], []); -} - -multiclass sve_int_brkn opc, string asm, SDPatternOperator op> { - def NAME : sve_int_brkn; - - def : SVE_3_Op_Pat(NAME)>; -} - -class sve_int_break opc, string asm, string suffix, dag iops> -: I<(outs PPR8:$Pd), iops, - asm, "\t$Pd, $Pg"#suffix#", $Pn", - "", - []>, Sched<[]> { - bits<4> Pd; - 
bits<4> Pg; - bits<4> Pn; - let Inst{31-24} = 0b00100101; - let Inst{23-22} = opc{2-1}; - let Inst{21-14} = 0b01000001; - let Inst{13-10} = Pg; - let Inst{9} = 0b0; - let Inst{8-5} = Pn; - let Inst{4} = opc{0}; - let Inst{3-0} = Pd; - - let Constraints = !if(!eq (opc{0}, 1), "$Pd = $_Pd", ""); - let Defs = !if(!eq (opc{1}, 1), [NZCV], []); - -} - -multiclass sve_int_break_m opc, string asm, SDPatternOperator op> { - def NAME : sve_int_break; - - def : SVE_3_Op_Pat(NAME)>; -} - -multiclass sve_int_break_z opc, string asm, SDPatternOperator op> { - def NAME : sve_int_break; - - def : SVE_2_Op_Pat(NAME)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 String Processing Group -//===----------------------------------------------------------------------===// - -class sve2_char_match -: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn, zprty:$Zm), - asm, "\t$Pd, $Pg/z, $Zn, $Zm", - "", - []>, Sched<[]> { - bits<4> Pd; - bits<3> Pg; - bits<5> Zm; - bits<5> Zn; - let Inst{31-23} = 0b010001010; - let Inst{22} = sz; - let Inst{21} = 0b1; - let Inst{20-16} = Zm; - let Inst{15-13} = 0b100; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4} = opc; - let Inst{3-0} = Pd; - - let Defs = [NZCV]; - let isPTestLike = 1; -} - -multiclass sve2_char_match { - def _B : sve2_char_match<0b0, opc, asm, PPR8, ZPR8>; - def _H : sve2_char_match<0b1, opc, asm, PPR16, ZPR16>; - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Histogram Computation - Segment Group -//===----------------------------------------------------------------------===// - -class sve2_hist_gen_segment -: I<(outs ZPR8:$Zd), (ins ZPR8:$Zn, ZPR8:$Zm), - asm, "\t$Zd, $Zn, $Zm", - "", - [(set nxv16i8:$Zd, (op nxv16i8:$Zn, nxv16i8:$Zm))]>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - bits<5> Zm; - let Inst{31-21} = 0b01000101001; - let Inst{20-16} = Zm; - let 
Inst{15-10} = 0b101000; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -//===----------------------------------------------------------------------===// -// SVE2 Histogram Computation - Vector Group -//===----------------------------------------------------------------------===// - -class sve2_hist_gen_vector -: I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zn, zprty:$Zm), - asm, "\t$Zd, $Pg/z, $Zn, $Zm", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - bits<3> Pg; - bits<5> Zm; - let Inst{31-23} = 0b010001011; - let Inst{22} = sz; - let Inst{21} = 0b1; - let Inst{20-16} = Zm; - let Inst{15-13} = 0b110; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve2_hist_gen_vector { - def _S : sve2_hist_gen_vector<0b0, asm, ZPR32>; - def _D : sve2_hist_gen_vector<0b1, asm, ZPR64>; - - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -//===----------------------------------------------------------------------===// -// SVE2 Crypto Extensions Group -//===----------------------------------------------------------------------===// - -class sve2_crypto_cons_bin_op -: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), - asm, "\t$Zd, $Zn, $Zm", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<5> Zn; - bits<5> Zm; - let Inst{31-21} = 0b01000101001; - let Inst{20-16} = Zm; - let Inst{15-11} = 0b11110; - let Inst{10} = opc; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve2_crypto_cons_bin_op { - def NAME : sve2_crypto_cons_bin_op; - def : SVE_2_Op_Pat(NAME)>; -} - -class sve2_crypto_des_bin_op opc, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm), - asm, "\t$Zdn, $_Zdn, $Zm", - "", - []>, Sched<[]> { - bits<5> Zdn; - bits<5> Zm; - let Inst{31-17} = 0b010001010010001; - let Inst{16} = opc{1}; - let Inst{15-11} = 0b11100; - let Inst{10} = opc{0}; - let Inst{9-5} = Zm; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; -} - -multiclass sve2_crypto_des_bin_op opc, string 
asm, ZPRRegOp zprty, - SDPatternOperator op, ValueType vt> { - def NAME : sve2_crypto_des_bin_op; - def : SVE_2_Op_Pat(NAME)>; -} - -class sve2_crypto_unary_op -: I<(outs zprty:$Zdn), (ins zprty:$_Zdn), - asm, "\t$Zdn, $_Zdn", - "", - []>, Sched<[]> { - bits<5> Zdn; - let Inst{31-11} = 0b010001010010000011100; - let Inst{10} = opc; - let Inst{9-5} = 0b00000; - let Inst{4-0} = Zdn; - - let Constraints = "$Zdn = $_Zdn"; -} - -multiclass sve2_crypto_unary_op { - def NAME : sve2_crypto_unary_op; - def : SVE_1_Op_Pat(NAME)>; -} - -//===----------------------------------------------------------------------===// -// SVE BFloat16 Group -//===----------------------------------------------------------------------===// - -class sve_bfloat_dot_base opc, string asm, string ops, dag iops> -: I<(outs ZPR32:$Zda), iops, asm, ops, "", []>, Sched<[]> { - bits<5> Zda; - bits<5> Zn; - let Inst{31-21} = 0b01100100011; - let Inst{15-14} = opc; - let Inst{13-10} = 0b0000; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeH; -} - -class sve_bfloat_dot -: sve_bfloat_dot_base<0b10, asm, "\t$Zda, $Zn, $Zm", - (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm)> { - bits<5> Zm; - let Inst{20-16} = Zm; -} - -multiclass sve_bfloat_dot { - def NAME : sve_bfloat_dot; - def : SVE_3_Op_Pat(NAME)>; -} - -class sve_bfloat_dot_indexed -: sve_bfloat_dot_base<0b01, asm, "\t$Zda, $Zn, $Zm$iop", - (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS:$iop)> { - bits<2> iop; - bits<3> Zm; - let Inst{20-19} = iop; - let Inst{18-16} = Zm; -} - -multiclass sve_bfloat_dot_indexed { - def NAME : sve_bfloat_dot_indexed; - def : SVE_4_Op_Imm_Pat(NAME)>; -} - -class sve_bfloat_matmul -: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm), - asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { - bits<5> Zm; - bits<5> Zda; - bits<5> Zn; - let Inst{31-21} = 0b01100100011; - let Inst{20-16} = Zm; - let Inst{15-10} = 
0b111001; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeH; -} - -multiclass sve_bfloat_matmul { - def NAME : sve_bfloat_matmul; - def : SVE_3_Op_Pat(NAME)>; -} - -class sve_bfloat_matmul_longvecl -: sve_bfloat_matmul { - let Inst{23} = 0b1; - let Inst{14-13} = 0b00; - let Inst{10} = BT; -} - -multiclass sve_bfloat_matmul_longvecl { - def NAME : sve_bfloat_matmul_longvecl; - def : SVE_3_Op_Pat(NAME)>; -} - -class sve_bfloat_matmul_longvecl_idx -: sve_bfloat_dot_base<0b01, asm, "\t$Zda, $Zn, $Zm$iop", - (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexH:$iop)> { - bits<3> iop; - bits<3> Zm; - let Inst{23} = 0b1; - let Inst{20-19} = iop{2-1}; - let Inst{18-16} = Zm; - let Inst{11} = iop{0}; - let Inst{10} = BT; -} - -multiclass sve_bfloat_matmul_longvecl_idx { - def NAME : sve_bfloat_matmul_longvecl_idx; - def : SVE_4_Op_Imm_Pat(NAME)>; -} - -class sve_bfloat_convert -: I<(outs ZPR16:$Zd), (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn), - asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> { - bits<5> Zd; - bits<3> Pg; - bits<5> Zn; - let Inst{31-25} = 0b0110010; - let Inst{24} = N; - let Inst{23-13} = 0b10001010101; - let Inst{12-10} = Pg; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; - - let Constraints = "$Zd = $_Zd"; - let DestructiveInstType = DestructiveOther; - let hasSideEffects = 1; - let ElementSize = ElementSizeS; -} - -multiclass sve_bfloat_convert { - def NAME : sve_bfloat_convert; - def : SVE_3_Op_Pat(NAME)>; -} - -//===----------------------------------------------------------------------===// -// SVE Integer Matrix Multiply Group -//===----------------------------------------------------------------------===// - -class sve_int_matmul uns, string asm> -: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR8:$Zn, ZPR8:$Zm), asm, - "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { - bits<5> Zda; - bits<5> Zn; - bits<5> Zm; - let Inst{31-24} = 0b01000101; - let Inst{23-22} 
= uns; - let Inst{21} = 0; - let Inst{20-16} = Zm; - let Inst{15-10} = 0b100110; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ZPR32.ElementSize; -} - -multiclass sve_int_matmul uns, string asm, SDPatternOperator op> { - def NAME : sve_int_matmul; - - def : SVE_3_Op_Pat(NAME)>; -} - -//===----------------------------------------------------------------------===// -// SVE Integer Dot Product Mixed Sign Group -//===----------------------------------------------------------------------===// - -class sve_int_dot_mixed -: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR8:$Zn, ZPR8:$Zm), asm, - "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { - bits<5> Zda; - bits<5> Zn; - bits<5> Zm; - let Inst{31-21} = 0b01000100100; - let Inst{20-16} = Zm; - let Inst{15-10} = 0b011110; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ZPR32.ElementSize; -} - -multiclass sve_int_dot_mixed { - def NAME : sve_int_dot_mixed; - - def : SVE_3_Op_Pat(NAME)>; -} - -//===----------------------------------------------------------------------===// -// SVE Integer Dot Product Mixed Sign - Indexed Group -//===----------------------------------------------------------------------===// - -class sve_int_dot_mixed_indexed -: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR8:$Zn, ZPR3b8:$Zm, VectorIndexS32b:$idx), - asm, "\t$Zda, $Zn, $Zm$idx", "", []>, Sched<[]> { - bits<5> Zda; - bits<5> Zn; - bits<3> Zm; - bits<2> idx; - let Inst{31-21} = 0b01000100101; - let Inst{20-19} = idx; - let Inst{18-16} = Zm; - let Inst{15-11} = 0b00011; - let Inst{10} = U; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ZPR32.ElementSize; -} - -multiclass sve_int_dot_mixed_indexed { - def NAME : sve_int_dot_mixed_indexed; - - def : 
SVE_4_Op_Imm_Pat(NAME)>; -} - -//===----------------------------------------------------------------------===// -// SVE Floating Point Matrix Multiply Accumulate Group -//===----------------------------------------------------------------------===// - -class sve_fp_matrix_mla -: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, zprty:$Zm), - asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { - bits<5> Zda; - bits<5> Zn; - bits<5> Zm; - let Inst{31-23} = 0b011001001; - let Inst{22} = sz; - let Inst{21} = 1; - let Inst{20-16} = Zm; - let Inst{15-10} = 0b111001; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; - let ElementSize = zprty.ElementSize; -} - -multiclass sve_fp_matrix_mla { - def NAME : sve_fp_matrix_mla; - - def : SVE_3_Op_Pat(NAME)>; -} - -//===----------------------------------------------------------------------===// -// SVE Memory - Contiguous Load And Replicate 256-bit Group -//===----------------------------------------------------------------------===// - -class sve_mem_ldor_si sz, string asm, RegisterOperand VecList> -: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s32:$imm4), - asm, "\t$Zt, $Pg/z, [$Rn, $imm4]", "", []>, Sched<[]> { - bits<5> Zt; - bits<5> Rn; - bits<3> Pg; - bits<4> imm4; - let Inst{31-25} = 0b1010010; - let Inst{24-23} = sz; - let Inst{22-20} = 0b010; - let Inst{19-16} = imm4; - let Inst{15-13} = 0b001; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayLoad = 1; -} - -multiclass sve_mem_ldor_si sz, string asm, RegisterOperand listty, - ZPRRegOp zprty, ValueType Ty, ValueType PredTy, SDNode Ld1ro> { - def NAME : sve_mem_ldor_si; - def : InstAlias(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; - def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; - def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s32:$imm4), 0>; - - // Base addressing mode - def : Pat<(Ty (Ld1ro (PredTy 
PPR3bAny:$Pg), GPR64sp:$base)), - (!cast(NAME) PPR3bAny:$Pg, GPR64sp:$base, (i64 0))>; - let AddedComplexity = 2 in { - // Reg + Imm addressing mode - def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$Pg), (add GPR64:$base, (i64 simm4s32:$imm)))), - (!cast(NAME) $Pg, $base, simm4s32:$imm)>; - } -} - -class sve_mem_ldor_ss sz, string asm, RegisterOperand VecList, - RegisterOperand gprty> -: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), - asm, "\t$Zt, $Pg/z, [$Rn, $Rm]", "", []>, Sched<[]> { - bits<5> Zt; - bits<3> Pg; - bits<5> Rn; - bits<5> Rm; - let Inst{31-25} = 0b1010010; - let Inst{24-23} = sz; - let Inst{22-21} = 0b01; - let Inst{20-16} = Rm; - let Inst{15-13} = 0; - let Inst{12-10} = Pg; - let Inst{9-5} = Rn; - let Inst{4-0} = Zt; - - let mayLoad = 1; -} - -multiclass sve_mem_ldor_ss sz, string asm, RegisterOperand listty, - ZPRRegOp zprty, RegisterOperand gprty, ValueType Ty, - ValueType PredTy, SDNode Ld1ro, ComplexPattern AddrCP> { - def NAME : sve_mem_ldor_ss; - - def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; - - def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$gp), (AddrCP GPR64sp:$base, gprty:$offset))), - (!cast(NAME) PPR3bAny:$gp, GPR64sp:$base, gprty:$offset)>; -} - -//===----------------------------------------------------------------------===// -// SVE Interleave 128-bit Elements Group -//===----------------------------------------------------------------------===// - -class sve_int_perm_bin_perm_128_zz opc, bit P, string asm> -: I<(outs ZPR128:$Zd), (ins ZPR128:$Zn, ZPR128:$Zm), - asm, "\t$Zd, $Zn, $Zm", - "", - []>, Sched<[]> { - bits<5> Zd; - bits<5> Zm; - bits<5> Zn; - let Inst{31-21} = 0b00000101101; - let Inst{20-16} = Zm; - let Inst{15-13} = 0b000; - let Inst{12-11} = opc; - let Inst{10} = P; - let Inst{9-5} = Zn; - let Inst{4-0} = Zd; -} - -multiclass sve_int_perm_bin_perm_128_zz opc, bit P, string asm, SDPatternOperator op> { - def NAME : sve_int_perm_bin_perm_128_zz; - - def : SVE_2_Op_Pat(NAME)>; - def : 
SVE_2_Op_Pat(NAME)>; - def : SVE_2_Op_Pat(NAME)>; - def : SVE_2_Op_Pat(NAME)>; - def : SVE_2_Op_Pat(NAME)>; - def : SVE_2_Op_Pat(NAME)>; - def : SVE_2_Op_Pat(NAME)>; - def : SVE_2_Op_Pat(NAME)>; -} - -/// Addressing modes -def am_sve_indexed_s4 :ComplexPattern", [], [SDNPWantRoot]>; -def am_sve_indexed_s6 :ComplexPattern", [], [SDNPWantRoot]>; - -def am_sve_regreg_lsl0 : ComplexPattern", []>; -def am_sve_regreg_lsl1 : ComplexPattern", []>; -def am_sve_regreg_lsl2 : ComplexPattern", []>; -def am_sve_regreg_lsl3 : ComplexPattern", []>; - -// Predicated pseudo floating point two operand instructions. -multiclass sve_fp_bin_pred_hfd { - def _UNDEF_H : PredTwoOpPseudo; - def _UNDEF_S : PredTwoOpPseudo; - def _UNDEF_D : PredTwoOpPseudo; - - def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; -} - -// Predicated pseudo integer two operand instructions. -multiclass sve_int_bin_pred_bhsd { - def _UNDEF_B : PredTwoOpPseudo; - def _UNDEF_H : PredTwoOpPseudo; - def _UNDEF_S : PredTwoOpPseudo; - def _UNDEF_D : PredTwoOpPseudo; - - def : SVE_3_Op_Pat(NAME # _UNDEF_B)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; -} - -// As sve_int_bin_pred but when only i32 and i64 vector types are required. -multiclass sve_int_bin_pred_sd { - def _UNDEF_S : PredTwoOpPseudo; - def _UNDEF_D : PredTwoOpPseudo; - - def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; -} - -// Predicated pseudo integer two operand instructions. Second operand is an -// immediate specified by imm_[bhsd]. 
-multiclass sve_int_shift_pred_bhsd { - def _UNDEF_B : PredTwoOpImmPseudo, FalseLanesUndef>; - def _UNDEF_H : PredTwoOpImmPseudo, FalseLanesUndef>; - def _UNDEF_S : PredTwoOpImmPseudo, FalseLanesUndef>; - def _UNDEF_D : PredTwoOpImmPseudo, FalseLanesUndef>; - - def : SVE_Shift_DupImm_Pred_Pat(NAME # _UNDEF_B)>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _UNDEF_H)>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _UNDEF_S)>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _UNDEF_D)>; -} - -multiclass sve_int_bin_pred_all_active_bhsd { - def _UNDEF_B : PredTwoOpPseudo; - def _UNDEF_H : PredTwoOpPseudo; - def _UNDEF_S : PredTwoOpPseudo; - def _UNDEF_D : PredTwoOpPseudo; - - def : SVE_2_Op_Pred_All_Active_Pt(NAME # _UNDEF_B)>; - def : SVE_2_Op_Pred_All_Active_Pt(NAME # _UNDEF_H)>; - def : SVE_2_Op_Pred_All_Active_Pt(NAME # _UNDEF_S)>; - def : SVE_2_Op_Pred_All_Active_Pt(NAME # _UNDEF_D)>; -} - diff --git a/suite/synctools/tablegen/ARM/ARM-digit.td b/suite/synctools/tablegen/ARM/ARM-digit.td deleted file mode 100644 index d915f49bee..0000000000 --- a/suite/synctools/tablegen/ARM/ARM-digit.td +++ /dev/null @@ -1,1098 +0,0 @@ -//===-- ARM.td - Describe the ARM Target Machine -----------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Target-independent interfaces which we are implementing -//===----------------------------------------------------------------------===// - -include "llvm/Target/Target.td" - -//===----------------------------------------------------------------------===// -// ARM Subtarget state. 
-// - -def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", - "true", "Thumb mode">; - -def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat", - "true", "Use software floating " - "point features.">; - - -//===----------------------------------------------------------------------===// -// ARM Subtarget features. -// - -// Floating Point, HW Division and Neon Support -def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", - "Enable VFP2 instructions">; - -def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", - "Enable VFP3 instructions", - [FeatureVFP2]>; - -def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", - "Enable NEON instructions", - [FeatureVFP3]>; - -def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", - "Enable half-precision " - "floating point">; - -def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", - "Enable VFP4 instructions", - [FeatureVFP3, FeatureFP16]>; - -def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", - "true", "Enable ARMv8 FP", - [FeatureVFP4]>; - -def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", - "Enable full half-precision " - "floating point", - [FeatureFPARMv8]>; - -def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", - "Floating point unit supports " - "single precision only">; - -def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", - "Restrict FP to 16 double registers">; - -def FeatureHWDivThumb : SubtargetFeature<"hwdiv", - "HasHardwareDivideInThumb", "true", - "Enable divide instructions in Thumb">; - -def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", - "HasHardwareDivideInARM", "true", - "Enable divide instructions in ARM mode">; - -// Atomic Support -def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", - "Has data barrier (dmb/dsb) instructions">; - -def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", - "Has v7 clrex instruction">; - -def FeatureDFB : 
SubtargetFeature<"dfb", "HasFullDataBarrier", "true", - "Has full data barrier (dfb) instruction">; - -def FeatureAcquireRelease : SubtargetFeature<"acquire-release", - "HasAcquireRelease", "true", - "Has v8 acquire/release (lda/ldaex " - " etc) instructions">; - - -def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", - "FP compare + branch is slow">; - -def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", - "Enable support for Performance " - "Monitor extensions">; - - -// TrustZone Security Extensions -def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", - "Enable support for TrustZone " - "security extensions">; - -def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true", - "Enable support for ARMv8-M " - "Security Extensions">; - -def FeatureSHA2 : SubtargetFeature<"sha2", "HasSHA2", "true", - "Enable SHA1 and SHA256 support", [FeatureNEON]>; - -def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", - "Enable AES support", [FeatureNEON]>; - -def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", - "Enable support for " - "Cryptography extensions", - [FeatureNEON, FeatureSHA2, FeatureAES]>; - -def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", - "Enable support for CRC instructions">; - -def FeatureDotProd : SubtargetFeature<"dotprod", "HasDotProd", "true", - "Enable support for dot product instructions", - [FeatureNEON]>; - -// Not to be confused with FeatureHasRetAddrStack (return address stack) -def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", - "Enable Reliability, Availability " - "and Serviceability extensions">; - -// Fast computation of non-negative address offsets -def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", - "Enable fast computation of " - "positive address offsets">; - -// Fast execution of AES crypto operations -def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true", - "CPU fuses AES crypto 
operations">; - -// Fast execution of bottom and top halves of literal generation -def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true", - "CPU fuses literal generation operations">; - -// The way of reading thread pointer -def FeatureReadTp : SubtargetFeature<"read-tp-hard", "ReadTPHard", "true", - "Reading thread pointer from register">; - -// Cyclone can zero VFP registers in 0 cycles. -def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", - "Has zero-cycle zeroing instructions">; - -// Whether it is profitable to unpredicate certain instructions during if-conversion -def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr", - "IsProfitableToUnpredicate", "true", - "Is profitable to unpredicate">; - -// Some targets (e.g. Swift) have microcoded VGETLNi32. -def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32", - "HasSlowVGETLNi32", "true", - "Has slow VGETLNi32 - prefer VMOV">; - -// Some targets (e.g. Swift) have microcoded VDUP32. -def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", - "true", - "Has slow VDUP32 - prefer VMOV">; - -// Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON -// for scalar FP, as this allows more effective execution domain optimization. -def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR", - "true", "Prefer VMOVSR">; - -// Swift has ISHST barriers compatible with Atomic Release semantics but weaker -// than ISH -def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST", - "true", "Prefer ISHST barriers">; - -// Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU. 
-def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", - "true", - "Has muxed AGU and NEON/FPU">; - -// Whether VLDM/VSTM starting with odd register number need more microops -// than single VLDRS -def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister", - "true", "VLDM/VSTM starting " - "with an odd register is slow">; - -// Some targets have a renaming dependency when loading into D subregisters. -def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg", - "SlowLoadDSubregister", "true", - "Loading into D subregs is slow">; - -// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD. -def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs", - "DontWidenVMOVS", "true", - "Don't widen VMOVS to VMOVD">; - -// Some targets (e.g. Cortex-A15) prefer to avoid mixing operations on different -// VFP register widths. -def FeatureSplatVFPToNeon : SubtargetFeature<"splat-vfp-neon", - "SplatVFPToNeon", "true", - "Splat register from VFP to NEON", - [FeatureDontWidenVMOVS]>; - -// Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions. -def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", - "ExpandMLx", "true", - "Expand VFP/NEON MLA/MLS instructions">; - -// Some targets have special RAW hazards for VFP/NEON VMLA/VMLS. -def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards", - "true", "Has VMLx hazards">; - -// Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from -// VFP to NEON, as an execution domain optimization. -def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", - "UseNEONForFPMovs", "true", - "Convert VMOVSR, VMOVRS, " - "VMOVS to NEON">; - -// Some processors benefit from using NEON instructions for scalar -// single-precision FP operations. This affects instruction selection and should -// only be enabled if the handling of denormals is not important. 
-def FeatureNEONForFP : SubtargetFeature<"neonfp", - "UseNEONForSinglePrecisionFP", - "true", - "Use NEON for single precision FP">; - -// On some processors, VLDn instructions that access unaligned data take one -// extra cycle. Take that into account when computing operand latencies. -def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign", - "true", - "Check for VLDn unaligned access">; - -// Some processors have a nonpipelined VFP coprocessor. -def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp", - "NonpipelinedVFP", "true", - "VFP instructions are not pipelined">; - -// Some processors have FP multiply-accumulate instructions that don't -// play nicely with other VFP / NEON instructions, and it's generally better -// to just not use them. -def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true", - "Disable VFP / NEON MAC instructions">; - -// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding. -def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", - "HasVMLxForwarding", "true", - "Has multiplier accumulator forwarding">; - -// Disable 32-bit to 16-bit narrowing for experimentation. -def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", - "Prefer 32-bit Thumb instrs">; - -/// Some instructions update CPSR partially, which can add false dependency for -/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is -/// mapped to a separate physical register. Avoid partial CPSR update for these -/// processors. -def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", - "AvoidCPSRPartialUpdate", "true", - "Avoid CPSR partial update for OOO execution">; - -/// Disable +1 predication cost for instructions updating CPSR. -/// Enabled for Cortex-A57. 
-def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr", - "CheapPredicableCPSRDef", - "true", - "Disable +1 predication cost for instructions updating CPSR">; - -def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", - "AvoidMOVsShifterOperand", "true", - "Avoid movs instructions with " - "shifter operand">; - -// Some processors perform return stack prediction. CodeGen should avoid issue -// "normal" call instructions to callees which do not return. -def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", - "HasRetAddrStack", "true", - "Has return address stack">; - -// Some processors have no branch predictor, which changes the expected cost of -// taking a branch which affects the choice of whether to use predicated -// instructions. -def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor", - "HasBranchPredictor", "false", - "Has no branch predictor">; - -/// DSP extension. -def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", - "Supports DSP instructions in " - "ARM and/or Thumb2">; - -// Multiprocessing extension. -def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", - "Supports Multiprocessing extension">; - -// Virtualization extension - requires HW divide (ARMv7-AR ARMARM - 4.4.8). -def FeatureVirtualization : SubtargetFeature<"virtualization", - "HasVirtualization", "true", - "Supports Virtualization extension", - [FeatureHWDivThumb, FeatureHWDivARM]>; - -// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. -// See ARMInstrInfo.td for details. 
-def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", - "NaCl trap">; - -def FeatureStrictAlign : SubtargetFeature<"strict-align", - "StrictAlign", "true", - "Disallow all unaligned memory " - "access">; - -def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", - "Generate calls via indirect call " - "instructions">; - -def FeatureExecuteOnly : SubtargetFeature<"execute-only", - "GenExecuteOnly", "true", - "Enable the generation of " - "execute only code.">; - -def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", - "Reserve R9, making it unavailable" - " as GPR">; - -def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", - "Don't use movt/movw pairs for " - "32-bit imms">; - -def FeatureNoNegativeImmediates - : SubtargetFeature<"no-neg-immediates", - "NegativeImmediates", "false", - "Convert immediates and instructions " - "to their negated or complemented " - "equivalent when the immediate does " - "not fit in the encoding.">; - -// Use the MachineScheduler for instruction scheduling for the subtarget. 
-def FeatureUseMISched: SubtargetFeature<"use-misched", "UseMISched", "true", - "Use the MachineScheduler">; - -def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler", - "DisablePostRAScheduler", "true", - "Don't schedule again after register allocation">; - -// Enable use of alias analysis during code generation -def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true", - "Use alias analysis during codegen">; - -//===----------------------------------------------------------------------===// -// ARM architecture class -// - -// A-series ISA -def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass", - "Is application profile ('A' series)">; - -// R-series ISA -def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass", - "Is realtime profile ('R' series)">; - -// M-series ISA -def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", - "Is microcontroller profile ('M' series)">; - - -def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", - "Enable Thumb2 instructions">; - -def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", - "Does not support ARM mode execution">; - -//===----------------------------------------------------------------------===// -// ARM ISAa. 
-// - -def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", - "Support ARM v4T instructions">; - -def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true", - "Support ARM v5T instructions", - [HasV4TOps]>; - -def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true", - "Support ARM v5TE, v5TEj, and " - "v5TExp instructions", - [HasV5TOps]>; - -def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true", - "Support ARM v6 instructions", - [HasV5TEOps]>; - -def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true", - "Support ARM v6M instructions", - [HasV6Ops]>; - -def HasV8MBaselineOps : SubtargetFeature<"v8m", "HasV8MBaselineOps", "true", - "Support ARM v8M Baseline instructions", - [HasV6MOps]>; - -def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true", - "Support ARM v6k instructions", - [HasV6Ops]>; - -def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", - "Support ARM v6t2 instructions", - [HasV8MBaselineOps, HasV6KOps, FeatureThumb2]>; - -def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", - "Support ARM v7 instructions", - [HasV6T2Ops, FeaturePerfMon, - FeatureV7Clrex]>; - -def HasV8MMainlineOps : - SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true", - "Support ARM v8M Mainline instructions", - [HasV7Ops]>; - -def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", - "Support ARM v8 instructions", - [HasV7Ops, FeatureAcquireRelease]>; - -def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", - "Support ARM v8.1a instructions", - [HasV8Ops]>; - -def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", - "Support ARM v8.2a instructions", - [HasV8_1aOps]>; - -def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", - "Support ARM v8.3a instructions", - [HasV8_2aOps]>; - -def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true", - "Support ARM v8.4a instructions", - [HasV8_3aOps, FeatureDotProd]>; - 
-//===----------------------------------------------------------------------===// -// ARM Processor subtarget features. -// - -def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", - "Cortex-A5 ARM processors", []>; -def ProcA7 : SubtargetFeature<"a7", "ARMProcFamily", "CortexA7", - "Cortex-A7 ARM processors", []>; -def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", - "Cortex-A8 ARM processors", []>; -def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", - "Cortex-A9 ARM processors", []>; -def ProcA12 : SubtargetFeature<"a12", "ARMProcFamily", "CortexA12", - "Cortex-A12 ARM processors", []>; -def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", - "Cortex-A15 ARM processors", []>; -def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17", - "Cortex-A17 ARM processors", []>; -def ProcA32 : SubtargetFeature<"a32", "ARMProcFamily", "CortexA32", - "Cortex-A32 ARM processors", []>; -def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", - "Cortex-A35 ARM processors", []>; -def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", - "Cortex-A53 ARM processors", []>; -def ProcA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55", - "Cortex-A55 ARM processors", []>; -def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", - "Cortex-A57 ARM processors", []>; -def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", - "Cortex-A72 ARM processors", []>; -def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", - "Cortex-A73 ARM processors", []>; -def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", - "Cortex-A75 ARM processors", []>; - -def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait", - "Qualcomm Krait processors", []>; -def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", - "Qualcomm Kryo processors", []>; -def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", - "Swift ARM processors", []>; - -def 
ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", - "Samsung Exynos-Mx processors", []>; - -def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4", - "Cortex-R4 ARM processors", []>; -def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", - "Cortex-R5 ARM processors", []>; -def ProcR7 : SubtargetFeature<"r7", "ARMProcFamily", "CortexR7", - "Cortex-R7 ARM processors", []>; -def ProcR52 : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52", - "Cortex-R52 ARM processors", []>; - -def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3", - "Cortex-M3 ARM processors", []>; - - -//===----------------------------------------------------------------------===// -// ARM Helper classes. -// - -class Architecture features> - : SubtargetFeature; - -class ProcNoItin Features> - : Processor; - - -//===----------------------------------------------------------------------===// -// ARM architectures -// - -def ARMv2 : Architecture<"armv2", "ARMv2", []>; - -def ARMv2a : Architecture<"armv2a", "ARMv2a", []>; - -def ARMv3 : Architecture<"armv3", "ARMv3", []>; - -def ARMv3m : Architecture<"armv3m", "ARMv3m", []>; - -def ARMv4 : Architecture<"armv4", "ARMv4", []>; - -def ARMv4t : Architecture<"armv4t", "ARMv4t", [HasV4TOps]>; - -def ARMv5t : Architecture<"armv5t", "ARMv5t", [HasV5TOps]>; - -def ARMv5te : Architecture<"armv5te", "ARMv5te", [HasV5TEOps]>; - -def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>; - -def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops, - FeatureDSP]>; - -def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops, - FeatureDSP]>; - -def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>; - -def ARMv6kz : Architecture<"armv6kz", "ARMv6kz", [HasV6KOps, - FeatureTrustZone]>; - -def ARMv6m : Architecture<"armv6-m", "ARMv6m", [HasV6MOps, - FeatureNoARM, - ModeThumb, - FeatureDB, - FeatureMClass, - FeatureStrictAlign]>; - -def ARMv6sm : Architecture<"armv6s-m", "ARMv6sm", [HasV6MOps, - 
FeatureNoARM, - ModeThumb, - FeatureDB, - FeatureMClass, - FeatureStrictAlign]>; - -def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops, - FeatureNEON, - FeatureDB, - FeatureDSP, - FeatureAClass]>; - -def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops, - FeatureNEON, - FeatureDB, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureAClass]>; - -def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops, - FeatureDB, - FeatureDSP, - FeatureHWDivThumb, - FeatureRClass]>; - -def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops, - FeatureThumb2, - FeatureNoARM, - ModeThumb, - FeatureDB, - FeatureHWDivThumb, - FeatureMClass]>; - -def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops, - FeatureThumb2, - FeatureNoARM, - ModeThumb, - FeatureDB, - FeatureHWDivThumb, - FeatureMClass, - FeatureDSP]>; - -def ARMv8a : Architecture<"armv8-a", "ARMv8a", [HasV8Ops, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC]>; - -def ARMv81a : Architecture<"armv8.1-a", "ARMv81a", [HasV8_1aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC]>; - -def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC, - FeatureRAS]>; - -def ARMv83a : Architecture<"armv8.3-a", "ARMv83a", [HasV8_3aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC, - FeatureRAS]>; - -def ARMv84a : Architecture<"armv8.4-a", "ARMv84a", [HasV8_4aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - 
FeatureVirtualization, - FeatureCrypto, - FeatureCRC, - FeatureRAS, - FeatureDotProd]>; - -def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops, - FeatureRClass, - FeatureDB, - FeatureDFB, - FeatureDSP, - FeatureCRC, - FeatureMP, - FeatureVirtualization, - FeatureFPARMv8, - FeatureNEON]>; - -def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline", - [HasV8MBaselineOps, - FeatureNoARM, - ModeThumb, - FeatureDB, - FeatureHWDivThumb, - FeatureV7Clrex, - Feature8MSecExt, - FeatureAcquireRelease, - FeatureMClass, - FeatureStrictAlign]>; - -def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline", - [HasV8MMainlineOps, - FeatureNoARM, - ModeThumb, - FeatureDB, - FeatureHWDivThumb, - Feature8MSecExt, - FeatureAcquireRelease, - FeatureMClass]>; - -// Aliases -def IWMMXT : Architecture<"iwmmxt", "ARMv5te", [ARMv5te]>; -def IWMMXT2 : Architecture<"iwmmxt2", "ARMv5te", [ARMv5te]>; -def XScale : Architecture<"xscale", "ARMv5te", [ARMv5te]>; -def ARMv6j : Architecture<"armv6j", "ARMv7a", [ARMv6]>; -def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>; -def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>; - - -//===----------------------------------------------------------------------===// -// ARM schedules. -//===----------------------------------------------------------------------===// -// -include "ARMSchedule.td" - -//===----------------------------------------------------------------------===// -// ARM processors -// - -// Dummy CPU, used to target architectures -def : ProcessorModel<"generic", CortexA8Model, []>; - -// FIXME: Several processors below are not using their own scheduler -// model, but one of similar/previous processor. These should be fixed. 
- -def : ProcNoItin<"arm8", [ARMv4]>; -def : ProcNoItin<"arm810", [ARMv4]>; -def : ProcNoItin<"strongarm", [ARMv4]>; -def : ProcNoItin<"strongarm110", [ARMv4]>; -def : ProcNoItin<"strongarm1100", [ARMv4]>; -def : ProcNoItin<"strongarm1110", [ARMv4]>; - -def : ProcNoItin<"arm7tdmi", [ARMv4t]>; -def : ProcNoItin<"arm7tdmi-s", [ARMv4t]>; -def : ProcNoItin<"arm710t", [ARMv4t]>; -def : ProcNoItin<"arm720t", [ARMv4t]>; -def : ProcNoItin<"arm9", [ARMv4t]>; -def : ProcNoItin<"arm9tdmi", [ARMv4t]>; -def : ProcNoItin<"arm920", [ARMv4t]>; -def : ProcNoItin<"arm920t", [ARMv4t]>; -def : ProcNoItin<"arm922t", [ARMv4t]>; -def : ProcNoItin<"arm940t", [ARMv4t]>; -def : ProcNoItin<"ep9312", [ARMv4t]>; - -def : ProcNoItin<"arm10tdmi", [ARMv5t]>; -def : ProcNoItin<"arm1020t", [ARMv5t]>; - -def : ProcNoItin<"arm9e", [ARMv5te]>; -def : ProcNoItin<"arm926ej-s", [ARMv5te]>; -def : ProcNoItin<"arm946e-s", [ARMv5te]>; -def : ProcNoItin<"arm966e-s", [ARMv5te]>; -def : ProcNoItin<"arm968e-s", [ARMv5te]>; -def : ProcNoItin<"arm10e", [ARMv5te]>; -def : ProcNoItin<"arm1020e", [ARMv5te]>; -def : ProcNoItin<"arm1022e", [ARMv5te]>; -def : ProcNoItin<"xscale", [ARMv5te]>; -def : ProcNoItin<"iwmmxt", [ARMv5te]>; - -def : Processor<"arm1136j-s", ARMV6Itineraries, [ARMv6]>; -def : Processor<"arm1136jf-s", ARMV6Itineraries, [ARMv6, - FeatureVFP2, - FeatureHasSlowFPVMLx]>; - -def : Processor<"cortex-m0", ARMV6Itineraries, [ARMv6m]>; -def : Processor<"cortex-m0plus", ARMV6Itineraries, [ARMv6m]>; -def : Processor<"cortex-m1", ARMV6Itineraries, [ARMv6m]>; -def : Processor<"sc000", ARMV6Itineraries, [ARMv6m]>; - -def : Processor<"arm1176j-s", ARMV6Itineraries, [ARMv6kz]>; -def : Processor<"arm1176jz-s", ARMV6Itineraries, [ARMv6kz]>; -def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ARMv6kz, - FeatureVFP2, - FeatureHasSlowFPVMLx]>; - -def : Processor<"mpcorenovfp", ARMV6Itineraries, [ARMv6k]>; -def : Processor<"mpcore", ARMV6Itineraries, [ARMv6k, - FeatureVFP2, - FeatureHasSlowFPVMLx]>; - -def : 
Processor<"arm1156t2-s", ARMV6Itineraries, [ARMv6t2]>; -def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2, - FeatureVFP2, - FeatureHasSlowFPVMLx]>; - -def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5, - FeatureHasRetAddrStack, - FeatureTrustZone, - FeatureSlowFPBrcc, - FeatureHasSlowFPVMLx, - FeatureVMLxForwarding, - FeatureMP, - FeatureVFP4]>; - -def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7, - FeatureHasRetAddrStack, - FeatureTrustZone, - FeatureSlowFPBrcc, - FeatureHasVMLxHazards, - FeatureHasSlowFPVMLx, - FeatureVMLxForwarding, - FeatureMP, - FeatureVFP4, - FeatureVirtualization]>; - -def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8, - FeatureHasRetAddrStack, - FeatureNonpipelinedVFP, - FeatureTrustZone, - FeatureSlowFPBrcc, - FeatureHasVMLxHazards, - FeatureHasSlowFPVMLx, - FeatureVMLxForwarding]>; - -def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9, - FeatureHasRetAddrStack, - FeatureTrustZone, - FeatureHasVMLxHazards, - FeatureVMLxForwarding, - FeatureFP16, - FeatureAvoidPartialCPSR, - FeatureExpandMLx, - FeaturePreferVMOVSR, - FeatureMuxedUnits, - FeatureNEONForFPMovs, - FeatureCheckVLDnAlign, - FeatureMP]>; - -def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12, - FeatureHasRetAddrStack, - FeatureTrustZone, - FeatureVMLxForwarding, - FeatureVFP4, - FeatureAvoidPartialCPSR, - FeatureVirtualization, - FeatureMP]>; - -def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15, - FeatureDontWidenVMOVS, - FeatureSplatVFPToNeon, - FeatureHasRetAddrStack, - FeatureMuxedUnits, - FeatureTrustZone, - FeatureVFP4, - FeatureMP, - FeatureCheckVLDnAlign, - FeatureAvoidPartialCPSR, - FeatureVirtualization]>; - -def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17, - FeatureHasRetAddrStack, - FeatureTrustZone, - FeatureMP, - FeatureVMLxForwarding, - FeatureVFP4, - FeatureAvoidPartialCPSR, - FeatureVirtualization]>; - -// FIXME: krait has currently the same 
features as A9 plus VFP4 and HWDiv -def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait, - FeatureHasRetAddrStack, - FeatureMuxedUnits, - FeatureCheckVLDnAlign, - FeatureVMLxForwarding, - FeatureFP16, - FeatureAvoidPartialCPSR, - FeatureVFP4, - FeatureHWDivThumb, - FeatureHWDivARM]>; - -def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, - FeatureHasRetAddrStack, - FeatureNEONForFP, - FeatureVFP4, - FeatureMP, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureAvoidPartialCPSR, - FeatureAvoidMOVsShOp, - FeatureHasSlowFPVMLx, - FeatureHasVMLxHazards, - FeatureProfUnpredicate, - FeaturePrefISHSTBarrier, - FeatureSlowOddRegister, - FeatureSlowLoadDSubreg, - FeatureSlowVGETLNi32, - FeatureSlowVDUP32, - FeatureUseMISched, - FeatureNoPostRASched]>; - -def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4, - FeatureHasRetAddrStack, - FeatureAvoidPartialCPSR]>; - -def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4, - FeatureHasRetAddrStack, - FeatureSlowFPBrcc, - FeatureHasSlowFPVMLx, - FeatureVFP3, - FeatureD16, - FeatureAvoidPartialCPSR]>; - -def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5, - FeatureHasRetAddrStack, - FeatureVFP3, - FeatureD16, - FeatureSlowFPBrcc, - FeatureHWDivARM, - FeatureHasSlowFPVMLx, - FeatureAvoidPartialCPSR]>; - -def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7, - FeatureHasRetAddrStack, - FeatureVFP3, - FeatureD16, - FeatureFP16, - FeatureMP, - FeatureSlowFPBrcc, - FeatureHWDivARM, - FeatureHasSlowFPVMLx, - FeatureAvoidPartialCPSR]>; - -def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r, - FeatureHasRetAddrStack, - FeatureVFP3, - FeatureD16, - FeatureFP16, - FeatureMP, - FeatureSlowFPBrcc, - FeatureHWDivARM, - FeatureHasSlowFPVMLx, - FeatureAvoidPartialCPSR]>; - -def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m, - ProcM3, - FeatureHasNoBranchPredictor]>; - -def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m, - ProcM3, - FeatureHasNoBranchPredictor]>; 
- -def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em, - FeatureVFP4, - FeatureVFPOnlySP, - FeatureD16, - FeatureHasNoBranchPredictor]>; - -def : ProcNoItin<"cortex-m7", [ARMv7em, - FeatureFPARMv8, - FeatureD16]>; - -def : ProcNoItin<"cortex-m23", [ARMv8mBaseline, - FeatureNoMovt]>; - -def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline, - FeatureDSP, - FeatureFPARMv8, - FeatureD16, - FeatureVFPOnlySP, - FeatureHasNoBranchPredictor]>; - -def : ProcNoItin<"cortex-a32", [ARMv8a, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureFPAO]>; - -def : ProcNoItin<"cortex-a55", [ARMv82a, ProcA55, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureDotProd]>; - -def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureFPAO, - FeatureAvoidPartialCPSR, - FeatureCheapPredicableCPSR]>; - -def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcNoItin<"cortex-a75", [ARMv82a, ProcA75, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureDotProd]>; - -def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, - FeatureHasRetAddrStack, - FeatureNEONForFP, - FeatureVFP4, - FeatureMP, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureAvoidPartialCPSR, - FeatureAvoidMOVsShOp, - FeatureHasSlowFPVMLx, - FeatureCrypto, - FeatureUseMISched, - FeatureZCZeroing, - FeatureNoPostRASched]>; - -def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - 
FeatureCRC]>; - -def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynosM1, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcNoItin<"exynos-m4", [ARMv8a, ProcExynosM1, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcNoItin<"kryo", [ARMv8a, ProcKryo, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52, - FeatureUseMISched, - FeatureFPAO, - FeatureUseAA]>; - -//===----------------------------------------------------------------------===// -// Register File Description -//===----------------------------------------------------------------------===// - -include "ARMRegisterInfo-digit.td" -include "ARMRegisterBanks.td" -include "ARMCallingConv.td" - -//===----------------------------------------------------------------------===// -// Instruction Descriptions -//===----------------------------------------------------------------------===// - -include "ARMInstrInfo.td" -def ARMInstrInfo : InstrInfo; - -//===----------------------------------------------------------------------===// -// Declare the target which we are implementing -//===----------------------------------------------------------------------===// - -def ARMAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - int PassSubtarget = 1; - int Variant = 0; - bit isMCAsmWriter = 1; -} - -def ARMAsmParser : AsmParser { - bit ReportMultipleNearMisses = 1; -} - -def ARMAsmParserVariant : AsmParserVariant { - int Variant = 0; - string Name = "ARM"; - string BreakCharacters = "."; -} - -def ARM : Target { - // Pull in Instruction Info. 
- let InstructionSet = ARMInstrInfo; - let AssemblyWriters = [ARMAsmWriter]; - let AssemblyParsers = [ARMAsmParser]; - let AssemblyParserVariants = [ARMAsmParserVariant]; - let AllowRegisterRenaming = 1; -} diff --git a/suite/synctools/tablegen/ARM/ARM.td b/suite/synctools/tablegen/ARM/ARM.td deleted file mode 100644 index 2e62a07904..0000000000 --- a/suite/synctools/tablegen/ARM/ARM.td +++ /dev/null @@ -1,1098 +0,0 @@ -//===-- ARM.td - Describe the ARM Target Machine -----------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Target-independent interfaces which we are implementing -//===----------------------------------------------------------------------===// - -include "llvm/Target/Target.td" - -//===----------------------------------------------------------------------===// -// ARM Subtarget state. -// - -def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", - "true", "Thumb mode">; - -def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat", - "true", "Use software floating " - "point features.">; - - -//===----------------------------------------------------------------------===// -// ARM Subtarget features. 
-// - -// Floating Point, HW Division and Neon Support -def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", - "Enable VFP2 instructions">; - -def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", - "Enable VFP3 instructions", - [FeatureVFP2]>; - -def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", - "Enable NEON instructions", - [FeatureVFP3]>; - -def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", - "Enable half-precision " - "floating point">; - -def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", - "Enable VFP4 instructions", - [FeatureVFP3, FeatureFP16]>; - -def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", - "true", "Enable ARMv8 FP", - [FeatureVFP4]>; - -def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", - "Enable full half-precision " - "floating point", - [FeatureFPARMv8]>; - -def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", - "Floating point unit supports " - "single precision only">; - -def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", - "Restrict FP to 16 double registers">; - -def FeatureHWDivThumb : SubtargetFeature<"hwdiv", - "HasHardwareDivideInThumb", "true", - "Enable divide instructions in Thumb">; - -def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", - "HasHardwareDivideInARM", "true", - "Enable divide instructions in ARM mode">; - -// Atomic Support -def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", - "Has data barrier (dmb/dsb) instructions">; - -def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", - "Has v7 clrex instruction">; - -def FeatureDFB : SubtargetFeature<"dfb", "HasFullDataBarrier", "true", - "Has full data barrier (dfb) instruction">; - -def FeatureAcquireRelease : SubtargetFeature<"acquire-release", - "HasAcquireRelease", "true", - "Has v8 acquire/release (lda/ldaex " - " etc) instructions">; - - -def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", - 
"FP compare + branch is slow">; - -def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", - "Enable support for Performance " - "Monitor extensions">; - - -// TrustZone Security Extensions -def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", - "Enable support for TrustZone " - "security extensions">; - -def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true", - "Enable support for ARMv8-M " - "Security Extensions">; - -def FeatureSHA2 : SubtargetFeature<"sha2", "HasSHA2", "true", - "Enable SHA1 and SHA256 support", [FeatureNEON]>; - -def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", - "Enable AES support", [FeatureNEON]>; - -def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", - "Enable support for " - "Cryptography extensions", - [FeatureNEON, FeatureSHA2, FeatureAES]>; - -def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", - "Enable support for CRC instructions">; - -def FeatureDotProd : SubtargetFeature<"dotprod", "HasDotProd", "true", - "Enable support for dot product instructions", - [FeatureNEON]>; - -// Not to be confused with FeatureHasRetAddrStack (return address stack) -def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", - "Enable Reliability, Availability " - "and Serviceability extensions">; - -// Fast computation of non-negative address offsets -def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", - "Enable fast computation of " - "positive address offsets">; - -// Fast execution of AES crypto operations -def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true", - "CPU fuses AES crypto operations">; - -// Fast execution of bottom and top halves of literal generation -def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true", - "CPU fuses literal generation operations">; - -// The way of reading thread pointer -def FeatureReadTp : SubtargetFeature<"read-tp-hard", "ReadTPHard", "true", - "Reading thread 
pointer from register">; - -// Cyclone can zero VFP registers in 0 cycles. -def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", - "Has zero-cycle zeroing instructions">; - -// Whether it is profitable to unpredicate certain instructions during if-conversion -def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr", - "IsProfitableToUnpredicate", "true", - "Is profitable to unpredicate">; - -// Some targets (e.g. Swift) have microcoded VGETLNi32. -def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32", - "HasSlowVGETLNi32", "true", - "Has slow VGETLNi32 - prefer VMOV">; - -// Some targets (e.g. Swift) have microcoded VDUP32. -def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", - "true", - "Has slow VDUP32 - prefer VMOV">; - -// Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON -// for scalar FP, as this allows more effective execution domain optimization. -def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR", - "true", "Prefer VMOVSR">; - -// Swift has ISHST barriers compatible with Atomic Release semantics but weaker -// than ISH -def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST", - "true", "Prefer ISHST barriers">; - -// Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU. -def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", - "true", - "Has muxed AGU and NEON/FPU">; - -// Whether VLDM/VSTM starting with odd register number need more microops -// than single VLDRS -def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister", - "true", "VLDM/VSTM starting " - "with an odd register is slow">; - -// Some targets have a renaming dependency when loading into D subregisters. -def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg", - "SlowLoadDSubregister", "true", - "Loading into D subregs is slow">; - -// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD. 
-def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs", - "DontWidenVMOVS", "true", - "Don't widen VMOVS to VMOVD">; - -// Some targets (e.g. Cortex-A15) prefer to avoid mixing operations on different -// VFP register widths. -def FeatureSplatVFPToNeon : SubtargetFeature<"splat-vfp-neon", - "SplatVFPToNeon", "true", - "Splat register from VFP to NEON", - [FeatureDontWidenVMOVS]>; - -// Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions. -def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", - "ExpandMLx", "true", - "Expand VFP/NEON MLA/MLS instructions">; - -// Some targets have special RAW hazards for VFP/NEON VMLA/VMLS. -def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards", - "true", "Has VMLx hazards">; - -// Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from -// VFP to NEON, as an execution domain optimization. -def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", - "UseNEONForFPMovs", "true", - "Convert VMOVSR, VMOVRS, " - "VMOVS to NEON">; - -// Some processors benefit from using NEON instructions for scalar -// single-precision FP operations. This affects instruction selection and should -// only be enabled if the handling of denormals is not important. -def FeatureNEONForFP : SubtargetFeature<"neonfp", - "UseNEONForSinglePrecisionFP", - "true", - "Use NEON for single precision FP">; - -// On some processors, VLDn instructions that access unaligned data take one -// extra cycle. Take that into account when computing operand latencies. -def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign", - "true", - "Check for VLDn unaligned access">; - -// Some processors have a nonpipelined VFP coprocessor. 
-def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp", - "NonpipelinedVFP", "true", - "VFP instructions are not pipelined">; - -// Some processors have FP multiply-accumulate instructions that don't -// play nicely with other VFP / NEON instructions, and it's generally better -// to just not use them. -def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true", - "Disable VFP / NEON MAC instructions">; - -// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding. -def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", - "HasVMLxForwarding", "true", - "Has multiplier accumulator forwarding">; - -// Disable 32-bit to 16-bit narrowing for experimentation. -def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", - "Prefer 32-bit Thumb instrs">; - -/// Some instructions update CPSR partially, which can add a false dependency for -/// out-of-order implementations, e.g. Cortex-A9, unless each individual bit is -/// mapped to a separate physical register. Avoid partial CPSR update for these -/// processors. -def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", - "AvoidCPSRPartialUpdate", "true", - "Avoid CPSR partial update for OOO execution">; - -/// Disable +1 predication cost for instructions updating CPSR. -/// Enabled for Cortex-A57. -def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr", - "CheapPredicableCPSRDef", - "true", - "Disable +1 predication cost for instructions updating CPSR">; - -def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", - "AvoidMOVsShifterOperand", "true", - "Avoid movs instructions with " - "shifter operand">; - -// Some processors perform return stack prediction. CodeGen should avoid issuing -// "normal" call instructions to callees which do not return.
-def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", - "HasRetAddrStack", "true", - "Has return address stack">; - -// Some processors have no branch predictor, which changes the expected cost of -// taking a branch which affects the choice of whether to use predicated -// instructions. -def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor", - "HasBranchPredictor", "false", - "Has no branch predictor">; - -/// DSP extension. -def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", - "Supports DSP instructions in " - "ARM and/or Thumb2">; - -// Multiprocessing extension. -def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", - "Supports Multiprocessing extension">; - -// Virtualization extension - requires HW divide (ARMv7-AR ARMARM - 4.4.8). -def FeatureVirtualization : SubtargetFeature<"virtualization", - "HasVirtualization", "true", - "Supports Virtualization extension", - [FeatureHWDivThumb, FeatureHWDivARM]>; - -// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. -// See ARMInstrInfo.td for details. 
-def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", - "NaCl trap">; - -def FeatureStrictAlign : SubtargetFeature<"strict-align", - "StrictAlign", "true", - "Disallow all unaligned memory " - "access">; - -def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", - "Generate calls via indirect call " - "instructions">; - -def FeatureExecuteOnly : SubtargetFeature<"execute-only", - "GenExecuteOnly", "true", - "Enable the generation of " - "execute only code.">; - -def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", - "Reserve R9, making it unavailable" - " as GPR">; - -def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", - "Don't use movt/movw pairs for " - "32-bit imms">; - -def FeatureNoNegativeImmediates - : SubtargetFeature<"no-neg-immediates", - "NegativeImmediates", "false", - "Convert immediates and instructions " - "to their negated or complemented " - "equivalent when the immediate does " - "not fit in the encoding.">; - -// Use the MachineScheduler for instruction scheduling for the subtarget. 
-def FeatureUseMISched: SubtargetFeature<"use-misched", "UseMISched", "true", - "Use the MachineScheduler">; - -def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler", - "DisablePostRAScheduler", "true", - "Don't schedule again after register allocation">; - -// Enable use of alias analysis during code generation -def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true", - "Use alias analysis during codegen">; - -//===----------------------------------------------------------------------===// -// ARM architecture class -// - -// A-series ISA -def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass", - "Is application profile ('A' series)">; - -// R-series ISA -def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass", - "Is realtime profile ('R' series)">; - -// M-series ISA -def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", - "Is microcontroller profile ('M' series)">; - - -def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", - "Enable Thumb2 instructions">; - -def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", - "Does not support ARM mode execution">; - -//===----------------------------------------------------------------------===// -// ARM ISAs.
-// - -def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", - "Support ARM v4T instructions">; - -def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true", - "Support ARM v5T instructions", - [HasV4TOps]>; - -def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true", - "Support ARM v5TE, v5TEj, and " - "v5TExp instructions", - [HasV5TOps]>; - -def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true", - "Support ARM v6 instructions", - [HasV5TEOps]>; - -def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true", - "Support ARM v6M instructions", - [HasV6Ops]>; - -def HasV8MBaselineOps : SubtargetFeature<"v8m", "HasV8MBaselineOps", "true", - "Support ARM v8M Baseline instructions", - [HasV6MOps]>; - -def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true", - "Support ARM v6k instructions", - [HasV6Ops]>; - -def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", - "Support ARM v6t2 instructions", - [HasV8MBaselineOps, HasV6KOps, FeatureThumb2]>; - -def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", - "Support ARM v7 instructions", - [HasV6T2Ops, FeaturePerfMon, - FeatureV7Clrex]>; - -def HasV8MMainlineOps : - SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true", - "Support ARM v8M Mainline instructions", - [HasV7Ops]>; - -def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", - "Support ARM v8 instructions", - [HasV7Ops, FeatureAcquireRelease]>; - -def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", - "Support ARM v8.1a instructions", - [HasV8Ops]>; - -def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", - "Support ARM v8.2a instructions", - [HasV8_1aOps]>; - -def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", - "Support ARM v8.3a instructions", - [HasV8_2aOps]>; - -def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true", - "Support ARM v8.4a instructions", - [HasV8_3aOps, FeatureDotProd]>; - 
-//===----------------------------------------------------------------------===// -// ARM Processor subtarget features. -// - -def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", - "Cortex-A5 ARM processors", []>; -def ProcA7 : SubtargetFeature<"a7", "ARMProcFamily", "CortexA7", - "Cortex-A7 ARM processors", []>; -def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", - "Cortex-A8 ARM processors", []>; -def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", - "Cortex-A9 ARM processors", []>; -def ProcA12 : SubtargetFeature<"a12", "ARMProcFamily", "CortexA12", - "Cortex-A12 ARM processors", []>; -def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", - "Cortex-A15 ARM processors", []>; -def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17", - "Cortex-A17 ARM processors", []>; -def ProcA32 : SubtargetFeature<"a32", "ARMProcFamily", "CortexA32", - "Cortex-A32 ARM processors", []>; -def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", - "Cortex-A35 ARM processors", []>; -def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", - "Cortex-A53 ARM processors", []>; -def ProcA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55", - "Cortex-A55 ARM processors", []>; -def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", - "Cortex-A57 ARM processors", []>; -def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", - "Cortex-A72 ARM processors", []>; -def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", - "Cortex-A73 ARM processors", []>; -def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", - "Cortex-A75 ARM processors", []>; - -def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait", - "Qualcomm Krait processors", []>; -def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", - "Qualcomm Kryo processors", []>; -def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", - "Swift ARM processors", []>; - -def 
ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", - "Samsung Exynos-Mx processors", []>; - -def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4", - "Cortex-R4 ARM processors", []>; -def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", - "Cortex-R5 ARM processors", []>; -def ProcR7 : SubtargetFeature<"r7", "ARMProcFamily", "CortexR7", - "Cortex-R7 ARM processors", []>; -def ProcR52 : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52", - "Cortex-R52 ARM processors", []>; - -def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3", - "Cortex-M3 ARM processors", []>; - - -//===----------------------------------------------------------------------===// -// ARM Helper classes. -// - -// Architecture: a SubtargetFeature named fname that sets ARMArch to aname and -// implies the given feature list. -class Architecture<string fname, string aname, list<SubtargetFeature> features> - : SubtargetFeature<fname, "ARMArch", aname, - !strconcat(aname, " architecture"), features>; - -// ProcNoItin: a Processor declared with NoItineraries. -class ProcNoItin<string Name, list<SubtargetFeature> Features> - : Processor<Name, NoItineraries, Features>; - - -//===----------------------------------------------------------------------===// -// ARM architectures -// - -def ARMv2 : Architecture<"armv2", "ARMv2", []>; - -def ARMv2a : Architecture<"armv2a", "ARMv2a", []>; - -def ARMv3 : Architecture<"armv3", "ARMv3", []>; - -def ARMv3m : Architecture<"armv3m", "ARMv3m", []>; - -def ARMv4 : Architecture<"armv4", "ARMv4", []>; - -def ARMv4t : Architecture<"armv4t", "ARMv4t", [HasV4TOps]>; - -def ARMv5t : Architecture<"armv5t", "ARMv5t", [HasV5TOps]>; - -def ARMv5te : Architecture<"armv5te", "ARMv5te", [HasV5TEOps]>; - -def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>; - -def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops, - FeatureDSP]>; - -def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops, - FeatureDSP]>; - -def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>; - -def ARMv6kz : Architecture<"armv6kz", "ARMv6kz", [HasV6KOps, - FeatureTrustZone]>; - -def ARMv6m : Architecture<"armv6-m", "ARMv6m", [HasV6MOps, - FeatureNoARM, - ModeThumb, - FeatureDB, - FeatureMClass, - FeatureStrictAlign]>; - -def ARMv6sm : Architecture<"armv6s-m", "ARMv6sm", [HasV6MOps, -
FeatureNoARM, - ModeThumb, - FeatureDB, - FeatureMClass, - FeatureStrictAlign]>; - -def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops, - FeatureNEON, - FeatureDB, - FeatureDSP, - FeatureAClass]>; - -def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops, - FeatureNEON, - FeatureDB, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureAClass]>; - -def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops, - FeatureDB, - FeatureDSP, - FeatureHWDivThumb, - FeatureRClass]>; - -def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops, - FeatureThumb2, - FeatureNoARM, - ModeThumb, - FeatureDB, - FeatureHWDivThumb, - FeatureMClass]>; - -def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops, - FeatureThumb2, - FeatureNoARM, - ModeThumb, - FeatureDB, - FeatureHWDivThumb, - FeatureMClass, - FeatureDSP]>; - -def ARMv8a : Architecture<"armv8-a", "ARMv8a", [HasV8Ops, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC]>; - -def ARMv81a : Architecture<"armv8.1-a", "ARMv81a", [HasV8_1aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC]>; - -def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC, - FeatureRAS]>; - -def ARMv83a : Architecture<"armv8.3-a", "ARMv83a", [HasV8_3aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC, - FeatureRAS]>; - -def ARMv84a : Architecture<"armv8.4-a", "ARMv84a", [HasV8_4aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - 
FeatureVirtualization, - FeatureCrypto, - FeatureCRC, - FeatureRAS, - FeatureDotProd]>; - -def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops, - FeatureRClass, - FeatureDB, - FeatureDFB, - FeatureDSP, - FeatureCRC, - FeatureMP, - FeatureVirtualization, - FeatureFPARMv8, - FeatureNEON]>; - -def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline", - [HasV8MBaselineOps, - FeatureNoARM, - ModeThumb, - FeatureDB, - FeatureHWDivThumb, - FeatureV7Clrex, - Feature8MSecExt, - FeatureAcquireRelease, - FeatureMClass, - FeatureStrictAlign]>; - -def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline", - [HasV8MMainlineOps, - FeatureNoARM, - ModeThumb, - FeatureDB, - FeatureHWDivThumb, - Feature8MSecExt, - FeatureAcquireRelease, - FeatureMClass]>; - -// Aliases -def IWMMXT : Architecture<"iwmmxt", "ARMv5te", [ARMv5te]>; -def IWMMXT2 : Architecture<"iwmmxt2", "ARMv5te", [ARMv5te]>; -def XScale : Architecture<"xscale", "ARMv5te", [ARMv5te]>; -def ARMv6j : Architecture<"armv6j", "ARMv7a", [ARMv6]>; -def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>; -def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>; - - -//===----------------------------------------------------------------------===// -// ARM schedules. -//===----------------------------------------------------------------------===// -// -include "ARMSchedule.td" - -//===----------------------------------------------------------------------===// -// ARM processors -// - -// Dummy CPU, used to target architectures -def : ProcessorModel<"generic", CortexA8Model, []>; - -// FIXME: Several processors below are not using their own scheduler -// model, but one of similar/previous processor. These should be fixed. 
- -def : ProcNoItin<"arm8", [ARMv4]>; -def : ProcNoItin<"arm810", [ARMv4]>; -def : ProcNoItin<"strongarm", [ARMv4]>; -def : ProcNoItin<"strongarm110", [ARMv4]>; -def : ProcNoItin<"strongarm1100", [ARMv4]>; -def : ProcNoItin<"strongarm1110", [ARMv4]>; - -def : ProcNoItin<"arm7tdmi", [ARMv4t]>; -def : ProcNoItin<"arm7tdmi-s", [ARMv4t]>; -def : ProcNoItin<"arm710t", [ARMv4t]>; -def : ProcNoItin<"arm720t", [ARMv4t]>; -def : ProcNoItin<"arm9", [ARMv4t]>; -def : ProcNoItin<"arm9tdmi", [ARMv4t]>; -def : ProcNoItin<"arm920", [ARMv4t]>; -def : ProcNoItin<"arm920t", [ARMv4t]>; -def : ProcNoItin<"arm922t", [ARMv4t]>; -def : ProcNoItin<"arm940t", [ARMv4t]>; -def : ProcNoItin<"ep9312", [ARMv4t]>; - -def : ProcNoItin<"arm10tdmi", [ARMv5t]>; -def : ProcNoItin<"arm1020t", [ARMv5t]>; - -def : ProcNoItin<"arm9e", [ARMv5te]>; -def : ProcNoItin<"arm926ej-s", [ARMv5te]>; -def : ProcNoItin<"arm946e-s", [ARMv5te]>; -def : ProcNoItin<"arm966e-s", [ARMv5te]>; -def : ProcNoItin<"arm968e-s", [ARMv5te]>; -def : ProcNoItin<"arm10e", [ARMv5te]>; -def : ProcNoItin<"arm1020e", [ARMv5te]>; -def : ProcNoItin<"arm1022e", [ARMv5te]>; -def : ProcNoItin<"xscale", [ARMv5te]>; -def : ProcNoItin<"iwmmxt", [ARMv5te]>; - -def : Processor<"arm1136j-s", ARMV6Itineraries, [ARMv6]>; -def : Processor<"arm1136jf-s", ARMV6Itineraries, [ARMv6, - FeatureVFP2, - FeatureHasSlowFPVMLx]>; - -def : Processor<"cortex-m0", ARMV6Itineraries, [ARMv6m]>; -def : Processor<"cortex-m0plus", ARMV6Itineraries, [ARMv6m]>; -def : Processor<"cortex-m1", ARMV6Itineraries, [ARMv6m]>; -def : Processor<"sc000", ARMV6Itineraries, [ARMv6m]>; - -def : Processor<"arm1176j-s", ARMV6Itineraries, [ARMv6kz]>; -def : Processor<"arm1176jz-s", ARMV6Itineraries, [ARMv6kz]>; -def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ARMv6kz, - FeatureVFP2, - FeatureHasSlowFPVMLx]>; - -def : Processor<"mpcorenovfp", ARMV6Itineraries, [ARMv6k]>; -def : Processor<"mpcore", ARMV6Itineraries, [ARMv6k, - FeatureVFP2, - FeatureHasSlowFPVMLx]>; - -def : 
Processor<"arm1156t2-s", ARMV6Itineraries, [ARMv6t2]>; -def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2, - FeatureVFP2, - FeatureHasSlowFPVMLx]>; - -def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5, - FeatureHasRetAddrStack, - FeatureTrustZone, - FeatureSlowFPBrcc, - FeatureHasSlowFPVMLx, - FeatureVMLxForwarding, - FeatureMP, - FeatureVFP4]>; - -def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7, - FeatureHasRetAddrStack, - FeatureTrustZone, - FeatureSlowFPBrcc, - FeatureHasVMLxHazards, - FeatureHasSlowFPVMLx, - FeatureVMLxForwarding, - FeatureMP, - FeatureVFP4, - FeatureVirtualization]>; - -def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8, - FeatureHasRetAddrStack, - FeatureNonpipelinedVFP, - FeatureTrustZone, - FeatureSlowFPBrcc, - FeatureHasVMLxHazards, - FeatureHasSlowFPVMLx, - FeatureVMLxForwarding]>; - -def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9, - FeatureHasRetAddrStack, - FeatureTrustZone, - FeatureHasVMLxHazards, - FeatureVMLxForwarding, - FeatureFP16, - FeatureAvoidPartialCPSR, - FeatureExpandMLx, - FeaturePreferVMOVSR, - FeatureMuxedUnits, - FeatureNEONForFPMovs, - FeatureCheckVLDnAlign, - FeatureMP]>; - -def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12, - FeatureHasRetAddrStack, - FeatureTrustZone, - FeatureVMLxForwarding, - FeatureVFP4, - FeatureAvoidPartialCPSR, - FeatureVirtualization, - FeatureMP]>; - -def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15, - FeatureDontWidenVMOVS, - FeatureSplatVFPToNeon, - FeatureHasRetAddrStack, - FeatureMuxedUnits, - FeatureTrustZone, - FeatureVFP4, - FeatureMP, - FeatureCheckVLDnAlign, - FeatureAvoidPartialCPSR, - FeatureVirtualization]>; - -def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17, - FeatureHasRetAddrStack, - FeatureTrustZone, - FeatureMP, - FeatureVMLxForwarding, - FeatureVFP4, - FeatureAvoidPartialCPSR, - FeatureVirtualization]>; - -// FIXME: krait has currently the same 
features as A9 plus VFP4 and HWDiv -def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait, - FeatureHasRetAddrStack, - FeatureMuxedUnits, - FeatureCheckVLDnAlign, - FeatureVMLxForwarding, - FeatureFP16, - FeatureAvoidPartialCPSR, - FeatureVFP4, - FeatureHWDivThumb, - FeatureHWDivARM]>; - -def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, - FeatureHasRetAddrStack, - FeatureNEONForFP, - FeatureVFP4, - FeatureMP, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureAvoidPartialCPSR, - FeatureAvoidMOVsShOp, - FeatureHasSlowFPVMLx, - FeatureHasVMLxHazards, - FeatureProfUnpredicate, - FeaturePrefISHSTBarrier, - FeatureSlowOddRegister, - FeatureSlowLoadDSubreg, - FeatureSlowVGETLNi32, - FeatureSlowVDUP32, - FeatureUseMISched, - FeatureNoPostRASched]>; - -def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4, - FeatureHasRetAddrStack, - FeatureAvoidPartialCPSR]>; - -def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4, - FeatureHasRetAddrStack, - FeatureSlowFPBrcc, - FeatureHasSlowFPVMLx, - FeatureVFP3, - FeatureD16, - FeatureAvoidPartialCPSR]>; - -def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5, - FeatureHasRetAddrStack, - FeatureVFP3, - FeatureD16, - FeatureSlowFPBrcc, - FeatureHWDivARM, - FeatureHasSlowFPVMLx, - FeatureAvoidPartialCPSR]>; - -def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7, - FeatureHasRetAddrStack, - FeatureVFP3, - FeatureD16, - FeatureFP16, - FeatureMP, - FeatureSlowFPBrcc, - FeatureHWDivARM, - FeatureHasSlowFPVMLx, - FeatureAvoidPartialCPSR]>; - -def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r, - FeatureHasRetAddrStack, - FeatureVFP3, - FeatureD16, - FeatureFP16, - FeatureMP, - FeatureSlowFPBrcc, - FeatureHWDivARM, - FeatureHasSlowFPVMLx, - FeatureAvoidPartialCPSR]>; - -def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m, - ProcM3, - FeatureHasNoBranchPredictor]>; - -def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m, - ProcM3, - FeatureHasNoBranchPredictor]>; 
- -def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em, - FeatureVFP4, - FeatureVFPOnlySP, - FeatureD16, - FeatureHasNoBranchPredictor]>; - -def : ProcNoItin<"cortex-m7", [ARMv7em, - FeatureFPARMv8, - FeatureD16]>; - -def : ProcNoItin<"cortex-m23", [ARMv8mBaseline, - FeatureNoMovt]>; - -def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline, - FeatureDSP, - FeatureFPARMv8, - FeatureD16, - FeatureVFPOnlySP, - FeatureHasNoBranchPredictor]>; - -def : ProcNoItin<"cortex-a32", [ARMv8a, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureFPAO]>; - -def : ProcNoItin<"cortex-a55", [ARMv82a, ProcA55, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureDotProd]>; - -def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureFPAO, - FeatureAvoidPartialCPSR, - FeatureCheapPredicableCPSR]>; - -def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcNoItin<"cortex-a75", [ARMv82a, ProcA75, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureDotProd]>; - -def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, - FeatureHasRetAddrStack, - FeatureNEONForFP, - FeatureVFP4, - FeatureMP, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureAvoidPartialCPSR, - FeatureAvoidMOVsShOp, - FeatureHasSlowFPVMLx, - FeatureCrypto, - FeatureUseMISched, - FeatureZCZeroing, - FeatureNoPostRASched]>; - -def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - 
FeatureCRC]>; - -def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynosM1, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcNoItin<"exynos-m4", [ARMv8a, ProcExynosM1, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcNoItin<"kryo", [ARMv8a, ProcKryo, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC]>; - -def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52, - FeatureUseMISched, - FeatureFPAO, - FeatureUseAA]>; - -//===----------------------------------------------------------------------===// -// Register File Description -//===----------------------------------------------------------------------===// - -include "ARMRegisterInfo.td" -include "ARMRegisterBanks.td" -include "ARMCallingConv.td" - -//===----------------------------------------------------------------------===// -// Instruction Descriptions -//===----------------------------------------------------------------------===// - -include "ARMInstrInfo.td" -def ARMInstrInfo : InstrInfo; - -//===----------------------------------------------------------------------===// -// Declare the target which we are implementing -//===----------------------------------------------------------------------===// - -def ARMAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - int PassSubtarget = 1; - int Variant = 0; - bit isMCAsmWriter = 1; -} - -def ARMAsmParser : AsmParser { - bit ReportMultipleNearMisses = 1; -} - -def ARMAsmParserVariant : AsmParserVariant { - int Variant = 0; - string Name = "ARM"; - string BreakCharacters = "."; -} - -def ARM : Target { - // Pull in Instruction Info. 
- let InstructionSet = ARMInstrInfo; - let AssemblyWriters = [ARMAsmWriter]; - let AssemblyParsers = [ARMAsmParser]; - let AssemblyParserVariants = [ARMAsmParserVariant]; - let AllowRegisterRenaming = 1; -} diff --git a/suite/synctools/tablegen/ARM/ARMCallingConv.td b/suite/synctools/tablegen/ARM/ARMCallingConv.td deleted file mode 100644 index f173e423f3..0000000000 --- a/suite/synctools/tablegen/ARM/ARMCallingConv.td +++ /dev/null @@ -1,318 +0,0 @@ -//===-- ARMCallingConv.td - Calling Conventions for ARM ----*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This describes the calling conventions for ARM architecture. -//===----------------------------------------------------------------------===// - -/// CCIfAlign - Match of the original alignment of the arg -class CCIfAlign: - CCIf; - -//===----------------------------------------------------------------------===// -// ARM APCS Calling Convention -//===----------------------------------------------------------------------===// -def CC_ARM_APCS : CallingConv<[ - - // Handles byval parameters. - CCIfByVal>, - - CCIfType<[i1, i8, i16], CCPromoteToType>, - - // Pass SwiftSelf in a callee saved register. - CCIfSwiftSelf>>, - - // A SwiftError is passed in R8. - CCIfSwiftError>>, - - // Handle all vector types as either f64 or v2f64. 
- CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, - - // f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack - CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>, - - CCIfType<[f32], CCBitConvertToType>, - CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, - - CCIfType<[i32], CCAssignToStack<4, 4>>, - CCIfType<[f64], CCAssignToStack<8, 4>>, - CCIfType<[v2f64], CCAssignToStack<16, 4>> -]>; - -def RetCC_ARM_APCS : CallingConv<[ - CCIfType<[i1, i8, i16], CCPromoteToType>, - CCIfType<[f32], CCBitConvertToType>, - - // Pass SwiftSelf in a callee saved register. - CCIfSwiftSelf>>, - - // A SwiftError is returned in R8. - CCIfSwiftError>>, - - // Handle all vector types as either f64 or v2f64. - CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, - - CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>, - - CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, - CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> -]>; - -//===----------------------------------------------------------------------===// -// ARM APCS Calling Convention for FastCC (when VFP2 or later is available) -//===----------------------------------------------------------------------===// -def FastCC_ARM_APCS : CallingConv<[ - // Handle all vector types as either f64 or v2f64. - CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, - - CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, - CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, - S9, S10, S11, S12, S13, S14, S15]>>, - - // CPRCs may be allocated to co-processor registers or the stack - they - // may never be allocated to core registers. 
- CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>, - CCIfType<[f64], CCAssignToStackWithShadow<8, 4, [Q0, Q1, Q2, Q3]>>, - CCIfType<[v2f64], CCAssignToStackWithShadow<16, 4, [Q0, Q1, Q2, Q3]>>, - - CCDelegateTo -]>; - -def RetFastCC_ARM_APCS : CallingConv<[ - // Handle all vector types as either f64 or v2f64. - CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, - - CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, - CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, - S9, S10, S11, S12, S13, S14, S15]>>, - CCDelegateTo -]>; - -//===----------------------------------------------------------------------===// -// ARM APCS Calling Convention for GHC -//===----------------------------------------------------------------------===// - -def CC_ARM_APCS_GHC : CallingConv<[ - // Handle all vector types as either f64 or v2f64. - CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, - - CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, - CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>, - CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>, - - // Promote i8/i16 arguments to i32. 
- CCIfType<[i8, i16], CCPromoteToType>, - - // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim - CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>> -]>; - -//===----------------------------------------------------------------------===// -// ARM AAPCS (EABI) Calling Convention, common parts -//===----------------------------------------------------------------------===// - -def CC_ARM_AAPCS_Common : CallingConv<[ - - CCIfType<[i1, i8, i16], CCPromoteToType>, - - // i64/f64 is passed in even pairs of GPRs - // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register - // (and the same is true for f64 if VFP is not enabled) - CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>, - CCIfType<[i32], CCIf<"ArgFlags.getOrigAlign() != 8", - CCAssignToReg<[R0, R1, R2, R3]>>>, - - CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, [R0, R1, R2, R3]>>>, - CCIfType<[i32], CCAssignToStackWithShadow<4, 4, [R0, R1, R2, R3]>>, - CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>, - CCIfType<[f64], CCAssignToStackWithShadow<8, 8, [Q0, Q1, Q2, Q3]>>, - CCIfType<[v2f64], CCIfAlign<"16", - CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>, - CCIfType<[v2f64], CCAssignToStackWithShadow<16, 8, [Q0, Q1, Q2, Q3]>> -]>; - -def RetCC_ARM_AAPCS_Common : CallingConv<[ - CCIfType<[i1, i8, i16], CCPromoteToType>, - CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, - CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> -]>; - -//===----------------------------------------------------------------------===// -// ARM AAPCS (EABI) Calling Convention -//===----------------------------------------------------------------------===// - -def CC_ARM_AAPCS : CallingConv<[ - // Handles byval parameters. - CCIfByVal>, - - // The 'nest' parameter, if any, is passed in R12. - CCIfNest>, - - // Handle all vector types as either f64 or v2f64. 
- CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType>, - - // Pass SwiftSelf in a callee saved register. - CCIfSwiftSelf>>, - - // A SwiftError is passed in R8. - CCIfSwiftError>>, - - CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>, - CCIfType<[f32], CCBitConvertToType>, - CCDelegateTo -]>; - -def RetCC_ARM_AAPCS : CallingConv<[ - // Handle all vector types as either f64 or v2f64. - CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v8f16,v16i8, v4f32], CCBitConvertToType>, - - // Pass SwiftSelf in a callee saved register. - CCIfSwiftSelf>>, - - // A SwiftError is returned in R8. - CCIfSwiftError>>, - - CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>, - CCIfType<[f32], CCBitConvertToType>, - - CCDelegateTo -]>; - -//===----------------------------------------------------------------------===// -// ARM AAPCS-VFP (EABI) Calling Convention -// Also used for FastCC (when VFP2 or later is available) -//===----------------------------------------------------------------------===// - -def CC_ARM_AAPCS_VFP : CallingConv<[ - // Handles byval parameters. - CCIfByVal>, - - // Handle all vector types as either f64 or v2f64. - CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType>, - - // Pass SwiftSelf in a callee saved register. - CCIfSwiftSelf>>, - - // A SwiftError is passed in R8. 
- CCIfSwiftError>>, - - // HFAs are passed in a contiguous block of registers, or on the stack - CCIfConsecutiveRegs>, - - CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, - CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, - S9, S10, S11, S12, S13, S14, S15]>>, - CCDelegateTo -]>; - -def RetCC_ARM_AAPCS_VFP : CallingConv<[ - // Handle all vector types as either f64 or v2f64. - CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType>, - - // Pass SwiftSelf in a callee saved register. - CCIfSwiftSelf>>, - - // A SwiftError is returned in R8. - CCIfSwiftError>>, - - CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, - CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, - S9, S10, S11, S12, S13, S14, S15]>>, - CCDelegateTo -]>; - -//===----------------------------------------------------------------------===// -// Callee-saved register lists. -//===----------------------------------------------------------------------===// - -def CSR_NoRegs : CalleeSavedRegs<(add)>; -def CSR_FPRegs : CalleeSavedRegs<(add (sequence "D%u", 0, 31))>; - -def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, - (sequence "D%u", 15, 8))>; - -// R8 is used to pass swifterror, remove it from CSR. -def CSR_AAPCS_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS, R8)>; - -// The order of callee-saved registers needs to match the order we actually push -// them in FrameLowering, because this order is what's used by -// PrologEpilogInserter to allocate frame index slots. So when R7 is the frame -// pointer, we use this AAPCS alternative. -def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4, - R11, R10, R9, R8, - (sequence "D%u", 15, 8))>; - -// R8 is used to pass swifterror, remove it from CSR. 
-def CSR_AAPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush, - R8)>; - -// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this' -// and the pointer return value are both passed in R0 in these cases, this can -// be partially modelled by treating R0 as a callee-saved register -// Only the resulting RegMask is used; the SaveList is ignored -def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, - R5, R4, (sequence "D%u", 15, 8), - R0)>; - -// iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register. -// Also save R7-R4 first to match the stack frame fixed spill areas. -def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; - -// R8 is used to pass swifterror, remove it from CSR. -def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>; - -def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, - (sub CSR_AAPCS_ThisReturn, R9))>; - -def CSR_iOS_TLSCall - : CalleeSavedRegs<(add LR, SP, (sub(sequence "R%u", 12, 1), R9, R12), - (sequence "D%u", 31, 0))>; - -// C++ TLS access function saves all registers except SP. Try to match -// the order of CSRs in CSR_iOS. -def CSR_iOS_CXX_TLS : CalleeSavedRegs<(add CSR_iOS, (sequence "R%u", 12, 1), - (sequence "D%u", 31, 0))>; - -// CSRs that are handled by prologue, epilogue. -def CSR_iOS_CXX_TLS_PE : CalleeSavedRegs<(add LR, R12, R11, R7, R5, R4)>; - -// CSRs that are handled explicitly via copies. -def CSR_iOS_CXX_TLS_ViaCopy : CalleeSavedRegs<(sub CSR_iOS_CXX_TLS, - CSR_iOS_CXX_TLS_PE)>; - -// The "interrupt" attribute is used to generate code that is acceptable in -// exception-handlers of various kinds. It makes us use a different return -// instruction (handled elsewhere) and affects which registers we must return to -// our "caller" in the same state as we receive them. - -// For most interrupts, all registers except SP and LR are shared with -// user-space. 
We mark LR to be saved anyway, since this is what the ARM backend -// generally does rather than tracking its liveness as a normal register. -def CSR_GenericInt : CalleeSavedRegs<(add LR, (sequence "R%u", 12, 0))>; - -// The fast interrupt handlers have more private state and get their own copies -// of R8-R12, in addition to SP and LR. As before, mark LR for saving too. - -// FIXME: we mark R11 as callee-saved since it's often the frame-pointer, and -// current frame lowering expects to encounter it while processing callee-saved -// registers. -def CSR_FIQ : CalleeSavedRegs<(add LR, R11, (sequence "R%u", 7, 0))>; - - diff --git a/suite/synctools/tablegen/ARM/ARMInstrFormats.td b/suite/synctools/tablegen/ARM/ARMInstrFormats.td deleted file mode 100644 index 1d3b1414f0..0000000000 --- a/suite/synctools/tablegen/ARM/ARMInstrFormats.td +++ /dev/null @@ -1,2620 +0,0 @@ -//===-- ARMInstrFormats.td - ARM Instruction Formats -------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// -// ARM Instruction Format Definitions. -// - -// Format specifies the encoding used by the instruction. This is part of the -// ad-hoc solution used to emit machine instruction encodings by our machine -// code emitter. 
-class Format val> { - bits<6> Value = val; -} - -def Pseudo : Format<0>; -def MulFrm : Format<1>; -def BrFrm : Format<2>; -def BrMiscFrm : Format<3>; - -def DPFrm : Format<4>; -def DPSoRegRegFrm : Format<5>; - -def LdFrm : Format<6>; -def StFrm : Format<7>; -def LdMiscFrm : Format<8>; -def StMiscFrm : Format<9>; -def LdStMulFrm : Format<10>; - -def LdStExFrm : Format<11>; - -def ArithMiscFrm : Format<12>; -def SatFrm : Format<13>; -def ExtFrm : Format<14>; - -def VFPUnaryFrm : Format<15>; -def VFPBinaryFrm : Format<16>; -def VFPConv1Frm : Format<17>; -def VFPConv2Frm : Format<18>; -def VFPConv3Frm : Format<19>; -def VFPConv4Frm : Format<20>; -def VFPConv5Frm : Format<21>; -def VFPLdStFrm : Format<22>; -def VFPLdStMulFrm : Format<23>; -def VFPMiscFrm : Format<24>; - -def ThumbFrm : Format<25>; -def MiscFrm : Format<26>; - -def NGetLnFrm : Format<27>; -def NSetLnFrm : Format<28>; -def NDupFrm : Format<29>; -def NLdStFrm : Format<30>; -def N1RegModImmFrm: Format<31>; -def N2RegFrm : Format<32>; -def NVCVTFrm : Format<33>; -def NVDupLnFrm : Format<34>; -def N2RegVShLFrm : Format<35>; -def N2RegVShRFrm : Format<36>; -def N3RegFrm : Format<37>; -def N3RegVShFrm : Format<38>; -def NVExtFrm : Format<39>; -def NVMulSLFrm : Format<40>; -def NVTBLFrm : Format<41>; -def DPSoRegImmFrm : Format<42>; -def N3RegCplxFrm : Format<43>; - -// Misc flags. - -// The instruction has an Rn register operand. -// UnaryDP - Indicates this is a unary data processing instruction, i.e. -// it doesn't have a Rn operand. -class UnaryDP { bit isUnaryDataProc = 1; } - -// Xform16Bit - Indicates this Thumb2 instruction may be transformed into -// a 16-bit Thumb instruction if certain conditions are met. -class Xform16Bit { bit canXformTo16Bit = 1; } - -//===----------------------------------------------------------------------===// -// ARM Instruction flags. These need to match ARMBaseInstrInfo.h. -// - -// FIXME: Once the JIT is MC-ized, these can go away. -// Addressing mode. 
-class AddrMode val> { - bits<5> Value = val; -} -def AddrModeNone : AddrMode<0>; -def AddrMode1 : AddrMode<1>; -def AddrMode2 : AddrMode<2>; -def AddrMode3 : AddrMode<3>; -def AddrMode4 : AddrMode<4>; -def AddrMode5 : AddrMode<5>; -def AddrMode6 : AddrMode<6>; -def AddrModeT1_1 : AddrMode<7>; -def AddrModeT1_2 : AddrMode<8>; -def AddrModeT1_4 : AddrMode<9>; -def AddrModeT1_s : AddrMode<10>; -def AddrModeT2_i12 : AddrMode<11>; -def AddrModeT2_i8 : AddrMode<12>; -def AddrModeT2_so : AddrMode<13>; -def AddrModeT2_pc : AddrMode<14>; -def AddrModeT2_i8s4 : AddrMode<15>; -def AddrMode_i12 : AddrMode<16>; -def AddrMode5FP16 : AddrMode<17>; -def AddrModeT2_ldrex : AddrMode<18>; - -// Load / store index mode. -class IndexMode val> { - bits<2> Value = val; -} -def IndexModeNone : IndexMode<0>; -def IndexModePre : IndexMode<1>; -def IndexModePost : IndexMode<2>; -def IndexModeUpd : IndexMode<3>; - -// Instruction execution domain. -class Domain val> { - bits<3> Value = val; -} -def GenericDomain : Domain<0>; -def VFPDomain : Domain<1>; // Instructions in VFP domain only -def NeonDomain : Domain<2>; // Instructions in Neon domain only -def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains -def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8 - -//===----------------------------------------------------------------------===// -// ARM special operands. -// - -// ARM imod and iflag operands, used only by the CPS instruction. -def imod_op : Operand { - let PrintMethod = "printCPSIMod"; -} - -def ProcIFlagsOperand : AsmOperandClass { - let Name = "ProcIFlags"; - let ParserMethod = "parseProcIFlagsOperand"; -} -def iflags_op : Operand { - let PrintMethod = "printCPSIFlag"; - let ParserMatchClass = ProcIFlagsOperand; -} - -// ARM Predicate operand. Default to 14 = always (AL). Second part is CC -// register whose default is 0 (no register). 
-def CondCodeOperand : AsmOperandClass { let Name = "CondCode"; } -def pred : PredicateOperand { - let PrintMethod = "printPredicateOperand"; - let ParserMatchClass = CondCodeOperand; - let DecoderMethod = "DecodePredicateOperand"; -} - -// Selectable predicate operand for CMOV instructions. We can't use a normal -// predicate because the default values interfere with instruction selection. In -// all other respects it is identical though: pseudo-instruction expansion -// relies on the MachineOperands being compatible. -def cmovpred : Operand, PredicateOp, - ComplexPattern { - let MIOperandInfo = (ops i32imm, i32imm); - let PrintMethod = "printPredicateOperand"; -} - -// Conditional code result for instructions whose 's' bit is set, e.g. subs. -def CCOutOperand : AsmOperandClass { let Name = "CCOut"; } -def cc_out : OptionalDefOperand { - let EncoderMethod = "getCCOutOpValue"; - let PrintMethod = "printSBitModifierOperand"; - let ParserMatchClass = CCOutOperand; - let DecoderMethod = "DecodeCCOutOperand"; -} - -// Same as cc_out except it defaults to setting CPSR. -def s_cc_out : OptionalDefOperand { - let EncoderMethod = "getCCOutOpValue"; - let PrintMethod = "printSBitModifierOperand"; - let ParserMatchClass = CCOutOperand; - let DecoderMethod = "DecodeCCOutOperand"; -} - -// ARM special operands for disassembly only. 
-// -def SetEndAsmOperand : ImmAsmOperand<0,1> { - let Name = "SetEndImm"; - let ParserMethod = "parseSetEndImm"; -} -def setend_op : Operand { - let PrintMethod = "printSetendOperand"; - let ParserMatchClass = SetEndAsmOperand; -} - -def MSRMaskOperand : AsmOperandClass { - let Name = "MSRMask"; - let ParserMethod = "parseMSRMaskOperand"; -} -def msr_mask : Operand { - let PrintMethod = "printMSRMaskOperand"; - let DecoderMethod = "DecodeMSRMask"; - let ParserMatchClass = MSRMaskOperand; -} - -def BankedRegOperand : AsmOperandClass { - let Name = "BankedReg"; - let ParserMethod = "parseBankedRegOperand"; -} -def banked_reg : Operand { - let PrintMethod = "printBankedRegOperand"; - let DecoderMethod = "DecodeBankedReg"; - let ParserMatchClass = BankedRegOperand; -} - -// Shift Right Immediate - A shift right immediate is encoded differently from -// other shift immediates. The imm6 field is encoded like so: -// -// Offset Encoding -// 8 imm6<5:3> = '001', 8 - is encoded in imm6<2:0> -// 16 imm6<5:4> = '01', 16 - is encoded in imm6<3:0> -// 32 imm6<5> = '1', 32 - is encoded in imm6<4:0> -// 64 64 - is encoded in imm6<5:0> -def shr_imm8_asm_operand : ImmAsmOperand<1,8> { let Name = "ShrImm8"; } -def shr_imm8 : Operand, ImmLeaf 0 && Imm <= 8; }]> { - let EncoderMethod = "getShiftRight8Imm"; - let DecoderMethod = "DecodeShiftRight8Imm"; - let ParserMatchClass = shr_imm8_asm_operand; -} -def shr_imm16_asm_operand : ImmAsmOperand<1,16> { let Name = "ShrImm16"; } -def shr_imm16 : Operand, ImmLeaf 0 && Imm <= 16; }]> { - let EncoderMethod = "getShiftRight16Imm"; - let DecoderMethod = "DecodeShiftRight16Imm"; - let ParserMatchClass = shr_imm16_asm_operand; -} -def shr_imm32_asm_operand : ImmAsmOperand<1,32> { let Name = "ShrImm32"; } -def shr_imm32 : Operand, ImmLeaf 0 && Imm <= 32; }]> { - let EncoderMethod = "getShiftRight32Imm"; - let DecoderMethod = "DecodeShiftRight32Imm"; - let ParserMatchClass = shr_imm32_asm_operand; -} -def shr_imm64_asm_operand : 
ImmAsmOperand<1,64> { let Name = "ShrImm64"; } -def shr_imm64 : Operand, ImmLeaf 0 && Imm <= 64; }]> { - let EncoderMethod = "getShiftRight64Imm"; - let DecoderMethod = "DecodeShiftRight64Imm"; - let ParserMatchClass = shr_imm64_asm_operand; -} - - -// ARM Assembler operand for ldr Rd, =expression which generates an offset -// to a constant pool entry or a MOV depending on the value of expression -def const_pool_asm_operand : AsmOperandClass { let Name = "ConstPoolAsmImm"; } -def const_pool_asm_imm : Operand { - let ParserMatchClass = const_pool_asm_operand; -} - - -//===----------------------------------------------------------------------===// -// ARM Assembler alias templates. -// -// Note: When EmitPriority == 1, the alias will be used for printing -class ARMInstAlias - : InstAlias, Requires<[IsARM]>; -class ARMInstSubst - : InstAlias, - Requires<[IsARM,UseNegativeImmediates]>; -class tInstAlias - : InstAlias, Requires<[IsThumb]>; -class tInstSubst - : InstAlias, - Requires<[IsThumb,UseNegativeImmediates]>; -class t2InstAlias - : InstAlias, Requires<[IsThumb2]>; -class t2InstSubst - : InstAlias, - Requires<[IsThumb2,UseNegativeImmediates]>; -class VFP2InstAlias - : InstAlias, Requires<[HasVFP2]>; -class VFP2DPInstAlias - : InstAlias, Requires<[HasVFP2,HasDPVFP]>; -class VFP3InstAlias - : InstAlias, Requires<[HasVFP3]>; -class NEONInstAlias - : InstAlias, Requires<[HasNEON]>; - - -class VFP2MnemonicAlias : MnemonicAlias, - Requires<[HasVFP2]>; -class NEONMnemonicAlias : MnemonicAlias, - Requires<[HasNEON]>; - -//===----------------------------------------------------------------------===// -// ARM Instruction templates. -// - - -class InstTemplate - : Instruction { - let Namespace = "ARM"; - - AddrMode AM = am; - int Size = sz; - IndexMode IM = im; - bits<2> IndexModeBits = IM.Value; - Format F = f; - bits<6> Form = F.Value; - Domain D = d; - bit isUnaryDataProc = 0; - bit canXformTo16Bit = 0; - // The instruction is a 16-bit flag setting Thumb instruction. 
Used - // by the parser to determine whether to require the 'S' suffix on the - // mnemonic (when not in an IT block) or preclude it (when in an IT block). - bit thumbArithFlagSetting = 0; - - // If this is a pseudo instruction, mark it isCodeGenOnly. - let isCodeGenOnly = !eq(!cast(f), "Pseudo"); - - // The layout of TSFlags should be kept in sync with ARMBaseInfo.h. - let TSFlags{4-0} = AM.Value; - let TSFlags{6-5} = IndexModeBits; - let TSFlags{12-7} = Form; - let TSFlags{13} = isUnaryDataProc; - let TSFlags{14} = canXformTo16Bit; - let TSFlags{17-15} = D.Value; - let TSFlags{18} = thumbArithFlagSetting; - - let Constraints = cstr; - let Itinerary = itin; -} - -class Encoding { - field bits<32> Inst; - // Mask of bits that cause an encoding to be UNPREDICTABLE. - // If a bit is set, then if the corresponding bit in the - // target encoding differs from its value in the "Inst" field, - // the instruction is UNPREDICTABLE (SoftFail in abstract parlance). - field bits<32> Unpredictable = 0; - // SoftFail is the generic name for this field, but we alias it so - // as to make it more obvious what it means in ARM-land. - field bits<32> SoftFail = Unpredictable; -} - -class InstARM - : InstTemplate, Encoding { - let DecoderNamespace = "ARM"; -} - -// This Encoding-less class is used by Thumb1 to specify the encoding bits later -// on by adding flavors to specific instructions. -class InstThumb - : InstTemplate { - let DecoderNamespace = "Thumb"; -} - -// Pseudo-instructions for alternate assembly syntax (never used by codegen). -// These are aliases that require C++ handling to convert to the target -// instruction, while InstAliases can be handled directly by tblgen. -class AsmPseudoInst - : InstTemplate { - let OutOperandList = oops; - let InOperandList = iops; - let Pattern = []; - let isCodeGenOnly = 0; // So we get asm matcher for it. 
- let AsmString = asm; - let isPseudo = 1; -} - -class ARMAsmPseudo - : AsmPseudoInst, Requires<[IsARM]>; -class tAsmPseudo - : AsmPseudoInst, Requires<[IsThumb]>; -class t2AsmPseudo - : AsmPseudoInst, Requires<[IsThumb2]>; -class VFP2AsmPseudo - : AsmPseudoInst, Requires<[HasVFP2]>; -class NEONAsmPseudo - : AsmPseudoInst, Requires<[HasNEON]>; - -// Pseudo instructions for the code generator. -class PseudoInst pattern> - : InstTemplate { - let OutOperandList = oops; - let InOperandList = iops; - let Pattern = pattern; - let isCodeGenOnly = 1; - let isPseudo = 1; -} - -// PseudoInst that's ARM-mode only. -class ARMPseudoInst pattern> - : PseudoInst { - let Size = sz; - list Predicates = [IsARM]; -} - -// PseudoInst that's Thumb-mode only. -class tPseudoInst pattern> - : PseudoInst { - let Size = sz; - list Predicates = [IsThumb]; -} - -// PseudoInst that's in ARMv8-M baseline (Somewhere between Thumb and Thumb2) -class t2basePseudoInst pattern> - : PseudoInst { - let Size = sz; - list Predicates = [IsThumb,HasV8MBaseline]; -} - -// PseudoInst that's Thumb2-mode only. -class t2PseudoInst pattern> - : PseudoInst { - let Size = sz; - list Predicates = [IsThumb2]; -} - -class ARMPseudoExpand pattern, - dag Result> - : ARMPseudoInst, - PseudoInstExpansion; - -class tPseudoExpand pattern, - dag Result> - : tPseudoInst, - PseudoInstExpansion; - -class t2PseudoExpand pattern, - dag Result> - : t2PseudoInst, - PseudoInstExpansion; - -// Almost all ARM instructions are predicable. 
-class I pattern> - : InstARM { - bits<4> p; - let Inst{31-28} = p; - let OutOperandList = oops; - let InOperandList = !con(iops, (ins pred:$p)); - let AsmString = !strconcat(opc, "${p}", asm); - let Pattern = pattern; - list Predicates = [IsARM]; -} - -// A few are not predicable -class InoP pattern> - : InstARM { - let OutOperandList = oops; - let InOperandList = iops; - let AsmString = !strconcat(opc, asm); - let Pattern = pattern; - let isPredicable = 0; - list Predicates = [IsARM]; -} - -// Same as I except it can optionally modify CPSR. Note it's modeled as an input -// operand since by default it's a zero register. It will become an implicit def -// once it's "flipped". -class sI pattern> - : InstARM { - bits<4> p; // Predicate operand - bits<1> s; // condition-code set flag ('1' if the insn should set the flags) - let Inst{31-28} = p; - let Inst{20} = s; - - let OutOperandList = oops; - let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); - let AsmString = !strconcat(opc, "${s}${p}", asm); - let Pattern = pattern; - list Predicates = [IsARM]; -} - -// Special cases -class XI pattern> - : InstARM { - let OutOperandList = oops; - let InOperandList = iops; - let AsmString = asm; - let Pattern = pattern; - list Predicates = [IsARM]; -} - -class AI pattern> - : I; -class AsI pattern> - : sI; -class AXI pattern> - : XI; -class AXIM pattern> - : XI; -class AInoP pattern> - : InoP; - -// Ctrl flow instructions -class ABI opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> - : I { - let Inst{27-24} = opcod; -} -class ABXI opcod, dag oops, dag iops, InstrItinClass itin, - string asm, list pattern> - : XI { - let Inst{27-24} = opcod; -} - -// BR_JT instructions -class JTI pattern> - : XI; - -class AIldr_ex_or_acq opcod, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> - : I { - bits<4> Rt; - bits<4> addr; - let Inst{27-23} = 0b00011; - let Inst{22-21} = opcod; - let Inst{20} = 1; 
- let Inst{19-16} = addr; - let Inst{15-12} = Rt; - let Inst{11-10} = 0b11; - let Inst{9-8} = opcod2; - let Inst{7-0} = 0b10011111; -} -class AIstr_ex_or_rel opcod, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> - : I { - bits<4> Rt; - bits<4> addr; - let Inst{27-23} = 0b00011; - let Inst{22-21} = opcod; - let Inst{20} = 0; - let Inst{19-16} = addr; - let Inst{11-10} = 0b11; - let Inst{9-8} = opcod2; - let Inst{7-4} = 0b1001; - let Inst{3-0} = Rt; -} -// Atomic load/store instructions -class AIldrex opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> - : AIldr_ex_or_acq; - -class AIstrex opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> - : AIstr_ex_or_rel { - bits<4> Rd; - let Inst{15-12} = Rd; -} - -// Exclusive load/store instructions - -class AIldaex opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> - : AIldr_ex_or_acq, - Requires<[IsARM, HasAcquireRelease, HasV7Clrex]>; - -class AIstlex opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> - : AIstr_ex_or_rel, - Requires<[IsARM, HasAcquireRelease, HasV7Clrex]> { - bits<4> Rd; - let Inst{15-12} = Rd; -} - -class AIswp pattern> - : AI { - bits<4> Rt; - bits<4> Rt2; - bits<4> addr; - let Inst{27-23} = 0b00010; - let Inst{22} = b; - let Inst{21-20} = 0b00; - let Inst{19-16} = addr; - let Inst{15-12} = Rt; - let Inst{11-4} = 0b00001001; - let Inst{3-0} = Rt2; - - let Unpredictable{11-8} = 0b1111; - let DecoderMethod = "DecodeSwap"; -} -// Acquire/Release load/store instructions -class AIldracq opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> - : AIldr_ex_or_acq, - Requires<[IsARM, HasAcquireRelease]>; - -class AIstrrel opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> - : AIstr_ex_or_rel, - Requires<[IsARM, HasAcquireRelease]> { - let Inst{15-12} 
= 0b1111; -} - -// addrmode1 instructions -class AI1 opcod, dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list pattern> - : I { - let Inst{24-21} = opcod; - let Inst{27-26} = 0b00; -} -class AsI1 opcod, dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list pattern> - : sI { - let Inst{24-21} = opcod; - let Inst{27-26} = 0b00; -} -class AXI1 opcod, dag oops, dag iops, Format f, InstrItinClass itin, - string asm, list pattern> - : XI { - let Inst{24-21} = opcod; - let Inst{27-26} = 0b00; -} - -// loads - -// LDR/LDRB/STR/STRB/... -class AI2ldst op, bit isLd, bit isByte, dag oops, dag iops, AddrMode am, - Format f, InstrItinClass itin, string opc, string asm, - list pattern> - : I { - let Inst{27-25} = op; - let Inst{24} = 1; // 24 == P - // 23 == U - let Inst{22} = isByte; - let Inst{21} = 0; // 21 == W - let Inst{20} = isLd; -} -// Indexed load/stores -class AI2ldstidx pattern> - : I { - bits<4> Rt; - let Inst{27-26} = 0b01; - let Inst{24} = isPre; // P bit - let Inst{22} = isByte; // B bit - let Inst{21} = isPre; // W bit - let Inst{20} = isLd; // L bit - let Inst{15-12} = Rt; -} -class AI2stridx_reg pattern> - : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr, - pattern> { - // AM2 store w/ two operands: (GPR, am2offset) - // {12} isAdd - // {11-0} imm12/Rm - bits<14> offset; - bits<4> Rn; - let Inst{25} = 1; - let Inst{23} = offset{12}; - let Inst{19-16} = Rn; - let Inst{11-5} = offset{11-5}; - let Inst{4} = 0; - let Inst{3-0} = offset{3-0}; -} - -class AI2stridx_imm pattern> - : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr, - pattern> { - // AM2 store w/ two operands: (GPR, am2offset) - // {12} isAdd - // {11-0} imm12/Rm - bits<14> offset; - bits<4> Rn; - let Inst{25} = 0; - let Inst{23} = offset{12}; - let Inst{19-16} = Rn; - let Inst{11-0} = offset{11-0}; -} - - -// FIXME: Merge with the above class when addrmode2 gets used for STR, STRB -// but for now use 
this class for STRT and STRBT. -class AI2stridxT pattern> - : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr, - pattern> { - // AM2 store w/ two operands: (GPR, am2offset) - // {17-14} Rn - // {13} 1 == Rm, 0 == imm12 - // {12} isAdd - // {11-0} imm12/Rm - bits<18> addr; - let Inst{25} = addr{13}; - let Inst{23} = addr{12}; - let Inst{19-16} = addr{17-14}; - let Inst{11-0} = addr{11-0}; -} - -// addrmode3 instructions -class AI3ld op, bit op20, dag oops, dag iops, Format f, - InstrItinClass itin, string opc, string asm, list pattern> - : I { - bits<14> addr; - bits<4> Rt; - let Inst{27-25} = 0b000; - let Inst{24} = 1; // P bit - let Inst{23} = addr{8}; // U bit - let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm - let Inst{21} = 0; // W bit - let Inst{20} = op20; // L bit - let Inst{19-16} = addr{12-9}; // Rn - let Inst{15-12} = Rt; // Rt - let Inst{11-8} = addr{7-4}; // imm7_4/zero - let Inst{7-4} = op; - let Inst{3-0} = addr{3-0}; // imm3_0/Rm - - let DecoderMethod = "DecodeAddrMode3Instruction"; -} - -class AI3ldstidx op, bit op20, bit isPre, dag oops, dag iops, - IndexMode im, Format f, InstrItinClass itin, string opc, - string asm, string cstr, list pattern> - : I { - bits<4> Rt; - let Inst{27-25} = 0b000; - let Inst{24} = isPre; // P bit - let Inst{21} = isPre; // W bit - let Inst{20} = op20; // L bit - let Inst{15-12} = Rt; // Rt - let Inst{7-4} = op; -} - -// FIXME: Merge with the above class when addrmode2 gets used for LDR, LDRB -// but for now use this class for LDRSBT, LDRHT, LDSHT. 
-class AI3ldstidxT op, bit isLoad, dag oops, dag iops, - IndexMode im, Format f, InstrItinClass itin, string opc, - string asm, string cstr, list pattern> - : I { - // {13} 1 == imm8, 0 == Rm - // {12-9} Rn - // {8} isAdd - // {7-4} imm7_4/zero - // {3-0} imm3_0/Rm - bits<4> addr; - bits<4> Rt; - let Inst{27-25} = 0b000; - let Inst{24} = 0; // P bit - let Inst{21} = 1; - let Inst{20} = isLoad; // L bit - let Inst{19-16} = addr; // Rn - let Inst{15-12} = Rt; // Rt - let Inst{7-4} = op; -} - -// stores -class AI3str op, dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list pattern> - : I { - bits<14> addr; - bits<4> Rt; - let Inst{27-25} = 0b000; - let Inst{24} = 1; // P bit - let Inst{23} = addr{8}; // U bit - let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm - let Inst{21} = 0; // W bit - let Inst{20} = 0; // L bit - let Inst{19-16} = addr{12-9}; // Rn - let Inst{15-12} = Rt; // Rt - let Inst{11-8} = addr{7-4}; // imm7_4/zero - let Inst{7-4} = op; - let Inst{3-0} = addr{3-0}; // imm3_0/Rm - let DecoderMethod = "DecodeAddrMode3Instruction"; -} - -// addrmode4 instructions -class AXI4 pattern> - : XI { - bits<4> p; - bits<16> regs; - bits<4> Rn; - let Inst{31-28} = p; - let Inst{27-25} = 0b100; - let Inst{22} = 0; // S bit - let Inst{19-16} = Rn; - let Inst{15-0} = regs; -} - -// Unsigned multiply, multiply-accumulate instructions. 
-class AMul1I opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> - : I { - let Inst{7-4} = 0b1001; - let Inst{20} = 0; // S bit - let Inst{27-21} = opcod; -} -class AsMul1I opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> - : sI { - let Inst{7-4} = 0b1001; - let Inst{27-21} = opcod; -} - -// Most significant word multiply -class AMul2I opcod, bits<4> opc7_4, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list pattern> - : I { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - let Inst{7-4} = opc7_4; - let Inst{20} = 1; - let Inst{27-21} = opcod; - let Inst{19-16} = Rd; - let Inst{11-8} = Rm; - let Inst{3-0} = Rn; -} -// MSW multiple w/ Ra operand -class AMul2Ia opcod, bits<4> opc7_4, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list pattern> - : AMul2I { - bits<4> Ra; - let Inst{15-12} = Ra; -} - -// SMUL / SMULW / SMLA / SMLAW -class AMulxyIbase opcod, bits<2> bit6_5, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list pattern> - : I { - bits<4> Rn; - bits<4> Rm; - let Inst{4} = 0; - let Inst{7} = 1; - let Inst{20} = 0; - let Inst{27-21} = opcod; - let Inst{6-5} = bit6_5; - let Inst{11-8} = Rm; - let Inst{3-0} = Rn; -} -class AMulxyI opcod, bits<2> bit6_5, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list pattern> - : AMulxyIbase { - bits<4> Rd; - let Inst{19-16} = Rd; -} - -// AMulxyI with Ra operand -class AMulxyIa opcod, bits<2> bit6_5, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list pattern> - : AMulxyI { - bits<4> Ra; - let Inst{15-12} = Ra; -} -// SMLAL* -class AMulxyI64 opcod, bits<2> bit6_5, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list pattern> - : AMulxyIbase { - bits<4> RdLo; - bits<4> RdHi; - let Inst{19-16} = RdHi; - let Inst{15-12} = RdLo; -} - -// Extend instructions. 
-class AExtI opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> - : I { - // All AExtI instructions have Rd and Rm register operands. - bits<4> Rd; - bits<4> Rm; - let Inst{15-12} = Rd; - let Inst{3-0} = Rm; - let Inst{7-4} = 0b0111; - let Inst{9-8} = 0b00; - let Inst{27-20} = opcod; - - let Unpredictable{9-8} = 0b11; -} - -// Misc Arithmetic instructions. -class AMiscA1I opcod, bits<4> opc7_4, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list pattern> - : I { - bits<4> Rd; - bits<4> Rm; - let Inst{27-20} = opcod; - let Inst{19-16} = 0b1111; - let Inst{15-12} = Rd; - let Inst{11-8} = 0b1111; - let Inst{7-4} = opc7_4; - let Inst{3-0} = Rm; -} - -// Division instructions. -class ADivA1I opcod, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list pattern> - : I { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - let Inst{27-23} = 0b01110; - let Inst{22-20} = opcod; - let Inst{19-16} = Rd; - let Inst{15-12} = 0b1111; - let Inst{11-8} = Rm; - let Inst{7-4} = 0b0001; - let Inst{3-0} = Rn; -} - -// PKH instructions -def PKHLSLAsmOperand : ImmAsmOperand<0,31> { - let Name = "PKHLSLImm"; - let ParserMethod = "parsePKHLSLImm"; -} -def pkh_lsl_amt: Operand, ImmLeaf= 0 && Imm < 32; }]>{ - let PrintMethod = "printPKHLSLShiftImm"; - let ParserMatchClass = PKHLSLAsmOperand; -} -def PKHASRAsmOperand : AsmOperandClass { - let Name = "PKHASRImm"; - let ParserMethod = "parsePKHASRImm"; -} -def pkh_asr_amt: Operand, ImmLeaf 0 && Imm <= 32; }]>{ - let PrintMethod = "printPKHASRShiftImm"; - let ParserMatchClass = PKHASRAsmOperand; -} - -class APKHI opcod, bit tb, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> - : I { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - bits<5> sh; - let Inst{27-20} = opcod; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-7} = sh; - let Inst{6} = tb; - let Inst{5-4} = 0b01; - let Inst{3-0} = Rm; -} - 
-//===----------------------------------------------------------------------===// - -// ARMPat - Same as Pat<>, but requires that the compiler be in ARM mode. -class ARMPat : Pat { - list Predicates = [IsARM]; -} -class ARMV5TPat : Pat { - list Predicates = [IsARM, HasV5T]; -} -class ARMV5TEPat : Pat { - list Predicates = [IsARM, HasV5TE]; -} -// ARMV5MOPat - Same as ARMV5TEPat with UseMulOps. -class ARMV5MOPat : Pat { - list Predicates = [IsARM, HasV5TE, UseMulOps]; -} -class ARMV6Pat : Pat { - list Predicates = [IsARM, HasV6]; -} -class VFPPat : Pat { - list Predicates = [HasVFP2]; -} -class VFPNoNEONPat : Pat { - list Predicates = [HasVFP2, DontUseNEONForFP]; -} -class Thumb2DSPPat : Pat { - list Predicates = [IsThumb2, HasDSP]; -} -class Thumb2DSPMulPat : Pat { - list Predicates = [IsThumb2, UseMulOps, HasDSP]; -} -class FP16Pat : Pat { - list Predicates = [HasFP16]; -} -class FullFP16Pat : Pat { - list Predicates = [HasFullFP16]; -} -//===----------------------------------------------------------------------===// -// Thumb Instruction Format Definitions. -// - -class ThumbI pattern> - : InstThumb { - let OutOperandList = oops; - let InOperandList = iops; - let AsmString = asm; - let Pattern = pattern; - list Predicates = [IsThumb]; -} - -// TI - Thumb instruction. 
-class TI pattern> - : ThumbI; - -// Two-address instructions -class TIt pattern> - : ThumbI; - -// tBL, tBX 32-bit instructions -class TIx2 opcod1, bits<2> opcod2, bit opcod3, - dag oops, dag iops, InstrItinClass itin, string asm, - list pattern> - : ThumbI, - Encoding { - let Inst{31-27} = opcod1; - let Inst{15-14} = opcod2; - let Inst{12} = opcod3; -} - -// BR_JT instructions -class TJTI pattern> - : ThumbI; - -// Thumb1 only -class Thumb1I pattern> - : InstThumb { - let OutOperandList = oops; - let InOperandList = iops; - let AsmString = asm; - let Pattern = pattern; - list Predicates = [IsThumb, IsThumb1Only]; -} - -class T1I pattern> - : Thumb1I; -class T1Ix2 pattern> - : Thumb1I; - -// Two-address instructions -class T1It pattern> - : Thumb1I; - -// Thumb1 instruction that can either be predicated or set CPSR. -class Thumb1sI pattern> - : InstThumb { - let OutOperandList = !con(oops, (outs s_cc_out:$s)); - let InOperandList = !con(iops, (ins pred:$p)); - let AsmString = !strconcat(opc, "${s}${p}", asm); - let Pattern = pattern; - let thumbArithFlagSetting = 1; - list Predicates = [IsThumb, IsThumb1Only]; - let DecoderNamespace = "ThumbSBit"; -} - -class T1sI pattern> - : Thumb1sI; - -// Two-address instructions -class T1sIt pattern> - : Thumb1sI; - -// Thumb1 instruction that can be predicated. -class Thumb1pI pattern> - : InstThumb { - let OutOperandList = oops; - let InOperandList = !con(iops, (ins pred:$p)); - let AsmString = !strconcat(opc, "${p}", asm); - let Pattern = pattern; - list Predicates = [IsThumb, IsThumb1Only]; -} - -class T1pI pattern> - : Thumb1pI; - -// Two-address instructions -class T1pIt pattern> - : Thumb1pI; - -class T1pIs pattern> - : Thumb1pI; - -class Encoding16 : Encoding { - let Inst{31-16} = 0x0000; -} - -// A6.2 16-bit Thumb instruction encoding -class T1Encoding opcode> : Encoding16 { - let Inst{15-10} = opcode; -} - -// A6.2.1 Shift (immediate), add, subtract, move, and compare encoding. 
-class T1General opcode> : Encoding16 { - let Inst{15-14} = 0b00; - let Inst{13-9} = opcode; -} - -// A6.2.2 Data-processing encoding. -class T1DataProcessing opcode> : Encoding16 { - let Inst{15-10} = 0b010000; - let Inst{9-6} = opcode; -} - -// A6.2.3 Special data instructions and branch and exchange encoding. -class T1Special opcode> : Encoding16 { - let Inst{15-10} = 0b010001; - let Inst{9-6} = opcode; -} - -// A6.2.4 Load/store single data item encoding. -class T1LoadStore opA, bits<3> opB> : Encoding16 { - let Inst{15-12} = opA; - let Inst{11-9} = opB; -} -class T1LdStSP opB> : T1LoadStore<0b1001, opB>; // SP relative - -class T1BranchCond opcode> : Encoding16 { - let Inst{15-12} = opcode; -} - -// Helper classes to encode Thumb1 loads and stores. For immediates, the -// following bits are used for "opA" (see A6.2.4): -// -// 0b0110 => Immediate, 4 bytes -// 0b1000 => Immediate, 2 bytes -// 0b0111 => Immediate, 1 byte -class T1pILdStEncode opcode, dag oops, dag iops, AddrMode am, - InstrItinClass itin, string opc, string asm, - list pattern> - : Thumb1pI, - T1LoadStore<0b0101, opcode> { - bits<3> Rt; - bits<8> addr; - let Inst{8-6} = addr{5-3}; // Rm - let Inst{5-3} = addr{2-0}; // Rn - let Inst{2-0} = Rt; -} -class T1pILdStEncodeImm opA, bit opB, dag oops, dag iops, AddrMode am, - InstrItinClass itin, string opc, string asm, - list pattern> - : Thumb1pI, - T1LoadStore { - bits<3> Rt; - bits<8> addr; - let Inst{10-6} = addr{7-3}; // imm5 - let Inst{5-3} = addr{2-0}; // Rn - let Inst{2-0} = Rt; -} - -// A6.2.5 Miscellaneous 16-bit instructions encoding. -class T1Misc opcode> : Encoding16 { - let Inst{15-12} = 0b1011; - let Inst{11-5} = opcode; -} - -// Thumb2I - Thumb2 instruction. Almost all Thumb2 instructions are predicable. 
-class Thumb2I pattern> - : InstARM { - let OutOperandList = oops; - let InOperandList = !con(iops, (ins pred:$p)); - let AsmString = !strconcat(opc, "${p}", asm); - let Pattern = pattern; - list Predicates = [IsThumb2]; - let DecoderNamespace = "Thumb2"; -} - -// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an -// input operand since by default it's a zero register. It will become an -// implicit def once it's "flipped". -// -// FIXME: This uses unified syntax so {s} comes before {p}. We should make it -// more consistent. -class Thumb2sI pattern> - : InstARM { - bits<1> s; // condition-code set flag ('1' if the insn should set the flags) - let Inst{20} = s; - - let OutOperandList = oops; - let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); - let AsmString = !strconcat(opc, "${s}${p}", asm); - let Pattern = pattern; - list Predicates = [IsThumb2]; - let DecoderNamespace = "Thumb2"; -} - -// Special cases -class Thumb2XI pattern> - : InstARM { - let OutOperandList = oops; - let InOperandList = iops; - let AsmString = asm; - let Pattern = pattern; - list Predicates = [IsThumb2]; - let DecoderNamespace = "Thumb2"; -} - -class ThumbXI pattern> - : InstARM { - let OutOperandList = oops; - let InOperandList = iops; - let AsmString = asm; - let Pattern = pattern; - list Predicates = [IsThumb, IsThumb1Only]; - let DecoderNamespace = "Thumb"; -} - -class T2I pattern> - : Thumb2I; -class T2Ii12 pattern> - : Thumb2I; -class T2Ii8 pattern> - : Thumb2I; -class T2Iso pattern> - : Thumb2I; -class T2Ipc pattern> - : Thumb2I; -class T2Ii8s4 pattern> - : Thumb2I { - bits<4> Rt; - bits<4> Rt2; - bits<13> addr; - let Inst{31-25} = 0b1110100; - let Inst{24} = P; - let Inst{23} = addr{8}; - let Inst{22} = 1; - let Inst{21} = W; - let Inst{20} = isLoad; - let Inst{19-16} = addr{12-9}; - let Inst{15-12} = Rt{3-0}; - let Inst{11-8} = Rt2{3-0}; - let Inst{7-0} = addr{7-0}; -} -class T2Ii8s4post pattern> - : Thumb2I { - bits<4> Rt; - bits<4> Rt2; - 
bits<4> addr; - bits<9> imm; - let Inst{31-25} = 0b1110100; - let Inst{24} = P; - let Inst{23} = imm{8}; - let Inst{22} = 1; - let Inst{21} = W; - let Inst{20} = isLoad; - let Inst{19-16} = addr; - let Inst{15-12} = Rt{3-0}; - let Inst{11-8} = Rt2{3-0}; - let Inst{7-0} = imm{7-0}; -} - -class T2sI pattern> - : Thumb2sI; - -class T2XI pattern> - : Thumb2XI; -class T2JTI pattern> - : Thumb2XI; - -// Move to/from coprocessor instructions -class T2Cop opc, dag oops, dag iops, string opcstr, string asm, - list pattern> - : T2I , Requires<[IsThumb2]> { - let Inst{31-28} = opc; -} - -// Two-address instructions -class T2XIt pattern> - : Thumb2XI; - -// T2Ipreldst - Thumb2 pre-indexed load / store instructions. -class T2Ipreldst opcod, bit load, bit pre, - dag oops, dag iops, - AddrMode am, IndexMode im, InstrItinClass itin, - string opc, string asm, string cstr, list pattern> - : InstARM { - let OutOperandList = oops; - let InOperandList = !con(iops, (ins pred:$p)); - let AsmString = !strconcat(opc, "${p}", asm); - let Pattern = pattern; - list Predicates = [IsThumb2]; - let DecoderNamespace = "Thumb2"; - - bits<4> Rt; - bits<13> addr; - let Inst{31-27} = 0b11111; - let Inst{26-25} = 0b00; - let Inst{24} = signed; - let Inst{23} = 0; - let Inst{22-21} = opcod; - let Inst{20} = load; - let Inst{19-16} = addr{12-9}; - let Inst{15-12} = Rt{3-0}; - let Inst{11} = 1; - // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed - let Inst{10} = pre; // The P bit. - let Inst{9} = addr{8}; // Sign bit - let Inst{8} = 1; // The W bit. - let Inst{7-0} = addr{7-0}; - - let DecoderMethod = "DecodeT2LdStPre"; -} - -// T2Ipostldst - Thumb2 post-indexed load / store instructions. 
-class T2Ipostldst opcod, bit load, bit pre, - dag oops, dag iops, - AddrMode am, IndexMode im, InstrItinClass itin, - string opc, string asm, string cstr, list pattern> - : InstARM { - let OutOperandList = oops; - let InOperandList = !con(iops, (ins pred:$p)); - let AsmString = !strconcat(opc, "${p}", asm); - let Pattern = pattern; - list Predicates = [IsThumb2]; - let DecoderNamespace = "Thumb2"; - - bits<4> Rt; - bits<4> Rn; - bits<9> offset; - let Inst{31-27} = 0b11111; - let Inst{26-25} = 0b00; - let Inst{24} = signed; - let Inst{23} = 0; - let Inst{22-21} = opcod; - let Inst{20} = load; - let Inst{19-16} = Rn; - let Inst{15-12} = Rt{3-0}; - let Inst{11} = 1; - // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed - let Inst{10} = pre; // The P bit. - let Inst{9} = offset{8}; // Sign bit - let Inst{8} = 1; // The W bit. - let Inst{7-0} = offset{7-0}; - - let DecoderMethod = "DecodeT2LdStPre"; -} - -// T1Pat - Same as Pat<>, but requires that the compiler be in Thumb1 mode. -class T1Pat : Pat { - list Predicates = [IsThumb, IsThumb1Only]; -} - -// T2v6Pat - Same as Pat<>, but requires V6T2 Thumb2 mode. -class T2v6Pat : Pat { - list Predicates = [IsThumb2, HasV6T2]; -} - -// T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode. -class T2Pat : Pat { - list Predicates = [IsThumb2]; -} - -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// ARM VFP Instruction templates. -// - -// Almost all VFP instructions are predicable. 
-class VFPI pattern> - : InstARM { - bits<4> p; - let Inst{31-28} = p; - let OutOperandList = oops; - let InOperandList = !con(iops, (ins pred:$p)); - let AsmString = !strconcat(opc, "${p}", asm); - let Pattern = pattern; - let PostEncoderMethod = "VFPThumb2PostEncoder"; - let DecoderNamespace = "VFP"; - list Predicates = [HasVFP2]; -} - -// Special cases -class VFPXI pattern> - : InstARM { - bits<4> p; - let Inst{31-28} = p; - let OutOperandList = oops; - let InOperandList = iops; - let AsmString = asm; - let Pattern = pattern; - let PostEncoderMethod = "VFPThumb2PostEncoder"; - let DecoderNamespace = "VFP"; - list Predicates = [HasVFP2]; -} - -class VFPAI pattern> - : VFPI { - let PostEncoderMethod = "VFPThumb2PostEncoder"; -} - -// ARM VFP addrmode5 loads and stores -class ADI5 opcod1, bits<2> opcod2, dag oops, dag iops, - InstrItinClass itin, - string opc, string asm, list pattern> - : VFPI { - // Instruction operands. - bits<5> Dd; - bits<13> addr; - - // Encode instruction operands. - let Inst{23} = addr{8}; // U (add = (U == '1')) - let Inst{22} = Dd{4}; - let Inst{19-16} = addr{12-9}; // Rn - let Inst{15-12} = Dd{3-0}; - let Inst{7-0} = addr{7-0}; // imm8 - - let Inst{27-24} = opcod1; - let Inst{21-20} = opcod2; - let Inst{11-9} = 0b101; - let Inst{8} = 1; // Double precision - - // Loads & stores operate on both NEON and VFP pipelines. - let D = VFPNeonDomain; -} - -class ASI5 opcod1, bits<2> opcod2, dag oops, dag iops, - InstrItinClass itin, - string opc, string asm, list pattern> - : VFPI { - // Instruction operands. - bits<5> Sd; - bits<13> addr; - - // Encode instruction operands. - let Inst{23} = addr{8}; // U (add = (U == '1')) - let Inst{22} = Sd{0}; - let Inst{19-16} = addr{12-9}; // Rn - let Inst{15-12} = Sd{4-1}; - let Inst{7-0} = addr{7-0}; // imm8 - - let Inst{27-24} = opcod1; - let Inst{21-20} = opcod2; - let Inst{11-9} = 0b101; - let Inst{8} = 0; // Single precision - - // Loads & stores operate on both NEON and VFP pipelines. 
- let D = VFPNeonDomain; -} - -class AHI5 opcod1, bits<2> opcod2, dag oops, dag iops, - InstrItinClass itin, - string opc, string asm, list pattern> - : VFPI { - list Predicates = [HasFullFP16]; - - // Instruction operands. - bits<5> Sd; - bits<13> addr; - - // Encode instruction operands. - let Inst{23} = addr{8}; // U (add = (U == '1')) - let Inst{22} = Sd{0}; - let Inst{19-16} = addr{12-9}; // Rn - let Inst{15-12} = Sd{4-1}; - let Inst{7-0} = addr{7-0}; // imm8 - - let Inst{27-24} = opcod1; - let Inst{21-20} = opcod2; - let Inst{11-8} = 0b1001; // Half precision - - // Loads & stores operate on both NEON and VFP pipelines. - let D = VFPNeonDomain; -} - -// VFP Load / store multiple pseudo instructions. -class PseudoVFPLdStM pattern> - : InstARM { - let OutOperandList = oops; - let InOperandList = !con(iops, (ins pred:$p)); - let Pattern = pattern; - list Predicates = [HasVFP2]; -} - -// Load / store multiple - -// Unknown precision -class AXXI4 pattern> - : VFPXI { - // Instruction operands. - bits<4> Rn; - bits<13> regs; - - // Encode instruction operands. - let Inst{19-16} = Rn; - let Inst{22} = 0; - let Inst{15-12} = regs{11-8}; - let Inst{7-1} = regs{7-1}; - - let Inst{27-25} = 0b110; - let Inst{11-8} = 0b1011; - let Inst{0} = 1; -} - -// Double precision -class AXDI4 pattern> - : VFPXI { - // Instruction operands. - bits<4> Rn; - bits<13> regs; - - // Encode instruction operands. - let Inst{19-16} = Rn; - let Inst{22} = regs{12}; - let Inst{15-12} = regs{11-8}; - let Inst{7-1} = regs{7-1}; - - let Inst{27-25} = 0b110; - let Inst{11-9} = 0b101; - let Inst{8} = 1; // Double precision - let Inst{0} = 0; -} - -// Single Precision -class AXSI4 pattern> - : VFPXI { - // Instruction operands. - bits<4> Rn; - bits<13> regs; - - // Encode instruction operands. 
- let Inst{19-16} = Rn; - let Inst{22} = regs{8}; - let Inst{15-12} = regs{12-9}; - let Inst{7-0} = regs{7-0}; - - let Inst{27-25} = 0b110; - let Inst{11-9} = 0b101; - let Inst{8} = 0; // Single precision -} - -// Double precision, unary -class ADuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, - bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, - string asm, list pattern> - : VFPAI { - // Instruction operands. - bits<5> Dd; - bits<5> Dm; - - // Encode instruction operands. - let Inst{3-0} = Dm{3-0}; - let Inst{5} = Dm{4}; - let Inst{15-12} = Dd{3-0}; - let Inst{22} = Dd{4}; - - let Inst{27-23} = opcod1; - let Inst{21-20} = opcod2; - let Inst{19-16} = opcod3; - let Inst{11-9} = 0b101; - let Inst{8} = 1; // Double precision - let Inst{7-6} = opcod4; - let Inst{4} = opcod5; - - let Predicates = [HasVFP2, HasDPVFP]; -} - -// Double precision, unary, not-predicated -class ADuInp opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, - bit opcod5, dag oops, dag iops, InstrItinClass itin, - string asm, list pattern> - : VFPXI { - // Instruction operands. - bits<5> Dd; - bits<5> Dm; - - let Inst{31-28} = 0b1111; - - // Encode instruction operands. - let Inst{3-0} = Dm{3-0}; - let Inst{5} = Dm{4}; - let Inst{15-12} = Dd{3-0}; - let Inst{22} = Dd{4}; - - let Inst{27-23} = opcod1; - let Inst{21-20} = opcod2; - let Inst{19-16} = opcod3; - let Inst{11-9} = 0b101; - let Inst{8} = 1; // Double precision - let Inst{7-6} = opcod4; - let Inst{4} = opcod5; -} - -// Double precision, binary -class ADbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, - dag iops, InstrItinClass itin, string opc, string asm, - list pattern> - : VFPAI { - // Instruction operands. - bits<5> Dd; - bits<5> Dn; - bits<5> Dm; - - // Encode instruction operands. 
- let Inst{3-0} = Dm{3-0}; - let Inst{5} = Dm{4}; - let Inst{19-16} = Dn{3-0}; - let Inst{7} = Dn{4}; - let Inst{15-12} = Dd{3-0}; - let Inst{22} = Dd{4}; - - let Inst{27-23} = opcod1; - let Inst{21-20} = opcod2; - let Inst{11-9} = 0b101; - let Inst{8} = 1; // Double precision - let Inst{6} = op6; - let Inst{4} = op4; - - let Predicates = [HasVFP2, HasDPVFP]; -} - -// FP, binary, not predicated -class ADbInp opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, - InstrItinClass itin, string asm, list pattern> - : VFPXI -{ - // Instruction operands. - bits<5> Dd; - bits<5> Dn; - bits<5> Dm; - - let Inst{31-28} = 0b1111; - - // Encode instruction operands. - let Inst{3-0} = Dm{3-0}; - let Inst{5} = Dm{4}; - let Inst{19-16} = Dn{3-0}; - let Inst{7} = Dn{4}; - let Inst{15-12} = Dd{3-0}; - let Inst{22} = Dd{4}; - - let Inst{27-23} = opcod1; - let Inst{21-20} = opcod2; - let Inst{11-9} = 0b101; - let Inst{8} = 1; // double precision - let Inst{6} = opcod3; - let Inst{4} = 0; - - let Predicates = [HasVFP2, HasDPVFP]; -} - -// Single precision, unary, predicated -class ASuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, - bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, - string asm, list pattern> - : VFPAI { - // Instruction operands. - bits<5> Sd; - bits<5> Sm; - - // Encode instruction operands. - let Inst{3-0} = Sm{4-1}; - let Inst{5} = Sm{0}; - let Inst{15-12} = Sd{4-1}; - let Inst{22} = Sd{0}; - - let Inst{27-23} = opcod1; - let Inst{21-20} = opcod2; - let Inst{19-16} = opcod3; - let Inst{11-9} = 0b101; - let Inst{8} = 0; // Single precision - let Inst{7-6} = opcod4; - let Inst{4} = opcod5; -} - -// Single precision, unary, non-predicated -class ASuInp opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, - bit opcod5, dag oops, dag iops, InstrItinClass itin, - string asm, list pattern> - : VFPXI { - // Instruction operands. - bits<5> Sd; - bits<5> Sm; - - let Inst{31-28} = 0b1111; - - // Encode instruction operands. 
- let Inst{3-0} = Sm{4-1}; - let Inst{5} = Sm{0}; - let Inst{15-12} = Sd{4-1}; - let Inst{22} = Sd{0}; - - let Inst{27-23} = opcod1; - let Inst{21-20} = opcod2; - let Inst{19-16} = opcod3; - let Inst{11-9} = 0b101; - let Inst{8} = 0; // Single precision - let Inst{7-6} = opcod4; - let Inst{4} = opcod5; -} - -// Single precision unary, if no NEON. Same as ASuI except not available if -// NEON is enabled. -class ASuIn opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, - bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, - string asm, list pattern> - : ASuI { - list Predicates = [HasVFP2,DontUseNEONForFP]; -} - -// Single precision, binary -class ASbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list pattern> - : VFPAI { - // Instruction operands. - bits<5> Sd; - bits<5> Sn; - bits<5> Sm; - - // Encode instruction operands. - let Inst{3-0} = Sm{4-1}; - let Inst{5} = Sm{0}; - let Inst{19-16} = Sn{4-1}; - let Inst{7} = Sn{0}; - let Inst{15-12} = Sd{4-1}; - let Inst{22} = Sd{0}; - - let Inst{27-23} = opcod1; - let Inst{21-20} = opcod2; - let Inst{11-9} = 0b101; - let Inst{8} = 0; // Single precision - let Inst{6} = op6; - let Inst{4} = op4; -} - -// Single precision, binary, not predicated -class ASbInp opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, - InstrItinClass itin, string asm, list pattern> - : VFPXI -{ - // Instruction operands. - bits<5> Sd; - bits<5> Sn; - bits<5> Sm; - - let Inst{31-28} = 0b1111; - - // Encode instruction operands. - let Inst{3-0} = Sm{4-1}; - let Inst{5} = Sm{0}; - let Inst{19-16} = Sn{4-1}; - let Inst{7} = Sn{0}; - let Inst{15-12} = Sd{4-1}; - let Inst{22} = Sd{0}; - - let Inst{27-23} = opcod1; - let Inst{21-20} = opcod2; - let Inst{11-9} = 0b101; - let Inst{8} = 0; // Single precision - let Inst{6} = opcod3; - let Inst{4} = 0; -} - -// Single precision binary, if no NEON. Same as ASbI except not available if -// NEON is enabled. 
-class ASbIn opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, - dag iops, InstrItinClass itin, string opc, string asm, - list pattern> - : ASbI { - list Predicates = [HasVFP2,DontUseNEONForFP]; - - // Instruction operands. - bits<5> Sd; - bits<5> Sn; - bits<5> Sm; - - // Encode instruction operands. - let Inst{3-0} = Sm{4-1}; - let Inst{5} = Sm{0}; - let Inst{19-16} = Sn{4-1}; - let Inst{7} = Sn{0}; - let Inst{15-12} = Sd{4-1}; - let Inst{22} = Sd{0}; -} - -// Half precision, unary, predicated -class AHuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, - bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, - string asm, list pattern> - : VFPAI { - list Predicates = [HasFullFP16]; - - // Instruction operands. - bits<5> Sd; - bits<5> Sm; - - // Encode instruction operands. - let Inst{3-0} = Sm{4-1}; - let Inst{5} = Sm{0}; - let Inst{15-12} = Sd{4-1}; - let Inst{22} = Sd{0}; - - let Inst{27-23} = opcod1; - let Inst{21-20} = opcod2; - let Inst{19-16} = opcod3; - let Inst{11-8} = 0b1001; // Half precision - let Inst{7-6} = opcod4; - let Inst{4} = opcod5; -} - -// Half precision, unary, non-predicated -class AHuInp opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, - bit opcod5, dag oops, dag iops, InstrItinClass itin, - string asm, list pattern> - : VFPXI { - list Predicates = [HasFullFP16]; - - // Instruction operands. - bits<5> Sd; - bits<5> Sm; - - let Inst{31-28} = 0b1111; - - // Encode instruction operands. - let Inst{3-0} = Sm{4-1}; - let Inst{5} = Sm{0}; - let Inst{15-12} = Sd{4-1}; - let Inst{22} = Sd{0}; - - let Inst{27-23} = opcod1; - let Inst{21-20} = opcod2; - let Inst{19-16} = opcod3; - let Inst{11-8} = 0b1001; // Half precision - let Inst{7-6} = opcod4; - let Inst{4} = opcod5; -} - -// Half precision, binary -class AHbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list pattern> - : VFPAI { - list Predicates = [HasFullFP16]; - - // Instruction operands. 
- bits<5> Sd; - bits<5> Sn; - bits<5> Sm; - - // Encode instruction operands. - let Inst{3-0} = Sm{4-1}; - let Inst{5} = Sm{0}; - let Inst{19-16} = Sn{4-1}; - let Inst{7} = Sn{0}; - let Inst{15-12} = Sd{4-1}; - let Inst{22} = Sd{0}; - - let Inst{27-23} = opcod1; - let Inst{21-20} = opcod2; - let Inst{11-8} = 0b1001; // Half precision - let Inst{6} = op6; - let Inst{4} = op4; -} - -// Half precision, binary, not predicated -class AHbInp opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, - InstrItinClass itin, string asm, list pattern> - : VFPXI { - list Predicates = [HasFullFP16]; - - // Instruction operands. - bits<5> Sd; - bits<5> Sn; - bits<5> Sm; - - let Inst{31-28} = 0b1111; - - // Encode instruction operands. - let Inst{3-0} = Sm{4-1}; - let Inst{5} = Sm{0}; - let Inst{19-16} = Sn{4-1}; - let Inst{7} = Sn{0}; - let Inst{15-12} = Sd{4-1}; - let Inst{22} = Sd{0}; - - let Inst{27-23} = opcod1; - let Inst{21-20} = opcod2; - let Inst{11-8} = 0b1001; // Half precision - let Inst{6} = opcod3; - let Inst{4} = 0; -} - -// VFP conversion instructions -class AVConv1I opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, - dag oops, dag iops, InstrItinClass itin, string opc, string asm, - list pattern> - : VFPAI { - let Inst{27-23} = opcod1; - let Inst{21-20} = opcod2; - let Inst{19-16} = opcod3; - let Inst{11-8} = opcod4; - let Inst{6} = 1; - let Inst{4} = 0; -} - -// VFP conversion between floating-point and fixed-point -class AVConv1XI op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, - dag oops, dag iops, InstrItinClass itin, string opc, string asm, - list pattern> - : AVConv1I { - bits<5> fbits; - // size (fixed-point number): sx == 0 ? 
16 : 32 - let Inst{7} = op5; // sx - let Inst{5} = fbits{0}; - let Inst{3-0} = fbits{4-1}; -} - -// VFP conversion instructions, if no NEON -class AVConv1In opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, - dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> - : AVConv1I { - list Predicates = [HasVFP2,DontUseNEONForFP]; -} - -class AVConvXI opcod1, bits<4> opcod2, dag oops, dag iops, Format f, - InstrItinClass itin, - string opc, string asm, list pattern> - : VFPAI { - let Inst{27-20} = opcod1; - let Inst{11-8} = opcod2; - let Inst{4} = 1; -} - -class AVConv2I opcod1, bits<4> opcod2, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list pattern> - : AVConvXI; - -class AVConv3I opcod1, bits<4> opcod2, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list pattern> - : AVConvXI; - -class AVConv4I opcod1, bits<4> opcod2, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list pattern> - : AVConvXI; - -class AVConv5I opcod1, bits<4> opcod2, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list pattern> - : AVConvXI; - -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// ARM NEON Instruction templates. -// - -class NeonI pattern> - : InstARM { - let OutOperandList = oops; - let InOperandList = !con(iops, (ins pred:$p)); - let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm); - let Pattern = pattern; - list Predicates = [HasNEON]; - let DecoderNamespace = "NEON"; -} - -// Same as NeonI except it does not have a "data type" specifier. 
-class NeonXI pattern> - : InstARM { - let OutOperandList = oops; - let InOperandList = !con(iops, (ins pred:$p)); - let AsmString = !strconcat(opc, "${p}", "\t", asm); - let Pattern = pattern; - list Predicates = [HasNEON]; - let DecoderNamespace = "NEON"; -} - -// Same as NeonI except it is not predicated -class NeonInp pattern> - : InstARM { - let OutOperandList = oops; - let InOperandList = iops; - let AsmString = !strconcat(opc, ".", dt, "\t", asm); - let Pattern = pattern; - list Predicates = [HasNEON]; - let DecoderNamespace = "NEON"; - - let Inst{31-28} = 0b1111; -} - -class NLdSt op21_20, bits<4> op11_8, bits<4> op7_4, - dag oops, dag iops, InstrItinClass itin, - string opc, string dt, string asm, string cstr, list pattern> - : NeonI { - let Inst{31-24} = 0b11110100; - let Inst{23} = op23; - let Inst{21-20} = op21_20; - let Inst{11-8} = op11_8; - let Inst{7-4} = op7_4; - - let PostEncoderMethod = "NEONThumb2LoadStorePostEncoder"; - let DecoderNamespace = "NEONLoadStore"; - - bits<5> Vd; - bits<6> Rn; - bits<4> Rm; - - let Inst{22} = Vd{4}; - let Inst{15-12} = Vd{3-0}; - let Inst{19-16} = Rn{3-0}; - let Inst{3-0} = Rm{3-0}; -} - -class NLdStLn op21_20, bits<4> op11_8, bits<4> op7_4, - dag oops, dag iops, InstrItinClass itin, - string opc, string dt, string asm, string cstr, list pattern> - : NLdSt { - bits<3> lane; -} - -class PseudoNLdSt - : InstARM { - let OutOperandList = oops; - let InOperandList = !con(iops, (ins pred:$p)); - list Predicates = [HasNEON]; -} - -class PseudoNeonI pattern> - : InstARM { - let OutOperandList = oops; - let InOperandList = !con(iops, (ins pred:$p)); - let Pattern = pattern; - list Predicates = [HasNEON]; -} - -class NDataI pattern> - : NeonI { - let Inst{31-25} = 0b1111001; - let PostEncoderMethod = "NEONThumb2DataIPostEncoder"; - let DecoderNamespace = "NEONData"; -} - -class NDataXI pattern> - : NeonXI { - let Inst{31-25} = 0b1111001; - let PostEncoderMethod = "NEONThumb2DataIPostEncoder"; - let DecoderNamespace = 
"NEONData"; -} - -// NEON "one register and a modified immediate" format. -class N1ModImm op21_19, bits<4> op11_8, bit op7, bit op6, - bit op5, bit op4, - dag oops, dag iops, InstrItinClass itin, - string opc, string dt, string asm, string cstr, - list pattern> - : NDataI { - let Inst{23} = op23; - let Inst{21-19} = op21_19; - let Inst{11-8} = op11_8; - let Inst{7} = op7; - let Inst{6} = op6; - let Inst{5} = op5; - let Inst{4} = op4; - - // Instruction operands. - bits<5> Vd; - bits<13> SIMM; - - let Inst{15-12} = Vd{3-0}; - let Inst{22} = Vd{4}; - let Inst{24} = SIMM{7}; - let Inst{18-16} = SIMM{6-4}; - let Inst{3-0} = SIMM{3-0}; - let DecoderMethod = "DecodeNEONModImmInstruction"; -} - -// NEON 2 vector register format. -class N2V op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, - bits<5> op11_7, bit op6, bit op4, - dag oops, dag iops, InstrItinClass itin, - string opc, string dt, string asm, string cstr, list pattern> - : NDataI { - let Inst{24-23} = op24_23; - let Inst{21-20} = op21_20; - let Inst{19-18} = op19_18; - let Inst{17-16} = op17_16; - let Inst{11-7} = op11_7; - let Inst{6} = op6; - let Inst{4} = op4; - - // Instruction operands. - bits<5> Vd; - bits<5> Vm; - - let Inst{15-12} = Vd{3-0}; - let Inst{22} = Vd{4}; - let Inst{3-0} = Vm{3-0}; - let Inst{5} = Vm{4}; -} - -// Same as N2V but not predicated. 
-class N2Vnp op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, - dag oops, dag iops, InstrItinClass itin, string OpcodeStr, - string Dt, list pattern> - : NeonInp { - bits<5> Vd; - bits<5> Vm; - - // Encode instruction operands - let Inst{22} = Vd{4}; - let Inst{15-12} = Vd{3-0}; - let Inst{5} = Vm{4}; - let Inst{3-0} = Vm{3-0}; - - // Encode constant bits - let Inst{27-23} = 0b00111; - let Inst{21-20} = 0b11; - let Inst{19-18} = op19_18; - let Inst{17-16} = op17_16; - let Inst{11} = 0; - let Inst{10-8} = op10_8; - let Inst{7} = op7; - let Inst{6} = op6; - let Inst{4} = 0; - - let DecoderNamespace = "NEON"; -} - -// Same as N2V except it doesn't have a datatype suffix. -class N2VX op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, - bits<5> op11_7, bit op6, bit op4, - dag oops, dag iops, InstrItinClass itin, - string opc, string asm, string cstr, list pattern> - : NDataXI { - let Inst{24-23} = op24_23; - let Inst{21-20} = op21_20; - let Inst{19-18} = op19_18; - let Inst{17-16} = op17_16; - let Inst{11-7} = op11_7; - let Inst{6} = op6; - let Inst{4} = op4; - - // Instruction operands. - bits<5> Vd; - bits<5> Vm; - - let Inst{15-12} = Vd{3-0}; - let Inst{22} = Vd{4}; - let Inst{3-0} = Vm{3-0}; - let Inst{5} = Vm{4}; -} - -// NEON 2 vector register with immediate. -class N2VImm op11_8, bit op7, bit op6, bit op4, - dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string dt, string asm, string cstr, list pattern> - : NDataI { - let Inst{24} = op24; - let Inst{23} = op23; - let Inst{11-8} = op11_8; - let Inst{7} = op7; - let Inst{6} = op6; - let Inst{4} = op4; - - // Instruction operands. - bits<5> Vd; - bits<5> Vm; - bits<6> SIMM; - - let Inst{15-12} = Vd{3-0}; - let Inst{22} = Vd{4}; - let Inst{3-0} = Vm{3-0}; - let Inst{5} = Vm{4}; - let Inst{21-16} = SIMM{5-0}; -} - -// NEON 3 vector register format. 
- -class N3VCommon op21_20, bits<4> op11_8, bit op6, - bit op4, dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string dt, string asm, string cstr, - list pattern> - : NDataI { - let Inst{24} = op24; - let Inst{23} = op23; - let Inst{21-20} = op21_20; - let Inst{11-8} = op11_8; - let Inst{6} = op6; - let Inst{4} = op4; -} - -class N3V op21_20, bits<4> op11_8, bit op6, bit op4, - dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string dt, string asm, string cstr, list pattern> - : N3VCommon { - // Instruction operands. - bits<5> Vd; - bits<5> Vn; - bits<5> Vm; - - let Inst{15-12} = Vd{3-0}; - let Inst{22} = Vd{4}; - let Inst{19-16} = Vn{3-0}; - let Inst{7} = Vn{4}; - let Inst{3-0} = Vm{3-0}; - let Inst{5} = Vm{4}; -} - -class N3Vnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, - bit op4, dag oops, dag iops,Format f, InstrItinClass itin, - string OpcodeStr, string Dt, list pattern> - : NeonInp { - bits<5> Vd; - bits<5> Vn; - bits<5> Vm; - - // Encode instruction operands - let Inst{22} = Vd{4}; - let Inst{15-12} = Vd{3-0}; - let Inst{19-16} = Vn{3-0}; - let Inst{7} = Vn{4}; - let Inst{5} = Vm{4}; - let Inst{3-0} = Vm{3-0}; - - // Encode constant bits - let Inst{27-23} = op27_23; - let Inst{21-20} = op21_20; - let Inst{11-8} = op11_8; - let Inst{6} = op6; - let Inst{4} = op4; -} - -class N3VLane32 op21_20, bits<4> op11_8, bit op6, - bit op4, dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string dt, string asm, string cstr, - list pattern> - : N3VCommon { - - // Instruction operands. - bits<5> Vd; - bits<5> Vn; - bits<5> Vm; - bit lane; - - let Inst{15-12} = Vd{3-0}; - let Inst{22} = Vd{4}; - let Inst{19-16} = Vn{3-0}; - let Inst{7} = Vn{4}; - let Inst{3-0} = Vm{3-0}; - let Inst{5} = lane; -} - -class N3VLane16 op21_20, bits<4> op11_8, bit op6, - bit op4, dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string dt, string asm, string cstr, - list pattern> - : N3VCommon { - - // Instruction operands. 
- bits<5> Vd; - bits<5> Vn; - bits<5> Vm; - bits<2> lane; - - let Inst{15-12} = Vd{3-0}; - let Inst{22} = Vd{4}; - let Inst{19-16} = Vn{3-0}; - let Inst{7} = Vn{4}; - let Inst{2-0} = Vm{2-0}; - let Inst{5} = lane{1}; - let Inst{3} = lane{0}; -} - -// Same as N3V except it doesn't have a data type suffix. -class N3VX op21_20, bits<4> op11_8, bit op6, - bit op4, - dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list pattern> - : NDataXI { - let Inst{24} = op24; - let Inst{23} = op23; - let Inst{21-20} = op21_20; - let Inst{11-8} = op11_8; - let Inst{6} = op6; - let Inst{4} = op4; - - // Instruction operands. - bits<5> Vd; - bits<5> Vn; - bits<5> Vm; - - let Inst{15-12} = Vd{3-0}; - let Inst{22} = Vd{4}; - let Inst{19-16} = Vn{3-0}; - let Inst{7} = Vn{4}; - let Inst{3-0} = Vm{3-0}; - let Inst{5} = Vm{4}; -} - -// NEON VMOVs between scalar and core registers. -class NVLaneOp opcod1, bits<4> opcod2, bits<2> opcod3, - dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string dt, string asm, list pattern> - : InstARM { - let Inst{27-20} = opcod1; - let Inst{11-8} = opcod2; - let Inst{6-5} = opcod3; - let Inst{4} = 1; - // A8.6.303, A8.6.328, A8.6.329 - let Inst{3-0} = 0b0000; - - let OutOperandList = oops; - let InOperandList = !con(iops, (ins pred:$p)); - let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm); - let Pattern = pattern; - list Predicates = [HasNEON]; - - let PostEncoderMethod = "NEONThumb2DupPostEncoder"; - let DecoderNamespace = "NEONDup"; - - bits<5> V; - bits<4> R; - bits<4> p; - bits<4> lane; - - let Inst{31-28} = p{3-0}; - let Inst{7} = V{4}; - let Inst{19-16} = V{3-0}; - let Inst{15-12} = R{3-0}; -} -class NVGetLane opcod1, bits<4> opcod2, bits<2> opcod3, - dag oops, dag iops, InstrItinClass itin, - string opc, string dt, string asm, list pattern> - : NVLaneOp; -class NVSetLane opcod1, bits<4> opcod2, bits<2> opcod3, - dag oops, dag iops, InstrItinClass itin, - string opc, string dt, string 
asm, list pattern> - : NVLaneOp; -class NVDup opcod1, bits<4> opcod2, bits<2> opcod3, - dag oops, dag iops, InstrItinClass itin, - string opc, string dt, string asm, list pattern> - : NVLaneOp; - -// Vector Duplicate Lane (from scalar to all elements) -class NVDupLane op19_16, bit op6, dag oops, dag iops, - InstrItinClass itin, string opc, string dt, string asm, - list pattern> - : NDataI { - let Inst{24-23} = 0b11; - let Inst{21-20} = 0b11; - let Inst{19-16} = op19_16; - let Inst{11-7} = 0b11000; - let Inst{6} = op6; - let Inst{4} = 0; - - bits<5> Vd; - bits<5> Vm; - - let Inst{22} = Vd{4}; - let Inst{15-12} = Vd{3-0}; - let Inst{5} = Vm{4}; - let Inst{3-0} = Vm{3-0}; -} - -// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON -// for single-precision FP. -class NEONFPPat : Pat { - list Predicates = [HasNEON,UseNEONForFP]; -} - -// VFP/NEON Instruction aliases for type suffices. -// Note: When EmitPriority == 1, the alias will be used for printing -class VFPDataTypeInstAlias : - InstAlias, Requires<[HasVFP2]>; - -// Note: When EmitPriority == 1, the alias will be used for printing -multiclass VFPDTAnyInstAlias { - def : VFPDataTypeInstAlias; - def : VFPDataTypeInstAlias; - def : VFPDataTypeInstAlias; - def : VFPDataTypeInstAlias; -} - -// Note: When EmitPriority == 1, the alias will be used for printing -multiclass NEONDTAnyInstAlias { - let Predicates = [HasNEON] in { - def : VFPDataTypeInstAlias; - def : VFPDataTypeInstAlias; - def : VFPDataTypeInstAlias; - def : VFPDataTypeInstAlias; -} -} - -// The same alias classes using AsmPseudo instead, for the more complex -// stuff in NEON that InstAlias can't quite handle. -// Note that we can't use anonymous defm references here like we can -// above, as we care about the ultimate instruction enum names generated, unlike -// for instalias defs. 
-class NEONDataTypeAsmPseudoInst : - AsmPseudoInst, Requires<[HasNEON]>; - -// Extension of NEON 3-vector data processing instructions in coprocessor 8 -// encoding space, introduced in ARMv8.3-A. -class N3VCP8 op24_23, bits<2> op21_20, bit op6, bit op4, - dag oops, dag iops, InstrItinClass itin, - string opc, string dt, string asm, string cstr, list pattern> - : NeonInp { - bits<5> Vd; - bits<5> Vn; - bits<5> Vm; - - let DecoderNamespace = "VFPV8"; - // These have the same encodings in ARM and Thumb2 - let PostEncoderMethod = ""; - - let Inst{31-25} = 0b1111110; - let Inst{24-23} = op24_23; - let Inst{22} = Vd{4}; - let Inst{21-20} = op21_20; - let Inst{19-16} = Vn{3-0}; - let Inst{15-12} = Vd{3-0}; - let Inst{11-8} = 0b1000; - let Inst{7} = Vn{4}; - let Inst{6} = op6; - let Inst{5} = Vm{4}; - let Inst{4} = op4; - let Inst{3-0} = Vm{3-0}; -} - -// Extension of NEON 2-vector-and-scalar data processing instructions in -// coprocessor 8 encoding space, introduced in ARMv8.3-A. -class N3VLaneCP8 op21_20, bit op6, bit op4, - dag oops, dag iops, InstrItinClass itin, - string opc, string dt, string asm, string cstr, list pattern> - : NeonInp { - bits<5> Vd; - bits<5> Vn; - bits<5> Vm; - - let DecoderNamespace = "VFPV8"; - // These have the same encodings in ARM and Thumb2 - let PostEncoderMethod = ""; - - let Inst{31-24} = 0b11111110; - let Inst{23} = op23; - let Inst{22} = Vd{4}; - let Inst{21-20} = op21_20; - let Inst{19-16} = Vn{3-0}; - let Inst{15-12} = Vd{3-0}; - let Inst{11-8} = 0b1000; - let Inst{7} = Vn{4}; - let Inst{6} = op6; - // Bit 5 set by sub-classes - let Inst{4} = op4; - let Inst{3-0} = Vm{3-0}; -} - -// Operand types for complex instructions -class ComplexRotationOperand - : AsmOperandClass { - let PredicateMethod = "isComplexRotation<" # Angle # ", " # Remainder # ">"; - let DiagnosticString = "complex rotation must be " # Diag; - let Name = "ComplexRotation" # Type; -} -def complexrotateop : Operand { - let ParserMatchClass = 
ComplexRotationOperand<90, 0, "Even", "0, 90, 180 or 270">; - let PrintMethod = "printComplexRotationOp<90, 0>"; -} -def complexrotateopodd : Operand { - let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd", "90 or 270">; - let PrintMethod = "printComplexRotationOp<180, 90>"; -} - -// Data type suffix token aliases. Implements Table A7-3 in the ARM ARM. -def : TokenAlias<".s8", ".i8">; -def : TokenAlias<".u8", ".i8">; -def : TokenAlias<".s16", ".i16">; -def : TokenAlias<".u16", ".i16">; -def : TokenAlias<".s32", ".i32">; -def : TokenAlias<".u32", ".i32">; -def : TokenAlias<".s64", ".i64">; -def : TokenAlias<".u64", ".i64">; - -def : TokenAlias<".i8", ".8">; -def : TokenAlias<".i16", ".16">; -def : TokenAlias<".i32", ".32">; -def : TokenAlias<".i64", ".64">; - -def : TokenAlias<".p8", ".8">; -def : TokenAlias<".p16", ".16">; - -def : TokenAlias<".f32", ".32">; -def : TokenAlias<".f64", ".64">; -def : TokenAlias<".f", ".f32">; -def : TokenAlias<".d", ".f64">; diff --git a/suite/synctools/tablegen/ARM/ARMInstrInfo.td b/suite/synctools/tablegen/ARM/ARMInstrInfo.td deleted file mode 100644 index d4c342cee5..0000000000 --- a/suite/synctools/tablegen/ARM/ARMInstrInfo.td +++ /dev/null @@ -1,6167 +0,0 @@ -//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the ARM instructions in TableGen format. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// ARM specific DAG Nodes. -// - -// Type profiles. 
-def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, - SDTCisVT<1, i32> ]>; -def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; -def SDT_ARMStructByVal : SDTypeProfile<0, 4, - [SDTCisVT<0, i32>, SDTCisVT<1, i32>, - SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; - -def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>; - -def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; - -def SDT_ARMCMov : SDTypeProfile<1, 3, - [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisVT<3, i32>]>; - -def SDT_ARMBrcond : SDTypeProfile<0, 2, - [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>; - -def SDT_ARMBrJT : SDTypeProfile<0, 2, - [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; - -def SDT_ARMBr2JT : SDTypeProfile<0, 3, - [SDTCisPtrTy<0>, SDTCisVT<1, i32>, - SDTCisVT<2, i32>]>; - -def SDT_ARMBCC_i64 : SDTypeProfile<0, 6, - [SDTCisVT<0, i32>, - SDTCisVT<1, i32>, SDTCisVT<2, i32>, - SDTCisVT<3, i32>, SDTCisVT<4, i32>, - SDTCisVT<5, OtherVT>]>; - -def SDT_ARMAnd : SDTypeProfile<1, 2, - [SDTCisVT<0, i32>, SDTCisVT<1, i32>, - SDTCisVT<2, i32>]>; - -def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; -def SDT_ARMFCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, - SDTCisVT<2, i32>]>; - -def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, - SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; - -def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; -def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, - SDTCisInt<2>]>; -def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; -def SDT_ARMEH_SJLJ_SetupDispatch: SDTypeProfile<0, 0, []>; - -def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>; - -def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, - SDTCisInt<1>]>; - -def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; - -def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, - SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; - -def SDT_WIN__DBZCHK : SDTypeProfile<0, 1, 
[SDTCisVT<0, i32>]>; - -def SDT_ARMMEMCPY : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, - SDTCisVT<2, i32>, SDTCisVT<3, i32>, - SDTCisVT<4, i32>]>; - -def SDTBinaryArithWithFlags : SDTypeProfile<2, 2, - [SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>, - SDTCisInt<0>, SDTCisVT<1, i32>]>; - -// SDTBinaryArithWithFlagsInOut - RES1, CPSR = op LHS, RHS, CPSR -def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, - [SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>, - SDTCisInt<0>, - SDTCisVT<1, i32>, - SDTCisVT<4, i32>]>; - -def SDT_LongMac : SDTypeProfile<2, 4, [SDTCisVT<0, i32>, - SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>, - SDTCisSameAs<0, 4>, - SDTCisSameAs<0, 5>]>; - -def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>; -def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>; -def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>; -def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>; - -def SDT_MulHSR : SDTypeProfile<1, 3, [SDTCisVT<0,i32>, - SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>]>; - -def ARMsmmlar : SDNode<"ARMISD::SMMLAR", SDT_MulHSR>; -def ARMsmmlsr : SDNode<"ARMISD::SMMLSR", SDT_MulHSR>; - -// Node definitions. 
-def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; -def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; -def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntUnaryOp>; - -def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, - [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; -def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd, - [SDNPHasChain, SDNPSideEffect, - SDNPOptInGlue, SDNPOutGlue]>; -def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" , - SDT_ARMStructByVal, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue, - SDNPMayStore, SDNPMayLoad]>; - -def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; -def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; -def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; - -def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, - [SDNPInGlue]>; - -def ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>; - -def ARMusatnoshift : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>; - -def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; - -def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT, - [SDNPHasChain]>; -def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT, - [SDNPHasChain]>; - -def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64, - [SDNPHasChain]>; - -def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, - [SDNPOutGlue]>; - -def ARMcmn : SDNode<"ARMISD::CMN", SDT_ARMCmp, - [SDNPOutGlue]>; - -def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp, - [SDNPOutGlue, SDNPCommutative]>; - -def ARMpic_add : SDNode<"ARMISD::PIC_ADD", 
SDT_ARMPICAdd>; - -def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>; -def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>; -def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>; - -def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags, - [SDNPCommutative]>; -def ARMsubc : SDNode<"ARMISD::SUBC", SDTBinaryArithWithFlags>; -def ARMadde : SDNode<"ARMISD::ADDE", SDTBinaryArithWithFlagsInOut>; -def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>; - -def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; -def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", - SDT_ARMEH_SJLJ_Setjmp, - [SDNPHasChain, SDNPSideEffect]>; -def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", - SDT_ARMEH_SJLJ_Longjmp, - [SDNPHasChain, SDNPSideEffect]>; -def ARMeh_sjlj_setup_dispatch: SDNode<"ARMISD::EH_SJLJ_SETUP_DISPATCH", - SDT_ARMEH_SJLJ_SetupDispatch, - [SDNPHasChain, SDNPSideEffect]>; - -def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER, - [SDNPHasChain, SDNPSideEffect]>; -def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH, - [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; - -def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; - -def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>; - -def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue, - SDNPMayStore, SDNPMayLoad]>; - -def ARMsmulwb : SDNode<"ARMISD::SMULWB", SDTIntBinOp, []>; -def ARMsmulwt : SDNode<"ARMISD::SMULWT", SDTIntBinOp, []>; -def ARMsmlalbb : SDNode<"ARMISD::SMLALBB", SDT_LongMac, []>; -def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>; -def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>; -def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>; - -//===----------------------------------------------------------------------===// -// ARM Instruction Predicate Definitions. 
-// -def HasV4T : Predicate<"Subtarget->hasV4TOps()">, - AssemblerPredicate<"HasV4TOps", "armv4t">; -def NoV4T : Predicate<"!Subtarget->hasV4TOps()">; -def HasV5T : Predicate<"Subtarget->hasV5TOps()">, - AssemblerPredicate<"HasV5TOps", "armv5t">; -def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">, - AssemblerPredicate<"HasV5TEOps", "armv5te">; -def HasV6 : Predicate<"Subtarget->hasV6Ops()">, - AssemblerPredicate<"HasV6Ops", "armv6">; -def NoV6 : Predicate<"!Subtarget->hasV6Ops()">; -def HasV6M : Predicate<"Subtarget->hasV6MOps()">, - AssemblerPredicate<"HasV6MOps", - "armv6m or armv6t2">; -def HasV8MBaseline : Predicate<"Subtarget->hasV8MBaselineOps()">, - AssemblerPredicate<"HasV8MBaselineOps", - "armv8m.base">; -def HasV8MMainline : Predicate<"Subtarget->hasV8MMainlineOps()">, - AssemblerPredicate<"HasV8MMainlineOps", - "armv8m.main">; -def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">, - AssemblerPredicate<"HasV6T2Ops", "armv6t2">; -def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; -def HasV6K : Predicate<"Subtarget->hasV6KOps()">, - AssemblerPredicate<"HasV6KOps", "armv6k">; -def NoV6K : Predicate<"!Subtarget->hasV6KOps()">; -def HasV7 : Predicate<"Subtarget->hasV7Ops()">, - AssemblerPredicate<"HasV7Ops", "armv7">; -def HasV8 : Predicate<"Subtarget->hasV8Ops()">, - AssemblerPredicate<"HasV8Ops", "armv8">; -def PreV8 : Predicate<"!Subtarget->hasV8Ops()">, - AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">; -def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">, - AssemblerPredicate<"HasV8_1aOps", "armv8.1a">; -def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">, - AssemblerPredicate<"HasV8_2aOps", "armv8.2a">; -def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">, - AssemblerPredicate<"HasV8_3aOps", "armv8.3a">; -def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">, - AssemblerPredicate<"HasV8_4aOps", "armv8.4a">; -def NoVFP : Predicate<"!Subtarget->hasVFP2()">; -def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, - AssemblerPredicate<"FeatureVFP2", "VFP2">; 
-def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, - AssemblerPredicate<"FeatureVFP3", "VFP3">; -def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, - AssemblerPredicate<"FeatureVFP4", "VFP4">; -def HasDPVFP : Predicate<"!Subtarget->isFPOnlySP()">, - AssemblerPredicate<"!FeatureVFPOnlySP", - "double precision VFP">; -def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, - AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">; -def HasNEON : Predicate<"Subtarget->hasNEON()">, - AssemblerPredicate<"FeatureNEON", "NEON">; -def HasSHA2 : Predicate<"Subtarget->hasSHA2()">, - AssemblerPredicate<"FeatureSHA2", "sha2">; -def HasAES : Predicate<"Subtarget->hasAES()">, - AssemblerPredicate<"FeatureAES", "aes">; -def HasCrypto : Predicate<"Subtarget->hasCrypto()">, - AssemblerPredicate<"FeatureCrypto", "crypto">; -def HasDotProd : Predicate<"Subtarget->hasDotProd()">, - AssemblerPredicate<"FeatureDotProd", "dotprod">; -def HasCRC : Predicate<"Subtarget->hasCRC()">, - AssemblerPredicate<"FeatureCRC", "crc">; -def HasRAS : Predicate<"Subtarget->hasRAS()">, - AssemblerPredicate<"FeatureRAS", "ras">; -def HasFP16 : Predicate<"Subtarget->hasFP16()">, - AssemblerPredicate<"FeatureFP16","half-float conversions">; -def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, - AssemblerPredicate<"FeatureFullFP16","full half-float">; -def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">, - AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">; -def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">, - AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">; -def HasDSP : Predicate<"Subtarget->hasDSP()">, - AssemblerPredicate<"FeatureDSP", "dsp">; -def HasDB : Predicate<"Subtarget->hasDataBarrier()">, - AssemblerPredicate<"FeatureDB", - "data-barriers">; -def HasDFB : Predicate<"Subtarget->hasFullDataBarrier()">, - AssemblerPredicate<"FeatureDFB", - "full-data-barrier">; -def HasV7Clrex : Predicate<"Subtarget->hasV7Clrex()">, - AssemblerPredicate<"FeatureV7Clrex", - 
"v7 clrex">; -def HasAcquireRelease : Predicate<"Subtarget->hasAcquireRelease()">, - AssemblerPredicate<"FeatureAcquireRelease", - "acquire/release">; -def HasMP : Predicate<"Subtarget->hasMPExtension()">, - AssemblerPredicate<"FeatureMP", - "mp-extensions">; -def HasVirtualization: Predicate<"false">, - AssemblerPredicate<"FeatureVirtualization", - "virtualization-extensions">; -def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">, - AssemblerPredicate<"FeatureTrustZone", - "TrustZone">; -def Has8MSecExt : Predicate<"Subtarget->has8MSecExt()">, - AssemblerPredicate<"Feature8MSecExt", - "ARMv8-M Security Extensions">; -def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">; -def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; -def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; -def IsThumb : Predicate<"Subtarget->isThumb()">, - AssemblerPredicate<"ModeThumb", "thumb">; -def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">; -def IsThumb2 : Predicate<"Subtarget->isThumb2()">, - AssemblerPredicate<"ModeThumb,FeatureThumb2", - "thumb2">; -def IsMClass : Predicate<"Subtarget->isMClass()">, - AssemblerPredicate<"FeatureMClass", "armv*m">; -def IsNotMClass : Predicate<"!Subtarget->isMClass()">, - AssemblerPredicate<"!FeatureMClass", - "!armv*m">; -def IsARM : Predicate<"!Subtarget->isThumb()">, - AssemblerPredicate<"!ModeThumb", "arm-mode">; -def IsMachO : Predicate<"Subtarget->isTargetMachO()">; -def IsNotMachO : Predicate<"!Subtarget->isTargetMachO()">; -def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; -def IsWindows : Predicate<"Subtarget->isTargetWindows()">; -def IsNotWindows : Predicate<"!Subtarget->isTargetWindows()">; -def IsReadTPHard : Predicate<"Subtarget->isReadTPHard()">; -def IsReadTPSoft : Predicate<"!Subtarget->isReadTPHard()">; -def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">, - AssemblerPredicate<"FeatureNaClTrap", "NaCl">; -def DontUseNaClTrap : 
Predicate<"!Subtarget->useNaClTrap()">; - -def UseNegativeImmediates : - Predicate<"false">, - AssemblerPredicate<"!FeatureNoNegativeImmediates", - "NegativeImmediates">; - -// FIXME: Eventually this will be just "hasV6T2Ops". -let RecomputePerFunction = 1 in { - def UseMovt : Predicate<"Subtarget->useMovt(*MF)">; - def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">; - def UseMovtInPic : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">; - def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">; -} -def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; -def UseMulOps : Predicate<"Subtarget->useMulOps()">; - -// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available. -// But only select them if more precision in FP computation is allowed. -// Do not use them for Darwin platforms. -def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" - " FPOpFusion::Fast && " - " Subtarget->hasVFP4()) && " - "!Subtarget->isTargetDarwin()">; -def DontUseFusedMAC : Predicate<"!(TM.Options.AllowFPOpFusion ==" - " FPOpFusion::Fast &&" - " Subtarget->hasVFP4()) || " - "Subtarget->isTargetDarwin()">; - -def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">; -def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">; - -def HasFastVDUP32 : Predicate<"!Subtarget->hasSlowVDUP32()">; -def HasSlowVDUP32 : Predicate<"Subtarget->hasSlowVDUP32()">; - -def UseVMOVSR : Predicate<"Subtarget->preferVMOVSR() ||" - "!Subtarget->useNEONForSinglePrecisionFP()">; -def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&" - "Subtarget->useNEONForSinglePrecisionFP()">; - -let RecomputePerFunction = 1 in { - def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">; - def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">; -} - -def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">; - 
-//===----------------------------------------------------------------------===// -// ARM Flag Definitions. - -class RegConstraint { - string Constraints = C; -} - -//===----------------------------------------------------------------------===// -// ARM specific transformation functions and pattern fragments. -// - -// imm_neg_XFORM - Return the negation of an i32 immediate value. -def imm_neg_XFORM : SDNodeXFormgetTargetConstant(-(int)N->getZExtValue(), SDLoc(N), MVT::i32); -}]>; - -// imm_not_XFORM - Return the complement of a i32 immediate value. -def imm_not_XFORM : SDNodeXFormgetTargetConstant(~(int)N->getZExtValue(), SDLoc(N), MVT::i32); -}]>; - -/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31]. -def imm16_31 : ImmLeaf= 16 && (int32_t)Imm < 32; -}]>; - -// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits. -def sext_16_node : PatLeaf<(i32 GPR:$a), [{ - if (CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17) - return true; - - if (N->getOpcode() != ISD::SRA) - return false; - if (N->getOperand(0).getOpcode() != ISD::SHL) - return false; - - auto *ShiftVal = dyn_cast(N->getOperand(1)); - if (!ShiftVal || ShiftVal->getZExtValue() != 16) - return false; - - ShiftVal = dyn_cast(N->getOperand(0)->getOperand(1)); - if (!ShiftVal || ShiftVal->getZExtValue() != 16) - return false; - - return true; -}]>; - -/// Split a 32-bit immediate into two 16 bit parts. -def hi16 : SDNodeXFormgetTargetConstant((uint32_t)N->getZExtValue() >> 16, SDLoc(N), - MVT::i32); -}]>; - -def lo16AllZero : PatLeaf<(i32 imm), [{ - // Returns true if all low 16-bits are 0. - return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0; -}], hi16>; - -class BinOpFrag : PatFrag<(ops node:$LHS, node:$RHS), res>; -class UnOpFrag : PatFrag<(ops node:$Src), res>; - -// An 'and' node with a single use. -def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{ - return N->hasOneUse(); -}]>; - -// An 'xor' node with a single use. 
-def xor_su : PatFrag<(ops node:$lhs, node:$rhs), (xor node:$lhs, node:$rhs), [{ - return N->hasOneUse(); -}]>; - -// An 'fmul' node with a single use. -def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{ - return N->hasOneUse(); -}]>; - -// An 'fadd' node which checks for single non-hazardous use. -def fadd_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fadd node:$lhs, node:$rhs),[{ - return hasNoVMLxHazardUse(N); -}]>; - -// An 'fsub' node which checks for single non-hazardous use. -def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{ - return hasNoVMLxHazardUse(N); -}]>; - -//===----------------------------------------------------------------------===// -// Operand Definitions. -// - -// Immediate operands with a shared generic asm render method. -class ImmAsmOperand : AsmOperandClass { - let RenderMethod = "addImmOperands"; - let PredicateMethod = "isImmediate<" # Low # "," # High # ">"; - let DiagnosticString = "operand must be an immediate in the range [" # Low # "," # High # "]"; -} - -class ImmAsmOperandMinusOne : AsmOperandClass { - let PredicateMethod = "isImmediate<" # Low # "," # High # ">"; - let DiagnosticType = "ImmRange" # Low # "_" # High; - let DiagnosticString = "operand must be an immediate in the range [" # Low # "," # High # "]"; -} - -// Operands that are part of a memory addressing mode. -class MemOperand : Operand { let OperandType = "OPERAND_MEMORY"; } - -// Branch target. -// FIXME: rename brtarget to t2_brtarget -def brtarget : Operand { - let EncoderMethod = "getBranchTargetOpValue"; - let OperandType = "OPERAND_PCREL"; - let DecoderMethod = "DecodeT2BROperand"; -} - -// Branches targeting ARM-mode must be divisible by 4 if they're a raw -// immediate. -def ARMBranchTarget : AsmOperandClass { - let Name = "ARMBranchTarget"; -} - -// Branches targeting Thumb-mode must be divisible by 2 if they're a raw -// immediate. 
-def ThumbBranchTarget : AsmOperandClass { - let Name = "ThumbBranchTarget"; -} - -def arm_br_target : Operand { - let ParserMatchClass = ARMBranchTarget; - let EncoderMethod = "getARMBranchTargetOpValue"; - let OperandType = "OPERAND_PCREL"; -} - -// Call target for ARM. Handles conditional/unconditional -// FIXME: rename bl_target to t2_bltarget? -def arm_bl_target : Operand { - let ParserMatchClass = ARMBranchTarget; - let EncoderMethod = "getARMBLTargetOpValue"; - let OperandType = "OPERAND_PCREL"; -} - -// Target for BLX *from* ARM mode. -def arm_blx_target : Operand { - let ParserMatchClass = ThumbBranchTarget; - let EncoderMethod = "getARMBLXTargetOpValue"; - let OperandType = "OPERAND_PCREL"; -} - -// A list of registers separated by comma. Used by load/store multiple. -def RegListAsmOperand : AsmOperandClass { let Name = "RegList"; } -def reglist : Operand { - let EncoderMethod = "getRegisterListOpValue"; - let ParserMatchClass = RegListAsmOperand; - let PrintMethod = "printRegisterList"; - let DecoderMethod = "DecodeRegListOperand"; -} - -def GPRPairOp : RegisterOperand; - -def DPRRegListAsmOperand : AsmOperandClass { - let Name = "DPRRegList"; - let DiagnosticType = "DPR_RegList"; -} -def dpr_reglist : Operand { - let EncoderMethod = "getRegisterListOpValue"; - let ParserMatchClass = DPRRegListAsmOperand; - let PrintMethod = "printRegisterList"; - let DecoderMethod = "DecodeDPRRegListOperand"; -} - -def SPRRegListAsmOperand : AsmOperandClass { - let Name = "SPRRegList"; - let DiagnosticString = "operand must be a list of registers in range [s0, s31]"; -} -def spr_reglist : Operand { - let EncoderMethod = "getRegisterListOpValue"; - let ParserMatchClass = SPRRegListAsmOperand; - let PrintMethod = "printRegisterList"; - let DecoderMethod = "DecodeSPRRegListOperand"; -} - -// An operand for the CONSTPOOL_ENTRY pseudo-instruction. -def cpinst_operand : Operand { - let PrintMethod = "printCPInstOperand"; -} - -// Local PC labels. 
-def pclabel : Operand { - let PrintMethod = "printPCLabel"; -} - -// ADR instruction labels. -def AdrLabelAsmOperand : AsmOperandClass { let Name = "AdrLabel"; } -def adrlabel : Operand { - let EncoderMethod = "getAdrLabelOpValue"; - let ParserMatchClass = AdrLabelAsmOperand; - let PrintMethod = "printAdrLabelOperand<0>"; -} - -def neon_vcvt_imm32 : Operand { - let EncoderMethod = "getNEONVcvtImm32OpValue"; - let DecoderMethod = "DecodeVCVTImmOperand"; -} - -// rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24. -def rot_imm_XFORM: SDNodeXFormgetZExtValue()){ - default: llvm_unreachable(nullptr); - case 0: return CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); - case 8: return CurDAG->getTargetConstant(1, SDLoc(N), MVT::i32); - case 16: return CurDAG->getTargetConstant(2, SDLoc(N), MVT::i32); - case 24: return CurDAG->getTargetConstant(3, SDLoc(N), MVT::i32); - } -}]>; -def RotImmAsmOperand : AsmOperandClass { - let Name = "RotImm"; - let ParserMethod = "parseRotImm"; -} -def rot_imm : Operand, PatLeaf<(i32 imm), [{ - int32_t v = N->getZExtValue(); - return v == 8 || v == 16 || v == 24; }], - rot_imm_XFORM> { - let PrintMethod = "printRotImmOperand"; - let ParserMatchClass = RotImmAsmOperand; -} - -// shift_imm: An integer that encodes a shift amount and the type of shift -// (asr or lsl). The 6-bit immediate encodes as: -// {5} 0 ==> lsl -// 1 asr -// {4-0} imm5 shift amount. -// asr #32 encoded as imm5 == 0. -def ShifterImmAsmOperand : AsmOperandClass { - let Name = "ShifterImm"; - let ParserMethod = "parseShifterImm"; -} -def shift_imm : Operand { - let PrintMethod = "printShiftImmOperand"; - let ParserMatchClass = ShifterImmAsmOperand; -} - -// shifter_operand operands: so_reg_reg, so_reg_imm, and mod_imm. 
-def ShiftedRegAsmOperand : AsmOperandClass { let Name = "RegShiftedReg"; } -def so_reg_reg : Operand, // reg reg imm - ComplexPattern { - let EncoderMethod = "getSORegRegOpValue"; - let PrintMethod = "printSORegRegOperand"; - let DecoderMethod = "DecodeSORegRegOperand"; - let ParserMatchClass = ShiftedRegAsmOperand; - let MIOperandInfo = (ops GPRnopc, GPRnopc, i32imm); -} - -def ShiftedImmAsmOperand : AsmOperandClass { let Name = "RegShiftedImm"; } -def so_reg_imm : Operand, // reg imm - ComplexPattern { - let EncoderMethod = "getSORegImmOpValue"; - let PrintMethod = "printSORegImmOperand"; - let DecoderMethod = "DecodeSORegImmOperand"; - let ParserMatchClass = ShiftedImmAsmOperand; - let MIOperandInfo = (ops GPR, i32imm); -} - -// FIXME: Does this need to be distinct from so_reg? -def shift_so_reg_reg : Operand, // reg reg imm - ComplexPattern { - let EncoderMethod = "getSORegRegOpValue"; - let PrintMethod = "printSORegRegOperand"; - let DecoderMethod = "DecodeSORegRegOperand"; - let ParserMatchClass = ShiftedRegAsmOperand; - let MIOperandInfo = (ops GPR, GPR, i32imm); -} - -// FIXME: Does this need to be distinct from so_reg? -def shift_so_reg_imm : Operand, // reg reg imm - ComplexPattern { - let EncoderMethod = "getSORegImmOpValue"; - let PrintMethod = "printSORegImmOperand"; - let DecoderMethod = "DecodeSORegImmOperand"; - let ParserMatchClass = ShiftedImmAsmOperand; - let MIOperandInfo = (ops GPR, i32imm); -} - -// mod_imm: match a 32-bit immediate operand, which can be encoded into -// a 12-bit immediate; an 8-bit integer and a 4-bit rotator (See ARMARM -// - "Modified Immediate Constants"). Within the MC layer we keep this -// immediate in its encoded form. 
-def ModImmAsmOperand: AsmOperandClass { - let Name = "ModImm"; - let ParserMethod = "parseModImm"; -} -def mod_imm : Operand, ImmLeaf { - let EncoderMethod = "getModImmOpValue"; - let PrintMethod = "printModImmOperand"; - let ParserMatchClass = ModImmAsmOperand; -} - -// Note: the patterns mod_imm_not and mod_imm_neg do not require an encoder -// method and such, as they are only used on aliases (Pat<> and InstAlias<>). -// The actual parsing, encoding, decoding are handled by the destination -// instructions, which use mod_imm. - -def ModImmNotAsmOperand : AsmOperandClass { let Name = "ModImmNot"; } -def mod_imm_not : Operand, PatLeaf<(imm), [{ - return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1; - }], imm_not_XFORM> { - let ParserMatchClass = ModImmNotAsmOperand; -} - -def ModImmNegAsmOperand : AsmOperandClass { let Name = "ModImmNeg"; } -def mod_imm_neg : Operand, PatLeaf<(imm), [{ - unsigned Value = -(unsigned)N->getZExtValue(); - return Value && ARM_AM::getSOImmVal(Value) != -1; - }], imm_neg_XFORM> { - let ParserMatchClass = ModImmNegAsmOperand; -} - -/// arm_i32imm - True for +V6T2, or when isSOImmTwoParVal() -def arm_i32imm : PatLeaf<(imm), [{ - if (Subtarget->useMovt(*MF)) - return true; - return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue()); -}]>; - -/// imm0_1 predicate - Immediate in the range [0,1]. -def Imm0_1AsmOperand: ImmAsmOperand<0,1> { let Name = "Imm0_1"; } -def imm0_1 : Operand { let ParserMatchClass = Imm0_1AsmOperand; } - -/// imm0_3 predicate - Immediate in the range [0,3]. -def Imm0_3AsmOperand: ImmAsmOperand<0,3> { let Name = "Imm0_3"; } -def imm0_3 : Operand { let ParserMatchClass = Imm0_3AsmOperand; } - -/// imm0_7 predicate - Immediate in the range [0,7]. -def Imm0_7AsmOperand: ImmAsmOperand<0,7> { - let Name = "Imm0_7"; -} -def imm0_7 : Operand, ImmLeaf= 0 && Imm < 8; -}]> { - let ParserMatchClass = Imm0_7AsmOperand; -} - -/// imm8_255 predicate - Immediate in the range [8,255]. 
-def Imm8_255AsmOperand: ImmAsmOperand<8,255> { let Name = "Imm8_255"; } -def imm8_255 : Operand, ImmLeaf= 8 && Imm < 256; -}]> { - let ParserMatchClass = Imm8_255AsmOperand; -} - -/// imm8 predicate - Immediate is exactly 8. -def Imm8AsmOperand: ImmAsmOperand<8,8> { let Name = "Imm8"; } -def imm8 : Operand, ImmLeaf { - let ParserMatchClass = Imm8AsmOperand; -} - -/// imm16 predicate - Immediate is exactly 16. -def Imm16AsmOperand: ImmAsmOperand<16,16> { let Name = "Imm16"; } -def imm16 : Operand, ImmLeaf { - let ParserMatchClass = Imm16AsmOperand; -} - -/// imm32 predicate - Immediate is exactly 32. -def Imm32AsmOperand: ImmAsmOperand<32,32> { let Name = "Imm32"; } -def imm32 : Operand, ImmLeaf { - let ParserMatchClass = Imm32AsmOperand; -} - -def imm8_or_16 : ImmLeaf; - -/// imm1_7 predicate - Immediate in the range [1,7]. -def Imm1_7AsmOperand: ImmAsmOperand<1,7> { let Name = "Imm1_7"; } -def imm1_7 : Operand, ImmLeaf 0 && Imm < 8; }]> { - let ParserMatchClass = Imm1_7AsmOperand; -} - -/// imm1_15 predicate - Immediate in the range [1,15]. -def Imm1_15AsmOperand: ImmAsmOperand<1,15> { let Name = "Imm1_15"; } -def imm1_15 : Operand, ImmLeaf 0 && Imm < 16; }]> { - let ParserMatchClass = Imm1_15AsmOperand; -} - -/// imm1_31 predicate - Immediate in the range [1,31]. -def Imm1_31AsmOperand: ImmAsmOperand<1,31> { let Name = "Imm1_31"; } -def imm1_31 : Operand, ImmLeaf 0 && Imm < 32; }]> { - let ParserMatchClass = Imm1_31AsmOperand; -} - -/// imm0_15 predicate - Immediate in the range [0,15]. -def Imm0_15AsmOperand: ImmAsmOperand<0,15> { - let Name = "Imm0_15"; -} -def imm0_15 : Operand, ImmLeaf= 0 && Imm < 16; -}]> { - let ParserMatchClass = Imm0_15AsmOperand; -} - -/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31]. 
-def Imm0_31AsmOperand: ImmAsmOperand<0,31> { let Name = "Imm0_31"; } -def imm0_31 : Operand, ImmLeaf= 0 && Imm < 32; -}]> { - let ParserMatchClass = Imm0_31AsmOperand; -} - -/// imm0_32 predicate - True if the 32-bit immediate is in the range [0,32]. -def Imm0_32AsmOperand: ImmAsmOperand<0,32> { let Name = "Imm0_32"; } -def imm0_32 : Operand, ImmLeaf= 0 && Imm < 33; -}]> { - let ParserMatchClass = Imm0_32AsmOperand; -} - -/// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63]. -def Imm0_63AsmOperand: ImmAsmOperand<0,63> { let Name = "Imm0_63"; } -def imm0_63 : Operand, ImmLeaf= 0 && Imm < 64; -}]> { - let ParserMatchClass = Imm0_63AsmOperand; -} - -/// imm0_239 predicate - Immediate in the range [0,239]. -def Imm0_239AsmOperand : ImmAsmOperand<0,239> { - let Name = "Imm0_239"; -} -def imm0_239 : Operand, ImmLeaf= 0 && Imm < 240; }]> { - let ParserMatchClass = Imm0_239AsmOperand; -} - -/// imm0_255 predicate - Immediate in the range [0,255]. -def Imm0_255AsmOperand : ImmAsmOperand<0,255> { let Name = "Imm0_255"; } -def imm0_255 : Operand, ImmLeaf= 0 && Imm < 256; }]> { - let ParserMatchClass = Imm0_255AsmOperand; -} - -/// imm0_65535 - An immediate is in the range [0,65535]. -def Imm0_65535AsmOperand: ImmAsmOperand<0,65535> { let Name = "Imm0_65535"; } -def imm0_65535 : Operand, ImmLeaf= 0 && Imm < 65536; -}]> { - let ParserMatchClass = Imm0_65535AsmOperand; -} - -// imm0_65535_neg - An immediate whose negative value is in the range [0.65535]. -def imm0_65535_neg : Operand, ImmLeaf= 0 && -Imm < 65536; -}]>; - -// imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference -// a relocatable expression. -// -// FIXME: This really needs a Thumb version separate from the ARM version. -// While the range is the same, and can thus use the same match class, -// the encoding is different so it should have a different encoder method. 
-def Imm0_65535ExprAsmOperand: AsmOperandClass { - let Name = "Imm0_65535Expr"; - let RenderMethod = "addImmOperands"; - let DiagnosticString = "operand must be an immediate in the range [0,0xffff] or a relocatable expression"; -} - -def imm0_65535_expr : Operand { - let EncoderMethod = "getHiLo16ImmOpValue"; - let ParserMatchClass = Imm0_65535ExprAsmOperand; -} - -def Imm256_65535ExprAsmOperand: ImmAsmOperand<256,65535> { let Name = "Imm256_65535Expr"; } -def imm256_65535_expr : Operand { - let ParserMatchClass = Imm256_65535ExprAsmOperand; -} - -/// imm24b - True if the 32-bit immediate is encodable in 24 bits. -def Imm24bitAsmOperand: ImmAsmOperand<0,0xffffff> { - let Name = "Imm24bit"; - let DiagnosticString = "operand must be an immediate in the range [0,0xffffff]"; -} -def imm24b : Operand, ImmLeaf= 0 && Imm <= 0xffffff; -}]> { - let ParserMatchClass = Imm24bitAsmOperand; -} - - -/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield -/// e.g., 0xf000ffff -def BitfieldAsmOperand : AsmOperandClass { - let Name = "Bitfield"; - let ParserMethod = "parseBitfield"; -} - -def bf_inv_mask_imm : Operand, - PatLeaf<(imm), [{ - return ARM::isBitFieldInvertedMask(N->getZExtValue()); -}] > { - let EncoderMethod = "getBitfieldInvertedMaskOpValue"; - let PrintMethod = "printBitfieldInvMaskImmOperand"; - let DecoderMethod = "DecodeBitfieldMaskOperand"; - let ParserMatchClass = BitfieldAsmOperand; - let GISelPredicateCode = [{ - // There's better methods of implementing this check. IntImmLeaf<> would be - // equivalent and have less boilerplate but we need a test for C++ - // predicates and this one causes new rules to be imported into GlobalISel - // without requiring additional features first. 
- const auto &MO = MI.getOperand(1); - if (!MO.isCImm()) - return false; - return ARM::isBitFieldInvertedMask(MO.getCImm()->getZExtValue()); - }]; -} - -def imm1_32_XFORM: SDNodeXFormgetTargetConstant((int)N->getZExtValue() - 1, SDLoc(N), - MVT::i32); -}]>; -def Imm1_32AsmOperand: ImmAsmOperandMinusOne<1,32> { - let Name = "Imm1_32"; -} -def imm1_32 : Operand, PatLeaf<(imm), [{ - uint64_t Imm = N->getZExtValue(); - return Imm > 0 && Imm <= 32; - }], - imm1_32_XFORM> { - let PrintMethod = "printImmPlusOneOperand"; - let ParserMatchClass = Imm1_32AsmOperand; -} - -def imm1_16_XFORM: SDNodeXFormgetTargetConstant((int)N->getZExtValue() - 1, SDLoc(N), - MVT::i32); -}]>; -def Imm1_16AsmOperand: ImmAsmOperandMinusOne<1,16> { let Name = "Imm1_16"; } -def imm1_16 : Operand, ImmLeaf 0 && Imm <= 16; - }], - imm1_16_XFORM> { - let PrintMethod = "printImmPlusOneOperand"; - let ParserMatchClass = Imm1_16AsmOperand; -} - -// Define ARM specific addressing modes. -// addrmode_imm12 := reg +/- imm12 -// -def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; } -class AddrMode_Imm12 : MemOperand, - ComplexPattern { - // 12-bit immediate operand. Note that instructions using this encode - // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other - // immediate values are as normal. 
- - let EncoderMethod = "getAddrModeImm12OpValue"; - let DecoderMethod = "DecodeAddrModeImm12Operand"; - let ParserMatchClass = MemImm12OffsetAsmOperand; - let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); -} - -def addrmode_imm12 : AddrMode_Imm12 { - let PrintMethod = "printAddrModeImm12Operand"; -} - -def addrmode_imm12_pre : AddrMode_Imm12 { - let PrintMethod = "printAddrModeImm12Operand"; -} - -// ldst_so_reg := reg +/- reg shop imm -// -def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; } -def ldst_so_reg : MemOperand, - ComplexPattern { - let EncoderMethod = "getLdStSORegOpValue"; - // FIXME: Simplify the printer - let PrintMethod = "printAddrMode2Operand"; - let DecoderMethod = "DecodeSORegMemOperand"; - let ParserMatchClass = MemRegOffsetAsmOperand; - let MIOperandInfo = (ops GPR:$base, GPRnopc:$offsreg, i32imm:$shift); -} - -// postidx_imm8 := +/- [0,255] -// -// 9 bit value: -// {8} 1 is imm8 is non-negative. 0 otherwise. -// {7-0} [0,255] imm8 value. -def PostIdxImm8AsmOperand : AsmOperandClass { let Name = "PostIdxImm8"; } -def postidx_imm8 : MemOperand { - let PrintMethod = "printPostIdxImm8Operand"; - let ParserMatchClass = PostIdxImm8AsmOperand; - let MIOperandInfo = (ops i32imm); -} - -// postidx_imm8s4 := +/- [0,1020] -// -// 9 bit value: -// {8} 1 is imm8 is non-negative. 0 otherwise. -// {7-0} [0,255] imm8 value, scaled by 4. 
-def PostIdxImm8s4AsmOperand : AsmOperandClass { let Name = "PostIdxImm8s4"; } -def postidx_imm8s4 : MemOperand { - let PrintMethod = "printPostIdxImm8s4Operand"; - let ParserMatchClass = PostIdxImm8s4AsmOperand; - let MIOperandInfo = (ops i32imm); -} - - -// postidx_reg := +/- reg -// -def PostIdxRegAsmOperand : AsmOperandClass { - let Name = "PostIdxReg"; - let ParserMethod = "parsePostIdxReg"; -} -def postidx_reg : MemOperand { - let EncoderMethod = "getPostIdxRegOpValue"; - let DecoderMethod = "DecodePostIdxReg"; - let PrintMethod = "printPostIdxRegOperand"; - let ParserMatchClass = PostIdxRegAsmOperand; - let MIOperandInfo = (ops GPRnopc, i32imm); -} - -def PostIdxRegShiftedAsmOperand : AsmOperandClass { - let Name = "PostIdxRegShifted"; - let ParserMethod = "parsePostIdxReg"; -} -def am2offset_reg : MemOperand, - ComplexPattern { - let EncoderMethod = "getAddrMode2OffsetOpValue"; - let PrintMethod = "printAddrMode2OffsetOperand"; - // When using this for assembly, it's always as a post-index offset. - let ParserMatchClass = PostIdxRegShiftedAsmOperand; - let MIOperandInfo = (ops GPRnopc, i32imm); -} - -// FIXME: am2offset_imm should only need the immediate, not the GPR. Having -// the GPR is purely vestigal at this point. -def AM2OffsetImmAsmOperand : AsmOperandClass { let Name = "AM2OffsetImm"; } -def am2offset_imm : MemOperand, - ComplexPattern { - let EncoderMethod = "getAddrMode2OffsetOpValue"; - let PrintMethod = "printAddrMode2OffsetOperand"; - let ParserMatchClass = AM2OffsetImmAsmOperand; - let MIOperandInfo = (ops GPRnopc, i32imm); -} - - -// addrmode3 := reg +/- reg -// addrmode3 := reg +/- imm8 -// -// FIXME: split into imm vs. reg versions. 
-def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; } -class AddrMode3 : MemOperand, - ComplexPattern { - let EncoderMethod = "getAddrMode3OpValue"; - let ParserMatchClass = AddrMode3AsmOperand; - let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); -} - -def addrmode3 : AddrMode3 -{ - let PrintMethod = "printAddrMode3Operand"; -} - -def addrmode3_pre : AddrMode3 -{ - let PrintMethod = "printAddrMode3Operand"; -} - -// FIXME: split into imm vs. reg versions. -// FIXME: parser method to handle +/- register. -def AM3OffsetAsmOperand : AsmOperandClass { - let Name = "AM3Offset"; - let ParserMethod = "parseAM3Offset"; -} -def am3offset : MemOperand, - ComplexPattern { - let EncoderMethod = "getAddrMode3OffsetOpValue"; - let PrintMethod = "printAddrMode3OffsetOperand"; - let ParserMatchClass = AM3OffsetAsmOperand; - let MIOperandInfo = (ops GPR, i32imm); -} - -// ldstm_mode := {ia, ib, da, db} -// -def ldstm_mode : OptionalDefOperand { - let EncoderMethod = "getLdStmModeOpValue"; - let PrintMethod = "printLdStmModeOperand"; -} - -// addrmode5 := reg +/- imm8*4 -// -def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; } -class AddrMode5 : MemOperand, - ComplexPattern { - let EncoderMethod = "getAddrMode5OpValue"; - let DecoderMethod = "DecodeAddrMode5Operand"; - let ParserMatchClass = AddrMode5AsmOperand; - let MIOperandInfo = (ops GPR:$base, i32imm); -} - -def addrmode5 : AddrMode5 { - let PrintMethod = "printAddrMode5Operand"; -} - -def addrmode5_pre : AddrMode5 { - let PrintMethod = "printAddrMode5Operand"; -} - -// addrmode5fp16 := reg +/- imm8*2 -// -def AddrMode5FP16AsmOperand : AsmOperandClass { let Name = "AddrMode5FP16"; } -class AddrMode5FP16 : Operand, - ComplexPattern { - let EncoderMethod = "getAddrMode5FP16OpValue"; - let DecoderMethod = "DecodeAddrMode5FP16Operand"; - let ParserMatchClass = AddrMode5FP16AsmOperand; - let MIOperandInfo = (ops GPR:$base, i32imm); -} - -def addrmode5fp16 : AddrMode5FP16 { - let 
PrintMethod = "printAddrMode5FP16Operand"; -} - -// addrmode6 := reg with optional alignment -// -def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; } -def addrmode6 : MemOperand, - ComplexPattern{ - let PrintMethod = "printAddrMode6Operand"; - let MIOperandInfo = (ops GPR:$addr, i32imm:$align); - let EncoderMethod = "getAddrMode6AddressOpValue"; - let DecoderMethod = "DecodeAddrMode6Operand"; - let ParserMatchClass = AddrMode6AsmOperand; -} - -def am6offset : MemOperand, - ComplexPattern { - let PrintMethod = "printAddrMode6OffsetOperand"; - let MIOperandInfo = (ops GPR); - let EncoderMethod = "getAddrMode6OffsetOpValue"; - let DecoderMethod = "DecodeGPRRegisterClass"; -} - -// Special version of addrmode6 to handle alignment encoding for VST1/VLD1 -// (single element from one lane) for size 32. -def addrmode6oneL32 : MemOperand, - ComplexPattern{ - let PrintMethod = "printAddrMode6Operand"; - let MIOperandInfo = (ops GPR:$addr, i32imm); - let EncoderMethod = "getAddrMode6OneLane32AddressOpValue"; -} - -// Base class for addrmode6 with specific alignment restrictions. -class AddrMode6Align : MemOperand, - ComplexPattern{ - let PrintMethod = "printAddrMode6Operand"; - let MIOperandInfo = (ops GPR:$addr, i32imm:$align); - let EncoderMethod = "getAddrMode6AddressOpValue"; - let DecoderMethod = "DecodeAddrMode6Operand"; -} - -// Special version of addrmode6 to handle no allowed alignment encoding for -// VLD/VST instructions and checking the alignment is not specified. -def AddrMode6AlignNoneAsmOperand : AsmOperandClass { - let Name = "AlignedMemoryNone"; - let DiagnosticString = "alignment must be omitted"; -} -def addrmode6alignNone : AddrMode6Align { - // The alignment specifier can only be omitted. - let ParserMatchClass = AddrMode6AlignNoneAsmOperand; -} - -// Special version of addrmode6 to handle 16-bit alignment encoding for -// VLD/VST instructions and checking the alignment value. 
-def AddrMode6Align16AsmOperand : AsmOperandClass { - let Name = "AlignedMemory16"; - let DiagnosticString = "alignment must be 16 or omitted"; -} -def addrmode6align16 : AddrMode6Align { - // The alignment specifier can only be 16 or omitted. - let ParserMatchClass = AddrMode6Align16AsmOperand; -} - -// Special version of addrmode6 to handle 32-bit alignment encoding for -// VLD/VST instructions and checking the alignment value. -def AddrMode6Align32AsmOperand : AsmOperandClass { - let Name = "AlignedMemory32"; - let DiagnosticString = "alignment must be 32 or omitted"; -} -def addrmode6align32 : AddrMode6Align { - // The alignment specifier can only be 32 or omitted. - let ParserMatchClass = AddrMode6Align32AsmOperand; -} - -// Special version of addrmode6 to handle 64-bit alignment encoding for -// VLD/VST instructions and checking the alignment value. -def AddrMode6Align64AsmOperand : AsmOperandClass { - let Name = "AlignedMemory64"; - let DiagnosticString = "alignment must be 64 or omitted"; -} -def addrmode6align64 : AddrMode6Align { - // The alignment specifier can only be 64 or omitted. - let ParserMatchClass = AddrMode6Align64AsmOperand; -} - -// Special version of addrmode6 to handle 64-bit or 128-bit alignment encoding -// for VLD/VST instructions and checking the alignment value. -def AddrMode6Align64or128AsmOperand : AsmOperandClass { - let Name = "AlignedMemory64or128"; - let DiagnosticString = "alignment must be 64, 128 or omitted"; -} -def addrmode6align64or128 : AddrMode6Align { - // The alignment specifier can only be 64, 128 or omitted. - let ParserMatchClass = AddrMode6Align64or128AsmOperand; -} - -// Special version of addrmode6 to handle 64-bit, 128-bit or 256-bit alignment -// encoding for VLD/VST instructions and checking the alignment value. 
-def AddrMode6Align64or128or256AsmOperand : AsmOperandClass { - let Name = "AlignedMemory64or128or256"; - let DiagnosticString = "alignment must be 64, 128, 256 or omitted"; -} -def addrmode6align64or128or256 : AddrMode6Align { - // The alignment specifier can only be 64, 128, 256 or omitted. - let ParserMatchClass = AddrMode6Align64or128or256AsmOperand; -} - -// Special version of addrmode6 to handle alignment encoding for VLD-dup -// instructions, specifically VLD4-dup. -def addrmode6dup : MemOperand, - ComplexPattern{ - let PrintMethod = "printAddrMode6Operand"; - let MIOperandInfo = (ops GPR:$addr, i32imm); - let EncoderMethod = "getAddrMode6DupAddressOpValue"; - // FIXME: This is close, but not quite right. The alignment specifier is - // different. - let ParserMatchClass = AddrMode6AsmOperand; -} - -// Base class for addrmode6dup with specific alignment restrictions. -class AddrMode6DupAlign : MemOperand, - ComplexPattern{ - let PrintMethod = "printAddrMode6Operand"; - let MIOperandInfo = (ops GPR:$addr, i32imm); - let EncoderMethod = "getAddrMode6DupAddressOpValue"; -} - -// Special version of addrmode6 to handle no allowed alignment encoding for -// VLD-dup instruction and checking the alignment is not specified. -def AddrMode6dupAlignNoneAsmOperand : AsmOperandClass { - let Name = "DupAlignedMemoryNone"; - let DiagnosticString = "alignment must be omitted"; -} -def addrmode6dupalignNone : AddrMode6DupAlign { - // The alignment specifier can only be omitted. - let ParserMatchClass = AddrMode6dupAlignNoneAsmOperand; -} - -// Special version of addrmode6 to handle 16-bit alignment encoding for VLD-dup -// instruction and checking the alignment value. -def AddrMode6dupAlign16AsmOperand : AsmOperandClass { - let Name = "DupAlignedMemory16"; - let DiagnosticString = "alignment must be 16 or omitted"; -} -def addrmode6dupalign16 : AddrMode6DupAlign { - // The alignment specifier can only be 16 or omitted. 
- let ParserMatchClass = AddrMode6dupAlign16AsmOperand; -} - -// Special version of addrmode6 to handle 32-bit alignment encoding for VLD-dup -// instruction and checking the alignment value. -def AddrMode6dupAlign32AsmOperand : AsmOperandClass { - let Name = "DupAlignedMemory32"; - let DiagnosticString = "alignment must be 32 or omitted"; -} -def addrmode6dupalign32 : AddrMode6DupAlign { - // The alignment specifier can only be 32 or omitted. - let ParserMatchClass = AddrMode6dupAlign32AsmOperand; -} - -// Special version of addrmode6 to handle 64-bit alignment encoding for VLD -// instructions and checking the alignment value. -def AddrMode6dupAlign64AsmOperand : AsmOperandClass { - let Name = "DupAlignedMemory64"; - let DiagnosticString = "alignment must be 64 or omitted"; -} -def addrmode6dupalign64 : AddrMode6DupAlign { - // The alignment specifier can only be 64 or omitted. - let ParserMatchClass = AddrMode6dupAlign64AsmOperand; -} - -// Special version of addrmode6 to handle 64-bit or 128-bit alignment encoding -// for VLD instructions and checking the alignment value. -def AddrMode6dupAlign64or128AsmOperand : AsmOperandClass { - let Name = "DupAlignedMemory64or128"; - let DiagnosticString = "alignment must be 64, 128 or omitted"; -} -def addrmode6dupalign64or128 : AddrMode6DupAlign { - // The alignment specifier can only be 64, 128 or omitted. 
- let ParserMatchClass = AddrMode6dupAlign64or128AsmOperand; -} - -// addrmodepc := pc + reg -// -def addrmodepc : MemOperand, - ComplexPattern { - let PrintMethod = "printAddrModePCOperand"; - let MIOperandInfo = (ops GPR, i32imm); -} - -// addr_offset_none := reg -// -def MemNoOffsetAsmOperand : AsmOperandClass { let Name = "MemNoOffset"; } -def addr_offset_none : MemOperand, - ComplexPattern { - let PrintMethod = "printAddrMode7Operand"; - let DecoderMethod = "DecodeAddrMode7Operand"; - let ParserMatchClass = MemNoOffsetAsmOperand; - let MIOperandInfo = (ops GPR:$base); -} - -def nohash_imm : Operand { - let PrintMethod = "printNoHashImmediate"; -} - -def CoprocNumAsmOperand : AsmOperandClass { - let Name = "CoprocNum"; - let ParserMethod = "parseCoprocNumOperand"; -} -def p_imm : Operand { - let PrintMethod = "printPImmediate"; - let ParserMatchClass = CoprocNumAsmOperand; - let DecoderMethod = "DecodeCoprocessor"; -} - -def CoprocRegAsmOperand : AsmOperandClass { - let Name = "CoprocReg"; - let ParserMethod = "parseCoprocRegOperand"; -} -def c_imm : Operand { - let PrintMethod = "printCImmediate"; - let ParserMatchClass = CoprocRegAsmOperand; -} -def CoprocOptionAsmOperand : AsmOperandClass { - let Name = "CoprocOption"; - let ParserMethod = "parseCoprocOptionOperand"; -} -def coproc_option_imm : Operand { - let PrintMethod = "printCoprocOptionImm"; - let ParserMatchClass = CoprocOptionAsmOperand; -} - -//===----------------------------------------------------------------------===// - -include "ARMInstrFormats.td" - -//===----------------------------------------------------------------------===// -// Multiclass helpers... -// - -/// AsI1_bin_irs - Defines a set of (op r, {mod_imm|r|so_reg}) patterns for a -/// binop that produces a value. 
-let TwoOperandAliasConstraint = "$Rn = $Rd" in -multiclass AsI1_bin_irs opcod, string opc, - InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - SDPatternOperator opnode, bit Commutable = 0> { - // The register-immediate version is re-materializable. This is useful - // in particular for taking the address of a local. - let isReMaterializable = 1 in { - def ri : AsI1, - Sched<[WriteALU, ReadALU]> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{25} = 1; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-0} = imm; - } - } - def rr : AsI1, - Sched<[WriteALU, ReadALU, ReadALU]> { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - let Inst{25} = 0; - let isCommutable = Commutable; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-4} = 0b00000000; - let Inst{3-0} = Rm; - } - - def rsi : AsI1, - Sched<[WriteALUsi, ReadALU]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-5} = shift{11-5}; - let Inst{4} = 0; - let Inst{3-0} = shift{3-0}; - } - - def rsr : AsI1, - Sched<[WriteALUsr, ReadALUsr]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-8} = shift{11-8}; - let Inst{7} = 0; - let Inst{6-5} = shift{6-5}; - let Inst{4} = 1; - let Inst{3-0} = shift{3-0}; - } -} - -/// AsI1_rbin_irs - Same as AsI1_bin_irs except the order of operands are -/// reversed. The 'rr' form is only defined for the disassembler; for codegen -/// it is equivalent to the AsI1_bin_irs counterpart. -let TwoOperandAliasConstraint = "$Rn = $Rd" in -multiclass AsI1_rbin_irs opcod, string opc, - InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - SDNode opnode, bit Commutable = 0> { - // The register-immediate version is re-materializable. This is useful - // in particular for taking the address of a local. 
- let isReMaterializable = 1 in { - def ri : AsI1, - Sched<[WriteALU, ReadALU]> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{25} = 1; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-0} = imm; - } - } - def rr : AsI1, - Sched<[WriteALU, ReadALU, ReadALU]> { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - let Inst{11-4} = 0b00000000; - let Inst{25} = 0; - let Inst{3-0} = Rm; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - } - - def rsi : AsI1, - Sched<[WriteALUsi, ReadALU]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-5} = shift{11-5}; - let Inst{4} = 0; - let Inst{3-0} = shift{3-0}; - } - - def rsr : AsI1, - Sched<[WriteALUsr, ReadALUsr]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-8} = shift{11-8}; - let Inst{7} = 0; - let Inst{6-5} = shift{6-5}; - let Inst{4} = 1; - let Inst{3-0} = shift{3-0}; - } -} - -/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default. -/// -/// These opcodes will be converted to the real non-S opcodes by -/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand. 
-let hasPostISelHook = 1, Defs = [CPSR] in { -multiclass AsI1_bin_s_irs { - def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p), - 4, iii, - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, mod_imm:$imm))]>, - Sched<[WriteALU, ReadALU]>; - - def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p), - 4, iir, - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>, - Sched<[WriteALU, ReadALU, ReadALU]> { - let isCommutable = Commutable; - } - def rsi : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$Rn, so_reg_imm:$shift, pred:$p), - 4, iis, - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, - so_reg_imm:$shift))]>, - Sched<[WriteALUsi, ReadALU]>; - - def rsr : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$Rn, so_reg_reg:$shift, pred:$p), - 4, iis, - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, - so_reg_reg:$shift))]>, - Sched<[WriteALUSsr, ReadALUsr]>; -} -} - -/// AsI1_rbin_s_is - Same as AsI1_bin_s_irs, except selection DAG -/// operands are reversed. -let hasPostISelHook = 1, Defs = [CPSR] in { -multiclass AsI1_rbin_s_is { - def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p), - 4, iii, - [(set GPR:$Rd, CPSR, (opnode mod_imm:$imm, GPR:$Rn))]>, - Sched<[WriteALU, ReadALU]>; - - def rsi : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$Rn, so_reg_imm:$shift, pred:$p), - 4, iis, - [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, - GPR:$Rn))]>, - Sched<[WriteALUsi, ReadALU]>; - - def rsr : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$Rn, so_reg_reg:$shift, pred:$p), - 4, iis, - [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, - GPR:$Rn))]>, - Sched<[WriteALUSsr, ReadALUsr]>; -} -} - -/// AI1_cmp_irs - Defines a set of (op r, {mod_imm|r|so_reg}) cmp / test -/// patterns. Similar to AsI1_bin_irs except the instruction does not produce -/// a explicit result, only implicitly set CPSR. 
-let isCompare = 1, Defs = [CPSR] in { -multiclass AI1_cmp_irs opcod, string opc, - InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - SDPatternOperator opnode, bit Commutable = 0, - string rrDecoderMethod = ""> { - def ri : AI1, - Sched<[WriteCMP, ReadALU]> { - bits<4> Rn; - bits<12> imm; - let Inst{25} = 1; - let Inst{20} = 1; - let Inst{19-16} = Rn; - let Inst{15-12} = 0b0000; - let Inst{11-0} = imm; - - let Unpredictable{15-12} = 0b1111; - } - def rr : AI1, - Sched<[WriteCMP, ReadALU, ReadALU]> { - bits<4> Rn; - bits<4> Rm; - let isCommutable = Commutable; - let Inst{25} = 0; - let Inst{20} = 1; - let Inst{19-16} = Rn; - let Inst{15-12} = 0b0000; - let Inst{11-4} = 0b00000000; - let Inst{3-0} = Rm; - let DecoderMethod = rrDecoderMethod; - - let Unpredictable{15-12} = 0b1111; - } - def rsi : AI1, - Sched<[WriteCMPsi, ReadALU]> { - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{20} = 1; - let Inst{19-16} = Rn; - let Inst{15-12} = 0b0000; - let Inst{11-5} = shift{11-5}; - let Inst{4} = 0; - let Inst{3-0} = shift{3-0}; - - let Unpredictable{15-12} = 0b1111; - } - def rsr : AI1, - Sched<[WriteCMPsr, ReadALU]> { - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{20} = 1; - let Inst{19-16} = Rn; - let Inst{15-12} = 0b0000; - let Inst{11-8} = shift{11-8}; - let Inst{7} = 0; - let Inst{6-5} = shift{6-5}; - let Inst{4} = 1; - let Inst{3-0} = shift{3-0}; - - let Unpredictable{15-12} = 0b1111; - } - -} -} - -/// AI_ext_rrot - A unary operation with two forms: one whose operand is a -/// register and one whose operand is a register rotated by 8/16/24. -/// FIXME: Remove the 'r' variant. Its rot_imm is zero. 
-class AI_ext_rrot opcod, string opc, PatFrag opnode> - : AExtI, - Requires<[IsARM, HasV6]>, Sched<[WriteALUsi]> { - bits<4> Rd; - bits<4> Rm; - bits<2> rot; - let Inst{19-16} = 0b1111; - let Inst{15-12} = Rd; - let Inst{11-10} = rot; - let Inst{3-0} = Rm; -} - -class AI_ext_rrot_np opcod, string opc> - : AExtI, - Requires<[IsARM, HasV6]>, Sched<[WriteALUsi]> { - bits<2> rot; - let Inst{19-16} = 0b1111; - let Inst{11-10} = rot; - } - -/// AI_exta_rrot - A binary operation with two forms: one whose operand is a -/// register and one whose operand is a register rotated by 8/16/24. -class AI_exta_rrot opcod, string opc, PatFrag opnode> - : AExtI, - Requires<[IsARM, HasV6]>, Sched<[WriteALUsr]> { - bits<4> Rd; - bits<4> Rm; - bits<4> Rn; - bits<2> rot; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-10} = rot; - let Inst{9-4} = 0b000111; - let Inst{3-0} = Rm; -} - -class AI_exta_rrot_np opcod, string opc> - : AExtI, - Requires<[IsARM, HasV6]>, Sched<[WriteALUsr]> { - bits<4> Rn; - bits<2> rot; - let Inst{19-16} = Rn; - let Inst{11-10} = rot; -} - -/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube. 
-let TwoOperandAliasConstraint = "$Rn = $Rd" in -multiclass AI1_adde_sube_irs opcod, string opc, SDNode opnode, - bit Commutable = 0> { - let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { - def ri : AsI1, - Requires<[IsARM]>, - Sched<[WriteALU, ReadALU]> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{25} = 1; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{11-0} = imm; - } - def rr : AsI1, - Requires<[IsARM]>, - Sched<[WriteALU, ReadALU, ReadALU]> { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - let Inst{11-4} = 0b00000000; - let Inst{25} = 0; - let isCommutable = Commutable; - let Inst{3-0} = Rm; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - } - def rsi : AsI1, - Requires<[IsARM]>, - Sched<[WriteALUsi, ReadALU]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-5} = shift{11-5}; - let Inst{4} = 0; - let Inst{3-0} = shift{3-0}; - } - def rsr : AsI1, - Requires<[IsARM]>, - Sched<[WriteALUsr, ReadALUsr]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-8} = shift{11-8}; - let Inst{7} = 0; - let Inst{6-5} = shift{6-5}; - let Inst{4} = 1; - let Inst{3-0} = shift{3-0}; - } - } -} - -/// AI1_rsc_irs - Define instructions and patterns for rsc -let TwoOperandAliasConstraint = "$Rn = $Rd" in -multiclass AI1_rsc_irs opcod, string opc, SDNode opnode> { - let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { - def ri : AsI1, - Requires<[IsARM]>, - Sched<[WriteALU, ReadALU]> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{25} = 1; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{11-0} = imm; - } - def rr : AsI1, - Sched<[WriteALU, ReadALU, ReadALU]> { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - let Inst{11-4} = 0b00000000; - let Inst{25} = 0; - let Inst{3-0} = Rm; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - } - def rsi : AsI1, - Requires<[IsARM]>, - 
Sched<[WriteALUsi, ReadALU]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-5} = shift{11-5}; - let Inst{4} = 0; - let Inst{3-0} = shift{3-0}; - } - def rsr : AsI1, - Requires<[IsARM]>, - Sched<[WriteALUsr, ReadALUsr]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-8} = shift{11-8}; - let Inst{7} = 0; - let Inst{6-5} = shift{6-5}; - let Inst{4} = 1; - let Inst{3-0} = shift{3-0}; - } - } -} - -let canFoldAsLoad = 1, isReMaterializable = 1 in { -multiclass AI_ldr1 { - // Note: We use the complex addrmode_imm12 rather than just an input - // GPR and a constrained immediate so that we can use this to match - // frame index references and avoid matching constant pool references. - def i12: AI2ldst<0b010, 1, isByte, (outs GPR:$Rt), (ins addrmode_imm12:$addr), - AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr", - [(set GPR:$Rt, (opnode addrmode_imm12:$addr))]> { - bits<4> Rt; - bits<17> addr; - let Inst{23} = addr{12}; // U (add = ('U' == 1)) - let Inst{19-16} = addr{16-13}; // Rn - let Inst{15-12} = Rt; - let Inst{11-0} = addr{11-0}; // imm12 - } - def rs : AI2ldst<0b011, 1, isByte, (outs GPR:$Rt), (ins ldst_so_reg:$shift), - AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift", - [(set GPR:$Rt, (opnode ldst_so_reg:$shift))]> { - bits<4> Rt; - bits<17> shift; - let shift{4} = 0; // Inst{4} = 0 - let Inst{23} = shift{12}; // U (add = ('U' == 1)) - let Inst{19-16} = shift{16-13}; // Rn - let Inst{15-12} = Rt; - let Inst{11-0} = shift{11-0}; - } -} -} - -let canFoldAsLoad = 1, isReMaterializable = 1 in { -multiclass AI_ldr1nopc { - // Note: We use the complex addrmode_imm12 rather than just an input - // GPR and a constrained immediate so that we can use this to match - // frame index references and avoid matching constant pool references. 
- def i12: AI2ldst<0b010, 1, isByte, (outs GPRnopc:$Rt), - (ins addrmode_imm12:$addr), - AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr", - [(set GPRnopc:$Rt, (opnode addrmode_imm12:$addr))]> { - bits<4> Rt; - bits<17> addr; - let Inst{23} = addr{12}; // U (add = ('U' == 1)) - let Inst{19-16} = addr{16-13}; // Rn - let Inst{15-12} = Rt; - let Inst{11-0} = addr{11-0}; // imm12 - } - def rs : AI2ldst<0b011, 1, isByte, (outs GPRnopc:$Rt), - (ins ldst_so_reg:$shift), - AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift", - [(set GPRnopc:$Rt, (opnode ldst_so_reg:$shift))]> { - bits<4> Rt; - bits<17> shift; - let shift{4} = 0; // Inst{4} = 0 - let Inst{23} = shift{12}; // U (add = ('U' == 1)) - let Inst{19-16} = shift{16-13}; // Rn - let Inst{15-12} = Rt; - let Inst{11-0} = shift{11-0}; - } -} -} - - -multiclass AI_str1 { - // Note: We use the complex addrmode_imm12 rather than just an input - // GPR and a constrained immediate so that we can use this to match - // frame index references and avoid matching constant pool references. 
- def i12 : AI2ldst<0b010, 0, isByte, (outs), - (ins GPR:$Rt, addrmode_imm12:$addr), - AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr", - [(opnode GPR:$Rt, addrmode_imm12:$addr)]> { - bits<4> Rt; - bits<17> addr; - let Inst{23} = addr{12}; // U (add = ('U' == 1)) - let Inst{19-16} = addr{16-13}; // Rn - let Inst{15-12} = Rt; - let Inst{11-0} = addr{11-0}; // imm12 - } - def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPR:$Rt, ldst_so_reg:$shift), - AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift", - [(opnode GPR:$Rt, ldst_so_reg:$shift)]> { - bits<4> Rt; - bits<17> shift; - let shift{4} = 0; // Inst{4} = 0 - let Inst{23} = shift{12}; // U (add = ('U' == 1)) - let Inst{19-16} = shift{16-13}; // Rn - let Inst{15-12} = Rt; - let Inst{11-0} = shift{11-0}; - } -} - -multiclass AI_str1nopc { - // Note: We use the complex addrmode_imm12 rather than just an input - // GPR and a constrained immediate so that we can use this to match - // frame index references and avoid matching constant pool references. 
- def i12 : AI2ldst<0b010, 0, isByte, (outs), - (ins GPRnopc:$Rt, addrmode_imm12:$addr), - AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr", - [(opnode GPRnopc:$Rt, addrmode_imm12:$addr)]> { - bits<4> Rt; - bits<17> addr; - let Inst{23} = addr{12}; // U (add = ('U' == 1)) - let Inst{19-16} = addr{16-13}; // Rn - let Inst{15-12} = Rt; - let Inst{11-0} = addr{11-0}; // imm12 - } - def rs : AI2ldst<0b011, 0, isByte, (outs), - (ins GPRnopc:$Rt, ldst_so_reg:$shift), - AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift", - [(opnode GPRnopc:$Rt, ldst_so_reg:$shift)]> { - bits<4> Rt; - bits<17> shift; - let shift{4} = 0; // Inst{4} = 0 - let Inst{23} = shift{12}; // U (add = ('U' == 1)) - let Inst{19-16} = shift{16-13}; // Rn - let Inst{15-12} = Rt; - let Inst{11-0} = shift{11-0}; - } -} - - -//===----------------------------------------------------------------------===// -// Instructions -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Miscellaneous Instructions. -// - -/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool in -/// the function. The first operand is the ID# for this instruction, the second -/// is the index into the MachineConstantPool that this is, the third is the -/// size in bytes of this constant pool entry. -let hasSideEffects = 0, isNotDuplicable = 1 in -def CONSTPOOL_ENTRY : -PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, - i32imm:$size), NoItinerary, []>; - -/// A jumptable consisting of direct 32-bit addresses of the destination basic -/// blocks (either absolute, or relative to the start of the jump-table in PIC -/// mode). Used mostly in ARM and Thumb-1 modes. -def JUMPTABLE_ADDRS : -PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, - i32imm:$size), NoItinerary, []>; - -/// A jumptable consisting of 32-bit jump instructions. 
Used for Thumb-2 tables -/// that cannot be optimised to use TBB or TBH. -def JUMPTABLE_INSTS : -PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, - i32imm:$size), NoItinerary, []>; - -/// A jumptable consisting of 8-bit unsigned integers representing offsets from -/// a TBB instruction. -def JUMPTABLE_TBB : -PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, - i32imm:$size), NoItinerary, []>; - -/// A jumptable consisting of 16-bit unsigned integers representing offsets from -/// a TBH instruction. -def JUMPTABLE_TBH : -PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, - i32imm:$size), NoItinerary, []>; - - -// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE -// from removing one half of the matched pairs. That breaks PEI, which assumes -// these will always be in pairs, and asserts if it finds otherwise. Better way? -let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { -def ADJCALLSTACKUP : -PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary, - [(ARMcallseq_end timm:$amt1, timm:$amt2)]>; - -def ADJCALLSTACKDOWN : -PseudoInst<(outs), (ins i32imm:$amt, i32imm:$amt2, pred:$p), NoItinerary, - [(ARMcallseq_start timm:$amt, timm:$amt2)]>; -} - -def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary, - "hint", "\t$imm", [(int_arm_hint imm0_239:$imm)]>, - Requires<[IsARM, HasV6]> { - bits<8> imm; - let Inst{27-8} = 0b00110010000011110000; - let Inst{7-0} = imm; - let DecoderMethod = "DecodeHINTInstruction"; -} - -def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6K]>; -def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6K]>; -def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6K]>; -def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6K]>; -def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6K]>; -def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>; -def : 
InstAlias<"esb$p", (HINT 16, pred:$p)>, Requires<[IsARM, HasRAS]>; -def : InstAlias<"csdb$p", (HINT 20, pred:$p)>, Requires<[IsARM, HasV6K]>; - -def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", - "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (int_arm_sel GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasV6]> { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - let Inst{3-0} = Rm; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{27-20} = 0b01101000; - let Inst{7-4} = 0b1011; - let Inst{11-8} = 0b1111; - let Unpredictable{11-8} = 0b1111; -} - -// The 16-bit operand $val can be used by a debugger to store more information -// about the breakpoint. -def BKPT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, - "bkpt", "\t$val", []>, Requires<[IsARM]> { - bits<16> val; - let Inst{3-0} = val{3-0}; - let Inst{19-8} = val{15-4}; - let Inst{27-20} = 0b00010010; - let Inst{31-28} = 0xe; // AL - let Inst{7-4} = 0b0111; -} -// default immediate for breakpoint mnemonic -def : InstAlias<"bkpt", (BKPT 0), 0>, Requires<[IsARM]>; - -def HLT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, - "hlt", "\t$val", []>, Requires<[IsARM, HasV8]> { - bits<16> val; - let Inst{3-0} = val{3-0}; - let Inst{19-8} = val{15-4}; - let Inst{27-20} = 0b00010000; - let Inst{31-28} = 0xe; // AL - let Inst{7-4} = 0b0111; -} - -// Change Processor State -// FIXME: We should use InstAlias to handle the optional operands. 
-class CPS - : AXI<(outs), iops, MiscFrm, NoItinerary, !strconcat("cps", asm_ops), - []>, Requires<[IsARM]> { - bits<2> imod; - bits<3> iflags; - bits<5> mode; - bit M; - - let Inst{31-28} = 0b1111; - let Inst{27-20} = 0b00010000; - let Inst{19-18} = imod; - let Inst{17} = M; // Enabled if mode is set; - let Inst{16-9} = 0b00000000; - let Inst{8-6} = iflags; - let Inst{5} = 0; - let Inst{4-0} = mode; -} - -let DecoderMethod = "DecodeCPSInstruction" in { -let M = 1 in - def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, imm0_31:$mode), - "$imod\t$iflags, $mode">; -let mode = 0, M = 0 in - def CPS2p : CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod\t$iflags">; - -let imod = 0, iflags = 0, M = 1 in - def CPS1p : CPS<(ins imm0_31:$mode), "\t$mode">; -} - -// Preload signals the memory system of possible future data/instruction access. -multiclass APreLoad read, bits<1> data, string opc> { - - def i12 : AXIM<(outs), (ins addrmode_imm12:$addr), AddrMode_i12, MiscFrm, - IIC_Preload, !strconcat(opc, "\t$addr"), - [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]>, - Sched<[WritePreLd]> { - bits<4> Rt; - bits<17> addr; - let Inst{31-26} = 0b111101; - let Inst{25} = 0; // 0 for immediate form - let Inst{24} = data; - let Inst{23} = addr{12}; // U (add = ('U' == 1)) - let Inst{22} = read; - let Inst{21-20} = 0b01; - let Inst{19-16} = addr{16-13}; // Rn - let Inst{15-12} = 0b1111; - let Inst{11-0} = addr{11-0}; // imm12 - } - - def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload, - !strconcat(opc, "\t$shift"), - [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]>, - Sched<[WritePreLd]> { - bits<17> shift; - let Inst{31-26} = 0b111101; - let Inst{25} = 1; // 1 for register form - let Inst{24} = data; - let Inst{23} = shift{12}; // U (add = ('U' == 1)) - let Inst{22} = read; - let Inst{21-20} = 0b01; - let Inst{19-16} = shift{16-13}; // Rn - let Inst{15-12} = 0b1111; - let Inst{11-0} = shift{11-0}; - let Inst{4} = 0; - } -} - -defm PLD : 
APreLoad<1, 1, "pld">, Requires<[IsARM]>; -defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>; -defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>; - -def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary, - "setend\t$end", []>, Requires<[IsARM]>, Deprecated { - bits<1> end; - let Inst{31-10} = 0b1111000100000001000000; - let Inst{9} = end; - let Inst{8-0} = 0; -} - -def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", - [(int_arm_dbg imm0_15:$opt)]>, Requires<[IsARM, HasV7]> { - bits<4> opt; - let Inst{27-4} = 0b001100100000111100001111; - let Inst{3-0} = opt; -} - -// A8.8.247 UDF - Undefined (Encoding A1) -def UDF : AInoP<(outs), (ins imm0_65535:$imm16), MiscFrm, NoItinerary, - "udf", "\t$imm16", [(int_arm_undefined imm0_65535:$imm16)]> { - bits<16> imm16; - let Inst{31-28} = 0b1110; // AL - let Inst{27-25} = 0b011; - let Inst{24-20} = 0b11111; - let Inst{19-8} = imm16{15-4}; - let Inst{7-4} = 0b1111; - let Inst{3-0} = imm16{3-0}; -} - -/* - * A5.4 Permanently UNDEFINED instructions. - * - * For most targets use UDF #65006, for which the OS will generate SIGTRAP. - * Other UDF encodings generate SIGILL. - * - * NaCl's OS instead chooses an ARM UDF encoding that's also a UDF in Thumb. - * Encoding A1: - * 1110 0111 1111 iiii iiii iiii 1111 iiii - * Encoding T1: - * 1101 1110 iiii iiii - * It uses the following encoding: - * 1110 0111 1111 1110 1101 1110 1111 0000 - * - In ARM: UDF #60896; - * - In Thumb: UDF #254 followed by a branch-to-self. - */ -let isBarrier = 1, isTerminator = 1 in -def TRAPNaCl : AXI<(outs), (ins), MiscFrm, NoItinerary, - "trap", [(trap)]>, - Requires<[IsARM,UseNaClTrap]> { - let Inst = 0xe7fedef0; -} -let isBarrier = 1, isTerminator = 1 in -def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, - "trap", [(trap)]>, - Requires<[IsARM,DontUseNaClTrap]> { - let Inst = 0xe7ffdefe; -} - -// Address computation and loads and stores in PIC mode. 
-let isNotDuplicable = 1 in { -def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), - 4, IIC_iALUr, - [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>, - Sched<[WriteALU, ReadALU]>; - -let AddedComplexity = 10 in { -def PICLDR : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - 4, IIC_iLoad_r, - [(set GPR:$dst, (load addrmodepc:$addr))]>; - -def PICLDRH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p), - 4, IIC_iLoad_bh_r, - [(set GPR:$Rt, (zextloadi16 addrmodepc:$addr))]>; - -def PICLDRB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p), - 4, IIC_iLoad_bh_r, - [(set GPR:$Rt, (zextloadi8 addrmodepc:$addr))]>; - -def PICLDRSH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p), - 4, IIC_iLoad_bh_r, - [(set GPR:$Rt, (sextloadi16 addrmodepc:$addr))]>; - -def PICLDRSB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p), - 4, IIC_iLoad_bh_r, - [(set GPR:$Rt, (sextloadi8 addrmodepc:$addr))]>; -} -let AddedComplexity = 10 in { -def PICSTR : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - 4, IIC_iStore_r, [(store GPR:$src, addrmodepc:$addr)]>; - -def PICSTRH : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - 4, IIC_iStore_bh_r, [(truncstorei16 GPR:$src, - addrmodepc:$addr)]>; - -def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - 4, IIC_iStore_bh_r, [(truncstorei8 GPR:$src, addrmodepc:$addr)]>; -} -} // isNotDuplicable = 1 - - -// LEApcrel - Load a pc-relative address into a register without offending the -// assembler. -let hasSideEffects = 0, isReMaterializable = 1 in -// The 'adr' mnemonic encodes differently if the label is before or after -// the instruction. The {24-21} opcode bits are set by the fixup, as we don't -// know until then which form of the instruction will be used. 
-def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label), - MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []>, - Sched<[WriteALU, ReadALU]> { - bits<4> Rd; - bits<14> label; - let Inst{27-25} = 0b001; - let Inst{24} = 0; - let Inst{23-22} = label{13-12}; - let Inst{21} = 0; - let Inst{20} = 0; - let Inst{19-16} = 0b1111; - let Inst{15-12} = Rd; - let Inst{11-0} = label{11-0}; -} - -let hasSideEffects = 1 in { -def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p), - 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; - -def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd), - (ins i32imm:$label, pred:$p), - 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; -} - -//===----------------------------------------------------------------------===// -// Control Flow Instructions. -// - -let isReturn = 1, isTerminator = 1, isBarrier = 1 in { - // ARMV4T and above - def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br, - "bx", "\tlr", [(ARMretflag)]>, - Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> { - let Inst{27-0} = 0b0001001011111111111100011110; - } - - // ARMV4 only - def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br, - "mov", "\tpc, lr", [(ARMretflag)]>, - Requires<[IsARM, NoV4T]>, Sched<[WriteBr]> { - let Inst{27-0} = 0b0001101000001111000000001110; - } - - // Exception return: N.b. doesn't set CPSR as far as we're concerned (it sets - // the user-space one). 
- def SUBS_PC_LR : ARMPseudoInst<(outs), (ins i32imm:$offset, pred:$p), - 4, IIC_Br, - [(ARMintretflag imm:$offset)]>; -} - -// Indirect branches -let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { - // ARMV4T and above - def BX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst", - [(brind GPR:$dst)]>, - Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> { - bits<4> dst; - let Inst{31-4} = 0b1110000100101111111111110001; - let Inst{3-0} = dst; - } - - def BX_pred : AI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, - "bx", "\t$dst", [/* pattern left blank */]>, - Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> { - bits<4> dst; - let Inst{27-4} = 0b000100101111111111110001; - let Inst{3-0} = dst; - } -} - -// SP is marked as a use to prevent stack-pointer assignments that appear -// immediately before calls from potentially appearing dead. -let isCall = 1, - // FIXME: Do we really need a non-predicated version? If so, it should - // at least be a pseudo instruction expanding to the predicated version - // at MC lowering time. 
- Defs = [LR], Uses = [SP] in { - def BL : ABXI<0b1011, (outs), (ins arm_bl_target:$func), - IIC_Br, "bl\t$func", - [(ARMcall tglobaladdr:$func)]>, - Requires<[IsARM]>, Sched<[WriteBrL]> { - let Inst{31-28} = 0b1110; - bits<24> func; - let Inst{23-0} = func; - let DecoderMethod = "DecodeBranchImmInstruction"; - } - - def BL_pred : ABI<0b1011, (outs), (ins arm_bl_target:$func), - IIC_Br, "bl", "\t$func", - [(ARMcall_pred tglobaladdr:$func)]>, - Requires<[IsARM]>, Sched<[WriteBrL]> { - bits<24> func; - let Inst{23-0} = func; - let DecoderMethod = "DecodeBranchImmInstruction"; - } - - // ARMv5T and above - def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm, - IIC_Br, "blx\t$func", - [(ARMcall GPR:$func)]>, - Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { - bits<4> func; - let Inst{31-4} = 0b1110000100101111111111110011; - let Inst{3-0} = func; - } - - def BLX_pred : AI<(outs), (ins GPR:$func), BrMiscFrm, - IIC_Br, "blx", "\t$func", - [(ARMcall_pred GPR:$func)]>, - Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { - bits<4> func; - let Inst{27-4} = 0b000100101111111111110011; - let Inst{3-0} = func; - } - - // ARMv4T - // Note: Restrict $func to the tGPR regclass to prevent it being in LR. - def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func), - 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, HasV4T]>, Sched<[WriteBr]>; - - // ARMv4 - def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func), - 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>; - - // mov lr, pc; b if callee is marked noreturn to avoid confusing the - // return stack predictor. - def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins arm_bl_target:$func), - 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsARM]>, Sched<[WriteBr]>; -} - -let isBranch = 1, isTerminator = 1 in { - // FIXME: should be able to write a pattern for ARMBrcond, but can't use - // a two-value operand where a dag node expects two operands. 
:( - def Bcc : ABI<0b1010, (outs), (ins arm_br_target:$target), - IIC_Br, "b", "\t$target", - [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>, - Sched<[WriteBr]> { - bits<24> target; - let Inst{23-0} = target; - let DecoderMethod = "DecodeBranchImmInstruction"; - } - - let isBarrier = 1 in { - // B is "predicable" since it's just a Bcc with an 'always' condition. - let isPredicable = 1 in - // FIXME: We shouldn't need this pseudo at all. Just using Bcc directly - // should be sufficient. - // FIXME: Is B really a Barrier? That doesn't seem right. - def B : ARMPseudoExpand<(outs), (ins arm_br_target:$target), 4, IIC_Br, - [(br bb:$target)], (Bcc arm_br_target:$target, - (ops 14, zero_reg))>, - Sched<[WriteBr]>; - - let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in { - def BR_JTr : ARMPseudoInst<(outs), - (ins GPR:$target, i32imm:$jt), - 0, IIC_Br, - [(ARMbrjt GPR:$target, tjumptable:$jt)]>, - Sched<[WriteBr]>; - def BR_JTm_i12 : ARMPseudoInst<(outs), - (ins addrmode_imm12:$target, i32imm:$jt), - 0, IIC_Br, - [(ARMbrjt (i32 (load addrmode_imm12:$target)), - tjumptable:$jt)]>, Sched<[WriteBrTbl]>; - def BR_JTm_rs : ARMPseudoInst<(outs), - (ins ldst_so_reg:$target, i32imm:$jt), - 0, IIC_Br, - [(ARMbrjt (i32 (load ldst_so_reg:$target)), - tjumptable:$jt)]>, Sched<[WriteBrTbl]>; - def BR_JTadd : ARMPseudoInst<(outs), - (ins GPR:$target, GPR:$idx, i32imm:$jt), - 0, IIC_Br, - [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt)]>, - Sched<[WriteBrTbl]>; - } // isNotDuplicable = 1, isIndirectBranch = 1 - } // isBarrier = 1 - -} - -// BLX (immediate) -def BLXi : AXI<(outs), (ins arm_blx_target:$target), BrMiscFrm, NoItinerary, - "blx\t$target", []>, - Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { - let Inst{31-25} = 0b1111101; - bits<25> target; - let Inst{23-0} = target{24-1}; - let Inst{24} = target{0}; - let isCall = 1; -} - -// Branch and Exchange Jazelle -def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", - [/* pattern left blank 
*/]>, Sched<[WriteBr]> { - bits<4> func; - let Inst{23-20} = 0b0010; - let Inst{19-8} = 0xfff; - let Inst{7-4} = 0b0010; - let Inst{3-0} = func; - let isBranch = 1; -} - -// Tail calls. - -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { - def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst), IIC_Br, []>, - Sched<[WriteBr]>; - - def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>, - Sched<[WriteBr]>; - - def TAILJMPd : ARMPseudoExpand<(outs), (ins arm_br_target:$dst), - 4, IIC_Br, [], - (Bcc arm_br_target:$dst, (ops 14, zero_reg))>, - Requires<[IsARM]>, Sched<[WriteBr]>; - - def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst), - 4, IIC_Br, [], - (BX GPR:$dst)>, Sched<[WriteBr]>, - Requires<[IsARM, HasV4T]>; -} - -// Secure Monitor Call is a system instruction. -def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", - []>, Requires<[IsARM, HasTrustZone]> { - bits<4> opt; - let Inst{23-4} = 0b01100000000000000111; - let Inst{3-0} = opt; -} -def : MnemonicAlias<"smi", "smc">; - -// Supervisor Call (Software Interrupt) -let isCall = 1, Uses = [SP] in { -def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []>, - Sched<[WriteBr]> { - bits<24> svc; - let Inst{23-0} = svc; -} -} - -// Store Return State -class SRSI - : XI<(outs), (ins imm0_31:$mode), AddrModeNone, 4, IndexModeNone, BrFrm, - NoItinerary, asm, "", []> { - bits<5> mode; - let Inst{31-28} = 0b1111; - let Inst{27-25} = 0b100; - let Inst{22} = 1; - let Inst{21} = wb; - let Inst{20} = 0; - let Inst{19-16} = 0b1101; // SP - let Inst{15-5} = 0b00000101000; - let Inst{4-0} = mode; -} - -def SRSDA : SRSI<0, "srsda\tsp, $mode"> { - let Inst{24-23} = 0; -} -def SRSDA_UPD : SRSI<1, "srsda\tsp!, $mode"> { - let Inst{24-23} = 0; -} -def SRSDB : SRSI<0, "srsdb\tsp, $mode"> { - let Inst{24-23} = 0b10; -} -def SRSDB_UPD : SRSI<1, "srsdb\tsp!, $mode"> { - let Inst{24-23} = 0b10; -} -def SRSIA : SRSI<0, "srsia\tsp, $mode"> { - let 
Inst{24-23} = 0b01; -} -def SRSIA_UPD : SRSI<1, "srsia\tsp!, $mode"> { - let Inst{24-23} = 0b01; -} -def SRSIB : SRSI<0, "srsib\tsp, $mode"> { - let Inst{24-23} = 0b11; -} -def SRSIB_UPD : SRSI<1, "srsib\tsp!, $mode"> { - let Inst{24-23} = 0b11; -} - -def : ARMInstAlias<"srsda $mode", (SRSDA imm0_31:$mode)>; -def : ARMInstAlias<"srsda $mode!", (SRSDA_UPD imm0_31:$mode)>; - -def : ARMInstAlias<"srsdb $mode", (SRSDB imm0_31:$mode)>; -def : ARMInstAlias<"srsdb $mode!", (SRSDB_UPD imm0_31:$mode)>; - -def : ARMInstAlias<"srsia $mode", (SRSIA imm0_31:$mode)>; -def : ARMInstAlias<"srsia $mode!", (SRSIA_UPD imm0_31:$mode)>; - -def : ARMInstAlias<"srsib $mode", (SRSIB imm0_31:$mode)>; -def : ARMInstAlias<"srsib $mode!", (SRSIB_UPD imm0_31:$mode)>; - -// Return From Exception -class RFEI - : XI<(outs), (ins GPR:$Rn), AddrModeNone, 4, IndexModeNone, BrFrm, - NoItinerary, asm, "", []> { - bits<4> Rn; - let Inst{31-28} = 0b1111; - let Inst{27-25} = 0b100; - let Inst{22} = 0; - let Inst{21} = wb; - let Inst{20} = 1; - let Inst{19-16} = Rn; - let Inst{15-0} = 0xa00; -} - -def RFEDA : RFEI<0, "rfeda\t$Rn"> { - let Inst{24-23} = 0; -} -def RFEDA_UPD : RFEI<1, "rfeda\t$Rn!"> { - let Inst{24-23} = 0; -} -def RFEDB : RFEI<0, "rfedb\t$Rn"> { - let Inst{24-23} = 0b10; -} -def RFEDB_UPD : RFEI<1, "rfedb\t$Rn!"> { - let Inst{24-23} = 0b10; -} -def RFEIA : RFEI<0, "rfeia\t$Rn"> { - let Inst{24-23} = 0b01; -} -def RFEIA_UPD : RFEI<1, "rfeia\t$Rn!"> { - let Inst{24-23} = 0b01; -} -def RFEIB : RFEI<0, "rfeib\t$Rn"> { - let Inst{24-23} = 0b11; -} -def RFEIB_UPD : RFEI<1, "rfeib\t$Rn!"> { - let Inst{24-23} = 0b11; -} - -// Hypervisor Call is a system instruction -let isCall = 1 in { -def HVC : AInoP< (outs), (ins imm0_65535:$imm), BrFrm, NoItinerary, - "hvc", "\t$imm", []>, - Requires<[IsARM, HasVirtualization]> { - bits<16> imm; - - // Even though HVC isn't predicable, it's encoding includes a condition field. 
- // The instruction is undefined if the condition field is 0xf otherwise it is - // unpredictable if it isn't condition AL (0xe). - let Inst{31-28} = 0b1110; - let Unpredictable{31-28} = 0b1111; - let Inst{27-24} = 0b0001; - let Inst{23-20} = 0b0100; - let Inst{19-8} = imm{15-4}; - let Inst{7-4} = 0b0111; - let Inst{3-0} = imm{3-0}; -} -} - -// Return from exception in Hypervisor mode. -let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in -def ERET : ABI<0b0001, (outs), (ins), NoItinerary, "eret", "", []>, - Requires<[IsARM, HasVirtualization]> { - let Inst{23-0} = 0b011000000000000001101110; -} - -//===----------------------------------------------------------------------===// -// Load / Store Instructions. -// - -// Load - - -defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si, load>; -defm LDRB : AI_ldr1nopc<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si, - zextloadi8>; -defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si, store>; -defm STRB : AI_str1nopc<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si, - truncstorei8>; - -// Special LDR for loads from non-pc-relative constpools. 
-let canFoldAsLoad = 1, mayLoad = 1, hasSideEffects = 0, - isReMaterializable = 1, isCodeGenOnly = 1 in -def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr), - AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr", - []> { - bits<4> Rt; - bits<17> addr; - let Inst{23} = addr{12}; // U (add = ('U' == 1)) - let Inst{19-16} = 0b1111; - let Inst{15-12} = Rt; - let Inst{11-0} = addr{11-0}; // imm12 -} - -// Loads with zero extension -def LDRH : AI3ld<0b1011, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoad_bh_r, "ldrh", "\t$Rt, $addr", - [(set GPR:$Rt, (zextloadi16 addrmode3:$addr))]>; - -// Loads with sign extension -def LDRSH : AI3ld<0b1111, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoad_bh_r, "ldrsh", "\t$Rt, $addr", - [(set GPR:$Rt, (sextloadi16 addrmode3:$addr))]>; - -def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr", - [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>; - -let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { - // Load doubleword - def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode3:$addr), - LdMiscFrm, IIC_iLoad_d_r, "ldrd", "\t$Rt, $Rt2, $addr", []>, - Requires<[IsARM, HasV5TE]>; -} - -def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "lda", "\t$Rt, $addr", []>; -def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldab", "\t$Rt, $addr", []>; -def LDAH : AIldracq<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldah", "\t$Rt, $addr", []>; - -// Indexed loads -multiclass AI2_ldridx { - def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins addrmode_imm12_pre:$addr), IndexModePre, LdFrm, iii, - opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { - bits<17> addr; - let Inst{25} = 0; - let Inst{23} = addr{12}; - let Inst{19-16} = addr{16-13}; - let Inst{11-0} = addr{11-0}; - let 
DecoderMethod = "DecodeLDRPreImm"; - } - - def _PRE_REG : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins ldst_so_reg:$addr), IndexModePre, LdFrm, iir, - opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { - bits<17> addr; - let Inst{25} = 1; - let Inst{23} = addr{12}; - let Inst{19-16} = addr{16-13}; - let Inst{11-0} = addr{11-0}; - let Inst{4} = 0; - let DecoderMethod = "DecodeLDRPreReg"; - } - - def _POST_REG : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb), - (ins addr_offset_none:$addr, am2offset_reg:$offset), - IndexModePost, LdFrm, iir, - opc, "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb", []> { - // {12} isAdd - // {11-0} imm12/Rm - bits<14> offset; - bits<4> addr; - let Inst{25} = 1; - let Inst{23} = offset{12}; - let Inst{19-16} = addr; - let Inst{11-0} = offset{11-0}; - let Inst{4} = 0; - - let DecoderMethod = "DecodeAddrMode2IdxInstruction"; - } - - def _POST_IMM : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb), - (ins addr_offset_none:$addr, am2offset_imm:$offset), - IndexModePost, LdFrm, iii, - opc, "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb", []> { - // {12} isAdd - // {11-0} imm12/Rm - bits<14> offset; - bits<4> addr; - let Inst{25} = 0; - let Inst{23} = offset{12}; - let Inst{19-16} = addr; - let Inst{11-0} = offset{11-0}; - - let DecoderMethod = "DecodeAddrMode2IdxInstruction"; - } - -} - -let mayLoad = 1, hasSideEffects = 0 in { -// FIXME: for LDR_PRE_REG etc. the itineray should be either IIC_iLoad_ru or -// IIC_iLoad_siu depending on whether it the offset register is shifted. 
-defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_iu, IIC_iLoad_ru>; -defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_iu, IIC_iLoad_bh_ru>; -} - -multiclass AI3_ldridx op, string opc, InstrItinClass itin> { - def _PRE : AI3ldstidx { - bits<14> addr; - let Inst{23} = addr{8}; // U bit - let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm - let Inst{19-16} = addr{12-9}; // Rn - let Inst{11-8} = addr{7-4}; // imm7_4/zero - let Inst{3-0} = addr{3-0}; // imm3_0/Rm - let DecoderMethod = "DecodeAddrMode3Instruction"; - } - def _POST : AI3ldstidx { - bits<10> offset; - bits<4> addr; - let Inst{23} = offset{8}; // U bit - let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm - let Inst{19-16} = addr; - let Inst{11-8} = offset{7-4}; // imm7_4/zero - let Inst{3-0} = offset{3-0}; // imm3_0/Rm - let DecoderMethod = "DecodeAddrMode3Instruction"; - } -} - -let mayLoad = 1, hasSideEffects = 0 in { -defm LDRH : AI3_ldridx<0b1011, "ldrh", IIC_iLoad_bh_ru>; -defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>; -defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>; -let hasExtraDefRegAllocReq = 1 in { -def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), - (ins addrmode3_pre:$addr), IndexModePre, - LdMiscFrm, IIC_iLoad_d_ru, - "ldrd", "\t$Rt, $Rt2, $addr!", - "$addr.base = $Rn_wb", []> { - bits<14> addr; - let Inst{23} = addr{8}; // U bit - let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm - let Inst{19-16} = addr{12-9}; // Rn - let Inst{11-8} = addr{7-4}; // imm7_4/zero - let Inst{3-0} = addr{3-0}; // imm3_0/Rm - let DecoderMethod = "DecodeAddrMode3Instruction"; -} -def LDRD_POST: AI3ldstidx<0b1101, 0, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), - (ins addr_offset_none:$addr, am3offset:$offset), - IndexModePost, LdMiscFrm, IIC_iLoad_d_ru, - "ldrd", "\t$Rt, $Rt2, $addr, $offset", - "$addr.base = $Rn_wb", []> { - bits<10> offset; - bits<4> addr; - let Inst{23} = offset{8}; // U bit - let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm - let Inst{19-16} = addr; - let Inst{11-8} = 
offset{7-4}; // imm7_4/zero - let Inst{3-0} = offset{3-0}; // imm3_0/Rm - let DecoderMethod = "DecodeAddrMode3Instruction"; -} -} // hasExtraDefRegAllocReq = 1 -} // mayLoad = 1, hasSideEffects = 0 - -// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT. -let mayLoad = 1, hasSideEffects = 0 in { -def LDRT_POST_REG : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb), - (ins addr_offset_none:$addr, am2offset_reg:$offset), - IndexModePost, LdFrm, IIC_iLoad_ru, - "ldrt", "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb", []> { - // {12} isAdd - // {11-0} imm12/Rm - bits<14> offset; - bits<4> addr; - let Inst{25} = 1; - let Inst{23} = offset{12}; - let Inst{21} = 1; // overwrite - let Inst{19-16} = addr; - let Inst{11-5} = offset{11-5}; - let Inst{4} = 0; - let Inst{3-0} = offset{3-0}; - let DecoderMethod = "DecodeAddrMode2IdxInstruction"; -} - -def LDRT_POST_IMM - : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb), - (ins addr_offset_none:$addr, am2offset_imm:$offset), - IndexModePost, LdFrm, IIC_iLoad_ru, - "ldrt", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> { - // {12} isAdd - // {11-0} imm12/Rm - bits<14> offset; - bits<4> addr; - let Inst{25} = 0; - let Inst{23} = offset{12}; - let Inst{21} = 1; // overwrite - let Inst{19-16} = addr; - let Inst{11-0} = offset{11-0}; - let DecoderMethod = "DecodeAddrMode2IdxInstruction"; -} - -def LDRBT_POST_REG : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), - (ins addr_offset_none:$addr, am2offset_reg:$offset), - IndexModePost, LdFrm, IIC_iLoad_bh_ru, - "ldrbt", "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb", []> { - // {12} isAdd - // {11-0} imm12/Rm - bits<14> offset; - bits<4> addr; - let Inst{25} = 1; - let Inst{23} = offset{12}; - let Inst{21} = 1; // overwrite - let Inst{19-16} = addr; - let Inst{11-5} = offset{11-5}; - let Inst{4} = 0; - let Inst{3-0} = offset{3-0}; - let DecoderMethod = "DecodeAddrMode2IdxInstruction"; -} - -def LDRBT_POST_IMM - : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), - (ins addr_offset_none:$addr, 
am2offset_imm:$offset), - IndexModePost, LdFrm, IIC_iLoad_bh_ru, - "ldrbt", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> { - // {12} isAdd - // {11-0} imm12/Rm - bits<14> offset; - bits<4> addr; - let Inst{25} = 0; - let Inst{23} = offset{12}; - let Inst{21} = 1; // overwrite - let Inst{19-16} = addr; - let Inst{11-0} = offset{11-0}; - let DecoderMethod = "DecodeAddrMode2IdxInstruction"; -} - -multiclass AI3ldrT op, string opc> { - def i : AI3ldstidxT { - bits<9> offset; - let Inst{23} = offset{8}; - let Inst{22} = 1; - let Inst{11-8} = offset{7-4}; - let Inst{3-0} = offset{3-0}; - } - def r : AI3ldstidxT { - bits<5> Rm; - let Inst{23} = Rm{4}; - let Inst{22} = 0; - let Inst{11-8} = 0; - let Unpredictable{11-8} = 0b1111; - let Inst{3-0} = Rm{3-0}; - let DecoderMethod = "DecodeLDR"; - } -} - -defm LDRSBT : AI3ldrT<0b1101, "ldrsbt">; -defm LDRHT : AI3ldrT<0b1011, "ldrht">; -defm LDRSHT : AI3ldrT<0b1111, "ldrsht">; -} - -def LDRT_POST - : ARMAsmPseudo<"ldrt${q} $Rt, $addr", (ins addr_offset_none:$addr, pred:$q), - (outs GPR:$Rt)>; - -def LDRBT_POST - : ARMAsmPseudo<"ldrbt${q} $Rt, $addr", (ins addr_offset_none:$addr, pred:$q), - (outs GPR:$Rt)>; - -// Pseudo instruction ldr Rt, =immediate -def LDRConstPool - : ARMAsmPseudo<"ldr${q} $Rt, $immediate", - (ins const_pool_asm_imm:$immediate, pred:$q), - (outs GPR:$Rt)>; - -// Store - -// Stores with truncate -def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm, - IIC_iStore_bh_r, "strh", "\t$Rt, $addr", - [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>; - -// Store doubleword -let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { - def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr), - StMiscFrm, IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", []>, - Requires<[IsARM, HasV5TE]> { - let Inst{21} = 0; - } -} - -// Indexed stores -multiclass AI2_stridx { - def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, addrmode_imm12_pre:$addr), 
IndexModePre, - StFrm, iii, - opc, "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { - bits<17> addr; - let Inst{25} = 0; - let Inst{23} = addr{12}; // U (add = ('U' == 1)) - let Inst{19-16} = addr{16-13}; // Rn - let Inst{11-0} = addr{11-0}; // imm12 - let DecoderMethod = "DecodeSTRPreImm"; - } - - def _PRE_REG : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, ldst_so_reg:$addr), - IndexModePre, StFrm, iir, - opc, "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { - bits<17> addr; - let Inst{25} = 1; - let Inst{23} = addr{12}; // U (add = ('U' == 1)) - let Inst{19-16} = addr{16-13}; // Rn - let Inst{11-0} = addr{11-0}; - let Inst{4} = 0; // Inst{4} = 0 - let DecoderMethod = "DecodeSTRPreReg"; - } - def _POST_REG : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), - IndexModePost, StFrm, iir, - opc, "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { - // {12} isAdd - // {11-0} imm12/Rm - bits<14> offset; - bits<4> addr; - let Inst{25} = 1; - let Inst{23} = offset{12}; - let Inst{19-16} = addr; - let Inst{11-0} = offset{11-0}; - let Inst{4} = 0; - - let DecoderMethod = "DecodeAddrMode2IdxInstruction"; - } - - def _POST_IMM : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), - IndexModePost, StFrm, iii, - opc, "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { - // {12} isAdd - // {11-0} imm12/Rm - bits<14> offset; - bits<4> addr; - let Inst{25} = 0; - let Inst{23} = offset{12}; - let Inst{19-16} = addr; - let Inst{11-0} = offset{11-0}; - - let DecoderMethod = "DecodeAddrMode2IdxInstruction"; - } -} - -let mayStore = 1, hasSideEffects = 0 in { -// FIXME: for STR_PRE_REG etc. the itineray should be either IIC_iStore_ru or -// IIC_iStore_siu depending on whether it the offset register is shifted. 
-defm STR : AI2_stridx<0, "str", IIC_iStore_iu, IIC_iStore_ru>; -defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_iu, IIC_iStore_bh_ru>; -} - -def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr, - am2offset_reg:$offset), - (STR_POST_REG GPR:$Rt, addr_offset_none:$addr, - am2offset_reg:$offset)>; -def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr, - am2offset_imm:$offset), - (STR_POST_IMM GPR:$Rt, addr_offset_none:$addr, - am2offset_imm:$offset)>; -def : ARMPat<(post_truncsti8 GPR:$Rt, addr_offset_none:$addr, - am2offset_reg:$offset), - (STRB_POST_REG GPR:$Rt, addr_offset_none:$addr, - am2offset_reg:$offset)>; -def : ARMPat<(post_truncsti8 GPR:$Rt, addr_offset_none:$addr, - am2offset_imm:$offset), - (STRB_POST_IMM GPR:$Rt, addr_offset_none:$addr, - am2offset_imm:$offset)>; - -// Pseudo-instructions for pattern matching the pre-indexed stores. We can't -// put the patterns on the instruction definitions directly as ISel wants -// the address base and offset to be separate operands, not a single -// complex operand like we represent the instructions themselves. The -// pseudos map between the two. 
-let usesCustomInserter = 1, - Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in { -def STRi_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am2offset_imm:$offset, pred:$p), - 4, IIC_iStore_ru, - [(set GPR:$Rn_wb, - (pre_store GPR:$Rt, GPR:$Rn, am2offset_imm:$offset))]>; -def STRr_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am2offset_reg:$offset, pred:$p), - 4, IIC_iStore_ru, - [(set GPR:$Rn_wb, - (pre_store GPR:$Rt, GPR:$Rn, am2offset_reg:$offset))]>; -def STRBi_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am2offset_imm:$offset, pred:$p), - 4, IIC_iStore_ru, - [(set GPR:$Rn_wb, - (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset_imm:$offset))]>; -def STRBr_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am2offset_reg:$offset, pred:$p), - 4, IIC_iStore_ru, - [(set GPR:$Rn_wb, - (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset_reg:$offset))]>; -def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am3offset:$offset, pred:$p), - 4, IIC_iStore_ru, - [(set GPR:$Rn_wb, - (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>; -} - - - -def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, addrmode3_pre:$addr), IndexModePre, - StMiscFrm, IIC_iStore_bh_ru, - "strh", "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { - bits<14> addr; - let Inst{23} = addr{8}; // U bit - let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm - let Inst{19-16} = addr{12-9}; // Rn - let Inst{11-8} = addr{7-4}; // imm7_4/zero - let Inst{3-0} = addr{3-0}; // imm3_0/Rm - let DecoderMethod = "DecodeAddrMode3Instruction"; -} - -def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, addr_offset_none:$addr, am3offset:$offset), - IndexModePost, StMiscFrm, IIC_iStore_bh_ru, - "strh", "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", - [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt, - addr_offset_none:$addr, - am3offset:$offset))]> { - bits<10> 
offset; - bits<4> addr; - let Inst{23} = offset{8}; // U bit - let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm - let Inst{19-16} = addr; - let Inst{11-8} = offset{7-4}; // imm7_4/zero - let Inst{3-0} = offset{3-0}; // imm3_0/Rm - let DecoderMethod = "DecodeAddrMode3Instruction"; -} - -let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { -def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rt2, addrmode3_pre:$addr), - IndexModePre, StMiscFrm, IIC_iStore_d_ru, - "strd", "\t$Rt, $Rt2, $addr!", - "$addr.base = $Rn_wb", []> { - bits<14> addr; - let Inst{23} = addr{8}; // U bit - let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm - let Inst{19-16} = addr{12-9}; // Rn - let Inst{11-8} = addr{7-4}; // imm7_4/zero - let Inst{3-0} = addr{3-0}; // imm3_0/Rm - let DecoderMethod = "DecodeAddrMode3Instruction"; -} - -def STRD_POST: AI3ldstidx<0b1111, 0, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr, - am3offset:$offset), - IndexModePost, StMiscFrm, IIC_iStore_d_ru, - "strd", "\t$Rt, $Rt2, $addr, $offset", - "$addr.base = $Rn_wb", []> { - bits<10> offset; - bits<4> addr; - let Inst{23} = offset{8}; // U bit - let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm - let Inst{19-16} = addr; - let Inst{11-8} = offset{7-4}; // imm7_4/zero - let Inst{3-0} = offset{3-0}; // imm3_0/Rm - let DecoderMethod = "DecodeAddrMode3Instruction"; -} -} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 - -// STRT, STRBT, and STRHT - -def STRBT_POST_REG : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), - IndexModePost, StFrm, IIC_iStore_bh_ru, - "strbt", "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb", []> { - // {12} isAdd - // {11-0} imm12/Rm - bits<14> offset; - bits<4> addr; - let Inst{25} = 1; - let Inst{23} = offset{12}; - let Inst{21} = 1; // overwrite - let Inst{19-16} = addr; - let Inst{11-5} = offset{11-5}; - let Inst{4} = 0; - let Inst{3-0} = offset{3-0}; - let 
DecoderMethod = "DecodeAddrMode2IdxInstruction"; -} - -def STRBT_POST_IMM - : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), - IndexModePost, StFrm, IIC_iStore_bh_ru, - "strbt", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> { - // {12} isAdd - // {11-0} imm12/Rm - bits<14> offset; - bits<4> addr; - let Inst{25} = 0; - let Inst{23} = offset{12}; - let Inst{21} = 1; // overwrite - let Inst{19-16} = addr; - let Inst{11-0} = offset{11-0}; - let DecoderMethod = "DecodeAddrMode2IdxInstruction"; -} - -def STRBT_POST - : ARMAsmPseudo<"strbt${q} $Rt, $addr", - (ins GPR:$Rt, addr_offset_none:$addr, pred:$q)>; - -let mayStore = 1, hasSideEffects = 0 in { -def STRT_POST_REG : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), - IndexModePost, StFrm, IIC_iStore_ru, - "strt", "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb", []> { - // {12} isAdd - // {11-0} imm12/Rm - bits<14> offset; - bits<4> addr; - let Inst{25} = 1; - let Inst{23} = offset{12}; - let Inst{21} = 1; // overwrite - let Inst{19-16} = addr; - let Inst{11-5} = offset{11-5}; - let Inst{4} = 0; - let Inst{3-0} = offset{3-0}; - let DecoderMethod = "DecodeAddrMode2IdxInstruction"; -} - -def STRT_POST_IMM - : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), - IndexModePost, StFrm, IIC_iStore_ru, - "strt", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> { - // {12} isAdd - // {11-0} imm12/Rm - bits<14> offset; - bits<4> addr; - let Inst{25} = 0; - let Inst{23} = offset{12}; - let Inst{21} = 1; // overwrite - let Inst{19-16} = addr; - let Inst{11-0} = offset{11-0}; - let DecoderMethod = "DecodeAddrMode2IdxInstruction"; -} -} - -def STRT_POST - : ARMAsmPseudo<"strt${q} $Rt, $addr", - (ins GPR:$Rt, addr_offset_none:$addr, pred:$q)>; - -multiclass AI3strT op, string opc> { - def i : AI3ldstidxT { - bits<9> offset; - let Inst{23} = offset{8}; - let 
Inst{22} = 1; - let Inst{11-8} = offset{7-4}; - let Inst{3-0} = offset{3-0}; - } - def r : AI3ldstidxT { - bits<5> Rm; - let Inst{23} = Rm{4}; - let Inst{22} = 0; - let Inst{11-8} = 0; - let Inst{3-0} = Rm{3-0}; - } -} - - -defm STRHT : AI3strT<0b1011, "strht">; - -def STL : AIstrrel<0b00, (outs), (ins GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "stl", "\t$Rt, $addr", []>; -def STLB : AIstrrel<0b10, (outs), (ins GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "stlb", "\t$Rt, $addr", []>; -def STLH : AIstrrel<0b11, (outs), (ins GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "stlh", "\t$Rt, $addr", []>; - -//===----------------------------------------------------------------------===// -// Load / store multiple Instructions. -// - -multiclass arm_ldst_mult { - // IA is the default, so no need for an explicit suffix on the - // mnemonic here. Without it is the canonical spelling. - def IA : - AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - IndexModeNone, f, itin, - !strconcat(asm, "${p}\t$Rn, $regs", sfx), "", []> { - let Inst{24-23} = 0b01; // Increment After - let Inst{22} = P_bit; - let Inst{21} = 0; // No writeback - let Inst{20} = L_bit; - } - def IA_UPD : - AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - IndexModeUpd, f, itin_upd, - !strconcat(asm, "${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { - let Inst{24-23} = 0b01; // Increment After - let Inst{22} = P_bit; - let Inst{21} = 1; // Writeback - let Inst{20} = L_bit; - - let DecoderMethod = "DecodeMemMultipleWritebackInstruction"; - } - def DA : - AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - IndexModeNone, f, itin, - !strconcat(asm, "da${p}\t$Rn, $regs", sfx), "", []> { - let Inst{24-23} = 0b00; // Decrement After - let Inst{22} = P_bit; - let Inst{21} = 0; // No writeback - let Inst{20} = L_bit; - } - def DA_UPD : - AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - IndexModeUpd, f, itin_upd, - 
!strconcat(asm, "da${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { - let Inst{24-23} = 0b00; // Decrement After - let Inst{22} = P_bit; - let Inst{21} = 1; // Writeback - let Inst{20} = L_bit; - - let DecoderMethod = "DecodeMemMultipleWritebackInstruction"; - } - def DB : - AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - IndexModeNone, f, itin, - !strconcat(asm, "db${p}\t$Rn, $regs", sfx), "", []> { - let Inst{24-23} = 0b10; // Decrement Before - let Inst{22} = P_bit; - let Inst{21} = 0; // No writeback - let Inst{20} = L_bit; - } - def DB_UPD : - AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - IndexModeUpd, f, itin_upd, - !strconcat(asm, "db${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { - let Inst{24-23} = 0b10; // Decrement Before - let Inst{22} = P_bit; - let Inst{21} = 1; // Writeback - let Inst{20} = L_bit; - - let DecoderMethod = "DecodeMemMultipleWritebackInstruction"; - } - def IB : - AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - IndexModeNone, f, itin, - !strconcat(asm, "ib${p}\t$Rn, $regs", sfx), "", []> { - let Inst{24-23} = 0b11; // Increment Before - let Inst{22} = P_bit; - let Inst{21} = 0; // No writeback - let Inst{20} = L_bit; - } - def IB_UPD : - AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - IndexModeUpd, f, itin_upd, - !strconcat(asm, "ib${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { - let Inst{24-23} = 0b11; // Increment Before - let Inst{22} = P_bit; - let Inst{21} = 1; // Writeback - let Inst{20} = L_bit; - - let DecoderMethod = "DecodeMemMultipleWritebackInstruction"; - } -} - -let hasSideEffects = 0 in { - -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in -defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m, - IIC_iLoad_mu>, ComplexDeprecationPredicate<"ARMLoad">; - -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in -defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m, - IIC_iStore_mu>, - 
ComplexDeprecationPredicate<"ARMStore">; - -} // hasSideEffects - -// FIXME: remove when we have a way to marking a MI with these properties. -// FIXME: Should pc be an implicit operand like PICADD, etc? -let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, - hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in -def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, - reglist:$regs, variable_ops), - 4, IIC_iLoad_mBr, [], - (LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>, - RegConstraint<"$Rn = $wb">; - -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in -defm sysLDM : arm_ldst_mult<"ldm", " ^", 1, 1, LdStMulFrm, IIC_iLoad_m, - IIC_iLoad_mu>; - -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in -defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m, - IIC_iStore_mu>; - - - -//===----------------------------------------------------------------------===// -// Move Instructions. -// - -let hasSideEffects = 0, isMoveReg = 1 in -def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, - "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> { - bits<4> Rd; - bits<4> Rm; - - let Inst{19-16} = 0b0000; - let Inst{11-4} = 0b00000000; - let Inst{25} = 0; - let Inst{3-0} = Rm; - let Inst{15-12} = Rd; -} - -// A version for the smaller set of tail call registers. 
-let hasSideEffects = 0 in -def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm, - IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> { - bits<4> Rd; - bits<4> Rm; - - let Inst{11-4} = 0b00000000; - let Inst{25} = 0; - let Inst{3-0} = Rm; - let Inst{15-12} = Rd; -} - -def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src), - DPSoRegRegFrm, IIC_iMOVsr, - "mov", "\t$Rd, $src", - [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP, - Sched<[WriteALU]> { - bits<4> Rd; - bits<12> src; - let Inst{15-12} = Rd; - let Inst{19-16} = 0b0000; - let Inst{11-8} = src{11-8}; - let Inst{7} = 0; - let Inst{6-5} = src{6-5}; - let Inst{4} = 1; - let Inst{3-0} = src{3-0}; - let Inst{25} = 0; -} - -def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src), - DPSoRegImmFrm, IIC_iMOVsr, - "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg_imm:$src)]>, - UnaryDP, Sched<[WriteALU]> { - bits<4> Rd; - bits<12> src; - let Inst{15-12} = Rd; - let Inst{19-16} = 0b0000; - let Inst{11-5} = src{11-5}; - let Inst{4} = 0; - let Inst{3-0} = src{3-0}; - let Inst{25} = 0; -} - -let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in -def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm, IIC_iMOVi, - "mov", "\t$Rd, $imm", [(set GPR:$Rd, mod_imm:$imm)]>, UnaryDP, - Sched<[WriteALU]> { - bits<4> Rd; - bits<12> imm; - let Inst{25} = 1; - let Inst{15-12} = Rd; - let Inst{19-16} = 0b0000; - let Inst{11-0} = imm; -} - -let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in -def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins imm0_65535_expr:$imm), - DPFrm, IIC_iMOVi, - "movw", "\t$Rd, $imm", - [(set GPR:$Rd, imm0_65535:$imm)]>, - Requires<[IsARM, HasV6T2]>, UnaryDP, Sched<[WriteALU]> { - bits<4> Rd; - bits<16> imm; - let Inst{15-12} = Rd; - let Inst{11-0} = imm{11-0}; - let Inst{19-16} = imm{15-12}; - let Inst{20} = 0; - let Inst{25} = 1; - let DecoderMethod = "DecodeArmMOVTWInstruction"; -} - -def : InstAlias<"mov${p} 
$Rd, $imm", - (MOVi16 GPR:$Rd, imm0_65535_expr:$imm, pred:$p), 0>, - Requires<[IsARM, HasV6T2]>; - -def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), - (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, - Sched<[WriteALU]>; - -let Constraints = "$src = $Rd" in { -def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd), - (ins GPR:$src, imm0_65535_expr:$imm), - DPFrm, IIC_iMOVi, - "movt", "\t$Rd, $imm", - [(set GPRnopc:$Rd, - (or (and GPR:$src, 0xffff), - lo16AllZero:$imm))]>, UnaryDP, - Requires<[IsARM, HasV6T2]>, Sched<[WriteALU]> { - bits<4> Rd; - bits<16> imm; - let Inst{15-12} = Rd; - let Inst{11-0} = imm{11-0}; - let Inst{19-16} = imm{15-12}; - let Inst{20} = 0; - let Inst{25} = 1; - let DecoderMethod = "DecodeArmMOVTWInstruction"; -} - -def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), - (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, - Sched<[WriteALU]>; - -} // Constraints - -def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>, - Requires<[IsARM, HasV6T2]>; - -let Uses = [CPSR] in -def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi, - [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP, - Requires<[IsARM]>, Sched<[WriteALU]>; - -// These aren't really mov instructions, but we have to define them this way -// due to flag operands. - -let Defs = [CPSR] in { -def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, - [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP, - Sched<[WriteALU]>, Requires<[IsARM]>; -def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, - [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP, - Sched<[WriteALU]>, Requires<[IsARM]>; -} - -//===----------------------------------------------------------------------===// -// Extend Instructions. 
-// - -// Sign extenders - -def SXTB : AI_ext_rrot<0b01101010, - "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; -def SXTH : AI_ext_rrot<0b01101011, - "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; - -def SXTAB : AI_exta_rrot<0b01101010, - "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; -def SXTAH : AI_exta_rrot<0b01101011, - "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; - -def : ARMV6Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, rot_imm:$rot), i8)), - (SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; -def : ARMV6Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot), - i16)), - (SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; - -def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">; -def : ARMV6Pat<(int_arm_sxtb16 GPR:$Src), - (SXTB16 GPR:$Src, 0)>; - -def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">; -def : ARMV6Pat<(int_arm_sxtab16 GPR:$LHS, GPR:$RHS), - (SXTAB16 GPR:$LHS, GPR:$RHS, 0)>; - -// Zero extenders - -let AddedComplexity = 16 in { -def UXTB : AI_ext_rrot<0b01101110, - "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>; -def UXTH : AI_ext_rrot<0b01101111, - "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -def UXTB16 : AI_ext_rrot<0b01101100, - "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; - -// FIXME: This pattern incorrectly assumes the shl operator is a rotate. -// The transformation should probably be done as a combiner action -// instead so we can include a check for masking back in the upper -// eight bits of the source into the lower eight bits of the result. 
-//def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), -// (UXTB16r_rot GPR:$Src, 3)>; -def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), - (UXTB16 GPR:$Src, 1)>; -def : ARMV6Pat<(int_arm_uxtb16 GPR:$Src), - (UXTB16 GPR:$Src, 0)>; - -def UXTAB : AI_exta_rrot<0b01101110, "uxtab", - BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; -def UXTAH : AI_exta_rrot<0b01101111, "uxtah", - BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; - -def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)), - (UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; -def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)), - (UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; -} - -// This isn't safe in general, the add is two 16-bit units, not a 32-bit add. -def UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">; -def : ARMV6Pat<(int_arm_uxtab16 GPR:$LHS, GPR:$RHS), - (UXTAB16 GPR:$LHS, GPR:$RHS, 0)>; - - -def SBFX : I<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rn, imm0_31:$lsb, imm1_32:$width), - AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, - "sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>, - Requires<[IsARM, HasV6T2]> { - bits<4> Rd; - bits<4> Rn; - bits<5> lsb; - bits<5> width; - let Inst{27-21} = 0b0111101; - let Inst{6-4} = 0b101; - let Inst{20-16} = width; - let Inst{15-12} = Rd; - let Inst{11-7} = lsb; - let Inst{3-0} = Rn; -} - -def UBFX : I<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rn, imm0_31:$lsb, imm1_32:$width), - AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, - "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>, - Requires<[IsARM, HasV6T2]> { - bits<4> Rd; - bits<4> Rn; - bits<5> lsb; - bits<5> width; - let Inst{27-21} = 0b0111111; - let Inst{6-4} = 0b101; - let Inst{20-16} = width; - let Inst{15-12} = Rd; - let Inst{11-7} = lsb; - let Inst{3-0} = Rn; -} - -//===----------------------------------------------------------------------===// -// Arithmetic Instructions. 
-// - -let isAdd = 1 in -defm ADD : AsI1_bin_irs<0b0100, "add", - IIC_iALUi, IIC_iALUr, IIC_iALUsr, add, 1>; -defm SUB : AsI1_bin_irs<0b0010, "sub", - IIC_iALUi, IIC_iALUr, IIC_iALUsr, sub>; - -// ADD and SUB with 's' bit set. -// -// Currently, ADDS/SUBS are pseudo opcodes that exist only in the -// selection DAG. They are "lowered" to real ADD/SUB opcodes by -// AdjustInstrPostInstrSelection where we determine whether or not to -// set the "s" bit based on CPSR liveness. -// -// FIXME: Eliminate ADDS/SUBS pseudo opcodes after adding tablegen -// support for an optional CPSR definition that corresponds to the DAG -// node's second value. We can then eliminate the implicit def of CPSR. -let isAdd = 1 in -defm ADDS : AsI1_bin_s_irs; -defm SUBS : AsI1_bin_s_irs; - -let isAdd = 1 in -defm ADC : AI1_adde_sube_irs<0b0101, "adc", ARMadde, 1>; -defm SBC : AI1_adde_sube_irs<0b0110, "sbc", ARMsube>; - -defm RSB : AsI1_rbin_irs<0b0011, "rsb", - IIC_iALUi, IIC_iALUr, IIC_iALUsr, - sub>; - -// FIXME: Eliminate them if we can write def : Pat patterns which defines -// CPSR and the implicit def of CPSR is not needed. -defm RSBS : AsI1_rbin_s_is; - -defm RSC : AI1_rsc_irs<0b0111, "rsc", ARMsube>; - -// (sub X, imm) gets canonicalized to (add X, -imm). Match this form. -// The assume-no-carry-in form uses the negation of the input since add/sub -// assume opposite meanings of the carry flag (i.e., carry == !borrow). -// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory -// details. 
-def : ARMPat<(add GPR:$src, mod_imm_neg:$imm), - (SUBri GPR:$src, mod_imm_neg:$imm)>; -def : ARMPat<(ARMaddc GPR:$src, mod_imm_neg:$imm), - (SUBSri GPR:$src, mod_imm_neg:$imm)>; - -def : ARMPat<(add GPR:$src, imm0_65535_neg:$imm), - (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>, - Requires<[IsARM, HasV6T2]>; -def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm), - (SUBSrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>, - Requires<[IsARM, HasV6T2]>; - -// The with-carry-in form matches bitwise not instead of the negation. -// Effectively, the inverse interpretation of the carry flag already accounts -// for part of the negation. -def : ARMPat<(ARMadde GPR:$src, mod_imm_not:$imm, CPSR), - (SBCri GPR:$src, mod_imm_not:$imm)>; -def : ARMPat<(ARMadde GPR:$src, imm0_65535_neg:$imm, CPSR), - (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>, - Requires<[IsARM, HasV6T2]>; - -// Note: These are implemented in C++ code, because they have to generate -// ADD/SUBrs instructions, which use a complex pattern that a xform function -// cannot produce. 
-// (mul X, 2^n+1) -> (add (X << n), X) -// (mul X, 2^n-1) -> (rsb X, (X << n)) - -// ARM Arithmetic Instruction -// GPR:$dst = GPR:$a op GPR:$b -class AAI op27_20, bits<8> op11_4, string opc, - list pattern = [], - dag iops = (ins GPRnopc:$Rn, GPRnopc:$Rm), - string asm = "\t$Rd, $Rn, $Rm"> - : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern>, - Sched<[WriteALU, ReadALU, ReadALU]> { - bits<4> Rn; - bits<4> Rd; - bits<4> Rm; - let Inst{27-20} = op27_20; - let Inst{11-4} = op11_4; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{3-0} = Rm; - - let Unpredictable{11-8} = 0b1111; -} - -// Wrappers around the AAI class -class AAIRevOpr op27_20, bits<8> op11_4, string opc, - list pattern = []> - : AAI; - -class AAIIntrinsic op27_20, bits<8> op11_4, string opc, - Intrinsic intrinsic> - : AAI; - -// Saturating add/subtract -let hasSideEffects = 1 in { -def QADD8 : AAIIntrinsic<0b01100010, 0b11111001, "qadd8", int_arm_qadd8>; -def QADD16 : AAIIntrinsic<0b01100010, 0b11110001, "qadd16", int_arm_qadd16>; -def QSUB16 : AAIIntrinsic<0b01100010, 0b11110111, "qsub16", int_arm_qsub16>; -def QSUB8 : AAIIntrinsic<0b01100010, 0b11111111, "qsub8", int_arm_qsub8>; - -def QDADD : AAIRevOpr<0b00010100, 0b00000101, "qdadd", - [(set GPRnopc:$Rd, (int_arm_qadd (int_arm_qadd GPRnopc:$Rm, - GPRnopc:$Rm), - GPRnopc:$Rn))]>; -def QDSUB : AAIRevOpr<0b00010110, 0b00000101, "qdsub", - [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, - (int_arm_qadd GPRnopc:$Rn, GPRnopc:$Rn)))]>; -def QSUB : AAIRevOpr<0b00010010, 0b00000101, "qsub", - [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))]>; -let DecoderMethod = "DecodeQADDInstruction" in - def QADD : AAIRevOpr<0b00010000, 0b00000101, "qadd", - [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))]>; -} - -def UQADD16 : AAIIntrinsic<0b01100110, 0b11110001, "uqadd16", int_arm_uqadd16>; -def UQADD8 : AAIIntrinsic<0b01100110, 0b11111001, "uqadd8", int_arm_uqadd8>; -def UQSUB16 : AAIIntrinsic<0b01100110, 0b11110111, 
"uqsub16", int_arm_uqsub16>; -def UQSUB8 : AAIIntrinsic<0b01100110, 0b11111111, "uqsub8", int_arm_uqsub8>; -def QASX : AAIIntrinsic<0b01100010, 0b11110011, "qasx", int_arm_qasx>; -def QSAX : AAIIntrinsic<0b01100010, 0b11110101, "qsax", int_arm_qsax>; -def UQASX : AAIIntrinsic<0b01100110, 0b11110011, "uqasx", int_arm_uqasx>; -def UQSAX : AAIIntrinsic<0b01100110, 0b11110101, "uqsax", int_arm_uqsax>; - -// Signed/Unsigned add/subtract - -def SASX : AAIIntrinsic<0b01100001, 0b11110011, "sasx", int_arm_sasx>; -def SADD16 : AAIIntrinsic<0b01100001, 0b11110001, "sadd16", int_arm_sadd16>; -def SADD8 : AAIIntrinsic<0b01100001, 0b11111001, "sadd8", int_arm_sadd8>; -def SSAX : AAIIntrinsic<0b01100001, 0b11110101, "ssax", int_arm_ssax>; -def SSUB16 : AAIIntrinsic<0b01100001, 0b11110111, "ssub16", int_arm_ssub16>; -def SSUB8 : AAIIntrinsic<0b01100001, 0b11111111, "ssub8", int_arm_ssub8>; -def UASX : AAIIntrinsic<0b01100101, 0b11110011, "uasx", int_arm_uasx>; -def UADD16 : AAIIntrinsic<0b01100101, 0b11110001, "uadd16", int_arm_uadd16>; -def UADD8 : AAIIntrinsic<0b01100101, 0b11111001, "uadd8", int_arm_uadd8>; -def USAX : AAIIntrinsic<0b01100101, 0b11110101, "usax", int_arm_usax>; -def USUB16 : AAIIntrinsic<0b01100101, 0b11110111, "usub16", int_arm_usub16>; -def USUB8 : AAIIntrinsic<0b01100101, 0b11111111, "usub8", int_arm_usub8>; - -// Signed/Unsigned halving add/subtract - -def SHASX : AAIIntrinsic<0b01100011, 0b11110011, "shasx", int_arm_shasx>; -def SHADD16 : AAIIntrinsic<0b01100011, 0b11110001, "shadd16", int_arm_shadd16>; -def SHADD8 : AAIIntrinsic<0b01100011, 0b11111001, "shadd8", int_arm_shadd8>; -def SHSAX : AAIIntrinsic<0b01100011, 0b11110101, "shsax", int_arm_shsax>; -def SHSUB16 : AAIIntrinsic<0b01100011, 0b11110111, "shsub16", int_arm_shsub16>; -def SHSUB8 : AAIIntrinsic<0b01100011, 0b11111111, "shsub8", int_arm_shsub8>; -def UHASX : AAIIntrinsic<0b01100111, 0b11110011, "uhasx", int_arm_uhasx>; -def UHADD16 : AAIIntrinsic<0b01100111, 0b11110001, "uhadd16", 
int_arm_uhadd16>; -def UHADD8 : AAIIntrinsic<0b01100111, 0b11111001, "uhadd8", int_arm_uhadd8>; -def UHSAX : AAIIntrinsic<0b01100111, 0b11110101, "uhsax", int_arm_uhsax>; -def UHSUB16 : AAIIntrinsic<0b01100111, 0b11110111, "uhsub16", int_arm_uhsub16>; -def UHSUB8 : AAIIntrinsic<0b01100111, 0b11111111, "uhsub8", int_arm_uhsub8>; - -// Unsigned Sum of Absolute Differences [and Accumulate]. - -def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - MulFrm /* for convenience */, NoItinerary, "usad8", - "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (int_arm_usad8 GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]> { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - let Inst{27-20} = 0b01111000; - let Inst{15-12} = 0b1111; - let Inst{7-4} = 0b0001; - let Inst{19-16} = Rd; - let Inst{11-8} = Rm; - let Inst{3-0} = Rn; -} -def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), - MulFrm /* for convenience */, NoItinerary, "usada8", - "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (int_arm_usada8 GPR:$Rn, GPR:$Rm, GPR:$Ra))]>, - Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]>{ - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - bits<4> Ra; - let Inst{27-20} = 0b01111000; - let Inst{7-4} = 0b0001; - let Inst{19-16} = Rd; - let Inst{15-12} = Ra; - let Inst{11-8} = Rm; - let Inst{3-0} = Rn; -} - -// Signed/Unsigned saturate -def SSAT : AI<(outs GPRnopc:$Rd), - (ins imm1_32:$sat_imm, GPRnopc:$Rn, shift_imm:$sh), - SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>, - Requires<[IsARM,HasV6]>{ - bits<4> Rd; - bits<5> sat_imm; - bits<4> Rn; - bits<8> sh; - let Inst{27-21} = 0b0110101; - let Inst{5-4} = 0b01; - let Inst{20-16} = sat_imm; - let Inst{15-12} = Rd; - let Inst{11-7} = sh{4-0}; - let Inst{6} = sh{5}; - let Inst{3-0} = Rn; -} - -def SSAT16 : AI<(outs GPRnopc:$Rd), - (ins imm1_16:$sat_imm, GPRnopc:$Rn), SatFrm, - NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", []>, - Requires<[IsARM,HasV6]>{ - bits<4> Rd; - bits<4> sat_imm; - bits<4> Rn; 
- let Inst{27-20} = 0b01101010; - let Inst{11-4} = 0b11110011; - let Inst{15-12} = Rd; - let Inst{19-16} = sat_imm; - let Inst{3-0} = Rn; -} - -def USAT : AI<(outs GPRnopc:$Rd), - (ins imm0_31:$sat_imm, GPRnopc:$Rn, shift_imm:$sh), - SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []>, - Requires<[IsARM,HasV6]> { - bits<4> Rd; - bits<5> sat_imm; - bits<4> Rn; - bits<8> sh; - let Inst{27-21} = 0b0110111; - let Inst{5-4} = 0b01; - let Inst{15-12} = Rd; - let Inst{11-7} = sh{4-0}; - let Inst{6} = sh{5}; - let Inst{20-16} = sat_imm; - let Inst{3-0} = Rn; -} - -def USAT16 : AI<(outs GPRnopc:$Rd), - (ins imm0_15:$sat_imm, GPRnopc:$Rn), SatFrm, - NoItinerary, "usat16", "\t$Rd, $sat_imm, $Rn", []>, - Requires<[IsARM,HasV6]>{ - bits<4> Rd; - bits<4> sat_imm; - bits<4> Rn; - let Inst{27-20} = 0b01101110; - let Inst{11-4} = 0b11110011; - let Inst{15-12} = Rd; - let Inst{19-16} = sat_imm; - let Inst{3-0} = Rn; -} - -def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm1_32:$pos), - (SSAT imm1_32:$pos, GPRnopc:$a, 0)>; -def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos), - (USAT imm0_31:$pos, GPRnopc:$a, 0)>; -def : ARMPat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), - (SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; -def : ARMPat<(ARMusatnoshift GPRnopc:$Rn, imm0_31:$imm), - (USAT imm0_31:$imm, GPRnopc:$Rn, 0)>; -def : ARMV6Pat<(int_arm_ssat16 GPRnopc:$a, imm1_16:$pos), - (SSAT16 imm1_16:$pos, GPRnopc:$a)>; -def : ARMV6Pat<(int_arm_usat16 GPRnopc:$a, imm0_15:$pos), - (USAT16 imm0_15:$pos, GPRnopc:$a)>; - -//===----------------------------------------------------------------------===// -// Bitwise Instructions. 
-// - -defm AND : AsI1_bin_irs<0b0000, "and", - IIC_iBITi, IIC_iBITr, IIC_iBITsr, and, 1>; -defm ORR : AsI1_bin_irs<0b1100, "orr", - IIC_iBITi, IIC_iBITr, IIC_iBITsr, or, 1>; -defm EOR : AsI1_bin_irs<0b0001, "eor", - IIC_iBITi, IIC_iBITr, IIC_iBITsr, xor, 1>; -defm BIC : AsI1_bin_irs<0b1110, "bic", - IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(and node:$LHS, (not node:$RHS))>>; - -// FIXME: bf_inv_mask_imm should be two operands, the lsb and the msb, just -// like in the actual instruction encoding. The complexity of mapping the mask -// to the lsb/msb pair should be handled by ISel, not encapsulated in the -// instruction description. -def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm), - AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, - "bfc", "\t$Rd, $imm", "$src = $Rd", - [(set GPR:$Rd, (and GPR:$src, bf_inv_mask_imm:$imm))]>, - Requires<[IsARM, HasV6T2]> { - bits<4> Rd; - bits<10> imm; - let Inst{27-21} = 0b0111110; - let Inst{6-0} = 0b0011111; - let Inst{15-12} = Rd; - let Inst{11-7} = imm{4-0}; // lsb - let Inst{20-16} = imm{9-5}; // msb -} - -// A8.6.18 BFI - Bitfield insert (Encoding A1) -def BFI:I<(outs GPRnopc:$Rd), (ins GPRnopc:$src, GPR:$Rn, bf_inv_mask_imm:$imm), - AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, - "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd", - [(set GPRnopc:$Rd, (ARMbfi GPRnopc:$src, GPR:$Rn, - bf_inv_mask_imm:$imm))]>, - Requires<[IsARM, HasV6T2]> { - bits<4> Rd; - bits<4> Rn; - bits<10> imm; - let Inst{27-21} = 0b0111110; - let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15 - let Inst{15-12} = Rd; - let Inst{11-7} = imm{4-0}; // lsb - let Inst{20-16} = imm{9-5}; // width - let Inst{3-0} = Rn; -} - -def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr, - "mvn", "\t$Rd, $Rm", - [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP, Sched<[WriteALU]> { - bits<4> Rd; - bits<4> Rm; - let Inst{25} = 0; - let Inst{19-16} = 0b0000; - let Inst{11-4} = 0b00000000; - let Inst{15-12} = Rd; - let Inst{3-0} = Rm; - - let 
Unpredictable{19-16} = 0b1111; -} -def MVNsi : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_imm:$shift), - DPSoRegImmFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift", - [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP, - Sched<[WriteALU]> { - bits<4> Rd; - bits<12> shift; - let Inst{25} = 0; - let Inst{19-16} = 0b0000; - let Inst{15-12} = Rd; - let Inst{11-5} = shift{11-5}; - let Inst{4} = 0; - let Inst{3-0} = shift{3-0}; - - let Unpredictable{19-16} = 0b1111; -} -def MVNsr : AsI1<0b1111, (outs GPRnopc:$Rd), (ins so_reg_reg:$shift), - DPSoRegRegFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift", - [(set GPRnopc:$Rd, (not so_reg_reg:$shift))]>, UnaryDP, - Sched<[WriteALU]> { - bits<4> Rd; - bits<12> shift; - let Inst{25} = 0; - let Inst{19-16} = 0b0000; - let Inst{15-12} = Rd; - let Inst{11-8} = shift{11-8}; - let Inst{7} = 0; - let Inst{6-5} = shift{6-5}; - let Inst{4} = 1; - let Inst{3-0} = shift{3-0}; - - let Unpredictable{19-16} = 0b1111; -} -let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in -def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm, - IIC_iMVNi, "mvn", "\t$Rd, $imm", - [(set GPR:$Rd, mod_imm_not:$imm)]>,UnaryDP, Sched<[WriteALU]> { - bits<4> Rd; - bits<12> imm; - let Inst{25} = 1; - let Inst{19-16} = 0b0000; - let Inst{15-12} = Rd; - let Inst{11-0} = imm; -} - -let AddedComplexity = 1 in -def : ARMPat<(and GPR:$src, mod_imm_not:$imm), - (BICri GPR:$src, mod_imm_not:$imm)>; - -//===----------------------------------------------------------------------===// -// Multiply Instructions. 
-// -class AsMul1I32 opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> - : AsMul1I { - bits<4> Rd; - bits<4> Rm; - bits<4> Rn; - let Inst{19-16} = Rd; - let Inst{11-8} = Rm; - let Inst{3-0} = Rn; -} -class AsMul1I64 opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> - : AsMul1I { - bits<4> RdLo; - bits<4> RdHi; - bits<4> Rm; - bits<4> Rn; - let Inst{19-16} = RdHi; - let Inst{15-12} = RdLo; - let Inst{11-8} = Rm; - let Inst{3-0} = Rn; -} -class AsMla1I64 opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> - : AsMul1I { - bits<4> RdLo; - bits<4> RdHi; - bits<4> Rm; - bits<4> Rn; - let Inst{19-16} = RdHi; - let Inst{15-12} = RdLo; - let Inst{11-8} = Rm; - let Inst{3-0} = Rn; -} - -// FIXME: The v5 pseudos are only necessary for the additional Constraint -// property. Remove them when it's possible to add those properties -// on an individual MachineInstr, not just an instruction description. 
-let isCommutable = 1, TwoOperandAliasConstraint = "$Rn = $Rd" in { -def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd), - (ins GPRnopc:$Rn, GPRnopc:$Rm), - IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", - [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>, - Requires<[IsARM, HasV6]>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]> { - let Inst{15-12} = 0b0000; - let Unpredictable{15-12} = 0b1111; -} - -let Constraints = "@earlyclobber $Rd" in -def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, - pred:$p, cc_out:$s), - 4, IIC_iMUL32, - [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))], - (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6, UseMulOps]>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; -} - -def MLA : AsMul1I32<0b0000001, (outs GPRnopc:$Rd), - (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra), - IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra", - [(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))]>, - Requires<[IsARM, HasV6, UseMulOps]>, - Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { - bits<4> Ra; - let Inst{15-12} = Ra; -} - -let Constraints = "@earlyclobber $Rd" in -def MLAv5: ARMPseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra, - pred:$p, cc_out:$s), 4, IIC_iMAC32, - [(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))], - (MLA GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>, - Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; - -def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), - IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>, - Requires<[IsARM, HasV6T2, UseMulOps]>, - Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { - bits<4> Rd; - bits<4> Rm; - bits<4> Rn; - bits<4> Ra; - let Inst{19-16} = Rd; - let Inst{15-12} = Ra; - let Inst{11-8} = Rm; - let Inst{3-0} = Rn; -} - -// Extra precision multiplies with low / high 
results -let hasSideEffects = 0 in { -let isCommutable = 1 in { -def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64, - "smull", "\t$RdLo, $RdHi, $Rn, $Rm", - [(set GPR:$RdLo, GPR:$RdHi, - (smullohi GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasV6]>, - Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; - -def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64, - "umull", "\t$RdLo, $RdHi, $Rn, $Rm", - [(set GPR:$RdLo, GPR:$RdHi, - (umullohi GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasV6]>, - Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL]>; - -let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { -def SMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), - 4, IIC_iMUL64, - [(set GPR:$RdLo, GPR:$RdHi, - (smullohi GPR:$Rn, GPR:$Rm))], - (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>, - Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; - -def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), - 4, IIC_iMUL64, - [(set GPR:$RdLo, GPR:$RdHi, - (umullohi GPR:$Rn, GPR:$Rm))], - (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>, - Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; -} -} - -// Multiply + accumulate -def SMLAL : AsMla1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, - "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>, - Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; -def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, - "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>, - 
Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; - -def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), - IIC_iMAC64, - "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>, - Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]> { - bits<4> RdLo; - bits<4> RdHi; - bits<4> Rm; - bits<4> Rn; - let Inst{19-16} = RdHi; - let Inst{15-12} = RdLo; - let Inst{11-8} = Rm; - let Inst{3-0} = Rn; -} - -let Constraints = - "@earlyclobber $RdLo,@earlyclobber $RdHi,$RLo = $RdLo,$RHi = $RdHi" in { -def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s), - 4, IIC_iMAC64, [], - (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, - pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>, - Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; -def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s), - 4, IIC_iMAC64, [], - (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, - pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>, - Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; -} - -} // hasSideEffects - -// Most significant word multiply -def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasV6]>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]> { - let Inst{15-12} = 0b1111; -} - -def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (ARMsmmlar GPR:$Rn, GPR:$Rm, (i32 0)))]>, - Requires<[IsARM, HasV6]>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]> { - let Inst{15-12} = 0b1111; -} - -def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs 
GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), - IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, - Requires<[IsARM, HasV6, UseMulOps]>, - Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; - -def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), - IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (ARMsmmlar GPR:$Rn, GPR:$Rm, GPR:$Ra))]>, - Requires<[IsARM, HasV6]>, - Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; - -def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), - IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6, UseMulOps]>, - Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; - -def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), - IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (ARMsmmlsr GPR:$Rn, GPR:$Rm, GPR:$Ra))]>, - Requires<[IsARM, HasV6]>, - Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; - -multiclass AI_smul { - def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), - (sext_inreg GPR:$Rm, i16)))]>, - Requires<[IsARM, HasV5TE]>, - Sched<[WriteMUL16, ReadMUL, ReadMUL]>; - - def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), - (sra GPR:$Rm, (i32 16))))]>, - Requires<[IsARM, HasV5TE]>, - Sched<[WriteMUL16, ReadMUL, ReadMUL]>; - - def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)), - (sext_inreg GPR:$Rm, i16)))]>, - Requires<[IsARM, HasV5TE]>, - Sched<[WriteMUL16, ReadMUL, ReadMUL]>; - - def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - 
IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)), - (sra GPR:$Rm, (i32 16))))]>, - Requires<[IsARM, HasV5TE]>, - Sched<[WriteMUL16, ReadMUL, ReadMUL]>; - - def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (ARMsmulwb GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasV5TE]>, - Sched<[WriteMUL16, ReadMUL, ReadMUL]>; - - def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (ARMsmulwt GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasV5TE]>, - Sched<[WriteMUL16, ReadMUL, ReadMUL]>; -} - - -multiclass AI_smla { - let DecoderMethod = "DecodeSMLAInstruction" in { - def BB : AMulxyIa<0b0001000, 0b00, (outs GPRnopc:$Rd), - (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), - IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPRnopc:$Rd, (add GPR:$Ra, - (mul (sext_inreg GPRnopc:$Rn, i16), - (sext_inreg GPRnopc:$Rm, i16))))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>, - Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; - - def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd), - (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), - IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPRnopc:$Rd, - (add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16), - (sra GPRnopc:$Rm, (i32 16)))))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>, - Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; - - def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd), - (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), - IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPRnopc:$Rd, - (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)), - (sext_inreg GPRnopc:$Rm, i16))))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>, - Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; - - def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd), - (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), - IIC_iMAC16, 
!strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPRnopc:$Rd, - (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)), - (sra GPRnopc:$Rm, (i32 16)))))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>, - Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; - - def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd), - (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), - IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPRnopc:$Rd, - (add GPR:$Ra, (ARMsmulwb GPRnopc:$Rn, GPRnopc:$Rm)))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>, - Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; - - def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd), - (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), - IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPRnopc:$Rd, - (add GPR:$Ra, (ARMsmulwt GPRnopc:$Rn, GPRnopc:$Rm)))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>, - Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; - } -} - -defm SMUL : AI_smul<"smul">; -defm SMLA : AI_smla<"smla">; - -// Halfword multiply accumulate long: SMLAL. 
-class SMLAL opc1, string asm> - : AMulxyI64<0b0001010, opc1, - (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), - IIC_iMAC64, asm, "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, - Requires<[IsARM, HasV5TE]>, - Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; - -def SMLALBB : SMLAL<0b00, "smlalbb">; -def SMLALBT : SMLAL<0b10, "smlalbt">; -def SMLALTB : SMLAL<0b01, "smlaltb">; -def SMLALTT : SMLAL<0b11, "smlaltt">; - -def : ARMV5TEPat<(ARMsmlalbb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), - (SMLALBB $Rn, $Rm, $RLo, $RHi)>; -def : ARMV5TEPat<(ARMsmlalbt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), - (SMLALBT $Rn, $Rm, $RLo, $RHi)>; -def : ARMV5TEPat<(ARMsmlaltb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), - (SMLALTB $Rn, $Rm, $RLo, $RHi)>; -def : ARMV5TEPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), - (SMLALTT $Rn, $Rm, $RLo, $RHi)>; - -// Helper class for AI_smld. -class AMulDualIbase - : AI, - Requires<[IsARM, HasV6]> { - bits<4> Rn; - bits<4> Rm; - let Inst{27-23} = 0b01110; - let Inst{22} = long; - let Inst{21-20} = 0b00; - let Inst{11-8} = Rm; - let Inst{7} = 0; - let Inst{6} = sub; - let Inst{5} = swap; - let Inst{4} = 1; - let Inst{3-0} = Rn; -} -class AMulDualI - : AMulDualIbase { - bits<4> Rd; - let Inst{15-12} = 0b1111; - let Inst{19-16} = Rd; -} -class AMulDualIa - : AMulDualIbase { - bits<4> Ra; - bits<4> Rd; - let Inst{19-16} = Rd; - let Inst{15-12} = Ra; -} -class AMulDualI64 - : AMulDualIbase { - bits<4> RdLo; - bits<4> RdHi; - let Inst{19-16} = RdHi; - let Inst{15-12} = RdLo; -} - -multiclass AI_smld { - - def D : AMulDualIa<0, sub, 0, (outs GPRnopc:$Rd), - (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), - NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">, - Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; - - def DX: AMulDualIa<0, sub, 1, (outs GPRnopc:$Rd), - (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), - NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, 
$Rm, $Ra">, - Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; - - def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), - NoItinerary, - !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, - Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; - - def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), - NoItinerary, - !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, - Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; -} - -defm SMLA : AI_smld<0, "smla">; -defm SMLS : AI_smld<1, "smls">; - -def : ARMV6Pat<(int_arm_smlad GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), - (SMLAD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; -def : ARMV6Pat<(int_arm_smladx GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), - (SMLADX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; -def : ARMV6Pat<(int_arm_smlsd GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), - (SMLSD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; -def : ARMV6Pat<(int_arm_smlsdx GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), - (SMLSDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; -def : ARMV6Pat<(ARMSmlald GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), - (SMLALD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; -def : ARMV6Pat<(ARMSmlaldx GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), - (SMLALDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; -def : ARMV6Pat<(ARMSmlsld GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), - (SMLSLD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; -def : ARMV6Pat<(ARMSmlsldx GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), - (SMLSLDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; - -multiclass AI_sdml { - - def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), - NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, 
$Rm">, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; - def DX:AMulDualI<0, sub, 1, (outs GPRnopc:$Rd),(ins GPRnopc:$Rn, GPRnopc:$Rm), - NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; -} - -defm SMUA : AI_sdml<0, "smua">; -defm SMUS : AI_sdml<1, "smus">; - -def : ARMV6Pat<(int_arm_smuad GPRnopc:$Rn, GPRnopc:$Rm), - (SMUAD GPRnopc:$Rn, GPRnopc:$Rm)>; -def : ARMV6Pat<(int_arm_smuadx GPRnopc:$Rn, GPRnopc:$Rm), - (SMUADX GPRnopc:$Rn, GPRnopc:$Rm)>; -def : ARMV6Pat<(int_arm_smusd GPRnopc:$Rn, GPRnopc:$Rm), - (SMUSD GPRnopc:$Rn, GPRnopc:$Rm)>; -def : ARMV6Pat<(int_arm_smusdx GPRnopc:$Rn, GPRnopc:$Rm), - (SMUSDX GPRnopc:$Rn, GPRnopc:$Rm)>; - -//===----------------------------------------------------------------------===// -// Division Instructions (ARMv7-A with virtualization extension) -// -def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, - "sdiv", "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasDivideInARM]>, - Sched<[WriteDIV]>; - -def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, - "udiv", "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasDivideInARM]>, - Sched<[WriteDIV]>; - -//===----------------------------------------------------------------------===// -// Misc. Arithmetic Instructions. 
-// - -def CLZ : AMiscA1I<0b00010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm), - IIC_iUNAr, "clz", "\t$Rd, $Rm", - [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>, - Sched<[WriteALU]>; - -def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm), - IIC_iUNAr, "rbit", "\t$Rd, $Rm", - [(set GPR:$Rd, (bitreverse GPR:$Rm))]>, - Requires<[IsARM, HasV6T2]>, - Sched<[WriteALU]>; - -def REV : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm), - IIC_iUNAr, "rev", "\t$Rd, $Rm", - [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>, - Sched<[WriteALU]>; - -let AddedComplexity = 5 in -def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), - IIC_iUNAr, "rev16", "\t$Rd, $Rm", - [(set GPR:$Rd, (rotr (bswap GPR:$Rm), (i32 16)))]>, - Requires<[IsARM, HasV6]>, - Sched<[WriteALU]>; - -def : ARMV6Pat<(srl (bswap (extloadi16 addrmode3:$addr)), (i32 16)), - (REV16 (LDRH addrmode3:$addr))>; -def : ARMV6Pat<(truncstorei16 (srl (bswap GPR:$Rn), (i32 16)), addrmode3:$addr), - (STRH (REV16 GPR:$Rn), addrmode3:$addr)>; - -let AddedComplexity = 5 in -def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), - IIC_iUNAr, "revsh", "\t$Rd, $Rm", - [(set GPR:$Rd, (sra (bswap GPR:$Rm), (i32 16)))]>, - Requires<[IsARM, HasV6]>, - Sched<[WriteALU]>; - -def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)), - (and (srl GPR:$Rm, (i32 8)), 0xFF)), - (REVSH GPR:$Rm)>; - -def PKHBT : APKHI<0b01101000, 0, (outs GPRnopc:$Rd), - (ins GPRnopc:$Rn, GPRnopc:$Rm, pkh_lsl_amt:$sh), - IIC_iALUsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh", - [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF), - (and (shl GPRnopc:$Rm, pkh_lsl_amt:$sh), - 0xFFFF0000)))]>, - Requires<[IsARM, HasV6]>, - Sched<[WriteALUsi, ReadALU]>; - -// Alternate cases for PKHBT where identities eliminate some nodes. 
-def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (and GPRnopc:$Rm, 0xFFFF0000)), - (PKHBT GPRnopc:$Rn, GPRnopc:$Rm, 0)>; -def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (shl GPRnopc:$Rm, imm16_31:$sh)), - (PKHBT GPRnopc:$Rn, GPRnopc:$Rm, imm16_31:$sh)>; - -// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and -// will match the pattern below. -def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd), - (ins GPRnopc:$Rn, GPRnopc:$Rm, pkh_asr_amt:$sh), - IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh", - [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF0000), - (and (sra GPRnopc:$Rm, pkh_asr_amt:$sh), - 0xFFFF)))]>, - Requires<[IsARM, HasV6]>, - Sched<[WriteALUsi, ReadALU]>; - -// Alternate cases for PKHTB where identities eliminate some nodes. Note that -// a shift amount of 0 is *not legal* here, it is PKHBT instead. -// We also can not replace a srl (17..31) by an arithmetic shift we would use in -// pkhtb src1, src2, asr (17..31). -def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), - (srl GPRnopc:$src2, imm16:$sh)), - (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16:$sh)>; -def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), - (sra GPRnopc:$src2, imm16_31:$sh)), - (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16_31:$sh)>; -def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), - (and (srl GPRnopc:$src2, imm1_15:$sh), 0xFFFF)), - (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm1_15:$sh)>; - -//===----------------------------------------------------------------------===// -// CRC Instructions -// -// Polynomials: -// + CRC32{B,H,W} 0x04C11DB7 -// + CRC32C{B,H,W} 0x1EDC6F41 -// - -class AI_crc32 sz, string suffix, SDPatternOperator builtin> - : AInoP<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), MiscFrm, NoItinerary, - !strconcat("crc32", suffix), "\t$Rd, $Rn, $Rm", - [(set GPRnopc:$Rd, (builtin GPRnopc:$Rn, GPRnopc:$Rm))]>, - Requires<[IsARM, HasV8, HasCRC]> { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - - let Inst{31-28} = 0b1110; - let Inst{27-23} = 0b00010; - let 
Inst{22-21} = sz; - let Inst{20} = 0; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-10} = 0b00; - let Inst{9} = C; - let Inst{8} = 0; - let Inst{7-4} = 0b0100; - let Inst{3-0} = Rm; - - let Unpredictable{11-8} = 0b1101; -} - -def CRC32B : AI_crc32<0, 0b00, "b", int_arm_crc32b>; -def CRC32CB : AI_crc32<1, 0b00, "cb", int_arm_crc32cb>; -def CRC32H : AI_crc32<0, 0b01, "h", int_arm_crc32h>; -def CRC32CH : AI_crc32<1, 0b01, "ch", int_arm_crc32ch>; -def CRC32W : AI_crc32<0, 0b10, "w", int_arm_crc32w>; -def CRC32CW : AI_crc32<1, 0b10, "cw", int_arm_crc32cw>; - -//===----------------------------------------------------------------------===// -// ARMv8.1a Privilege Access Never extension -// -// SETPAN #imm1 - -def SETPAN : AInoP<(outs), (ins imm0_1:$imm), MiscFrm, NoItinerary, "setpan", - "\t$imm", []>, Requires<[IsARM, HasV8, HasV8_1a]> { - bits<1> imm; - - let Inst{31-28} = 0b1111; - let Inst{27-20} = 0b00010001; - let Inst{19-16} = 0b0000; - let Inst{15-10} = 0b000000; - let Inst{9} = imm; - let Inst{8} = 0b0; - let Inst{7-4} = 0b0000; - let Inst{3-0} = 0b0000; - - let Unpredictable{19-16} = 0b1111; - let Unpredictable{15-10} = 0b111111; - let Unpredictable{8} = 0b1; - let Unpredictable{3-0} = 0b1111; -} - -//===----------------------------------------------------------------------===// -// Comparison Instructions... -// - -defm CMP : AI1_cmp_irs<0b1010, "cmp", - IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr, ARMcmp>; - -// ARMcmpZ can re-use the above instruction definitions. 
-def : ARMPat<(ARMcmpZ GPR:$src, mod_imm:$imm), - (CMPri GPR:$src, mod_imm:$imm)>; -def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs), - (CMPrr GPR:$src, GPR:$rhs)>; -def : ARMPat<(ARMcmpZ GPR:$src, so_reg_imm:$rhs), - (CMPrsi GPR:$src, so_reg_imm:$rhs)>; -def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs), - (CMPrsr GPR:$src, so_reg_reg:$rhs)>; - -// CMN register-integer -let isCompare = 1, Defs = [CPSR] in { -def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, IIC_iCMPi, - "cmn", "\t$Rn, $imm", - [(ARMcmn GPR:$Rn, mod_imm:$imm)]>, - Sched<[WriteCMP, ReadALU]> { - bits<4> Rn; - bits<12> imm; - let Inst{25} = 1; - let Inst{20} = 1; - let Inst{19-16} = Rn; - let Inst{15-12} = 0b0000; - let Inst{11-0} = imm; - - let Unpredictable{15-12} = 0b1111; -} - -// CMN register-register/shift -def CMNzrr : AI1<0b1011, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iCMPr, - "cmn", "\t$Rn, $Rm", - [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> - GPR:$Rn, GPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> { - bits<4> Rn; - bits<4> Rm; - let isCommutable = 1; - let Inst{25} = 0; - let Inst{20} = 1; - let Inst{19-16} = Rn; - let Inst{15-12} = 0b0000; - let Inst{11-4} = 0b00000000; - let Inst{3-0} = Rm; - - let Unpredictable{15-12} = 0b1111; -} - -def CMNzrsi : AI1<0b1011, (outs), - (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, IIC_iCMPsr, - "cmn", "\t$Rn, $shift", - [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> - GPR:$Rn, so_reg_imm:$shift)]>, - Sched<[WriteCMPsi, ReadALU]> { - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{20} = 1; - let Inst{19-16} = Rn; - let Inst{15-12} = 0b0000; - let Inst{11-5} = shift{11-5}; - let Inst{4} = 0; - let Inst{3-0} = shift{3-0}; - - let Unpredictable{15-12} = 0b1111; -} - -def CMNzrsr : AI1<0b1011, (outs), - (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iCMPsr, - "cmn", "\t$Rn, $shift", - [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> - GPRnopc:$Rn, so_reg_reg:$shift)]>, - Sched<[WriteCMPsr, ReadALU]> { - 
bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{20} = 1; - let Inst{19-16} = Rn; - let Inst{15-12} = 0b0000; - let Inst{11-8} = shift{11-8}; - let Inst{7} = 0; - let Inst{6-5} = shift{6-5}; - let Inst{4} = 1; - let Inst{3-0} = shift{3-0}; - - let Unpredictable{15-12} = 0b1111; -} - -} - -def : ARMPat<(ARMcmp GPR:$src, mod_imm_neg:$imm), - (CMNri GPR:$src, mod_imm_neg:$imm)>; - -def : ARMPat<(ARMcmpZ GPR:$src, mod_imm_neg:$imm), - (CMNri GPR:$src, mod_imm_neg:$imm)>; - -// Note that TST/TEQ don't set all the same flags that CMP does! -defm TST : AI1_cmp_irs<0b1000, "tst", - IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr, - BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1, - "DecodeTSTInstruction">; -defm TEQ : AI1_cmp_irs<0b1001, "teq", - IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr, - BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>; - -// Pseudo i64 compares for some floating point compares. -let usesCustomInserter = 1, isBranch = 1, isTerminator = 1, - Defs = [CPSR] in { -def BCCi64 : PseudoInst<(outs), - (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst), - IIC_Br, - [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>, - Sched<[WriteBr]>; - -def BCCZi64 : PseudoInst<(outs), - (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), IIC_Br, - [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>, - Sched<[WriteBr]>; -} // usesCustomInserter - - -// Conditional moves -let hasSideEffects = 0 in { - -let isCommutable = 1, isSelect = 1 in -def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, GPR:$Rm, cmovpred:$p), - 4, IIC_iCMOVr, - [(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, - cmovpred:$p))]>, - RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; - -def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, so_reg_imm:$shift, cmovpred:$p), - 4, IIC_iCMOVsr, - [(set GPR:$Rd, - (ARMcmov GPR:$false, so_reg_imm:$shift, - cmovpred:$p))]>, - RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; -def MOVCCsr 
: ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, so_reg_reg:$shift, cmovpred:$p), - 4, IIC_iCMOVsr, - [(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift, - cmovpred:$p))]>, - RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; - - -let isMoveImm = 1 in -def MOVCCi16 - : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, imm0_65535_expr:$imm, cmovpred:$p), - 4, IIC_iMOVi, - [(set GPR:$Rd, (ARMcmov GPR:$false, imm0_65535:$imm, - cmovpred:$p))]>, - RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>, - Sched<[WriteALU]>; - -let isMoveImm = 1 in -def MOVCCi : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, mod_imm:$imm, cmovpred:$p), - 4, IIC_iCMOVi, - [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm:$imm, - cmovpred:$p))]>, - RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; - -// Two instruction predicate mov immediate. -let isMoveImm = 1 in -def MOVCCi32imm - : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, i32imm:$src, cmovpred:$p), - 8, IIC_iCMOVix2, - [(set GPR:$Rd, (ARMcmov GPR:$false, imm:$src, - cmovpred:$p))]>, - RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>; - -let isMoveImm = 1 in -def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, mod_imm:$imm, cmovpred:$p), - 4, IIC_iCMOVi, - [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm_not:$imm, - cmovpred:$p))]>, - RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; - -} // hasSideEffects - - -//===----------------------------------------------------------------------===// -// Atomic operations intrinsics -// - -def MemBarrierOptOperand : AsmOperandClass { - let Name = "MemBarrierOpt"; - let ParserMethod = "parseMemBarrierOptOperand"; -} -def memb_opt : Operand { - let PrintMethod = "printMemBOption"; - let ParserMatchClass = MemBarrierOptOperand; - let DecoderMethod = "DecodeMemBarrierOption"; -} - -def InstSyncBarrierOptOperand : AsmOperandClass { - let Name = "InstSyncBarrierOpt"; - let ParserMethod = "parseInstSyncBarrierOptOperand"; -} -def instsyncb_opt : Operand { - let PrintMethod = 
"printInstSyncBOption"; - let ParserMatchClass = InstSyncBarrierOptOperand; - let DecoderMethod = "DecodeInstSyncBarrierOption"; -} - -def TraceSyncBarrierOptOperand : AsmOperandClass { - let Name = "TraceSyncBarrierOpt"; - let ParserMethod = "parseTraceSyncBarrierOptOperand"; -} -def tsb_opt : Operand { - let PrintMethod = "printTraceSyncBOption"; - let ParserMatchClass = TraceSyncBarrierOptOperand; -} - -// Memory barriers protect the atomic sequences -let hasSideEffects = 1 in { -def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, - "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>, - Requires<[IsARM, HasDB]> { - bits<4> opt; - let Inst{31-4} = 0xf57ff05; - let Inst{3-0} = opt; -} - -def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, - "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>, - Requires<[IsARM, HasDB]> { - bits<4> opt; - let Inst{31-4} = 0xf57ff04; - let Inst{3-0} = opt; -} - -// ISB has only full system option -def ISB : AInoP<(outs), (ins instsyncb_opt:$opt), MiscFrm, NoItinerary, - "isb", "\t$opt", [(int_arm_isb (i32 imm0_15:$opt))]>, - Requires<[IsARM, HasDB]> { - bits<4> opt; - let Inst{31-4} = 0xf57ff06; - let Inst{3-0} = opt; -} - -let hasNoSchedulingInfo = 1 in -def TSB : AInoP<(outs), (ins tsb_opt:$opt), MiscFrm, NoItinerary, - "tsb", "\t$opt", []>, Requires<[IsARM, HasV8_4a]> { - let Inst{31-0} = 0xe320f012; -} - -} - -let usesCustomInserter = 1, Defs = [CPSR] in { - -// Pseudo instruction that combines movs + predicated rsbmi -// to implement integer ABS - def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>; -} - -let usesCustomInserter = 1 in { - def COPY_STRUCT_BYVAL_I32 : PseudoInst< - (outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment), - NoItinerary, - [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>; -} - -let hasPostISelHook = 1, Constraints = "$newdst = $dst, $newsrc = $src" in { - // %newsrc, %newdst = MEMCPY %dst, %src, N, ...N scratch 
regs... - // Copies N registers worth of memory from address %src to address %dst - // and returns the incremented addresses. N scratch register will - // be attached for the copy to use. - def MEMCPY : PseudoInst< - (outs GPR:$newdst, GPR:$newsrc), - (ins GPR:$dst, GPR:$src, i32imm:$nreg, variable_ops), - NoItinerary, - [(set GPR:$newdst, GPR:$newsrc, - (ARMmemcopy GPR:$dst, GPR:$src, imm:$nreg))]>; -} - -def ldrex_1 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i8; -}]>; - -def ldrex_2 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i16; -}]>; - -def ldrex_4 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i32; -}]>; - -def strex_1 : PatFrag<(ops node:$val, node:$ptr), - (int_arm_strex node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i8; -}]>; - -def strex_2 : PatFrag<(ops node:$val, node:$ptr), - (int_arm_strex node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i16; -}]>; - -def strex_4 : PatFrag<(ops node:$val, node:$ptr), - (int_arm_strex node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i32; -}]>; - -def ldaex_1 : PatFrag<(ops node:$ptr), (int_arm_ldaex node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i8; -}]>; - -def ldaex_2 : PatFrag<(ops node:$ptr), (int_arm_ldaex node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i16; -}]>; - -def ldaex_4 : PatFrag<(ops node:$ptr), (int_arm_ldaex node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i32; -}]>; - -def stlex_1 : PatFrag<(ops node:$val, node:$ptr), - (int_arm_stlex node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i8; -}]>; - -def stlex_2 : PatFrag<(ops node:$val, node:$ptr), - (int_arm_stlex node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i16; -}]>; - -def stlex_4 : PatFrag<(ops node:$val, node:$ptr), - (int_arm_stlex node:$val, node:$ptr), [{ - return 
cast(N)->getMemoryVT() == MVT::i32; -}]>; - -let mayLoad = 1 in { -def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldrexb", "\t$Rt, $addr", - [(set GPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>; -def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldrexh", "\t$Rt, $addr", - [(set GPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>; -def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldrex", "\t$Rt, $addr", - [(set GPR:$Rt, (ldrex_4 addr_offset_none:$addr))]>; -let hasExtraDefRegAllocReq = 1 in -def LDREXD : AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), - NoItinerary, "ldrexd", "\t$Rt, $addr", []> { - let DecoderMethod = "DecodeDoubleRegLoad"; -} - -def LDAEXB : AIldaex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldaexb", "\t$Rt, $addr", - [(set GPR:$Rt, (ldaex_1 addr_offset_none:$addr))]>; -def LDAEXH : AIldaex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldaexh", "\t$Rt, $addr", - [(set GPR:$Rt, (ldaex_2 addr_offset_none:$addr))]>; -def LDAEX : AIldaex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldaex", "\t$Rt, $addr", - [(set GPR:$Rt, (ldaex_4 addr_offset_none:$addr))]>; -let hasExtraDefRegAllocReq = 1 in -def LDAEXD : AIldaex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), - NoItinerary, "ldaexd", "\t$Rt, $addr", []> { - let DecoderMethod = "DecodeDoubleRegLoad"; -} -} - -let mayStore = 1, Constraints = "@earlyclobber $Rd" in { -def STREXB: AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "strexb", "\t$Rd, $Rt, $addr", - [(set GPR:$Rd, (strex_1 GPR:$Rt, - addr_offset_none:$addr))]>; -def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "strexh", "\t$Rd, $Rt, $addr", - [(set GPR:$Rd, (strex_2 GPR:$Rt, - addr_offset_none:$addr))]>; -def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins 
GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "strex", "\t$Rd, $Rt, $addr", - [(set GPR:$Rd, (strex_4 GPR:$Rt, - addr_offset_none:$addr))]>; -let hasExtraSrcRegAllocReq = 1 in -def STREXD : AIstrex<0b01, (outs GPR:$Rd), - (ins GPRPairOp:$Rt, addr_offset_none:$addr), - NoItinerary, "strexd", "\t$Rd, $Rt, $addr", []> { - let DecoderMethod = "DecodeDoubleRegStore"; -} -def STLEXB: AIstlex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "stlexb", "\t$Rd, $Rt, $addr", - [(set GPR:$Rd, - (stlex_1 GPR:$Rt, addr_offset_none:$addr))]>; -def STLEXH: AIstlex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "stlexh", "\t$Rd, $Rt, $addr", - [(set GPR:$Rd, - (stlex_2 GPR:$Rt, addr_offset_none:$addr))]>; -def STLEX : AIstlex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "stlex", "\t$Rd, $Rt, $addr", - [(set GPR:$Rd, - (stlex_4 GPR:$Rt, addr_offset_none:$addr))]>; -let hasExtraSrcRegAllocReq = 1 in -def STLEXD : AIstlex<0b01, (outs GPR:$Rd), - (ins GPRPairOp:$Rt, addr_offset_none:$addr), - NoItinerary, "stlexd", "\t$Rd, $Rt, $addr", []> { - let DecoderMethod = "DecodeDoubleRegStore"; -} -} - -def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", - [(int_arm_clrex)]>, - Requires<[IsARM, HasV6K]> { - let Inst{31-0} = 0b11110101011111111111000000011111; -} - -def : ARMPat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), - (STREXB GPR:$Rt, addr_offset_none:$addr)>; -def : ARMPat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), - (STREXH GPR:$Rt, addr_offset_none:$addr)>; - -def : ARMPat<(stlex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), - (STLEXB GPR:$Rt, addr_offset_none:$addr)>; -def : ARMPat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), - (STLEXH GPR:$Rt, addr_offset_none:$addr)>; - -class acquiring_load - : PatFrag<(ops node:$ptr), (base node:$ptr), [{ - AtomicOrdering Ordering = cast(N)->getOrdering(); - return isAcquireOrStronger(Ordering); -}]>; - -def 
atomic_load_acquire_8 : acquiring_load; -def atomic_load_acquire_16 : acquiring_load; -def atomic_load_acquire_32 : acquiring_load; - -class releasing_store - : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ - AtomicOrdering Ordering = cast(N)->getOrdering(); - return isReleaseOrStronger(Ordering); -}]>; - -def atomic_store_release_8 : releasing_store; -def atomic_store_release_16 : releasing_store; -def atomic_store_release_32 : releasing_store; - -let AddedComplexity = 8 in { - def : ARMPat<(atomic_load_acquire_8 addr_offset_none:$addr), (LDAB addr_offset_none:$addr)>; - def : ARMPat<(atomic_load_acquire_16 addr_offset_none:$addr), (LDAH addr_offset_none:$addr)>; - def : ARMPat<(atomic_load_acquire_32 addr_offset_none:$addr), (LDA addr_offset_none:$addr)>; - def : ARMPat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (STLB GPR:$val, addr_offset_none:$addr)>; - def : ARMPat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (STLH GPR:$val, addr_offset_none:$addr)>; - def : ARMPat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (STL GPR:$val, addr_offset_none:$addr)>; -} - -// SWP/SWPB are deprecated in V6/V7 and optional in v7VE. -// FIXME Use InstAlias to generate LDREX/STREX pairs instead. -let mayLoad = 1, mayStore = 1 in { -def SWP : AIswp<0, (outs GPRnopc:$Rt), - (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>, - Requires<[IsARM,PreV8]>; -def SWPB: AIswp<1, (outs GPRnopc:$Rt), - (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>, - Requires<[IsARM,PreV8]>; -} - -//===----------------------------------------------------------------------===// -// Coprocessor Instructions. 
-// - -def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, - c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), - NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", - [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, - imm:$CRm, imm:$opc2)]>, - Requires<[IsARM,PreV8]> { - bits<4> opc1; - bits<4> CRn; - bits<4> CRd; - bits<4> cop; - bits<3> opc2; - bits<4> CRm; - - let Inst{3-0} = CRm; - let Inst{4} = 0; - let Inst{7-5} = opc2; - let Inst{11-8} = cop; - let Inst{15-12} = CRd; - let Inst{19-16} = CRn; - let Inst{23-20} = opc1; - - let DecoderNamespace = "CoProc"; -} - -def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, - c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), - NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", - [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, - imm:$CRm, imm:$opc2)]>, - Requires<[IsARM,PreV8]> { - let Inst{31-28} = 0b1111; - bits<4> opc1; - bits<4> CRn; - bits<4> CRd; - bits<4> cop; - bits<3> opc2; - bits<4> CRm; - - let Inst{3-0} = CRm; - let Inst{4} = 0; - let Inst{7-5} = opc2; - let Inst{11-8} = cop; - let Inst{15-12} = CRd; - let Inst{19-16} = CRn; - let Inst{23-20} = opc1; - - let DecoderNamespace = "CoProc"; -} - -class ACI pattern, IndexMode im = IndexModeNone> - : I { - let Inst{27-25} = 0b110; -} -class ACInoP pattern, IndexMode im = IndexModeNone> - : InoP { - let Inst{31-28} = 0b1111; - let Inst{27-25} = 0b110; -} - -let DecoderNamespace = "CoProc" in { -multiclass LdStCop pattern> { - def _OFFSET : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), - asm, "\t$cop, $CRd, $addr", pattern> { - bits<13> addr; - bits<4> cop; - bits<4> CRd; - let Inst{24} = 1; // P = 1 - let Inst{23} = addr{8}; - let Inst{22} = Dbit; - let Inst{21} = 0; // W = 0 - let Inst{20} = load; - let Inst{19-16} = addr{12-9}; - let Inst{15-12} = CRd; - let Inst{11-8} = cop; - let Inst{7-0} = addr{7-0}; - let DecoderMethod = "DecodeCopMemInstruction"; - } - def _PRE : ACI<(outs), (ins p_imm:$cop, 
c_imm:$CRd, addrmode5_pre:$addr), - asm, "\t$cop, $CRd, $addr!", [], IndexModePre> { - bits<13> addr; - bits<4> cop; - bits<4> CRd; - let Inst{24} = 1; // P = 1 - let Inst{23} = addr{8}; - let Inst{22} = Dbit; - let Inst{21} = 1; // W = 1 - let Inst{20} = load; - let Inst{19-16} = addr{12-9}; - let Inst{15-12} = CRd; - let Inst{11-8} = cop; - let Inst{7-0} = addr{7-0}; - let DecoderMethod = "DecodeCopMemInstruction"; - } - def _POST: ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, - postidx_imm8s4:$offset), - asm, "\t$cop, $CRd, $addr, $offset", [], IndexModePost> { - bits<9> offset; - bits<4> addr; - bits<4> cop; - bits<4> CRd; - let Inst{24} = 0; // P = 0 - let Inst{23} = offset{8}; - let Inst{22} = Dbit; - let Inst{21} = 1; // W = 1 - let Inst{20} = load; - let Inst{19-16} = addr; - let Inst{15-12} = CRd; - let Inst{11-8} = cop; - let Inst{7-0} = offset{7-0}; - let DecoderMethod = "DecodeCopMemInstruction"; - } - def _OPTION : ACI<(outs), - (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, - coproc_option_imm:$option), - asm, "\t$cop, $CRd, $addr, $option", []> { - bits<8> option; - bits<4> addr; - bits<4> cop; - bits<4> CRd; - let Inst{24} = 0; // P = 0 - let Inst{23} = 1; // U = 1 - let Inst{22} = Dbit; - let Inst{21} = 0; // W = 0 - let Inst{20} = load; - let Inst{19-16} = addr; - let Inst{15-12} = CRd; - let Inst{11-8} = cop; - let Inst{7-0} = option; - let DecoderMethod = "DecodeCopMemInstruction"; - } -} -multiclass LdSt2Cop pattern> { - def _OFFSET : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), - asm, "\t$cop, $CRd, $addr", pattern> { - bits<13> addr; - bits<4> cop; - bits<4> CRd; - let Inst{24} = 1; // P = 1 - let Inst{23} = addr{8}; - let Inst{22} = Dbit; - let Inst{21} = 0; // W = 0 - let Inst{20} = load; - let Inst{19-16} = addr{12-9}; - let Inst{15-12} = CRd; - let Inst{11-8} = cop; - let Inst{7-0} = addr{7-0}; - let DecoderMethod = "DecodeCopMemInstruction"; - } - def _PRE : ACInoP<(outs), (ins p_imm:$cop, 
c_imm:$CRd, addrmode5_pre:$addr), - asm, "\t$cop, $CRd, $addr!", [], IndexModePre> { - bits<13> addr; - bits<4> cop; - bits<4> CRd; - let Inst{24} = 1; // P = 1 - let Inst{23} = addr{8}; - let Inst{22} = Dbit; - let Inst{21} = 1; // W = 1 - let Inst{20} = load; - let Inst{19-16} = addr{12-9}; - let Inst{15-12} = CRd; - let Inst{11-8} = cop; - let Inst{7-0} = addr{7-0}; - let DecoderMethod = "DecodeCopMemInstruction"; - } - def _POST: ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, - postidx_imm8s4:$offset), - asm, "\t$cop, $CRd, $addr, $offset", [], IndexModePost> { - bits<9> offset; - bits<4> addr; - bits<4> cop; - bits<4> CRd; - let Inst{24} = 0; // P = 0 - let Inst{23} = offset{8}; - let Inst{22} = Dbit; - let Inst{21} = 1; // W = 1 - let Inst{20} = load; - let Inst{19-16} = addr; - let Inst{15-12} = CRd; - let Inst{11-8} = cop; - let Inst{7-0} = offset{7-0}; - let DecoderMethod = "DecodeCopMemInstruction"; - } - def _OPTION : ACInoP<(outs), - (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, - coproc_option_imm:$option), - asm, "\t$cop, $CRd, $addr, $option", []> { - bits<8> option; - bits<4> addr; - bits<4> cop; - bits<4> CRd; - let Inst{24} = 0; // P = 0 - let Inst{23} = 1; // U = 1 - let Inst{22} = Dbit; - let Inst{21} = 0; // W = 0 - let Inst{20} = load; - let Inst{19-16} = addr; - let Inst{15-12} = CRd; - let Inst{11-8} = cop; - let Inst{7-0} = option; - let DecoderMethod = "DecodeCopMemInstruction"; - } -} - -defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; -defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; - -defm STC : LdStCop <0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm STCL : LdStCop <0, 1, 
"stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; -defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; - -} // DecoderNamespace = "CoProc" - -//===----------------------------------------------------------------------===// -// Move between coprocessor and ARM core register. -// - -class MovRCopro pattern> - : ABI<0b1110, oops, iops, NoItinerary, opc, - "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2", pattern> { - let Inst{20} = direction; - let Inst{4} = 1; - - bits<4> Rt; - bits<4> cop; - bits<3> opc1; - bits<3> opc2; - bits<4> CRm; - bits<4> CRn; - - let Inst{15-12} = Rt; - let Inst{11-8} = cop; - let Inst{23-21} = opc1; - let Inst{7-5} = opc2; - let Inst{3-0} = CRm; - let Inst{19-16} = CRn; - - let DecoderNamespace = "CoProc"; -} - -def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, - (outs), - (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, - c_imm:$CRm, imm0_7:$opc2), - [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, - imm:$CRm, imm:$opc2)]>, - ComplexDeprecationPredicate<"MCR">; -def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", - (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, - c_imm:$CRm, 0, pred:$p)>; -def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, - (outs GPRwithAPSR:$Rt), - (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, - imm0_7:$opc2), []>; -def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm", - (MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, - c_imm:$CRm, 0, pred:$p)>; - -def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), - (MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; - -class MovRCopro2 pattern> - : ABXI<0b1110, oops, iops, NoItinerary, - !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), pattern> { - let Inst{31-24} = 
0b11111110; - let Inst{20} = direction; - let Inst{4} = 1; - - bits<4> Rt; - bits<4> cop; - bits<3> opc1; - bits<3> opc2; - bits<4> CRm; - bits<4> CRn; - - let Inst{15-12} = Rt; - let Inst{11-8} = cop; - let Inst{23-21} = opc1; - let Inst{7-5} = opc2; - let Inst{3-0} = CRm; - let Inst{19-16} = CRn; - - let DecoderNamespace = "CoProc"; -} - -def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, - (outs), - (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, - c_imm:$CRm, imm0_7:$opc2), - [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, - imm:$CRm, imm:$opc2)]>, - Requires<[IsARM,PreV8]>; -def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm", - (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, - c_imm:$CRm, 0)>; -def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, - (outs GPRwithAPSR:$Rt), - (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, - imm0_7:$opc2), []>, - Requires<[IsARM,PreV8]>; -def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm", - (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, - c_imm:$CRm, 0)>; - -def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, - imm:$CRm, imm:$opc2), - (MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; - -class MovRRCopro - pattern = []> - : ABI<0b1100, oops, iops, NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", - pattern> { - - let Inst{23-21} = 0b010; - let Inst{20} = direction; - - bits<4> Rt; - bits<4> Rt2; - bits<4> cop; - bits<4> opc1; - bits<4> CRm; - - let Inst{15-12} = Rt; - let Inst{19-16} = Rt2; - let Inst{11-8} = cop; - let Inst{7-4} = opc1; - let Inst{3-0} = CRm; -} - -def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */, - (outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt, - GPRnopc:$Rt2, c_imm:$CRm), - [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt, - GPRnopc:$Rt2, imm:$CRm)]>; -def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */, - (outs GPRnopc:$Rt, GPRnopc:$Rt2), 
- (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>; - -class MovRRCopro2 pattern = []> - : ABXI<0b1100, oops, iops, NoItinerary, - !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern>, - Requires<[IsARM,PreV8]> { - let Inst{31-28} = 0b1111; - let Inst{23-21} = 0b010; - let Inst{20} = direction; - - bits<4> Rt; - bits<4> Rt2; - bits<4> cop; - bits<4> opc1; - bits<4> CRm; - - let Inst{15-12} = Rt; - let Inst{19-16} = Rt2; - let Inst{11-8} = cop; - let Inst{7-4} = opc1; - let Inst{3-0} = CRm; - - let DecoderMethod = "DecoderForMRRC2AndMCRR2"; -} - -def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */, - (outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt, - GPRnopc:$Rt2, c_imm:$CRm), - [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt, - GPRnopc:$Rt2, imm:$CRm)]>; - -def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */, - (outs GPRnopc:$Rt, GPRnopc:$Rt2), - (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>; - -//===----------------------------------------------------------------------===// -// Move between special register and ARM core register -// - -// Move to ARM core register from Special Register -def MRS : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary, - "mrs", "\t$Rd, apsr", []> { - bits<4> Rd; - let Inst{23-16} = 0b00001111; - let Unpredictable{19-17} = 0b111; - - let Inst{15-12} = Rd; - - let Inst{11-0} = 0b000000000000; - let Unpredictable{11-0} = 0b110100001111; -} - -def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPRnopc:$Rd, pred:$p), 0>, - Requires<[IsARM]>; - -// The MRSsys instruction is the MRS instruction from the ARM ARM, -// section B9.3.9, with the R bit set to 1. 
-def MRSsys : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary, - "mrs", "\t$Rd, spsr", []> { - bits<4> Rd; - let Inst{23-16} = 0b01001111; - let Unpredictable{19-16} = 0b1111; - - let Inst{15-12} = Rd; - - let Inst{11-0} = 0b000000000000; - let Unpredictable{11-0} = 0b110100001111; -} - -// However, the MRS (banked register) system instruction (ARMv7VE) *does* have a -// separate encoding (distinguished by bit 5. -def MRSbanked : ABI<0b0001, (outs GPRnopc:$Rd), (ins banked_reg:$banked), - NoItinerary, "mrs", "\t$Rd, $banked", []>, - Requires<[IsARM, HasVirtualization]> { - bits<6> banked; - bits<4> Rd; - - let Inst{23} = 0; - let Inst{22} = banked{5}; // R bit - let Inst{21-20} = 0b00; - let Inst{19-16} = banked{3-0}; - let Inst{15-12} = Rd; - let Inst{11-9} = 0b001; - let Inst{8} = banked{4}; - let Inst{7-0} = 0b00000000; -} - -// Move from ARM core register to Special Register -// -// No need to have both system and application versions of MSR (immediate) or -// MSR (register), the encodings are the same and the assembly parser has no way -// to distinguish between them. The mask operand contains the special register -// (R Bit) in bit 4 and bits 3-0 contains the mask with the fields to be -// accessed in the special register. 
-let Defs = [CPSR] in -def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, - "msr", "\t$mask, $Rn", []> { - bits<5> mask; - bits<4> Rn; - - let Inst{23} = 0; - let Inst{22} = mask{4}; // R bit - let Inst{21-20} = 0b10; - let Inst{19-16} = mask{3-0}; - let Inst{15-12} = 0b1111; - let Inst{11-4} = 0b00000000; - let Inst{3-0} = Rn; -} - -let Defs = [CPSR] in -def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, mod_imm:$imm), NoItinerary, - "msr", "\t$mask, $imm", []> { - bits<5> mask; - bits<12> imm; - - let Inst{23} = 0; - let Inst{22} = mask{4}; // R bit - let Inst{21-20} = 0b10; - let Inst{19-16} = mask{3-0}; - let Inst{15-12} = 0b1111; - let Inst{11-0} = imm; -} - -// However, the MSR (banked register) system instruction (ARMv7VE) *does* have a -// separate encoding (distinguished by bit 5. -def MSRbanked : ABI<0b0001, (outs), (ins banked_reg:$banked, GPRnopc:$Rn), - NoItinerary, "msr", "\t$banked, $Rn", []>, - Requires<[IsARM, HasVirtualization]> { - bits<6> banked; - bits<4> Rn; - - let Inst{23} = 0; - let Inst{22} = banked{5}; // R bit - let Inst{21-20} = 0b10; - let Inst{19-16} = banked{3-0}; - let Inst{15-12} = 0b1111; - let Inst{11-9} = 0b001; - let Inst{8} = banked{4}; - let Inst{7-4} = 0b0000; - let Inst{3-0} = Rn; -} - -// Dynamic stack allocation yields a _chkstk for Windows targets. These calls -// are needed to probe the stack when allocating more than -// 4k bytes in one go. Touching the stack at 4K increments is necessary to -// ensure that the guard pages used by the OS virtual memory manager are -// allocated in correct sequence. -// The main point of having separate instruction are extra unmodelled effects -// (compared to ordinary calls) like stack pointer change. 
- -def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone, - [SDNPHasChain, SDNPSideEffect]>; -let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in - def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>; - -def win__dbzchk : SDNode<"ARMISD::WIN__DBZCHK", SDT_WIN__DBZCHK, - [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; -let usesCustomInserter = 1, Defs = [CPSR] in - def WIN__DBZCHK : PseudoInst<(outs), (ins tGPR:$divisor), NoItinerary, - [(win__dbzchk tGPR:$divisor)]>; - -//===----------------------------------------------------------------------===// -// TLS Instructions -// - -// __aeabi_read_tp preserves the registers r1-r3. -// This is a pseudo inst so that we can get the encoding right, -// complete with fixup for the aeabi_read_tp function. -// TPsoft is valid for ARM mode only, in case of Thumb mode a tTPsoft pattern -// is defined in "ARMInstrThumb.td". -let isCall = 1, - Defs = [R0, R12, LR, CPSR], Uses = [SP] in { - def TPsoft : ARMPseudoInst<(outs), (ins), 4, IIC_Br, - [(set R0, ARMthread_pointer)]>, Sched<[WriteBr]>, - Requires<[IsARM, IsReadTPSoft]>; -} - -// Reading thread pointer from coprocessor register -def : ARMPat<(ARMthread_pointer), (MRC 15, 0, 13, 0, 3)>, - Requires<[IsARM, IsReadTPHard]>; - -//===----------------------------------------------------------------------===// -// SJLJ Exception handling intrinsics -// eh_sjlj_setjmp() is an instruction sequence to store the return -// address and save #0 in R0 for the non-longjmp case. -// Since by its nature we may be coming from some other function to get -// here, and we're using the stack frame for the containing function to -// save/restore registers, we can't keep anything live in regs across -// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon -// when we get here from a longjmp(). We force everything out of registers -// except for our own input by listing the relevant registers in Defs. 
By -// doing so, we also cause the prologue/epilogue code to actively preserve -// all of the callee-saved resgisters, which is exactly what we want. -// A constant value is passed in $val, and we use the location as a scratch. -// -// These are pseudo-instructions and are lowered to individual MC-insts, so -// no encoding information is necessary. -let Defs = - [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR, - Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ], - hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { - def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val), - NoItinerary, - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, - Requires<[IsARM, HasVFP2]>; -} - -let Defs = - [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ], - hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { - def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val), - NoItinerary, - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, - Requires<[IsARM, NoVFP]>; -} - -// FIXME: Non-IOS version(s) -let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, - Defs = [ R7, LR, SP ] in { -def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch), - NoItinerary, - [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, - Requires<[IsARM]>; -} - -let isBarrier = 1, hasSideEffects = 1, usesCustomInserter = 1 in -def Int_eh_sjlj_setup_dispatch : PseudoInst<(outs), (ins), NoItinerary, - [(ARMeh_sjlj_setup_dispatch)]>; - -// eh.sjlj.dispatchsetup pseudo-instruction. -// This pseudo is used for both ARM and Thumb. Any differences are handled when -// the pseudo is expanded (which happens before any passes that need the -// instruction size). 
-let isBarrier = 1 in -def Int_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>; - - -//===----------------------------------------------------------------------===// -// Non-Instruction Patterns -// - -// ARMv4 indirect branch using (MOVr PC, dst) -let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in - def MOVPCRX : ARMPseudoExpand<(outs), (ins GPR:$dst), - 4, IIC_Br, [(brind GPR:$dst)], - (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>, - Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>; - -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in - def TAILJMPr4 : ARMPseudoExpand<(outs), (ins GPR:$dst), - 4, IIC_Br, [], - (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>, - Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>; - -// Large immediate handling. - -// 32-bit immediate using two piece mod_imms or movw + movt. -// This is a single pseudo instruction, the benefit is that it can be remat'd -// as a single unit instead of having to handle reg inputs. -// FIXME: Remove this when we can do generalized remat. -let isReMaterializable = 1, isMoveImm = 1 in -def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2, - [(set GPR:$dst, (arm_i32imm:$src))]>, - Requires<[IsARM]>; - -def LDRLIT_ga_abs : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iLoad_i, - [(set GPR:$dst, (ARMWrapper tglobaladdr:$src))]>, - Requires<[IsARM, DontUseMovt]>; - -// Pseudo instruction that combines movw + movt + add pc (if PIC). -// It also makes it possible to rematerialize the instructions. -// FIXME: Remove this when we can do generalized remat and when machine licm -// can properly the instructions. 
-let isReMaterializable = 1 in { -def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), - IIC_iMOVix2addpc, - [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, - Requires<[IsARM, UseMovtInPic]>; - -def LDRLIT_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), - IIC_iLoadiALU, - [(set GPR:$dst, - (ARMWrapperPIC tglobaladdr:$addr))]>, - Requires<[IsARM, DontUseMovtInPic]>; - -let AddedComplexity = 10 in -def LDRLIT_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), - NoItinerary, - [(set GPR:$dst, - (load (ARMWrapperPIC tglobaladdr:$addr)))]>, - Requires<[IsARM, DontUseMovtInPic]>; - -let AddedComplexity = 10 in -def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), - IIC_iMOVix2ld, - [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>, - Requires<[IsARM, UseMovtInPic]>; -} // isReMaterializable - -// The many different faces of TLS access. -def : ARMPat<(ARMWrapper tglobaltlsaddr :$dst), - (MOVi32imm tglobaltlsaddr :$dst)>, - Requires<[IsARM, UseMovt]>; - -def : Pat<(ARMWrapper tglobaltlsaddr:$src), - (LDRLIT_ga_abs tglobaltlsaddr:$src)>, - Requires<[IsARM, DontUseMovt]>; - -def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr), - (MOV_ga_pcrel tglobaltlsaddr:$addr)>, Requires<[IsARM, UseMovtInPic]>; - -def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr), - (LDRLIT_ga_pcrel tglobaltlsaddr:$addr)>, - Requires<[IsARM, DontUseMovtInPic]>; -let AddedComplexity = 10 in -def : Pat<(load (ARMWrapperPIC tglobaltlsaddr:$addr)), - (MOV_ga_pcrel_ldr tglobaltlsaddr:$addr)>, - Requires<[IsARM, UseMovtInPic]>; - - -// ConstantPool, GlobalAddress, and JumpTable -def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; -def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>, - Requires<[IsARM, UseMovt]>; -def : ARMPat<(ARMWrapper texternalsym :$dst), (MOVi32imm texternalsym :$dst)>, - Requires<[IsARM, UseMovt]>; -def : ARMPat<(ARMWrapperJT tjumptable:$dst), - (LEApcrelJT tjumptable:$dst)>; - -// 
TODO: add,sub,and, 3-instr forms? - -// Tail calls. These patterns also apply to Thumb mode. -def : Pat<(ARMtcret tcGPR:$dst), (TCRETURNri tcGPR:$dst)>; -def : Pat<(ARMtcret (i32 tglobaladdr:$dst)), (TCRETURNdi texternalsym:$dst)>; -def : Pat<(ARMtcret (i32 texternalsym:$dst)), (TCRETURNdi texternalsym:$dst)>; - -// Direct calls -def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>; -def : ARMPat<(ARMcall_nolink texternalsym:$func), - (BMOVPCB_CALL texternalsym:$func)>; - -// zextload i1 -> zextload i8 -def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; -def : ARMPat<(zextloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>; - -// extload -> zextload -def : ARMPat<(extloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; -def : ARMPat<(extloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>; -def : ARMPat<(extloadi8 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; -def : ARMPat<(extloadi8 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>; - -def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>; - -def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>; -def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>; - -// smul* and smla* -def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b), - (SMULBB GPR:$a, GPR:$b)>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; -def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))), - (SMULBT GPR:$a, GPR:$b)>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; -def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b), - (SMULTB GPR:$a, GPR:$b)>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; -def : ARMV5MOPat<(add GPR:$acc, - (mul sext_16_node:$a, sext_16_node:$b)), - (SMLABB GPR:$a, GPR:$b, GPR:$acc)>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; -def : ARMV5MOPat<(add GPR:$acc, - (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), - (SMLABT GPR:$a, GPR:$b, GPR:$acc)>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; -def : ARMV5MOPat<(add 
GPR:$acc, - (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), - (SMLATB GPR:$a, GPR:$b, GPR:$acc)>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; - -def : ARMV5TEPat<(int_arm_smulbb GPR:$a, GPR:$b), - (SMULBB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(int_arm_smulbt GPR:$a, GPR:$b), - (SMULBT GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(int_arm_smultb GPR:$a, GPR:$b), - (SMULTB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(int_arm_smultt GPR:$a, GPR:$b), - (SMULTT GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(int_arm_smulwb GPR:$a, GPR:$b), - (SMULWB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(int_arm_smulwt GPR:$a, GPR:$b), - (SMULWT GPR:$a, GPR:$b)>; - -def : ARMV5TEPat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc), - (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(int_arm_smlabt GPR:$a, GPR:$b, GPR:$acc), - (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(int_arm_smlatb GPR:$a, GPR:$b, GPR:$acc), - (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(int_arm_smlatt GPR:$a, GPR:$b, GPR:$acc), - (SMLATT GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(int_arm_smlawb GPR:$a, GPR:$b, GPR:$acc), - (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(int_arm_smlawt GPR:$a, GPR:$b, GPR:$acc), - (SMLAWT GPR:$a, GPR:$b, GPR:$acc)>; - -// Pre-v7 uses MCR for synchronization barriers. 
-def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>, - Requires<[IsARM, HasV6]>; - -// SXT/UXT with no rotate -let AddedComplexity = 16 in { -def : ARMV6Pat<(and GPR:$Src, 0x000000FF), (UXTB GPR:$Src, 0)>; -def : ARMV6Pat<(and GPR:$Src, 0x0000FFFF), (UXTH GPR:$Src, 0)>; -def : ARMV6Pat<(and GPR:$Src, 0x00FF00FF), (UXTB16 GPR:$Src, 0)>; -def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0x00FF)), - (UXTAB GPR:$Rn, GPR:$Rm, 0)>; -def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0xFFFF)), - (UXTAH GPR:$Rn, GPR:$Rm, 0)>; -} - -def : ARMV6Pat<(sext_inreg GPR:$Src, i8), (SXTB GPR:$Src, 0)>; -def : ARMV6Pat<(sext_inreg GPR:$Src, i16), (SXTH GPR:$Src, 0)>; - -def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i8)), - (SXTAB GPR:$Rn, GPRnopc:$Rm, 0)>; -def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i16)), - (SXTAH GPR:$Rn, GPRnopc:$Rm, 0)>; - -// Atomic load/store patterns -def : ARMPat<(atomic_load_8 ldst_so_reg:$src), - (LDRBrs ldst_so_reg:$src)>; -def : ARMPat<(atomic_load_8 addrmode_imm12:$src), - (LDRBi12 addrmode_imm12:$src)>; -def : ARMPat<(atomic_load_16 addrmode3:$src), - (LDRH addrmode3:$src)>; -def : ARMPat<(atomic_load_32 ldst_so_reg:$src), - (LDRrs ldst_so_reg:$src)>; -def : ARMPat<(atomic_load_32 addrmode_imm12:$src), - (LDRi12 addrmode_imm12:$src)>; -def : ARMPat<(atomic_store_8 ldst_so_reg:$ptr, GPR:$val), - (STRBrs GPR:$val, ldst_so_reg:$ptr)>; -def : ARMPat<(atomic_store_8 addrmode_imm12:$ptr, GPR:$val), - (STRBi12 GPR:$val, addrmode_imm12:$ptr)>; -def : ARMPat<(atomic_store_16 addrmode3:$ptr, GPR:$val), - (STRH GPR:$val, addrmode3:$ptr)>; -def : ARMPat<(atomic_store_32 ldst_so_reg:$ptr, GPR:$val), - (STRrs GPR:$val, ldst_so_reg:$ptr)>; -def : ARMPat<(atomic_store_32 addrmode_imm12:$ptr, GPR:$val), - (STRi12 GPR:$val, addrmode_imm12:$ptr)>; - - -//===----------------------------------------------------------------------===// -// Thumb Support -// - -include "ARMInstrThumb.td" - 
-//===----------------------------------------------------------------------===// -// Thumb2 Support -// - -include "ARMInstrThumb2.td" - -//===----------------------------------------------------------------------===// -// Floating Point Support -// - -include "ARMInstrVFP.td" - -//===----------------------------------------------------------------------===// -// Advanced SIMD (NEON) Support -// - -include "ARMInstrNEON.td" - -//===----------------------------------------------------------------------===// -// Assembler aliases -// - -// Memory barriers -def : InstAlias<"dmb", (DMB 0xf), 0>, Requires<[IsARM, HasDB]>; -def : InstAlias<"dsb", (DSB 0xf), 0>, Requires<[IsARM, HasDB]>; -def : InstAlias<"isb", (ISB 0xf), 0>, Requires<[IsARM, HasDB]>; -// Armv8-R 'Data Full Barrier' -def : InstAlias<"dfb", (DSB 0xc), 1>, Requires<[IsARM, HasDFB]>; - -// System instructions -def : MnemonicAlias<"swi", "svc">; - -// Load / Store Multiple -def : MnemonicAlias<"ldmfd", "ldm">; -def : MnemonicAlias<"ldmia", "ldm">; -def : MnemonicAlias<"ldmea", "ldmdb">; -def : MnemonicAlias<"stmfd", "stmdb">; -def : MnemonicAlias<"stmia", "stm">; -def : MnemonicAlias<"stmea", "stm">; - -// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT with the -// input operands swapped when the shift amount is zero (i.e., unspecified). -def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm", - (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p), 0>, - Requires<[IsARM, HasV6]>; -def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm", - (PKHBT GPRnopc:$Rd, GPRnopc:$Rm, GPRnopc:$Rn, 0, pred:$p), 0>, - Requires<[IsARM, HasV6]>; - -// PUSH/POP aliases for STM/LDM -def : ARMInstAlias<"push${p} $regs", (STMDB_UPD SP, pred:$p, reglist:$regs)>; -def : ARMInstAlias<"pop${p} $regs", (LDMIA_UPD SP, pred:$p, reglist:$regs)>; - -// SSAT/USAT optional shift operand. 
-def : ARMInstAlias<"ssat${p} $Rd, $sat_imm, $Rn", - (SSAT GPRnopc:$Rd, imm1_32:$sat_imm, GPRnopc:$Rn, 0, pred:$p)>; -def : ARMInstAlias<"usat${p} $Rd, $sat_imm, $Rn", - (USAT GPRnopc:$Rd, imm0_31:$sat_imm, GPRnopc:$Rn, 0, pred:$p)>; - - -// Extend instruction optional rotate operand. -def : ARMInstAlias<"sxtab${p} $Rd, $Rn, $Rm", - (SXTAB GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; -def : ARMInstAlias<"sxtah${p} $Rd, $Rn, $Rm", - (SXTAH GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; -def : ARMInstAlias<"sxtab16${p} $Rd, $Rn, $Rm", - (SXTAB16 GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; -def : ARMInstAlias<"sxtb${p} $Rd, $Rm", - (SXTB GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; -def : ARMInstAlias<"sxtb16${p} $Rd, $Rm", - (SXTB16 GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; -def : ARMInstAlias<"sxth${p} $Rd, $Rm", - (SXTH GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; - -def : ARMInstAlias<"uxtab${p} $Rd, $Rn, $Rm", - (UXTAB GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; -def : ARMInstAlias<"uxtah${p} $Rd, $Rn, $Rm", - (UXTAH GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; -def : ARMInstAlias<"uxtab16${p} $Rd, $Rn, $Rm", - (UXTAB16 GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; -def : ARMInstAlias<"uxtb${p} $Rd, $Rm", - (UXTB GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; -def : ARMInstAlias<"uxtb16${p} $Rd, $Rm", - (UXTB16 GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; -def : ARMInstAlias<"uxth${p} $Rd, $Rm", - (UXTH GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; - - -// RFE aliases -def : MnemonicAlias<"rfefa", "rfeda">; -def : MnemonicAlias<"rfeea", "rfedb">; -def : MnemonicAlias<"rfefd", "rfeia">; -def : MnemonicAlias<"rfeed", "rfeib">; -def : MnemonicAlias<"rfe", "rfeia">; - -// SRS aliases -def : MnemonicAlias<"srsfa", "srsib">; -def : MnemonicAlias<"srsea", "srsia">; -def : MnemonicAlias<"srsfd", "srsdb">; -def : MnemonicAlias<"srsed", "srsda">; -def : MnemonicAlias<"srs", "srsia">; - -// QSAX == QSUBADDX -def : MnemonicAlias<"qsubaddx", "qsax">; -// SASX == SADDSUBX -def : 
MnemonicAlias<"saddsubx", "sasx">; -// SHASX == SHADDSUBX -def : MnemonicAlias<"shaddsubx", "shasx">; -// SHSAX == SHSUBADDX -def : MnemonicAlias<"shsubaddx", "shsax">; -// SSAX == SSUBADDX -def : MnemonicAlias<"ssubaddx", "ssax">; -// UASX == UADDSUBX -def : MnemonicAlias<"uaddsubx", "uasx">; -// UHASX == UHADDSUBX -def : MnemonicAlias<"uhaddsubx", "uhasx">; -// UHSAX == UHSUBADDX -def : MnemonicAlias<"uhsubaddx", "uhsax">; -// UQASX == UQADDSUBX -def : MnemonicAlias<"uqaddsubx", "uqasx">; -// UQSAX == UQSUBADDX -def : MnemonicAlias<"uqsubaddx", "uqsax">; -// USAX == USUBADDX -def : MnemonicAlias<"usubaddx", "usax">; - -// "mov Rd, mod_imm_not" can be handled via "mvn" in assembly, just like -// for isel. -def : ARMInstSubst<"mov${s}${p} $Rd, $imm", - (MVNi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>; -def : ARMInstSubst<"mvn${s}${p} $Rd, $imm", - (MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>; -// Same for AND <--> BIC -def : ARMInstSubst<"bic${s}${p} $Rd, $Rn, $imm", - (ANDri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, - pred:$p, cc_out:$s)>; -def : ARMInstSubst<"bic${s}${p} $Rdn, $imm", - (ANDri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, - pred:$p, cc_out:$s)>; -def : ARMInstSubst<"and${s}${p} $Rd, $Rn, $imm", - (BICri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, - pred:$p, cc_out:$s)>; -def : ARMInstSubst<"and${s}${p} $Rdn, $imm", - (BICri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, - pred:$p, cc_out:$s)>; - -// Likewise, "add Rd, mod_imm_neg" -> sub -def : ARMInstSubst<"add${s}${p} $Rd, $Rn, $imm", - (SUBri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; -def : ARMInstSubst<"add${s}${p} $Rd, $imm", - (SUBri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; -// Likewise, "sub Rd, mod_imm_neg" -> add -def : ARMInstSubst<"sub${s}${p} $Rd, $Rn, $imm", - (ADDri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; -def : ARMInstSubst<"sub${s}${p} $Rd, $imm", - (ADDri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; - - -def : 
ARMInstSubst<"adc${s}${p} $Rd, $Rn, $imm", - (SBCri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; -def : ARMInstSubst<"adc${s}${p} $Rdn, $imm", - (SBCri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; -def : ARMInstSubst<"sbc${s}${p} $Rd, $Rn, $imm", - (ADCri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; -def : ARMInstSubst<"sbc${s}${p} $Rdn, $imm", - (ADCri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; - -// Same for CMP <--> CMN via mod_imm_neg -def : ARMInstSubst<"cmp${p} $Rd, $imm", - (CMNri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>; -def : ARMInstSubst<"cmn${p} $Rd, $imm", - (CMPri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>; - -// The shifter forms of the MOV instruction are aliased to the ASR, LSL, -// LSR, ROR, and RRX instructions. -// FIXME: We need C++ parser hooks to map the alias to the MOV -// encoding. It seems we should be able to do that sort of thing -// in tblgen, but it could get ugly. -let TwoOperandAliasConstraint = "$Rm = $Rd" in { -def ASRi : ARMAsmPseudo<"asr${s}${p} $Rd, $Rm, $imm", - (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p, - cc_out:$s)>; -def LSRi : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rm, $imm", - (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p, - cc_out:$s)>; -def LSLi : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rm, $imm", - (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p, - cc_out:$s)>; -def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm", - (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p, - cc_out:$s)>; -} -def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm", - (ins GPR:$Rd, GPR:$Rm, pred:$p, cc_out:$s)>; -let TwoOperandAliasConstraint = "$Rn = $Rd" in { -def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm", - (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, - cc_out:$s)>; -def LSRr : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rn, $Rm", - (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, - cc_out:$s)>; -def LSLr : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rn, $Rm", - (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, - 
cc_out:$s)>; -def RORr : ARMAsmPseudo<"ror${s}${p} $Rd, $Rn, $Rm", - (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, - cc_out:$s)>; -} - -// "neg" is and alias for "rsb rd, rn, #0" -def : ARMInstAlias<"neg${s}${p} $Rd, $Rm", - (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>; - -// Pre-v6, 'mov r0, r0' was used as a NOP encoding. -def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>, - Requires<[IsARM, NoV6]>; - -// MUL/UMLAL/SMLAL/UMULL/SMULL are available on all arches, but -// the instruction definitions need difference constraints pre-v6. -// Use these aliases for the assembly parsing on pre-v6. -def : InstAlias<"mul${s}${p} $Rd, $Rn, $Rm", - (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s), 0>, - Requires<[IsARM, NoV6]>; -def : InstAlias<"mla${s}${p} $Rd, $Rn, $Rm, $Ra", - (MLA GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra, - pred:$p, cc_out:$s), 0>, - Requires<[IsARM, NoV6]>; -def : InstAlias<"smlal${s}${p} $RdLo, $RdHi, $Rn, $Rm", - (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 0>, - Requires<[IsARM, NoV6]>; -def : InstAlias<"umlal${s}${p} $RdLo, $RdHi, $Rn, $Rm", - (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 0>, - Requires<[IsARM, NoV6]>; -def : InstAlias<"smull${s}${p} $RdLo, $RdHi, $Rn, $Rm", - (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 0>, - Requires<[IsARM, NoV6]>; -def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm", - (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 0>, - Requires<[IsARM, NoV6]>; - -// 'it' blocks in ARM mode just validate the predicates. The IT itself -// is discarded. 
-def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>, - ComplexDeprecationPredicate<"IT">; - -let mayLoad = 1, mayStore =1, hasSideEffects = 1 in -def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn), - NoItinerary, - [(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>; - -//===---------------------------------- -// Atomic cmpxchg for -O0 -//===---------------------------------- - -// The fast register allocator used during -O0 inserts spills to cover any VRegs -// live across basic block boundaries. When this happens between an LDXR and an -// STXR it can clear the exclusive monitor, causing all cmpxchg attempts to -// fail. - -// Unfortunately, this means we have to have an alternative (expanded -// post-regalloc) path for -O0 compilations. Fortunately this path can be -// significantly more naive than the standard expansion: we conservatively -// assume seq_cst, strong cmpxchg and omit clrex on failure. - -let Constraints = "@earlyclobber $Rd,@earlyclobber $temp", - mayLoad = 1, mayStore = 1 in { -def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$temp), - (ins GPR:$addr, GPR:$desired, GPR:$new), - NoItinerary, []>, Sched<[]>; - -def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$temp), - (ins GPR:$addr, GPR:$desired, GPR:$new), - NoItinerary, []>, Sched<[]>; - -def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$temp), - (ins GPR:$addr, GPR:$desired, GPR:$new), - NoItinerary, []>, Sched<[]>; - -def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$temp), - (ins GPR:$addr, GPRPair:$desired, GPRPair:$new), - NoItinerary, []>, Sched<[]>; -} - -def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary, - [(atomic_fence imm:$ordering, 0)]> { - let hasSideEffects = 1; - let Size = 0; - let AsmString = "@ COMPILER BARRIER"; -} diff --git a/suite/synctools/tablegen/ARM/ARMInstrNEON.td b/suite/synctools/tablegen/ARM/ARMInstrNEON.td deleted file mode 100644 index 4525eec8da..0000000000 --- 
a/suite/synctools/tablegen/ARM/ARMInstrNEON.td +++ /dev/null @@ -1,8545 +0,0 @@ -//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the ARM NEON instruction set. -// -//===----------------------------------------------------------------------===// - - -//===----------------------------------------------------------------------===// -// NEON-specific Operands. -//===----------------------------------------------------------------------===// -def nModImm : Operand { - let PrintMethod = "printNEONModImmOperand"; -} - -def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; } -def nImmSplatI8 : Operand { - let PrintMethod = "printNEONModImmOperand"; - let ParserMatchClass = nImmSplatI8AsmOperand; -} -def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; } -def nImmSplatI16 : Operand { - let PrintMethod = "printNEONModImmOperand"; - let ParserMatchClass = nImmSplatI16AsmOperand; -} -def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; } -def nImmSplatI32 : Operand { - let PrintMethod = "printNEONModImmOperand"; - let ParserMatchClass = nImmSplatI32AsmOperand; -} -def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; } -def nImmSplatNotI16 : Operand { - let ParserMatchClass = nImmSplatNotI16AsmOperand; -} -def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; } -def nImmSplatNotI32 : Operand { - let ParserMatchClass = nImmSplatNotI32AsmOperand; -} -def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; } -def nImmVMOVI32 : Operand { - let PrintMethod = "printNEONModImmOperand"; - let ParserMatchClass = nImmVMOVI32AsmOperand; -} - -class 
nImmVMOVIAsmOperandReplicate - : AsmOperandClass { - let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate"; - let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">"; - let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands"; -} - -class nImmVINVIAsmOperandReplicate - : AsmOperandClass { - let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate"; - let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">"; - let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands"; -} - -class nImmVMOVIReplicate : Operand { - let PrintMethod = "printNEONModImmOperand"; - let ParserMatchClass = nImmVMOVIAsmOperandReplicate; -} - -class nImmVINVIReplicate : Operand { - let PrintMethod = "printNEONModImmOperand"; - let ParserMatchClass = nImmVINVIAsmOperandReplicate; -} - -def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; } -def nImmVMOVI32Neg : Operand { - let PrintMethod = "printNEONModImmOperand"; - let ParserMatchClass = nImmVMOVI32NegAsmOperand; -} -def nImmVMOVF32 : Operand { - let PrintMethod = "printFPImmOperand"; - let ParserMatchClass = FPImmOperand; -} -def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; } -def nImmSplatI64 : Operand { - let PrintMethod = "printNEONModImmOperand"; - let ParserMatchClass = nImmSplatI64AsmOperand; -} - -def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; } -def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; } -def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; } -def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; } -def VectorIndex8 : Operand, ImmLeaf { - let ParserMatchClass = VectorIndex8Operand; - let PrintMethod = "printVectorIndex"; - let MIOperandInfo = (ops i32imm); -} -def VectorIndex16 : Operand, ImmLeaf { - let ParserMatchClass = VectorIndex16Operand; - let PrintMethod = "printVectorIndex"; - let MIOperandInfo = (ops 
i32imm); -} -def VectorIndex32 : Operand, ImmLeaf { - let ParserMatchClass = VectorIndex32Operand; - let PrintMethod = "printVectorIndex"; - let MIOperandInfo = (ops i32imm); -} -def VectorIndex64 : Operand, ImmLeaf { - let ParserMatchClass = VectorIndex64Operand; - let PrintMethod = "printVectorIndex"; - let MIOperandInfo = (ops i32imm); -} - -// Register list of one D register. -def VecListOneDAsmOperand : AsmOperandClass { - let Name = "VecListOneD"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListOperands"; -} -def VecListOneD : RegisterOperand { - let ParserMatchClass = VecListOneDAsmOperand; -} -// Register list of two sequential D registers. -def VecListDPairAsmOperand : AsmOperandClass { - let Name = "VecListDPair"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListOperands"; -} -def VecListDPair : RegisterOperand { - let ParserMatchClass = VecListDPairAsmOperand; -} -// Register list of three sequential D registers. -def VecListThreeDAsmOperand : AsmOperandClass { - let Name = "VecListThreeD"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListOperands"; -} -def VecListThreeD : RegisterOperand { - let ParserMatchClass = VecListThreeDAsmOperand; -} -// Register list of four sequential D registers. -def VecListFourDAsmOperand : AsmOperandClass { - let Name = "VecListFourD"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListOperands"; -} -def VecListFourD : RegisterOperand { - let ParserMatchClass = VecListFourDAsmOperand; -} -// Register list of two D registers spaced by 2 (two sequential Q registers). -def VecListDPairSpacedAsmOperand : AsmOperandClass { - let Name = "VecListDPairSpaced"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListOperands"; -} -def VecListDPairSpaced : RegisterOperand { - let ParserMatchClass = VecListDPairSpacedAsmOperand; -} -// Register list of three D registers spaced by 2 (three Q registers). 
-def VecListThreeQAsmOperand : AsmOperandClass { - let Name = "VecListThreeQ"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListOperands"; -} -def VecListThreeQ : RegisterOperand { - let ParserMatchClass = VecListThreeQAsmOperand; -} -// Register list of three D registers spaced by 2 (three Q registers). -def VecListFourQAsmOperand : AsmOperandClass { - let Name = "VecListFourQ"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListOperands"; -} -def VecListFourQ : RegisterOperand { - let ParserMatchClass = VecListFourQAsmOperand; -} - -// Register list of one D register, with "all lanes" subscripting. -def VecListOneDAllLanesAsmOperand : AsmOperandClass { - let Name = "VecListOneDAllLanes"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListOperands"; -} -def VecListOneDAllLanes : RegisterOperand { - let ParserMatchClass = VecListOneDAllLanesAsmOperand; -} -// Register list of two D registers, with "all lanes" subscripting. -def VecListDPairAllLanesAsmOperand : AsmOperandClass { - let Name = "VecListDPairAllLanes"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListOperands"; -} -def VecListDPairAllLanes : RegisterOperand { - let ParserMatchClass = VecListDPairAllLanesAsmOperand; -} -// Register list of two D registers spaced by 2 (two sequential Q registers). -def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass { - let Name = "VecListDPairSpacedAllLanes"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListOperands"; -} -def VecListDPairSpacedAllLanes : RegisterOperand { - let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand; -} -// Register list of three D registers, with "all lanes" subscripting. 
-def VecListThreeDAllLanesAsmOperand : AsmOperandClass { - let Name = "VecListThreeDAllLanes"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListOperands"; -} -def VecListThreeDAllLanes : RegisterOperand { - let ParserMatchClass = VecListThreeDAllLanesAsmOperand; -} -// Register list of three D registers spaced by 2 (three sequential Q regs). -def VecListThreeQAllLanesAsmOperand : AsmOperandClass { - let Name = "VecListThreeQAllLanes"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListOperands"; -} -def VecListThreeQAllLanes : RegisterOperand { - let ParserMatchClass = VecListThreeQAllLanesAsmOperand; -} -// Register list of four D registers, with "all lanes" subscripting. -def VecListFourDAllLanesAsmOperand : AsmOperandClass { - let Name = "VecListFourDAllLanes"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListOperands"; -} -def VecListFourDAllLanes : RegisterOperand { - let ParserMatchClass = VecListFourDAllLanesAsmOperand; -} -// Register list of four D registers spaced by 2 (four sequential Q regs). -def VecListFourQAllLanesAsmOperand : AsmOperandClass { - let Name = "VecListFourQAllLanes"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListOperands"; -} -def VecListFourQAllLanes : RegisterOperand { - let ParserMatchClass = VecListFourQAllLanesAsmOperand; -} - - -// Register list of one D register, with byte lane subscripting. -def VecListOneDByteIndexAsmOperand : AsmOperandClass { - let Name = "VecListOneDByteIndexed"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListIndexedOperands"; -} -def VecListOneDByteIndexed : Operand { - let ParserMatchClass = VecListOneDByteIndexAsmOperand; - let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); -} -// ...with half-word lane subscripting. 
-def VecListOneDHWordIndexAsmOperand : AsmOperandClass { - let Name = "VecListOneDHWordIndexed"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListIndexedOperands"; -} -def VecListOneDHWordIndexed : Operand { - let ParserMatchClass = VecListOneDHWordIndexAsmOperand; - let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); -} -// ...with word lane subscripting. -def VecListOneDWordIndexAsmOperand : AsmOperandClass { - let Name = "VecListOneDWordIndexed"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListIndexedOperands"; -} -def VecListOneDWordIndexed : Operand { - let ParserMatchClass = VecListOneDWordIndexAsmOperand; - let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); -} - -// Register list of two D registers with byte lane subscripting. -def VecListTwoDByteIndexAsmOperand : AsmOperandClass { - let Name = "VecListTwoDByteIndexed"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListIndexedOperands"; -} -def VecListTwoDByteIndexed : Operand { - let ParserMatchClass = VecListTwoDByteIndexAsmOperand; - let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); -} -// ...with half-word lane subscripting. -def VecListTwoDHWordIndexAsmOperand : AsmOperandClass { - let Name = "VecListTwoDHWordIndexed"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListIndexedOperands"; -} -def VecListTwoDHWordIndexed : Operand { - let ParserMatchClass = VecListTwoDHWordIndexAsmOperand; - let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); -} -// ...with word lane subscripting. -def VecListTwoDWordIndexAsmOperand : AsmOperandClass { - let Name = "VecListTwoDWordIndexed"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListIndexedOperands"; -} -def VecListTwoDWordIndexed : Operand { - let ParserMatchClass = VecListTwoDWordIndexAsmOperand; - let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); -} -// Register list of two Q registers with half-word lane subscripting. 
-def VecListTwoQHWordIndexAsmOperand : AsmOperandClass { - let Name = "VecListTwoQHWordIndexed"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListIndexedOperands"; -} -def VecListTwoQHWordIndexed : Operand { - let ParserMatchClass = VecListTwoQHWordIndexAsmOperand; - let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); -} -// ...with word lane subscripting. -def VecListTwoQWordIndexAsmOperand : AsmOperandClass { - let Name = "VecListTwoQWordIndexed"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListIndexedOperands"; -} -def VecListTwoQWordIndexed : Operand { - let ParserMatchClass = VecListTwoQWordIndexAsmOperand; - let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); -} - - -// Register list of three D registers with byte lane subscripting. -def VecListThreeDByteIndexAsmOperand : AsmOperandClass { - let Name = "VecListThreeDByteIndexed"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListIndexedOperands"; -} -def VecListThreeDByteIndexed : Operand { - let ParserMatchClass = VecListThreeDByteIndexAsmOperand; - let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); -} -// ...with half-word lane subscripting. -def VecListThreeDHWordIndexAsmOperand : AsmOperandClass { - let Name = "VecListThreeDHWordIndexed"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListIndexedOperands"; -} -def VecListThreeDHWordIndexed : Operand { - let ParserMatchClass = VecListThreeDHWordIndexAsmOperand; - let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); -} -// ...with word lane subscripting. -def VecListThreeDWordIndexAsmOperand : AsmOperandClass { - let Name = "VecListThreeDWordIndexed"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListIndexedOperands"; -} -def VecListThreeDWordIndexed : Operand { - let ParserMatchClass = VecListThreeDWordIndexAsmOperand; - let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); -} -// Register list of three Q registers with half-word lane subscripting. 
-def VecListThreeQHWordIndexAsmOperand : AsmOperandClass { - let Name = "VecListThreeQHWordIndexed"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListIndexedOperands"; -} -def VecListThreeQHWordIndexed : Operand { - let ParserMatchClass = VecListThreeQHWordIndexAsmOperand; - let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); -} -// ...with word lane subscripting. -def VecListThreeQWordIndexAsmOperand : AsmOperandClass { - let Name = "VecListThreeQWordIndexed"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListIndexedOperands"; -} -def VecListThreeQWordIndexed : Operand { - let ParserMatchClass = VecListThreeQWordIndexAsmOperand; - let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); -} - -// Register list of four D registers with byte lane subscripting. -def VecListFourDByteIndexAsmOperand : AsmOperandClass { - let Name = "VecListFourDByteIndexed"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListIndexedOperands"; -} -def VecListFourDByteIndexed : Operand { - let ParserMatchClass = VecListFourDByteIndexAsmOperand; - let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); -} -// ...with half-word lane subscripting. -def VecListFourDHWordIndexAsmOperand : AsmOperandClass { - let Name = "VecListFourDHWordIndexed"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListIndexedOperands"; -} -def VecListFourDHWordIndexed : Operand { - let ParserMatchClass = VecListFourDHWordIndexAsmOperand; - let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); -} -// ...with word lane subscripting. -def VecListFourDWordIndexAsmOperand : AsmOperandClass { - let Name = "VecListFourDWordIndexed"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListIndexedOperands"; -} -def VecListFourDWordIndexed : Operand { - let ParserMatchClass = VecListFourDWordIndexAsmOperand; - let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); -} -// Register list of four Q registers with half-word lane subscripting. 
-def VecListFourQHWordIndexAsmOperand : AsmOperandClass { - let Name = "VecListFourQHWordIndexed"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListIndexedOperands"; -} -def VecListFourQHWordIndexed : Operand { - let ParserMatchClass = VecListFourQHWordIndexAsmOperand; - let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); -} -// ...with word lane subscripting. -def VecListFourQWordIndexAsmOperand : AsmOperandClass { - let Name = "VecListFourQWordIndexed"; - let ParserMethod = "parseVectorList"; - let RenderMethod = "addVecListIndexedOperands"; -} -def VecListFourQWordIndexed : Operand { - let ParserMatchClass = VecListFourQWordIndexAsmOperand; - let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); -} - -def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return cast(N)->getAlignment() >= 8; -}]>; -def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr), - (store node:$val, node:$ptr), [{ - return cast(N)->getAlignment() >= 8; -}]>; -def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return cast(N)->getAlignment() == 4; -}]>; -def word_alignedstore : PatFrag<(ops node:$val, node:$ptr), - (store node:$val, node:$ptr), [{ - return cast(N)->getAlignment() == 4; -}]>; -def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return cast(N)->getAlignment() == 2; -}]>; -def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr), - (store node:$val, node:$ptr), [{ - return cast(N)->getAlignment() == 2; -}]>; -def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return cast(N)->getAlignment() == 1; -}]>; -def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr), - (store node:$val, node:$ptr), [{ - return cast(N)->getAlignment() == 1; -}]>; -def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return cast(N)->getAlignment() < 4; -}]>; -def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr), - (store node:$val, node:$ptr), [{ - return 
cast(N)->getAlignment() < 4; -}]>; - -//===----------------------------------------------------------------------===// -// NEON-specific DAG Nodes. -//===----------------------------------------------------------------------===// - -def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; -def SDTARMVCMPZ : SDTypeProfile<1, 1, []>; - -def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>; -def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>; -def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>; -def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>; -def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>; -def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>; -def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>; -def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>; -def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>; -def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>; -def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>; - -// Types for vector shift by immediates. The "SHX" version is for long and -// narrow operations where the source and destination vectors have different -// types. The "SHINS" version is for shift and insert operations. 
-def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, - SDTCisVT<2, i32>]>; -def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, - SDTCisVT<2, i32>]>; -def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; - -def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>; -def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>; -def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>; -def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>; - -def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>; -def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>; -def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>; - -def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>; -def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>; -def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>; -def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>; -def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>; -def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>; - -def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>; -def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>; -def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>; - -def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>; -def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>; - -def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>, - SDTCisVT<2, i32>]>; -def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>; -def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; - -def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; -def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>; -def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; -def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>; - -def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, - SDTCisVT<2, i32>]>; -def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>; -def NEONvbicImm : 
SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>; - -def NEONvbsl : SDNode<"ARMISD::VBSL", - SDTypeProfile<1, 3, [SDTCisVec<0>, - SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>]>>; - -def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; - -// VDUPLANE can produce a quad-register result from a double-register source, -// so the result is not constrained to match the source. -def NEONvduplane : SDNode<"ARMISD::VDUPLANE", - SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, - SDTCisVT<2, i32>]>>; - -def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; -def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>; - -def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>; -def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>; -def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>; -def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>; - -def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>]>; -def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>; -def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>; -def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>; - -def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, - SDTCisSameAs<1, 2>]>; -def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>; -def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>; - -def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, - SDTCisVT<2, v8i8>]>; -def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, - SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>; -def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>; -def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>; - - -def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{ - ConstantSDNode *ConstVal = cast(N->getOperand(0)); - unsigned EltBits = 0; - uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); - return 
(EltBits == 32 && EltVal == 0); -}]>; - -def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{ - ConstantSDNode *ConstVal = cast(N->getOperand(0)); - unsigned EltBits = 0; - uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); - return (EltBits == 8 && EltVal == 0xff); -}]>; - -//===----------------------------------------------------------------------===// -// NEON load / store instructions -//===----------------------------------------------------------------------===// - -// Use VLDM to load a Q register as a D register pair. -// This is a pseudo instruction that is expanded to VLDMD after reg alloc. -def VLDMQIA - : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn), - IIC_fpLoad_m, "", - [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>; - -// Use VSTM to store a Q register as a D register pair. -// This is a pseudo instruction that is expanded to VSTMD after reg alloc. -def VSTMQIA - : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn), - IIC_fpStore_m, "", - [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>; - -// Classes for VLD* pseudo-instructions with multi-register operands. -// These are expanded to real instructions after register allocation. 
-class VLDQPseudo - : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">; -class VLDQWBPseudo - : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), itin, - "$addr.addr = $wb">; -class VLDQWBfixedPseudo - : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), - (ins addrmode6:$addr), itin, - "$addr.addr = $wb">; -class VLDQWBregisterPseudo - : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), - (ins addrmode6:$addr, rGPR:$offset), itin, - "$addr.addr = $wb">; - -class VLDQQPseudo - : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; -class VLDQQWBPseudo - : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), itin, - "$addr.addr = $wb">; -class VLDQQWBfixedPseudo - : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), - (ins addrmode6:$addr), itin, - "$addr.addr = $wb">; -class VLDQQWBregisterPseudo - : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), - (ins addrmode6:$addr, rGPR:$offset), itin, - "$addr.addr = $wb">; - - -class VLDQQQQPseudo - : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin, - "$src = $dst">; -class VLDQQQQWBPseudo - : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, - "$addr.addr = $wb, $src = $dst">; - -let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { - -// VLD1 : Vector Load (multiple single elements) -class VLD1D op7_4, string Dt, Operand AddrMode> - : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), - (ins AddrMode:$Rn), IIC_VLD1, - "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> { - let Rm = 0b1111; - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDST1Instruction"; -} -class VLD1Q op7_4, string Dt, Operand AddrMode> - : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), - (ins AddrMode:$Rn), IIC_VLD1x2, - "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> { - let Rm = 0b1111; - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST1Instruction"; -} - -def VLD1d8 : VLD1D<{0,0,0,?}, "8", 
addrmode6align64>; -def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>; -def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>; -def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>; - -def VLD1q8 : VLD1Q<{0,0,?,?}, "8", addrmode6align64or128>; -def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>; -def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>; -def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>; - -// ...with address register writeback: -multiclass VLD1DWB op7_4, string Dt, Operand AddrMode> { - def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), - (ins AddrMode:$Rn), IIC_VLD1u, - "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { - let Rm = 0b1101; // NLdSt will assign to the right encoding bits. - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDST1Instruction"; - } - def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), - (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u, - "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDST1Instruction"; - } -} -multiclass VLD1QWB op7_4, string Dt, Operand AddrMode> { - def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), - (ins AddrMode:$Rn), IIC_VLD1x2u, - "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { - let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
- let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST1Instruction"; - } - def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), - (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, - "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST1Instruction"; - } -} - -defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8", addrmode6align64>; -defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>; -defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>; -defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>; -defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8", addrmode6align64or128>; -defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>; -defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>; -defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>; - -// ...with 3 registers -class VLD1D3 op7_4, string Dt, Operand AddrMode> - : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), - (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt, - "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> { - let Rm = 0b1111; - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDST1Instruction"; -} -multiclass VLD1D3WB op7_4, string Dt, Operand AddrMode> { - def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), - (ins AddrMode:$Rn), IIC_VLD1x2u, - "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { - let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
- let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDST1Instruction"; - } - def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), - (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, - "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDST1Instruction"; - } -} - -def VLD1d8T : VLD1D3<{0,0,0,?}, "8", addrmode6align64>; -def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>; -def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>; -def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>; - -defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8", addrmode6align64>; -defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>; -defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>; -defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>; - -def VLD1d8TPseudo : VLDQQPseudo, Sched<[WriteVLD3]>; -def VLD1d16TPseudo : VLDQQPseudo, Sched<[WriteVLD3]>; -def VLD1d32TPseudo : VLDQQPseudo, Sched<[WriteVLD3]>; -def VLD1d64TPseudo : VLDQQPseudo, Sched<[WriteVLD3]>; -def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD3]>; -def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD3]>; - -def VLD1q8HighTPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; -def VLD1q8LowTPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; -def VLD1q16HighTPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; -def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; -def VLD1q32HighTPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; -def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; -def VLD1q64HighTPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; -def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; - -// ...with 4 registers -class VLD1D4 op7_4, string Dt, Operand AddrMode> - : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), - (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt, - "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> { - let Rm = 0b1111; - let Inst{5-4} = Rn{5-4}; - 
let DecoderMethod = "DecodeVLDST1Instruction"; -} -multiclass VLD1D4WB op7_4, string Dt, Operand AddrMode> { - def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), - (ins AddrMode:$Rn), IIC_VLD1x2u, - "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { - let Rm = 0b1101; // NLdSt will assign to the right encoding bits. - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST1Instruction"; - } - def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), - (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, - "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST1Instruction"; - } -} - -def VLD1d8Q : VLD1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; -def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; -def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; -def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; - -defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; -defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; -defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; -defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; - -def VLD1d8QPseudo : VLDQQPseudo, Sched<[WriteVLD4]>; -def VLD1d16QPseudo : VLDQQPseudo, Sched<[WriteVLD4]>; -def VLD1d32QPseudo : VLDQQPseudo, Sched<[WriteVLD4]>; -def VLD1d64QPseudo : VLDQQPseudo, Sched<[WriteVLD4]>; -def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; -def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; - -def VLD1q8LowQPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; -def VLD1q8HighQPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; -def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; -def VLD1q16HighQPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; -def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; -def 
VLD1q32HighQPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; -def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; -def VLD1q64HighQPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; - -// VLD2 : Vector Load (multiple 2-element structures) -class VLD2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, - InstrItinClass itin, Operand AddrMode> - : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd), - (ins AddrMode:$Rn), itin, - "vld2", Dt, "$Vd, $Rn", "", []> { - let Rm = 0b1111; - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST2Instruction"; -} - -def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2, - addrmode6align64or128>, Sched<[WriteVLD2]>; -def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2, - addrmode6align64or128>, Sched<[WriteVLD2]>; -def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2, - addrmode6align64or128>, Sched<[WriteVLD2]>; - -def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2, - addrmode6align64or128or256>, Sched<[WriteVLD4]>; -def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2, - addrmode6align64or128or256>, Sched<[WriteVLD4]>; -def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2, - addrmode6align64or128or256>, Sched<[WriteVLD4]>; - -def VLD2q8Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; -def VLD2q16Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; -def VLD2q32Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; - -// ...with address register writeback: -multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, - RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> { - def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), - (ins AddrMode:$Rn), itin, - "vld2", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { - let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
- let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST2Instruction"; - } - def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), - (ins AddrMode:$Rn, rGPR:$Rm), itin, - "vld2", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST2Instruction"; - } -} - -defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u, - addrmode6align64or128>, Sched<[WriteVLD2]>; -defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u, - addrmode6align64or128>, Sched<[WriteVLD2]>; -defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u, - addrmode6align64or128>, Sched<[WriteVLD2]>; - -defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u, - addrmode6align64or128or256>, Sched<[WriteVLD4]>; -defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u, - addrmode6align64or128or256>, Sched<[WriteVLD4]>; -defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u, - addrmode6align64or128or256>, Sched<[WriteVLD4]>; - -def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; -def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; -def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; -def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; -def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; -def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; - -// ...with double-spaced registers -def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2, - addrmode6align64or128>, Sched<[WriteVLD2]>; -def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2, - addrmode6align64or128>, Sched<[WriteVLD2]>; -def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2, - addrmode6align64or128>, Sched<[WriteVLD2]>; -defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u, - 
addrmode6align64or128>, Sched<[WriteVLD2]>; -defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u, - addrmode6align64or128>, Sched<[WriteVLD2]>; -defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u, - addrmode6align64or128>, Sched<[WriteVLD2]>; - -// VLD3 : Vector Load (multiple 3-element structures) -class VLD3D op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), - (ins addrmode6:$Rn), IIC_VLD3, - "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> { - let Rm = 0b1111; - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDST3Instruction"; -} - -def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; -def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; -def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; - -def VLD3d8Pseudo : VLDQQPseudo, Sched<[WriteVLD3]>; -def VLD3d16Pseudo : VLDQQPseudo, Sched<[WriteVLD3]>; -def VLD3d32Pseudo : VLDQQPseudo, Sched<[WriteVLD3]>; - -// ...with address register writeback: -class VLD3DWB op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, op11_8, op7_4, - (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, - "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", - "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDST3Instruction"; -} - -def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; -def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; -def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; - -def VLD3d8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD3]>; -def VLD3d16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD3]>; -def VLD3d32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD3]>; - -// ...with double-spaced registers: -def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; -def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">; -def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">; -def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; -def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; 
-def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; - -def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; -def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; -def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; - -// ...alternate versions to be allocated odd register numbers: -def VLD3q8oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; -def VLD3q16oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; -def VLD3q32oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; - -def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; -def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; -def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; - -// VLD4 : Vector Load (multiple 4-element structures) -class VLD4D op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, op11_8, op7_4, - (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$Rn), IIC_VLD4, - "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>, - Sched<[WriteVLD4]> { - let Rm = 0b1111; - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST4Instruction"; -} - -def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; -def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; -def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; - -def VLD4d8Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; -def VLD4d16Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; -def VLD4d32Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; - -// ...with address register writeback: -class VLD4DWB op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, op11_8, op7_4, - (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u, - "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", - "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST4Instruction"; -} - -def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; -def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; -def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; - -def VLD4d8Pseudo_UPD : 
VLDQQWBPseudo, Sched<[WriteVLD4]>; -def VLD4d16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD4]>; -def VLD4d32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD4]>; - -// ...with double-spaced registers: -def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; -def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">; -def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">; -def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; -def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; -def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; - -def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; -def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; -def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; - -// ...alternate versions to be allocated odd register numbers: -def VLD4q8oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; -def VLD4q16oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; -def VLD4q32oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; - -def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; -def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; -def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; - -} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 - -// Classes for VLD*LN pseudo-instructions with multi-register operands. -// These are expanded to real instructions after register allocation. 
-class VLDQLNPseudo - : PseudoNLdSt<(outs QPR:$dst), - (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), - itin, "$src = $dst">; -class VLDQLNWBPseudo - : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, QPR:$src, - nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; -class VLDQQLNPseudo - : PseudoNLdSt<(outs QQPR:$dst), - (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), - itin, "$src = $dst">; -class VLDQQLNWBPseudo - : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, - nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; -class VLDQQQQLNPseudo - : PseudoNLdSt<(outs QQQQPR:$dst), - (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), - itin, "$src = $dst">; -class VLDQQQQLNWBPseudo - : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, - nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; - -// VLD1LN : Vector Load (single element to one lane) -class VLD1LN op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag LoadOp> - : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), - (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane), - IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", - "$src = $Vd", - [(set DPR:$Vd, (vector_insert (Ty DPR:$src), - (i32 (LoadOp addrmode6:$Rn)), - imm:$lane))]> { - let Rm = 0b1111; - let DecoderMethod = "DecodeVLD1LN"; -} -class VLD1LN32 op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag LoadOp> - : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), - (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane), - IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", - "$src = $Vd", - [(set DPR:$Vd, (vector_insert (Ty DPR:$src), - (i32 (LoadOp addrmode6oneL32:$Rn)), - imm:$lane))]>, Sched<[WriteVLD1]> { - let Rm = 0b1111; - let DecoderMethod = "DecodeVLD1LN"; -} -class VLD1QLNPseudo : VLDQLNPseudo, - Sched<[WriteVLD1]> { - let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), - (i32 (LoadOp 
addrmode6:$addr)), - imm:$lane))]; -} - -def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { - let Inst{7-5} = lane{2-0}; -} -def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { - let Inst{7-6} = lane{1-0}; - let Inst{5-4} = Rn{5-4}; -} -def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> { - let Inst{7} = lane{0}; - let Inst{5-4} = Rn{5-4}; -} - -def VLD1LNq8Pseudo : VLD1QLNPseudo; -def VLD1LNq16Pseudo : VLD1QLNPseudo; -def VLD1LNq32Pseudo : VLD1QLNPseudo; - -def : Pat<(vector_insert (v2f32 DPR:$src), - (f32 (load addrmode6:$addr)), imm:$lane), - (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; -def : Pat<(vector_insert (v4f32 QPR:$src), - (f32 (load addrmode6:$addr)), imm:$lane), - (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; - -// A 64-bit subvector insert to the first 128-bit vector position -// is a subregister copy that needs no instruction. -def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)), - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; -def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)), - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; -def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; -def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)), - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; -def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)), - (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; -def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)), - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; - - -let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { - -// ...with address register writeback: -class VLD1LNWB op11_8, bits<4> op7_4, string Dt> - : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, - 
"\\{$Vd[$lane]\\}, $Rn$Rm", - "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> { - let DecoderMethod = "DecodeVLD1LN"; -} - -def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> { - let Inst{7-5} = lane{2-0}; -} -def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> { - let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{4}; -} -def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { - let Inst{7} = lane{0}; - let Inst{5} = Rn{4}; - let Inst{4} = Rn{4}; -} - -def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; -def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; -def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; - -// VLD2LN : Vector Load (single 2-element structure to one lane) -class VLD2LN op11_8, bits<4> op7_4, string Dt> - : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), - (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn", - "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> { - let Rm = 0b1111; - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD2LN"; -} - -def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> { - let Inst{7-5} = lane{2-0}; -} -def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { - let Inst{7} = lane{0}; -} - -def VLD2LNd8Pseudo : VLDQLNPseudo, Sched<[WriteVLD1]>; -def VLD2LNd16Pseudo : VLDQLNPseudo, Sched<[WriteVLD1]>; -def VLD2LNd32Pseudo : VLDQLNPseudo, Sched<[WriteVLD1]>; - -// ...with double-spaced registers: -def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { - let Inst{7} = lane{0}; -} - -def VLD2LNq16Pseudo : VLDQQLNPseudo, Sched<[WriteVLD1]>; -def VLD2LNq32Pseudo : VLDQQLNPseudo, Sched<[WriteVLD1]>; - -// ...with address register writeback: -class VLD2LNWB op11_8, bits<4> op7_4, string Dt> - : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), - 
(ins addrmode6:$Rn, am6offset:$Rm, - DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt, - "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm", - "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD2LN"; -} - -def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> { - let Inst{7-5} = lane{2-0}; -} -def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> { - let Inst{7} = lane{0}; -} - -def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; -def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; -def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; - -def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> { - let Inst{7} = lane{0}; -} - -def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD1]>; -def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD1]>; - -// VLD3LN : Vector Load (single 3-element structure to one lane) -class VLD3LN op11_8, bits<4> op7_4, string Dt> - : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), - (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt, - "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn", - "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> { - let Rm = 0b1111; - let DecoderMethod = "DecodeVLD3LN"; -} - -def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> { - let Inst{7-5} = lane{2-0}; -} -def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { - let Inst{7} = lane{0}; -} - -def VLD3LNd8Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; -def VLD3LNd16Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; -def VLD3LNd32Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; - -// ...with double-spaced registers: -def VLD3LNq16 : VLD3LN<0b0110, 
{?,?,1,0}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { - let Inst{7} = lane{0}; -} - -def VLD3LNq16Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; -def VLD3LNq32Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; - -// ...with address register writeback: -class VLD3LNWB op11_8, bits<4> op7_4, string Dt> - : NLdStLn<1, 0b10, op11_8, op7_4, - (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), - IIC_VLD3lnu, "vld3", Dt, - "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm", - "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb", - []>, Sched<[WriteVLD2]> { - let DecoderMethod = "DecodeVLD3LN"; -} - -def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> { - let Inst{7-5} = lane{2-0}; -} -def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { - let Inst{7} = lane{0}; -} - -def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; -def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; -def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; - -def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { - let Inst{7} = lane{0}; -} - -def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; -def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; - -// VLD4LN : Vector Load (single 4-element structure to one lane) -class VLD4LN op11_8, bits<4> op7_4, string Dt> - : NLdStLn<1, 0b10, op11_8, op7_4, - (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt, - "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", - "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>, - Sched<[WriteVLD2]> { - let Rm = 0b1111; - 
let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD4LN"; -} - -def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> { - let Inst{7-5} = lane{2-0}; -} -def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { - let Inst{7} = lane{0}; - let Inst{5} = Rn{5}; -} - -def VLD4LNd8Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; -def VLD4LNd16Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; -def VLD4LNd32Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; - -// ...with double-spaced registers: -def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> { - let Inst{7} = lane{0}; - let Inst{5} = Rn{5}; -} - -def VLD4LNq16Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; -def VLD4LNq32Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; - -// ...with address register writeback: -class VLD4LNWB op11_8, bits<4> op7_4, string Dt> - : NLdStLn<1, 0b10, op11_8, op7_4, - (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), - IIC_VLD4lnu, "vld4", Dt, -"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm", -"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb", - []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD4LN" ; -} - -def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> { - let Inst{7-5} = lane{2-0}; -} -def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> { - let Inst{7} = lane{0}; - let Inst{5} = Rn{5}; -} - -def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; -def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; -def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; - -def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VLD4LNq32_UPD : VLD4LNWB<0b1011, 
{?,1,?,?}, "32"> { - let Inst{7} = lane{0}; - let Inst{5} = Rn{5}; -} - -def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; -def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; - -} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 - -// VLD1DUP : Vector Load (single element to all lanes) -class VLD1DUP op7_4, string Dt, ValueType Ty, PatFrag LoadOp, - Operand AddrMode> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd), - (ins AddrMode:$Rn), - IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", - [(set VecListOneDAllLanes:$Vd, - (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]>, - Sched<[WriteVLD2]> { - let Rm = 0b1111; - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD1DupInstruction"; -} -def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8, - addrmode6dupalignNone>; -def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16, - addrmode6dupalign16>; -def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load, - addrmode6dupalign32>; - -def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), - (VLD1DUPd32 addrmode6:$addr)>; - -class VLD1QDUP op7_4, string Dt, ValueType Ty, PatFrag LoadOp, - Operand AddrMode> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd), - (ins AddrMode:$Rn), IIC_VLD1dup, - "vld1", Dt, "$Vd, $Rn", "", - [(set VecListDPairAllLanes:$Vd, - (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> { - let Rm = 0b1111; - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD1DupInstruction"; -} - -def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8, - addrmode6dupalignNone>; -def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16, - addrmode6dupalign16>; -def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load, - addrmode6dupalign32>; - -def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), - (VLD1DUPq32 addrmode6:$addr)>; - -let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { -// ...with address register writeback: -multiclass VLD1DUPWB op7_4, string Dt, 
Operand AddrMode> { - def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, - (outs VecListOneDAllLanes:$Vd, GPR:$wb), - (ins AddrMode:$Rn), IIC_VLD1dupu, - "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { - let Rm = 0b1101; // NLdSt will assign to the right encoding bits. - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD1DupInstruction"; - } - def _register : NLdSt<1, 0b10, 0b1100, op7_4, - (outs VecListOneDAllLanes:$Vd, GPR:$wb), - (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu, - "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD1DupInstruction"; - } -} -multiclass VLD1QDUPWB op7_4, string Dt, Operand AddrMode> { - def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, - (outs VecListDPairAllLanes:$Vd, GPR:$wb), - (ins AddrMode:$Rn), IIC_VLD1dupu, - "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { - let Rm = 0b1101; // NLdSt will assign to the right encoding bits. - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD1DupInstruction"; - } - def _register : NLdSt<1, 0b10, 0b1100, op7_4, - (outs VecListDPairAllLanes:$Vd, GPR:$wb), - (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu, - "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD1DupInstruction"; - } -} - -defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>; -defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>; -defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>; - -defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>; -defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>; -defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>; - -// VLD2DUP : Vector Load (single 2-element structure to all lanes) -class VLD2DUP op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode> - : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd), - (ins AddrMode:$Rn), IIC_VLD2dup, - "vld2", Dt, "$Vd, $Rn", "", []> { - let Rm = 
0b1111; - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD2DupInstruction"; -} - -def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes, - addrmode6dupalign16>; -def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes, - addrmode6dupalign32>; -def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes, - addrmode6dupalign64>; - -// HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or -// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]". -// ...with double-spaced registers -def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes, - addrmode6dupalign16>; -def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes, - addrmode6dupalign32>; -def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, - addrmode6dupalign64>; - -def VLD2DUPq8EvenPseudo : VLDQQPseudo, Sched<[WriteVLD2]>; -def VLD2DUPq8OddPseudo : VLDQQPseudo, Sched<[WriteVLD2]>; -def VLD2DUPq16EvenPseudo : VLDQQPseudo, Sched<[WriteVLD2]>; -def VLD2DUPq16OddPseudo : VLDQQPseudo, Sched<[WriteVLD2]>; -def VLD2DUPq32EvenPseudo : VLDQQPseudo, Sched<[WriteVLD2]>; -def VLD2DUPq32OddPseudo : VLDQQPseudo, Sched<[WriteVLD2]>; - -// ...with address register writeback: -multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy, - Operand AddrMode> { - def _fixed : NLdSt<1, 0b10, 0b1101, op7_4, - (outs VdTy:$Vd, GPR:$wb), - (ins AddrMode:$Rn), IIC_VLD2dupu, - "vld2", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { - let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
- let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD2DupInstruction"; - } - def _register : NLdSt<1, 0b10, 0b1101, op7_4, - (outs VdTy:$Vd, GPR:$wb), - (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu, - "vld2", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD2DupInstruction"; - } -} - -defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes, - addrmode6dupalign16>; -defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes, - addrmode6dupalign32>; -defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes, - addrmode6dupalign64>; - -defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes, - addrmode6dupalign16>; -defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes, - addrmode6dupalign32>; -defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, - addrmode6dupalign64>; - -// VLD3DUP : Vector Load (single 3-element structure to all lanes) -class VLD3DUP op7_4, string Dt> - : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), - (ins addrmode6dup:$Rn), IIC_VLD3dup, - "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>, - Sched<[WriteVLD2]> { - let Rm = 0b1111; - let Inst{4} = 0; - let DecoderMethod = "DecodeVLD3DupInstruction"; -} - -def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">; -def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">; -def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">; - -def VLD3DUPd8Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; -def VLD3DUPd16Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; -def VLD3DUPd32Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; - -// ...with double-spaced registers (not used for codegen): -def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">; -def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">; -def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">; - -def VLD3DUPq8EvenPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; -def VLD3DUPq8OddPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; -def VLD3DUPq16EvenPseudo : VLDQQQQPseudo, 
Sched<[WriteVLD2]>; -def VLD3DUPq16OddPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; -def VLD3DUPq32EvenPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; -def VLD3DUPq32OddPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; - -// ...with address register writeback: -class VLD3DUPWB op7_4, string Dt, Operand AddrMode> - : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu, - "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm", - "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { - let Inst{4} = 0; - let DecoderMethod = "DecodeVLD3DupInstruction"; -} - -def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>; -def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>; -def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>; - -def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>; -def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>; -def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>; - -def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; -def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; -def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; - -// VLD4DUP : Vector Load (single 4-element structure to all lanes) -class VLD4DUP op7_4, string Dt> - : NLdSt<1, 0b10, 0b1111, op7_4, - (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6dup:$Rn), IIC_VLD4dup, - "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> { - let Rm = 0b1111; - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD4DupInstruction"; -} - -def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">; -def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">; -def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } - -def VLD4DUPd8Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; -def VLD4DUPd16Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; -def VLD4DUPd32Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; - -// ...with double-spaced registers (not used for 
codegen): -def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">; -def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">; -def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } - -def VLD4DUPq8EvenPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; -def VLD4DUPq8OddPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; -def VLD4DUPq16EvenPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; -def VLD4DUPq16OddPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; -def VLD4DUPq32EvenPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; -def VLD4DUPq32OddPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; - -// ...with address register writeback: -class VLD4DUPWB op7_4, string Dt> - : NLdSt<1, 0b10, 0b1111, op7_4, - (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu, - "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm", - "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD4DupInstruction"; -} - -def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">; -def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">; -def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } - -def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">; -def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">; -def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } - -def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; -def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; -def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; - -} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 - -let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { - -// Classes for VST* pseudo-instructions with multi-register operands. -// These are expanded to real instructions after register allocation. 
-class VSTQPseudo - : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">; -class VSTQWBPseudo - : PseudoNLdSt<(outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin, - "$addr.addr = $wb">; -class VSTQWBfixedPseudo - : PseudoNLdSt<(outs GPR:$wb), - (ins addrmode6:$addr, QPR:$src), itin, - "$addr.addr = $wb">; -class VSTQWBregisterPseudo - : PseudoNLdSt<(outs GPR:$wb), - (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin, - "$addr.addr = $wb">; -class VSTQQPseudo - : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">; -class VSTQQWBPseudo - : PseudoNLdSt<(outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin, - "$addr.addr = $wb">; -class VSTQQWBfixedPseudo - : PseudoNLdSt<(outs GPR:$wb), - (ins addrmode6:$addr, QQPR:$src), itin, - "$addr.addr = $wb">; -class VSTQQWBregisterPseudo - : PseudoNLdSt<(outs GPR:$wb), - (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin, - "$addr.addr = $wb">; - -class VSTQQQQPseudo - : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">; -class VSTQQQQWBPseudo - : PseudoNLdSt<(outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, - "$addr.addr = $wb">; - -// VST1 : Vector Store (multiple single elements) -class VST1D op7_4, string Dt, Operand AddrMode> - : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd), - IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> { - let Rm = 0b1111; - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDST1Instruction"; -} -class VST1Q op7_4, string Dt, Operand AddrMode> - : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd), - IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> { - let Rm = 0b1111; - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST1Instruction"; -} - -def VST1d8 : VST1D<{0,0,0,?}, "8", addrmode6align64>; -def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>; -def VST1d32 : VST1D<{1,0,0,?}, "32", 
addrmode6align64>; -def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>; - -def VST1q8 : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>; -def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>; -def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>; -def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>; - -// ...with address register writeback: -multiclass VST1DWB op7_4, string Dt, Operand AddrMode> { - def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), - (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u, - "vst1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []>, Sched<[WriteVST1]> { - let Rm = 0b1101; // NLdSt will assign to the right encoding bits. - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDST1Instruction"; - } - def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), - (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd), - IIC_VLD1u, - "vst1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []>, Sched<[WriteVST1]> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDST1Instruction"; - } -} -multiclass VST1QWB op7_4, string Dt, Operand AddrMode> { - def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), - (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, - "vst1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { - let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
- let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST1Instruction"; - } - def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), - (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd), - IIC_VLD1x2u, - "vst1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST1Instruction"; - } -} - -defm VST1d8wb : VST1DWB<{0,0,0,?}, "8", addrmode6align64>; -defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>; -defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>; -defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>; - -defm VST1q8wb : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>; -defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>; -defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>; -defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>; - -// ...with 3 registers -class VST1D3 op7_4, string Dt, Operand AddrMode> - : NLdSt<0, 0b00, 0b0110, op7_4, (outs), - (ins AddrMode:$Rn, VecListThreeD:$Vd), - IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> { - let Rm = 0b1111; - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDST1Instruction"; -} -multiclass VST1D3WB op7_4, string Dt, Operand AddrMode> { - def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), - (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, - "vst1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { - let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
- let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST1Instruction"; - } - def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), - (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd), - IIC_VLD1x3u, - "vst1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST1Instruction"; - } -} - -def VST1d8T : VST1D3<{0,0,0,?}, "8", addrmode6align64>; -def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>; -def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>; -def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>; - -defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>; -defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>; -defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>; -defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>; - -def VST1d8TPseudo : VSTQQPseudo, Sched<[WriteVST3]>; -def VST1d16TPseudo : VSTQQPseudo, Sched<[WriteVST3]>; -def VST1d32TPseudo : VSTQQPseudo, Sched<[WriteVST3]>; -def VST1d64TPseudo : VSTQQPseudo, Sched<[WriteVST3]>; -def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST3]>; -def VST1d64TPseudoWB_register : VSTQQWBPseudo, Sched<[WriteVST3]>; - -def VST1q8HighTPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; -def VST1q8LowTPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; -def VST1q16HighTPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; -def VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; -def VST1q32HighTPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; -def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; -def VST1q64HighTPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; -def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; - -// ...with 4 registers -class VST1D4 op7_4, string Dt, Operand AddrMode> - : NLdSt<0, 0b00, 0b0010, op7_4, (outs), - (ins AddrMode:$Rn, VecListFourD:$Vd), - IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", - []>, Sched<[WriteVST4]> { - let Rm = 0b1111; - let Inst{5-4} = Rn{5-4}; 
- let DecoderMethod = "DecodeVLDST1Instruction"; -} -multiclass VST1D4WB op7_4, string Dt, Operand AddrMode> { - def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), - (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, - "vst1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { - let Rm = 0b1101; // NLdSt will assign to the right encoding bits. - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST1Instruction"; - } - def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), - (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), - IIC_VLD1x4u, - "vst1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST1Instruction"; - } -} - -def VST1d8Q : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; -def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; -def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; -def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; - -defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; -defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; -defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; -defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; - -def VST1d8QPseudo : VSTQQPseudo, Sched<[WriteVST4]>; -def VST1d16QPseudo : VSTQQPseudo, Sched<[WriteVST4]>; -def VST1d32QPseudo : VSTQQPseudo, Sched<[WriteVST4]>; -def VST1d64QPseudo : VSTQQPseudo, Sched<[WriteVST4]>; -def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; -def VST1d64QPseudoWB_register : VSTQQWBPseudo, Sched<[WriteVST4]>; - -def VST1q8HighQPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; -def VST1q8LowQPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; -def VST1q16HighQPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; -def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; -def VST1q32HighQPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; -def 
VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; -def VST1q64HighQPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; -def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; - -// VST2 : Vector Store (multiple 2-element structures) -class VST2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, - InstrItinClass itin, Operand AddrMode> - : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd), - itin, "vst2", Dt, "$Vd, $Rn", "", []> { - let Rm = 0b1111; - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST2Instruction"; -} - -def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2, - addrmode6align64or128>, Sched<[WriteVST2]>; -def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2, - addrmode6align64or128>, Sched<[WriteVST2]>; -def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2, - addrmode6align64or128>, Sched<[WriteVST2]>; - -def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2, - addrmode6align64or128or256>, Sched<[WriteVST4]>; -def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2, - addrmode6align64or128or256>, Sched<[WriteVST4]>; -def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2, - addrmode6align64or128or256>, Sched<[WriteVST4]>; - -def VST2q8Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; -def VST2q16Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; -def VST2q32Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; - -// ...with address register writeback: -multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, - RegisterOperand VdTy, Operand AddrMode> { - def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u, - "vst2", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { - let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
- let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST2Instruction"; - } - def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, - "vst2", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST2Instruction"; - } -} -multiclass VST2QWB op7_4, string Dt, Operand AddrMode> { - def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), - (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u, - "vst2", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { - let Rm = 0b1101; // NLdSt will assign to the right encoding bits. - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST2Instruction"; - } - def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), - (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), - IIC_VLD1u, - "vst2", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST2Instruction"; - } -} - -defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair, - addrmode6align64or128>; -defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair, - addrmode6align64or128>; -defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair, - addrmode6align64or128>; - -defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>; -defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>; -defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>; - -def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; -def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; -def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; -def VST2q8PseudoWB_register : VSTQQWBregisterPseudo, Sched<[WriteVST4]>; -def VST2q16PseudoWB_register : VSTQQWBregisterPseudo, Sched<[WriteVST4]>; -def VST2q32PseudoWB_register : VSTQQWBregisterPseudo, Sched<[WriteVST4]>; - -// ...with double-spaced registers -def 
VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2, - addrmode6align64or128>; -def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2, - addrmode6align64or128>; -def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2, - addrmode6align64or128>; -defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, - addrmode6align64or128>; -defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, - addrmode6align64or128>; -defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, - addrmode6align64or128>; - -// VST3 : Vector Store (multiple 3-element structures) -class VST3D op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3, - "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> { - let Rm = 0b1111; - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDST3Instruction"; -} - -def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; -def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">; -def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">; - -def VST3d8Pseudo : VSTQQPseudo, Sched<[WriteVST3]>; -def VST3d16Pseudo : VSTQQPseudo, Sched<[WriteVST3]>; -def VST3d32Pseudo : VSTQQPseudo, Sched<[WriteVST3]>; - -// ...with address register writeback: -class VST3DWB op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u, - "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", - "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDST3Instruction"; -} - -def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; -def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">; -def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">; - -def VST3d8Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST3]>; -def VST3d16Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST3]>; -def VST3d32Pseudo_UPD : VSTQQWBPseudo, 
Sched<[WriteVST3]>; - -// ...with double-spaced registers: -def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">; -def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">; -def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">; -def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">; -def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">; -def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">; - -def VST3q8Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; -def VST3q16Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; -def VST3q32Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; - -// ...alternate versions to be allocated odd register numbers: -def VST3q8oddPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; -def VST3q16oddPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; -def VST3q32oddPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; - -def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; -def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; -def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; - -// VST4 : Vector Store (multiple 4-element structures) -class VST4D op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", - "", []>, Sched<[WriteVST4]> { - let Rm = 0b1111; - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST4Instruction"; -} - -def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; -def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">; -def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">; - -def VST4d8Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; -def VST4d16Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; -def VST4d32Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; - -// ...with address register writeback: -class VST4DWB op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u, - "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", - "$Rn.addr = $wb", 
[]>, Sched<[WriteVST4]> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDST4Instruction"; -} - -def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; -def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">; -def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">; - -def VST4d8Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST4]>; -def VST4d16Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST4]>; -def VST4d32Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST4]>; - -// ...with double-spaced registers: -def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">; -def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">; -def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">; -def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">; -def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">; -def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">; - -def VST4q8Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; -def VST4q16Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; -def VST4q32Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; - -// ...alternate versions to be allocated odd register numbers: -def VST4q8oddPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; -def VST4q16oddPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; -def VST4q32oddPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; - -def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; -def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; -def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; - -} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 - -// Classes for VST*LN pseudo-instructions with multi-register operands. -// These are expanded to real instructions after register allocation. 
-class VSTQLNPseudo - : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), - itin, "">; -class VSTQLNWBPseudo - : PseudoNLdSt<(outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, QPR:$src, - nohash_imm:$lane), itin, "$addr.addr = $wb">; -class VSTQQLNPseudo - : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), - itin, "">; -class VSTQQLNWBPseudo - : PseudoNLdSt<(outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, - nohash_imm:$lane), itin, "$addr.addr = $wb">; -class VSTQQQQLNPseudo - : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), - itin, "">; -class VSTQQQQLNWBPseudo - : PseudoNLdSt<(outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, - nohash_imm:$lane), itin, "$addr.addr = $wb">; - -// VST1LN : Vector Store (single element from one lane) -class VST1LN op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode> - : NLdStLn<1, 0b00, op11_8, op7_4, (outs), - (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane), - IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", - [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>, - Sched<[WriteVST1]> { - let Rm = 0b1111; - let DecoderMethod = "DecodeVST1LN"; -} -class VST1QLNPseudo - : VSTQLNPseudo, Sched<[WriteVST1]> { - let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), - addrmode6:$addr)]; -} - -def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, - NEONvgetlaneu, addrmode6> { - let Inst{7-5} = lane{2-0}; -} -def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, - NEONvgetlaneu, addrmode6> { - let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{4}; -} - -def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, - addrmode6oneL32> { - let Inst{7} = lane{0}; - let Inst{5-4} = Rn{5-4}; -} - -def VST1LNq8Pseudo : VST1QLNPseudo; -def VST1LNq16Pseudo : VST1QLNPseudo; -def VST1LNq32Pseudo : VST1QLNPseudo; - -def : 
Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr), - (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; -def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), - (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; - -// ...with address register writeback: -class VST1LNWB op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode> - : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins AdrMode:$Rn, am6offset:$Rm, - DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt, - "\\{$Vd[$lane]\\}, $Rn$Rm", - "$Rn.addr = $wb", - [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), - AdrMode:$Rn, am6offset:$Rm))]>, - Sched<[WriteVST1]> { - let DecoderMethod = "DecodeVST1LN"; -} -class VST1QLNWBPseudo - : VSTQLNWBPseudo, Sched<[WriteVST1]> { - let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), - addrmode6:$addr, am6offset:$offset))]; -} - -def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, - NEONvgetlaneu, addrmode6> { - let Inst{7-5} = lane{2-0}; -} -def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, - NEONvgetlaneu, addrmode6> { - let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{4}; -} -def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, - extractelt, addrmode6oneL32> { - let Inst{7} = lane{0}; - let Inst{5-4} = Rn{5-4}; -} - -def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo; -def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo; -def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo; - -let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { - -// VST2LN : Vector Store (single 2-element structure from one lane) -class VST2LN op11_8, bits<4> op7_4, string Dt> - : NLdStLn<1, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane), - IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn", - "", []>, Sched<[WriteVST1]> { - let Rm = 0b1111; - let Inst{4} = Rn{4}; - let 
DecoderMethod = "DecodeVST2LN"; -} - -def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> { - let Inst{7-5} = lane{2-0}; -} -def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> { - let Inst{7} = lane{0}; -} - -def VST2LNd8Pseudo : VSTQLNPseudo, Sched<[WriteVST1]>; -def VST2LNd16Pseudo : VSTQLNPseudo, Sched<[WriteVST1]>; -def VST2LNd32Pseudo : VSTQLNPseudo, Sched<[WriteVST1]>; - -// ...with double-spaced registers: -def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> { - let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{4}; -} -def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> { - let Inst{7} = lane{0}; - let Inst{4} = Rn{4}; -} - -def VST2LNq16Pseudo : VSTQQLNPseudo, Sched<[WriteVST1]>; -def VST2LNq32Pseudo : VSTQQLNPseudo, Sched<[WriteVST1]>; - -// ...with address register writeback: -class VST2LNWB op11_8, bits<4> op7_4, string Dt> - : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, - "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVST2LN"; -} - -def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> { - let Inst{7-5} = lane{2-0}; -} -def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> { - let Inst{7} = lane{0}; -} - -def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo, Sched<[WriteVST1]>; -def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo, Sched<[WriteVST1]>; -def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo, Sched<[WriteVST1]>; - -def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> { - let Inst{7} = lane{0}; -} - -def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST1]>; -def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST1]>; - -// VST3LN : Vector Store (single 
3-element structure from one lane) -class VST3LN op11_8, bits<4> op7_4, string Dt> - : NLdStLn<1, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VST3ln, "vst3", Dt, - "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>, - Sched<[WriteVST2]> { - let Rm = 0b1111; - let DecoderMethod = "DecodeVST3LN"; -} - -def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> { - let Inst{7-5} = lane{2-0}; -} -def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> { - let Inst{7} = lane{0}; -} - -def VST3LNd8Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; -def VST3LNd16Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; -def VST3LNd32Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; - -// ...with double-spaced registers: -def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> { - let Inst{7} = lane{0}; -} - -def VST3LNq16Pseudo : VSTQQQQLNPseudo; -def VST3LNq32Pseudo : VSTQQQQLNPseudo; - -// ...with address register writeback: -class VST3LNWB op11_8, bits<4> op7_4, string Dt> - : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), - IIC_VST3lnu, "vst3", Dt, - "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let DecoderMethod = "DecodeVST3LN"; -} - -def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> { - let Inst{7-5} = lane{2-0}; -} -def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> { - let Inst{7} = lane{0}; -} - -def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; -def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; -def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; - -def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> { - let Inst{7-6} = 
lane{1-0}; -} -def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> { - let Inst{7} = lane{0}; -} - -def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; -def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; - -// VST4LN : Vector Store (single 4-element structure from one lane) -class VST4LN op11_8, bits<4> op7_4, string Dt> - : NLdStLn<1, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VST4ln, "vst4", Dt, - "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn", - "", []>, Sched<[WriteVST2]> { - let Rm = 0b1111; - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVST4LN"; -} - -def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> { - let Inst{7-5} = lane{2-0}; -} -def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> { - let Inst{7} = lane{0}; - let Inst{5} = Rn{5}; -} - -def VST4LNd8Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; -def VST4LNd16Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; -def VST4LNd32Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; - -// ...with double-spaced registers: -def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> { - let Inst{7} = lane{0}; - let Inst{5} = Rn{5}; -} - -def VST4LNq16Pseudo : VSTQQQQLNPseudo, Sched<[WriteVST2]>; -def VST4LNq32Pseudo : VSTQQQQLNPseudo, Sched<[WriteVST2]>; - -// ...with address register writeback: -class VST4LNWB op11_8, bits<4> op7_4, string Dt> - : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), - IIC_VST4lnu, "vst4", Dt, - "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVST4LN"; -} - -def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> { - let Inst{7-5} = lane{2-0}; -} 
-def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> { - let Inst{7} = lane{0}; - let Inst{5} = Rn{5}; -} - -def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; -def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; -def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; - -def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> { - let Inst{7-6} = lane{1-0}; -} -def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> { - let Inst{7} = lane{0}; - let Inst{5} = Rn{5}; -} - -def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; -def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; - -} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 - -// Use vld1/vst1 for unaligned f64 load / store -def : Pat<(f64 (hword_alignedload addrmode6:$addr)), - (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>; -def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr), - (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; -def : Pat<(f64 (byte_alignedload addrmode6:$addr)), - (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>; -def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr), - (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; -def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), - (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>; -def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), - (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>; - -// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64 -// load / store if it's legal. 
-def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)), - (VLD1q64 addrmode6:$addr)>; -def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), - (VST1q64 addrmode6:$addr, QPR:$value)>; -def : Pat<(v2f64 (word_alignedload addrmode6:$addr)), - (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>; -def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr), - (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; -def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), - (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>; -def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), - (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; -def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), - (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>; -def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), - (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; - -//===----------------------------------------------------------------------===// -// NEON pattern fragments -//===----------------------------------------------------------------------===// - -// Extract D sub-registers of Q registers. -def DSubReg_i8_reg : SDNodeXFormgetTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, SDLoc(N), - MVT::i32); -}]>; -def DSubReg_i16_reg : SDNodeXFormgetTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, SDLoc(N), - MVT::i32); -}]>; -def DSubReg_i32_reg : SDNodeXFormgetTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, SDLoc(N), - MVT::i32); -}]>; -def DSubReg_f64_reg : SDNodeXFormgetTargetConstant(ARM::dsub_0 + N->getZExtValue(), SDLoc(N), - MVT::i32); -}]>; - -// Extract S sub-registers of Q/D registers. -def SSubReg_f32_reg : SDNodeXFormgetTargetConstant(ARM::ssub_0 + N->getZExtValue(), SDLoc(N), - MVT::i32); -}]>; - -// Translate lane numbers from Q registers to D subregs. 
-def SubReg_i8_lane : SDNodeXFormgetTargetConstant(N->getZExtValue() & 7, SDLoc(N), MVT::i32); -}]>; -def SubReg_i16_lane : SDNodeXFormgetTargetConstant(N->getZExtValue() & 3, SDLoc(N), MVT::i32); -}]>; -def SubReg_i32_lane : SDNodeXFormgetTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i32); -}]>; - -//===----------------------------------------------------------------------===// -// Instruction Classes -//===----------------------------------------------------------------------===// - -// Basic 2-register operations: double- and quad-register. -class N2VD op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, - string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> - : N2V; -class N2VQ op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, - string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> - : N2V; - -// Basic 2-register intrinsics, both double- and quad-register. -class N2VDInt op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N2V; -class N2VQInt op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N2V; - -// Same as above, but not predicated. -class N2VDIntnp op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N2Vnp; - -class N2VQIntnp op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N2Vnp; - -// Similar to NV2VQIntnp with some more encoding bits exposed (crypto). 
-class N2VQIntXnp op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, - bit op7, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N2Vnp; - -// Same as N2VQIntXnp but with Vd as a src register. -class N2VQIntX2np op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, - bit op7, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N2Vnp { - let Constraints = "$src = $Vd"; -} - -// Narrow 2-register operations. -class N2VN op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyD, ValueType TyQ, SDNode OpNode> - : N2V; - -// Narrow 2-register intrinsics. -class N2VNInt op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyD, ValueType TyQ, SDPatternOperator IntOp> - : N2V; - -// Long 2-register operations (currently only used for VMOVL). -class N2VL op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, SDNode OpNode> - : N2V; - -// Long 2-register intrinsics. -class N2VLInt op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, SDPatternOperator IntOp> - : N2V; - -// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register. 
-class N2VDShuffle op19_18, bits<5> op11_7, string OpcodeStr, string Dt> - : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm), - (ins DPR:$src1, DPR:$src2), IIC_VPERMD, - OpcodeStr, Dt, "$Vd, $Vm", - "$src1 = $Vd, $src2 = $Vm", []>; -class N2VQShuffle op19_18, bits<5> op11_7, - InstrItinClass itin, string OpcodeStr, string Dt> - : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm), - (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm", - "$src1 = $Vd, $src2 = $Vm", []>; - -// Basic 3-register operations: double- and quad-register. -class N3VD op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> - : N3V { - // All of these have a two-operand InstAlias. - let TwoOperandAliasConstraint = "$Vn = $Vd"; - let isCommutable = Commutable; -} -// Same as N3VD but no data type. -class N3VDX op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, - ValueType ResTy, ValueType OpTy, - SDNode OpNode, bit Commutable> - : N3VX{ - // All of these have a two-operand InstAlias. - let TwoOperandAliasConstraint = "$Vn = $Vd"; - let isCommutable = Commutable; -} - -class N3VDSL op21_20, bits<4> op11_8, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDNode ShOp> - : N3VLane32<0, 1, op21_20, op11_8, 1, 0, - (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", - [(set (Ty DPR:$Vd), - (Ty (ShOp (Ty DPR:$Vn), - (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> { - // All of these have a two-operand InstAlias. 
- let TwoOperandAliasConstraint = "$Vn = $Vd"; - let isCommutable = 0; -} -class N3VDSL16 op21_20, bits<4> op11_8, - string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> - : N3VLane16<0, 1, op21_20, op11_8, 1, 0, - (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), - NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","", - [(set (Ty DPR:$Vd), - (Ty (ShOp (Ty DPR:$Vn), - (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> { - // All of these have a two-operand InstAlias. - let TwoOperandAliasConstraint = "$Vn = $Vd"; - let isCommutable = 0; -} - -class N3VQ op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> - : N3V { - // All of these have a two-operand InstAlias. - let TwoOperandAliasConstraint = "$Vn = $Vd"; - let isCommutable = Commutable; -} -class N3VQX op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, - ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> - : N3VX{ - // All of these have a two-operand InstAlias. - let TwoOperandAliasConstraint = "$Vn = $Vd"; - let isCommutable = Commutable; -} -class N3VQSL op21_20, bits<4> op11_8, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDNode ShOp> - : N3VLane32<1, 1, op21_20, op11_8, 1, 0, - (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", - [(set (ResTy QPR:$Vd), - (ResTy (ShOp (ResTy QPR:$Vn), - (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm), - imm:$lane)))))]> { - // All of these have a two-operand InstAlias. 
- let TwoOperandAliasConstraint = "$Vn = $Vd"; - let isCommutable = 0; -} -class N3VQSL16 op21_20, bits<4> op11_8, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDNode ShOp> - : N3VLane16<1, 1, op21_20, op11_8, 1, 0, - (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), - NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "", - [(set (ResTy QPR:$Vd), - (ResTy (ShOp (ResTy QPR:$Vn), - (ResTy (NEONvduplane (OpTy DPR_8:$Vm), - imm:$lane)))))]> { - // All of these have a two-operand InstAlias. - let TwoOperandAliasConstraint = "$Vn = $Vd"; - let isCommutable = 0; -} - -// Basic 3-register intrinsics, both double- and quad-register. -class N3VDInt op21_20, bits<4> op11_8, bit op4, - Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> - : N3V { - // All of these have a two-operand InstAlias. - let TwoOperandAliasConstraint = "$Vn = $Vd"; - let isCommutable = Commutable; -} - -class N3VDIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, - bit op4, Format f, InstrItinClass itin, string OpcodeStr, - string Dt, ValueType ResTy, ValueType OpTy, - SDPatternOperator IntOp, bit Commutable> - : N3Vnp; - -class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> - : N3VLane32<0, 1, op21_20, op11_8, 1, 0, - (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", - [(set (Ty DPR:$Vd), - (Ty (IntOp (Ty DPR:$Vn), - (Ty (NEONvduplane (Ty DPR_VFP2:$Vm), - imm:$lane)))))]> { - let isCommutable = 0; -} - -class N3VDIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> - : N3VLane16<0, 1, op21_20, op11_8, 1, 0, - (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", - [(set (Ty 
DPR:$Vd), - (Ty (IntOp (Ty DPR:$Vn), - (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> { - let isCommutable = 0; -} -class N3VDIntSh op21_20, bits<4> op11_8, bit op4, - Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N3V { - let TwoOperandAliasConstraint = "$Vm = $Vd"; - let isCommutable = 0; -} - -class N3VQInt op21_20, bits<4> op11_8, bit op4, - Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> - : N3V { - // All of these have a two-operand InstAlias. - let TwoOperandAliasConstraint = "$Vn = $Vd"; - let isCommutable = Commutable; -} - -class N3VQIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, - bit op4, Format f, InstrItinClass itin, string OpcodeStr, - string Dt, ValueType ResTy, ValueType OpTy, - SDPatternOperator IntOp, bit Commutable> - : N3Vnp; - -// Same as N3VQIntnp but with Vd as a src register. -class N3VQInt3np op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, - bit op4, Format f, InstrItinClass itin, string OpcodeStr, - string Dt, ValueType ResTy, ValueType OpTy, - SDPatternOperator IntOp, bit Commutable> - : N3Vnp { - let Constraints = "$src = $Vd"; -} - -class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N3VLane32<1, 1, op21_20, op11_8, 1, 0, - (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", - [(set (ResTy QPR:$Vd), - (ResTy (IntOp (ResTy QPR:$Vn), - (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm), - imm:$lane)))))]> { - let isCommutable = 0; -} -class N3VQIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N3VLane16<1, 1, op21_20, op11_8, 1, 0, - (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", - [(set (ResTy QPR:$Vd), - (ResTy (IntOp (ResTy QPR:$Vn), - (ResTy (NEONvduplane (OpTy DPR_8:$Vm), - imm:$lane)))))]> { - let isCommutable = 0; -} -class N3VQIntSh op21_20, bits<4> op11_8, bit op4, - Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N3V { - let TwoOperandAliasConstraint = "$Vm = $Vd"; - let isCommutable = 0; -} - -// Multiply-Add/Sub operations: double- and quad-register. -class N3VDMulOp op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode> - : N3V; - -class N3VDMulOpSL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, - ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp> - : N3VLane32<0, 1, op21_20, op11_8, 1, 0, - (outs DPR:$Vd), - (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), - NVMulSLFrm, itin, - OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", - [(set (Ty DPR:$Vd), - (Ty (ShOp (Ty DPR:$src1), - (Ty (MulOp DPR:$Vn, - (Ty (NEONvduplane (Ty DPR_VFP2:$Vm), - imm:$lane)))))))]>; -class N3VDMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, - ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp> - : N3VLane16<0, 1, op21_20, op11_8, 1, 0, - (outs DPR:$Vd), - (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), - NVMulSLFrm, itin, - OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", - [(set (Ty DPR:$Vd), - (Ty (ShOp (Ty DPR:$src1), - (Ty (MulOp DPR:$Vn, - (Ty (NEONvduplane (Ty DPR_8:$Vm), - imm:$lane)))))))]>; - -class N3VQMulOp op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, - SDPatternOperator MulOp, SDPatternOperator OpNode> - : N3V; -class N3VQMulOpSL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - 
SDPatternOperator MulOp, SDPatternOperator ShOp> - : N3VLane32<1, 1, op21_20, op11_8, 1, 0, - (outs QPR:$Vd), - (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), - NVMulSLFrm, itin, - OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", - [(set (ResTy QPR:$Vd), - (ResTy (ShOp (ResTy QPR:$src1), - (ResTy (MulOp QPR:$Vn, - (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm), - imm:$lane)))))))]>; -class N3VQMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, - SDPatternOperator MulOp, SDPatternOperator ShOp> - : N3VLane16<1, 1, op21_20, op11_8, 1, 0, - (outs QPR:$Vd), - (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), - NVMulSLFrm, itin, - OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", - [(set (ResTy QPR:$Vd), - (ResTy (ShOp (ResTy QPR:$src1), - (ResTy (MulOp QPR:$Vn, - (ResTy (NEONvduplane (OpTy DPR_8:$Vm), - imm:$lane)))))))]>; - -// Neon Intrinsic-Op instructions (VABA): double- and quad-register. -class N3VDIntOp op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDPatternOperator IntOp, SDNode OpNode> - : N3V; -class N3VQIntOp op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDPatternOperator IntOp, SDNode OpNode> - : N3V; - -// Neon 3-argument intrinsics, both double- and quad-register. -// The destination register is also used as the first source operand register. -class N3VDInt3 op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N3V; -class N3VQInt3 op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N3V; - -// Long Multiply-Add/Sub operations. 
-class N3VLMulOp op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> - : N3V; -class N3VLMulOpSL op21_20, bits<4> op11_8, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> - : N3VLane32; -class N3VLMulOpSL16 op21_20, bits<4> op11_8, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> - : N3VLane16; - -// Long Intrinsic-Op vector operations with explicit extend (VABAL). -class N3VLIntExtOp op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp, - SDNode OpNode> - : N3V; - -// Neon Long 3-argument intrinsic. The destination register is -// a quad-register and is also used as the first source operand register. -class N3VLInt3 op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, SDPatternOperator IntOp> - : N3V; -class N3VLInt3SL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N3VLane32; -class N3VLInt3SL16 op21_20, bits<4> op11_8, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N3VLane16; - -// Narrowing 3-register intrinsics. -class N3VNInt op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, - SDPatternOperator IntOp, bit Commutable> - : N3V { - let isCommutable = Commutable; -} - -// Long 3-register operations. 
-class N3VL op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable> - : N3V { - let isCommutable = Commutable; -} - -class N3VLSL op21_20, bits<4> op11_8, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, SDNode OpNode> - : N3VLane32; -class N3VLSL16 op21_20, bits<4> op11_8, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, SDNode OpNode> - : N3VLane16; - -// Long 3-register operations with explicitly extended operands. -class N3VLExt op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp, - bit Commutable> - : N3V { - let isCommutable = Commutable; -} - -// Long 3-register intrinsics with explicit extend (VABDL). -class N3VLIntExt op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp, - bit Commutable> - : N3V { - let isCommutable = Commutable; -} - -// Long 3-register intrinsics. -class N3VLInt op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable> - : N3V { - let isCommutable = Commutable; -} - -// Same as above, but not predicated. 
-class N3VLIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, - bit op4, InstrItinClass itin, string OpcodeStr, - string Dt, ValueType ResTy, ValueType OpTy, - SDPatternOperator IntOp, bit Commutable> - : N3Vnp; - -class N3VLIntSL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N3VLane32; -class N3VLIntSL16 op21_20, bits<4> op11_8, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N3VLane16; - -// Wide 3-register operations. -class N3VW op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, - SDNode OpNode, SDNode ExtOp, bit Commutable> - : N3V { - // All of these have a two-operand InstAlias. - let TwoOperandAliasConstraint = "$Vn = $Vd"; - let isCommutable = Commutable; -} - -// Pairwise long 2-register intrinsics, both double- and quad-register. -class N2VDPLInt op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, - string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N2V; -class N2VQPLInt op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, - string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N2V; - -// Pairwise long 2-register accumulate intrinsics, -// both double- and quad-register. -// The destination register is also used as the first source operand register. 
-class N2VDPLInt2 op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, - string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N2V; -class N2VQPLInt2 op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, - string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N2V; - -// Shift by immediate, -// both double- and quad-register. -let TwoOperandAliasConstraint = "$Vm = $Vd" in { -class N2VDSh op11_8, bit op7, bit op4, - Format f, InstrItinClass itin, Operand ImmTy, - string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> - : N2VImm; -class N2VQSh op11_8, bit op7, bit op4, - Format f, InstrItinClass itin, Operand ImmTy, - string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> - : N2VImm; -} - -// Long shift by immediate. -class N2VLSh op11_8, bit op7, bit op6, bit op4, - string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Operand ImmTy, - SDPatternOperator OpNode> - : N2VImm; - -// Narrow shift by immediate. -class N2VNSh op11_8, bit op7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Operand ImmTy, - SDPatternOperator OpNode> - : N2VImm; - -// Shift right by immediate and accumulate, -// both double- and quad-register. -let TwoOperandAliasConstraint = "$Vm = $Vd" in { -class N2VDShAdd op11_8, bit op7, bit op4, - Operand ImmTy, string OpcodeStr, string Dt, - ValueType Ty, SDNode ShOp> - : N2VImm; -class N2VQShAdd op11_8, bit op7, bit op4, - Operand ImmTy, string OpcodeStr, string Dt, - ValueType Ty, SDNode ShOp> - : N2VImm; -} - -// Shift by immediate and insert, -// both double- and quad-register. 
-let TwoOperandAliasConstraint = "$Vm = $Vd" in { -class N2VDShIns op11_8, bit op7, bit op4, - Operand ImmTy, Format f, string OpcodeStr, string Dt, - ValueType Ty,SDNode ShOp> - : N2VImm; -class N2VQShIns op11_8, bit op7, bit op4, - Operand ImmTy, Format f, string OpcodeStr, string Dt, - ValueType Ty,SDNode ShOp> - : N2VImm; -} - -// Convert, with fractional bits immediate, -// both double- and quad-register. -class N2VCvtD op11_8, bit op7, bit op4, - string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - SDPatternOperator IntOp> - : N2VImm; -class N2VCvtQ op11_8, bit op7, bit op4, - string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - SDPatternOperator IntOp> - : N2VImm; - -//===----------------------------------------------------------------------===// -// Multiclasses -//===----------------------------------------------------------------------===// - -// Abbreviations used in multiclass suffixes: -// Q = quarter int (8 bit) elements -// H = half int (16 bit) elements -// S = single int (32 bit) elements -// D = double int (64 bit) elements - -// Neon 2-register vector operations and intrinsics. - -// Neon 2-register comparisons. -// source operand element sizes of 8, 16 and 32 bits: -multiclass N2V_QHS_cmp op24_23, bits<2> op21_20, bits<2> op17_16, - bits<5> op11_7, bit op4, string opc, string Dt, - string asm, SDNode OpNode> { - // 64-bit vector types. - def v8i8 : N2V; - def v4i16 : N2V; - def v2i32 : N2V; - def v2f32 : N2V { - let Inst{10} = 1; // overwrite F = 1 - } - def v4f16 : N2V, - Requires<[HasNEON,HasFullFP16]> { - let Inst{10} = 1; // overwrite F = 1 - } - - // 128-bit vector types. 
- def v16i8 : N2V; - def v8i16 : N2V; - def v4i32 : N2V; - def v4f32 : N2V { - let Inst{10} = 1; // overwrite F = 1 - } - def v8f16 : N2V, - Requires<[HasNEON,HasFullFP16]> { - let Inst{10} = 1; // overwrite F = 1 - } -} - - -// Neon 2-register vector intrinsics, -// element sizes of 8, 16 and 32 bits: -multiclass N2VInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, - bits<5> op11_7, bit op4, - InstrItinClass itinD, InstrItinClass itinQ, - string OpcodeStr, string Dt, SDPatternOperator IntOp> { - // 64-bit vector types. - def v8i8 : N2VDInt; - def v4i16 : N2VDInt; - def v2i32 : N2VDInt; - - // 128-bit vector types. - def v16i8 : N2VQInt; - def v8i16 : N2VQInt; - def v4i32 : N2VQInt; -} - - -// Neon Narrowing 2-register vector operations, -// source operand element sizes of 16, 32 and 64 bits: -multiclass N2VN_HSD op24_23, bits<2> op21_20, bits<2> op17_16, - bits<5> op11_7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - SDNode OpNode> { - def v8i8 : N2VN; - def v4i16 : N2VN; - def v2i32 : N2VN; -} - -// Neon Narrowing 2-register vector intrinsics, -// source operand element sizes of 16, 32 and 64 bits: -multiclass N2VNInt_HSD op24_23, bits<2> op21_20, bits<2> op17_16, - bits<5> op11_7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - SDPatternOperator IntOp> { - def v8i8 : N2VNInt; - def v4i16 : N2VNInt; - def v2i32 : N2VNInt; -} - - -// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL). -// source operand element sizes of 16, 32 and 64 bits: -multiclass N2VL_QHS op24_23, bits<5> op11_7, bit op6, bit op4, - string OpcodeStr, string Dt, SDNode OpNode> { - def v8i16 : N2VL; - def v4i32 : N2VL; - def v2i64 : N2VL; -} - - -// Neon 3-register vector operations. 
- -// First with only element sizes of 8, 16 and 32 bits: -multiclass N3V_QHS op11_8, bit op4, - InstrItinClass itinD16, InstrItinClass itinD32, - InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, string Dt, - SDNode OpNode, bit Commutable = 0> { - // 64-bit vector types. - def v8i8 : N3VD; - def v4i16 : N3VD; - def v2i32 : N3VD; - - // 128-bit vector types. - def v16i8 : N3VQ; - def v8i16 : N3VQ; - def v4i32 : N3VQ; -} - -multiclass N3VSL_HS op11_8, string OpcodeStr, SDNode ShOp> { - def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>; - def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>; - def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>; - def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32", - v4i32, v2i32, ShOp>; -} - -// ....then also with element size 64 bits: -multiclass N3V_QHSD op11_8, bit op4, - InstrItinClass itinD, InstrItinClass itinQ, - string OpcodeStr, string Dt, - SDNode OpNode, bit Commutable = 0> - : N3V_QHS { - def v1i64 : N3VD; - def v2i64 : N3VQ; -} - - -// Neon 3-register vector intrinsics. - -// First with only element sizes of 16 and 32 bits: -multiclass N3VInt_HS op11_8, bit op4, Format f, - InstrItinClass itinD16, InstrItinClass itinD32, - InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, string Dt, - SDPatternOperator IntOp, bit Commutable = 0> { - // 64-bit vector types. - def v4i16 : N3VDInt; - def v2i32 : N3VDInt; - - // 128-bit vector types. - def v8i16 : N3VQInt; - def v4i32 : N3VQInt; -} -multiclass N3VInt_HSSh op11_8, bit op4, Format f, - InstrItinClass itinD16, InstrItinClass itinD32, - InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, string Dt, - SDPatternOperator IntOp> { - // 64-bit vector types. - def v4i16 : N3VDIntSh; - def v2i32 : N3VDIntSh; - - // 128-bit vector types. 
- def v8i16 : N3VQIntSh; - def v4i32 : N3VQIntSh; -} - -multiclass N3VIntSL_HS op11_8, - InstrItinClass itinD16, InstrItinClass itinD32, - InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, string Dt, SDPatternOperator IntOp> { - def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, - OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>; - def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, - OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>; - def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16, - OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>; - def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, - OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>; -} - -// ....then also with element size of 8 bits: -multiclass N3VInt_QHS op11_8, bit op4, Format f, - InstrItinClass itinD16, InstrItinClass itinD32, - InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, string Dt, - SDPatternOperator IntOp, bit Commutable = 0> - : N3VInt_HS { - def v8i8 : N3VDInt; - def v16i8 : N3VQInt; -} -multiclass N3VInt_QHSSh op11_8, bit op4, Format f, - InstrItinClass itinD16, InstrItinClass itinD32, - InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, string Dt, - SDPatternOperator IntOp> - : N3VInt_HSSh { - def v8i8 : N3VDIntSh; - def v16i8 : N3VQIntSh; -} - - -// ....then also with element size of 64 bits: -multiclass N3VInt_QHSD op11_8, bit op4, Format f, - InstrItinClass itinD16, InstrItinClass itinD32, - InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, string Dt, - SDPatternOperator IntOp, bit Commutable = 0> - : N3VInt_QHS { - def v1i64 : N3VDInt; - def v2i64 : N3VQInt; -} -multiclass N3VInt_QHSDSh op11_8, bit op4, Format f, - InstrItinClass itinD16, InstrItinClass itinD32, - InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, string Dt, - SDPatternOperator IntOp> - : N3VInt_QHSSh { - def v1i64 : N3VDIntSh; - def v2i64 : N3VQIntSh; -} - -// Neon Narrowing 3-register vector intrinsics, -// source operand element sizes of 16, 32 
and 64 bits: -multiclass N3VNInt_HSD op11_8, bit op4, - string OpcodeStr, string Dt, - SDPatternOperator IntOp, bit Commutable = 0> { - def v8i8 : N3VNInt; - def v4i16 : N3VNInt; - def v2i32 : N3VNInt; -} - - -// Neon Long 3-register vector operations. - -multiclass N3VL_QHS op11_8, bit op4, - InstrItinClass itin16, InstrItinClass itin32, - string OpcodeStr, string Dt, - SDNode OpNode, bit Commutable = 0> { - def v8i16 : N3VL; - def v4i32 : N3VL; - def v2i64 : N3VL; -} - -multiclass N3VLSL_HS op11_8, - InstrItinClass itin, string OpcodeStr, string Dt, - SDNode OpNode> { - def v4i16 : N3VLSL16; - def v2i32 : N3VLSL; -} - -multiclass N3VLExt_QHS op11_8, bit op4, - InstrItinClass itin16, InstrItinClass itin32, - string OpcodeStr, string Dt, - SDNode OpNode, SDNode ExtOp, bit Commutable = 0> { - def v8i16 : N3VLExt; - def v4i32 : N3VLExt; - def v2i64 : N3VLExt; -} - -// Neon Long 3-register vector intrinsics. - -// First with only element sizes of 16 and 32 bits: -multiclass N3VLInt_HS op11_8, bit op4, - InstrItinClass itin16, InstrItinClass itin32, - string OpcodeStr, string Dt, - SDPatternOperator IntOp, bit Commutable = 0> { - def v4i32 : N3VLInt; - def v2i64 : N3VLInt; -} - -multiclass N3VLIntSL_HS op11_8, - InstrItinClass itin, string OpcodeStr, string Dt, - SDPatternOperator IntOp> { - def v4i16 : N3VLIntSL16; - def v2i32 : N3VLIntSL; -} - -// ....then also with element size of 8 bits: -multiclass N3VLInt_QHS op11_8, bit op4, - InstrItinClass itin16, InstrItinClass itin32, - string OpcodeStr, string Dt, - SDPatternOperator IntOp, bit Commutable = 0> - : N3VLInt_HS { - def v8i16 : N3VLInt; -} - -// ....with explicit extend (VABDL). 
-multiclass N3VLIntExt_QHS op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> { - def v8i16 : N3VLIntExt; - def v4i32 : N3VLIntExt; - def v2i64 : N3VLIntExt; -} - - -// Neon Wide 3-register vector intrinsics, -// source operand element sizes of 8, 16 and 32 bits: -multiclass N3VW_QHS op11_8, bit op4, - string OpcodeStr, string Dt, - SDNode OpNode, SDNode ExtOp, bit Commutable = 0> { - def v8i16 : N3VW; - def v4i32 : N3VW; - def v2i64 : N3VW; -} - - -// Neon Multiply-Op vector operations, -// element sizes of 8, 16 and 32 bits: -multiclass N3VMulOp_QHS op11_8, bit op4, - InstrItinClass itinD16, InstrItinClass itinD32, - InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, string Dt, SDNode OpNode> { - // 64-bit vector types. - def v8i8 : N3VDMulOp; - def v4i16 : N3VDMulOp; - def v2i32 : N3VDMulOp; - - // 128-bit vector types. - def v16i8 : N3VQMulOp; - def v8i16 : N3VQMulOp; - def v4i32 : N3VQMulOp; -} - -multiclass N3VMulOpSL_HS op11_8, - InstrItinClass itinD16, InstrItinClass itinD32, - InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, string Dt, SDPatternOperator ShOp> { - def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16, - OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>; - def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, - OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>; - def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16, - OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, - mul, ShOp>; - def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32, - OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, - mul, ShOp>; -} - -// Neon Intrinsic-Op vector operations, -// element sizes of 8, 16 and 32 bits: -multiclass N3VIntOp_QHS op11_8, bit op4, - InstrItinClass itinD, InstrItinClass itinQ, - string OpcodeStr, string Dt, SDPatternOperator IntOp, - SDNode OpNode> { - // 64-bit vector types. 
- def v8i8 : N3VDIntOp; - def v4i16 : N3VDIntOp; - def v2i32 : N3VDIntOp; - - // 128-bit vector types. - def v16i8 : N3VQIntOp; - def v8i16 : N3VQIntOp; - def v4i32 : N3VQIntOp; -} - -// Neon 3-argument intrinsics, -// element sizes of 16 and 32 bits: -multiclass N3VInt3_HS op11_8, bit op4, - InstrItinClass itinD16, InstrItinClass itinD32, - InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, string Dt, SDPatternOperator IntOp> { - // 64-bit vector types. - def v4i16 : N3VDInt3; - def v2i32 : N3VDInt3; - - // 128-bit vector types. - def v8i16 : N3VQInt3; - def v4i32 : N3VQInt3; -} - -// element sizes of 8, 16 and 32 bits: -multiclass N3VInt3_QHS op11_8, bit op4, - InstrItinClass itinD16, InstrItinClass itinD32, - InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, string Dt, SDPatternOperator IntOp> - :N3VInt3_HS { - // 64-bit vector types. - def v8i8 : N3VDInt3; - // 128-bit vector types. - def v16i8 : N3VQInt3; -} - -// Neon Long Multiply-Op vector operations, -// element sizes of 8, 16 and 32 bits: -multiclass N3VLMulOp_QHS op11_8, bit op4, - InstrItinClass itin16, InstrItinClass itin32, - string OpcodeStr, string Dt, SDNode MulOp, - SDNode OpNode> { - def v8i16 : N3VLMulOp; - def v4i32 : N3VLMulOp; - def v2i64 : N3VLMulOp; -} - -multiclass N3VLMulOpSL_HS op11_8, string OpcodeStr, - string Dt, SDNode MulOp, SDNode OpNode> { - def v4i16 : N3VLMulOpSL16; - def v2i32 : N3VLMulOpSL; -} - - -// Neon Long 3-argument intrinsics. 
- -// First with only element sizes of 16 and 32 bits: -multiclass N3VLInt3_HS op11_8, bit op4, - InstrItinClass itin16, InstrItinClass itin32, - string OpcodeStr, string Dt, SDPatternOperator IntOp> { - def v4i32 : N3VLInt3; - def v2i64 : N3VLInt3; -} - -multiclass N3VLInt3SL_HS op11_8, - string OpcodeStr, string Dt, SDPatternOperator IntOp> { - def v4i16 : N3VLInt3SL16; - def v2i32 : N3VLInt3SL; -} - -// ....then also with element size of 8 bits: -multiclass N3VLInt3_QHS op11_8, bit op4, - InstrItinClass itin16, InstrItinClass itin32, - string OpcodeStr, string Dt, SDPatternOperator IntOp> - : N3VLInt3_HS { - def v8i16 : N3VLInt3; -} - -// ....with explicit extend (VABAL). -multiclass N3VLIntExtOp_QHS op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> { - def v8i16 : N3VLIntExtOp; - def v4i32 : N3VLIntExtOp; - def v2i64 : N3VLIntExtOp; -} - - -// Neon Pairwise long 2-register intrinsics, -// element sizes of 8, 16 and 32 bits: -multiclass N2VPLInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, - bits<5> op11_7, bit op4, - string OpcodeStr, string Dt, SDPatternOperator IntOp> { - // 64-bit vector types. - def v8i8 : N2VDPLInt; - def v4i16 : N2VDPLInt; - def v2i32 : N2VDPLInt; - - // 128-bit vector types. - def v16i8 : N2VQPLInt; - def v8i16 : N2VQPLInt; - def v4i32 : N2VQPLInt; -} - - -// Neon Pairwise long 2-register accumulate intrinsics, -// element sizes of 8, 16 and 32 bits: -multiclass N2VPLInt2_QHS op24_23, bits<2> op21_20, bits<2> op17_16, - bits<5> op11_7, bit op4, - string OpcodeStr, string Dt, SDPatternOperator IntOp> { - // 64-bit vector types. - def v8i8 : N2VDPLInt2; - def v4i16 : N2VDPLInt2; - def v2i32 : N2VDPLInt2; - - // 128-bit vector types. 
- def v16i8 : N2VQPLInt2; - def v8i16 : N2VQPLInt2; - def v4i32 : N2VQPLInt2; -} - - -// Neon 2-register vector shift by immediate, -// with f of either N2RegVShLFrm or N2RegVShRFrm -// element sizes of 8, 16, 32 and 64 bits: -multiclass N2VShL_QHSD op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - SDNode OpNode> { - // 64-bit vector types. - def v8i8 : N2VDSh { - let Inst{21-19} = 0b001; // imm6 = 001xxx - } - def v4i16 : N2VDSh { - let Inst{21-20} = 0b01; // imm6 = 01xxxx - } - def v2i32 : N2VDSh { - let Inst{21} = 0b1; // imm6 = 1xxxxx - } - def v1i64 : N2VDSh; - // imm6 = xxxxxx - - // 128-bit vector types. - def v16i8 : N2VQSh { - let Inst{21-19} = 0b001; // imm6 = 001xxx - } - def v8i16 : N2VQSh { - let Inst{21-20} = 0b01; // imm6 = 01xxxx - } - def v4i32 : N2VQSh { - let Inst{21} = 0b1; // imm6 = 1xxxxx - } - def v2i64 : N2VQSh; - // imm6 = xxxxxx -} -multiclass N2VShR_QHSD op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - string baseOpc, SDNode OpNode> { - // 64-bit vector types. - def v8i8 : N2VDSh { - let Inst{21-19} = 0b001; // imm6 = 001xxx - } - def v4i16 : N2VDSh { - let Inst{21-20} = 0b01; // imm6 = 01xxxx - } - def v2i32 : N2VDSh { - let Inst{21} = 0b1; // imm6 = 1xxxxx - } - def v1i64 : N2VDSh; - // imm6 = xxxxxx - - // 128-bit vector types. - def v16i8 : N2VQSh { - let Inst{21-19} = 0b001; // imm6 = 001xxx - } - def v8i16 : N2VQSh { - let Inst{21-20} = 0b01; // imm6 = 01xxxx - } - def v4i32 : N2VQSh { - let Inst{21} = 0b1; // imm6 = 1xxxxx - } - def v2i64 : N2VQSh; - // imm6 = xxxxxx -} - -// Neon Shift-Accumulate vector operations, -// element sizes of 8, 16, 32 and 64 bits: -multiclass N2VShAdd_QHSD op11_8, bit op4, - string OpcodeStr, string Dt, SDNode ShOp> { - // 64-bit vector types. 
- def v8i8 : N2VDShAdd { - let Inst{21-19} = 0b001; // imm6 = 001xxx - } - def v4i16 : N2VDShAdd { - let Inst{21-20} = 0b01; // imm6 = 01xxxx - } - def v2i32 : N2VDShAdd { - let Inst{21} = 0b1; // imm6 = 1xxxxx - } - def v1i64 : N2VDShAdd; - // imm6 = xxxxxx - - // 128-bit vector types. - def v16i8 : N2VQShAdd { - let Inst{21-19} = 0b001; // imm6 = 001xxx - } - def v8i16 : N2VQShAdd { - let Inst{21-20} = 0b01; // imm6 = 01xxxx - } - def v4i32 : N2VQShAdd { - let Inst{21} = 0b1; // imm6 = 1xxxxx - } - def v2i64 : N2VQShAdd; - // imm6 = xxxxxx -} - -// Neon Shift-Insert vector operations, -// with f of either N2RegVShLFrm or N2RegVShRFrm -// element sizes of 8, 16, 32 and 64 bits: -multiclass N2VShInsL_QHSD op11_8, bit op4, - string OpcodeStr> { - // 64-bit vector types. - def v8i8 : N2VDShIns { - let Inst{21-19} = 0b001; // imm6 = 001xxx - } - def v4i16 : N2VDShIns { - let Inst{21-20} = 0b01; // imm6 = 01xxxx - } - def v2i32 : N2VDShIns { - let Inst{21} = 0b1; // imm6 = 1xxxxx - } - def v1i64 : N2VDShIns; - // imm6 = xxxxxx - - // 128-bit vector types. - def v16i8 : N2VQShIns { - let Inst{21-19} = 0b001; // imm6 = 001xxx - } - def v8i16 : N2VQShIns { - let Inst{21-20} = 0b01; // imm6 = 01xxxx - } - def v4i32 : N2VQShIns { - let Inst{21} = 0b1; // imm6 = 1xxxxx - } - def v2i64 : N2VQShIns; - // imm6 = xxxxxx -} -multiclass N2VShInsR_QHSD op11_8, bit op4, - string OpcodeStr> { - // 64-bit vector types. - def v8i8 : N2VDShIns { - let Inst{21-19} = 0b001; // imm6 = 001xxx - } - def v4i16 : N2VDShIns { - let Inst{21-20} = 0b01; // imm6 = 01xxxx - } - def v2i32 : N2VDShIns { - let Inst{21} = 0b1; // imm6 = 1xxxxx - } - def v1i64 : N2VDShIns; - // imm6 = xxxxxx - - // 128-bit vector types. 
- def v16i8 : N2VQShIns { - let Inst{21-19} = 0b001; // imm6 = 001xxx - } - def v8i16 : N2VQShIns { - let Inst{21-20} = 0b01; // imm6 = 01xxxx - } - def v4i32 : N2VQShIns { - let Inst{21} = 0b1; // imm6 = 1xxxxx - } - def v2i64 : N2VQShIns; - // imm6 = xxxxxx -} - -// Neon Shift Long operations, -// element sizes of 8, 16, 32 bits: -multiclass N2VLSh_QHS op11_8, bit op7, bit op6, - bit op4, string OpcodeStr, string Dt, - SDPatternOperator OpNode> { - def v8i16 : N2VLSh { - let Inst{21-19} = 0b001; // imm6 = 001xxx - } - def v4i32 : N2VLSh { - let Inst{21-20} = 0b01; // imm6 = 01xxxx - } - def v2i64 : N2VLSh { - let Inst{21} = 0b1; // imm6 = 1xxxxx - } -} - -// Neon Shift Narrow operations, -// element sizes of 16, 32, 64 bits: -multiclass N2VNSh_HSD op11_8, bit op7, bit op6, - bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - SDPatternOperator OpNode> { - def v8i8 : N2VNSh { - let Inst{21-19} = 0b001; // imm6 = 001xxx - } - def v4i16 : N2VNSh { - let Inst{21-20} = 0b01; // imm6 = 01xxxx - } - def v2i32 : N2VNSh { - let Inst{21} = 0b1; // imm6 = 1xxxxx - } -} - -//===----------------------------------------------------------------------===// -// Instruction Definitions. -//===----------------------------------------------------------------------===// - -// Vector Add Operations. 
- -// VADD : Vector Add (integer and floating-point) -defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i", - add, 1>; -def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", - v2f32, v2f32, fadd, 1>; -def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", - v4f32, v4f32, fadd, 1>; -def VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16", - v4f16, v4f16, fadd, 1>, - Requires<[HasNEON,HasFullFP16]>; -def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16", - v8f16, v8f16, fadd, 1>, - Requires<[HasNEON,HasFullFP16]>; -// VADDL : Vector Add Long (Q = D + D) -defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, - "vaddl", "s", add, sext, 1>; -defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, - "vaddl", "u", add, zext, 1>; -// VADDW : Vector Add Wide (Q = Q + D) -defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>; -defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>; -// VHADD : Vector Halving Add -defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, - "vhadd", "s", int_arm_neon_vhadds, 1>; -defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, - "vhadd", "u", int_arm_neon_vhaddu, 1>; -// VRHADD : Vector Rounding Halving Add -defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, - "vrhadd", "s", int_arm_neon_vrhadds, 1>; -defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, - "vrhadd", "u", int_arm_neon_vrhaddu, 1>; -// VQADD : Vector Saturating Add -defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, - "vqadd", "s", int_arm_neon_vqadds, 1>; -defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, - "vqadd", "u", int_arm_neon_vqaddu, 1>; -// 
VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) -defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>; -// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) -defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", - int_arm_neon_vraddhn, 1>; - -def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))), - (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>; -def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))), - (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>; -def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))), - (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>; - -// Vector Multiply Operations. - -// VMUL : Vector Multiply (integer, polynomial and floating-point) -defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, - IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>; -def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul", - "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>; -def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul", - "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; -def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32", - v2f32, v2f32, fmul, 1>; -def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32", - v4f32, v4f32, fmul, 1>; -def VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16", - v4f16, v4f16, fmul, 1>, - Requires<[HasNEON,HasFullFP16]>; -def VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16", - v8f16, v8f16, fmul, 1>, - Requires<[HasNEON,HasFullFP16]>; -defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>; -def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; -def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, - v2f32, fmul>; -def VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>, - Requires<[HasNEON,HasFullFP16]>; -def VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16, - v4f16, fmul>, - Requires<[HasNEON,HasFullFP16]>; - -def : Pat<(v8i16 (mul (v8i16 
QPR:$src1), - (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), - (v8i16 (VMULslv8i16 (v8i16 QPR:$src1), - (v4i16 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i16_reg imm:$lane))), - (SubReg_i16_lane imm:$lane)))>; -def : Pat<(v4i32 (mul (v4i32 QPR:$src1), - (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), - (v4i32 (VMULslv4i32 (v4i32 QPR:$src1), - (v2i32 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane)))>; -def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), - (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))), - (v4f32 (VMULslfq (v4f32 QPR:$src1), - (v2f32 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane)))>; - - -def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), - (VMULslfd DPR:$Rn, - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), - (i32 0))>; -def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), - (VMULslfq QPR:$Rn, - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), - (i32 0))>; - - -// VQDMULH : Vector Saturating Doubling Multiply Returning High Half -defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D, - IIC_VMULi16Q, IIC_VMULi32Q, - "vqdmulh", "s", int_arm_neon_vqdmulh, 1>; -defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D, - IIC_VMULi16Q, IIC_VMULi32Q, - "vqdmulh", "s", int_arm_neon_vqdmulh>; -def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1), - (v8i16 (NEONvduplane (v8i16 QPR:$src2), - imm:$lane)))), - (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1), - (v4i16 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i16_reg imm:$lane))), - (SubReg_i16_lane imm:$lane)))>; -def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), - (v4i32 (NEONvduplane (v4i32 QPR:$src2), - imm:$lane)))), - (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1), - (v2i32 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane)))>; - -// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half -defm 
VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm, - IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q, - "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>; -defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D, - IIC_VMULi16Q, IIC_VMULi32Q, - "vqrdmulh", "s", int_arm_neon_vqrdmulh>; -def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1), - (v8i16 (NEONvduplane (v8i16 QPR:$src2), - imm:$lane)))), - (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1), - (v4i16 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i16_reg imm:$lane))), - (SubReg_i16_lane imm:$lane)))>; -def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), - (v4i32 (NEONvduplane (v4i32 QPR:$src2), - imm:$lane)))), - (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1), - (v2i32 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane)))>; - -// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -let PostEncoderMethod = "NEONThumb2DataIPostEncoder", - DecoderNamespace = "NEONData" in { - defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "s", NEONvmulls, 1>; - defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "u", NEONvmullu, 1>; - def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", - v8i16, v8i8, int_arm_neon_vmullp, 1>; - def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary, - "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>, - Requires<[HasV8, HasCrypto]>; -} -defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>; -defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>; - -// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) -defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D, - "vqdmull", "s", int_arm_neon_vqdmull, 1>; -defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, - "vqdmull", "s", int_arm_neon_vqdmull>; - -// Vector Multiply-Accumulate and Multiply-Subtract Operations. 
- -// VMLA : Vector Multiply Accumulate (integer and floating-point) -defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, - IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; -def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", - v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; -def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", - v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; -def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16", - v4f16, fmul_su, fadd_mlx>, - Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; -def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16", - v8f16, fmul_su, fadd_mlx>, - Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; -defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, - IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; -def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", - v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx]>; -def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32", - v4f32, v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx]>; -def VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16", - v4f16, fmul, fadd>, - Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; -def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16", - v8f16, v4f16, fmul, fadd>, - Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; - -def : Pat<(v8i16 (add (v8i16 QPR:$src1), - (mul (v8i16 QPR:$src2), - (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), - (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), - (v4i16 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i16_reg imm:$lane))), - (SubReg_i16_lane imm:$lane)))>; - -def : Pat<(v4i32 (add (v4i32 QPR:$src1), - (mul (v4i32 QPR:$src2), - (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), - (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), - (v2i32 (EXTRACT_SUBREG 
QPR:$src3, - (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane)))>; - -def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1), - (fmul_su (v4f32 QPR:$src2), - (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), - (v4f32 (VMLAslfq (v4f32 QPR:$src1), - (v4f32 QPR:$src2), - (v2f32 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane)))>, - Requires<[HasNEON, UseFPVMLx]>; - -// VMLAL : Vector Multiply Accumulate Long (Q += D * D) -defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, - "vmlal", "s", NEONvmulls, add>; -defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, - "vmlal", "u", NEONvmullu, add>; - -defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>; -defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; - -let Predicates = [HasNEON, HasV8_1a] in { - // v8.1a Neon Rounding Double Multiply-Op vector operations, - // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long - // (Q += D * D) - defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D, - IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", - null_frag>; - def : Pat<(v4i16 (int_arm_neon_vqadds - (v4i16 DPR:$src1), - (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), - (v4i16 DPR:$Vm))))), - (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; - def : Pat<(v2i32 (int_arm_neon_vqadds - (v2i32 DPR:$src1), - (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), - (v2i32 DPR:$Vm))))), - (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; - def : Pat<(v8i16 (int_arm_neon_vqadds - (v8i16 QPR:$src1), - (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), - (v8i16 QPR:$Vm))))), - (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; - def : Pat<(v4i32 (int_arm_neon_vqadds - (v4i32 QPR:$src1), - (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), - (v4i32 QPR:$Vm))))), - (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; - - defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D, - 
IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", - null_frag>; - def : Pat<(v4i16 (int_arm_neon_vqadds - (v4i16 DPR:$src1), - (v4i16 (int_arm_neon_vqrdmulh - (v4i16 DPR:$Vn), - (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), - imm:$lane)))))), - (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, - imm:$lane))>; - def : Pat<(v2i32 (int_arm_neon_vqadds - (v2i32 DPR:$src1), - (v2i32 (int_arm_neon_vqrdmulh - (v2i32 DPR:$Vn), - (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), - imm:$lane)))))), - (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, - imm:$lane))>; - def : Pat<(v8i16 (int_arm_neon_vqadds - (v8i16 QPR:$src1), - (v8i16 (int_arm_neon_vqrdmulh - (v8i16 QPR:$src2), - (v8i16 (NEONvduplane (v8i16 QPR:$src3), - imm:$lane)))))), - (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1), - (v8i16 QPR:$src2), - (v4i16 (EXTRACT_SUBREG - QPR:$src3, - (DSubReg_i16_reg imm:$lane))), - (SubReg_i16_lane imm:$lane)))>; - def : Pat<(v4i32 (int_arm_neon_vqadds - (v4i32 QPR:$src1), - (v4i32 (int_arm_neon_vqrdmulh - (v4i32 QPR:$src2), - (v4i32 (NEONvduplane (v4i32 QPR:$src3), - imm:$lane)))))), - (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1), - (v4i32 QPR:$src2), - (v2i32 (EXTRACT_SUBREG - QPR:$src3, - (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane)))>; - - // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long - // (Q -= D * D) - defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D, - IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", - null_frag>; - def : Pat<(v4i16 (int_arm_neon_vqsubs - (v4i16 DPR:$src1), - (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), - (v4i16 DPR:$Vm))))), - (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; - def : Pat<(v2i32 (int_arm_neon_vqsubs - (v2i32 DPR:$src1), - (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), - (v2i32 DPR:$Vm))))), - (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; - def : Pat<(v8i16 (int_arm_neon_vqsubs - (v8i16 QPR:$src1), - (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), - (v8i16 QPR:$Vm))))), - (v8i16 
(VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; - def : Pat<(v4i32 (int_arm_neon_vqsubs - (v4i32 QPR:$src1), - (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), - (v4i32 QPR:$Vm))))), - (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; - - defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D, - IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", - null_frag>; - def : Pat<(v4i16 (int_arm_neon_vqsubs - (v4i16 DPR:$src1), - (v4i16 (int_arm_neon_vqrdmulh - (v4i16 DPR:$Vn), - (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), - imm:$lane)))))), - (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>; - def : Pat<(v2i32 (int_arm_neon_vqsubs - (v2i32 DPR:$src1), - (v2i32 (int_arm_neon_vqrdmulh - (v2i32 DPR:$Vn), - (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), - imm:$lane)))))), - (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, - imm:$lane))>; - def : Pat<(v8i16 (int_arm_neon_vqsubs - (v8i16 QPR:$src1), - (v8i16 (int_arm_neon_vqrdmulh - (v8i16 QPR:$src2), - (v8i16 (NEONvduplane (v8i16 QPR:$src3), - imm:$lane)))))), - (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1), - (v8i16 QPR:$src2), - (v4i16 (EXTRACT_SUBREG - QPR:$src3, - (DSubReg_i16_reg imm:$lane))), - (SubReg_i16_lane imm:$lane)))>; - def : Pat<(v4i32 (int_arm_neon_vqsubs - (v4i32 QPR:$src1), - (v4i32 (int_arm_neon_vqrdmulh - (v4i32 QPR:$src2), - (v4i32 (NEONvduplane (v4i32 QPR:$src3), - imm:$lane)))))), - (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1), - (v4i32 QPR:$src2), - (v2i32 (EXTRACT_SUBREG - QPR:$src3, - (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane)))>; -} -// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) -defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, - "vqdmlal", "s", null_frag>; -defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>; - -def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), - (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), - (v4i16 DPR:$Vm))))), - (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : 
Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), - (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), - (v2i32 DPR:$Vm))))), - (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), - (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), - (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), - imm:$lane)))))), - (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; -def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), - (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), - (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), - imm:$lane)))))), - (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; - -// VMLS : Vector Multiply Subtract (integer and floating-point) -defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, - IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; -def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", - v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; -def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", - v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; -def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16", - v4f16, fmul, fsub>, - Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; -def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16", - v8f16, fmul, fsub>, - Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; -defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, - IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; -def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", - v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx]>; -def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32", - v4f32, v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx]>; -def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16", - v4f16, fmul, fsub>, - Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; -def VMLSslhq : 
N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16", - v8f16, v4f16, fmul, fsub>, - Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; - -def : Pat<(v8i16 (sub (v8i16 QPR:$src1), - (mul (v8i16 QPR:$src2), - (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), - (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), - (v4i16 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i16_reg imm:$lane))), - (SubReg_i16_lane imm:$lane)))>; - -def : Pat<(v4i32 (sub (v4i32 QPR:$src1), - (mul (v4i32 QPR:$src2), - (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), - (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), - (v2i32 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane)))>; - -def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1), - (fmul_su (v4f32 QPR:$src2), - (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), - (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), - (v2f32 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane)))>, - Requires<[HasNEON, UseFPVMLx]>; - -// VMLSL : Vector Multiply Subtract Long (Q -= D * D) -defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, - "vmlsl", "s", NEONvmulls, sub>; -defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, - "vmlsl", "u", NEONvmullu, sub>; - -defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>; -defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; - -// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) -defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, - "vqdmlsl", "s", null_frag>; -defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>; - -def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), - (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), - (v4i16 DPR:$Vm))))), - (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), - (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), - 
(v2i32 DPR:$Vm))))), - (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), - (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), - (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), - imm:$lane)))))), - (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; -def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), - (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), - (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), - imm:$lane)))))), - (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; - -// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. -def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", - v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON,HasVFP4,UseFusedMAC]>; - -def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", - v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEON,HasVFP4,UseFusedMAC]>; -def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16", - v4f16, fmul, fadd>, - Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; - -def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16", - v8f16, fmul, fadd>, - Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; - -// Fused Vector Multiply Subtract (floating-point) -def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", - v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON,HasVFP4,UseFusedMAC]>; -def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", - v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEON,HasVFP4,UseFusedMAC]>; -def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16", - v4f16, fmul, fsub>, - Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; -def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16", - v8f16, fmul, fsub>, - Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; - -// Match @llvm.fma.* intrinsics -def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), - (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, - Requires<[HasVFP4]>; -def : Pat<(v4f32 
(fma QPR:$Vn, QPR:$Vm, QPR:$src1)), - (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, - Requires<[HasVFP4]>; -def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)), - (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, - Requires<[HasVFP4]>; -def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)), - (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, - Requires<[HasVFP4]>; - -// ARMv8.2a dot product instructions. -// We put them in the VFPV8 decoder namespace because the ARM and Thumb -// encodings are the same and thus no further bit twiddling is necessary -// in the disassembler. -class VDOT : - N3Vnp<0b11000, 0b10, 0b1101, op6, op4, (outs RegTy:$dst), - (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD, - Asm, AsmTy, - [(set (AccumTy RegTy:$dst), - (OpNode (AccumTy RegTy:$Vd), - (InputTy RegTy:$Vn), - (InputTy RegTy:$Vm)))]> { - let Predicates = [HasDotProd]; - let DecoderNamespace = "VFPV8"; - let Constraints = "$dst = $Vd"; -} - -def VUDOTD : VDOT<0, 1, DPR, "vudot", "u8", v2i32, v8i8, int_arm_neon_udot>; -def VSDOTD : VDOT<0, 0, DPR, "vsdot", "s8", v2i32, v8i8, int_arm_neon_sdot>; -def VUDOTQ : VDOT<1, 1, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>; -def VSDOTQ : VDOT<1, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>; - -// Indexed dot product instructions: -multiclass DOTI { - def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst), - (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), - N3RegFrm, IIC_VDOTPROD, opc, dt, []> { - bit lane; - let Inst{5} = lane; - let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane"); - let Constraints = "$dst = $Vd"; - let Predicates = [HasDotProd]; - let DecoderNamespace = "VFPV8"; - } - - def : Pat< - (AccumType (OpNode (AccumType Ty:$Vd), - (InputType Ty:$Vn), - (InputType (bitconvert (AccumType - (NEONvduplane (AccumType Ty:$Vm), - VectorIndex32:$lane)))))), - (!cast(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>; -} - -defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8, - 
int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>; -defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8, - int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>; -defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8, - int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; -defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8, - int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; - - -// ARMv8.3 complex operations -class BaseN3VCP8ComplexTied pattern> - : N3VCP8<{?,?}, {op21,s}, q, op4, oops, - iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{ - bits<2> rot; - let Inst{24-23} = rot; -} - -class BaseN3VCP8ComplexOdd pattern> - : N3VCP8<{?,op23}, {op21,s}, q, op4, oops, - iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> { - bits<1> rot; - let Inst{24} = rot; -} - -class BaseN3VCP8ComplexTiedLane32 pattern> - : N3VLaneCP8 { - bits<2> rot; - bit lane; - - let Inst{21-20} = rot; - let Inst{5} = lane; -} - -class BaseN3VCP8ComplexTiedLane64 pattern> - : N3VLaneCP8 { - bits<2> rot; - bit lane; - - let Inst{21-20} = rot; - let Inst{5} = Vm{4}; - // This is needed because the lane operand does not have any bits in the - // encoding (it only has one possible value), so we need to manually set it - // to it's default value. 
- let DecoderMethod = "DecodeNEONComplexLane64Instruction"; -} - -multiclass N3VCP8ComplexTied { - let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { - def v4f16 : BaseN3VCP8ComplexTied; - def v8f16 : BaseN3VCP8ComplexTied; - } - let Predicates = [HasNEON,HasV8_3a] in { - def v2f32 : BaseN3VCP8ComplexTied; - def v4f32 : BaseN3VCP8ComplexTied; - } -} - -multiclass N3VCP8ComplexOdd { - let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { - def v4f16 : BaseN3VCP8ComplexOdd; - def v8f16 : BaseN3VCP8ComplexOdd; - } - let Predicates = [HasNEON,HasV8_3a] in { - def v2f32 : BaseN3VCP8ComplexOdd; - def v4f32 : BaseN3VCP8ComplexOdd; - } -} - -// These instructions index by pairs of lanes, so the VectorIndexes are twice -// as wide as the data types. -multiclass N3VCP8ComplexTiedLane { - let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { - def v4f16_indexed : BaseN3VCP8ComplexTiedLane32; - def v8f16_indexed : BaseN3VCP8ComplexTiedLane32; - } - let Predicates = [HasNEON,HasV8_3a] in { - def v2f32_indexed : BaseN3VCP8ComplexTiedLane64; - def v4f32_indexed : BaseN3VCP8ComplexTiedLane64; - } -} - -defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>; -defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>; -defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>; - -// Vector Subtract Operations. 
- -// VSUB : Vector Subtract (integer and floating-point) -defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, - "vsub", "i", sub, 0>; -def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", - v2f32, v2f32, fsub, 0>; -def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", - v4f32, v4f32, fsub, 0>; -def VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16", - v4f16, v4f16, fsub, 0>, - Requires<[HasNEON,HasFullFP16]>; -def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16", - v8f16, v8f16, fsub, 0>, - Requires<[HasNEON,HasFullFP16]>; -// VSUBL : Vector Subtract Long (Q = D - D) -defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, - "vsubl", "s", sub, sext, 0>; -defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, - "vsubl", "u", sub, zext, 0>; -// VSUBW : Vector Subtract Wide (Q = Q - D) -defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>; -defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>; -// VHSUB : Vector Halving Subtract -defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, - IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vhsub", "s", int_arm_neon_vhsubs, 0>; -defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm, - IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vhsub", "u", int_arm_neon_vhsubu, 0>; -// VQSUB : Vector Saturing Subtract -defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm, - IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vqsub", "s", int_arm_neon_vqsubs, 0>; -defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, - IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vqsub", "u", int_arm_neon_vqsubu, 0>; -// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) -defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>; -// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) -defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", - int_arm_neon_vrsubhn, 0>; - -def : Pat<(v8i8 
(trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))), - (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>; -def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))), - (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>; -def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), - (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>; - -// Vector Comparisons. - -// VCEQ : Vector Compare Equal -defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>; -def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, - NEONvceq, 1>; -def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, - NEONvceq, 1>; -def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, - NEONvceq, 1>, - Requires<[HasNEON, HasFullFP16]>; -def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, - NEONvceq, 1>, - Requires<[HasNEON, HasFullFP16]>; - -let TwoOperandAliasConstraint = "$Vm = $Vd" in -defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", - "$Vd, $Vm, #0", NEONvceqz>; - -// VCGE : Vector Compare Greater Than or Equal -defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>; -defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>; -def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, - NEONvcge, 0>; -def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, - NEONvcge, 0>; -def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, - NEONvcge, 0>, - Requires<[HasNEON, HasFullFP16]>; -def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, - NEONvcge, 0>, - Requires<[HasNEON, HasFullFP16]>; - -let TwoOperandAliasConstraint = "$Vm = $Vd" in { -defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", - "$Vd, $Vm, #0", NEONvcgez>; -defm VCLEz : 
N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", - "$Vd, $Vm, #0", NEONvclez>; -} - -// VCGT : Vector Compare Greater Than -defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>; -defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>; -def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, - NEONvcgt, 0>; -def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, - NEONvcgt, 0>; -def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16, - NEONvcgt, 0>, - Requires<[HasNEON, HasFullFP16]>; -def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16, - NEONvcgt, 0>, - Requires<[HasNEON, HasFullFP16]>; - -let TwoOperandAliasConstraint = "$Vm = $Vd" in { -defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", - "$Vd, $Vm, #0", NEONvcgtz>; -defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", - "$Vd, $Vm, #0", NEONvcltz>; -} - -// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) -def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", - "f32", v2i32, v2f32, int_arm_neon_vacge, 0>; -def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge", - "f32", v4i32, v4f32, int_arm_neon_vacge, 0>; -def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", - "f16", v4i16, v4f16, int_arm_neon_vacge, 0>, - Requires<[HasNEON, HasFullFP16]>; -def VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge", - "f16", v8i16, v8f16, int_arm_neon_vacge, 0>, - Requires<[HasNEON, HasFullFP16]>; -// VACGT : Vector Absolute Compare Greater Than (aka VCAGT) -def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt", - "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>; -def VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", - "f32", v4i32, v4f32, 
int_arm_neon_vacgt, 0>; -def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt", - "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>, - Requires<[HasNEON, HasFullFP16]>; -def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", - "f16", v8f16, v8f16, int_arm_neon_vacgt, 0>, - Requires<[HasNEON, HasFullFP16]>; -// VTST : Vector Test Bits -defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vtst", "", NEONvtst, 1>; - -def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", - (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; -def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", - (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; -def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", - (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; -def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", - (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; -let Predicates = [HasNEON, HasFullFP16] in { -def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm", - (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; -def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm", - (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; -def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm", - (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; -def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm", - (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; -} - -def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", - (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; -def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", - (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; -def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", - (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; -def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", - (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; -let Predicates = [HasNEON, HasFullFP16] in { -def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm", - (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; -def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm", - (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; -def: NEONInstAlias<"vacle${p}.f16 $Vd, 
$Vm", - (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; -def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm", - (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; -} - -// Vector Bitwise Operations. - -def vnotd : PatFrag<(ops node:$in), - (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>; -def vnotq : PatFrag<(ops node:$in), - (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>; - - -// VAND : Vector Bitwise AND -def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand", - v2i32, v2i32, and, 1>; -def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand", - v4i32, v4i32, and, 1>; - -// VEOR : Vector Bitwise Exclusive OR -def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor", - v2i32, v2i32, xor, 1>; -def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor", - v4i32, v4i32, xor, 1>; - -// VORR : Vector Bitwise OR -def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", - v2i32, v2i32, or, 1>; -def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", - v4i32, v4i32, or, 1>; - -def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1, - (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), - IIC_VMOVImm, - "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", - [(set DPR:$Vd, - (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> { - let Inst{9} = SIMM{9}; -} - -def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1, - (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src), - IIC_VMOVImm, - "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", - [(set DPR:$Vd, - (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> { - let Inst{10-9} = SIMM{10-9}; -} - -def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1, - (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src), - IIC_VMOVImm, - "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", - [(set QPR:$Vd, - (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> { - let Inst{9} = SIMM{9}; -} - -def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1, - (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src), - IIC_VMOVImm, - "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", - 
[(set QPR:$Vd, - (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> { - let Inst{10-9} = SIMM{10-9}; -} - - -// VBIC : Vector Bitwise Bit Clear (AND NOT) -let TwoOperandAliasConstraint = "$Vn = $Vd" in { -def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, - "vbic", "$Vd, $Vn, $Vm", "", - [(set DPR:$Vd, (v2i32 (and DPR:$Vn, - (vnotd DPR:$Vm))))]>; -def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), - (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, - "vbic", "$Vd, $Vn, $Vm", "", - [(set QPR:$Vd, (v4i32 (and QPR:$Vn, - (vnotq QPR:$Vm))))]>; -} - -def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1, - (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), - IIC_VMOVImm, - "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", - [(set DPR:$Vd, - (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> { - let Inst{9} = SIMM{9}; -} - -def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1, - (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src), - IIC_VMOVImm, - "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", - [(set DPR:$Vd, - (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> { - let Inst{10-9} = SIMM{10-9}; -} - -def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1, - (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src), - IIC_VMOVImm, - "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", - [(set QPR:$Vd, - (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> { - let Inst{9} = SIMM{9}; -} - -def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1, - (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src), - IIC_VMOVImm, - "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", - [(set QPR:$Vd, - (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> { - let Inst{10-9} = SIMM{10-9}; -} - -// VORN : Vector Bitwise OR NOT -def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, - "vorn", "$Vd, $Vn, $Vm", "", - [(set DPR:$Vd, (v2i32 (or DPR:$Vn, - (vnotd DPR:$Vm))))]>; -def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd), - 
(ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, - "vorn", "$Vd, $Vn, $Vm", "", - [(set QPR:$Vd, (v4i32 (or QPR:$Vn, - (vnotq QPR:$Vm))))]>; - -// VMVN : Vector Bitwise NOT (Immediate) - -let isReMaterializable = 1 in { - -def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd), - (ins nImmSplatI16:$SIMM), IIC_VMOVImm, - "vmvn", "i16", "$Vd, $SIMM", "", - [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> { - let Inst{9} = SIMM{9}; -} - -def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd), - (ins nImmSplatI16:$SIMM), IIC_VMOVImm, - "vmvn", "i16", "$Vd, $SIMM", "", - [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> { - let Inst{9} = SIMM{9}; -} - -def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd), - (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, - "vmvn", "i32", "$Vd, $SIMM", "", - [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> { - let Inst{11-8} = SIMM{11-8}; -} - -def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd), - (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, - "vmvn", "i32", "$Vd, $SIMM", "", - [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> { - let Inst{11-8} = SIMM{11-8}; -} -} - -// VMVN : Vector Bitwise NOT -def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, - (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD, - "vmvn", "$Vd, $Vm", "", - [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>; -def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, - (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD, - "vmvn", "$Vd, $Vm", "", - [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>; -def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>; -def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>; - -// VBSL : Vector Bitwise Select -def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), - (ins DPR:$src1, DPR:$Vn, DPR:$Vm), - N3RegFrm, IIC_VCNTiD, - "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [(set DPR:$Vd, - (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; -def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1), - (v8i8 
DPR:$Vn), (v8i8 DPR:$Vm))), - (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, - Requires<[HasNEON]>; -def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1), - (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), - (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, - Requires<[HasNEON]>; -def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1), - (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), - (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, - Requires<[HasNEON]>; -def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1), - (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), - (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, - Requires<[HasNEON]>; -def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), - (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), - (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, - Requires<[HasNEON]>; - -def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), - (and DPR:$Vm, (vnotd DPR:$Vd)))), - (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, - Requires<[HasNEON]>; - -def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), - (and DPR:$Vm, (vnotd DPR:$Vd)))), - (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, - Requires<[HasNEON]>; - -def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), - (ins QPR:$src1, QPR:$Vn, QPR:$Vm), - N3RegFrm, IIC_VCNTiQ, - "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [(set QPR:$Vd, - (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; - -def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1), - (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), - (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, - Requires<[HasNEON]>; -def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1), - (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), - (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, - Requires<[HasNEON]>; -def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1), - (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), - (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, - Requires<[HasNEON]>; -def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1), - (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), - (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, - Requires<[HasNEON]>; -def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1), - (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), - (VBSLq QPR:$src1, QPR:$Vn, 
QPR:$Vm)>, - Requires<[HasNEON]>; - -def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), - (and QPR:$Vm, (vnotq QPR:$Vd)))), - (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>, - Requires<[HasNEON]>; -def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd), - (and QPR:$Vm, (vnotq QPR:$Vd)))), - (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>, - Requires<[HasNEON]>; - -// VBIF : Vector Bitwise Insert if False -// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", -// FIXME: This instruction's encoding MAY NOT BE correct. -def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1, - (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), - N3RegFrm, IIC_VBINiD, - "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", - []>; -def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1, - (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), - N3RegFrm, IIC_VBINiQ, - "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", - []>; - -// VBIT : Vector Bitwise Insert if True -// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst", -// FIXME: This instruction's encoding MAY NOT BE correct. -def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1, - (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), - N3RegFrm, IIC_VBINiD, - "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", - []>; -def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, - (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), - N3RegFrm, IIC_VBINiQ, - "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", - []>; - -// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking -// for equivalent operations with different register constraints; it just -// inserts copies. - -// Vector Absolute Differences. 
- -// VABD : Vector Absolute Difference -defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm, - IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabd", "s", int_arm_neon_vabds, 1>; -defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm, - IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabd", "u", int_arm_neon_vabdu, 1>; -def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND, - "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>; -def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, - "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>; -def VABDhd : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND, - "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>, - Requires<[HasNEON, HasFullFP16]>; -def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ, - "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>, - Requires<[HasNEON, HasFullFP16]>; - -// VABDL : Vector Absolute Difference Long (Q = | D - D |) -defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, - "vabdl", "s", int_arm_neon_vabds, zext, 1>; -defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, - "vabdl", "u", int_arm_neon_vabdu, zext, 1>; - -def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))), - (VABDLuv8i16 DPR:$opA, DPR:$opB)>; -def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))), - (VABDLuv4i32 DPR:$opA, DPR:$opB)>; - -// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the -// shift/xor pattern for ABS. 
- -def abd_shr : - PatFrag<(ops node:$in1, node:$in2, node:$shift), - (NEONvshrs (sub (zext node:$in1), - (zext node:$in2)), (i32 $shift))>; - -def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))), - (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)), - (zext (v2i32 DPR:$opB))), - (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))), - (VABDLuv2i64 DPR:$opA, DPR:$opB)>; - -// VABA : Vector Absolute Difference and Accumulate -defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, - "vaba", "s", int_arm_neon_vabds, add>; -defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, - "vaba", "u", int_arm_neon_vabdu, add>; - -// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) -defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD, - "vabal", "s", int_arm_neon_vabds, zext, add>; -defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD, - "vabal", "u", int_arm_neon_vabdu, zext, add>; - -// Vector Maximum and Minimum. 
- -// VMAX : Vector Maximum -defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm, - IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vmax", "s", smax, 1>; -defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm, - IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vmax", "u", umax, 1>; -def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, - "vmax", "f32", - v2f32, v2f32, fmaxnan, 1>; -def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, - "vmax", "f32", - v4f32, v4f32, fmaxnan, 1>; -def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND, - "vmax", "f16", - v4f16, v4f16, fmaxnan, 1>, - Requires<[HasNEON, HasFullFP16]>; -def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ, - "vmax", "f16", - v8f16, v8f16, fmaxnan, 1>, - Requires<[HasNEON, HasFullFP16]>; - -// VMAXNM -let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { - def VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1, - N3RegFrm, NoItinerary, "vmaxnm", "f32", - v2f32, v2f32, fmaxnum, 1>, - Requires<[HasV8, HasNEON]>; - def VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1, - N3RegFrm, NoItinerary, "vmaxnm", "f32", - v4f32, v4f32, fmaxnum, 1>, - Requires<[HasV8, HasNEON]>; - def VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1, - N3RegFrm, NoItinerary, "vmaxnm", "f16", - v4f16, v4f16, fmaxnum, 1>, - Requires<[HasV8, HasNEON, HasFullFP16]>; - def VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1, - N3RegFrm, NoItinerary, "vmaxnm", "f16", - v8f16, v8f16, fmaxnum, 1>, - Requires<[HasV8, HasNEON, HasFullFP16]>; -} - -// VMIN : Vector Minimum -defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, - IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vmin", "s", smin, 1>; -defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm, - IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vmin", "u", umin, 1>; -def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, - "vmin", "f32", - v2f32, v2f32, fminnan, 1>; -def VMINfq : N3VQInt<0, 0, 
0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, - "vmin", "f32", - v4f32, v4f32, fminnan, 1>; -def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND, - "vmin", "f16", - v4f16, v4f16, fminnan, 1>, - Requires<[HasNEON, HasFullFP16]>; -def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ, - "vmin", "f16", - v8f16, v8f16, fminnan, 1>, - Requires<[HasNEON, HasFullFP16]>; - -// VMINNM -let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { - def VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1, - N3RegFrm, NoItinerary, "vminnm", "f32", - v2f32, v2f32, fminnum, 1>, - Requires<[HasV8, HasNEON]>; - def VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1, - N3RegFrm, NoItinerary, "vminnm", "f32", - v4f32, v4f32, fminnum, 1>, - Requires<[HasV8, HasNEON]>; - def VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1, - N3RegFrm, NoItinerary, "vminnm", "f16", - v4f16, v4f16, fminnum, 1>, - Requires<[HasV8, HasNEON, HasFullFP16]>; - def VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1, - N3RegFrm, NoItinerary, "vminnm", "f16", - v8f16, v8f16, fminnum, 1>, - Requires<[HasV8, HasNEON, HasFullFP16]>; -} - -// Vector Pairwise Operations. 
- -// VPADD : Vector Pairwise Add -def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD, - "vpadd", "i8", - v8i8, v8i8, int_arm_neon_vpadd, 0>; -def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD, - "vpadd", "i16", - v4i16, v4i16, int_arm_neon_vpadd, 0>; -def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD, - "vpadd", "i32", - v2i32, v2i32, int_arm_neon_vpadd, 0>; -def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, - IIC_VPBIND, "vpadd", "f32", - v2f32, v2f32, int_arm_neon_vpadd, 0>; -def VPADDh : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm, - IIC_VPBIND, "vpadd", "f16", - v4f16, v4f16, int_arm_neon_vpadd, 0>, - Requires<[HasNEON, HasFullFP16]>; - -// VPADDL : Vector Pairwise Add Long -defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s", - int_arm_neon_vpaddls>; -defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u", - int_arm_neon_vpaddlu>; - -// VPADAL : Vector Pairwise Add and Accumulate Long -defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s", - int_arm_neon_vpadals>; -defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u", - int_arm_neon_vpadalu>; - -// VPMAX : Vector Pairwise Maximum -def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", - "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>; -def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", - "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>; -def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", - "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>; -def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", - "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>; -def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", - "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>; -def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", - "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>; -def VPMAXf : 
N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax", - "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>; -def VPMAXh : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax", - "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>, - Requires<[HasNEON, HasFullFP16]>; - -// VPMIN : Vector Pairwise Minimum -def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", - "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>; -def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", - "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>; -def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", - "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>; -def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", - "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>; -def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", - "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>; -def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", - "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>; -def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin", - "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>; -def VPMINh : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin", - "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>, - Requires<[HasNEON, HasFullFP16]>; - -// Vector Reciprocal and Reciprocal Square Root Estimate and Step. 
- -// VRECPE : Vector Reciprocal Estimate -def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, - IIC_VUNAD, "vrecpe", "u32", - v2i32, v2i32, int_arm_neon_vrecpe>; -def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, - IIC_VUNAQ, "vrecpe", "u32", - v4i32, v4i32, int_arm_neon_vrecpe>; -def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, - IIC_VUNAD, "vrecpe", "f32", - v2f32, v2f32, int_arm_neon_vrecpe>; -def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, - IIC_VUNAQ, "vrecpe", "f32", - v4f32, v4f32, int_arm_neon_vrecpe>; -def VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0, - IIC_VUNAD, "vrecpe", "f16", - v4f16, v4f16, int_arm_neon_vrecpe>, - Requires<[HasNEON, HasFullFP16]>; -def VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0, - IIC_VUNAQ, "vrecpe", "f16", - v8f16, v8f16, int_arm_neon_vrecpe>, - Requires<[HasNEON, HasFullFP16]>; - -// VRECPS : Vector Reciprocal Step -def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, - IIC_VRECSD, "vrecps", "f32", - v2f32, v2f32, int_arm_neon_vrecps, 1>; -def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, - IIC_VRECSQ, "vrecps", "f32", - v4f32, v4f32, int_arm_neon_vrecps, 1>; -def VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm, - IIC_VRECSD, "vrecps", "f16", - v4f16, v4f16, int_arm_neon_vrecps, 1>, - Requires<[HasNEON, HasFullFP16]>; -def VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm, - IIC_VRECSQ, "vrecps", "f16", - v8f16, v8f16, int_arm_neon_vrecps, 1>, - Requires<[HasNEON, HasFullFP16]>; - -// VRSQRTE : Vector Reciprocal Square Root Estimate -def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, - IIC_VUNAD, "vrsqrte", "u32", - v2i32, v2i32, int_arm_neon_vrsqrte>; -def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, - IIC_VUNAQ, "vrsqrte", "u32", - v4i32, v4i32, int_arm_neon_vrsqrte>; -def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, - IIC_VUNAD, "vrsqrte", "f32", - v2f32, v2f32, int_arm_neon_vrsqrte>; -def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 
0b11, 0b01011, 0, - IIC_VUNAQ, "vrsqrte", "f32", - v4f32, v4f32, int_arm_neon_vrsqrte>; -def VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0, - IIC_VUNAD, "vrsqrte", "f16", - v4f16, v4f16, int_arm_neon_vrsqrte>, - Requires<[HasNEON, HasFullFP16]>; -def VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0, - IIC_VUNAQ, "vrsqrte", "f16", - v8f16, v8f16, int_arm_neon_vrsqrte>, - Requires<[HasNEON, HasFullFP16]>; - -// VRSQRTS : Vector Reciprocal Square Root Step -def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, - IIC_VRECSD, "vrsqrts", "f32", - v2f32, v2f32, int_arm_neon_vrsqrts, 1>; -def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, - IIC_VRECSQ, "vrsqrts", "f32", - v4f32, v4f32, int_arm_neon_vrsqrts, 1>; -def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm, - IIC_VRECSD, "vrsqrts", "f16", - v4f16, v4f16, int_arm_neon_vrsqrts, 1>, - Requires<[HasNEON, HasFullFP16]>; -def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm, - IIC_VRECSQ, "vrsqrts", "f16", - v8f16, v8f16, int_arm_neon_vrsqrts, 1>, - Requires<[HasNEON, HasFullFP16]>; - -// Vector Shifts. 
- -// VSHL : Vector Shift -defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm, - IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ, - "vshl", "s", int_arm_neon_vshifts>; -defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm, - IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ, - "vshl", "u", int_arm_neon_vshiftu>; - -// VSHL : Vector Shift Left (Immediate) -defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>; - -// VSHR : Vector Shift Right (Immediate) -defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs", - NEONvshrs>; -defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu", - NEONvshru>; - -// VSHLL : Vector Shift Left Long -defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", - PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (sext node:$LHS), node:$RHS)>>; -defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", - PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (zext node:$LHS), node:$RHS)>>; - -// VSHLL : Vector Shift Left Long (with maximum shift count) -class N2VLShMax op21_16, bits<4> op11_8, bit op7, - bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy, - ValueType OpTy, Operand ImmTy> - : N2VLSh { - let Inst{21-16} = op21_16; - let DecoderMethod = "DecodeVSHLMaxInstruction"; -} -def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8", - v8i16, v8i8, imm8>; -def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16", - v4i32, v4i16, imm16>; -def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32", - v2i64, v2i32, imm32>; - -def : Pat<(v8i16 (NEONvshl (zext (v8i8 DPR:$Rn)), (i32 8))), - (VSHLLi8 DPR:$Rn, 8)>; -def : Pat<(v4i32 (NEONvshl (zext (v4i16 DPR:$Rn)), (i32 16))), - (VSHLLi16 DPR:$Rn, 16)>; -def : Pat<(v2i64 (NEONvshl (zext (v2i32 DPR:$Rn)), (i32 32))), - (VSHLLi32 DPR:$Rn, 32)>; -def : Pat<(v8i16 (NEONvshl (sext (v8i8 DPR:$Rn)), (i32 8))), - (VSHLLi8 DPR:$Rn, 8)>; -def : Pat<(v4i32 (NEONvshl (sext (v4i16 
DPR:$Rn)), (i32 16))), - (VSHLLi16 DPR:$Rn, 16)>; -def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))), - (VSHLLi32 DPR:$Rn, 32)>; -def : Pat<(v8i16 (NEONvshl (anyext (v8i8 DPR:$Rn)), (i32 8))), - (VSHLLi8 DPR:$Rn, 8)>; -def : Pat<(v4i32 (NEONvshl (anyext (v4i16 DPR:$Rn)), (i32 16))), - (VSHLLi16 DPR:$Rn, 16)>; -def : Pat<(v2i64 (NEONvshl (anyext (v2i32 DPR:$Rn)), (i32 32))), - (VSHLLi32 DPR:$Rn, 32)>; - -// VSHRN : Vector Shift Right and Narrow -defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", - PatFrag<(ops node:$Rn, node:$amt), - (trunc (NEONvshrs node:$Rn, node:$amt))>>; - -def : Pat<(v8i8 (trunc (NEONvshru (v8i16 QPR:$Vn), shr_imm8:$amt))), - (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>; -def : Pat<(v4i16 (trunc (NEONvshru (v4i32 QPR:$Vn), shr_imm16:$amt))), - (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>; -def : Pat<(v2i32 (trunc (NEONvshru (v2i64 QPR:$Vn), shr_imm32:$amt))), - (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>; - -// VRSHL : Vector Rounding Shift -defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm, - IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, - "vrshl", "s", int_arm_neon_vrshifts>; -defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm, - IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, - "vrshl", "u", int_arm_neon_vrshiftu>; -// VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs", - NEONvrshrs>; -defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu", - NEONvrshru>; - -// VRSHRN : Vector Rounding Shift Right and Narrow -defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", - NEONvrshrn>; - -// VQSHL : Vector Saturating Shift -defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm, - IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, - "vqshl", "s", int_arm_neon_vqshifts>; -defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm, - IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, - "vqshl", "u", 
int_arm_neon_vqshiftu>; -// VQSHL : Vector Saturating Shift Left (Immediate) -defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>; -defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>; - -// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) -defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>; - -// VQSHRN : Vector Saturating Shift Right and Narrow -defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s", - NEONvqshrns>; -defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u", - NEONvqshrnu>; - -// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned) -defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s", - NEONvqshrnsu>; - -// VQRSHL : Vector Saturating Rounding Shift -defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm, - IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, - "vqrshl", "s", int_arm_neon_vqrshifts>; -defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm, - IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, - "vqrshl", "u", int_arm_neon_vqrshiftu>; - -// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow -defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s", - NEONvqrshrns>; -defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u", - NEONvqrshrnu>; - -// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned) -defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s", - NEONvqrshrnsu>; - -// VSRA : Vector Shift Right and Accumulate -defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>; -defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>; -// VRSRA : Vector Rounding Shift Right and Accumulate -defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>; -defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>; - -// VSLI : Vector 
Shift Left and Insert -defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">; - -// VSRI : Vector Shift Right and Insert -defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">; - -// Vector Absolute and Saturating Absolute. - -// VABS : Vector Absolute Value -defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, - IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>; -def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - "vabs", "f32", - v2f32, v2f32, fabs>; -def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - "vabs", "f32", - v4f32, v4f32, fabs>; -def VABShd : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0, - "vabs", "f16", - v4f16, v4f16, fabs>, - Requires<[HasNEON, HasFullFP16]>; -def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0, - "vabs", "f16", - v8f16, v8f16, fabs>, - Requires<[HasNEON, HasFullFP16]>; - -// VQABS : Vector Saturating Absolute Value -defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, - IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", - int_arm_neon_vqabs>; - -// Vector Negate. - -def vnegd : PatFrag<(ops node:$in), - (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>; -def vnegq : PatFrag<(ops node:$in), - (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>; - -class VNEGD size, string OpcodeStr, string Dt, ValueType Ty> - : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm), - IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "", - [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>; -class VNEGQ size, string OpcodeStr, string Dt, ValueType Ty> - : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm), - IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "", - [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>; - -// VNEG : Vector Negate (integer) -def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>; -def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>; -def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>; -def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>; -def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>; -def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>; - -// VNEG : Vector 
Negate (floating-point) -def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, - (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD, - "vneg", "f32", "$Vd, $Vm", "", - [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>; -def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0, - (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ, - "vneg", "f32", "$Vd, $Vm", "", - [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>; -def VNEGhd : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0, - (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD, - "vneg", "f16", "$Vd, $Vm", "", - [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>, - Requires<[HasNEON, HasFullFP16]>; -def VNEGhq : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0, - (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ, - "vneg", "f16", "$Vd, $Vm", "", - [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>, - Requires<[HasNEON, HasFullFP16]>; - -def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>; -def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>; -def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>; -def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>; -def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>; -def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>; - -// VQNEG : Vector Saturating Negate -defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, - IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s", - int_arm_neon_vqneg>; - -// Vector Bit Counting Operations. 
- -// VCLS : Vector Count Leading Sign Bits -defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, - IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s", - int_arm_neon_vcls>; -// VCLZ : Vector Count Leading Zeros -defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, - IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i", - ctlz>; -// VCNT : Vector Count One Bits -def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, - IIC_VCNTiD, "vcnt", "8", - v8i8, v8i8, ctpop>; -def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, - IIC_VCNTiQ, "vcnt", "8", - v16i8, v16i8, ctpop>; - -// Vector Swap -def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, - (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2), - NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", - []>; -def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, - (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2), - NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", - []>; - -// Vector Move Operations. - -// VMOV : Vector Move (Register) -def : NEONInstAlias<"vmov${p} $Vd, $Vm", - (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmov${p} $Vd, $Vm", - (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; - -// VMOV : Vector Move (Immediate) - -// Although VMOVs are not strictly speaking cheap, they are as expensive -// as their copies counterpart (VORR), so we should prefer rematerialization -// over splitting when it applies. 
-let isReMaterializable = 1, isAsCheapAsAMove=1 in { -def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd), - (ins nImmSplatI8:$SIMM), IIC_VMOVImm, - "vmov", "i8", "$Vd, $SIMM", "", - [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>; -def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd), - (ins nImmSplatI8:$SIMM), IIC_VMOVImm, - "vmov", "i8", "$Vd, $SIMM", "", - [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>; - -def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd), - (ins nImmSplatI16:$SIMM), IIC_VMOVImm, - "vmov", "i16", "$Vd, $SIMM", "", - [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> { - let Inst{9} = SIMM{9}; -} - -def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd), - (ins nImmSplatI16:$SIMM), IIC_VMOVImm, - "vmov", "i16", "$Vd, $SIMM", "", - [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> { - let Inst{9} = SIMM{9}; -} - -def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd), - (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, - "vmov", "i32", "$Vd, $SIMM", "", - [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> { - let Inst{11-8} = SIMM{11-8}; -} - -def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd), - (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, - "vmov", "i32", "$Vd, $SIMM", "", - [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> { - let Inst{11-8} = SIMM{11-8}; -} - -def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd), - (ins nImmSplatI64:$SIMM), IIC_VMOVImm, - "vmov", "i64", "$Vd, $SIMM", "", - [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>; -def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd), - (ins nImmSplatI64:$SIMM), IIC_VMOVImm, - "vmov", "i64", "$Vd, $SIMM", "", - [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>; - -def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd), - (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, - "vmov", "f32", "$Vd, $SIMM", "", - [(set DPR:$Vd, (v2f32 (NEONvmovFPImm 
timm:$SIMM)))]>; -def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd), - (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, - "vmov", "f32", "$Vd, $SIMM", "", - [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>; -} // isReMaterializable, isAsCheapAsAMove - -// Add support for bytes replication feature, so it could be GAS compatible. -multiclass NEONImmReplicateI8InstAlias { - // E.g. instructions below: - // "vmov.i32 d0, #0xffffffff" - // "vmov.i32 d0, #0xabababab" - // "vmov.i16 d0, #0xabab" - // are incorrect, but we could deal with such cases. - // For last two instructions, for example, it should emit: - // "vmov.i8 d0, #0xab" - def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm", - (VMOVv8i8 DPR:$Vd, nImmVMOVIReplicate:$Vm, pred:$p)>; - def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm", - (VMOVv16i8 QPR:$Vd, nImmVMOVIReplicate:$Vm, pred:$p)>; - // Also add same support for VMVN instructions. So instruction: - // "vmvn.i32 d0, #0xabababab" - // actually means: - // "vmov.i8 d0, #0x54" - def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm", - (VMOVv8i8 DPR:$Vd, nImmVINVIReplicate:$Vm, pred:$p)>; - def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm", - (VMOVv16i8 QPR:$Vd, nImmVINVIReplicate:$Vm, pred:$p)>; -} - -defm : NEONImmReplicateI8InstAlias; -defm : NEONImmReplicateI8InstAlias; -defm : NEONImmReplicateI8InstAlias; - -// Similar to above for types other than i8, e.g.: -// "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00" -// "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000" -// In this case we do not canonicalize VMVN to VMOV -multiclass NEONImmReplicateInstAlias { - def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm", - (V8 DPR:$Vd, nImmVMOVIReplicate:$Vm, pred:$p)>; - def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm", - (V16 QPR:$Vd, nImmVMOVIReplicate:$Vm, pred:$p)>; - def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm", - (NV8 DPR:$Vd, nImmVMOVIReplicate:$Vm, pred:$p)>; - def : 
NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm", - (NV16 QPR:$Vd, nImmVMOVIReplicate:$Vm, pred:$p)>; -} - -defm : NEONImmReplicateInstAlias; -defm : NEONImmReplicateInstAlias; -defm : NEONImmReplicateInstAlias; -// TODO: add "VMOV <-> VMVN" conversion for cases like -// "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55" -// "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00" - -// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0" -// require zero cycles to execute so they should be used wherever possible for -// setting a register to zero. - -// Even without these pseudo-insts we would probably end up with the correct -// instruction, but we could not mark the general ones with "isAsCheapAsAMove" -// since they are sometimes rather expensive (in general). - -let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in { - def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm, - [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))], - (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>, - Requires<[HasZCZ]>; - def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm, - [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))], - (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>, - Requires<[HasZCZ]>; -} - -// VMOV : Vector Get Lane (move scalar to ARM core register) - -def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?}, - (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane), - IIC_VMOVSI, "vmov", "s8", "$R, $V$lane", - [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V), - imm:$lane))]> { - let Inst{21} = lane{2}; - let Inst{6-5} = lane{1-0}; -} -def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1}, - (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane), - IIC_VMOVSI, "vmov", "s16", "$R, $V$lane", - [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V), - imm:$lane))]> { - let Inst{21} = lane{1}; - let Inst{6} = lane{0}; -} -def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?}, - (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane), - IIC_VMOVSI, "vmov", 
"u8", "$R, $V$lane", - [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V), - imm:$lane))]> { - let Inst{21} = lane{2}; - let Inst{6-5} = lane{1-0}; -} -def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1}, - (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane), - IIC_VMOVSI, "vmov", "u16", "$R, $V$lane", - [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V), - imm:$lane))]> { - let Inst{21} = lane{1}; - let Inst{6} = lane{0}; -} -def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, - (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane), - IIC_VMOVSI, "vmov", "32", "$R, $V$lane", - [(set GPR:$R, (extractelt (v2i32 DPR:$V), - imm:$lane))]>, - Requires<[HasVFP2, HasFastVGETLNi32]> { - let Inst{21} = lane{0}; -} -// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td -def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane), - (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src, - (DSubReg_i8_reg imm:$lane))), - (SubReg_i8_lane imm:$lane))>; -def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane), - (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src, - (DSubReg_i16_reg imm:$lane))), - (SubReg_i16_lane imm:$lane))>; -def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane), - (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src, - (DSubReg_i8_reg imm:$lane))), - (SubReg_i8_lane imm:$lane))>; -def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane), - (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src, - (DSubReg_i16_reg imm:$lane))), - (SubReg_i16_lane imm:$lane))>; -def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), - (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src, - (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane))>, - Requires<[HasNEON, HasFastVGETLNi32]>; -def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane), - (COPY_TO_REGCLASS - (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, - Requires<[HasNEON, HasSlowVGETLNi32]>; -def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), - (COPY_TO_REGCLASS - (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, - Requires<[HasNEON, 
HasSlowVGETLNi32]>; -def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2), - (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)), - (SSubReg_f32_reg imm:$src2))>; -def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2), - (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)), - (SSubReg_f32_reg imm:$src2))>; -//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2), -// (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>; -def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2), - (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>; - - -// VMOV : Vector Set Lane (move ARM core register to scalar) - -let Constraints = "$src1 = $V" in { -def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V), - (ins DPR:$src1, GPR:$R, VectorIndex8:$lane), - IIC_VMOVISL, "vmov", "8", "$V$lane, $R", - [(set DPR:$V, (vector_insert (v8i8 DPR:$src1), - GPR:$R, imm:$lane))]> { - let Inst{21} = lane{2}; - let Inst{6-5} = lane{1-0}; -} -def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V), - (ins DPR:$src1, GPR:$R, VectorIndex16:$lane), - IIC_VMOVISL, "vmov", "16", "$V$lane, $R", - [(set DPR:$V, (vector_insert (v4i16 DPR:$src1), - GPR:$R, imm:$lane))]> { - let Inst{21} = lane{1}; - let Inst{6} = lane{0}; -} -def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), - (ins DPR:$src1, GPR:$R, VectorIndex32:$lane), - IIC_VMOVISL, "vmov", "32", "$V$lane, $R", - [(set DPR:$V, (insertelt (v2i32 DPR:$src1), - GPR:$R, imm:$lane))]>, - Requires<[HasVFP2]> { - let Inst{21} = lane{0}; - // This instruction is equivalent as - // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm) - let isInsertSubreg = 1; -} -} -def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), - (v16i8 (INSERT_SUBREG QPR:$src1, - (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1, - (DSubReg_i8_reg imm:$lane))), - GPR:$src2, (SubReg_i8_lane imm:$lane))), - (DSubReg_i8_reg imm:$lane)))>; -def : Pat<(vector_insert (v8i16 
QPR:$src1), GPR:$src2, imm:$lane), - (v8i16 (INSERT_SUBREG QPR:$src1, - (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, - (DSubReg_i16_reg imm:$lane))), - GPR:$src2, (SubReg_i16_lane imm:$lane))), - (DSubReg_i16_reg imm:$lane)))>; -def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane), - (v4i32 (INSERT_SUBREG QPR:$src1, - (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1, - (DSubReg_i32_reg imm:$lane))), - GPR:$src2, (SubReg_i32_lane imm:$lane))), - (DSubReg_i32_reg imm:$lane)))>; - -def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)), - (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)), - SPR:$src2, (SSubReg_f32_reg imm:$src3))>; -def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)), - (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)), - SPR:$src2, (SSubReg_f32_reg imm:$src3))>; - -//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), -// (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>; -def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), - (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>; - -def : Pat<(v2f32 (scalar_to_vector SPR:$src)), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; -def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; -def : Pat<(v4f32 (scalar_to_vector SPR:$src)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; - -def : Pat<(v8i8 (scalar_to_vector GPR:$src)), - (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; -def : Pat<(v4i16 (scalar_to_vector GPR:$src)), - (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; -def : Pat<(v2i32 (scalar_to_vector GPR:$src)), - (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; - -def : Pat<(v16i8 (scalar_to_vector GPR:$src)), - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)), - dsub_0)>; -def : Pat<(v8i16 (scalar_to_vector GPR:$src)), - (INSERT_SUBREG 
(v8i16 (IMPLICIT_DEF)), - (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)), - dsub_0)>; -def : Pat<(v4i32 (scalar_to_vector GPR:$src)), - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), - (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)), - dsub_0)>; - -// VDUP : Vector Duplicate (from ARM core register to all elements) - -class VDUPD opcod1, bits<2> opcod3, string Dt, ValueType Ty> - : NVDup; -class VDUPQ opcod1, bits<2> opcod3, string Dt, ValueType Ty> - : NVDup; - -def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>; -def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>; -def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>, - Requires<[HasNEON, HasFastVDUP32]>; -def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; -def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; -def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; - -// NEONvdup patterns for uarchs with fast VDUP.32. -def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>, - Requires<[HasNEON,HasFastVDUP32]>; -def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>; - -// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead. -def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>, - Requires<[HasNEON,HasSlowVDUP32]>; -def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>, - Requires<[HasNEON,HasSlowVDUP32]>; - -// VDUP : Vector Duplicate Lane (from scalar to all elements) - -class VDUPLND op19_16, string OpcodeStr, string Dt, - ValueType Ty, Operand IdxTy> - : NVDupLane; - -class VDUPLNQ op19_16, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Operand IdxTy> - : NVDupLane; - -// Inst{19-16} is partially specified depending on the element size. 
- -def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> { - bits<3> lane; - let Inst{19-17} = lane{2-0}; -} -def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> { - bits<2> lane; - let Inst{19-18} = lane{1-0}; -} -def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> { - bits<1> lane; - let Inst{19} = lane{0}; -} -def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> { - bits<3> lane; - let Inst{19-17} = lane{2-0}; -} -def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> { - bits<2> lane; - let Inst{19-18} = lane{1-0}; -} -def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> { - bits<1> lane; - let Inst{19} = lane{0}; -} - -def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)), - (VDUPLN32d DPR:$Vm, imm:$lane)>; - -def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)), - (VDUPLN32q DPR:$Vm, imm:$lane)>; - -def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)), - (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, - (DSubReg_i8_reg imm:$lane))), - (SubReg_i8_lane imm:$lane)))>; -def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)), - (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src, - (DSubReg_i16_reg imm:$lane))), - (SubReg_i16_lane imm:$lane)))>; -def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)), - (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src, - (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane)))>; -def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)), - (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src, - (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane)))>; - -def : Pat<(v2f32 (NEONvdup (f32 SPR:$src))), - (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$src, ssub_0), (i32 0)))>; -def : Pat<(v4f32 (NEONvdup (f32 SPR:$src))), - (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$src, ssub_0), (i32 0)))>; - -// VMOVN : Vector Narrowing Move -defm 
VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN, - "vmovn", "i", trunc>; -// VQMOVN : Vector Saturating Narrowing Move -defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, - "vqmovn", "s", int_arm_neon_vqmovns>; -defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, - "vqmovn", "u", int_arm_neon_vqmovnu>; -defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, - "vqmovun", "s", int_arm_neon_vqmovnsu>; -// VMOVL : Vector Lengthening Move -defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>; -defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>; -def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>; -def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>; -def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>; - -// Vector Conversions. - -// VCVT : Vector Convert Between Floating-Point and Integers -def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", - v2i32, v2f32, fp_to_sint>; -def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", - v2i32, v2f32, fp_to_uint>; -def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", - v2f32, v2i32, sint_to_fp>; -def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", - v2f32, v2i32, uint_to_fp>; - -def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", - v4i32, v4f32, fp_to_sint>; -def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", - v4i32, v4f32, fp_to_uint>; -def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", - v4f32, v4i32, sint_to_fp>; -def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", - v4f32, v4i32, uint_to_fp>; - -def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16", - v4i16, v4f16, fp_to_sint>, - Requires<[HasNEON, HasFullFP16]>; -def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16", - v4i16, v4f16, 
fp_to_uint>, - Requires<[HasNEON, HasFullFP16]>; -def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16", - v4f16, v4i16, sint_to_fp>, - Requires<[HasNEON, HasFullFP16]>; -def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16", - v4f16, v4i16, uint_to_fp>, - Requires<[HasNEON, HasFullFP16]>; - -def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16", - v8i16, v8f16, fp_to_sint>, - Requires<[HasNEON, HasFullFP16]>; -def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16", - v8i16, v8f16, fp_to_uint>, - Requires<[HasNEON, HasFullFP16]>; -def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16", - v8f16, v8i16, sint_to_fp>, - Requires<[HasNEON, HasFullFP16]>; -def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16", - v8f16, v8i16, uint_to_fp>, - Requires<[HasNEON, HasFullFP16]>; - -// VCVT{A, N, P, M} -multiclass VCVT_FPI op10_8, SDPatternOperator IntS, - SDPatternOperator IntU> { - let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { - def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), - "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>; - def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), - "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>; - def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), - "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>; - def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), - "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>; - def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), - "s16.f16", v4i16, v4f16, IntS>, - Requires<[HasV8, HasNEON, HasFullFP16]>; - def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), - "s16.f16", v8i16, v8f16, IntS>, - Requires<[HasV8, HasNEON, HasFullFP16]>; - def UDh : 
N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), - "u16.f16", v4i16, v4f16, IntU>, - Requires<[HasV8, HasNEON, HasFullFP16]>; - def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), - "u16.f16", v8i16, v8f16, IntU>, - Requires<[HasV8, HasNEON, HasFullFP16]>; - } -} - -defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>; -defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>; -defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>; -defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>; - -// VCVT : Vector Convert Between Floating-Point and Fixed-Point. -let DecoderMethod = "DecodeVCVTD" in { -def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", - v2i32, v2f32, int_arm_neon_vcvtfp2fxs>; -def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", - v2i32, v2f32, int_arm_neon_vcvtfp2fxu>; -def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", - v2f32, v2i32, int_arm_neon_vcvtfxs2fp>; -def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", - v2f32, v2i32, int_arm_neon_vcvtfxu2fp>; -let Predicates = [HasNEON, HasFullFP16] in { -def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16", - v4i16, v4f16, int_arm_neon_vcvtfp2fxs>; -def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16", - v4i16, v4f16, int_arm_neon_vcvtfp2fxu>; -def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16", - v4f16, v4i16, int_arm_neon_vcvtfxs2fp>; -def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16", - v4f16, v4i16, int_arm_neon_vcvtfxu2fp>; -} // Predicates = [HasNEON, HasFullFP16] -} - -let DecoderMethod = "DecodeVCVTQ" in { -def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", - v4i32, v4f32, int_arm_neon_vcvtfp2fxs>; -def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", - v4i32, v4f32, int_arm_neon_vcvtfp2fxu>; -def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", - 
v4f32, v4i32, int_arm_neon_vcvtfxs2fp>; -def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", - v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; -let Predicates = [HasNEON, HasFullFP16] in { -def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16", - v8i16, v8f16, int_arm_neon_vcvtfp2fxs>; -def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16", - v8i16, v8f16, int_arm_neon_vcvtfp2fxu>; -def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16", - v8f16, v8i16, int_arm_neon_vcvtfxs2fp>; -def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16", - v8f16, v8i16, int_arm_neon_vcvtfxu2fp>; -} // Predicates = [HasNEON, HasFullFP16] -} - -def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0", - (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0", - (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0", - (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0", - (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>; - -def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0", - (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0", - (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0", - (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0", - (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>; - -def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0", - (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0", - (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0", - (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0", - (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>; - -def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0", - (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0", - (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>; 
-def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0", - (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0", - (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>; - - -// VCVT : Vector Convert Between Half-Precision and Single-Precision. -def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0, - IIC_VUNAQ, "vcvt", "f16.f32", - v4i16, v4f32, int_arm_neon_vcvtfp2hf>, - Requires<[HasNEON, HasFP16]>; -def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0, - IIC_VUNAQ, "vcvt", "f32.f16", - v4f32, v4i16, int_arm_neon_vcvthf2fp>, - Requires<[HasNEON, HasFP16]>; - -// Vector Reverse. - -// VREV64 : Vector Reverse elements within 64-bit doublewords - -class VREV64D op19_18, string OpcodeStr, string Dt, ValueType Ty> - : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd), - (ins DPR:$Vm), IIC_VMOVD, - OpcodeStr, Dt, "$Vd, $Vm", "", - [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>; -class VREV64Q op19_18, string OpcodeStr, string Dt, ValueType Ty> - : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd), - (ins QPR:$Vm), IIC_VMOVQ, - OpcodeStr, Dt, "$Vd, $Vm", "", - [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>; - -def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>; -def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>; -def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>; -def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>; - -def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>; -def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>; -def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>; -def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>; - -// VREV32 : Vector Reverse elements within 32-bit words - -class VREV32D op19_18, string OpcodeStr, string Dt, ValueType Ty> - : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd), - (ins DPR:$Vm), IIC_VMOVD, - OpcodeStr, Dt, "$Vd, $Vm", "", - [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>; -class VREV32Q op19_18, string OpcodeStr, 
string Dt, ValueType Ty> - : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd), - (ins QPR:$Vm), IIC_VMOVQ, - OpcodeStr, Dt, "$Vd, $Vm", "", - [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>; - -def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>; -def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>; - -def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>; -def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>; - -// VREV16 : Vector Reverse elements within 16-bit halfwords - -class VREV16D op19_18, string OpcodeStr, string Dt, ValueType Ty> - : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd), - (ins DPR:$Vm), IIC_VMOVD, - OpcodeStr, Dt, "$Vd, $Vm", "", - [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>; -class VREV16Q op19_18, string OpcodeStr, string Dt, ValueType Ty> - : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd), - (ins QPR:$Vm), IIC_VMOVQ, - OpcodeStr, Dt, "$Vd, $Vm", "", - [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>; - -def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>; -def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>; - -// Other Vector Shuffles. - -// Aligned extractions: really just dropping registers - -class AlignedVEXTq - : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))), - (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>; - -def : AlignedVEXTq; - -def : AlignedVEXTq; - -def : AlignedVEXTq; - -def : AlignedVEXTq; - -def : AlignedVEXTq; - - -// VEXT : Vector Extract - - -// All of these have a two-operand InstAlias. 
-let TwoOperandAliasConstraint = "$Vn = $Vd" in { -class VEXTd - : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm, - IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", - [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn), - (Ty DPR:$Vm), imm:$index)))]> { - bits<3> index; - let Inst{11} = 0b0; - let Inst{10-8} = index{2-0}; -} - -class VEXTq - : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd), - (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm, - IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", - [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn), - (Ty QPR:$Vm), imm:$index)))]> { - bits<4> index; - let Inst{11-8} = index{3-0}; -} -} - -def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> { - let Inst{10-8} = index{2-0}; -} -def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> { - let Inst{10-9} = index{1-0}; - let Inst{8} = 0b0; -} -def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> { - let Inst{10} = index{0}; - let Inst{9-8} = 0b00; -} -def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), - (v2f32 DPR:$Vm), - (i32 imm:$index))), - (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>; - -def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> { - let Inst{11-8} = index{3-0}; -} -def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> { - let Inst{11-9} = index{2-0}; - let Inst{8} = 0b0; -} -def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> { - let Inst{11-10} = index{1-0}; - let Inst{9-8} = 0b00; -} -def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> { - let Inst{11} = index{0}; - let Inst{10-8} = 0b000; -} -def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), - (v4f32 QPR:$Vm), - (i32 imm:$index))), - (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>; - -// VTRN : Vector Transpose - -def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">; -def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">; -def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">; - -def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">; -def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">; -def VTRNq32 : 
N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">; - -// VUZP : Vector Unzip (Deinterleave) - -def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">; -def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">; -// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. -def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm", - (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; - -def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">; -def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">; -def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">; - -// VZIP : Vector Zip (Interleave) - -def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">; -def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">; -// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. -def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm", - (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; - -def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">; -def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">; -def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">; - -// Vector Table Lookup and Table Extension. 
- -// VTBL : Vector Table Lookup -let DecoderMethod = "DecodeTBLInstruction" in { -def VTBL1 - : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd), - (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1, - "vtbl", "8", "$Vd, $Vn, $Vm", "", - [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>; - -let hasExtraSrcRegAllocReq = 1 in { -def VTBL2 - : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd), - (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2, - "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; -def VTBL3 - : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd), - (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3, - "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; -def VTBL4 - : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd), - (ins VecListFourD:$Vn, DPR:$Vm), - NVTBLFrm, IIC_VTB4, - "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; -} // hasExtraSrcRegAllocReq = 1 - -def VTBL3Pseudo - : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>; -def VTBL4Pseudo - : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>; - -// VTBX : Vector Table Extension -def VTBX1 - : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd), - (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1, - "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", - [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1 - DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>; -let hasExtraSrcRegAllocReq = 1 in { -def VTBX2 - : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd), - (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2, - "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; -def VTBX3 - : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd), - (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm), - NVTBLFrm, IIC_VTBX3, - "vtbx", "8", "$Vd, $Vn, $Vm", - "$orig = $Vd", []>; -def VTBX4 - : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), - (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4, - "vtbx", "8", "$Vd, $Vn, $Vm", - "$orig = $Vd", []>; -} // hasExtraSrcRegAllocReq = 1 - -def VTBX3Pseudo - : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, 
DPR:$src), - IIC_VTBX3, "$orig = $dst", []>; -def VTBX4Pseudo - : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), - IIC_VTBX4, "$orig = $dst", []>; -} // DecoderMethod = "DecodeTBLInstruction" - -def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)), - (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0, - v8i8:$Vn1, dsub_1), - v8i8:$Vm))>; -def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1, - v8i8:$Vm)), - (v8i8 (VTBX2 v8i8:$orig, - (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0, - v8i8:$Vn1, dsub_1), - v8i8:$Vm))>; - -def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1, - v8i8:$Vn2, v8i8:$Vm)), - (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, - v8i8:$Vn1, dsub_1, - v8i8:$Vn2, dsub_2, - (v8i8 (IMPLICIT_DEF)), dsub_3), - v8i8:$Vm))>; -def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1, - v8i8:$Vn2, v8i8:$Vm)), - (v8i8 (VTBX3Pseudo v8i8:$orig, - (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, - v8i8:$Vn1, dsub_1, - v8i8:$Vn2, dsub_2, - (v8i8 (IMPLICIT_DEF)), dsub_3), - v8i8:$Vm))>; - -def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1, - v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)), - (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, - v8i8:$Vn1, dsub_1, - v8i8:$Vn2, dsub_2, - v8i8:$Vn3, dsub_3), - v8i8:$Vm))>; -def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1, - v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)), - (v8i8 (VTBX4Pseudo v8i8:$orig, - (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, - v8i8:$Vn1, dsub_1, - v8i8:$Vn2, dsub_2, - v8i8:$Vn3, dsub_3), - v8i8:$Vm))>; - -// VRINT : Vector Rounding -multiclass VRINT_FPI op9_7, SDPatternOperator Int> { - let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { - def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary, - !strconcat("vrint", op), "f32", - v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> { - let Inst{9-7} = op9_7; - } - def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary, - !strconcat("vrint", op), "f32", - v4f32, v4f32, 
Int>, Requires<[HasV8, HasNEON]> { - let Inst{9-7} = op9_7; - } - def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary, - !strconcat("vrint", op), "f16", - v4f16, v4f16, Int>, - Requires<[HasV8, HasNEON, HasFullFP16]> { - let Inst{9-7} = op9_7; - } - def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary, - !strconcat("vrint", op), "f16", - v8f16, v8f16, Int>, - Requires<[HasV8, HasNEON, HasFullFP16]> { - let Inst{9-7} = op9_7; - } - } - - def : NEONInstAlias(NAME#"Df") DPR:$Dd, DPR:$Dm)>; - def : NEONInstAlias(NAME#"Qf") QPR:$Qd, QPR:$Qm)>; - let Predicates = [HasNEON, HasFullFP16] in { - def : NEONInstAlias(NAME#"Dh") DPR:$Dd, DPR:$Dm)>; - def : NEONInstAlias(NAME#"Qh") QPR:$Qd, QPR:$Qm)>; - } -} - -defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>; -defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>; -defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>; -defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>; -defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>; -defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>; - -// Cryptography instructions -let PostEncoderMethod = "NEONThumb2DataIPostEncoder", - DecoderNamespace = "v8Crypto", hasSideEffects = 0 in { - class AES - : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary, - !strconcat("aes", op), "8", v16i8, v16i8, Int>, - Requires<[HasV8, HasCrypto]>; - class AES2Op - : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary, - !strconcat("aes", op), "8", v16i8, v16i8, Int>, - Requires<[HasV8, HasCrypto]>; - class N2SHA op17_16, bits<3> op10_8, bit op7, bit op6, - SDPatternOperator Int> - : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary, - !strconcat("sha", op), "32", v4i32, v4i32, Int>, - Requires<[HasV8, HasCrypto]>; - class N2SHA2Op op17_16, bits<3> op10_8, bit op7, bit op6, - SDPatternOperator Int> - : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary, - !strconcat("sha", op), "32", v4i32, v4i32, Int>, - Requires<[HasV8, HasCrypto]>; - class N3SHA3Op op27_23, 
bits<2> op21_20, SDPatternOperator Int> - : N3VQInt3np, - Requires<[HasV8, HasCrypto]>; -} - -def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>; -def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>; -def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>; -def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>; - -def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>; -def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>; -def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>; -def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>; -def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>; -def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>; -def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>; -def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>; -def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>; -def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>; - -def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)), - (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG - (SHA1H (SUBREG_TO_REG (i64 0), - (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)), - ssub_0)), - ssub_0)), GPR)>; - -def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), - (SHA1C v4i32:$hash_abcd, - (SUBREG_TO_REG (i64 0), - (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), - ssub_0), - v4i32:$wk)>; - -def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), - (SHA1M v4i32:$hash_abcd, - (SUBREG_TO_REG (i64 0), - (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), - ssub_0), - v4i32:$wk)>; - -def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), - (SHA1P v4i32:$hash_abcd, - (SUBREG_TO_REG (i64 0), - (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), - ssub_0), - v4i32:$wk)>; - -//===----------------------------------------------------------------------===// -// NEON instructions for single-precision FP math -//===----------------------------------------------------------------------===// - 
-class N2VSPat - : NEONFPPat<(f32 (OpNode SPR:$a)), - (EXTRACT_SUBREG - (v2f32 (COPY_TO_REGCLASS (Inst - (INSERT_SUBREG - (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), - SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>; - -class N3VSPat - : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), - (EXTRACT_SUBREG - (v2f32 (COPY_TO_REGCLASS (Inst - (INSERT_SUBREG - (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), - SPR:$a, ssub_0), - (INSERT_SUBREG - (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), - SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; - -class N3VSPatFP16 - : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)), - (EXTRACT_SUBREG - (v4f16 (COPY_TO_REGCLASS (Inst - (INSERT_SUBREG - (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)), - HPR:$a, ssub_0), - (INSERT_SUBREG - (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)), - HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; - -class N3VSMulOpPat - : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), - (EXTRACT_SUBREG - (v2f32 (COPY_TO_REGCLASS (Inst - (INSERT_SUBREG - (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), - SPR:$acc, ssub_0), - (INSERT_SUBREG - (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), - SPR:$a, ssub_0), - (INSERT_SUBREG - (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), - SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; - -class NVCVTIFPat - : NEONFPPat<(f32 (OpNode GPR:$a)), - (f32 (EXTRACT_SUBREG - (v2f32 (Inst - (INSERT_SUBREG - (v2f32 (IMPLICIT_DEF)), - (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))), - ssub_0))>; -class NVCVTFIPat - : NEONFPPat<(i32 (OpNode SPR:$a)), - (i32 (EXTRACT_SUBREG - (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, ssub_0))), - ssub_0))>; - -def : N3VSPat; -def : N3VSPat; -def : N3VSPat; -def : N3VSMulOpPat, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; -def : N3VSMulOpPat, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; -def : N3VSMulOpPat, - Requires<[HasVFP4, 
UseNEONForFP, UseFusedMAC]>; -def : N3VSMulOpPat, - Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; -def : N2VSPat; -def : N2VSPat; -def : N3VSPatFP16, Requires<[HasFullFP16]>; -def : N3VSPatFP16, Requires<[HasFullFP16]>; -def : N3VSPat, Requires<[HasNEON]>; -def : N3VSPat, Requires<[HasNEON]>; -def : NVCVTFIPat; -def : NVCVTFIPat; -def : NVCVTIFPat; -def : NVCVTIFPat; - -// NEON doesn't have any f64 conversions, so provide patterns to make -// sure the VFP conversions match when extracting from a vector. -def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))), - (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; -def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))), - (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>; -def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))), - (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; -def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))), - (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>; - - -// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers. 
-def : Pat<(f32 (bitconvert GPR:$a)), - (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, - Requires<[HasNEON, DontUseVMOVSR]>; -def : Pat<(arm_vmovsr GPR:$a), - (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, - Requires<[HasNEON, DontUseVMOVSR]>; - -//===----------------------------------------------------------------------===// -// Non-Instruction Patterns -//===----------------------------------------------------------------------===// - -// bit_convert -let Predicates = [IsLE] in { - def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>; - def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>; - def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>; -} -def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>; -let Predicates = [IsLE] in { - def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>; - def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>; - def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>; - def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>; - def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>; -} -def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>; -let Predicates = [IsLE] in { - def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>; - def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>; - def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>; - def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>; - def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>; - def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>; - def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>; - def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>; - def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>; - def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>; -} -def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 
DPR:$src)>; -let Predicates = [IsLE] in { - def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>; - def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>; - def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (f64 DPR:$src)>; - def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>; - def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>; - def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>; - def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (v4f16 DPR:$src)>; - def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>; -} -def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>; -let Predicates = [IsLE] in { - def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>; - def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>; -} - -let Predicates = [IsLE] in { - def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>; -} -def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>; -let Predicates = [IsLE] in { - def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>; -} -def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>; -let Predicates = [IsLE] in { - def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8f16 (bitconvert 
(v2f64 QPR:$src))), (v8f16 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>; -} -def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>; -let Predicates = [IsLE] in { - def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>; -} -def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>; -let Predicates = [IsLE] in { - def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; -} - -let Predicates = [IsBE] in { - // 64 bit conversions - def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>; - def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>; - def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>; - def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>; - def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>; - def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>; - def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>; - def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>; - def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>; - def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), 
(VREV32d16 DPR:$src)>; - def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>; - def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>; - def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>; - def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>; - def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>; - def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>; - def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>; - def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>; - def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>; - def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>; - def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>; - def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>; - def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>; - def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>; - def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>; - def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>; - def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>; - - // 128 bit conversions - def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), 
(VREV32q16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>; - def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; -} - -// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian -def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), - (VREV64q8 (VLD1q8 addrmode6:$addr))>, Requires<[IsBE]>; -def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), - (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>, Requires<[IsBE]>; -def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), - (VREV64q16 (VLD1q16 addrmode6:$addr))>, Requires<[IsBE]>; -def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), - (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>, 
Requires<[IsBE]>; - -// Fold extracting an element out of a v2i32 into a vfp register. -def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), - (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; - -// Vector lengthening move with load, matching extending loads. - -// extload, zextload and sextload for a standard lengthening load. Example: -// Lengthen_Single<"8", "i16", "8"> = -// Pat<(v8i16 (extloadvi8 addrmode6:$addr)) -// (VMOVLuv8i16 (VLD1d8 addrmode6:$addr, -// (f64 (IMPLICIT_DEF)), (i32 0)))>; -multiclass Lengthen_Single { - let AddedComplexity = 10 in { - def _Any : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("extloadvi" # SrcTy) addrmode6:$addr)), - (!cast("VMOVLuv" # DestLanes # DestTy) - (!cast("VLD1d" # SrcTy) addrmode6:$addr))>; - - def _Z : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("zextloadvi" # SrcTy) addrmode6:$addr)), - (!cast("VMOVLuv" # DestLanes # DestTy) - (!cast("VLD1d" # SrcTy) addrmode6:$addr))>; - - def _S : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("sextloadvi" # SrcTy) addrmode6:$addr)), - (!cast("VMOVLsv" # DestLanes # DestTy) - (!cast("VLD1d" # SrcTy) addrmode6:$addr))>; - } -} - -// extload, zextload and sextload for a lengthening load which only uses -// half the lanes available. 
Example: -// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> = -// Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)), -// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, -// (f64 (IMPLICIT_DEF)), (i32 0))), -// dsub_0)>; -multiclass Lengthen_HalfSingle { - def _Any : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("extloadv" # SrcTy) addrmode6oneL32:$addr)), - (EXTRACT_SUBREG (!cast("VMOVLuv" # InsnLanes # InsnTy) - (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), - dsub_0)>; - def _Z : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("zextloadv" # SrcTy) addrmode6oneL32:$addr)), - (EXTRACT_SUBREG (!cast("VMOVLuv" # InsnLanes # InsnTy) - (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), - dsub_0)>; - def _S : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("sextloadv" # SrcTy) addrmode6oneL32:$addr)), - (EXTRACT_SUBREG (!cast("VMOVLsv" # InsnLanes # InsnTy) - (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), - dsub_0)>; -} - -// The following class definition is basically a copy of the -// Lengthen_HalfSingle definition above, however with an additional parameter -// "RevLanes" to select the correct VREV32dXX instruction. This is to convert -// data loaded by VLD1LN into proper vector format in big endian mode. 
-multiclass Lengthen_HalfSingle_Big_Endian { - def _Any : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("extloadv" # SrcTy) addrmode6oneL32:$addr)), - (EXTRACT_SUBREG (!cast("VMOVLuv" # InsnLanes # InsnTy) - (!cast("VREV32d" # RevLanes) - (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), - dsub_0)>; - def _Z : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("zextloadv" # SrcTy) addrmode6oneL32:$addr)), - (EXTRACT_SUBREG (!cast("VMOVLuv" # InsnLanes # InsnTy) - (!cast("VREV32d" # RevLanes) - (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), - dsub_0)>; - def _S : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("sextloadv" # SrcTy) addrmode6oneL32:$addr)), - (EXTRACT_SUBREG (!cast("VMOVLsv" # InsnLanes # InsnTy) - (!cast("VREV32d" # RevLanes) - (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), - dsub_0)>; -} - -// extload, zextload and sextload for a lengthening load followed by another -// lengthening load, to quadruple the initial length. 
-// -// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> = -// Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)) -// (EXTRACT_SUBREG (VMOVLuv4i32 -// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, -// (f64 (IMPLICIT_DEF)), -// (i32 0))), -// dsub_0)), -// dsub_0)>; -multiclass Lengthen_Double { - def _Any : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("extloadv" # SrcTy) addrmode6oneL32:$addr)), - (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) - (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) - (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), - dsub_0))>; - def _Z : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("zextloadv" # SrcTy) addrmode6oneL32:$addr)), - (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) - (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) - (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), - dsub_0))>; - def _S : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("sextloadv" # SrcTy) addrmode6oneL32:$addr)), - (!cast("VMOVLsv" # Insn2Lanes # Insn2Ty) - (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn1Lanes # Insn1Ty) - (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), - dsub_0))>; -} - -// The following class definition is basically a copy of the -// Lengthen_Double definition above, however with an additional parameter -// "RevLanes" to select the correct VREV32dXX instruction. This is to convert -// data loaded by VLD1LN into proper vector format in big endian mode. 
-multiclass Lengthen_Double_Big_Endian { - def _Any : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("extloadv" # SrcTy) addrmode6oneL32:$addr)), - (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) - (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) - (!cast("VREV32d" # RevLanes) - (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), - dsub_0))>; - def _Z : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("zextloadv" # SrcTy) addrmode6oneL32:$addr)), - (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) - (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) - (!cast("VREV32d" # RevLanes) - (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), - dsub_0))>; - def _S : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("sextloadv" # SrcTy) addrmode6oneL32:$addr)), - (!cast("VMOVLsv" # Insn2Lanes # Insn2Ty) - (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn1Lanes # Insn1Ty) - (!cast("VREV32d" # RevLanes) - (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), - dsub_0))>; -} - -// extload, zextload and sextload for a lengthening load followed by another -// lengthening load, to quadruple the initial length, but which ends up only -// requiring half the available lanes (a 64-bit outcome instead of a 128-bit). 
-// -// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> = -// Pat<(v2i32 (extloadvi8 addrmode6:$addr)) -// (EXTRACT_SUBREG (VMOVLuv4i32 -// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr, -// (f64 (IMPLICIT_DEF)), (i32 0))), -// dsub_0)), -// dsub_0)>; -multiclass Lengthen_HalfDouble { - def _Any : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("extloadv" # SrcTy) addrmode6:$addr)), - (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) - (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) - (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), - dsub_0)), - dsub_0)>; - def _Z : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("zextloadv" # SrcTy) addrmode6:$addr)), - (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) - (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) - (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), - dsub_0)), - dsub_0)>; - def _S : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("sextloadv" # SrcTy) addrmode6:$addr)), - (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn2Lanes # Insn2Ty) - (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn1Lanes # Insn1Ty) - (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), - dsub_0)), - dsub_0)>; -} - -// The following class definition is basically a copy of the -// Lengthen_HalfDouble definition above, however with an additional VREV16d8 -// instruction to convert data loaded by VLD1LN into proper vector format -// in big endian mode. 
-multiclass Lengthen_HalfDouble_Big_Endian { - def _Any : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("extloadv" # SrcTy) addrmode6:$addr)), - (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) - (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) - (!cast("VREV16d8") - (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), - dsub_0)), - dsub_0)>; - def _Z : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("zextloadv" # SrcTy) addrmode6:$addr)), - (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) - (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) - (!cast("VREV16d8") - (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), - dsub_0)), - dsub_0)>; - def _S : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("sextloadv" # SrcTy) addrmode6:$addr)), - (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn2Lanes # Insn2Ty) - (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn1Lanes # Insn1Ty) - (!cast("VREV16d8") - (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), - dsub_0)), - dsub_0)>; -} - -defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16 -defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32 -defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64 - -let Predicates = [IsLE] in { - defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 - defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 - - // Double lengthening - v4i8 -> v4i16 -> v4i32 - defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">; - // v2i8 -> v2i16 -> v2i32 - defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; - // v2i16 -> v2i32 -> v2i64 - defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; -} - -let Predicates = [IsBE] in { - defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16 - defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32 - - // Double lengthening - v4i8 -> v4i16 -> v4i32 - defm : 
Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">; - // v2i8 -> v2i16 -> v2i32 - defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">; - // v2i16 -> v2i32 -> v2i64 - defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">; -} - -// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 -let Predicates = [IsLE] in { - def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), - (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 - (VLD1LNd16 addrmode6:$addr, - (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; - def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), - (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 - (VLD1LNd16 addrmode6:$addr, - (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; - def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), - (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 - (VLD1LNd16 addrmode6:$addr, - (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; -} -// The following patterns are basically a copy of the patterns above, -// however with an additional VREV16d instruction to convert data -// loaded by VLD1LN into proper vector format in big endian mode. 
-let Predicates = [IsBE] in { - def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), - (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 - (!cast("VREV16d8") - (VLD1LNd16 addrmode6:$addr, - (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; - def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), - (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 - (!cast("VREV16d8") - (VLD1LNd16 addrmode6:$addr, - (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; - def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), - (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 - (!cast("VREV16d8") - (VLD1LNd16 addrmode6:$addr, - (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; -} - -def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)), - (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; -def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)), - (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; -def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)), - (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; -def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)), - (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; -def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)), - (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; - -//===----------------------------------------------------------------------===// -// Assembler aliases -// - -def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn", - (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>; -def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn", - (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>; - -// VAND/VBIC/VEOR/VORR accept but do not require a type suffix. 
-defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", - (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", - (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", - (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", - (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", - (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", - (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", - (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", - (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -// ... two-operand aliases -defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", - (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", - (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", - (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", - (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", - (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", - (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -// ... immediates -def : NEONInstAlias<"vand${p}.i16 $Vd, $imm", - (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>; -def : NEONInstAlias<"vand${p}.i32 $Vd, $imm", - (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>; -def : NEONInstAlias<"vand${p}.i16 $Vd, $imm", - (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>; -def : NEONInstAlias<"vand${p}.i32 $Vd, $imm", - (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>; - - -// VLD1 single-lane pseudo-instructions. 
These need special handling for -// the lane index that an InstAlias can't handle, so we use these instead. -def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", - (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr", - (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, - pred:$p)>; -def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr", - (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, - pred:$p)>; - -def VLD1LNdWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!", - (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VLD1LNdWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!", - (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, - pred:$p)>; -def VLD1LNdWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!", - (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, - pred:$p)>; -def VLD1LNdWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm", - (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, - rGPR:$Rm, pred:$p)>; -def VLD1LNdWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm", - (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, - rGPR:$Rm, pred:$p)>; -def VLD1LNdWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm", - (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, - rGPR:$Rm, pred:$p)>; - - -// VST1 single-lane pseudo-instructions. These need special handling for -// the lane index that an InstAlias can't handle, so we use these instead. 
-def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr", - (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr", - (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, - pred:$p)>; -def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr", - (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, - pred:$p)>; - -def VST1LNdWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!", - (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VST1LNdWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!", - (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, - pred:$p)>; -def VST1LNdWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!", - (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, - pred:$p)>; -def VST1LNdWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm", - (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, - rGPR:$Rm, pred:$p)>; -def VST1LNdWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm", - (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, - rGPR:$Rm, pred:$p)>; -def VST1LNdWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm", - (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, - rGPR:$Rm, pred:$p)>; - -// VLD2 single-lane pseudo-instructions. These need special handling for -// the lane index that an InstAlias can't handle, so we use these instead. 
-def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr", - (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, - pred:$p)>; -def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", - (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, - pred:$p)>; -def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", - (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>; -def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", - (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, - pred:$p)>; -def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", - (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, - pred:$p)>; - -def VLD2LNdWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!", - (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, - pred:$p)>; -def VLD2LNdWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", - (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, - pred:$p)>; -def VLD2LNdWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", - (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, - pred:$p)>; -def VLD2LNqWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", - (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, - pred:$p)>; -def VLD2LNqWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", - (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, - pred:$p)>; -def VLD2LNdWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm", - (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, - rGPR:$Rm, pred:$p)>; -def VLD2LNdWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", - (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, - rGPR:$Rm, pred:$p)>; -def 
VLD2LNdWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", - (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, - rGPR:$Rm, pred:$p)>; -def VLD2LNqWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", - (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, - rGPR:$Rm, pred:$p)>; -def VLD2LNqWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", - (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, - rGPR:$Rm, pred:$p)>; - - -// VST2 single-lane pseudo-instructions. These need special handling for -// the lane index that an InstAlias can't handle, so we use these instead. -def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr", - (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, - pred:$p)>; -def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", - (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, - pred:$p)>; -def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", - (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, - pred:$p)>; -def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", - (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, - pred:$p)>; -def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", - (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, - pred:$p)>; - -def VST2LNdWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!", - (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, - pred:$p)>; -def VST2LNdWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", - (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, - pred:$p)>; -def VST2LNdWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", - (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, - pred:$p)>; -def 
VST2LNqWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", - (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, - pred:$p)>; -def VST2LNqWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", - (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, - pred:$p)>; -def VST2LNdWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm", - (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, - rGPR:$Rm, pred:$p)>; -def VST2LNdWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", - (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, - rGPR:$Rm, pred:$p)>; -def VST2LNdWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", - (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, - rGPR:$Rm, pred:$p)>; -def VST2LNqWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", - (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, - rGPR:$Rm, pred:$p)>; -def VST2LNqWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", - (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, - rGPR:$Rm, pred:$p)>; - -// VLD3 all-lanes pseudo-instructions. These need special handling for -// the lane index that an InstAlias can't handle, so we use these instead. 
-def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, - pred:$p)>; -def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, - pred:$p)>; -def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, - pred:$p)>; -def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, - pred:$p)>; -def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, - pred:$p)>; -def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, - pred:$p)>; - -def VLD3DUPdWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, - pred:$p)>; -def VLD3DUPdWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, - pred:$p)>; -def VLD3DUPdWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, - pred:$p)>; -def VLD3DUPqWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, - pred:$p)>; -def VLD3DUPqWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, - pred:$p)>; -def VLD3DUPqWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, - pred:$p)>; -def 
VLD3DUPdWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, - rGPR:$Rm, pred:$p)>; -def VLD3DUPdWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, - rGPR:$Rm, pred:$p)>; -def VLD3DUPdWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, - rGPR:$Rm, pred:$p)>; -def VLD3DUPqWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, - rGPR:$Rm, pred:$p)>; -def VLD3DUPqWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, - rGPR:$Rm, pred:$p)>; -def VLD3DUPqWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, - rGPR:$Rm, pred:$p)>; - - -// VLD3 single-lane pseudo-instructions. These need special handling for -// the lane index that an InstAlias can't handle, so we use these instead. 
-def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; - -def VLD3LNdWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VLD3LNdWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VLD3LNdWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VLD3LNqWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VLD3LNqWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VLD3LNdWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, - rGPR:$Rm, pred:$p)>; -def VLD3LNdWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeDHWordIndexed:$list, - 
addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; -def VLD3LNdWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, - rGPR:$Rm, pred:$p)>; -def VLD3LNqWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQHWordIndexed:$list, - addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; -def VLD3LNqWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, - rGPR:$Rm, pred:$p)>; - -// VLD3 multiple structure pseudo-instructions. These need special handling for -// the vector operands that the normal instructions don't yet model. -// FIXME: Remove these when the register classes and instructions are updated. -def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; -def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; -def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; -def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; -def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; -def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; - -def VLD3dWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; -def VLD3dWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeD:$list, 
addrmode6align64:$addr, pred:$p)>; -def VLD3dWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; -def VLD3qWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; -def VLD3qWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; -def VLD3qWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; -def VLD3dWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6align64:$addr, - rGPR:$Rm, pred:$p)>; -def VLD3dWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6align64:$addr, - rGPR:$Rm, pred:$p)>; -def VLD3dWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6align64:$addr, - rGPR:$Rm, pred:$p)>; -def VLD3qWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6align64:$addr, - rGPR:$Rm, pred:$p)>; -def VLD3qWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6align64:$addr, - rGPR:$Rm, pred:$p)>; -def VLD3qWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6align64:$addr, - rGPR:$Rm, pred:$p)>; - -// VST3 single-lane pseudo-instructions. These need special handling for -// the lane index that an InstAlias can't handle, so we use these instead. 
-def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", - (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", - (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", - (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", - (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", - (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; - -def VST3LNdWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", - (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VST3LNdWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", - (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VST3LNdWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", - (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VST3LNqWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", - (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VST3LNqWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", - (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, - pred:$p)>; -def VST3LNdWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, - rGPR:$Rm, pred:$p)>; -def VST3LNdWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeDHWordIndexed:$list, - 
addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; -def VST3LNdWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, - rGPR:$Rm, pred:$p)>; -def VST3LNqWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQHWordIndexed:$list, - addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; -def VST3LNqWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, - rGPR:$Rm, pred:$p)>; - - -// VST3 multiple structure pseudo-instructions. These need special handling for -// the vector operands that the normal instructions don't yet model. -// FIXME: Remove these when the register classes and instructions are updated. -def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", - (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; -def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", - (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; -def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", - (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; -def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; -def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; -def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; - -def VST3dWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; -def VST3dWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", - (ins VecListThreeD:$list, 
addrmode6align64:$addr, pred:$p)>; -def VST3dWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; -def VST3qWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; -def VST3qWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; -def VST3qWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; -def VST3dWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6align64:$addr, - rGPR:$Rm, pred:$p)>; -def VST3dWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6align64:$addr, - rGPR:$Rm, pred:$p)>; -def VST3dWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6align64:$addr, - rGPR:$Rm, pred:$p)>; -def VST3qWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6align64:$addr, - rGPR:$Rm, pred:$p)>; -def VST3qWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6align64:$addr, - rGPR:$Rm, pred:$p)>; -def VST3qWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6align64:$addr, - rGPR:$Rm, pred:$p)>; - -// VLD4 all-lanes pseudo-instructions. These need special handling for -// the lane index that an InstAlias can't handle, so we use these instead. 
-def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, - pred:$p)>; -def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, - pred:$p)>; -def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr, - pred:$p)>; -def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, - pred:$p)>; -def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, - pred:$p)>; -def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr, - pred:$p)>; - -def VLD4DUPdWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, - pred:$p)>; -def VLD4DUPdWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, - pred:$p)>; -def VLD4DUPdWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr, - pred:$p)>; -def VLD4DUPqWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, - pred:$p)>; -def VLD4DUPqWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, - pred:$p)>; -def VLD4DUPqWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr, - pred:$p)>; -def 
VLD4DUPdWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, - rGPR:$Rm, pred:$p)>; -def VLD4DUPdWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, - rGPR:$Rm, pred:$p)>; -def VLD4DUPdWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourDAllLanes:$list, - addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>; -def VLD4DUPqWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, - rGPR:$Rm, pred:$p)>; -def VLD4DUPqWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, - rGPR:$Rm, pred:$p)>; -def VLD4DUPqWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQAllLanes:$list, - addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>; - - -// VLD4 single-lane pseudo-instructions. These need special handling for -// the lane index that an InstAlias can't handle, so we use these instead. 
-def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, - pred:$p)>; -def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, - pred:$p)>; -def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, - pred:$p)>; -def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, - pred:$p)>; -def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, - pred:$p)>; - -def VLD4LNdWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, - pred:$p)>; -def VLD4LNdWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, - pred:$p)>; -def VLD4LNdWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, - pred:$p)>; -def VLD4LNqWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, - pred:$p)>; -def VLD4LNqWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, - pred:$p)>; -def VLD4LNdWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, - rGPR:$Rm, pred:$p)>; -def VLD4LNdWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, 
- rGPR:$Rm, pred:$p)>; -def VLD4LNdWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourDWordIndexed:$list, - addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; -def VLD4LNqWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, - rGPR:$Rm, pred:$p)>; -def VLD4LNqWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQWordIndexed:$list, - addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; - - - -// VLD4 multiple structure pseudo-instructions. These need special handling for -// the vector operands that the normal instructions don't yet model. -// FIXME: Remove these when the register classes and instructions are updated. -def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourD:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourD:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourD:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, - pred:$p)>; - -def VLD4dWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourD:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VLD4dWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, 
$addr!", - (ins VecListFourD:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VLD4dWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourD:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VLD4qWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VLD4qWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VLD4qWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VLD4dWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6align64or128or256:$addr, - rGPR:$Rm, pred:$p)>; -def VLD4dWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6align64or128or256:$addr, - rGPR:$Rm, pred:$p)>; -def VLD4dWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6align64or128or256:$addr, - rGPR:$Rm, pred:$p)>; -def VLD4qWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, - rGPR:$Rm, pred:$p)>; -def VLD4qWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, - rGPR:$Rm, pred:$p)>; -def VLD4qWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, - rGPR:$Rm, pred:$p)>; - -// VST4 single-lane pseudo-instructions. These need special handling for -// the lane index that an InstAlias can't handle, so we use these instead. 
-def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", - (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, - pred:$p)>; -def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", - (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, - pred:$p)>; -def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", - (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, - pred:$p)>; -def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", - (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, - pred:$p)>; -def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", - (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, - pred:$p)>; - -def VST4LNdWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", - (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, - pred:$p)>; -def VST4LNdWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", - (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, - pred:$p)>; -def VST4LNdWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", - (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, - pred:$p)>; -def VST4LNqWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", - (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, - pred:$p)>; -def VST4LNqWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", - (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, - pred:$p)>; -def VST4LNdWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, - rGPR:$Rm, pred:$p)>; -def VST4LNdWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, 
- rGPR:$Rm, pred:$p)>; -def VST4LNdWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourDWordIndexed:$list, - addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; -def VST4LNqWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, - rGPR:$Rm, pred:$p)>; -def VST4LNqWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQWordIndexed:$list, - addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; - - -// VST4 multiple structure pseudo-instructions. These need special handling for -// the vector operands that the normal instructions don't yet model. -// FIXME: Remove these when the register classes and instructions are updated. -def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", - (ins VecListFourD:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", - (ins VecListFourD:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", - (ins VecListFourD:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", - (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", - (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", - (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, - pred:$p)>; - -def VST4dWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", - (ins VecListFourD:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VST4dWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, 
$addr!", - (ins VecListFourD:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VST4dWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", - (ins VecListFourD:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VST4qWB_fixed_Asm_8 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VST4qWB_fixed_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VST4qWB_fixed_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, - pred:$p)>; -def VST4dWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6align64or128or256:$addr, - rGPR:$Rm, pred:$p)>; -def VST4dWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6align64or128or256:$addr, - rGPR:$Rm, pred:$p)>; -def VST4dWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6align64or128or256:$addr, - rGPR:$Rm, pred:$p)>; -def VST4qWB_register_Asm_8 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, - rGPR:$Rm, pred:$p)>; -def VST4qWB_register_Asm_16 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, - rGPR:$Rm, pred:$p)>; -def VST4qWB_register_Asm_32 : - NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, - rGPR:$Rm, pred:$p)>; - -// VMOV/VMVN takes an optional datatype suffix -defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", - (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; -defm : 
NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", - (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; - -defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", - (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", - (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>; - -// VCLT (register) is an assembler alias for VCGT w/ the operands reversed. -// D-register versions. -def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", - (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; -def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm", - (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; -def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm", - (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; -def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm", - (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; -def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm", - (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; -def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm", - (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; -def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm", - (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; -let Predicates = [HasNEON, HasFullFP16] in -def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm", - (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; -// Q-register versions. 
-def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm", - (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; -def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm", - (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; -def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm", - (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; -def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm", - (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; -def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm", - (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; -def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm", - (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; -def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm", - (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; -let Predicates = [HasNEON, HasFullFP16] in -def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm", - (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; - -// VCLT (register) is an assembler alias for VCGT w/ the operands reversed. -// D-register versions. -def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm", - (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; -def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm", - (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; -def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm", - (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; -def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm", - (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; -def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm", - (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; -def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm", - (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; -def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm", - (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; -let Predicates = [HasNEON, HasFullFP16] in -def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm", - (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; -// Q-register versions. 
-def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm", - (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; -def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm", - (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; -def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm", - (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; -def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm", - (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; -def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm", - (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; -def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm", - (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; -def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm", - (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; -let Predicates = [HasNEON, HasFullFP16] in -def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm", - (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; - -// VSWP allows, but does not require, a type suffix. -defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", - (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", - (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>; - -// VBIF, VBIT, and VBSL allow, but do not require, a type suffix. -defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", - (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", - (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", - (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", - (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", - (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", - (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; - -// "vmov Rd, #-imm" can be handled via "vmvn". 
-def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", - (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; -def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", - (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; -def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", - (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; -def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", - (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; - -// 'gas' compatibility aliases for quad-word instructions. Strictly speaking, -// these should restrict to just the Q register variants, but the register -// classes are enough to match correctly regardless, so we keep it simple -// and just use MnemonicAlias. -def : NEONMnemonicAlias<"vbicq", "vbic">; -def : NEONMnemonicAlias<"vandq", "vand">; -def : NEONMnemonicAlias<"veorq", "veor">; -def : NEONMnemonicAlias<"vorrq", "vorr">; - -def : NEONMnemonicAlias<"vmovq", "vmov">; -def : NEONMnemonicAlias<"vmvnq", "vmvn">; -// Explicit versions for floating point so that the FPImm variants get -// handled early. The parser gets confused otherwise. 
-def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">; -def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">; - -def : NEONMnemonicAlias<"vaddq", "vadd">; -def : NEONMnemonicAlias<"vsubq", "vsub">; - -def : NEONMnemonicAlias<"vminq", "vmin">; -def : NEONMnemonicAlias<"vmaxq", "vmax">; - -def : NEONMnemonicAlias<"vmulq", "vmul">; - -def : NEONMnemonicAlias<"vabsq", "vabs">; - -def : NEONMnemonicAlias<"vshlq", "vshl">; -def : NEONMnemonicAlias<"vshrq", "vshr">; - -def : NEONMnemonicAlias<"vcvtq", "vcvt">; - -def : NEONMnemonicAlias<"vcleq", "vcle">; -def : NEONMnemonicAlias<"vceqq", "vceq">; - -def : NEONMnemonicAlias<"vzipq", "vzip">; -def : NEONMnemonicAlias<"vswpq", "vswp">; - -def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">; -def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">; - - -// Alias for loading floating point immediates that aren't representable -// using the vmov.f32 encoding but the bitpattern is representable using -// the .i32 encoding. -def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", - (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; -def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", - (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; diff --git a/suite/synctools/tablegen/ARM/ARMInstrThumb.td b/suite/synctools/tablegen/ARM/ARMInstrThumb.td deleted file mode 100644 index 88aab47a79..0000000000 --- a/suite/synctools/tablegen/ARM/ARMInstrThumb.td +++ /dev/null @@ -1,1707 +0,0 @@ -//===-- ARMInstrThumb.td - Thumb support for ARM -----------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the Thumb instruction set. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Thumb specific DAG Nodes. 
-// - -def imm_sr_XFORM: SDNodeXFormgetZExtValue(); - return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), SDLoc(N), MVT::i32); -}]>; -def ThumbSRImmAsmOperand: ImmAsmOperand<1,32> { let Name = "ImmThumbSR"; } -def imm_sr : Operand, PatLeaf<(imm), [{ - uint64_t Imm = N->getZExtValue(); - return Imm > 0 && Imm <= 32; -}], imm_sr_XFORM> { - let PrintMethod = "printThumbSRImm"; - let ParserMatchClass = ThumbSRImmAsmOperand; -} - -def imm0_7_neg : PatLeaf<(i32 imm), [{ - return (uint32_t)-N->getZExtValue() < 8; -}], imm_neg_XFORM>; - -def ThumbModImmNeg1_7AsmOperand : AsmOperandClass { let Name = "ThumbModImmNeg1_7"; } -def mod_imm1_7_neg : Operand, PatLeaf<(imm), [{ - unsigned Value = -(unsigned)N->getZExtValue(); - return 0 < Value && Value < 8; - }], imm_neg_XFORM> { - let ParserMatchClass = ThumbModImmNeg1_7AsmOperand; -} - -def ThumbModImmNeg8_255AsmOperand : AsmOperandClass { let Name = "ThumbModImmNeg8_255"; } -def mod_imm8_255_neg : Operand, PatLeaf<(imm), [{ - unsigned Value = -(unsigned)N->getZExtValue(); - return 7 < Value && Value < 256; - }], imm_neg_XFORM> { - let ParserMatchClass = ThumbModImmNeg8_255AsmOperand; -} - - -def imm0_255_comp : PatLeaf<(i32 imm), [{ - return ~((uint32_t)N->getZExtValue()) < 256; -}]>; - -def imm8_255_neg : PatLeaf<(i32 imm), [{ - unsigned Val = -N->getZExtValue(); - return Val >= 8 && Val < 256; -}], imm_neg_XFORM>; - -// Break imm's up into two pieces: an immediate + a left shift. This uses -// thumb_immshifted to match and thumb_immshifted_val and thumb_immshifted_shamt -// to get the val/shift pieces. 
-def thumb_immshifted : PatLeaf<(imm), [{ - return ARM_AM::isThumbImmShiftedVal((unsigned)N->getZExtValue()); -}]>; - -def thumb_immshifted_val : SDNodeXFormgetZExtValue()); - return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32); -}]>; - -def thumb_immshifted_shamt : SDNodeXFormgetZExtValue()); - return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32); -}]>; - -def imm256_510 : ImmLeaf= 256 && Imm < 511; -}]>; - -def thumb_imm256_510_addend : SDNodeXFormgetTargetConstant(N->getZExtValue() - 255, SDLoc(N), MVT::i32); -}]>; - -// Scaled 4 immediate. -def t_imm0_1020s4_asmoperand: AsmOperandClass { let Name = "Imm0_1020s4"; } -def t_imm0_1020s4 : Operand { - let PrintMethod = "printThumbS4ImmOperand"; - let ParserMatchClass = t_imm0_1020s4_asmoperand; - let OperandType = "OPERAND_IMMEDIATE"; -} - -def t_imm0_508s4_asmoperand: AsmOperandClass { let Name = "Imm0_508s4"; } -def t_imm0_508s4 : Operand { - let PrintMethod = "printThumbS4ImmOperand"; - let ParserMatchClass = t_imm0_508s4_asmoperand; - let OperandType = "OPERAND_IMMEDIATE"; -} -// Alias use only, so no printer is necessary. -def t_imm0_508s4_neg_asmoperand: AsmOperandClass { let Name = "Imm0_508s4Neg"; } -def t_imm0_508s4_neg : Operand { - let ParserMatchClass = t_imm0_508s4_neg_asmoperand; - let OperandType = "OPERAND_IMMEDIATE"; -} - -// Define Thumb specific addressing modes. - -// unsigned 8-bit, 2-scaled memory offset -class OperandUnsignedOffset_b8s2 : AsmOperandClass { - let Name = "UnsignedOffset_b8s2"; - let PredicateMethod = "isUnsignedOffset<8, 2>"; -} - -def UnsignedOffset_b8s2 : OperandUnsignedOffset_b8s2; - -// thumb style PC relative operand. signed, 8 bits magnitude, -// two bits shift. can be represented as either [pc, #imm], #imm, -// or relocatable expression... 
-def ThumbMemPC : AsmOperandClass { - let Name = "ThumbMemPC"; -} - -let OperandType = "OPERAND_PCREL" in { -def t_brtarget : Operand { - let EncoderMethod = "getThumbBRTargetOpValue"; - let DecoderMethod = "DecodeThumbBROperand"; -} - -// ADR instruction labels. -def t_adrlabel : Operand { - let EncoderMethod = "getThumbAdrLabelOpValue"; - let PrintMethod = "printAdrLabelOperand<2>"; - let ParserMatchClass = UnsignedOffset_b8s2; -} - - -def thumb_br_target : Operand { - let ParserMatchClass = ThumbBranchTarget; - let EncoderMethod = "getThumbBranchTargetOpValue"; - let OperandType = "OPERAND_PCREL"; -} - -def thumb_bl_target : Operand { - let ParserMatchClass = ThumbBranchTarget; - let EncoderMethod = "getThumbBLTargetOpValue"; - let DecoderMethod = "DecodeThumbBLTargetOperand"; -} - -// Target for BLX *from* thumb mode. -def thumb_blx_target : Operand { - let ParserMatchClass = ARMBranchTarget; - let EncoderMethod = "getThumbBLXTargetOpValue"; - let DecoderMethod = "DecodeThumbBLXOffset"; -} - -def thumb_bcc_target : Operand { - let ParserMatchClass = ThumbBranchTarget; - let EncoderMethod = "getThumbBCCTargetOpValue"; - let DecoderMethod = "DecodeThumbBCCTargetOperand"; -} - -def thumb_cb_target : Operand { - let ParserMatchClass = ThumbBranchTarget; - let EncoderMethod = "getThumbCBTargetOpValue"; - let DecoderMethod = "DecodeThumbCmpBROperand"; -} - -// t_addrmode_pc :=