diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index 5019cf31beee30..1744c1e5717224 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -46,17 +46,13 @@ BreakFunctionNames("break-funcs", cl::Hidden, cl::cat(BoltCategory)); -cl::list - FunctionPadSpec("pad-funcs", cl::CommaSeparated, - cl::desc("list of functions to pad with amount of bytes"), - cl::value_desc("func1:pad1,func2:pad2,func3:pad3,..."), - cl::Hidden, cl::cat(BoltCategory)); - -cl::list FunctionPadBeforeSpec( - "pad-funcs-before", cl::CommaSeparated, - cl::desc("list of functions to pad with amount of bytes"), - cl::value_desc("func1:pad1,func2:pad2,func3:pad3,..."), cl::Hidden, - cl::cat(BoltCategory)); +static cl::list +FunctionPadSpec("pad-funcs", + cl::CommaSeparated, + cl::desc("list of functions to pad with amount of bytes"), + cl::value_desc("func1:pad1,func2:pad2,func3:pad3,..."), + cl::Hidden, + cl::cat(BoltCategory)); static cl::opt MarkFuncs( "mark-funcs", @@ -74,12 +70,11 @@ X86AlignBranchBoundaryHotOnly("x86-align-branch-boundary-hot-only", cl::init(true), cl::cat(BoltOptCategory)); -size_t padFunction(const cl::list &Spec, - const BinaryFunction &Function) { +size_t padFunction(const BinaryFunction &Function) { static std::map FunctionPadding; - if (FunctionPadding.empty() && !Spec.empty()) { - for (const std::string &Spec : Spec) { + if (FunctionPadding.empty() && !FunctionPadSpec.empty()) { + for (std::string &Spec : FunctionPadSpec) { size_t N = Spec.find(':'); if (N == std::string::npos) continue; @@ -324,32 +319,6 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, Streamer.emitCodeAlignment(Function.getAlign(), &*BC.STI); } - if (size_t Padding = - opts::padFunction(opts::FunctionPadBeforeSpec, Function)) { - // Handle padFuncsBefore after the above alignment logic but before - // symbol addresses are decided. - if (!BC.HasRelocations) { - BC.errs() << "BOLT-ERROR: -pad-before-funcs is not supported in " - << "non-relocation mode\n"; - exit(1); - } - - // Preserve Function.getMinAlign(). - if (!isAligned(Function.getMinAlign(), Padding)) { - BC.errs() << "BOLT-ERROR: user-requested " << Padding - << " padding bytes before function " << Function - << " is not a multiple of the minimum function alignment (" - << Function.getMinAlign().value() << ").\n"; - exit(1); - } - - LLVM_DEBUG(dbgs() << "BOLT-DEBUG: padding before function " << Function - << " with " << Padding << " bytes\n"); - - // Since the padding is not executed, it can be null bytes. - Streamer.emitFill(Padding, 0); - } - MCContext &Context = Streamer.getContext(); const MCAsmInfo *MAI = Context.getAsmInfo(); @@ -404,7 +373,7 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, emitFunctionBody(Function, FF, /*EmitCodeOnly=*/false); // Emit padding if requested. - if (size_t Padding = opts::padFunction(opts::FunctionPadSpec, Function)) { + if (size_t Padding = opts::padFunction(Function)) { LLVM_DEBUG(dbgs() << "BOLT-DEBUG: padding function " << Function << " with " << Padding << " bytes\n"); Streamer.emitFill(Padding, MAI->getTextAlignFillValue()); diff --git a/bolt/lib/Passes/ReorderFunctions.cpp b/bolt/lib/Passes/ReorderFunctions.cpp index f8f6a01526dccf..1256d71342b13b 100644 --- a/bolt/lib/Passes/ReorderFunctions.cpp +++ b/bolt/lib/Passes/ReorderFunctions.cpp @@ -28,9 +28,7 @@ extern cl::OptionCategory BoltOptCategory; extern cl::opt Verbosity; extern cl::opt RandomSeed; -extern size_t padFunction(const cl::list &Spec, - const bolt::BinaryFunction &Function); -extern cl::list FunctionPadSpec, FunctionPadBeforeSpec; +extern size_t padFunction(const bolt::BinaryFunction &Function); extern cl::opt ReorderFunctions; cl::opt ReorderFunctions( @@ -306,12 +304,8 @@ Error ReorderFunctions::runOnFunctions(BinaryContext &BC) { return false; if (B->isIgnored()) return true; - const size_t PadA = - opts::padFunction(opts::FunctionPadSpec, *A) + - opts::padFunction(opts::FunctionPadBeforeSpec, *A); - const size_t PadB = - opts::padFunction(opts::FunctionPadSpec, *B) + - opts::padFunction(opts::FunctionPadBeforeSpec, *B); + const size_t PadA = opts::padFunction(*A); + const size_t PadB = opts::padFunction(*B); if (!PadA || !PadB) { if (PadA) return true; diff --git a/bolt/test/AArch64/pad-before-funcs.s b/bolt/test/AArch64/pad-before-funcs.s deleted file mode 100644 index 3ce0ee5e383894..00000000000000 --- a/bolt/test/AArch64/pad-before-funcs.s +++ /dev/null @@ -1,29 +0,0 @@ -# Test checks that --pad-before-funcs is working as expected. -# It should be able to introduce a configurable offset for the _start symbol. -# It should reject requests which don't obey the code alignment requirement. - -# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o -# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -Wl,--section-start=.text=0x4000 -# RUN: llvm-bolt %t.exe -o %t.bolt.0 --pad-funcs-before=_start:0 -# RUN: llvm-bolt %t.exe -o %t.bolt.4 --pad-funcs-before=_start:4 -# RUN: llvm-bolt %t.exe -o %t.bolt.8 --pad-funcs-before=_start:8 - -# RUN: not llvm-bolt %t.exe -o %t.bolt.8 --pad-funcs-before=_start:1 2>&1 | FileCheck --check-prefix=CHECK-BAD-ALIGN %s - -# CHECK-BAD-ALIGN: user-requested 1 padding bytes before function _start(*2) is not a multiple of the minimum function alignment (4). - -# RUN: llvm-objdump --section=.text --disassemble %t.bolt.0 | FileCheck --check-prefix=CHECK-0 %s -# RUN: llvm-objdump --section=.text --disassemble %t.bolt.4 | FileCheck --check-prefix=CHECK-4 %s -# RUN: llvm-objdump --section=.text --disassemble %t.bolt.8 | FileCheck --check-prefix=CHECK-8 %s - -# Trigger relocation mode in bolt. -.reloc 0, R_AARCH64_NONE - -.section .text -.globl _start - -# CHECK-0: 0000000000400000 <_start> -# CHECK-4: 0000000000400004 <_start> -# CHECK-8: 0000000000400008 <_start> -_start: - ret diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py index f8a20a1e224724..b6e0d71c1ae1b9 100644 --- a/clang/bindings/python/clang/cindex.py +++ b/clang/bindings/python/clang/cindex.py @@ -2125,12 +2125,26 @@ def get_field_offsetof(self): def is_anonymous(self): """ - Check if the record is anonymous. + Check whether this is a record type without a name, or a field where + the type is a record type without a name. + + Use is_anonymous_record_decl to check whether a record is an + "anonymous union" as defined in the C/C++ standard. """ if self.kind == CursorKind.FIELD_DECL: return self.type.get_declaration().is_anonymous() return conf.lib.clang_Cursor_isAnonymous(self) # type: ignore [no-any-return] + def is_anonymous_record_decl(self): + """ + Check if the record is an anonymous union as defined in the C/C++ standard + (or an "anonymous struct", the corresponding non-standard extension for + structs). + """ + if self.kind == CursorKind.FIELD_DECL: + return self.type.get_declaration().is_anonymous_record_decl() + return conf.lib.clang_Cursor_isAnonymousRecordDecl(self) # type: ignore [no-any-return] + def is_bitfield(self): """ Check if the field is a bitfield. @@ -3902,12 +3916,13 @@ def write_main_file_to_stdout(self): ("clang_Cursor_getTemplateArgumentType", [Cursor, c_uint], Type), ("clang_Cursor_getTemplateArgumentValue", [Cursor, c_uint], c_longlong), ("clang_Cursor_getTemplateArgumentUnsignedValue", [Cursor, c_uint], c_ulonglong), - ("clang_Cursor_isAnonymous", [Cursor], bool), - ("clang_Cursor_isBitField", [Cursor], bool), ("clang_Cursor_getBinaryOpcode", [Cursor], c_int), ("clang_Cursor_getBriefCommentText", [Cursor], _CXString), ("clang_Cursor_getRawCommentText", [Cursor], _CXString), ("clang_Cursor_getOffsetOfField", [Cursor], c_longlong), + ("clang_Cursor_isAnonymous", [Cursor], bool), + ("clang_Cursor_isAnonymousRecordDecl", [Cursor], bool), + ("clang_Cursor_isBitField", [Cursor], bool), ("clang_Location_isInSystemHeader", [SourceLocation], bool), ("clang_Type_getAlignOf", [Type], c_longlong), ("clang_Type_getClassType", [Type], Type), diff --git a/clang/bindings/python/tests/cindex/test_type.py b/clang/bindings/python/tests/cindex/test_type.py index ce05fdb1a1ebc0..e1d8c2aad1c3a4 100644 --- a/clang/bindings/python/tests/cindex/test_type.py +++ b/clang/bindings/python/tests/cindex/test_type.py @@ -463,8 +463,11 @@ def test_offset(self): self.assertNotEqual(children[0].spelling, "typeanon") self.assertEqual(children[1].spelling, "typeanon") self.assertEqual(fields[0].kind, CursorKind.FIELD_DECL) + self.assertTrue(fields[0].is_anonymous()) + self.assertFalse(fields[0].is_anonymous_record_decl()) self.assertEqual(fields[1].kind, CursorKind.FIELD_DECL) self.assertTrue(fields[1].is_anonymous()) + self.assertTrue(fields[1].is_anonymous_record_decl()) self.assertEqual(teststruct.type.get_offset("typeanon"), f1) self.assertEqual(teststruct.type.get_offset("bariton"), bariton) self.assertEqual(teststruct.type.get_offset("foo"), foo) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 0333f774fa7316..922dd21bcda7e1 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1309,6 +1309,8 @@ Sanitizers Python Binding Changes ---------------------- - Fixed an issue that led to crashes when calling ``Type.get_exception_specification_kind``. +- Added binding for ``clang_Cursor_isAnonymousRecordDecl``, which allows checking if + a declaration is an anonymous union or anonymous struct. OpenMP Support -------------- diff --git a/clang/include/clang/Basic/BuiltinsSPIRV.td b/clang/include/clang/Basic/BuiltinsSPIRV.td index 195c13573d047f..1e66939b822ef8 100644 --- a/clang/include/clang/Basic/BuiltinsSPIRV.td +++ b/clang/include/clang/Basic/BuiltinsSPIRV.td @@ -8,7 +8,7 @@ include "clang/Basic/BuiltinsBase.td" -def HLSLDistance : Builtin { +def SPIRVDistance : Builtin { let Spellings = ["__builtin_spirv_distance"]; let Attributes = [NoThrow, Const]; let Prototype = "void(...)"; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 56c56f564fd09d..067ff55b87ae63 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -2102,7 +2102,10 @@ void CodeGenFunction::EmitBranchOnBoolExpr( MDHelper.createConstant(BranchHintConstant)}); BrInst->setMetadata("hlsl.controlflow.hint", llvm::MDNode::get(CGM.getLLVMContext(), Vals)); - } break; + break; + } + case HLSLControlFlowHintAttr::SpellingNotCalculated: + break; } } diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index e42b65cc07deea..a2b07ef4824a1b 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -17,8 +17,7 @@ using namespace llvm::opt; SYCLInstallationDetector::SYCLInstallationDetector( const Driver &D, const llvm::Triple &HostTriple, - const llvm::opt::ArgList &Args) - : D(D) {} + const llvm::opt::ArgList &Args) {} void SYCLInstallationDetector::addSYCLIncludeArgs( const ArgList &DriverArgs, ArgStringList &CC1Args) const { @@ -31,8 +30,8 @@ void SYCLInstallationDetector::addSYCLIncludeArgs( } // Unsupported options for SYCL device compilation. -static ArrayRef getUnsupportedOpts() { - return { +static ArrayRef getUnsupportedOpts() { + static constexpr options::ID UnsupportedOpts[] = { options::OPT_fsanitize_EQ, // -fsanitize options::OPT_fcf_protection_EQ, // -fcf-protection options::OPT_fprofile_generate, @@ -53,7 +52,9 @@ static ArrayRef getUnsupportedOpts() { options::OPT_fprofile_instr_use_EQ, // -fprofile-instr-use options::OPT_forder_file_instrumentation, // -forder-file-instrumentation options::OPT_fcs_profile_generate, // -fcs-profile-generate - options::OPT_fcs_profile_generate_EQ}; + options::OPT_fcs_profile_generate_EQ, + }; + return UnsupportedOpts; } SYCLToolChain::SYCLToolChain(const Driver &D, const llvm::Triple &Triple, diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h index 9af2fd0c45c5ed..2a8b4eca9e9f82 100644 --- a/clang/lib/Driver/ToolChains/SYCL.h +++ b/clang/lib/Driver/ToolChains/SYCL.h @@ -22,9 +22,6 @@ class SYCLInstallationDetector { void addSYCLIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const; - -private: - const Driver &D; }; namespace toolchains { diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c index f08ff00c9cbeb8..a8d9fcd8569cfb 100644 --- a/clang/test/Driver/print-supported-extensions-riscv.c +++ b/clang/test/Driver/print-supported-extensions-riscv.c @@ -193,6 +193,7 @@ // CHECK-NEXT: xqcia 0.2 'Xqcia' (Qualcomm uC Arithmetic Extension) // CHECK-NEXT: xqciac 0.2 'Xqciac' (Qualcomm uC Load-Store Address Calculation Extension) // CHECK-NEXT: xqcicli 0.2 'Xqcicli' (Qualcomm uC Conditional Load Immediate Extension) +// CHECK-NEXT: xqcicm 0.2 'Xqcicm' (Qualcomm uC Conditional Move Extension) // CHECK-NEXT: xqcics 0.2 'Xqcics' (Qualcomm uC Conditional Select Extension) // CHECK-NEXT: xqcicsr 0.2 'Xqcicsr' (Qualcomm uC CSR Extension) // CHECK-NEXT: xqcilsm 0.2 'Xqcilsm' (Qualcomm uC Load Store Multiple Extension) diff --git a/clang/test/Driver/sycl-offload-jit.cpp b/clang/test/Driver/sycl-offload-jit.cpp index d7ab630084098e..eb192e08a3bc0c 100644 --- a/clang/test/Driver/sycl-offload-jit.cpp +++ b/clang/test/Driver/sycl-offload-jit.cpp @@ -3,7 +3,7 @@ /// Check the phases graph with -fsycl. Use of -fsycl enables offload // RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fsycl %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES %s -// RUN: %clang_cl -ccc-print-phases --target=x86_64-pc-windows-msvc -fsycl %s 2>&1 \ +// RUN: %clang_cl -ccc-print-phases --target=x86_64-pc-windows-msvc -fsycl -- %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES %s // CHK-PHASES: 0: input, "[[INPUT:.+\.cpp]]", c++, (host-sycl) // CHK-PHASES-NEXT: 1: preprocessor, {0}, c++-cpp-output, (host-sycl) @@ -35,7 +35,7 @@ // RUN: | FileCheck -check-prefixes=CHK-FSYCL-IS-DEVICE,CHK-FSYCL-IS-HOST %s // RUN: %clang -### -fsycl -fsycl-device-only %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-FSYCL-IS-DEVICE %s -// RUN: %clang_cl -### -fsycl -c %s 2>&1 \ +// RUN: %clang_cl -### -fsycl -c -- %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FSYCL-IS-DEVICE,CHK-FSYCL-IS-HOST %s // RUN: %clang -### -fsycl -fsycl-host-only %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-FSYCL-IS-HOST %s diff --git a/flang/lib/Common/Fortran.cpp b/flang/lib/Common/Fortran.cpp index 367a51f884e8a5..eec83419f9eb2b 100644 --- a/flang/lib/Common/Fortran.cpp +++ b/flang/lib/Common/Fortran.cpp @@ -136,7 +136,8 @@ bool AreCompatibleCUDADataAttrs(std::optional x, if (*x == CUDADataAttr::Device) { if ((y && (*y == CUDADataAttr::Managed || *y == CUDADataAttr::Unified || - *y == CUDADataAttr::Shared)) || + *y == CUDADataAttr::Shared || + *y == CUDADataAttr::Constant)) || (!y && (isCudaUnified || isCudaManaged))) { if (y && *y == CUDADataAttr::Shared && warning) { *warning = "SHARED attribute ignored"s; diff --git a/flang/test/Driver/parse-error.ll b/flang/test/Driver/parse-error.ll index 939ad7018f51c6..2c36d8b7ed7924 100644 --- a/flang/test/Driver/parse-error.ll +++ b/flang/test/Driver/parse-error.ll @@ -13,7 +13,7 @@ ; RUN: not %flang_fc1 -fdebug-unparse-no-sema -x f95 %s 2>&1 | FileCheck %s --check-prefix=ERROR ; RUN: not %flang_fc1 -fsyntax-only %s -x f95 2>&1 | FileCheck %s --check-prefix=ERROR -; ERROR: Could not scan {{.*}}parse-error.f95 +; ERROR: Could not scan {{.*}}parse-error.ll define void @foo() { ret void diff --git a/flang/test/Semantics/cuf10.cuf b/flang/test/Semantics/cuf10.cuf index 047503b3cca4ea..24b596b1fa55db 100644 --- a/flang/test/Semantics/cuf10.cuf +++ b/flang/test/Semantics/cuf10.cuf @@ -2,6 +2,7 @@ module m real, device :: a(4,8) real, managed, allocatable :: b(:,:) + integer, constant :: x = 1 contains attributes(global) subroutine kernel(a,b,c,n,m) integer, value :: n @@ -23,4 +24,10 @@ module m call devsub(c,4) ! not checked in OpenACC construct end do end + attributes(global) subroutine sub1(x) + integer :: x + end + subroutine sub2() + call sub1<<<1,1>>>(x) ! actual constant to device dummy + end end diff --git a/libc/config/gpu/config.json b/libc/config/gpu/amdgpu/config.json similarity index 100% rename from libc/config/gpu/config.json rename to libc/config/gpu/amdgpu/config.json diff --git a/libc/config/gpu/amdgpu/entrypoints.txt b/libc/config/gpu/amdgpu/entrypoints.txt new file mode 100644 index 00000000000000..7a1982808dfeb7 --- /dev/null +++ b/libc/config/gpu/amdgpu/entrypoints.txt @@ -0,0 +1,604 @@ +set(TARGET_LIBC_ENTRYPOINTS + # assert.h entrypoints + libc.src.assert.__assert_fail + + # ctype.h entrypoints + libc.src.ctype.isalnum + libc.src.ctype.isalnum_l + libc.src.ctype.isalpha + libc.src.ctype.isalpha_l + libc.src.ctype.isascii + libc.src.ctype.isblank + libc.src.ctype.isblank_l + libc.src.ctype.iscntrl + libc.src.ctype.iscntrl_l + libc.src.ctype.isdigit + libc.src.ctype.isdigit_l + libc.src.ctype.isgraph + libc.src.ctype.isgraph_l + libc.src.ctype.islower + libc.src.ctype.islower_l + libc.src.ctype.isprint + libc.src.ctype.isprint_l + libc.src.ctype.ispunct + libc.src.ctype.ispunct_l + libc.src.ctype.isspace + libc.src.ctype.isspace_l + libc.src.ctype.isupper + libc.src.ctype.isupper_l + libc.src.ctype.isxdigit + libc.src.ctype.isxdigit_l + libc.src.ctype.toascii + libc.src.ctype.tolower + libc.src.ctype.tolower_l + libc.src.ctype.toupper + libc.src.ctype.toupper_l + + # string.h entrypoints + libc.src.string.memccpy + libc.src.string.memchr + libc.src.string.memcmp + libc.src.string.memcpy + libc.src.string.memmem + libc.src.string.memmove + libc.src.string.mempcpy + libc.src.string.memrchr + libc.src.string.memset + libc.src.string.stpcpy + libc.src.string.stpncpy + libc.src.string.strcasestr + libc.src.string.strcat + libc.src.string.strchr + libc.src.string.strchrnul + libc.src.string.strcmp + libc.src.string.strcoll + libc.src.string.strcoll_l + libc.src.string.strcpy + libc.src.string.strcspn + libc.src.string.strdup + libc.src.string.strerror + libc.src.string.strlcat + libc.src.string.strlcpy + libc.src.string.strlen + libc.src.string.strncat + libc.src.string.strncmp + libc.src.string.strncpy + libc.src.string.strndup + libc.src.string.strnlen + libc.src.string.strpbrk + libc.src.string.strrchr + libc.src.string.strsep + libc.src.string.strspn + libc.src.string.strstr + libc.src.string.strtok + libc.src.string.strtok_r + libc.src.string.strxfrm + libc.src.string.strxfrm_l + + # strings.h entrypoints + libc.src.strings.bcmp + libc.src.strings.bcopy + libc.src.strings.bzero + libc.src.strings.index + libc.src.strings.rindex + libc.src.strings.strcasecmp + libc.src.strings.strncasecmp + + # stdbit.h entrypoints + libc.src.stdbit.stdc_bit_ceil_uc + libc.src.stdbit.stdc_bit_ceil_ui + libc.src.stdbit.stdc_bit_ceil_ul + libc.src.stdbit.stdc_bit_ceil_ull + libc.src.stdbit.stdc_bit_ceil_us + libc.src.stdbit.stdc_bit_floor_uc + libc.src.stdbit.stdc_bit_floor_ui + libc.src.stdbit.stdc_bit_floor_ul + libc.src.stdbit.stdc_bit_floor_ull + libc.src.stdbit.stdc_bit_floor_us + libc.src.stdbit.stdc_bit_width_uc + libc.src.stdbit.stdc_bit_width_ui + libc.src.stdbit.stdc_bit_width_ul + libc.src.stdbit.stdc_bit_width_ull + libc.src.stdbit.stdc_bit_width_us + libc.src.stdbit.stdc_count_ones_uc + libc.src.stdbit.stdc_count_ones_ui + libc.src.stdbit.stdc_count_ones_ul + libc.src.stdbit.stdc_count_ones_ull + libc.src.stdbit.stdc_count_ones_us + libc.src.stdbit.stdc_count_zeros_uc + libc.src.stdbit.stdc_count_zeros_ui + libc.src.stdbit.stdc_count_zeros_ul + libc.src.stdbit.stdc_count_zeros_ull + libc.src.stdbit.stdc_count_zeros_us + libc.src.stdbit.stdc_first_leading_one_uc + libc.src.stdbit.stdc_first_leading_one_ui + libc.src.stdbit.stdc_first_leading_one_ul + libc.src.stdbit.stdc_first_leading_one_ull + libc.src.stdbit.stdc_first_leading_one_us + libc.src.stdbit.stdc_first_leading_zero_uc + libc.src.stdbit.stdc_first_leading_zero_ui + libc.src.stdbit.stdc_first_leading_zero_ul + libc.src.stdbit.stdc_first_leading_zero_ull + libc.src.stdbit.stdc_first_leading_zero_us + libc.src.stdbit.stdc_first_trailing_one_uc + libc.src.stdbit.stdc_first_trailing_one_ui + libc.src.stdbit.stdc_first_trailing_one_ul + libc.src.stdbit.stdc_first_trailing_one_ull + libc.src.stdbit.stdc_first_trailing_one_us + libc.src.stdbit.stdc_first_trailing_zero_uc + libc.src.stdbit.stdc_first_trailing_zero_ui + libc.src.stdbit.stdc_first_trailing_zero_ul + libc.src.stdbit.stdc_first_trailing_zero_ull + libc.src.stdbit.stdc_first_trailing_zero_us + libc.src.stdbit.stdc_has_single_bit_uc + libc.src.stdbit.stdc_has_single_bit_ui + libc.src.stdbit.stdc_has_single_bit_ul + libc.src.stdbit.stdc_has_single_bit_ull + libc.src.stdbit.stdc_has_single_bit_us + libc.src.stdbit.stdc_leading_ones_uc + libc.src.stdbit.stdc_leading_ones_ui + libc.src.stdbit.stdc_leading_ones_ul + libc.src.stdbit.stdc_leading_ones_ull + libc.src.stdbit.stdc_leading_ones_us + libc.src.stdbit.stdc_leading_zeros_uc + libc.src.stdbit.stdc_leading_zeros_ui + libc.src.stdbit.stdc_leading_zeros_ul + libc.src.stdbit.stdc_leading_zeros_ull + libc.src.stdbit.stdc_leading_zeros_us + libc.src.stdbit.stdc_trailing_ones_uc + libc.src.stdbit.stdc_trailing_ones_ui + libc.src.stdbit.stdc_trailing_ones_ul + libc.src.stdbit.stdc_trailing_ones_ull + libc.src.stdbit.stdc_trailing_ones_us + libc.src.stdbit.stdc_trailing_zeros_uc + libc.src.stdbit.stdc_trailing_zeros_ui + libc.src.stdbit.stdc_trailing_zeros_ul + libc.src.stdbit.stdc_trailing_zeros_ull + libc.src.stdbit.stdc_trailing_zeros_us + + # stdlib.h entrypoints + libc.src.stdlib._Exit + libc.src.stdlib.abort + libc.src.stdlib.abs + libc.src.stdlib.atexit + libc.src.stdlib.atof + libc.src.stdlib.atoi + libc.src.stdlib.atol + libc.src.stdlib.atoll + libc.src.stdlib.bsearch + libc.src.stdlib.div + libc.src.stdlib.exit + libc.src.stdlib.labs + libc.src.stdlib.ldiv + libc.src.stdlib.llabs + libc.src.stdlib.lldiv + libc.src.stdlib.qsort + libc.src.stdlib.qsort_r + libc.src.stdlib.rand + libc.src.stdlib.srand + libc.src.stdlib.strtod + libc.src.stdlib.strtod_l + libc.src.stdlib.strtof + libc.src.stdlib.strtof_l + libc.src.stdlib.strtol + libc.src.stdlib.strtol_l + libc.src.stdlib.strtold + libc.src.stdlib.strtold_l + libc.src.stdlib.strtoll + libc.src.stdlib.strtoll_l + libc.src.stdlib.strtoul + libc.src.stdlib.strtoul_l + libc.src.stdlib.strtoull + libc.src.stdlib.strtoull_l + libc.src.stdlib.at_quick_exit + libc.src.stdlib.quick_exit + libc.src.stdlib.getenv + libc.src.stdlib.system + + # TODO: Implement these correctly + libc.src.stdlib.aligned_alloc + libc.src.stdlib.calloc + libc.src.stdlib.free + libc.src.stdlib.malloc + libc.src.stdlib.realloc + + # errno.h entrypoints + libc.src.errno.errno + + # stdio.h entrypoints + libc.src.stdio.clearerr + libc.src.stdio.fclose + libc.src.stdio.printf + libc.src.stdio.vprintf + libc.src.stdio.fprintf + libc.src.stdio.vfprintf + libc.src.stdio.snprintf + libc.src.stdio.sprintf + libc.src.stdio.vsnprintf + libc.src.stdio.vsprintf + libc.src.stdio.asprintf + libc.src.stdio.vasprintf + libc.src.stdio.scanf + libc.src.stdio.vscanf + libc.src.stdio.fscanf + libc.src.stdio.vfscanf + libc.src.stdio.sscanf + libc.src.stdio.vsscanf + libc.src.stdio.feof + libc.src.stdio.ferror + libc.src.stdio.fflush + libc.src.stdio.fgetc + libc.src.stdio.fgets + libc.src.stdio.fopen + libc.src.stdio.fputc + libc.src.stdio.fputs + libc.src.stdio.fread + libc.src.stdio.fseek + libc.src.stdio.ftell + libc.src.stdio.fwrite + libc.src.stdio.getc + libc.src.stdio.getchar + libc.src.stdio.putc + libc.src.stdio.putchar + libc.src.stdio.puts + libc.src.stdio.remove + libc.src.stdio.rename + libc.src.stdio.stderr + libc.src.stdio.stdin + libc.src.stdio.stdout + libc.src.stdio.ungetc + + # inttypes.h entrypoints + libc.src.inttypes.imaxabs + libc.src.inttypes.imaxdiv + libc.src.inttypes.strtoimax + libc.src.inttypes.strtoumax + + # time.h entrypoints + libc.src.time.clock + libc.src.time.clock_gettime + libc.src.time.timespec_get + libc.src.time.nanosleep + + # wchar.h entrypoints + libc.src.wchar.wctob + + # locale.h entrypoints + libc.src.locale.localeconv + libc.src.locale.duplocale + libc.src.locale.freelocale + libc.src.locale.localeconv + libc.src.locale.newlocale + libc.src.locale.setlocale + libc.src.locale.uselocale +) + +set(TARGET_LIBM_ENTRYPOINTS + # math.h entrypoints + libc.src.math.acos + libc.src.math.acosf + libc.src.math.acosh + libc.src.math.acoshf + libc.src.math.asin + libc.src.math.asinf + libc.src.math.asinh + libc.src.math.asinhf + libc.src.math.atan + libc.src.math.atan2 + libc.src.math.atan2f + libc.src.math.atan2l + libc.src.math.atanf + libc.src.math.atanh + libc.src.math.atanhf + libc.src.math.canonicalize + libc.src.math.canonicalizef + libc.src.math.canonicalizel + libc.src.math.cbrt + libc.src.math.cbrtf + libc.src.math.ceil + libc.src.math.ceilf + libc.src.math.ceill + libc.src.math.copysign + libc.src.math.copysignf + libc.src.math.copysignl + libc.src.math.cos + libc.src.math.cosf + libc.src.math.cosh + libc.src.math.coshf + libc.src.math.cospif + libc.src.math.ddivl + libc.src.math.dfmal + libc.src.math.dmull + libc.src.math.dsqrtl + libc.src.math.erf + libc.src.math.erff + libc.src.math.exp + libc.src.math.exp10 + libc.src.math.exp10f + libc.src.math.exp2 + libc.src.math.exp2f + libc.src.math.exp2m1f + libc.src.math.expf + libc.src.math.expm1 + libc.src.math.expm1f + libc.src.math.fabs + libc.src.math.fabsf + libc.src.math.fabsl + libc.src.math.fadd + libc.src.math.faddl + libc.src.math.fdim + libc.src.math.fdimf + libc.src.math.fdiml + libc.src.math.fdiv + libc.src.math.fdivl + libc.src.math.ffma + libc.src.math.ffmal + libc.src.math.floor + libc.src.math.floorf + libc.src.math.floorl + libc.src.math.fma + libc.src.math.fmaf + libc.src.math.fmax + libc.src.math.fmaxf + libc.src.math.fmaximum + libc.src.math.fmaximumf + libc.src.math.fmaximuml + libc.src.math.fmaximum_mag + libc.src.math.fmaximum_magf + libc.src.math.fmaximum_magl + libc.src.math.fmaximum_mag_num + libc.src.math.fmaximum_mag_numf + libc.src.math.fmaximum_mag_numl + libc.src.math.fmaximum_num + libc.src.math.fmaximum_numf + libc.src.math.fmaximum_numl + libc.src.math.fmaxl + libc.src.math.fmin + libc.src.math.fminf + libc.src.math.fminimum + libc.src.math.fminimumf + libc.src.math.fminimuml + libc.src.math.fminimum_mag + libc.src.math.fminimum_magf + libc.src.math.fminimum_magl + libc.src.math.fminimum_mag_num + libc.src.math.fminimum_mag_numf + libc.src.math.fminimum_mag_numl + libc.src.math.fminimum_num + libc.src.math.fminimum_numf + libc.src.math.fminimum_numl + libc.src.math.fminl + libc.src.math.fmod + libc.src.math.fmodf + libc.src.math.fmodl + libc.src.math.fmul + libc.src.math.fmull + libc.src.math.frexp + libc.src.math.frexpf + libc.src.math.frexpl + libc.src.math.fromfp + libc.src.math.fromfpf + libc.src.math.fromfpl + libc.src.math.fromfpx + libc.src.math.fromfpxf + libc.src.math.fromfpxl + libc.src.math.fsqrt + libc.src.math.fsqrtl + libc.src.math.fsub + libc.src.math.fsubl + libc.src.math.getpayload + libc.src.math.getpayloadf + libc.src.math.getpayloadl + libc.src.math.hypot + libc.src.math.hypotf + libc.src.math.ilogb + libc.src.math.ilogbf + libc.src.math.ilogbl + libc.src.math.isnan + libc.src.math.isnanf + libc.src.math.isnanl + libc.src.math.ldexp + libc.src.math.ldexpf + libc.src.math.ldexpl + libc.src.math.lgamma + libc.src.math.lgamma_r + libc.src.math.llogb + libc.src.math.llogbf + libc.src.math.llogbl + libc.src.math.llrint + libc.src.math.llrintf + libc.src.math.llrintl + libc.src.math.llround + libc.src.math.llroundf + libc.src.math.llroundl + libc.src.math.log + libc.src.math.log10 + libc.src.math.log10f + libc.src.math.log1p + libc.src.math.log1pf + libc.src.math.log2 + libc.src.math.log2f + libc.src.math.logb + libc.src.math.logbf + libc.src.math.logbl + libc.src.math.logf + libc.src.math.lrint + libc.src.math.lrintf + libc.src.math.lrintl + libc.src.math.lround + libc.src.math.lroundf + libc.src.math.lroundl + libc.src.math.modf + libc.src.math.modff + libc.src.math.modfl + libc.src.math.nan + libc.src.math.nanf + libc.src.math.nanl + libc.src.math.nearbyint + libc.src.math.nearbyintf + libc.src.math.nearbyintl + libc.src.math.nextafter + libc.src.math.nextafterf + libc.src.math.nextafterl + libc.src.math.nextdown + libc.src.math.nextdownf + libc.src.math.nextdownl + libc.src.math.nexttoward + libc.src.math.nexttowardf + libc.src.math.nexttowardl + libc.src.math.nextup + libc.src.math.nextupf + libc.src.math.nextupl + libc.src.math.pow + libc.src.math.powf + libc.src.math.powi + libc.src.math.powif + libc.src.math.remainder + libc.src.math.remainderf + libc.src.math.remainderl + libc.src.math.remquo + libc.src.math.remquof + libc.src.math.remquol + libc.src.math.rint + libc.src.math.rintf + libc.src.math.rintl + libc.src.math.roundeven + libc.src.math.roundevenf + libc.src.math.roundevenl + libc.src.math.round + libc.src.math.roundf + libc.src.math.roundl + libc.src.math.scalbln + libc.src.math.scalblnf + libc.src.math.scalblnl + libc.src.math.scalbn + libc.src.math.scalbnf + libc.src.math.scalbnl + libc.src.math.setpayload + libc.src.math.setpayloadf + libc.src.math.setpayloadl + libc.src.math.setpayloadsig + libc.src.math.setpayloadsigf + libc.src.math.setpayloadsigl + libc.src.math.sin + libc.src.math.sincos + libc.src.math.sincosf + libc.src.math.sinf + libc.src.math.sinh + libc.src.math.sinhf + libc.src.math.sinpif + libc.src.math.sqrt + libc.src.math.sqrtf + libc.src.math.sqrtl + libc.src.math.tan + libc.src.math.tanf + libc.src.math.tanh + libc.src.math.tanhf + libc.src.math.tgamma + libc.src.math.tgammaf + libc.src.math.totalorder + libc.src.math.totalorderf + libc.src.math.totalorderl + libc.src.math.totalordermag + libc.src.math.totalordermagf + libc.src.math.totalordermagl + libc.src.math.trunc + libc.src.math.truncf + libc.src.math.truncl + libc.src.math.ufromfp + libc.src.math.ufromfpf + libc.src.math.ufromfpl + libc.src.math.ufromfpx + libc.src.math.ufromfpxf + libc.src.math.ufromfpxl +) + +if(LIBC_TYPES_HAS_FLOAT16) + list(APPEND TARGET_LIBM_ENTRYPOINTS + # math.h C23 _Float16 entrypoints + libc.src.math.canonicalizef16 + libc.src.math.ceilf16 + libc.src.math.copysignf16 + libc.src.math.coshf16 + libc.src.math.exp10f16 + libc.src.math.exp10m1f16 + libc.src.math.exp2f16 + libc.src.math.expf16 + libc.src.math.f16add + libc.src.math.f16addf + libc.src.math.f16addl + libc.src.math.f16div + libc.src.math.f16divf + libc.src.math.f16divl + libc.src.math.f16fma + libc.src.math.f16fmaf + libc.src.math.f16fmal + libc.src.math.f16mul + libc.src.math.f16mulf + libc.src.math.f16mull + libc.src.math.f16sqrt + libc.src.math.f16sqrtf + libc.src.math.f16sqrtl + libc.src.math.f16sub + libc.src.math.f16subf + libc.src.math.f16subl + libc.src.math.fabsf16 + libc.src.math.fdimf16 + libc.src.math.floorf16 + libc.src.math.fmaxf16 + libc.src.math.fmaximum_mag_numf16 + libc.src.math.fmaximum_magf16 + libc.src.math.fmaximum_numf16 + libc.src.math.fmaximumf16 + libc.src.math.fminf16 + libc.src.math.fminimum_mag_numf16 + libc.src.math.fminimum_magf16 + libc.src.math.fminimum_numf16 + libc.src.math.fminimumf16 + libc.src.math.fmodf16 + libc.src.math.frexpf16 + libc.src.math.fromfpf16 + libc.src.math.fromfpxf16 + libc.src.math.getpayloadf16 + libc.src.math.ilogbf16 + libc.src.math.ldexpf16 + libc.src.math.llogbf16 + libc.src.math.llrintf16 + libc.src.math.llroundf16 + libc.src.math.log10f16 + libc.src.math.log2f16 + libc.src.math.logbf16 + libc.src.math.logf16 + libc.src.math.lrintf16 + libc.src.math.lroundf16 + libc.src.math.modff16 + libc.src.math.nanf16 + libc.src.math.nearbyintf16 + libc.src.math.nextafterf16 + libc.src.math.nextdownf16 + libc.src.math.nexttowardf16 + libc.src.math.nextupf16 + libc.src.math.remainderf16 + libc.src.math.remquof16 + libc.src.math.rintf16 + libc.src.math.roundevenf16 + libc.src.math.roundf16 + libc.src.math.scalblnf16 + libc.src.math.scalbnf16 + libc.src.math.setpayloadf16 + libc.src.math.setpayloadsigf16 + libc.src.math.sinhf16 + libc.src.math.sqrtf16 + libc.src.math.tanhf16 + libc.src.math.totalorderf16 + libc.src.math.totalordermagf16 + libc.src.math.truncf16 + libc.src.math.ufromfpf16 + libc.src.math.ufromfpxf16 + ) +endif() + +set(TARGET_LLVMLIBC_ENTRYPOINTS + ${TARGET_LIBC_ENTRYPOINTS} + ${TARGET_LIBM_ENTRYPOINTS} +) diff --git a/libc/config/gpu/headers.txt b/libc/config/gpu/amdgpu/headers.txt similarity index 100% rename from libc/config/gpu/headers.txt rename to libc/config/gpu/amdgpu/headers.txt diff --git a/libc/config/gpu/nvptx/config.json b/libc/config/gpu/nvptx/config.json new file mode 100644 index 00000000000000..d99f48ecbede1c --- /dev/null +++ b/libc/config/gpu/nvptx/config.json @@ -0,0 +1,37 @@ +{ + "errno": { + "LIBC_CONF_ERRNO_MODE": { + "value": "LIBC_ERRNO_MODE_SHARED" + } + }, + "printf": { + "LIBC_CONF_PRINTF_DISABLE_FLOAT": { + "value": true + }, + "LIBC_CONF_PRINTF_DISABLE_INDEX_MODE": { + "value": true + }, + "LIBC_CONF_PRINTF_DISABLE_WRITE_INT": { + "value": true + }, + "LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE": { + "value": false + }, + "LIBC_CONF_PRINTF_DISABLE_STRERROR": { + "value": true + } + }, + "scanf": { + "LIBC_CONF_SCANF_DISABLE_FLOAT": { + "value": true + }, + "LIBC_CONF_SCANF_DISABLE_INDEX_MODE": { + "value": true + } + }, + "math": { + "LIBC_CONF_MATH_OPTIMIZATIONS": { + "value": "(LIBC_MATH_SKIP_ACCURATE_PASS | LIBC_MATH_SMALL_TABLES | LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)" + } + } +} diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/nvptx/entrypoints.txt similarity index 99% rename from libc/config/gpu/entrypoints.txt rename to libc/config/gpu/nvptx/entrypoints.txt index b008e0e6684fdd..059dc9b20d6dd2 100644 --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/nvptx/entrypoints.txt @@ -376,7 +376,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.frexp libc.src.math.frexpf libc.src.math.frexpl - # FIXME: Broken on NVPTX. + # FIXME: Broken. # libc.src.math.fromfp # libc.src.math.fromfpf # libc.src.math.fromfpl @@ -506,7 +506,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.trunc libc.src.math.truncf libc.src.math.truncl - # FIXME: Broken on NVPTX. + # FIXME: Broken. # libc.src.math.ufromfp # libc.src.math.ufromfpf # libc.src.math.ufromfpl diff --git a/libc/config/gpu/nvptx/headers.txt b/libc/config/gpu/nvptx/headers.txt new file mode 100644 index 00000000000000..fa8ad7c11ba8b1 --- /dev/null +++ b/libc/config/gpu/nvptx/headers.txt @@ -0,0 +1,21 @@ +set(TARGET_PUBLIC_HEADERS + libc.include.assert + libc.include.ctype + libc.include.string + libc.include.strings + libc.include.signal + libc.include.float + libc.include.stdint + libc.include.inttypes + libc.include.limits + libc.include.math + libc.include.fenv + libc.include.time + libc.include.errno + libc.include.stdlib + libc.include.stdio + libc.include.wchar + libc.include.uchar + libc.include.features + libc.include.locale +) diff --git a/libcxx/docs/Hardening.rst b/libcxx/docs/Hardening.rst index 4002f40e1dad33..531065afb8e82b 100644 --- a/libcxx/docs/Hardening.rst +++ b/libcxx/docs/Hardening.rst @@ -407,7 +407,7 @@ Hardened containers status - ✅ - ❌ * - ``forward_list`` - - ❌ + - ✅ - ❌ * - ``deque`` - ✅ diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list index c1ab155d5a133e..ea854ea828b3be 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -202,6 +202,7 @@ template # include <__algorithm/lexicographical_compare.h> # include <__algorithm/lexicographical_compare_three_way.h> # include <__algorithm/min.h> +# include <__assert> # include <__config> # include <__cstddef/nullptr_t.h> # include <__iterator/distance.h> @@ -766,8 +767,14 @@ public: return std::min(__node_traits::max_size(this->__alloc_), numeric_limits::max()); } - _LIBCPP_HIDE_FROM_ABI reference front() { return __base::__before_begin()->__next_->__get_value(); } - _LIBCPP_HIDE_FROM_ABI const_reference front() const { return __base::__before_begin()->__next_->__get_value(); } + _LIBCPP_HIDE_FROM_ABI reference front() { + _LIBCPP_ASSERT_NON_NULL(!empty(), "forward_list::front called on an empty list"); + return __base::__before_begin()->__next_->__get_value(); + } + _LIBCPP_HIDE_FROM_ABI const_reference front() const { + _LIBCPP_ASSERT_NON_NULL(!empty(), "forward_list::front called on an empty list"); + return __base::__before_begin()->__next_->__get_value(); + } # ifndef _LIBCPP_CXX03_LANG # if _LIBCPP_STD_VER >= 17 @@ -1085,6 +1092,7 @@ void forward_list<_Tp, _Alloc>::push_front(const value_type& __v) { template void forward_list<_Tp, _Alloc>::pop_front() { + _LIBCPP_ASSERT_NON_NULL(!empty(), "forward_list::pop_front called on an empty list"); __node_pointer __p = __base::__before_begin()->__next_; __base::__before_begin()->__next_ = __p->__next_; this->__delete_node(__p); diff --git a/libcxx/src/filesystem/operations.cpp b/libcxx/src/filesystem/operations.cpp index d771f200973528..bd37c5af86f6c3 100644 --- a/libcxx/src/filesystem/operations.cpp +++ b/libcxx/src/filesystem/operations.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -32,11 +33,16 @@ # include # include # include +# include # include #endif #include /* values for fchmodat */ #include +// since Linux 4.5 and FreeBSD 13, but the Linux libc wrapper is only provided by glibc and musl +#if (defined(__linux__) && (defined(__GLIBC__) || _LIBCPP_HAS_MUSL_LIBC)) || defined(__FreeBSD__) +# define _LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE +#endif #if __has_include() # include # define _LIBCPP_FILESYSTEM_USE_SENDFILE @@ -44,10 +50,18 @@ # include # define _LIBCPP_FILESYSTEM_USE_COPYFILE #else -# include # define _LIBCPP_FILESYSTEM_USE_FSTREAM #endif +// sendfile and copy_file_range need to fall back +// to the fstream implementation for special files +#if (defined(_LIBCPP_FILESYSTEM_USE_SENDFILE) || defined(_LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE) || \ + defined(_LIBCPP_FILESYSTEM_USE_FSTREAM)) && \ + _LIBCPP_HAS_LOCALIZATION +# include +# define _LIBCPP_FILESYSTEM_NEED_FSTREAM +#endif + #if defined(__ELF__) && defined(_LIBCPP_LINK_RT_LIB) # pragma comment(lib, "rt") #endif @@ -178,9 +192,83 @@ void __copy(const path& from, const path& to, copy_options options, error_code* namespace detail { namespace { +#if defined(_LIBCPP_FILESYSTEM_NEED_FSTREAM) +bool copy_file_impl_fstream(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) { + ifstream in; + in.__open(read_fd.fd, ios::binary); + if (!in.is_open()) { + // This assumes that __open didn't reset the error code. + ec = capture_errno(); + return false; + } + read_fd.fd = -1; + ofstream out; + out.__open(write_fd.fd, ios::binary); + if (!out.is_open()) { + ec = capture_errno(); + return false; + } + write_fd.fd = -1; + + if (in.good() && out.good()) { + using InIt = istreambuf_iterator; + using OutIt = ostreambuf_iterator; + InIt bin(in); + InIt ein; + OutIt bout(out); + copy(bin, ein, bout); + } + if (out.fail() || in.fail()) { + ec = make_error_code(errc::io_error); + return false; + } + + ec.clear(); + return true; +} +#endif + +#if defined(_LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE) +bool copy_file_impl_copy_file_range(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) { + size_t count = read_fd.get_stat().st_size; + // a zero-length file is either empty, or not copyable by this syscall + // return early to avoid the syscall cost + if (count == 0) { + ec = {EINVAL, generic_category()}; + return false; + } + // do not modify the fd positions as copy_file_impl_sendfile may be called after a partial copy + off_t off_in = 0; + off_t off_out = 0; + do { + ssize_t res; + + if ((res = ::copy_file_range(read_fd.fd, &off_in, write_fd.fd, &off_out, count, 0)) == -1) { + ec = capture_errno(); + return false; + } + count -= res; + } while (count > 0); + + ec.clear(); + + return true; +} +#endif + #if defined(_LIBCPP_FILESYSTEM_USE_SENDFILE) -bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) { +bool copy_file_impl_sendfile(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) { size_t count = read_fd.get_stat().st_size; + // a zero-length file is either empty, or not copyable by this syscall + // return early to avoid the syscall cost + // however, we can't afford this luxury in the no-locale build, + // as we can't utilize the fstream impl to copy empty files +# if _LIBCPP_HAS_LOCALIZATION + if (count == 0) { + ec = {EINVAL, generic_category()}; + return false; + } +# endif do { ssize_t res; if ((res = ::sendfile(write_fd.fd, read_fd.fd, nullptr, count)) == -1) { @@ -194,6 +282,54 @@ bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_cod return true; } +#endif + +#if defined(_LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE) || defined(_LIBCPP_FILESYSTEM_USE_SENDFILE) +// If we have copy_file_range or sendfile, try both in succession (if available). +// If both fail, fall back to using fstream. +bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) { +# if defined(_LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE) + if (copy_file_impl_copy_file_range(read_fd, write_fd, ec)) { + return true; + } + // EINVAL: src and dst are the same file (this is not cheaply + // detectable from userspace) + // EINVAL: copy_file_range is unsupported for this file type by the + // underlying filesystem + // ENOTSUP: undocumented, can arise with old kernels and NFS + // EOPNOTSUPP: filesystem does not implement copy_file_range + // ETXTBSY: src or dst is an active swapfile (nonsensical, but allowed + // with normal copying) + // EXDEV: src and dst are on different filesystems that do not support + // cross-fs copy_file_range + // ENOENT: undocumented, can arise with CIFS + // ENOSYS: unsupported by kernel or blocked by seccomp + if (ec.value() != EINVAL && ec.value() != ENOTSUP && ec.value() != EOPNOTSUPP && ec.value() != ETXTBSY && + ec.value() != EXDEV && ec.value() != ENOENT && ec.value() != ENOSYS) { + return false; + } + ec.clear(); +# endif + +# if defined(_LIBCPP_FILESYSTEM_USE_SENDFILE) + if (copy_file_impl_sendfile(read_fd, write_fd, ec)) { + return true; + } + // EINVAL: unsupported file type + if (ec.value() != EINVAL) { + return false; + } + ec.clear(); +# endif + +# if defined(_LIBCPP_FILESYSTEM_NEED_FSTREAM) + return copy_file_impl_fstream(read_fd, write_fd, ec); +# else + // since iostreams are unavailable in the no-locale build, just fail after a failed sendfile + ec.assign(EINVAL, std::system_category()); + return false; +# endif +} #elif defined(_LIBCPP_FILESYSTEM_USE_COPYFILE) bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) { struct CopyFileState { @@ -217,37 +353,7 @@ bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_cod } #elif defined(_LIBCPP_FILESYSTEM_USE_FSTREAM) bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) { - ifstream in; - in.__open(read_fd.fd, ios::binary); - if (!in.is_open()) { - // This assumes that __open didn't reset the error code. - ec = capture_errno(); - return false; - } - read_fd.fd = -1; - ofstream out; - out.__open(write_fd.fd, ios::binary); - if (!out.is_open()) { - ec = capture_errno(); - return false; - } - write_fd.fd = -1; - - if (in.good() && out.good()) { - using InIt = istreambuf_iterator; - using OutIt = ostreambuf_iterator; - InIt bin(in); - InIt ein; - OutIt bout(out); - copy(bin, ein, bout); - } - if (out.fail() || in.fail()) { - ec = make_error_code(errc::io_error); - return false; - } - - ec.clear(); - return true; + return copy_file_impl_fstream(read_fd, write_fd, ec); } #else # error "Unknown implementation for copy_file_impl" diff --git a/libcxx/test/benchmarks/numeric/gcd.bench.cpp b/libcxx/test/benchmarks/numeric/gcd.bench.cpp index abbc7e9dd04f96..ca5fed59463a21 100644 --- a/libcxx/test/benchmarks/numeric/gcd.bench.cpp +++ b/libcxx/test/benchmarks/numeric/gcd.bench.cpp @@ -25,7 +25,7 @@ static std::array generate(std::uniform_int_distribution distributio static void bm_gcd_random(benchmark::State& state) { std::array data = generate(); - while (state.KeepRunningBatch(data.size())) + while (state.KeepRunningBatch(data.size() * data.size())) for (auto v0 : data) for (auto v1 : data) benchmark::DoNotOptimize(std::gcd(v0, v1)); diff --git a/libcxx/test/libcxx/containers/sequences/forwardlist/assert.pass.cpp b/libcxx/test/libcxx/containers/sequences/forwardlist/assert.pass.cpp new file mode 100644 index 00000000000000..6d1748e6450256 --- /dev/null +++ b/libcxx/test/libcxx/containers/sequences/forwardlist/assert.pass.cpp @@ -0,0 +1,47 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// Test hardening assertions for std::forward_list. + +// REQUIRES: has-unix-headers +// REQUIRES: libcpp-hardening-mode={{extensive|debug}} +// UNSUPPORTED: c++03 +// XFAIL: libcpp-hardening-mode=debug && availability-verbose_abort-missing + +#include + +#include "check_assertion.h" + +int main(int, char**) { + { // Default-constructed list. + std::forward_list c; + const auto& const_c = c; + TEST_LIBCPP_ASSERT_FAILURE(c.front(), "forward_list::front called on an empty list"); + TEST_LIBCPP_ASSERT_FAILURE(const_c.front(), "forward_list::front called on an empty list"); + TEST_LIBCPP_ASSERT_FAILURE(c.pop_front(), "forward_list::pop_front called on an empty list"); + } + + { // Non-empty list becomes empty. + std::forward_list c; + const auto& const_c = c; + c.push_front(1); + + // Check that there's no assertion on valid access. + (void)c.front(); + (void)const_c.front(); + + c.pop_front(); + TEST_LIBCPP_ASSERT_FAILURE(c.pop_front(), "forward_list::pop_front called on an empty list"); + TEST_LIBCPP_ASSERT_FAILURE(c.front(), "forward_list::front called on an empty list"); + TEST_LIBCPP_ASSERT_FAILURE(const_c.front(), "forward_list::front called on an empty list"); + } + + return 0; +} diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file_procfs.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file_procfs.pass.cpp new file mode 100644 index 00000000000000..29bc8e41250d22 --- /dev/null +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file_procfs.pass.cpp @@ -0,0 +1,53 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14 +// REQUIRES: linux +// UNSUPPORTED: no-filesystem +// XFAIL: no-localization +// UNSUPPORTED: availability-filesystem-missing + +// + +// bool copy_file(const path& from, const path& to); +// bool copy_file(const path& from, const path& to, error_code& ec) noexcept; +// bool copy_file(const path& from, const path& to, copy_options options); +// bool copy_file(const path& from, const path& to, copy_options options, +// error_code& ec) noexcept; + +#include +#include +#include + +#include "test_macros.h" +#include "filesystem_test_helper.h" + +namespace fs = std::filesystem; + +// Linux has various virtual filesystems such as /proc and /sys +// where files may have no length (st_size == 0), but still contain data. +// This is because the to-be-read data is usually generated ad-hoc by the reading syscall +// These files can not be copied with kernel-side copies like copy_file_range or sendfile, +// and must instead be copied via a traditional userspace read + write loop. +int main(int, char** argv) { + const fs::path procfile{"/proc/self/comm"}; + assert(file_size(procfile) == 0); + + scoped_test_env env; + std::error_code ec = GetTestEC(); + + const fs::path dest = env.make_env_path("dest"); + + assert(copy_file(procfile, dest, ec)); + assert(!ec); + + // /proc/self/comm contains the filename of the executable, plus a null terminator + assert(file_size(dest) == fs::path(argv[0]).filename().string().size() + 1); + + return 0; +} diff --git a/lldb/include/lldb/Core/Progress.h b/lldb/include/lldb/Core/Progress.h index f6cea282842e1c..5876eae717e96f 100644 --- a/lldb/include/lldb/Core/Progress.h +++ b/lldb/include/lldb/Core/Progress.h @@ -59,6 +59,12 @@ namespace lldb_private { class Progress { public: + /// Enum to indicate the origin of a progress event, internal or external. + enum class Origin : uint8_t { + eInternal = 0, + eExternal = 1, + }; + /// Construct a progress object that will report information. /// /// The constructor will create a unique progress reporting object and @@ -83,7 +89,8 @@ class Progress { Progress(std::string title, std::string details = {}, std::optional total = std::nullopt, lldb_private::Debugger *debugger = nullptr, - Timeout minimum_report_time = std::nullopt); + Timeout minimum_report_time = std::nullopt, + Origin origin = Origin::eInternal); /// Destroy the progress object. /// @@ -118,6 +125,9 @@ class Progress { /// The optional debugger ID to report progress to. If this has no value /// then all debuggers will receive this event. std::optional debugger_id; + + /// The origin of the progress event, wheter it is internal or external. + Origin origin; }; private: diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index 0094fcd596fdf7..50d2233509de6f 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -1357,6 +1357,8 @@ enum DebuggerBroadcastBit { eBroadcastBitError = (1 << 2), eBroadcastSymbolChange = (1 << 3), eBroadcastBitProgressCategory = (1 << 4), + eBroadcastBitExternalProgress = (1 << 5), + eBroadcastBitExternalProgressCategory = (1 << 6), }; /// Used for expressing severity in logs and diagnostics. diff --git a/lldb/source/Core/Progress.cpp b/lldb/source/Core/Progress.cpp index ed8dfb85639b71..63f98043208094 100644 --- a/lldb/source/Core/Progress.cpp +++ b/lldb/source/Core/Progress.cpp @@ -28,12 +28,14 @@ static llvm::ManagedStatic g_progress_signposts; Progress::Progress(std::string title, std::string details, std::optional total, lldb_private::Debugger *debugger, - Timeout minimum_report_time) + Timeout minimum_report_time, + Progress::Origin origin) : m_total(total.value_or(Progress::kNonDeterministicTotal)), m_minimum_report_time(minimum_report_time), m_progress_data{title, ++g_id, debugger ? std::optional(debugger->GetID()) - : std::nullopt}, + : std::nullopt, + origin}, m_last_report_time_ns( std::chrono::nanoseconds( std::chrono::steady_clock::now().time_since_epoch()) @@ -106,9 +108,15 @@ void Progress::ReportProgress() { if (completed < m_prev_completed) return; // An overflow in the m_completed counter. Just ignore these events. + // Change the category bit if we're an internal or external progress. + uint32_t progress_category_bit = + m_progress_data.origin == Progress::Origin::eExternal + ? lldb::eBroadcastBitExternalProgress + : lldb::eBroadcastBitProgress; + Debugger::ReportProgress(m_progress_data.progress_id, m_progress_data.title, m_details, completed, m_total, - m_progress_data.debugger_id); + m_progress_data.debugger_id, progress_category_bit); m_prev_completed = completed; } @@ -201,10 +209,13 @@ void ProgressManager::ReportProgress( // broadcasting to it since that bit doesn't need that information. const uint64_t completed = (type == EventType::Begin) ? 0 : Progress::kNonDeterministicTotal; + const uint32_t progress_category_bit = + progress_data.origin == Progress::Origin::eExternal + ? lldb::eBroadcastBitExternalProgressCategory + : lldb::eBroadcastBitProgressCategory; Debugger::ReportProgress(progress_data.progress_id, progress_data.title, "", completed, Progress::kNonDeterministicTotal, - progress_data.debugger_id, - lldb::eBroadcastBitProgressCategory); + progress_data.debugger_id, progress_category_bit); } void ProgressManager::Expire(llvm::StringRef key) { diff --git a/lldb/source/Plugins/InstrumentationRuntime/ASanLibsanitizers/InstrumentationRuntimeASanLibsanitizers.cpp b/lldb/source/Plugins/InstrumentationRuntime/ASanLibsanitizers/InstrumentationRuntimeASanLibsanitizers.cpp index cd91f4a6ff1bc2..b1151febb7cc45 100644 --- a/lldb/source/Plugins/InstrumentationRuntime/ASanLibsanitizers/InstrumentationRuntimeASanLibsanitizers.cpp +++ b/lldb/source/Plugins/InstrumentationRuntime/ASanLibsanitizers/InstrumentationRuntimeASanLibsanitizers.cpp @@ -90,17 +90,9 @@ void InstrumentationRuntimeASanLibsanitizers::Activate() { if (!process_sp) return; - lldb::ModuleSP module_sp = GetRuntimeModuleSP(); - Breakpoint *breakpoint = ReportRetriever::SetupBreakpoint( - module_sp, process_sp, ConstString("sanitizers_address_on_report")); - - if (!breakpoint) { - breakpoint = ReportRetriever::SetupBreakpoint( - module_sp, process_sp, - ConstString("_Z22raise_sanitizers_error23sanitizer_error_context")); - } - + GetRuntimeModuleSP(), process_sp, + ConstString("sanitizers_address_on_report")); if (!breakpoint) return; diff --git a/lldb/source/Plugins/InstrumentationRuntime/Utility/ReportRetriever.cpp b/lldb/source/Plugins/InstrumentationRuntime/Utility/ReportRetriever.cpp index 04ce339d8f6610..74e0fa7d49f82d 100644 --- a/lldb/source/Plugins/InstrumentationRuntime/Utility/ReportRetriever.cpp +++ b/lldb/source/Plugins/InstrumentationRuntime/Utility/ReportRetriever.cpp @@ -219,7 +219,6 @@ bool ReportRetriever::NotifyBreakpointHit(ProcessSP process_sp, return true; // Return true to stop the target } -// FIXME: Setup the breakpoint using a less fragile SPI. rdar://124399066 Breakpoint *ReportRetriever::SetupBreakpoint(ModuleSP module_sp, ProcessSP process_sp, ConstString symbol_name) { @@ -235,19 +234,13 @@ Breakpoint *ReportRetriever::SetupBreakpoint(ModuleSP module_sp, if (!symbol->ValueIsAddress() || !symbol->GetAddressRef().IsValid()) return nullptr; - Target &target = process_sp->GetTarget(); - addr_t symbol_address = symbol->GetAddressRef().GetOpcodeLoadAddress(&target); - - if (symbol_address == LLDB_INVALID_ADDRESS) - return nullptr; - + const Address &address = symbol->GetAddressRef(); const bool internal = true; const bool hardware = false; - Breakpoint *breakpoint = - process_sp->GetTarget() - .CreateBreakpoint(symbol_address, internal, hardware) - .get(); + Breakpoint *breakpoint = process_sp->GetTarget() + .CreateBreakpoint(address, internal, hardware) + .get(); return breakpoint; } diff --git a/lldb/unittests/Core/ProgressReportTest.cpp b/lldb/unittests/Core/ProgressReportTest.cpp index 20324e92523874..0943d7b990809a 100644 --- a/lldb/unittests/Core/ProgressReportTest.cpp +++ b/lldb/unittests/Core/ProgressReportTest.cpp @@ -425,3 +425,104 @@ TEST_F(ProgressReportTest, TestProgressManagerDisjointReports) { ASSERT_FALSE(listener_sp->GetEvent(event_sp, TIMEOUT)); } + +TEST_F(ProgressReportTest, TestExternalReportCreation) { + ListenerSP listener_sp = + CreateListenerFor(lldb::eBroadcastBitExternalProgress); + EventSP event_sp; + const ProgressEventData *data; + + // Scope this for RAII on the progress objects. + // Create progress reports and check that their respective events for having + // started and ended are broadcasted. + { + Progress progress1("Progress report 1", "Starting report 1", + /*total=*/std::nullopt, /*debugger=*/nullptr, + /*minimum_report_time=*/std::chrono::seconds(0), + Progress::Origin::eExternal); + Progress progress2("Progress report 2", "Starting report 2", + /*total=*/std::nullopt, /*debugger=*/nullptr, + /*minimum_report_time=*/std::chrono::seconds(0), + Progress::Origin::eExternal); + Progress progress3("Progress report 3", "Starting report 3", + /*total=*/std::nullopt, /*debugger=*/nullptr, + /*minimum_report_time=*/std::chrono::seconds(0), + Progress::Origin::eExternal); + } + + // Start popping events from the queue, they should have been recevied + // in this order: + // Starting progress: 1, 2, 3 + // Ending progress: 3, 2, 1 + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); + data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); + + EXPECT_EQ(data->GetDetails(), "Starting report 1"); + EXPECT_FALSE(data->IsFinite()); + EXPECT_FALSE(data->GetCompleted()); + EXPECT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); + EXPECT_EQ(data->GetMessage(), "Progress report 1: Starting report 1"); + + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); + data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); + + EXPECT_EQ(data->GetDetails(), "Starting report 2"); + EXPECT_FALSE(data->IsFinite()); + EXPECT_FALSE(data->GetCompleted()); + EXPECT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); + EXPECT_EQ(data->GetMessage(), "Progress report 2: Starting report 2"); + + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); + data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); + + EXPECT_EQ(data->GetDetails(), "Starting report 3"); + EXPECT_FALSE(data->IsFinite()); + EXPECT_FALSE(data->GetCompleted()); + EXPECT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); + EXPECT_EQ(data->GetMessage(), "Progress report 3: Starting report 3"); + + // Progress report objects should be destroyed at this point so + // get each report from the queue and check that they've been + // destroyed in reverse order. + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); + data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); + + EXPECT_EQ(data->GetTitle(), "Progress report 3"); + EXPECT_TRUE(data->GetCompleted()); + EXPECT_FALSE(data->IsFinite()); + EXPECT_EQ(data->GetMessage(), "Progress report 3: Starting report 3"); + + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); + data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); + + EXPECT_EQ(data->GetTitle(), "Progress report 2"); + EXPECT_TRUE(data->GetCompleted()); + EXPECT_FALSE(data->IsFinite()); + EXPECT_EQ(data->GetMessage(), "Progress report 2: Starting report 2"); + + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); + data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); + + EXPECT_EQ(data->GetTitle(), "Progress report 1"); + EXPECT_TRUE(data->GetCompleted()); + EXPECT_FALSE(data->IsFinite()); + EXPECT_EQ(data->GetMessage(), "Progress report 1: Starting report 1"); +} + +TEST_F(ProgressReportTest, TestExternalReportNotReceived) { + ListenerSP listener_sp = CreateListenerFor(lldb::eBroadcastBitProgress); + EventSP event_sp; + + // Scope this for RAII on the progress objects. + // Create progress reports and check that their respective events for having + // started and ended are broadcasted. + { + Progress progress1("External Progress report 1", + "Starting external report 1", + /*total=*/std::nullopt, /*debugger=*/nullptr, + /*minimum_report_time=*/std::chrono::seconds(0), + Progress::Origin::eExternal); + } + + ASSERT_FALSE(listener_sp->GetEvent(event_sp, TIMEOUT)); +} diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index 835b910ec452da..0dc63f34806b4c 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -438,6 +438,9 @@ The current vendor extensions supported are: ``experimental-Xqcicli`` LLVM implements `version 0.2 of the Qualcomm uC Conditional Load Immediate extension specification `__ by Qualcomm. All instructions are prefixed with `qc.` as described in the specification. These instructions are only available for riscv32. +``experimental-Xqcicm`` + LLVM implements `version 0.2 of the Qualcomm uC Conditional Move extension specification `__ by Qualcomm. All instructions are prefixed with `qc.` as described in the specification. These instructions are only available for riscv32. + ``experimental-Xqcics`` LLVM implements `version 0.2 of the Qualcomm uC Conditional Select extension specification `__ by Qualcomm. All instructions are prefixed with `qc.` as described in the specification. These instructions are only available for riscv32. diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 8112d996c3b476..dff83d72c3aadb 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -234,6 +234,8 @@ Changes to the RISC-V Backend extension. * Adds experimental assembler support for the Qualcomm uC 'Xqcicli` (Conditional Load Immediate) extension. +* Adds experimental assembler support for the Qualcomm uC 'Xqcicm` (Conditonal Move) + extension. * Added ``Sdext`` and ``Sdtrig`` extensions. Changes to the WebAssembly Backend diff --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h index fd4ac3164c686d..187642257cc522 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h +++ b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h @@ -187,13 +187,17 @@ struct FunctionInfo { /// /// \param Addr The address to lookup. /// + /// \param MergedFuncsData A pointer to an optional DataExtractor that, if + /// non-null, will be set to the raw data of the MergedFunctionInfo, if + /// present. + /// /// \returns An LookupResult or an error describing the issue that was /// encountered during decoding. An error should only be returned if the /// address is not contained in the FunctionInfo or if the data is corrupted. - static llvm::Expected lookup(DataExtractor &Data, - const GsymReader &GR, - uint64_t FuncAddr, - uint64_t Addr); + static llvm::Expected + lookup(DataExtractor &Data, const GsymReader &GR, uint64_t FuncAddr, + uint64_t Addr, + std::optional *MergedFuncsData = nullptr); uint64_t startAddress() const { return Range.start(); } uint64_t endAddress() const { return Range.end(); } diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h index 3d532588a70234..ee7929ae850fd0 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h +++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h @@ -127,10 +127,29 @@ class GsymReader { /// is much faster for lookups. /// /// \param Addr A virtual address from the orignal object file to lookup. + /// + /// \param MergedFuncsData A pointer to an optional DataExtractor that, if + /// non-null, will be set to the raw data of the MergedFunctionInfo, if + /// present. + /// /// \returns An expected LookupResult that contains only the information /// needed for the current address, or an error object that indicates reason /// for failing to lookup the address. - llvm::Expected lookup(uint64_t Addr) const; + llvm::Expected + lookup(uint64_t Addr, + std::optional *MergedFuncsData = nullptr) const; + + /// Lookup all merged functions for a given address. + /// + /// This function performs a lookup for the specified address and then + /// retrieves additional LookupResults from any merged functions associated + /// with the primary LookupResult. + /// + /// \param Addr The address to lookup. + /// + /// \returns A vector of LookupResult objects, where the first element is the + /// primary result, followed by results for any merged functions + llvm::Expected> lookupAll(uint64_t Addr) const; /// Get a string from the string table. /// diff --git a/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h b/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h index b68f9b6098d9e6..203fb13cada102 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h +++ b/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h @@ -31,6 +31,18 @@ struct MergedFunctionsInfo { /// \returns A boolean indicating if this FunctionInfo is valid. bool isValid() { return !MergedFunctions.empty(); } + /// Get a vector of DataExtractor objects for the functions in this + /// MergedFunctionsInfo object. + /// + /// \param Data The binary stream to read the data from. This object must have + /// the data for the MergedFunctionsInfo object starting at offset zero. The + /// data can contain more data than needed. + /// + /// \returns An llvm::Expected containing a vector of DataExtractor objects on + /// success, or an error object if parsing fails. + static llvm::Expected> + getFuncsDataExtractors(DataExtractor &Data); + /// Decode an MergedFunctionsInfo object from a binary data stream. /// /// \param Data The binary stream to read the data from. This object must have diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index 3a018d2a95c6b1..504c24c27d84c4 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -448,19 +449,22 @@ struct MCDCRecord { private: CounterMappingRegion Region; TestVectors TV; - TVPairMap IndependencePairs; + std::optional IndependencePairs; BoolVector Folded; CondIDMap PosToID; LineColPairMap CondLoc; public: MCDCRecord(const CounterMappingRegion &Region, TestVectors &&TV, - TVPairMap &&IndependencePairs, BoolVector &&Folded, - CondIDMap &&PosToID, LineColPairMap &&CondLoc) - : Region(Region), TV(std::move(TV)), - IndependencePairs(std::move(IndependencePairs)), - Folded(std::move(Folded)), PosToID(std::move(PosToID)), - CondLoc(std::move(CondLoc)){}; + BoolVector &&Folded, CondIDMap &&PosToID, LineColPairMap &&CondLoc) + : Region(Region), TV(std::move(TV)), Folded(std::move(Folded)), + PosToID(std::move(PosToID)), CondLoc(std::move(CondLoc)) { + findIndependencePairs(); + } + + // Compare executed test vectors against each other to find an independence + // pairs for each condition. This processing takes the most time. + void findIndependencePairs(); const CounterMappingRegion &getDecisionRegion() const { return Region; } unsigned getNumConditions() const { @@ -493,10 +497,10 @@ struct MCDCRecord { /// TestVectors requires a translation from a ordinal position to actual /// condition ID. This is done via PosToID[]. bool isConditionIndependencePairCovered(unsigned Condition) const { + assert(IndependencePairs); auto It = PosToID.find(Condition); - if (It != PosToID.end()) - return IndependencePairs.contains(It->second); - llvm_unreachable("Condition ID without an Ordinal mapping"); + assert(It != PosToID.end() && "Condition ID without an Ordinal mapping"); + return IndependencePairs->contains(It->second); } /// Return the Independence Pair that covers the given condition. Because @@ -506,7 +510,8 @@ struct MCDCRecord { /// via PosToID[]. TVRowPair getConditionIndependencePair(unsigned Condition) { assert(isConditionIndependencePairCovered(Condition)); - return IndependencePairs[PosToID[Condition]]; + assert(IndependencePairs); + return (*IndependencePairs)[PosToID[Condition]]; } float getPercentCovered() const { diff --git a/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h b/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h index 749b7b2bb5d869..18cd923d5601d0 100644 --- a/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h +++ b/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h @@ -120,12 +120,12 @@ class FunctionImportGlobalProcessing { #endif } - bool run(); + void run(); }; /// Perform in-place global value handling on the given Module for /// exported local functions renamed and promoted for ThinLTO. -bool renameModuleForThinLTO( +void renameModuleForThinLTO( Module &M, const ModuleSummaryIndex &Index, bool ClearDSOLocalOnDeclarations, SetVector *GlobalsToImport = nullptr); diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp index dd754c701f6240..785a8da64abe4c 100644 --- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -235,10 +235,10 @@ llvm::Expected FunctionInfo::encode(FileWriter &Out, return FuncInfoOffset; } -llvm::Expected FunctionInfo::lookup(DataExtractor &Data, - const GsymReader &GR, - uint64_t FuncAddr, - uint64_t Addr) { +llvm::Expected +FunctionInfo::lookup(DataExtractor &Data, const GsymReader &GR, + uint64_t FuncAddr, uint64_t Addr, + std::optional *MergedFuncsData) { LookupResult LR; LR.LookupAddr = Addr; uint64_t Offset = 0; @@ -289,6 +289,12 @@ llvm::Expected FunctionInfo::lookup(DataExtractor &Data, return ExpectedLE.takeError(); break; + case InfoType::MergedFunctionsInfo: + // Store the merged functions data for later parsing, if needed. + if (MergedFuncsData) + *MergedFuncsData = InfoData; + break; + case InfoType::InlineInfo: // We will parse the inline info after our line table, but only if // we have a line entry. diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp index fa5476db191ec4..0a5bb7caaee8c9 100644 --- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp @@ -334,14 +334,52 @@ GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const { return ExpectedData.takeError(); } -llvm::Expected GsymReader::lookup(uint64_t Addr) const { +llvm::Expected +GsymReader::lookup(uint64_t Addr, + std::optional *MergedFunctionsData) const { uint64_t FuncStartAddr = 0; if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr)) - return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr); + return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr, + MergedFunctionsData); else return ExpectedData.takeError(); } +llvm::Expected> +GsymReader::lookupAll(uint64_t Addr) const { + std::vector Results; + std::optional MergedFunctionsData; + + // First perform a lookup to get the primary function info result. + auto MainResult = lookup(Addr, &MergedFunctionsData); + if (!MainResult) + return MainResult.takeError(); + + // Add the main result as the first entry. + Results.push_back(std::move(*MainResult)); + + // Now process any merged functions data that was found during the lookup. + if (MergedFunctionsData) { + // Get data extractors for each merged function. + auto ExpectedMergedFuncExtractors = + MergedFunctionsInfo::getFuncsDataExtractors(*MergedFunctionsData); + if (!ExpectedMergedFuncExtractors) + return ExpectedMergedFuncExtractors.takeError(); + + // Process each merged function data. + for (DataExtractor &MergedData : *ExpectedMergedFuncExtractors) { + if (auto FI = FunctionInfo::lookup(MergedData, *this, + MainResult->FuncRange.start(), Addr)) { + Results.push_back(std::move(*FI)); + } else { + return FI.takeError(); + } + } + } + + return Results; +} + void GsymReader::dump(raw_ostream &OS) { const auto &Header = getHeader(); // Dump the GSYM header. diff --git a/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp b/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp index 4efae2262271db..d2c28f38799d3e 100644 --- a/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp @@ -35,22 +35,59 @@ llvm::Error MergedFunctionsInfo::encode(FileWriter &Out) const { llvm::Expected MergedFunctionsInfo::decode(DataExtractor &Data, uint64_t BaseAddr) { MergedFunctionsInfo MFI; + auto FuncExtractorsOrError = MFI.getFuncsDataExtractors(Data); + + if (!FuncExtractorsOrError) + return FuncExtractorsOrError.takeError(); + + for (DataExtractor &FuncData : *FuncExtractorsOrError) { + llvm::Expected FI = FunctionInfo::decode(FuncData, BaseAddr); + if (!FI) + return FI.takeError(); + MFI.MergedFunctions.push_back(std::move(*FI)); + } + + return MFI; +} + +llvm::Expected> +MergedFunctionsInfo::getFuncsDataExtractors(DataExtractor &Data) { + std::vector Results; uint64_t Offset = 0; + + // Ensure there is enough data to read the function count. + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError( + std::errc::io_error, + "unable to read the function count at offset 0x%8.8" PRIx64, Offset); + uint32_t Count = Data.getU32(&Offset); for (uint32_t i = 0; i < Count; ++i) { + // Ensure there is enough data to read the function size. + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError( + std::errc::io_error, + "unable to read size of function %u at offset 0x%8.8" PRIx64, i, + Offset); + uint32_t FnSize = Data.getU32(&Offset); - DataExtractor FnData(Data.getData().substr(Offset, FnSize), + + // Ensure there is enough data for the function content. + if (!Data.isValidOffsetForDataOfSize(Offset, FnSize)) + return createStringError( + std::errc::io_error, + "function data is truncated for function %u at offset 0x%8.8" PRIx64 + ", expected size %u", + i, Offset, FnSize); + + // Extract the function data. + Results.emplace_back(Data.getData().substr(Offset, FnSize), Data.isLittleEndian(), Data.getAddressSize()); - llvm::Expected FI = - FunctionInfo::decode(FnData, BaseAddr + Offset); - if (!FI) - return FI.takeError(); - MFI.MergedFunctions.push_back(std::move(*FI)); + Offset += FnSize; } - - return MFI; + return Results; } bool operator==(const MergedFunctionsInfo &LHS, diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index 4522f4adcebe68..189f2876b61c0f 100644 --- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -160,8 +160,7 @@ generateModuleMap(std::vector> &Modules) { static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index, bool ClearDSOLocalOnDeclarations) { - if (renameModuleForThinLTO(TheModule, Index, ClearDSOLocalOnDeclarations)) - report_fatal_error("renameModuleForThinLTO failed"); + renameModuleForThinLTO(TheModule, Index, ClearDSOLocalOnDeclarations); } namespace { diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp index f95e311e09de67..e8f60d2ea82f7e 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -221,6 +221,40 @@ Expected CounterMappingContext::evaluate(const Counter &C) const { return LastPoppedValue; } +// Find an independence pair for each condition: +// - The condition is true in one test and false in the other. +// - The decision outcome is true one test and false in the other. +// - All other conditions' values must be equal or marked as "don't care". +void MCDCRecord::findIndependencePairs() { + if (IndependencePairs) + return; + + IndependencePairs.emplace(); + + unsigned NumTVs = TV.size(); + // Will be replaced to shorter expr. + unsigned TVTrueIdx = std::distance( + TV.begin(), + std::find_if(TV.begin(), TV.end(), + [&](auto I) { return (I.second == MCDCRecord::MCDC_True); }) + + ); + for (unsigned I = TVTrueIdx; I < NumTVs; ++I) { + const auto &[A, ACond] = TV[I]; + assert(ACond == MCDCRecord::MCDC_True); + for (unsigned J = 0; J < TVTrueIdx; ++J) { + const auto &[B, BCond] = TV[J]; + assert(BCond == MCDCRecord::MCDC_False); + // If the two vectors differ in exactly one condition, ignoring DontCare + // conditions, we have found an independence pair. + auto AB = A.getDifferences(B); + if (AB.count() == 1) + IndependencePairs->insert( + {AB.find_first(), std::make_pair(J + 1, I + 1)}); + } + } +} + mcdc::TVIdxBuilder::TVIdxBuilder(const SmallVectorImpl &NextIDs, int Offset) : Indices(NextIDs.size()) { @@ -375,9 +409,6 @@ class MCDCRecordProcessor : NextIDsBuilder, mcdc::TVIdxBuilder { /// ExecutedTestVectorBitmap. MCDCRecord::TestVectors &ExecVectors; - /// Number of False items in ExecVectors - unsigned NumExecVectorsF; - #ifndef NDEBUG DenseSet TVIdxs; #endif @@ -447,34 +478,11 @@ class MCDCRecordProcessor : NextIDsBuilder, mcdc::TVIdxBuilder { // Fill ExecVectors order by False items and True items. // ExecVectors is the alias of ExecVectorsByCond[false], so // Append ExecVectorsByCond[true] on it. - NumExecVectorsF = ExecVectors.size(); auto &ExecVectorsT = ExecVectorsByCond[true]; ExecVectors.append(std::make_move_iterator(ExecVectorsT.begin()), std::make_move_iterator(ExecVectorsT.end())); } - // Find an independence pair for each condition: - // - The condition is true in one test and false in the other. - // - The decision outcome is true one test and false in the other. - // - All other conditions' values must be equal or marked as "don't care". - void findIndependencePairs() { - unsigned NumTVs = ExecVectors.size(); - for (unsigned I = NumExecVectorsF; I < NumTVs; ++I) { - const auto &[A, ACond] = ExecVectors[I]; - assert(ACond == MCDCRecord::MCDC_True); - for (unsigned J = 0; J < NumExecVectorsF; ++J) { - const auto &[B, BCond] = ExecVectors[J]; - assert(BCond == MCDCRecord::MCDC_False); - // If the two vectors differ in exactly one condition, ignoring DontCare - // conditions, we have found an independence pair. - auto AB = A.getDifferences(B); - if (AB.count() == 1) - IndependencePairs.insert( - {AB.find_first(), std::make_pair(J + 1, I + 1)}); - } - } - } - public: /// Process the MC/DC Record in order to produce a result for a boolean /// expression. This process includes tracking the conditions that comprise @@ -510,13 +518,8 @@ class MCDCRecordProcessor : NextIDsBuilder, mcdc::TVIdxBuilder { // Using Profile Bitmap from runtime, mark the executed test vectors. findExecutedTestVectors(); - // Compare executed test vectors against each other to find an independence - // pairs for each condition. This processing takes the most time. - findIndependencePairs(); - // Record Test vectors, executed vectors, and independence pairs. - return MCDCRecord(Region, std::move(ExecVectors), - std::move(IndependencePairs), std::move(Folded), + return MCDCRecord(Region, std::move(ExecVectors), std::move(Folded), std::move(PosToID), std::move(CondLoc)); } }; diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp index e8679439c81de3..60ae11b7f42612 100644 --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ -776,13 +776,14 @@ ParseSubClassReference(Record *CurRec, bool isDefm) { return Result; } - if (ParseTemplateArgValueList(Result.TemplateArgs, CurRec, Result.Rec)) { + SmallVector ArgLocs; + if (ParseTemplateArgValueList(Result.TemplateArgs, ArgLocs, CurRec, + Result.Rec)) { Result.Rec = nullptr; // Error parsing value list. return Result; } - if (CheckTemplateArgValues(Result.TemplateArgs, Result.RefRange.Start, - Result.Rec)) { + if (CheckTemplateArgValues(Result.TemplateArgs, ArgLocs, Result.Rec)) { Result.Rec = nullptr; // Error checking value list. return Result; } @@ -812,7 +813,8 @@ ParseSubMultiClassReference(MultiClass *CurMC) { return Result; } - if (ParseTemplateArgValueList(Result.TemplateArgs, &CurMC->Rec, + SmallVector ArgLocs; + if (ParseTemplateArgValueList(Result.TemplateArgs, ArgLocs, &CurMC->Rec, &Result.MC->Rec)) { Result.MC = nullptr; // Error parsing value list. return Result; @@ -2722,11 +2724,12 @@ const Init *TGParser::ParseSimpleValue(Record *CurRec, const RecTy *ItemType, } SmallVector Args; + SmallVector ArgLocs; Lex.Lex(); // consume the < - if (ParseTemplateArgValueList(Args, CurRec, Class)) + if (ParseTemplateArgValueList(Args, ArgLocs, CurRec, Class)) return nullptr; // Error parsing value list. - if (CheckTemplateArgValues(Args, NameLoc.Start, Class)) + if (CheckTemplateArgValues(Args, ArgLocs, Class)) return nullptr; // Error checking template argument values. if (resolveArguments(Class, Args, NameLoc.Start)) @@ -3201,8 +3204,8 @@ void TGParser::ParseValueList(SmallVectorImpl &Result, // PostionalArgValueList ::= [Value {',' Value}*] // NamedArgValueList ::= [NameValue '=' Value {',' NameValue '=' Value}*] bool TGParser::ParseTemplateArgValueList( - SmallVectorImpl &Result, Record *CurRec, - const Record *ArgsRec) { + SmallVectorImpl &Result, + SmallVectorImpl &ArgLocs, Record *CurRec, const Record *ArgsRec) { assert(Result.empty() && "Result vector is not empty"); ArrayRef TArgs = ArgsRec->getTemplateArgs(); @@ -3217,7 +3220,7 @@ bool TGParser::ParseTemplateArgValueList( return true; } - SMLoc ValueLoc = Lex.getLoc(); + SMLoc ValueLoc = ArgLocs.emplace_back(Lex.getLoc()); // If we are parsing named argument, we don't need to know the argument name // and argument type will be resolved after we know the name. const Init *Value = ParseValue( @@ -4417,11 +4420,15 @@ bool TGParser::ParseFile() { // If necessary, replace an argument with a cast to the required type. // The argument count has already been checked. bool TGParser::CheckTemplateArgValues( - SmallVectorImpl &Values, SMLoc Loc, + SmallVectorImpl &Values, ArrayRef ValuesLocs, const Record *ArgsRec) { + assert(Values.size() == ValuesLocs.size() && + "expected as many values as locations"); + ArrayRef TArgs = ArgsRec->getTemplateArgs(); - for (const ArgumentInit *&Value : Values) { + bool HasError = false; + for (auto [Value, Loc] : llvm::zip_equal(Values, ValuesLocs)) { const Init *ArgName = nullptr; if (Value->isPositional()) ArgName = TArgs[Value->getIndex()]; @@ -4439,16 +4446,16 @@ bool TGParser::CheckTemplateArgValues( "result of template arg value cast has wrong type"); Value = Value->cloneWithValue(CastValue); } else { - PrintFatalError(Loc, "Value specified for template argument '" + - Arg->getNameInitAsString() + "' is of type " + - ArgValue->getType()->getAsString() + - "; expected type " + ArgType->getAsString() + - ": " + ArgValue->getAsString()); + HasError |= Error( + Loc, "Value specified for template argument '" + + Arg->getNameInitAsString() + "' is of type " + + ArgValue->getType()->getAsString() + "; expected type " + + ArgType->getAsString() + ": " + ArgValue->getAsString()); } } } - return false; + return HasError; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/TableGen/TGParser.h b/llvm/lib/TableGen/TGParser.h index cac1ba827f1138..4509893eefc2c8 100644 --- a/llvm/lib/TableGen/TGParser.h +++ b/llvm/lib/TableGen/TGParser.h @@ -296,6 +296,7 @@ class TGParser { void ParseValueList(SmallVectorImpl &Result, Record *CurRec, const RecTy *ItemType = nullptr); bool ParseTemplateArgValueList(SmallVectorImpl &Result, + SmallVectorImpl &ArgLocs, Record *CurRec, const Record *ArgsRec); void ParseDagArgList( SmallVectorImpl> &Result, @@ -321,7 +322,8 @@ class TGParser { bool ApplyLetStack(Record *CurRec); bool ApplyLetStack(RecordsEntry &Entry); bool CheckTemplateArgValues(SmallVectorImpl &Values, - SMLoc Loc, const Record *ArgsRec); + ArrayRef ValuesLocs, + const Record *ArgsRec); }; } // end namespace llvm diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td index 737fc7390455d8..e23daec97bd2da 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td @@ -512,6 +512,12 @@ def N2Write_8c_3L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, let NumMicroOps = 7; } +def N2Write_7c_7V0 : SchedWriteRes<[N2UnitV0]> { + let Latency = 7; + let NumMicroOps = 7; + let ReleaseAtCycles = [7]; +} + //===----------------------------------------------------------------------===// // Define generic 8 micro-op types @@ -547,6 +553,15 @@ def N2Write_9c_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL, let NumMicroOps = 8; } +//===----------------------------------------------------------------------===// +// Define generic 9 micro-op types + +def N2Write_9c_9V0 : SchedWriteRes<[N2UnitV0]> { + let Latency = 9; + let NumMicroOps = 9; + let ReleaseAtCycles = [9]; +} + //===----------------------------------------------------------------------===// // Define generic 10 micro-op types @@ -557,6 +572,12 @@ def N2Write_7c_5L01_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, let NumMicroOps = 10; } +def N2Write_10c_10V0 : SchedWriteRes<[N2UnitV0]> { + let Latency = 10; + let NumMicroOps = 10; + let ReleaseAtCycles = [10]; +} + //===----------------------------------------------------------------------===// // Define generic 12 micro-op types @@ -580,6 +601,21 @@ def N2Write_7c_5L01_5S_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, let NumMicroOps = 15; } +def N2Write_15c_15V0 : SchedWriteRes<[N2UnitV0]> { + let Latency = 15; + let NumMicroOps = 15; + let ReleaseAtCycles = [15]; +} + +//===----------------------------------------------------------------------===// +// Define generic 16 micro-op types + +def N2Write_16c_16V0 : SchedWriteRes<[N2UnitV0]> { + let Latency = 16; + let NumMicroOps = 16; + let ReleaseAtCycles = [16]; +} + //===----------------------------------------------------------------------===// // Define generic 18 micro-op types @@ -795,22 +831,26 @@ def : SchedAlias; // FP compare def : SchedAlias; +// FP divide and square root operations are performed using an iterative +// algorithm and block subsequent similar operations to the same pipeline +// until complete (Arm Neoverse N2 Software Optimization Guide, 3.14). + // FP divide, square root -def : SchedAlias; +def : SchedAlias; // FP divide, H-form -def : InstRW<[N2Write_7c_1V0], (instrs FDIVHrr)>; +def : InstRW<[N2Write_7c_7V0], (instrs FDIVHrr)>; // FP divide, S-form -def : InstRW<[N2Write_10c_1V0], (instrs FDIVSrr)>; +def : InstRW<[N2Write_10c_10V0], (instrs FDIVSrr)>; // FP divide, D-form -def : InstRW<[N2Write_15c_1V0], (instrs FDIVDrr)>; +def : InstRW<[N2Write_15c_15V0], (instrs FDIVDrr)>; // FP square root, H-form -def : InstRW<[N2Write_7c_1V0], (instrs FSQRTHr)>; +def : InstRW<[N2Write_7c_7V0], (instrs FSQRTHr)>; // FP square root, S-form -def : InstRW<[N2Write_9c_1V0], (instrs FSQRTSr)>; +def : InstRW<[N2Write_9c_9V0], (instrs FSQRTSr)>; // FP square root, D-form -def : InstRW<[N2Write_16c_1V0], (instrs FSQRTDr)>; +def : InstRW<[N2Write_16c_16V0], (instrs FSQRTDr)>; // FP multiply def : WriteRes { let Latency = 3; } diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 4fb5cb066be7c3..c2199fd587bea6 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -199,7 +199,7 @@ static unsigned macToMad(unsigned Opc) { case AMDGPU::V_FMAC_F16_e64: return AMDGPU::V_FMA_F16_gfx9_e64; case AMDGPU::V_FMAC_F16_fake16_e64: - return AMDGPU::V_FMA_F16_gfx9_e64; + return AMDGPU::V_FMA_F16_gfx9_fake16_e64; case AMDGPU::V_FMAC_LEGACY_F32_e64: return AMDGPU::V_FMA_LEGACY_F32_e64; case AMDGPU::V_FMAC_F64_e64: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index db672139bdcb97..d5359b6c337664 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3856,6 +3856,36 @@ static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, } } +static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc) { + switch (Opc) { + case AMDGPU::V_MAC_F16_e32: + case AMDGPU::V_MAC_F16_e64: + return AMDGPU::V_MAD_F16_e64; + case AMDGPU::V_MAC_F32_e32: + case AMDGPU::V_MAC_F32_e64: + return AMDGPU::V_MAD_F32_e64; + case AMDGPU::V_MAC_LEGACY_F32_e32: + case AMDGPU::V_MAC_LEGACY_F32_e64: + return AMDGPU::V_MAD_LEGACY_F32_e64; + case AMDGPU::V_FMAC_LEGACY_F32_e32: + case AMDGPU::V_FMAC_LEGACY_F32_e64: + return AMDGPU::V_FMA_LEGACY_F32_e64; + case AMDGPU::V_FMAC_F16_e32: + case AMDGPU::V_FMAC_F16_e64: + case AMDGPU::V_FMAC_F16_fake16_e64: + return ST.hasTrue16BitInsts() ? AMDGPU::V_FMA_F16_gfx9_fake16_e64 + : AMDGPU::V_FMA_F16_gfx9_e64; + case AMDGPU::V_FMAC_F32_e32: + case AMDGPU::V_FMAC_F32_e64: + return AMDGPU::V_FMA_F32_e64; + case AMDGPU::V_FMAC_F64_e32: + case AMDGPU::V_FMAC_F64_e64: + return AMDGPU::V_FMA_F64_e64; + default: + llvm_unreachable("invalid instruction"); + } +} + MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const { @@ -4091,14 +4121,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, if (Src0Literal && !ST.hasVOP3Literal()) return nullptr; - unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64 - : IsF64 ? AMDGPU::V_FMA_F64_e64 - : IsLegacy - ? AMDGPU::V_FMA_LEGACY_F32_e64 - : AMDGPU::V_FMA_F32_e64 - : IsF16 ? AMDGPU::V_MAD_F16_e64 - : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64 - : AMDGPU::V_MAD_F32_e64; + unsigned NewOpc = getNewFMAInst(ST, Opc); + if (pseudoToMCOpcode(NewOpc) == -1) return nullptr; @@ -6925,9 +6949,8 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI, AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc); if (RsrcIdx != -1) { MachineOperand *Rsrc = &MI.getOperand(RsrcIdx); - if (Rsrc->isReg() && !RI.isSGPRClass(MRI.getRegClass(Rsrc->getReg()))) { + if (Rsrc->isReg() && !RI.isSGPRReg(MRI, Rsrc->getReg())) isRsrcLegal = false; - } } // The operands are legal. @@ -9353,6 +9376,7 @@ static bool isRenamedInGFX9(int Opcode) { case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64: case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64: case AMDGPU::V_FMA_F16_gfx9_e64: + case AMDGPU::V_FMA_F16_gfx9_fake16_e64: case AMDGPU::V_INTERP_P2_F16: case AMDGPU::V_MAD_F16_e64: case AMDGPU::V_MAD_U16_e64: diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index d2de8e6c99ed84..b09a7c31dba8eb 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3076,7 +3076,7 @@ def : GCNPat< (V_BFREV_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0))), sub1)>; // If fcanonicalize's operand is implicitly canonicalized, we only need a copy. -let AddedComplexity = 1000 in { +let AddedComplexity = 8 in { foreach vt = [f16, v2f16, f32, v2f32, f64] in { def : GCNPat< (fcanonicalize (vt is_canonicalized:$src)), diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 42df4576a774d5..979812e07fc3f7 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -455,6 +455,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const { break; case AMDGPU::V_FMA_F16_e64: case AMDGPU::V_FMA_F16_gfx9_e64: + case AMDGPU::V_FMA_F16_gfx9_fake16_e64: NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16 : AMDGPU::V_FMAAK_F16; break; @@ -484,6 +485,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const { break; case AMDGPU::V_FMA_F16_e64: case AMDGPU::V_FMA_F16_gfx9_e64: + case AMDGPU::V_FMA_F16_gfx9_fake16_e64: NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16 : AMDGPU::V_FMAMK_F16; break; @@ -956,7 +958,8 @@ bool SIShrinkInstructions::run(MachineFunction &MF) { MI.getOpcode() == AMDGPU::V_FMA_F32_e64 || MI.getOpcode() == AMDGPU::V_MAD_F16_e64 || MI.getOpcode() == AMDGPU::V_FMA_F16_e64 || - MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64) { + MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 || + MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) { shrinkMadFma(MI); continue; } diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index f0d2fe0f4f5478..b9c73e6ce8ef2c 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -1020,7 +1020,7 @@ defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12<0x067>; defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">; defm V_NOT_B16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x069, "v_not_b16">; defm V_CVT_I32_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">; -defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">; +defm V_CVT_U32_U16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">; defm V_CVT_F16_U16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x050, "v_cvt_f16_u16">; defm V_CVT_F16_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x051, "v_cvt_f16_i16">; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index d00c810859e3b8..cef1f20f3420a3 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -340,7 +340,7 @@ let FPDPRounding = 1 in { let SubtargetPredicate = isGFX9Plus in { defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst_t16 <"v_div_fixup_f16_gfx9", VOP_F16_F16_F16_F16, AMDGPUdiv_fixup>; - defm V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile, any_fma>; + defm V_FMA_F16_gfx9 : VOP3Inst_t16 <"v_fma_f16_gfx9", VOP_F16_F16_F16_F16, any_fma>; } // End SubtargetPredicate = isGFX9Plus } // End FPDPRounding = 1 @@ -1708,7 +1708,7 @@ defm V_PERM_B32 : VOP3_Realtriple_gfx11_gfx12<0x244>; defm V_XAD_U32 : VOP3_Realtriple_gfx11_gfx12<0x245>; defm V_LSHL_ADD_U32 : VOP3_Realtriple_gfx11_gfx12<0x246>; defm V_ADD_LSHL_U32 : VOP3_Realtriple_gfx11_gfx12<0x247>; -defm V_FMA_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x248, "V_FMA_F16_gfx9", "v_fma_f16">; +defm V_FMA_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x248, "v_fma_f16", "V_FMA_F16_gfx9">; defm V_MIN3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx11<0x249, "v_min3_f16">; defm V_MIN3_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x24a, "v_min3_i16">; defm V_MIN3_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x24b, "v_min3_u16">; diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index 30122831767f61..a490910154eb4d 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -698,6 +698,8 @@ DecodeStatus RISCVDisassembler::getInstruction32(MCInst &MI, uint64_t &Size, TRY_TO_DECODE_FEATURE( RISCV::FeatureVendorXqcicli, DecoderTableXqcicli32, "Qualcomm uC Conditional Load Immediate custom opcode table"); + TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXqcicm, DecoderTableXqcicm32, + "Qualcomm uC Conditional Move custom opcode table"); TRY_TO_DECODE(true, DecoderTable32, "RISCV32 table"); return MCDisassembler::Fail; @@ -727,6 +729,9 @@ DecodeStatus RISCVDisassembler::getInstruction16(MCInst &MI, uint64_t &Size, TRY_TO_DECODE_FEATURE( RISCV::FeatureVendorXqciac, DecoderTableXqciac16, "Qualcomm uC Load-Store Address Calculation custom 16bit opcode table"); + TRY_TO_DECODE_FEATURE( + RISCV::FeatureVendorXqcicm, DecoderTableXqcicm16, + "Qualcomm uC Conditional Move custom 16bit opcode table"); TRY_TO_DECODE_AND_ADD_SP(STI.hasFeature(RISCV::FeatureVendorXwchc), DecoderTableXwchc16, "WCH QingKe XW custom opcode table"); diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 0074be35798ac9..01bc5387e672ec 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1294,6 +1294,14 @@ def HasVendorXqcicli AssemblerPredicate<(all_of FeatureVendorXqcicli), "'Xqcicli' (Qualcomm uC Conditional Load Immediate Extension)">; +def FeatureVendorXqcicm + : RISCVExperimentalExtension<0, 2, "Qualcomm uC Conditional Move Extension", + [FeatureStdExtZca]>; +def HasVendorXqcicm + : Predicate<"Subtarget->hasVendorXqcicm()">, + AssemblerPredicate<(all_of FeatureVendorXqcicm), + "'Xqcicm' (Qualcomm uC Conditional Move Extension)">; + //===----------------------------------------------------------------------===// // LLVM specific features and extensions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index a03a6c8049e315..4a0304f3322ed4 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -10154,7 +10154,10 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op, case ISD::VP_REDUCE_AND: { // vcpop ~x == 0 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); - Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL); + if (IsVP || VecVT.isFixedLengthVector()) + Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL); + else + Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask); Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); CC = ISD::SETEQ; break; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index 5e6722cb4995e2..6f15646852f91b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -150,6 +150,22 @@ class QCILICC funct3, bits<2> funct2, DAGOperand InTyRs2, string opcodes let Inst{31-25} = {simm, funct2}; } +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class QCIMVCC funct3, string opcodestr> + : RVInstR4<0b00, funct3, OPC_CUSTOM_2, (outs GPRNoX0:$rd), + (ins GPRNoX0:$rs1, GPRNoX0:$rs2, GPRNoX0:$rs3), + opcodestr, "$rd, $rs1, $rs2, $rs3">; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class QCIMVCCI funct3, string opcodestr, DAGOperand immType> + : RVInstR4<0b10, funct3, OPC_CUSTOM_2, (outs GPRNoX0:$rd), + (ins GPRNoX0:$rs1, immType:$imm, GPRNoX0:$rs3), + opcodestr, "$rd, $rs1, $imm, $rs3"> { + bits<5> imm; + + let rs2 = imm; +} + //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -270,6 +286,32 @@ let Predicates = [HasVendorXqcicli, IsRV32], DecoderNamespace = "Xqcicli" in { def QC_LIGEUI : QCILICC<0b111, 0b11, uimm5, "qc.ligeui">; } // Predicates = [HasVendorXqcicli, IsRV32], DecoderNamespace = "Xqcicli" +let Predicates = [HasVendorXqcicm, IsRV32], DecoderNamespace = "Xqcicm" in { +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in + def QC_C_MVEQZ : RVInst16CL<0b101, 0b10, (outs GPRC:$rd_wb), + (ins GPRC:$rd, GPRC:$rs1), + "qc.c.mveqz", "$rd, $rs1"> { + let Constraints = "$rd = $rd_wb"; + + let Inst{12-10} = 0b011; + let Inst{6-5} = 0b00; + } + + def QC_MVEQ : QCIMVCC<0b000, "qc.mveq">; + def QC_MVNE : QCIMVCC<0b001, "qc.mvne">; + def QC_MVLT : QCIMVCC<0b100, "qc.mvlt">; + def QC_MVGE : QCIMVCC<0b101, "qc.mvge">; + def QC_MVLTU : QCIMVCC<0b110, "qc.mvltu">; + def QC_MVGEU : QCIMVCC<0b111, "qc.mvgeu">; + + def QC_MVEQI : QCIMVCCI<0b000, "qc.mveqi", simm5>; + def QC_MVNEI : QCIMVCCI<0b001, "qc.mvnei", simm5>; + def QC_MVLTI : QCIMVCCI<0b100, "qc.mvlti", simm5>; + def QC_MVGEI : QCIMVCCI<0b101, "qc.mvgei", simm5>; + def QC_MVLTUI : QCIMVCCI<0b110, "qc.mvltui", uimm5>; + def QC_MVGEUI : QCIMVCCI<0b111, "qc.mvgeui", uimm5>; +} // Predicates = [HasVendorXqcicm, IsRV32], DecoderNamespace = "Xqcicm" + //===----------------------------------------------------------------------===// // Aliases //===----------------------------------------------------------------------===// diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp index 4f403e9fb6f574..d6e1eac0d85af4 100644 --- a/llvm/lib/TargetParser/RISCVISAInfo.cpp +++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp @@ -742,8 +742,8 @@ Error RISCVISAInfo::checkDependency() { bool HasZvl = MinVLen != 0; bool HasZcmt = Exts.count("zcmt") != 0; static constexpr StringLiteral XqciExts[] = { - {"xqcia"}, {"xqciac"}, {"xqcicli"}, {"xqcics"}, - {"xqcicsr"}, {"xqcilsm"}, {"xqcisls"}}; + {"xqcia"}, {"xqciac"}, {"xqcicli"}, {"xqcicm"}, + {"xqcics"}, {"xqcicsr"}, {"xqcilsm"}, {"xqcisls"}}; if (HasI && HasE) return getIncompatibleError("i", "e"); diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index fde43bb354e83e..c3d0a1a3a046eb 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -1950,9 +1950,8 @@ Expected FunctionImporter::importFunctions( SrcModule->setPartialSampleProfileRatio(Index); // Link in the specified functions. - if (renameModuleForThinLTO(*SrcModule, Index, ClearDSOLocalOnDeclarations, - &GlobalsToImport)) - return true; + renameModuleForThinLTO(*SrcModule, Index, ClearDSOLocalOnDeclarations, + &GlobalsToImport); if (PrintImports) { for (const auto *GV : GlobalsToImport) @@ -2026,11 +2025,8 @@ static bool doImportingForModuleForTest( // Next we need to promote to global scope and rename any local values that // are potentially exported to other modules. - if (renameModuleForThinLTO(M, *Index, /*ClearDSOLocalOnDeclarations=*/false, - /*GlobalsToImport=*/nullptr)) { - errs() << "Error renaming module\n"; - return true; - } + renameModuleForThinLTO(M, *Index, /*ClearDSOLocalOnDeclarations=*/false, + /*GlobalsToImport=*/nullptr); // Perform the import now. auto ModuleLoader = [&M](StringRef Identifier) { diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp index 766c7501550da5..ae1af943bc11c7 100644 --- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -331,15 +331,12 @@ void FunctionImportGlobalProcessing::processGlobalsForThinLTO() { } } -bool FunctionImportGlobalProcessing::run() { - processGlobalsForThinLTO(); - return false; -} +void FunctionImportGlobalProcessing::run() { processGlobalsForThinLTO(); } -bool llvm::renameModuleForThinLTO(Module &M, const ModuleSummaryIndex &Index, +void llvm::renameModuleForThinLTO(Module &M, const ModuleSummaryIndex &Index, bool ClearDSOLocalOnDeclarations, SetVector *GlobalsToImport) { FunctionImportGlobalProcessing ThinLTOProcessing(M, Index, GlobalsToImport, ClearDSOLocalOnDeclarations); - return ThinLTOProcessing.run(); + ThinLTOProcessing.run(); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 395287bde76f37..3e3f5adf73a0f0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -562,21 +562,63 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind, return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step); } +static SmallVector collectUsersRecursively(VPValue *V) { + SetVector Users(V->user_begin(), V->user_end()); + for (unsigned I = 0; I != Users.size(); ++I) { + VPRecipeBase *Cur = cast(Users[I]); + if (isa(Cur)) + continue; + for (VPValue *V : Cur->definedValues()) + Users.insert(V->user_begin(), V->user_end()); + } + return Users.takeVector(); +} + /// Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd /// (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as /// VPWidenPointerInductionRecipe will generate vectors only. If some users /// require vectors while other require scalars, the scalar uses need to extract /// the scalars from the generated vectors (Note that this is different to how -/// int/fp inductions are handled). Also optimize VPWidenIntOrFpInductionRecipe, -/// if any of its users needs scalar values, by providing them scalar steps -/// built on the canonical scalar IV and update the original IV's users. This is -/// an optional optimization to reduce the needs of vector extracts. +/// int/fp inductions are handled). Legalize extract-from-ends using uniform +/// VPReplicateRecipe of wide inductions to use regular VPReplicateRecipe, so +/// the correct end value is available. Also optimize +/// VPWidenIntOrFpInductionRecipe, if any of its users needs scalar values, by +/// providing them scalar steps built on the canonical scalar IV and update the +/// original IV's users. This is an optional optimization to reduce the needs of +/// vector extracts. static void legalizeAndOptimizeInductions(VPlan &Plan) { + using namespace llvm::VPlanPatternMatch; SmallVector ToRemove; VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1)); VPBuilder Builder(HeaderVPBB, HeaderVPBB->getFirstNonPhi()); for (VPRecipeBase &Phi : HeaderVPBB->phis()) { + auto *PhiR = dyn_cast(&Phi); + if (!PhiR) + break; + + // Check if any uniform VPReplicateRecipes using the phi recipe are used by + // ExtractFromEnd. Those must be replaced by a regular VPReplicateRecipe to + // ensure the final value is available. + // TODO: Remove once uniformity analysis is done on VPlan. + for (VPUser *U : collectUsersRecursively(PhiR)) { + auto *ExitIRI = dyn_cast(U); + VPValue *Op; + if (!ExitIRI || !match(ExitIRI->getOperand(0), + m_VPInstruction( + m_VPValue(Op), m_VPValue()))) + continue; + auto *RepR = dyn_cast(Op); + if (!RepR || !RepR->isUniform()) + continue; + assert(!RepR->isPredicated() && "RepR must not be predicated"); + Instruction *I = RepR->getUnderlyingInstr(); + auto *Clone = + new VPReplicateRecipe(I, RepR->operands(), /*IsUniform*/ false); + Clone->insertAfter(RepR); + RepR->replaceAllUsesWith(Clone); + } + // Replace wide pointer inductions which have only their scalars used by // PtrAdd(IndStart, ScalarIVSteps (0, Step)). if (auto *PtrIV = dyn_cast(&Phi)) { @@ -1086,18 +1128,6 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, return true; } -static SmallVector collectUsersRecursively(VPValue *V) { - SetVector Users(V->user_begin(), V->user_end()); - for (unsigned I = 0; I != Users.size(); ++I) { - VPRecipeBase *Cur = cast(Users[I]); - if (isa(Cur)) - continue; - for (VPValue *V : Cur->definedValues()) - Users.insert(V->user_begin(), V->user_end()); - } - return Users.takeVector(); -} - void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) { for (VPRecipeBase &R : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { diff --git a/llvm/test/CodeGen/AArch64/machine-combiner.ll b/llvm/test/CodeGen/AArch64/machine-combiner.ll index 70a638857ce4a9..c8df283aace0b1 100644 --- a/llvm/test/CodeGen/AArch64/machine-combiner.ll +++ b/llvm/test/CodeGen/AArch64/machine-combiner.ll @@ -262,8 +262,8 @@ define half @reassociate_adds_half(half %x0, half %x1, half %x2, half %x3) { ; CHECK-UNSAFE-LABEL: reassociate_adds_half: ; CHECK-UNSAFE: // %bb.0: ; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1 -; CHECK-UNSAFE-NEXT: fadd h1, h3, h2 -; CHECK-UNSAFE-NEXT: fadd h0, h1, h0 +; CHECK-UNSAFE-NEXT: fadd h2, h3, h2 +; CHECK-UNSAFE-NEXT: fadd h0, h2, h0 ; CHECK-UNSAFE-NEXT: ret %t0 = fdiv half %x0, %x1 %t1 = fadd half %x2, %t0 @@ -284,8 +284,8 @@ define half @reassociate_muls_half(half %x0, half %x1, half %x2, half %x3) { ; CHECK-UNSAFE-LABEL: reassociate_muls_half: ; CHECK-UNSAFE: // %bb.0: ; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1 -; CHECK-UNSAFE-NEXT: fmul h1, h3, h2 -; CHECK-UNSAFE-NEXT: fmul h0, h1, h0 +; CHECK-UNSAFE-NEXT: fmul h2, h3, h2 +; CHECK-UNSAFE-NEXT: fmul h0, h2, h0 ; CHECK-UNSAFE-NEXT: ret %t0 = fdiv half %x0, %x1 %t1 = fmul half %x2, %t0 diff --git a/llvm/test/CodeGen/AMDGPU/fma.f16.ll b/llvm/test/CodeGen/AMDGPU/fma.f16.ll index 005e40159f61be..822d40f7349b0f 100644 --- a/llvm/test/CodeGen/AMDGPU/fma.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fma.f16.ll @@ -5,6 +5,8 @@ ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-SDAG ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-GISEL +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX12,GFX12-SDAG +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX12,GFX12-GISEL declare half @llvm.fma.f16(half, half, half) declare half @llvm.maxnum.f16(half, half) @@ -27,6 +29,16 @@ define half @test_fma(half %x, half %y, half %z) { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: test_fma: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX12-NEXT: s_setpc_b64 s[30:31] %r = call half @llvm.fma.f16(half %x, half %y, half %z) ret half %r } @@ -50,6 +62,16 @@ define half @test_fmac(half %x, half %y, half %z) { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_fmac_f16_e32 v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: test_fmac: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_fmac_f16_e32 v0, v1, v2 +; GFX12-NEXT: s_setpc_b64 s[30:31] %r = call half @llvm.fma.f16(half %y, half %z, half %x) ret half %r } @@ -81,6 +103,16 @@ define half @test_fmaak(half %x, half %y, half %z) { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200 ; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: test_fmaak: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200 +; GFX12-NEXT: s_setpc_b64 s[30:31] %r = call half @llvm.fma.f16(half %x, half %y, half 0xH4200) ret half %r } @@ -112,6 +144,16 @@ define half @test_fmamk(half %x, half %y, half %z) { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_fmamk_f16 v0, v0, 0x4200, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: test_fmamk: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_fmamk_f16 v0, v0, 0x4200, v2 +; GFX12-NEXT: s_setpc_b64 s[30:31] %r = call half @llvm.fma.f16(half %x, half 0xH4200, half %z) ret half %r } @@ -193,6 +235,42 @@ define i32 @test_D139469_f16(half %arg) { ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: test_D139469_f16: +; GFX12-SDAG: ; %bb.0: ; %bb +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v1, 0x211e +; GFX12-SDAG-NEXT: v_mul_f16_e32 v2, 0x291e, v0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_fmac_f16_e32 v1, 0x291e, v0 +; GFX12-SDAG-NEXT: v_min_num_f16_e32 v0, v2, v1 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0 +; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: test_D139469_f16: +; GFX12-GISEL: ; %bb.0: ; %bb +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0x211e +; GFX12-GISEL-NEXT: v_mul_f16_e32 v2, 0x291e, v0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-GISEL-NEXT: v_fmac_f16_e32 v1, 0x291e, v0 +; GFX12-GISEL-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_cmp_gt_f16_e64 s0, 0, v1 +; GFX12-GISEL-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX12-GISEL-NEXT: s_wait_alu 0xfffe +; GFX12-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] bb: %i = fmul contract half %arg, 0xH291E %i1 = fcmp olt half %i, 0xH0000 @@ -306,6 +384,55 @@ define <2 x i32> @test_D139469_v2f16(<2 x half> %arg) { ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: test_D139469_v2f16: +; GFX12-SDAG: ; %bb.0: ; %bb +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: s_movk_i32 s0, 0x211e +; GFX12-SDAG-NEXT: v_pk_mul_f16 v1, 0x291e, v0 op_sel_hi:[0,1] +; GFX12-SDAG-NEXT: s_wait_alu 0xfffe +; GFX12-SDAG-NEXT: v_pk_fma_f16 v0, 0x291e, v0, s0 op_sel_hi:[0,1,0] +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v1, v0 +; GFX12-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX12-SDAG-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0 +; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX12-SDAG-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v1 +; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: test_D139469_v2f16: +; GFX12-GISEL: ; %bb.0: ; %bb +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0x211e211e +; GFX12-GISEL-NEXT: v_pk_mul_f16 v2, 0x291e291e, v0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-GISEL-NEXT: v_pk_fma_f16 v0, 0x291e291e, v0, v1 +; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX12-GISEL-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-GISEL-NEXT: v_cmp_gt_f16_e64 s0, 0, v0 +; GFX12-GISEL-NEXT: v_cmp_gt_f16_e64 s1, 0, v1 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX12-GISEL-NEXT: v_cmp_gt_f16_e64 s2, 0, v3 +; GFX12-GISEL-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX12-GISEL-NEXT: s_wait_alu 0xfffe +; GFX12-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-GISEL-NEXT: s_or_b32 s0, s1, s2 +; GFX12-GISEL-NEXT: s_wait_alu 0xfffe +; GFX12-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] bb: %i = fmul contract <2 x half> %arg, %i1 = fcmp olt <2 x half> %i, diff --git a/llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir index cefd24032871f4..85c65778933964 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir @@ -18,7 +18,7 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec - ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_FMA_F16_gfx9_fake16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_fake16_e64 0, killed [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec %0 = IMPLICIT_DEF %1 = COPY %0.sub1 %2 = COPY %0.sub0 @@ -43,7 +43,7 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec - ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, [[COPY1]], 0, killed [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_FMA_F16_gfx9_fake16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_fake16_e64 0, [[COPY1]], 0, killed [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec %0 = IMPLICIT_DEF %1 = COPY %0.sub1 %2 = COPY %0.sub0 @@ -68,7 +68,7 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec - ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY]], 0, [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_FMA_F16_gfx9_fake16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_fake16_e64 0, killed [[COPY]], 0, [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec %0 = IMPLICIT_DEF %1 = COPY %0.sub0 %2 = COPY %0.sub1 @@ -90,7 +90,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec - ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, 16384, 0, killed [[COPY]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_FMA_F16_gfx9_fake16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_fake16_e64 0, 16384, 0, killed [[COPY]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0 %0:vgpr_32 = COPY killed $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/swdev502267-use-after-free-last-chance-recoloring-alloc-succeeds.mir b/llvm/test/CodeGen/AMDGPU/swdev502267-use-after-free-last-chance-recoloring-alloc-succeeds.mir index 3135e3c60f5f11..b213f0eb982b3c 100644 --- a/llvm/test/CodeGen/AMDGPU/swdev502267-use-after-free-last-chance-recoloring-alloc-succeeds.mir +++ b/llvm/test/CodeGen/AMDGPU/swdev502267-use-after-free-last-chance-recoloring-alloc-succeeds.mir @@ -1,5 +1,8 @@ +# XFAIL: expensive_checks +# FIXME: This fails the machine verifier after allocation + # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -stress-regalloc=4 -start-before=greedy,2 -stop-after=virtregrewriter,2 -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -stress-regalloc=4 -start-before=greedy,2 -stop-after=virtregrewriter,2 -o - %s | FileCheck %s # This testcase hit a situation where greedy would hit a use after # free during last chance recoloring. This case successfully allocates diff --git a/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll b/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll new file mode 100644 index 00000000000000..6849c8b4e609ee --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll @@ -0,0 +1,23 @@ +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -O0 2> %t.err < %s | FileCheck %s +; RUN: FileCheck -check-prefix=ERR %s < %t.err + +; FIXME: This error will be fixed by supporting arbitrary divergent +; dynamic allocas by performing a wave umax of the size. + +; ERR: error: :0:0: in function move_to_valu_assert_srd_is_physreg_swdev503538 i32 (ptr addrspace(1)): illegal VGPR to SGPR copy + +; CHECK: ; illegal copy v0 to s32 + +define i32 @move_to_valu_assert_srd_is_physreg_swdev503538(ptr addrspace(1) %ptr) { +entry: + %idx = load i32, ptr addrspace(1) %ptr, align 4 + %zero = extractelement <4 x i32> zeroinitializer, i32 %idx + %alloca = alloca [2048 x i8], i32 %zero, align 8, addrspace(5) + %ld = load i32, ptr addrspace(5) %alloca, align 8 + call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 0, i32 2048, i1 false) + ret i32 %ld +} + +declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture writeonly, i8, i32, i1 immarg) #0 + +attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: write) } diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 7e55e0590ec598..c0fcc6f6111118 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -84,6 +84,7 @@ ; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcia %s -o - | FileCheck --check-prefix=RV32XQCIA %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqciac %s -o - | FileCheck --check-prefix=RV32XQCIAC %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicli %s -o - | FileCheck --check-prefix=RV32XQCICLI %s +; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm %s -o - | FileCheck --check-prefix=RV32XQCICM %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcics %s -o - | FileCheck --check-prefix=RV32XQCICS %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicsr %s -o - | FileCheck --check-prefix=RV32XQCICSR %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcilsm %s -o - | FileCheck --check-prefix=RV32XQCILSM %s @@ -397,6 +398,7 @@ ; RV32XQCIA: .attribute 5, "rv32i2p1_xqcia0p2" ; RV32XQCIAC: .attribute 5, "rv32i2p1_zca1p0_xqciac0p2" ; RV32XQCICLI: .attribute 5, "rv32i2p1_xqcicli0p2" +; RV32XQCICM: .attribute 5, "rv32i2p1_zca1p0_xqcicm0p2" ; RV32XQCICS: .attribute 5, "rv32i2p1_xqcics0p2" ; RV32XQCICSR: .attribute 5, "rv32i2p1_xqcicsr0p2" ; RV32XQCILSM: .attribute 5, "rv32i2p1_xqcilsm0p2" diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll index d99fd036b4fc92..ce9d6c5ab91a8a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll @@ -785,8 +785,7 @@ define zeroext i1 @vreduce_and_nxv128i1( %v) { ; CHECK-LABEL: vreduce_and_nxv128i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmand.mm v8, v0, v8 -; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmnand.mm v8, v0, v8 ; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret @@ -814,8 +813,7 @@ define zeroext i1 @vreduce_smax_nxv128i1( %v) { ; CHECK-LABEL: vreduce_smax_nxv128i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmand.mm v8, v0, v8 -; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmnand.mm v8, v0, v8 ; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret @@ -829,8 +827,7 @@ define zeroext i1 @vreduce_umin_nxv128i1( %v) { ; CHECK-LABEL: vreduce_umin_nxv128i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmand.mm v8, v0, v8 -; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmnand.mm v8, v0, v8 ; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret @@ -892,8 +889,7 @@ define zeroext i1 @vreduce_and_nxv256i1( %v) { ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vmand.mm v8, v8, v10 ; CHECK-NEXT: vmand.mm v9, v0, v9 -; CHECK-NEXT: vmand.mm v8, v9, v8 -; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmnand.mm v8, v9, v8 ; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret @@ -925,8 +921,7 @@ define zeroext i1 @vreduce_smax_nxv256i1( %v) { ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vmand.mm v8, v8, v10 ; CHECK-NEXT: vmand.mm v9, v0, v9 -; CHECK-NEXT: vmand.mm v8, v9, v8 -; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmnand.mm v8, v9, v8 ; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret @@ -942,8 +937,7 @@ define zeroext i1 @vreduce_umin_nxv256i1( %v) { ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vmand.mm v8, v8, v10 ; CHECK-NEXT: vmand.mm v9, v0, v9 -; CHECK-NEXT: vmand.mm v8, v9, v8 -; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmnand.mm v8, v9, v8 ; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret @@ -1019,8 +1013,7 @@ define zeroext i1 @vreduce_and_nxv512i1( %v) { ; CHECK-NEXT: vmand.mm v11, v0, v11 ; CHECK-NEXT: vmand.mm v8, v8, v10 ; CHECK-NEXT: vmand.mm v9, v11, v9 -; CHECK-NEXT: vmand.mm v8, v9, v8 -; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmnand.mm v8, v9, v8 ; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret @@ -1060,8 +1053,7 @@ define zeroext i1 @vreduce_smax_nxv512i1( %v) { ; CHECK-NEXT: vmand.mm v11, v0, v11 ; CHECK-NEXT: vmand.mm v8, v8, v10 ; CHECK-NEXT: vmand.mm v9, v11, v9 -; CHECK-NEXT: vmand.mm v8, v9, v8 -; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmnand.mm v8, v9, v8 ; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret @@ -1081,8 +1073,7 @@ define zeroext i1 @vreduce_umin_nxv512i1( %v) { ; CHECK-NEXT: vmand.mm v11, v0, v11 ; CHECK-NEXT: vmand.mm v8, v8, v10 ; CHECK-NEXT: vmand.mm v9, v11, v9 -; CHECK-NEXT: vmand.mm v8, v9, v8 -; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmnand.mm v8, v9, v8 ; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret @@ -1186,8 +1177,7 @@ define zeroext i1 @vreduce_and_nxv1024i1( %v) { ; CHECK-NEXT: vmand.mm v11, v15, v11 ; CHECK-NEXT: vmand.mm v8, v8, v10 ; CHECK-NEXT: vmand.mm v9, v11, v9 -; CHECK-NEXT: vmand.mm v8, v9, v8 -; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmnand.mm v8, v9, v8 ; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret @@ -1243,8 +1233,7 @@ define zeroext i1 @vreduce_smax_nxv1024i1( %v) { ; CHECK-NEXT: vmand.mm v11, v15, v11 ; CHECK-NEXT: vmand.mm v8, v8, v10 ; CHECK-NEXT: vmand.mm v9, v11, v9 -; CHECK-NEXT: vmand.mm v8, v9, v8 -; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmnand.mm v8, v9, v8 ; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret @@ -1272,8 +1261,7 @@ define zeroext i1 @vreduce_umin_nxv1024i1( %v) { ; CHECK-NEXT: vmand.mm v11, v15, v11 ; CHECK-NEXT: vmand.mm v8, v8, v10 ; CHECK-NEXT: vmand.mm v9, v11, v9 -; CHECK-NEXT: vmand.mm v8, v9, v8 -; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmnand.mm v8, v9, v8 ; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cross.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cross.ll index 2e0eb8c429ac27..b1625c07111e44 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cross.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cross.ll @@ -15,7 +15,7 @@ entry: ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_16]] ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec3_float_16]] ; CHECK: %[[#]] = OpExtInst %[[#vec3_float_16]] %[[#op_ext_glsl]] Cross %[[#arg0]] %[[#arg1]] - %hlsl.cross = call <3 x half> @llvm.spv.cross.v4f16(<3 x half> %a, <3 x half> %b) + %hlsl.cross = call <3 x half> @llvm.spv.cross.v3f16(<3 x half> %a, <3 x half> %b) ret <3 x half> %hlsl.cross } @@ -25,9 +25,9 @@ entry: ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_32]] ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec3_float_32]] ; CHECK: %[[#]] = OpExtInst %[[#vec3_float_32]] %[[#op_ext_glsl]] Cross %[[#arg0]] %[[#arg1]] - %hlsl.cross = call <3 x float> @llvm.spv.cross.v4f32(<3 x float> %a, <3 x float> %b) + %hlsl.cross = call <3 x float> @llvm.spv.cross.v3f32(<3 x float> %a, <3 x float> %b) ret <3 x float> %hlsl.cross } -declare <3 x half> @llvm.spv.cross.v4f16(<3 x half>, <3 x half>) -declare <3 x float> @llvm.spv.cross.v4f32(<3 x float>, <3 x float>) +declare <3 x half> @llvm.spv.cross.v3f16(<3 x half>, <3 x half>) +declare <3 x float> @llvm.spv.cross.v3f32(<3 x float>, <3 x float>) diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/length.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/length.ll index b4a9d8e0664b7e..1ac862b79a3fac 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/length.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/length.ll @@ -11,19 +11,21 @@ define noundef half @length_half4(<4 x half> noundef %a) { entry: - ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#]] = OpFunction %[[#float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]] ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] Length %[[#arg0]] - %hlsl.length = call half @llvm.spv.length.v4f16(<4 x half> %a) + %hlsl.length = call half @llvm.spv.length.f16(<4 x half> %a) ret half %hlsl.length } define noundef float @length_float4(<4 x float> noundef %a) { entry: - ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]] ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] Length %[[#arg0]] - %hlsl.length = call float @llvm.spv.length.v4f32(<4 x float> %a) + %hlsl.length = call float @llvm.spv.length.f32(<4 x float> %a) ret float %hlsl.length } -declare half @llvm.spv.length.v4f16(<4 x half>) -declare float @llvm.spv.length.v4f32(<4 x float>) +declare half @llvm.spv.length.f16(<4 x half>) +declare float @llvm.spv.length.f32(<4 x float>) diff --git a/llvm/test/CodeGen/SPIRV/opencl/degrees.ll b/llvm/test/CodeGen/SPIRV/opencl/degrees.ll index 88f97835fe7194..b8d4f52a287959 100644 --- a/llvm/test/CodeGen/SPIRV/opencl/degrees.ll +++ b/llvm/test/CodeGen/SPIRV/opencl/degrees.ll @@ -3,7 +3,7 @@ ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} -; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "OpenCL.std" +; CHECK-DAG: %[[#op_ext_ocl:]] = OpExtInstImport "OpenCL.std" ; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 ; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 @@ -20,7 +20,7 @@ declare <4 x half> @llvm.spv.degrees.v4f16(<4 x half>) define noundef float @degrees_float(float noundef %a) { entry: ; CHECK: %[[#float_32_arg:]] = OpFunctionParameter %[[#float_32]] -; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] degrees %[[#float_32_arg]] +; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_ocl]] degrees %[[#float_32_arg]] %elt.degrees = call float @llvm.spv.degrees.f32(float %a) ret float %elt.degrees } @@ -28,7 +28,7 @@ entry: define noundef half @degrees_half(half noundef %a) { entry: ; CHECK: %[[#float_16_arg:]] = OpFunctionParameter %[[#float_16]] -; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] degrees %[[#float_16_arg]] +; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_ocl]] degrees %[[#float_16_arg]] %elt.degrees = call half @llvm.spv.degrees.f16(half %a) ret half %elt.degrees } @@ -36,7 +36,7 @@ entry: define noundef <4 x float> @degrees_float_vector(<4 x float> noundef %a) { entry: ; CHECK: %[[#vec4_float_32_arg:]] = OpFunctionParameter %[[#vec4_float_32]] -; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] degrees %[[#vec4_float_32_arg]] +; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_ocl]] degrees %[[#vec4_float_32_arg]] %elt.degrees = call <4 x float> @llvm.spv.degrees.v4f32(<4 x float> %a) ret <4 x float> %elt.degrees } @@ -44,7 +44,7 @@ entry: define noundef <4 x half> @degrees_half_vector(<4 x half> noundef %a) { entry: ; CHECK: %[[#vec4_float_16_arg:]] = OpFunctionParameter %[[#vec4_float_16]] -; CHECK: %[[#]] = OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] degrees %[[#vec4_float_16_arg]] +; CHECK: %[[#]] = OpExtInst %[[#vec4_float_16]] %[[#op_ext_ocl]] degrees %[[#vec4_float_16_arg]] %elt.degrees = call <4 x half> @llvm.spv.degrees.v4f16(<4 x half> %a) ret <4 x half> %elt.degrees } diff --git a/llvm/test/CodeGen/SPIRV/opencl/radians.ll b/llvm/test/CodeGen/SPIRV/opencl/radians.ll index f7bb8d5226cd19..5b4f26a13a4c25 100644 --- a/llvm/test/CodeGen/SPIRV/opencl/radians.ll +++ b/llvm/test/CodeGen/SPIRV/opencl/radians.ll @@ -3,7 +3,7 @@ ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} -; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "OpenCL.std" +; CHECK-DAG: %[[#op_ext_ocl:]] = OpExtInstImport "OpenCL.std" ; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 ; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 @@ -20,7 +20,7 @@ declare <4 x half> @llvm.spv.radians.v4f16(<4 x half>) define noundef float @radians_float(float noundef %a) { entry: ; CHECK: %[[#float_32_arg:]] = OpFunctionParameter %[[#float_32]] -; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] radians %[[#float_32_arg]] +; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_ocl]] radians %[[#float_32_arg]] %elt.radians = call float @llvm.spv.radians.f32(float %a) ret float %elt.radians } @@ -28,7 +28,7 @@ entry: define noundef half @radians_half(half noundef %a) { entry: ; CHECK: %[[#float_16_arg:]] = OpFunctionParameter %[[#float_16]] -; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] radians %[[#float_16_arg]] +; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_ocl]] radians %[[#float_16_arg]] %elt.radians = call half @llvm.spv.radians.f16(half %a) ret half %elt.radians } @@ -36,7 +36,7 @@ entry: define noundef <4 x float> @radians_float_vector(<4 x float> noundef %a) { entry: ; CHECK: %[[#vec4_float_32_arg:]] = OpFunctionParameter %[[#vec4_float_32]] -; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] radians %[[#vec4_float_32_arg]] +; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_ocl]] radians %[[#vec4_float_32_arg]] %elt.radians = call <4 x float> @llvm.spv.radians.v4f32(<4 x float> %a) ret <4 x float> %elt.radians } @@ -44,7 +44,7 @@ entry: define noundef <4 x half> @radians_half_vector(<4 x half> noundef %a) { entry: ; CHECK: %[[#vec4_float_16_arg:]] = OpFunctionParameter %[[#vec4_float_16]] -; CHECK: %[[#]] = OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] radians %[[#vec4_float_16_arg]] +; CHECK: %[[#]] = OpExtInst %[[#vec4_float_16]] %[[#op_ext_ocl]] radians %[[#vec4_float_16_arg]] %elt.radians = call <4 x half> @llvm.spv.radians.v4f16(<4 x half> %a) ret <4 x half> %elt.radians } diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s index 2480be97a7a646..1aefd1f0a7d192 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s @@ -1706,11 +1706,11 @@ v_cvt_u32_f64 v5, src_scc v_cvt_u32_f64 v255, 0xaf123456 // GFX11: v_cvt_u32_f64_e32 v255, 0xaf123456 ; encoding: [0xff,0x2a,0xfe,0x7f,0x56,0x34,0x12,0xaf] -v_cvt_u32_u16 v5, v1 -// GFX11: v_cvt_u32_u16_e32 v5, v1 ; encoding: [0x01,0xd7,0x0a,0x7e] +v_cvt_u32_u16 v5, v1.l +// GFX11: v_cvt_u32_u16_e32 v5, v1.l ; encoding: [0x01,0xd7,0x0a,0x7e] -v_cvt_u32_u16 v5, v127 -// GFX11: v_cvt_u32_u16_e32 v5, v127 ; encoding: [0x7f,0xd7,0x0a,0x7e] +v_cvt_u32_u16 v5, v127.l +// GFX11: v_cvt_u32_u16_e32 v5, v127.l ; encoding: [0x7f,0xd7,0x0a,0x7e] v_cvt_u32_u16 v5, s1 // GFX11: v_cvt_u32_u16_e32 v5, s1 ; encoding: [0x01,0xd6,0x0a,0x7e] @@ -1751,6 +1751,12 @@ v_cvt_u32_u16 v5, src_scc v_cvt_u32_u16 v255, 0xfe0b // GFX11: v_cvt_u32_u16_e32 v255, 0xfe0b ; encoding: [0xff,0xd6,0xfe,0x7f,0x0b,0xfe,0x00,0x00] +v_cvt_u32_u16 v5, v1.h +// GFX11: v_cvt_u32_u16_e32 v5, v1.h ; encoding: [0x81,0xd7,0x0a,0x7e] + +v_cvt_u32_u16 v5, v127.h +// GFX11: v_cvt_u32_u16_e32 v5, v127.h ; encoding: [0xff,0xd7,0x0a,0x7e] + v_exp_f16 v5.l, v1.l // GFX11: v_exp_f16_e32 v5.l, v1.l ; encoding: [0x01,0xb1,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s index 0f77279397485e..2bdb9ecfb7658b 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s @@ -1280,47 +1280,56 @@ v_cvt_u32_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 v_cvt_u32_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_cvt_u32_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x0e,0xfe,0x7f,0xff,0x6f,0x35,0x30] -v_cvt_u32_u16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: v_cvt_u32_u16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1b,0x00,0xff] +v_cvt_u32_u16 v5, v1.l quad_perm:[3,2,1,0] +// GFX11: v_cvt_u32_u16_dpp v5, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1b,0x00,0xff] -v_cvt_u32_u16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: v_cvt_u32_u16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0xe4,0x00,0xff] +v_cvt_u32_u16 v5, v1.l quad_perm:[0,1,2,3] +// GFX11: v_cvt_u32_u16_dpp v5, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0xe4,0x00,0xff] -v_cvt_u32_u16 v5, v1 row_mirror -// GFX11: v_cvt_u32_u16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x40,0x01,0xff] +v_cvt_u32_u16 v5, v1.l row_mirror +// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x40,0x01,0xff] -v_cvt_u32_u16 v5, v1 row_half_mirror -// GFX11: v_cvt_u32_u16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x41,0x01,0xff] +v_cvt_u32_u16 v5, v1.l row_half_mirror +// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x41,0x01,0xff] -v_cvt_u32_u16 v5, v1 row_shl:1 -// GFX11: v_cvt_u32_u16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x01,0x01,0xff] +v_cvt_u32_u16 v5, v1.l row_shl:1 +// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x01,0x01,0xff] -v_cvt_u32_u16 v5, v1 row_shl:15 -// GFX11: v_cvt_u32_u16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x0f,0x01,0xff] +v_cvt_u32_u16 v5, v1.l row_shl:15 +// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x0f,0x01,0xff] -v_cvt_u32_u16 v5, v1 row_shr:1 -// GFX11: v_cvt_u32_u16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x11,0x01,0xff] +v_cvt_u32_u16 v5, v1.l row_shr:1 +// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x11,0x01,0xff] -v_cvt_u32_u16 v5, v1 row_shr:15 -// GFX11: v_cvt_u32_u16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1f,0x01,0xff] +v_cvt_u32_u16 v5, v1.l row_shr:15 +// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1f,0x01,0xff] -v_cvt_u32_u16 v5, v1 row_ror:1 -// GFX11: v_cvt_u32_u16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x21,0x01,0xff] +v_cvt_u32_u16 v5, v1.l row_ror:1 +// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x21,0x01,0xff] -v_cvt_u32_u16 v5, v1 row_ror:15 -// GFX11: v_cvt_u32_u16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x2f,0x01,0xff] +v_cvt_u32_u16 v5, v1.l row_ror:15 +// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x2f,0x01,0xff] -v_cvt_u32_u16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_cvt_u32_u16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x50,0x01,0xff] +v_cvt_u32_u16 v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x50,0x01,0xff] -v_cvt_u32_u16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_cvt_u32_u16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x5f,0x01,0x01] +v_cvt_u32_u16 v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x5f,0x01,0x01] -v_cvt_u32_u16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_cvt_u32_u16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x60,0x09,0x13] +v_cvt_u32_u16 v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x60,0x09,0x13] -v_cvt_u32_u16 v255, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_cvt_u32_u16_dpp v255, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0x7f,0x6f,0x05,0x30] +v_cvt_u32_u16 v255, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_cvt_u32_u16_dpp v255, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0x7f,0x6f,0x05,0x30] + +v_cvt_u32_u16 v5, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_cvt_u32_u16_dpp v5, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x7f,0x5f,0x01,0x01] + +v_cvt_u32_u16 v5, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_cvt_u32_u16_dpp v5, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x81,0x60,0x09,0x13] + +v_cvt_u32_u16 v255, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_cvt_u32_u16_dpp v255, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0xff,0x6f,0x05,0x30] v_exp_f16 v5.l, v1.l quad_perm:[3,2,1,0] // GFX11: v_exp_f16_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s index 4a89305a5b353c..ba0c3495de2bbe 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s @@ -317,14 +317,23 @@ v_cvt_u32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_u32_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_cvt_u32_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x0e,0xfe,0x7f,0xff,0x00,0x00,0x00] -v_cvt_u32_u16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cvt_u32_u16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_u32_u16 v5, v1.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_u32_u16_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05] -v_cvt_u32_u16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_cvt_u32_u16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_u32_u16 v5, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_cvt_u32_u16_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05] -v_cvt_u32_u16 v255, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_cvt_u32_u16_dpp v255, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xd6,0xfe,0x7f,0x7f,0x00,0x00,0x00] +v_cvt_u32_u16 v255, v127.l dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_cvt_u32_u16_dpp v255, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xd6,0xfe,0x7f,0x7f,0x00,0x00,0x00] + +v_cvt_u32_u16 v5, v127.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_u32_u16_dpp v5, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x7f,0x77,0x39,0x05] + +v_cvt_u32_u16 v5, v1.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_cvt_u32_u16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xd6,0x0a,0x7e,0x81,0x77,0x39,0x05] + +v_cvt_u32_u16 v255, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_cvt_u32_u16_dpp v255, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xd6,0xfe,0x7f,0xff,0x00,0x00,0x00] v_exp_f16 v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_exp_f16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb0,0x0a,0x7e,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s index 7d29adcd73cccf..dea33dc4c5cfbc 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s @@ -431,6 +431,24 @@ v_cvt_u32_u16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] v_cvt_u32_u16_e32 v5, v199 quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction +v_cvt_u32_u16_e32 v5.h, v199.h +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_u32_u16_e32 v5.h, v199.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_u32_u16_e32 v5.h, v199.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_u32_u16_e32 v5.l, v199.l +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_u32_u16_e32 v5.l, v199.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_u32_u16_e32 v5.l, v199.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + v_exp_f16_e32 v128.h, 0xfe0b // GFX11: :[[@LINE-1]]:15: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s index f2dbb782186f67..5cb81c640f4135 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s @@ -1142,14 +1142,23 @@ v_cvt_u16_f16 v5.l, v199.l dpp8:[7,6,5,4,3,2,1,0] v_cvt_u16_f16 v5.l, v199.l quad_perm:[3,2,1,0] // GFX11: v_cvt_u16_f16_e64_dpp v5.l, v199.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0xc7,0x1b,0x00,0xff] -v_cvt_u32_u16 v5, v199 -// GFX11: v_cvt_u32_u16_e64 v5, v199 ; encoding: [0x05,0x00,0xeb,0xd5,0xc7,0x01,0x00,0x00] +v_cvt_u32_u16 v5, v199.h +// GFX11: v_cvt_u32_u16_e64 v5, v199.h op_sel:[1,0] ; encoding: [0x05,0x08,0xeb,0xd5,0xc7,0x01,0x00,0x00] -v_cvt_u32_u16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cvt_u32_u16_e64_dpp v5, v199 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xc7,0x77,0x39,0x05] +v_cvt_u32_u16 v5, v199.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_u32_u16_e64_dpp v5, v199.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xc7,0x77,0x39,0x05] -v_cvt_u32_u16 v5, v199 quad_perm:[3,2,1,0] -// GFX11: v_cvt_u32_u16_e64_dpp v5, v199 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xc7,0x1b,0x00,0xff] +v_cvt_u32_u16 v5, v199.h quad_perm:[3,2,1,0] +// GFX11: v_cvt_u32_u16_e64_dpp v5, v199.h op_sel:[1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xc7,0x1b,0x00,0xff] + +v_cvt_u32_u16 v5, v199.l +// GFX11: v_cvt_u32_u16_e64 v5, v199.l ; encoding: [0x05,0x00,0xeb,0xd5,0xc7,0x01,0x00,0x00] + +v_cvt_u32_u16 v5, v199.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_u32_u16_e64_dpp v5, v199.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xc7,0x77,0x39,0x05] + +v_cvt_u32_u16 v5, v199.l quad_perm:[3,2,1,0] +// GFX11: v_cvt_u32_u16_e64_dpp v5, v199.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xc7,0x1b,0x00,0xff] v_exp_f16 v128, 0xfe0b // GFX11: v_exp_f16_e64 v128, 0xfe0b ; encoding: [0x80,0x00,0xd8,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index 628edf7400576d..fc8d2bdc0540a3 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -2231,53 +2231,77 @@ v_fma_dx9_zero_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 // GFX11: v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_fma_f16 v5, v1, v2, s3 -// GFX11: v_fma_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] +v_fma_f16 v5.l, v1.l, v2.l, s3 +// GFX11: v_fma_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] -v_fma_f16 v5, v255, s2, s105 -// GFX11: v_fma_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] +v_fma_f16 v5.l, v255.l, s2, s105 +// GFX11: v_fma_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] -v_fma_f16 v5, s1, v255, exec_hi -// GFX11: v_fma_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] +v_fma_f16 v5.l, s1, v255.l, exec_hi +// GFX11: v_fma_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] -v_fma_f16 v5, s105, s105, exec_lo -// GFX11: v_fma_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] +v_fma_f16 v5.l, s105, s105, exec_lo +// GFX11: v_fma_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] -v_fma_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: v_fma_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] +v_fma_f16 v5.l, vcc_lo, ttmp15, v3.l +// GFX11: v_fma_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] -v_fma_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: v_fma_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX11: v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] +v_fma_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: v_fma_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] -v_fma_f16 v5, m0, 0.5, m0 -// GFX11: v_fma_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] +v_fma_f16 v5.l, m0, 0.5, m0 +// GFX11: v_fma_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] -v_fma_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: v_fma_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] +v_fma_f16 v5.l, |exec_lo|, -1, vcc_hi +// GFX11: v_fma_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] -v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] -// GFX11: v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] +v_fma_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX11: v_fma_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] -v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] -// GFX11: v_fma_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +v_fma_f16 v5.l, null, exec_lo, -|0xfe0b| +// GFX11: v_fma_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] -// GFX11: v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] +v_fma_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX11: v_fma_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] -v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] -// GFX11: v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] +v_fma_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX11: v_fma_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] -v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX11: v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] +v_fma_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: v_fma_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] -v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp -// GFX11: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp +// GFX11: v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 -// GFX11: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 +// GFX11: v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] + +v_fma_f16 v5.l, v255.h, s2, s105 +// GFX11: v_fma_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x48,0xd6,0xff,0x05,0xa4,0x01] + +v_fma_f16 v5.l, s1, v255.h, exec_hi +// GFX11: v_fma_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0x01,0xfe,0xff,0x01] + +v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX11: v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_fma_f16 v5.l, -|exec_hi|, null, -|vcc_lo| +// GFX11: v_fma_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_fma_f16 v5.l, -1, -|exec_hi|, -|src_scc| +// GFX11: v_fma_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_fma_f16 v5.l, 0.5, -m0, 0.5 +// GFX11: v_fma_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x48,0xd6,0xf0,0xfa,0xc0,0x43] + +v_fma_f16 v5.l, -src_scc, |vcc_lo|, -1 +// GFX11: v_fma_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x48,0xd6,0xfd,0xd4,0x04,0x23] + +v_fma_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 +// GFX11: v_fma_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] v_fma_f32 v5, v1, v2, s3 // GFX11: v_fma_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s index acbdcfc39d983a..f71569433d3264 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s @@ -1508,47 +1508,83 @@ v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 ban v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] -v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_fma_f16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: v_fma_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_fma_f16_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX11: v_fma_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -v_fma_f16_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX11: v_fma_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -v_fma_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX11: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -v_fma_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX11: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -v_fma_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX11: v_fma_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf +// GFX11: v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX11: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX11: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_fma_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_fma_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_fma_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_fma_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 +// GFX11: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x48,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x48,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_fma_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x09,0x13] + +v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -4833,20 +4869,20 @@ v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 // GFX11: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX11: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf // GFX11: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s index b0a9478203a341..c26834c5d45b1e 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s @@ -1342,47 +1342,50 @@ v_cvt_u32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_cvt_u32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_cvt_u32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x87,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x05,0x30] -v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l quad_perm:[3,2,1,0] +// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l quad_perm:[0,1,2,3] +// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_mirror -// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l row_mirror +// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_half_mirror -// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l row_half_mirror +// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_shl:1 -// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l row_shl:1 +// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_shl:15 -// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l row_shl:15 +// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_shr:1 -// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l row_shr:1 +// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_shr:15 -// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l row_shr:15 +// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_ror:1 -// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l row_ror:1 +// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_ror:15 -// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l row_ror:15 +// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +v_cvt_u32_u16_e64_dpp v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -v_cvt_u32_u16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] +v_cvt_u32_u16_e64_dpp v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] -v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30] +v_cvt_u32_u16_e64_dpp v255, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_cvt_u32_u16_e64_dpp v255, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30] + +v_cvt_u32_u16_e64_dpp v255, v255.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x08,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30] v_exp_f16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] // GFX11: v_exp_f16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s index cef501703e69da..2ececc0c78ecdc 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s @@ -814,41 +814,74 @@ v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc7,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_fma_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_fma_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_fma_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x48,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x48,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x48,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +v_fma_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_fma_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x48,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x48,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x48,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x48,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x48,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x13,0x48,0xd6,0xea,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc7,0x48,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] v_fma_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_fma_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -3130,20 +3163,20 @@ v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5 v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 // GFX11: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s index eae5d3e799ba79..259be1da2f664e 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s @@ -397,14 +397,17 @@ v_cvt_u32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_u32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_cvt_u32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x87,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +v_cvt_u32_u16_e64_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +v_cvt_u32_u16_e64_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -v_cvt_u32_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_cvt_u32_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +v_cvt_u32_u16_e64_dpp v255, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_cvt_u32_u16_e64_dpp v255, v255.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_cvt_u32_u16_e64_dpp v255, v255.h dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x08,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] v_exp_f16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_exp_f16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s index 9ecae211ecd86e..379cf0628138ec 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s @@ -1702,11 +1702,11 @@ v_cvt_u32_f64_e64 v5, -|src_scc| v_cvt_u32_f64_e64 v255, 0xaf123456 clamp // GFX11: v_cvt_u32_f64_e64 v255, 0xaf123456 clamp ; encoding: [0xff,0x80,0x95,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -v_cvt_u32_u16_e64 v5, v1 -// GFX11: v_cvt_u32_u16_e64 v5, v1 ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00] +v_cvt_u32_u16_e64 v5, v1.l +// GFX11: v_cvt_u32_u16_e64 v5, v1.l ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00] -v_cvt_u32_u16_e64 v5, v255 -// GFX11: v_cvt_u32_u16_e64 v5, v255 ; encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] +v_cvt_u32_u16_e64 v5, v255.l +// GFX11: v_cvt_u32_u16_e64 v5, v255.l ; encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] v_cvt_u32_u16_e64 v5, s1 // GFX11: v_cvt_u32_u16_e64 v5, s1 ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x00,0x00,0x00] @@ -1747,6 +1747,9 @@ v_cvt_u32_u16_e64 v5, src_scc v_cvt_u32_u16_e64 v255, 0xfe0b // GFX11: v_cvt_u32_u16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +v_cvt_u32_u16_e64 v5, v255.h +// GFX11: [0x05,0x08,0xeb,0xd5,0xff,0x01,0x00,0x00] + v_exp_f16_e64 v5, v1 // GFX11: v_exp_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s index 089ad41448f007..e21e5bf827ed17 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s @@ -1789,11 +1789,11 @@ v_cvt_u32_f64 v5, src_scc v_cvt_u32_f64 v255, 0xaf123456 // GFX12: v_cvt_u32_f64_e32 v255, 0xaf123456 ; encoding: [0xff,0x2a,0xfe,0x7f,0x56,0x34,0x12,0xaf] -v_cvt_u32_u16 v5, v1 -// GFX12: v_cvt_u32_u16_e32 v5, v1 ; encoding: [0x01,0xd7,0x0a,0x7e] +v_cvt_u32_u16 v5, v1.l +// GFX12: v_cvt_u32_u16_e32 v5, v1.l ; encoding: [0x01,0xd7,0x0a,0x7e] -v_cvt_u32_u16 v5, v127 -// GFX12: v_cvt_u32_u16_e32 v5, v127 ; encoding: [0x7f,0xd7,0x0a,0x7e] +v_cvt_u32_u16 v5, v127.l +// GFX12: v_cvt_u32_u16_e32 v5, v127.l ; encoding: [0x7f,0xd7,0x0a,0x7e] v_cvt_u32_u16 v5, s1 // GFX12: v_cvt_u32_u16_e32 v5, s1 ; encoding: [0x01,0xd6,0x0a,0x7e] @@ -1835,6 +1835,12 @@ v_cvt_u32_u16 v5, src_scc v_cvt_u32_u16 v255, 0xfe0b // GFX12: v_cvt_u32_u16_e32 v255, 0xfe0b ; encoding: [0xff,0xd6,0xfe,0x7f,0x0b,0xfe,0x00,0x00] +v_cvt_u32_u16 v5, v1.h +// GFX12: v_cvt_u32_u16_e32 v5, v1.h ; encoding: [0x81,0xd7,0x0a,0x7e] + +v_cvt_u32_u16 v5, v127.h +// GFX12: v_cvt_u32_u16_e32 v5, v127.h ; encoding: [0xff,0xd7,0x0a,0x7e] + v_exp_f16 v5.l, v1.l // GFX12: v_exp_f16_e32 v5.l, v1.l ; encoding: [0x01,0xb1,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s index fc6b9f396a6a7f..e821fb30edac1a 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s @@ -1336,47 +1336,53 @@ v_cvt_u32_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 v_cvt_u32_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_cvt_u32_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x0e,0xfe,0x7f,0xff,0x6f,0x35,0x30] -v_cvt_u32_u16 v5, v1 quad_perm:[3,2,1,0] -// GFX12: v_cvt_u32_u16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1b,0x00,0xff] +v_cvt_u32_u16 v5, v1.l quad_perm:[3,2,1,0] +// GFX12: v_cvt_u32_u16_dpp v5, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1b,0x00,0xff] -v_cvt_u32_u16 v5, v1 quad_perm:[0,1,2,3] -// GFX12: v_cvt_u32_u16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0xe4,0x00,0xff] +v_cvt_u32_u16 v5, v1.l quad_perm:[0,1,2,3] +// GFX12: v_cvt_u32_u16_dpp v5, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0xe4,0x00,0xff] -v_cvt_u32_u16 v5, v1 row_mirror -// GFX12: v_cvt_u32_u16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x40,0x01,0xff] +v_cvt_u32_u16 v5, v1.l row_mirror +// GFX12: v_cvt_u32_u16_dpp v5, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x40,0x01,0xff] -v_cvt_u32_u16 v5, v1 row_half_mirror -// GFX12: v_cvt_u32_u16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x41,0x01,0xff] +v_cvt_u32_u16 v5, v1.l row_half_mirror +// GFX12: v_cvt_u32_u16_dpp v5, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x41,0x01,0xff] -v_cvt_u32_u16 v5, v1 row_shl:1 -// GFX12: v_cvt_u32_u16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x01,0x01,0xff] +v_cvt_u32_u16 v5, v1.l row_shl:1 +// GFX12: v_cvt_u32_u16_dpp v5, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x01,0x01,0xff] -v_cvt_u32_u16 v5, v1 row_shl:15 -// GFX12: v_cvt_u32_u16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x0f,0x01,0xff] +v_cvt_u32_u16 v5, v1.l row_shl:15 +// GFX12: v_cvt_u32_u16_dpp v5, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x0f,0x01,0xff] -v_cvt_u32_u16 v5, v1 row_shr:1 -// GFX12: v_cvt_u32_u16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x11,0x01,0xff] +v_cvt_u32_u16 v5, v1.l row_shr:1 +// GFX12: v_cvt_u32_u16_dpp v5, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x11,0x01,0xff] -v_cvt_u32_u16 v5, v1 row_shr:15 -// GFX12: v_cvt_u32_u16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1f,0x01,0xff] +v_cvt_u32_u16 v5, v1.l row_shr:15 +// GFX12: v_cvt_u32_u16_dpp v5, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1f,0x01,0xff] -v_cvt_u32_u16 v5, v1 row_ror:1 -// GFX12: v_cvt_u32_u16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x21,0x01,0xff] +v_cvt_u32_u16 v5, v1.l row_ror:1 +// GFX12: v_cvt_u32_u16_dpp v5, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x21,0x01,0xff] -v_cvt_u32_u16 v5, v1 row_ror:15 -// GFX12: v_cvt_u32_u16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x2f,0x01,0xff] +v_cvt_u32_u16 v5, v1.l row_ror:15 +// GFX12: v_cvt_u32_u16_dpp v5, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x2f,0x01,0xff] -v_cvt_u32_u16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_cvt_u32_u16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x50,0x01,0xff] +v_cvt_u32_u16 v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cvt_u32_u16_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x50,0x01,0xff] -v_cvt_u32_u16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_cvt_u32_u16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x5f,0x01,0x01] +v_cvt_u32_u16 v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cvt_u32_u16_dpp v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x5f,0x01,0x01] -v_cvt_u32_u16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_cvt_u32_u16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x60,0x09,0x13] +v_cvt_u32_u16 v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cvt_u32_u16_dpp v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x60,0x09,0x13] -v_cvt_u32_u16 v255, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cvt_u32_u16_dpp v255, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0x7f,0x6f,0x05,0x30] +v_cvt_u32_u16 v255, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cvt_u32_u16_dpp v255, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0x7f,0x6f,0x05,0x30] + +v_cvt_u32_u16 v5, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cvt_u32_u16_dpp v5, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x81,0x60,0x09,0x13] + +v_cvt_u32_u16 v255, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cvt_u32_u16_dpp v255, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0xff,0x6f,0x05,0x30] v_exp_f16 v5, v1 quad_perm:[3,2,1,0] // GFX12: v_exp_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s index a77b95e1ef0cde..ecf408ee854451 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s @@ -346,14 +346,20 @@ v_cvt_u32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_u32_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_cvt_u32_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x0e,0xfe,0x7f,0xff,0x00,0x00,0x00] -v_cvt_u32_u16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cvt_u32_u16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_u32_u16 v5, v1.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_u32_u16_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05] -v_cvt_u32_u16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_cvt_u32_u16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_u32_u16 v5, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cvt_u32_u16_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05] -v_cvt_u32_u16 v255, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: v_cvt_u32_u16_dpp v255, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xd6,0xfe,0x7f,0x7f,0x00,0x00,0x00] +v_cvt_u32_u16 v255, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cvt_u32_u16_dpp v255, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xd6,0xfe,0x7f,0x7f,0x00,0x00,0x00] + +v_cvt_u32_u16 v5, v1.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cvt_u32_u16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xd6,0x0a,0x7e,0x81,0x77,0x39,0x05] + +v_cvt_u32_u16 v255, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cvt_u32_u16_dpp v255, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xd6,0xfe,0x7f,0xff,0x00,0x00,0x00] v_exp_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_exp_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb0,0x0a,0x7e,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err.s index 0be79d016b78f8..ad08a5c327dfa5 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err.s @@ -446,6 +446,24 @@ v_cvt_u32_u16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] v_cvt_u32_u16_e32 v5, v199 quad_perm:[3,2,1,0] // GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction +v_cvt_u32_u16_e32 v5, v199.h +// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction + +v_cvt_u32_u16_e32 v5, v199.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction + +v_cvt_u32_u16_e32 v5, v199.h quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction + +v_cvt_u32_u16_e32 v5, v199.l +// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction + +v_cvt_u32_u16_e32 v5, v199.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction + +v_cvt_u32_u16_e32 v5, v199.l quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction + v_exp_f16_e32 v128, 0xfe0b // GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_promote.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_promote.s index 440c1f09f60122..cc5870faec3353 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_promote.s @@ -1102,14 +1102,23 @@ v_cvt_u16_f16 v5.l, v199.l dpp8:[7,6,5,4,3,2,1,0] v_cvt_u16_f16 v5.l, v199.l quad_perm:[3,2,1,0] // GFX12: v_cvt_u16_f16_e64_dpp v5.l, v199.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0xc7,0x1b,0x00,0xff] -v_cvt_u32_u16 v5, v199 -// GFX12: v_cvt_u32_u16_e64 v5, v199 ; encoding: [0x05,0x00,0xeb,0xd5,0xc7,0x01,0x00,0x00] +v_cvt_u32_u16 v5, v199.h +// GFX12: v_cvt_u32_u16_e64 v5, v199.h op_sel:[1,0] ; encoding: [0x05,0x08,0xeb,0xd5,0xc7,0x01,0x00,0x00] -v_cvt_u32_u16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cvt_u32_u16_e64_dpp v5, v199 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xc7,0x77,0x39,0x05] +v_cvt_u32_u16 v5, v199.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_u32_u16_e64_dpp v5, v199.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xc7,0x77,0x39,0x05] -v_cvt_u32_u16 v5, v199 quad_perm:[3,2,1,0] -// GFX12: v_cvt_u32_u16_e64_dpp v5, v199 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xc7,0x1b,0x00,0xff] +v_cvt_u32_u16 v5, v199.h quad_perm:[3,2,1,0] +// GFX12: v_cvt_u32_u16_e64_dpp v5, v199.h op_sel:[1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xc7,0x1b,0x00,0xff] + +v_cvt_u32_u16 v5, v199.l +// GFX12: v_cvt_u32_u16_e64 v5, v199.l ; encoding: [0x05,0x00,0xeb,0xd5,0xc7,0x01,0x00,0x00] + +v_cvt_u32_u16 v5, v199.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_u32_u16_e64_dpp v5, v199.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xc7,0x77,0x39,0x05] + +v_cvt_u32_u16 v5, v199.l quad_perm:[3,2,1,0] +// GFX12: v_cvt_u32_u16_e64_dpp v5, v199.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xc7,0x1b,0x00,0xff] v_exp_f16 v128, 0xfe0b // GFX12: v_exp_f16_e64 v128, 0xfe0b ; encoding: [0x80,0x00,0xd8,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s index 5674d263272013..c2db5b90bb4787 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s @@ -2234,50 +2234,62 @@ v_fma_dx9_zero_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 // GFX12: v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_fma_f16 v5, v1, v2, s3 -// GFX12: v_fma_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] +v_fma_f16 v5.l, v1.l, v2.l, s3 +// GFX12: v_fma_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] -v_fma_f16 v5, v255, s2, s105 -// GFX12: v_fma_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] +v_fma_f16 v5.l, v255.l, s2, s105 +// GFX12: v_fma_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] -v_fma_f16 v5, s1, v255, exec_hi -// GFX12: v_fma_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] +v_fma_f16 v5.l, s1, v255.l, exec_hi +// GFX12: v_fma_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] -v_fma_f16 v5, s105, s105, exec_lo -// GFX12: v_fma_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] +v_fma_f16 v5.l, s105, s105, exec_lo +// GFX12: v_fma_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] -v_fma_f16 v5, vcc_lo, ttmp15, v3 -// GFX12: v_fma_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] +v_fma_f16 v5.l, vcc_lo, ttmp15, v3.l +// GFX12: v_fma_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] -v_fma_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX12: v_fma_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX12: v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX12: v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] +v_fma_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: v_fma_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] -v_fma_f16 v5, m0, 0.5, m0 -// GFX12: v_fma_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] +v_fma_f16 v5.l, m0, 0.5, m0 +// GFX12: v_fma_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] -v_fma_f16 v5, |exec_lo|, -1, vcc_hi -// GFX12: v_fma_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] +v_fma_f16 v5.l, |exec_lo|, -1, vcc_hi +// GFX12: v_fma_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] -v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] -// GFX12: v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] +v_fma_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX12: v_fma_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] -v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] -// GFX12: v_fma_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +v_fma_f16 v5.l, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] +// GFX12: v_fma_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] -// GFX12: v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] +v_fma_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX12: v_fma_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] -v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] -// GFX12: v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] +v_fma_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX12: v_fma_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] -v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX12: v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] +v_fma_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX12: v_fma_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] -v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp -// GFX12: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp +// GFX12: v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +v_fma_f16 v5.l, v255.h, s2, s105 +// GFX12: v_fma_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x48,0xd6,0xff,0x05,0xa4,0x01] + +v_fma_f16 v5.l, s1, v255.h, exec_hi +// GFX12: v_fma_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0x01,0xfe,0xff,0x01] + +v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX12: v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp +// GFX12: v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] v_fma_f32 v5, v1, v2, s3 // GFX12: v_fma_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s index 0fa344f7e73a31..623e66885aaec4 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s @@ -1775,53 +1775,68 @@ v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 ban v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] -v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_fma_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_fma_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_fma_f16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_fma_f16_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_fma_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -v_fma_f16_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_fma_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -v_fma_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -v_fma_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -v_fma_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX12: v_fma_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 +// GFX12: v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX12: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX12: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_fma_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 +// GFX12: v_fma_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_fma_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_fma_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x09,0x13] + +v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -5245,20 +5260,20 @@ v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 // GFX12: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf // GFX12: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s index 657663f4353bac..056ea80d8a99d9 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s @@ -1008,47 +1008,62 @@ v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc7,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, v1.l, 2.0, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, 2.0, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x48,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x48,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x48,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +v_fma_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_fma_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x48,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x48,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x13,0x48,0xd6,0xea,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc7,0x48,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] v_fma_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -3514,20 +3529,20 @@ v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5 v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 // GFX12: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] -v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX12: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s index 4824241735140a..d49a7085ba4977 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s @@ -1852,11 +1852,11 @@ v_cvt_u32_f64_e64 v5, -|src_scc| v_cvt_u32_f64_e64 v255, 0xaf123456 clamp // GFX12: v_cvt_u32_f64_e64 v255, 0xaf123456 clamp ; encoding: [0xff,0x80,0x95,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -v_cvt_u32_u16_e64 v5, v1 -// GFX12: v_cvt_u32_u16_e64 v5, v1 ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00] +v_cvt_u32_u16_e64 v5, v1.l +// GFX12: v_cvt_u32_u16_e64 v5, v1.l ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00] -v_cvt_u32_u16_e64 v5, v255 -// GFX12: v_cvt_u32_u16_e64 v5, v255 ; encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] +v_cvt_u32_u16_e64 v5, v255.l +// GFX12: v_cvt_u32_u16_e64 v5, v255.l ; encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] v_cvt_u32_u16_e64 v5, s1 // GFX12: v_cvt_u32_u16_e64 v5, s1 ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x00,0x00,0x00] @@ -1897,6 +1897,9 @@ v_cvt_u32_u16_e64 v5, src_scc v_cvt_u32_u16_e64 v255, 0xfe0b // GFX12: v_cvt_u32_u16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +v_cvt_u32_u16_e64 v5, v255.h +// GFX12: v_cvt_u32_u16_e64 v5, v255.h op_sel:[1,0] ; encoding: [0x05,0x08,0xeb,0xd5,0xff,0x01,0x00,0x00] + v_exp_f16_e64 v5, v1 // GFX12: v_exp_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s index c09471033d1448..89102ae780f165 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s @@ -1375,47 +1375,50 @@ v_cvt_u32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_cvt_u32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_cvt_u32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x87,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x05,0x30] -v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX12: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l quad_perm:[3,2,1,0] +// GFX12: v_cvt_u32_u16_e64_dpp v5, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX12: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l quad_perm:[0,1,2,3] +// GFX12: v_cvt_u32_u16_e64_dpp v5, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_mirror -// GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l row_mirror +// GFX12: v_cvt_u32_u16_e64_dpp v5, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_half_mirror -// GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l row_half_mirror +// GFX12: v_cvt_u32_u16_e64_dpp v5, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_shl:1 -// GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l row_shl:1 +// GFX12: v_cvt_u32_u16_e64_dpp v5, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_shl:15 -// GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l row_shl:15 +// GFX12: v_cvt_u32_u16_e64_dpp v5, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_shr:1 -// GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l row_shr:1 +// GFX12: v_cvt_u32_u16_e64_dpp v5, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_shr:15 -// GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l row_shr:15 +// GFX12: v_cvt_u32_u16_e64_dpp v5, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_ror:1 -// GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l row_ror:1 +// GFX12: v_cvt_u32_u16_e64_dpp v5, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_ror:15 -// GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l row_ror:15 +// GFX12: v_cvt_u32_u16_e64_dpp v5, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +v_cvt_u32_u16_e64_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cvt_u32_u16_e64_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -v_cvt_u32_u16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +v_cvt_u32_u16_e64_dpp v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cvt_u32_u16_e64_dpp v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -v_cvt_u32_u16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] +v_cvt_u32_u16_e64_dpp v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cvt_u32_u16_e64_dpp v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] -v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30] +v_cvt_u32_u16_e64_dpp v255, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cvt_u32_u16_e64_dpp v255, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30] + +v_cvt_u32_u16_e64_dpp v255, v255.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cvt_u32_u16_e64_dpp v255, v255.h op_sel:[1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x08,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30] v_exp_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] // GFX12: v_exp_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s index be3878878b13db..1b1a91fbf1c8cc 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s @@ -430,14 +430,17 @@ v_cvt_u32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_u32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_cvt_u32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x87,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +v_cvt_u32_u16_e64_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_u32_u16_e64_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +v_cvt_u32_u16_e64_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cvt_u32_u16_e64_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -v_cvt_u32_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: v_cvt_u32_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +v_cvt_u32_u16_e64_dpp v255, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cvt_u32_u16_e64_dpp v255, v255.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_cvt_u32_u16_e64_dpp v255, v255.h dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cvt_u32_u16_e64_dpp v255, v255.h op_sel:[1,0] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x08,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] v_exp_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_exp_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt index a3886e6b3a68dd..57a1da68e845c4 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt @@ -1758,10 +1758,12 @@ # GFX11: v_cvt_u32_f64_e32 v255, 0xaf123456 ; encoding: [0xff,0x2a,0xfe,0x7f,0x56,0x34,0x12,0xaf] 0x01,0xd7,0x0a,0x7e -# GFX11: v_cvt_u32_u16_e32 v5, v1 ; encoding: [0x01,0xd7,0x0a,0x7e] +# GFX11-REAL16: v_cvt_u32_u16_e32 v5, v1.l ; encoding: [0x01,0xd7,0x0a,0x7e] +# GFX11-FAKE16: v_cvt_u32_u16_e32 v5, v1 ; encoding: [0x01,0xd7,0x0a,0x7e] 0x7f,0xd7,0x0a,0x7e -# GFX11: v_cvt_u32_u16_e32 v5, v127 ; encoding: [0x7f,0xd7,0x0a,0x7e] +# GFX11-REAL16: v_cvt_u32_u16_e32 v5, v127.l ; encoding: [0x7f,0xd7,0x0a,0x7e] +# GFX11-FAKE16: v_cvt_u32_u16_e32 v5, v127 ; encoding: [0x7f,0xd7,0x0a,0x7e] 0x01,0xd6,0x0a,0x7e # GFX11: v_cvt_u32_u16_e32 v5, s1 ; encoding: [0x01,0xd6,0x0a,0x7e] @@ -1802,6 +1804,15 @@ 0xff,0xd6,0xfe,0x7f,0x0b,0xfe,0x00,0x00 # GFX11: v_cvt_u32_u16_e32 v255, 0xfe0b ; encoding: [0xff,0xd6,0xfe,0x7f,0x0b,0xfe,0x00,0x00] +0x81,0xd7,0x0a,0x7e +# GFX11-REAL16: v_cvt_u32_u16_e32 v5, v1.h ; encoding: [0x81,0xd7,0x0a,0x7e] +# GFX11-FAKE16: v_cvt_u32_u16_e32 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/ ; encoding: [0x81,0xd7,0x0a,0x7e] + +0xff,0xd7,0x0a,0x7e +# GFX11-REAL16: v_cvt_u32_u16_e32 v5, v127.h ; encoding: [0xff,0xd7,0x0a,0x7e] +# GFX11-FAKE16: v_cvt_u32_u16_e32 v5, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/ ; encoding: [0xff,0xd7,0x0a,0x7e] + + 0x01,0xb1,0x0a,0x7e # GFX11-REAL16: v_exp_f16_e32 v5.l, v1.l ; encoding: [0x01,0xb1,0x0a,0x7e] # GFX11-FAKE16: v_exp_f16_e32 v5, v1 ; encoding: [0x01,0xb1,0x0a,0x7e] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt index 9f857cd05696c6..cabae812925ffb 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt @@ -1359,46 +1359,72 @@ # GFX11: v_cvt_u32_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x0e,0xfe,0x7f,0xff,0x6f,0x3d,0x30] 0xfa,0xd6,0x0a,0x7e,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_u32_u16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1b,0x00,0xff] +# GFX11-REAL16: v_cvt_u32_u16_dpp v5, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1b,0x00,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_u32_u16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0xe4,0x00,0xff] +# GFX11-REAL16: v_cvt_u32_u16_dpp v5, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0xe4,0x00,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x40,0x01,0xff -# GFX11: v_cvt_u32_u16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x40,0x01,0xff] +# GFX11-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x40,0x01,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x41,0x01,0xff -# GFX11: v_cvt_u32_u16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x41,0x01,0xff] +# GFX11-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x41,0x01,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x01,0x01,0xff -# GFX11: v_cvt_u32_u16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x01,0x01,0xff] +# GFX11-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x01,0x01,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_u32_u16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x0f,0x01,0xff] +# GFX11-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x0f,0x01,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x11,0x01,0xff -# GFX11: v_cvt_u32_u16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x11,0x01,0xff] +# GFX11-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x11,0x01,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_u32_u16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1f,0x01,0xff] +# GFX11-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1f,0x01,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x21,0x01,0xff -# GFX11: v_cvt_u32_u16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x21,0x01,0xff] +# GFX11-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x21,0x01,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_u32_u16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x2f,0x01,0xff] +# GFX11-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x2f,0x01,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x50,0x01,0xff -# GFX11: v_cvt_u32_u16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x50,0x01,0xff] +# GFX11-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x50,0x01,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_u32_u16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x5f,0x01,0x01] +# GFX11-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x5f,0x01,0x01] 0xfa,0xd6,0x0a,0x7e,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_u32_u16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x60,0x01,0x13] +# GFX11-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x60,0x01,0x13] 0xfa,0xd6,0xfe,0x7f,0x7f,0x6f,0x0d,0x30 -# GFX11: v_cvt_u32_u16_dpp v255, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0x7f,0x6f,0x0d,0x30] +# GFX11-REAL16: v_cvt_u32_u16_dpp v255, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0x7f,0x6f,0x0d,0x30] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v255, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0x7f,0x6f,0x0d,0x30] + +0xfa,0xd6,0x0a,0x7e,0x7f,0x5f,0x01,0x01 +# GFX11-REAL16: v_cvt_u32_u16_dpp v5, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x7f,0x5f,0x01,0x01] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v5, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x7f,0x5f,0x01,0x01] + +0xfa,0xd6,0x0a,0x7e,0x81,0x60,0x01,0x13 +# GFX11-REAL16: v_cvt_u32_u16_dpp v5, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x81,0x60,0x01,0x13] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v5, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x81,0x60,0x01,0x13] + +0xfa,0xd6,0xfe,0x7f,0xff,0x6f,0x0d,0x30 +# GFX11-REAL16: v_cvt_u32_u16_dpp v255, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0xff,0x6f,0x0d,0x30] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v255, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0xff,0x6f,0x0d,0x30] 0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x00,0xff # GFX11-REAL16: v_exp_f16_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt index c45033916cd054..fc7cbbaea374a8 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt @@ -281,10 +281,24 @@ # GFX11: v_cvt_u32_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x0e,0xfe,0x7f,0xff,0x00,0x00,0x00] 0xe9,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05 -# GFX11: v_cvt_u32_u16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05] +# GFX11-REAL16: v_cvt_u32_u16_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05] 0xea,0xd6,0xfe,0x7f,0x7f,0x00,0x00,0x00 -# GFX11: v_cvt_u32_u16_dpp v255, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xd6,0xfe,0x7f,0x7f,0x00,0x00,0x00] +# GFX11-REAL16: v_cvt_u32_u16_dpp v255, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xd6,0xfe,0x7f,0x7f,0x00,0x00,0x00] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v255, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xd6,0xfe,0x7f,0x7f,0x00,0x00,0x00] + +0xe9,0xd6,0x0a,0x7e,0x7f,0x77,0x39,0x05 +# GFX11-REAL16: v_cvt_u32_u16_dpp v5, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x7f,0x77,0x39,0x05] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v5, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x7f,0x77,0x39,0x05] + +0xe9,0xd6,0x0a,0x7e,0x81,0x77,0x39,0x05 +# GFX11-REAL16: v_cvt_u32_u16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x81,0x77,0x39,0x05] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v5, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x81,0x77,0x39,0x05] + +0xea,0xd6,0xfe,0x7f,0xff,0x00,0x00,0x00 +# GFX11-REAL16: v_cvt_u32_u16_dpp v255, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xd6,0xfe,0x7f,0xff,0x00,0x00,0x00] +# GFX11-FAKE16: v_cvt_u32_u16_dpp v255, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xd6,0xfe,0x7f,0xff,0x00,0x00,0x00] 0xe9,0xb0,0x0a,0x7e,0x01,0x77,0x39,0x05 # GFX11-REAL16: v_exp_f16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb0,0x0a,0x7e,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt index 4990c62b9438cb..f9e236977c9734 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt @@ -2402,53 +2402,125 @@ # GFX11: v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] 0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_fma_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_fma_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_fma_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_fma_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_fma_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_fma_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] +# W32-REAL16: v_fma_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_fma_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_fma_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_fma_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] 0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_fma_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_fma_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_fma_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_fma_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_fma_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_fma_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_fma_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_fma_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_fma_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_fma_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_fma_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_fma_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_fma_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_fma_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_fma_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_fma_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_fma_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_fma_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-REAL16: v_fma_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-FAKE16: v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-REAL16: v_fma_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-FAKE16: v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] 0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_fma_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-REAL16: v_fma_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-FAKE16: v_fma_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-REAL16: v_fma_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-FAKE16: v_fma_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] 0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_fma_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_fma_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_fma_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_fma_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_fma_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-REAL16: v_fma_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-FAKE16: v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-REAL16: v_fma_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-FAKE16: v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] 0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_fma_f16 v5.h, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_fma_f16 v5.h, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] 0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-REAL16: v_fma_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-FAKE16: v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-REAL16: v_fma_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-FAKE16: v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] 0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX11: v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] +# W32-REAL16: v_fma_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] +# W32-FAKE16: v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-REAL16: v_fma_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-FAKE16: v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] 0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23 -# GFX11: v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] +# W32-REAL16: v_fma_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] +# W32-FAKE16: v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] +# W64-REAL16: v_fma_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] +# W64-FAKE16: v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] 0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] # CHECK: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] 0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00 -# GFX11: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x48,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_fma_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x48,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_fma_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x48,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_fma_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x48,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_fma_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x48,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x48,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_fma_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_fma_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_fma_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_fma_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_fma_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_fma_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00 # GFX11: v_fma_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt index d734cd2bba1aad..132fc80dda47d2 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt @@ -4431,46 +4431,130 @@ # W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# GFX11: v_fma_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff -# GFX11: v_fma_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff -# GFX11: v_fma_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] 0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff -# GFX11: v_fma_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] 0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff -# GFX11: v_fma_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] 0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX11: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] 0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX11: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] 0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX11: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] 0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 -# GFX11: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] 0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13 -# GFX11: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] 0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# GFX11: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# W32-REAL16: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# W32-REAL16: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 +# W32-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] + +0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff # W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt index 4f12775fb3796c..282ff229c57e69 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt @@ -1441,46 +1441,64 @@ # GFX11: v_cvt_u32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x87,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +# GFX11-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +# GFX11-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +# GFX11-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +# GFX11-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +# GFX11-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +# GFX11-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +# GFX11-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +# GFX11-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +# GFX11-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +# GFX11-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +# GFX11-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +# GFX11-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +# GFX11-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] 0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +# GFX11-REAL16: v_cvt_u32_u16_e64_dpp v255, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +# GFX11-FAKE16: v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +0xff,0x08,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 +# GFX11-REAL16: v_cvt_u32_u16_e64_dpp v255, v255.h op_sel:[1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x08,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +# GFX11-FAKE16: v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] 0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff # GFX11-REAL16: v_exp_f16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt index 3b3d39844b7921..714fac9fe62a0b 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt @@ -2547,46 +2547,130 @@ # W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] 0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX11: v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX11: v_fma_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_fma_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_fma_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x48,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_fma_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x01,0x48,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05 -# GFX11: v_fma_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] 0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05 -# GFX11: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] 0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05 -# GFX11: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] 0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05 -# GFX11: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] 0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05 -# GFX11: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] 0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05 -# GFX11: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] 0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 -# GFX11: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +0x05,0x78,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x78,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x0a,0x48,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05 +# W32-REAL16: v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x48,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x48,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x48,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x48,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +0x05,0x13,0x48,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05 +# W32-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x48,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x48,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x48,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x48,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] + +0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 +# W32-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] 0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 # W32-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt index 638daca3fdd4f3..5995762ce6ff18 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt @@ -403,10 +403,16 @@ # GFX11: v_cvt_u32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x87,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] 0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +# GFX11-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 0xff,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_cvt_u32_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +# GFX11-REAL16: v_cvt_u32_u16_e64_dpp v255, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +# GFX11-FAKE16: v_cvt_u32_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +0xff,0x08,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 +# GFX11-REAL16: v_cvt_u32_u16_e64_dpp v255, v255.h op_sel:[1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x08,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +# GFX11-FAKE16: v_cvt_u32_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] 0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 # GFX11-REAL16: v_exp_f16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt index 1b7677b8c088cb..d7e73909286a29 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt @@ -1779,10 +1779,12 @@ # GFX11: v_cvt_u32_f64_e64 v255, 0xaf123456 clamp ; encoding: [0xff,0x80,0x95,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] 0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00 -# GFX11: v_cvt_u32_u16_e64 v5, v1 ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00] +# GFX11-REAL16: v_cvt_u32_u16_e64 v5, v1.l ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00] +# GFX11-FAKE16: v_cvt_u32_u16_e64 v5, v1 ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00] 0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00 -# GFX11: v_cvt_u32_u16_e64 v5, v255 ; encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] +# GFX11-REAL16: v_cvt_u32_u16_e64 v5, v255.l ; encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] +# GFX11-FAKE16: v_cvt_u32_u16_e64 v5, v255 ; encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] 0x05,0x00,0xeb,0xd5,0x01,0x00,0x00,0x00 # GFX11: v_cvt_u32_u16_e64 v5, s1 ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x00,0x00,0x00] @@ -1823,6 +1825,10 @@ 0xff,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 # GFX11: v_cvt_u32_u16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x08,0xeb,0xd5,0xff,0x01,0x00,0x00 +# GFX11-REAL16: v_cvt_u32_u16_e64 v5, v255.h op_sel:[1,0] ; encoding: [0x05,0x08,0xeb,0xd5,0xff,0x01,0x00,0x00] +# GFX11-FAKE16: v_cvt_u32_u16_e64 v5, v255 ; encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] + 0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00 # GFX11-REAL16: v_exp_f16_e64 v5.l, v1.l ; encoding: [0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00] # GFX11-FAKE16: v_exp_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp16.txt index 1635fdab66d868..181b78fb5ed868 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp16.txt @@ -1439,46 +1439,68 @@ # GFX12: v_cvt_u32_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x0e,0xfe,0x7f,0xff,0x6f,0x3d,0x30] 0xfa,0xd6,0x0a,0x7e,0x01,0x1b,0x00,0xff -# GFX12: v_cvt_u32_u16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1b,0x00,0xff] +# GFX12-REAL16: v_cvt_u32_u16_dpp v5, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1b,0x00,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0xe4,0x00,0xff -# GFX12: v_cvt_u32_u16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0xe4,0x00,0xff] +# GFX12-REAL16: v_cvt_u32_u16_dpp v5, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0xe4,0x00,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x40,0x01,0xff -# GFX12: v_cvt_u32_u16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x40,0x01,0xff] +# GFX12-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x40,0x01,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x41,0x01,0xff -# GFX12: v_cvt_u32_u16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x41,0x01,0xff] +# GFX12-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x41,0x01,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x01,0x01,0xff -# GFX12: v_cvt_u32_u16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x01,0x01,0xff] +# GFX12-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x01,0x01,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x0f,0x01,0xff -# GFX12: v_cvt_u32_u16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x0f,0x01,0xff] +# GFX12-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x0f,0x01,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x11,0x01,0xff -# GFX12: v_cvt_u32_u16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x11,0x01,0xff] +# GFX12-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x11,0x01,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x1f,0x01,0xff -# GFX12: v_cvt_u32_u16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1f,0x01,0xff] +# GFX12-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1f,0x01,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x21,0x01,0xff -# GFX12: v_cvt_u32_u16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x21,0x01,0xff] +# GFX12-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x21,0x01,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x2f,0x01,0xff -# GFX12: v_cvt_u32_u16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x2f,0x01,0xff] +# GFX12-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x2f,0x01,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x50,0x01,0xff -# GFX12: v_cvt_u32_u16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x50,0x01,0xff] +# GFX12-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x50,0x01,0xff] 0xfa,0xd6,0x0a,0x7e,0x01,0x5f,0x01,0x01 -# GFX12: v_cvt_u32_u16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x5f,0x01,0x01] +# GFX12-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x5f,0x01,0x01] 0xfa,0xd6,0x0a,0x7e,0x01,0x60,0x01,0x13 -# GFX12: v_cvt_u32_u16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x60,0x01,0x13] +# GFX12-REAL16: v_cvt_u32_u16_dpp v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x60,0x01,0x13] 0xfa,0xd6,0xfe,0x7f,0x7f,0x6f,0x0d,0x30 -# GFX12: v_cvt_u32_u16_dpp v255, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0x7f,0x6f,0x0d,0x30] +# GFX12-REAL16: v_cvt_u32_u16_dpp v255, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0x7f,0x6f,0x0d,0x30] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v255, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0x7f,0x6f,0x0d,0x30] + +0xfa,0xd6,0x0a,0x7e,0x81,0x60,0x01,0x13 +# GFX12-REAL16: v_cvt_u32_u16_dpp v5, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x81,0x60,0x01,0x13] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v5, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x81,0x60,0x01,0x13] + +0xfa,0xd6,0xfe,0x7f,0xff,0x6f,0x0d,0x30 +# GFX12-REAL16: v_cvt_u32_u16_dpp v255, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0xff,0x6f,0x0d,0x30] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v255, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0xff,0x6f,0x0d,0x30] 0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x00,0xff # GFX12-REAL16: v_exp_f16_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt index c1fa6aa634f498..7f9b268440cfc0 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt @@ -286,10 +286,20 @@ # GFX12: v_cvt_u32_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x0e,0xfe,0x7f,0xff,0x00,0x00,0x00] 0xe9,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_u32_u16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05] +# GFX12-REAL16: v_cvt_u32_u16_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05] 0xea,0xd6,0xfe,0x7f,0x7f,0x00,0x00,0x00 -# GFX12: v_cvt_u32_u16_dpp v255, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xd6,0xfe,0x7f,0x7f,0x00,0x00,0x00] +# GFX12-REAL16: v_cvt_u32_u16_dpp v255, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xd6,0xfe,0x7f,0x7f,0x00,0x00,0x00] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v255, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xd6,0xfe,0x7f,0x7f,0x00,0x00,0x00] + +0xe9,0xd6,0x0a,0x7e,0x81,0x77,0x39,0x05 +# GFX12-REAL16: v_cvt_u32_u16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x81,0x77,0x39,0x05] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v5, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x81,0x77,0x39,0x05] + +0xea,0xd6,0xfe,0x7f,0xff,0x00,0x00,0x00 +# GFX12-REAL16: v_cvt_u32_u16_dpp v255, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xd6,0xfe,0x7f,0xff,0x00,0x00,0x00] +# GFX12-FAKE16: v_cvt_u32_u16_dpp v255, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xd6,0xfe,0x7f,0xff,0x00,0x00,0x00] 0xe9,0xb0,0x0a,0x7e,0x01,0x77,0x39,0x05 # GFX12-REAL16: v_exp_f16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb0,0x0a,0x7e,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt index 58696613e852f5..6d48440633f4f9 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt @@ -2377,49 +2377,118 @@ # GFX12: v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] 0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00 -# GFX12: v_fma_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_fma_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_fma_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_fma_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_fma_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01 -# GFX12: v_fma_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] +# W32-REAL16: v_fma_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_fma_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_fma_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_fma_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] 0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01 -# GFX12: v_fma_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_fma_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_fma_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_fma_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_fma_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01 -# GFX12: v_fma_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_fma_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_fma_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_fma_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_fma_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX12: v_fma_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_fma_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_fma_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_fma_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_fma_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX12: v_fma_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_fma_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_fma_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX12: v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-REAL16: v_fma_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-FAKE16: v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-REAL16: v_fma_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-FAKE16: v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] 0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX12: v_fma_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-REAL16: v_fma_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-FAKE16: v_fma_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-REAL16: v_fma_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-FAKE16: v_fma_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] 0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01 -# GFX12: v_fma_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_fma_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_fma_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_fma_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_fma_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX12: v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-REAL16: v_fma_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-FAKE16: v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-REAL16: v_fma_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-FAKE16: v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] 0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX12: v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_fma_f16 v5.h, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_fma_f16 v5.h, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] 0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX12: v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-REAL16: v_fma_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-FAKE16: v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-REAL16: v_fma_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-FAKE16: v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] 0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX12: v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] +# W32-REAL16: v_fma_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] +# W32-FAKE16: v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-REAL16: v_fma_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-FAKE16: v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] 0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23 -# GFX12: v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] +# W32-REAL16: v_fma_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] +# W32-FAKE16: v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] +# W64-REAL16: v_fma_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] +# W64-FAKE16: v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] 0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX12: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x48,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_fma_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x48,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_fma_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x48,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_fma_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x48,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_fma_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x48,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x48,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_fma_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_fma_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_fma_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_fma_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_fma_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_fma_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00 # GFX12: v_fma_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt index 83370defe6349c..561d3a6ca7f90f 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt @@ -4764,49 +4764,124 @@ # W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x48,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_fma_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# GFX12: v_fma_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff -# GFX12: v_fma_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff -# GFX12: v_fma_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] 0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff -# GFX12: v_fma_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] 0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff -# GFX12: v_fma_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] 0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX12: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] 0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX12: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] 0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX12: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] 0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 -# GFX12: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] 0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13 -# GFX12: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] 0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# GFX12: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# W32-REAL16: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 +# W32-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] + +0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff # W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt index 2a25e1eefae496..06b4bfcc8985fd 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt @@ -2814,52 +2814,130 @@ # W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] 0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x48,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_fma_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x48,0xd6,0xe9,0xec,0x0d,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_fma_f16_e64_dpp v5, v1, 4.0, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0xec,0x0d,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, 4.0, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0xec,0x0d,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, 4.0, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0xec,0x0d,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, 4.0, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0xec,0x0d,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, 4.0, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0xec,0x0d,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX12: v_fma_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_fma_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_fma_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x48,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_fma_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x01,0x48,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05 -# GFX12: v_fma_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] 0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05 -# GFX12: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] 0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05 -# GFX12: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] 0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05 -# GFX12: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] 0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05 -# GFX12: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] 0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05 -# GFX12: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] 0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 -# GFX12: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +0x05,0x78,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x0a,0x48,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05 +# W32-REAL16: v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x48,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x48,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x48,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x48,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +0x05,0x13,0x48,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05 +# W32-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x48,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x48,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x48,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x48,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] + +0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 +# W32-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] 0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 # W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt index 43c18a78366878..25c4e4ad43b1b9 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt @@ -1831,10 +1831,12 @@ # GFX12: v_cvt_u32_f64_e64 v255, 0xaf123456 clamp ; encoding: [0xff,0x80,0x95,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] 0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00 -# GFX12: v_cvt_u32_u16_e64 v5, v1 ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00] +# GFX12-REAL16: v_cvt_u32_u16_e64 v5, v1.l ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00] +# GFX12-FAKE16: v_cvt_u32_u16_e64 v5, v1 ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00] 0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00 -# GFX12: v_cvt_u32_u16_e64 v5, v255 ; encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] +# GFX12-REAL16: v_cvt_u32_u16_e64 v5, v255.l ; encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] +# GFX12-FAKE16: v_cvt_u32_u16_e64 v5, v255 ; encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] 0x05,0x00,0xeb,0xd5,0x01,0x00,0x00,0x00 # GFX12: v_cvt_u32_u16_e64 v5, s1 ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x00,0x00,0x00] @@ -1875,6 +1877,10 @@ 0xff,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 # GFX12: v_cvt_u32_u16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x08,0xeb,0xd5,0xff,0x01,0x00,0x00 +# GFX12-REAL16: v_cvt_u32_u16_e64 v5, v255.h op_sel:[1,0] ; encoding: [0x05,0x08,0xeb,0xd5,0xff,0x01,0x00,0x00] +# GFX12-FAKE16: v_cvt_u32_u16_e64 v5, v255 ; encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] + 0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00 # GFX12-REAL16: v_exp_f16_e64 v5.l, v1.l ; encoding: [0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00] # GFX12-FAKE16: v_exp_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt index cc344f329c2d29..f447fb42afc7b2 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt @@ -1471,46 +1471,64 @@ # GFX12: v_cvt_u32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x87,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +# GFX12-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +# GFX12-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff -# GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +# GFX12-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff -# GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +# GFX12-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff -# GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +# GFX12-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +# GFX12-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff -# GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +# GFX12-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +# GFX12-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff -# GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +# GFX12-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +# GFX12-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff -# GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +# GFX12-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +# GFX12-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] 0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 -# GFX12: v_cvt_u32_u16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +# GFX12-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] 0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 -# GFX12: v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +# GFX12-REAL16: v_cvt_u32_u16_e64_dpp v255, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +# GFX12-FAKE16: v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +0xff,0x08,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 +# GFX12-REAL16: v_cvt_u32_u16_e64_dpp v255, v255.h op_sel:[1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x08,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +# GFX12-FAKE16: v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] 0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff # GFX12-REAL16: v_exp_f16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt index 428349fec54faf..7cf415aad5a190 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt @@ -433,10 +433,16 @@ # GFX12: v_cvt_u32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x87,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] 0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +# GFX12-REAL16: v_cvt_u32_u16_e64_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 0xff,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 -# GFX12: v_cvt_u32_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +# GFX12-REAL16: v_cvt_u32_u16_e64_dpp v255, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +# GFX12-FAKE16: v_cvt_u32_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +0xff,0x08,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 +# GFX12-REAL16: v_cvt_u32_u16_e64_dpp v255, v255.h op_sel:[1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x08,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +# GFX12-FAKE16: v_cvt_u32_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] 0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 # GFX12-REAL16: v_exp_f16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/RISCV/xqcicm-invalid.s b/llvm/test/MC/RISCV/xqcicm-invalid.s new file mode 100644 index 00000000000000..8b37ed4edeb006 --- /dev/null +++ b/llvm/test/MC/RISCV/xqcicm-invalid.s @@ -0,0 +1,152 @@ +# Xqcicm - Qualcomm uC Conditional Move Extension +# RUN: not llvm-mc -triple riscv32 -mattr=+experimental-xqcicm < %s 2>&1 \ +# RUN: | FileCheck -check-prefixes=CHECK,CHECK-IMM %s +# RUN: not llvm-mc -triple riscv32 -mattr=-experimental-xqcicm < %s 2>&1 \ +# RUN: | FileCheck -check-prefixes=CHECK,CHECK-EXT %s + +# CHECK: :[[@LINE+1]]:12: error: invalid operand for instruction +qc.c.mveqz 9, x10 + +# CHECK: :[[@LINE+1]]:1: error: too few operands for instruction +qc.c.mveqz x9 + +# CHECK-EXT: :[[@LINE+1]]:1: error: instruction requires the following: 'Xqcicm' (Qualcomm uC Conditional Move Extension) +qc.c.mveqz x9, x10 + + +# CHECK: :[[@LINE+1]]:9: error: invalid operand for instruction +qc.mveq 9, x10, x11, x12 + +# CHECK: :[[@LINE+1]]:1: error: too few operands for instruction +qc.mveq x9 + +# CHECK-EXT: :[[@LINE+1]]:1: error: instruction requires the following: 'Xqcicm' (Qualcomm uC Conditional Move Extension) +qc.mveq x9, x10, x11, x12 + + +# CHECK: :[[@LINE+1]]:9: error: invalid operand for instruction +qc.mvge 9, x10, x11, x12 + +# CHECK: :[[@LINE+1]]:1: error: too few operands for instruction +qc.mvge x9 + +# CHECK-EXT: :[[@LINE+1]]:1: error: instruction requires the following: 'Xqcicm' (Qualcomm uC Conditional Move Extension) +qc.mvge x9, x10, x11, x12 + + +# CHECK: :[[@LINE+1]]:10: error: invalid operand for instruction +qc.mvgeu 9, x10, x11, x12 + +# CHECK: :[[@LINE+1]]:1: error: too few operands for instruction +qc.mvgeu x9 + +# CHECK-EXT: :[[@LINE+1]]:1: error: instruction requires the following: 'Xqcicm' (Qualcomm uC Conditional Move Extension) +qc.mvgeu x9, x10, x11, x12 + + +# CHECK: :[[@LINE+1]]:9: error: invalid operand for instruction +qc.mvlt 9, x10, x11, x12 + +# CHECK: :[[@LINE+1]]:1: error: too few operands for instruction +qc.mvlt x9 + +# CHECK-EXT: :[[@LINE+1]]:1: error: instruction requires the following: 'Xqcicm' (Qualcomm uC Conditional Move Extension) +qc.mvlt x9, x10, x11, x12 + + +# CHECK: :[[@LINE+1]]:10: error: invalid operand for instruction +qc.mvltu 9, x10, x11, x12 + +# CHECK: :[[@LINE+1]]:1: error: too few operands for instruction +qc.mvltu x9 + +# CHECK-EXT: :[[@LINE+1]]:1: error: instruction requires the following: 'Xqcicm' (Qualcomm uC Conditional Move Extension) +qc.mvltu x9, x10, x11, x12 + + +# CHECK: :[[@LINE+1]]:9: error: invalid operand for instruction +qc.mvne 9, x10, x11, x12 + +# CHECK: :[[@LINE+1]]:1: error: too few operands for instruction +qc.mvne x9 + +# CHECK-EXT: :[[@LINE+1]]:1: error: instruction requires the following: 'Xqcicm' (Qualcomm uC Conditional Move Extension) +qc.mvne x9, x10, x11, x12 + + +# CHECK: :[[@LINE+1]]:10: error: invalid operand for instruction +qc.mveqi 9, x10, 5, x12 + +# CHECK: :[[@LINE+1]]:1: error: too few operands for instruction +qc.mveqi x9 + +# CHECK-IMM: :[[@LINE+1]]:19: error: immediate must be an integer in the range [-16, 15] +qc.mveqi x9, x10, 17, x12 + +# CHECK-EXT: :[[@LINE+1]]:1: error: instruction requires the following: 'Xqcicm' (Qualcomm uC Conditional Move Extension) +qc.mveqi x9, x10, 5, x12 + + +# CHECK: :[[@LINE+1]]:10: error: invalid operand for instruction +qc.mvgei 9, x10, 5, x12 + +# CHECK: :[[@LINE+1]]:1: error: too few operands for instruction +qc.mvgei x9 + +# CHECK-IMM: :[[@LINE+1]]:19: error: immediate must be an integer in the range [-16, 15] +qc.mvgei x9, x10, 17, x12 + +# CHECK-EXT: :[[@LINE+1]]:1: error: instruction requires the following: 'Xqcicm' (Qualcomm uC Conditional Move Extension) +qc.mvgei x9, x10, 5, x12 + + +# CHECK: :[[@LINE+1]]:10: error: invalid operand for instruction +qc.mvlti 9, x10, 5, x12 + +# CHECK: :[[@LINE+1]]:1: error: too few operands for instruction +qc.mvlti x9 + +# CHECK-IMM: :[[@LINE+1]]:19: error: immediate must be an integer in the range [-16, 15] +qc.mvlti x9, x10, 17, x12 + +# CHECK-EXT: :[[@LINE+1]]:1: error: instruction requires the following: 'Xqcicm' (Qualcomm uC Conditional Move Extension) +qc.mvlti x9, x10, 5, x12 + + +# CHECK: :[[@LINE+1]]:10: error: invalid operand for instruction +qc.mvnei 9, x10, 5, x12 + +# CHECK: :[[@LINE+1]]:1: error: too few operands for instruction +qc.mvnei x9 + +# CHECK-IMM: :[[@LINE+1]]:19: error: immediate must be an integer in the range [-16, 15] +qc.mvnei x9, x10, 17, x12 + +# CHECK-EXT: :[[@LINE+1]]:1: error: instruction requires the following: 'Xqcicm' (Qualcomm uC Conditional Move Extension) +qc.mvnei x9, x10, 5, x12 + + +# CHECK: :[[@LINE+1]]:11: error: invalid operand for instruction +qc.mvltui 9, x10, 5, x12 + +# CHECK: :[[@LINE+1]]:1: error: too few operands for instruction +qc.mvltui x9 + +# CHECK-IMM: :[[@LINE+1]]:20: error: immediate must be an integer in the range [0, 31] +qc.mvltui x9, x10, 37, x12 + +# CHECK-EXT: :[[@LINE+1]]:1: error: instruction requires the following: 'Xqcicm' (Qualcomm uC Conditional Move Extension) +qc.mvltui x9, x10, 5, x12 + + +# CHECK: :[[@LINE+1]]:11: error: invalid operand for instruction +qc.mvgeui 9, x10, 5, x12 + +# CHECK: :[[@LINE+1]]:1: error: too few operands for instruction +qc.mvgeui x9 + +# CHECK-IMM: :[[@LINE+1]]:20: error: immediate must be an integer in the range [0, 31] +qc.mvgeui x9, x10, 37, x12 + +# CHECK-EXT: :[[@LINE+1]]:1: error: instruction requires the following: 'Xqcicm' (Qualcomm uC Conditional Move Extension) +qc.mvgeui x9, x10, 5, x12 diff --git a/llvm/test/MC/RISCV/xqcicm-valid.s b/llvm/test/MC/RISCV/xqcicm-valid.s new file mode 100644 index 00000000000000..7d0050b6dafa81 --- /dev/null +++ b/llvm/test/MC/RISCV/xqcicm-valid.s @@ -0,0 +1,123 @@ +# Xqcicm - Qualcomm uC Conditional Move Extension +# RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-xqcicm -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ENC,CHECK-INST %s +# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+experimental-xqcicm < %s \ +# RUN: | llvm-objdump --mattr=+experimental-xqcicm -M no-aliases --no-print-imm-hex -d - \ +# RUN: | FileCheck -check-prefix=CHECK-INST %s +# RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-xqcicm -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ENC,CHECK-INST %s +# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+experimental-xqcicm < %s \ +# RUN: | llvm-objdump --mattr=+experimental-xqcicm --no-print-imm-hex -d - \ +# RUN: | FileCheck -check-prefix=CHECK-INST %s + +# CHECK-INST: qc.c.mveqz s1, a0 +# CHECK-ENC: encoding: [0x06,0xad] +qc.c.mveqz x9, x10 + + +# CHECK-INST: qc.mveq s1, a0, a1, a2 +# CHECK-ENC: encoding: [0xdb,0x04,0xb5,0x60] +qc.mveq x9, x10, x11, x12 + + +# CHECK-INST: qc.mvge s1, a0, a1, a2 +# CHECK-ENC: encoding: [0xdb,0x54,0xb5,0x60] +qc.mvge x9, x10, x11, x12 + + +# CHECK-INST: qc.mvgeu s1, a0, a1, a2 +# CHECK-ENC: encoding: [0xdb,0x74,0xb5,0x60] +qc.mvgeu x9, x10, x11, x12 + + +# CHECK-INST: qc.mvlt s1, a0, a1, a2 +# CHECK-ENC: encoding: [0xdb,0x44,0xb5,0x60] +qc.mvlt x9, x10, x11, x12 + + +# CHECK-INST: qc.mvltu s1, a0, a1, a2 +# CHECK-ENC: encoding: [0xdb,0x64,0xb5,0x60] +qc.mvltu x9, x10, x11, x12 + + +# CHECK-INST: qc.mvne s1, a0, a1, a2 +# CHECK-ENC: encoding: [0xdb,0x14,0xb5,0x60] +qc.mvne x9, x10, x11, x12 + + +# CHECK-INST: qc.mveqi s1, a0, 5, a2 +# CHECK-ENC: encoding: [0xdb,0x04,0x55,0x64] +qc.mveqi x9, x10, 5, x12 + +# CHECK-INST: qc.mveqi s1, a0, -16, a2 +# CHECK-ENC: encoding: [0xdb,0x04,0x05,0x65] +qc.mveqi x9, x10, -16, x12 + +# CHECK-INST: qc.mveqi s1, a0, 15, a2 +# CHECK-ENC: encoding: [0xdb,0x04,0xf5,0x64] +qc.mveqi x9, x10, 15, x12 + + +# CHECK-INST: qc.mvgei s1, a0, 5, a2 +# CHECK-ENC: encoding: [0xdb,0x54,0x55,0x64] +qc.mvgei x9, x10, 5, x12 + +# CHECK-INST: qc.mvgei s1, a0, -16, a2 +# CHECK-ENC: encoding: [0xdb,0x54,0x05,0x65] +qc.mvgei x9, x10, -16, x12 + +# CHECK-INST: qc.mvgei s1, a0, 15, a2 +# CHECK-ENC: encoding: [0xdb,0x54,0xf5,0x64] +qc.mvgei x9, x10, 15, x12 + + +# CHECK-INST: qc.mvlti s1, a0, 5, a2 +# CHECK-ENC: encoding: [0xdb,0x44,0x55,0x64] +qc.mvlti x9, x10, 5, x12 + +# CHECK-INST: qc.mvlti s1, a0, -16, a2 +# CHECK-ENC: encoding: [0xdb,0x44,0x05,0x65] +qc.mvlti x9, x10, -16, x12 + +# CHECK-INST: qc.mvlti s1, a0, 15, a2 +# CHECK-ENC: encoding: [0xdb,0x44,0xf5,0x64] +qc.mvlti x9, x10, 15, x12 + + +# CHECK-INST: qc.mvnei s1, a0, 5, a2 +# CHECK-ENC: encoding: [0xdb,0x14,0x55,0x64] +qc.mvnei x9, x10, 5, x12 + +# CHECK-INST: qc.mvnei s1, a0, -16, a2 +# CHECK-ENC: encoding: [0xdb,0x14,0x05,0x65] +qc.mvnei x9, x10, -16, x12 + +# CHECK-INST: qc.mvnei s1, a0, 15, a2 +# CHECK-ENC: encoding: [0xdb,0x14,0xf5,0x64] +qc.mvnei x9, x10, 15, x12 + + +# CHECK-INST: qc.mvltui s1, a0, 5, a2 +# CHECK-ENC: encoding: [0xdb,0x64,0x55,0x64] +qc.mvltui x9, x10, 5, x12 + +# CHECK-INST: qc.mvltui s1, a0, 0, a2 +# CHECK-ENC: encoding: [0xdb,0x64,0x05,0x64] +qc.mvltui x9, x10, 0, x12 + +# CHECK-INST: qc.mvltui s1, a0, 31, a2 +# CHECK-ENC: encoding: [0xdb,0x64,0xf5,0x65] +qc.mvltui x9, x10, 31, x12 + + +# CHECK-INST: qc.mvgeui s1, a0, 5, a2 +# CHECK-ENC: encoding: [0xdb,0x74,0x55,0x64] +qc.mvgeui x9, x10, 5, x12 + +# CHECK-INST: qc.mvgeui s1, a0, 0, a2 +# CHECK-ENC: encoding: [0xdb,0x74,0x05,0x64] +qc.mvgeui x9, x10, 0, x12 + +# CHECK-INST: qc.mvgeui s1, a0, 31, a2 +# CHECK-ENC: encoding: [0xdb,0x74,0xf5,0x65] +qc.mvgeui x9, x10, 31, x12 diff --git a/llvm/test/TableGen/template-args.td b/llvm/test/TableGen/template-args.td index f3eb02dd823ef5..1644b0a12dc3e1 100644 --- a/llvm/test/TableGen/template-args.td +++ b/llvm/test/TableGen/template-args.td @@ -9,6 +9,7 @@ // RUN: not llvm-tblgen -DERROR8 %s 2>&1 | FileCheck --check-prefix=ERROR8 %s // RUN: not llvm-tblgen -DERROR9 %s 2>&1 | FileCheck --check-prefix=ERROR9 %s // RUN: not llvm-tblgen -DERROR10 %s 2>&1 | FileCheck --check-prefix=ERROR10 %s +// RUN: not llvm-tblgen -DERROR11 %s 2>&1 | FileCheck --check-prefix=ERROR11 %s // This file tests that all required arguments are specified and template // arguments are type-checked and cast if necessary. @@ -158,13 +159,13 @@ defm MissingComma : TwoArgs<2 "two">; #ifdef ERROR8 def error8: Class1; // ERROR8: value not specified for template argument 'Class1:nm' -// ERROR8: 18:21: note: declared in 'Class1' +// ERROR8: 19:21: note: declared in 'Class1' #endif #ifdef ERROR9 defm error9: MC1; // ERROR9: value not specified for template argument 'MC1::nm' -// ERROR9: 99:23: note: declared in 'MC1' +// ERROR9: 100:23: note: declared in 'MC1' #endif #ifdef ERROR10 @@ -172,5 +173,15 @@ def error10 { int value = Class2<>.Code; } // ERROR10: value not specified for template argument 'Class2:cd' -// ERROR10: 37:22: note: declared in 'Class2' +// ERROR10: 38:22: note: declared in 'Class2' +#endif + +#ifdef ERROR11 + +class Foo; + +def error11 : Foo<"", "">; +// ERROR11: [[#@LINE-1]]:19: error: Value specified for template argument 'Foo:i' is of type string; expected type int: "" +// ERROR11: [[#@LINE-2]]:23: error: Value specified for template argument 'Foo:j' is of type string; expected type int: "" + #endif diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index e9f67036faf2b1..482f731afd8136 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -979,3 +979,198 @@ loop: exit: ret float %add } + +; Test case for https://github.com/llvm/llvm-project/issues/121745. +define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) { +; VEC-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification( +; VEC-SAME: ptr [[DST:%.*]]) { +; VEC-NEXT: [[ENTRY:.*]]: +; VEC-NEXT: [[STEP_1:%.*]] = sext i8 0 to i32 +; VEC-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1 +; VEC-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; VEC: [[VECTOR_PH]]: +; VEC-NEXT: br label %[[VECTOR_BODY:.*]] +; VEC: [[VECTOR_BODY]]: +; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VEC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; VEC-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 +; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]] +; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 +; VEC-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP2]], align 2 +; VEC-NEXT: [[TMP4:%.*]] = add i32 [[STEP_2]], [[TMP0]] +; VEC-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP6]] +; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; VEC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8 +; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} +; VEC: [[MIDDLE_BLOCK]]: +; VEC-NEXT: br i1 true, label %[[E_EXIT:.*]], label %[[SCALAR_PH]] +; VEC: [[SCALAR_PH]]: +; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; VEC-NEXT: br label %[[LOOP:.*]] +; VEC: [[LOOP]]: +; VEC-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; VEC-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[IV]] +; VEC-NEXT: store i16 0, ptr [[GEP_DST]], align 2 +; VEC-NEXT: [[IV_NEXT]] = add i32 [[STEP_2]], [[IV]] +; VEC-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8 +; VEC-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}} +; VEC: [[E_EXIT]]: +; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] +; VEC-NEXT: ret i32 [[RES]] +; +; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification( +; INTERLEAVE-SAME: ptr [[DST:%.*]]) { +; INTERLEAVE-NEXT: [[ENTRY:.*]]: +; INTERLEAVE-NEXT: [[STEP_1:%.*]] = sext i8 0 to i32 +; INTERLEAVE-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1 +; INTERLEAVE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; INTERLEAVE: [[VECTOR_PH]]: +; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] +; INTERLEAVE: [[VECTOR_BODY]]: +; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; INTERLEAVE-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 +; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]] +; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]] +; INTERLEAVE-NEXT: store i16 0, ptr [[TMP2]], align 2 +; INTERLEAVE-NEXT: store i16 0, ptr [[TMP3]], align 2 +; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP1]] +; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; INTERLEAVE-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8 +; INTERLEAVE-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} +; INTERLEAVE: [[MIDDLE_BLOCK]]: +; INTERLEAVE-NEXT: br i1 true, label %[[E_EXIT:.*]], label %[[SCALAR_PH]] +; INTERLEAVE: [[SCALAR_PH]]: +; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; INTERLEAVE-NEXT: br label %[[LOOP:.*]] +; INTERLEAVE: [[LOOP]]: +; INTERLEAVE-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; INTERLEAVE-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[IV]] +; INTERLEAVE-NEXT: store i16 0, ptr [[GEP_DST]], align 2 +; INTERLEAVE-NEXT: [[IV_NEXT]] = add i32 [[STEP_2]], [[IV]] +; INTERLEAVE-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8 +; INTERLEAVE-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}} +; INTERLEAVE: [[E_EXIT]]: +; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] +; INTERLEAVE-NEXT: ret i32 [[RES]] +; +entry: + %step.1 = sext i8 0 to i32 + %step.2 = add nsw i32 %step.1, 1 + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %gep.dst = getelementptr inbounds i16, ptr %dst, i32 %iv + store i16 0, ptr %gep.dst, align 2 + %iv.next = add i32 %step.2, %iv + %cmp.i = icmp slt i32 %iv.next, 8 + br i1 %cmp.i, label %loop, label %e.exit + +e.exit: + %res = phi i32 [ %iv.next, %loop ] + ret i32 %res +} + +define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { +; VEC-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification_2( +; VEC-SAME: ptr [[DST:%.*]]) { +; VEC-NEXT: [[ENTRY:.*]]: +; VEC-NEXT: [[STEP_1:%.*]] = sext i8 0 to i32 +; VEC-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1 +; VEC-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; VEC: [[VECTOR_PH]]: +; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP_2]], i64 0 +; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer +; VEC-NEXT: br label %[[VECTOR_BODY:.*]] +; VEC: [[VECTOR_BODY]]: +; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VEC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VEC-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], 2 +; VEC-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 +; VEC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2 +; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]] +; VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]] +; VEC-NEXT: store i16 0, ptr [[TMP2]], align 2 +; VEC-NEXT: store i16 0, ptr [[TMP3]], align 2 +; VEC-NEXT: [[TMP4:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 1) +; VEC-NEXT: [[TMP5:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[TMP4]] +; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) +; VEC-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4 +; VEC-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} +; VEC: [[MIDDLE_BLOCK]]: +; VEC-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1 +; VEC-NEXT: br i1 true, label %[[E_EXIT:.*]], label %[[SCALAR_PH]] +; VEC: [[SCALAR_PH]]: +; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; VEC-NEXT: br label %[[LOOP:.*]] +; VEC: [[LOOP]]: +; VEC-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; VEC-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[IV]] +; VEC-NEXT: store i16 0, ptr [[GEP_DST]], align 2 +; VEC-NEXT: [[INC:%.*]] = add i32 [[IV]], 1 +; VEC-NEXT: [[IV_NEXT]] = add i32 [[STEP_2]], [[INC]] +; VEC-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8 +; VEC-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}} +; VEC: [[E_EXIT]]: +; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] +; VEC-NEXT: ret i32 [[RES]] +; +; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification_2( +; INTERLEAVE-SAME: ptr [[DST:%.*]]) { +; INTERLEAVE-NEXT: [[ENTRY:.*]]: +; INTERLEAVE-NEXT: [[STEP_1:%.*]] = sext i8 0 to i32 +; INTERLEAVE-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1 +; INTERLEAVE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; INTERLEAVE: [[VECTOR_PH]]: +; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] +; INTERLEAVE: [[VECTOR_BODY]]: +; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], 2 +; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 +; INTERLEAVE-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2 +; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]] +; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]] +; INTERLEAVE-NEXT: store i16 0, ptr [[TMP2]], align 2 +; INTERLEAVE-NEXT: store i16 0, ptr [[TMP3]], align 2 +; INTERLEAVE-NEXT: [[TMP4:%.*]] = add i32 [[TMP1]], 1 +; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP4]] +; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4 +; INTERLEAVE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} +; INTERLEAVE: [[MIDDLE_BLOCK]]: +; INTERLEAVE-NEXT: br i1 true, label %[[E_EXIT:.*]], label %[[SCALAR_PH]] +; INTERLEAVE: [[SCALAR_PH]]: +; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; INTERLEAVE-NEXT: br label %[[LOOP:.*]] +; INTERLEAVE: [[LOOP]]: +; INTERLEAVE-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; INTERLEAVE-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[IV]] +; INTERLEAVE-NEXT: store i16 0, ptr [[GEP_DST]], align 2 +; INTERLEAVE-NEXT: [[INC:%.*]] = add i32 [[IV]], 1 +; INTERLEAVE-NEXT: [[IV_NEXT]] = add i32 [[STEP_2]], [[INC]] +; INTERLEAVE-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8 +; INTERLEAVE-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}} +; INTERLEAVE: [[E_EXIT]]: +; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] +; INTERLEAVE-NEXT: ret i32 [[RES]] +; +entry: + %step.1 = sext i8 0 to i32 + %step.2 = add nsw i32 %step.1, 1 + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %gep.dst = getelementptr inbounds i16, ptr %dst, i32 %iv + store i16 0, ptr %gep.dst, align 2 + %inc = add i32 %iv, 1 + %iv.next = add i32 %step.2, %inc + %cmp.i = icmp slt i32 %iv.next, 8 + br i1 %cmp.i, label %loop, label %e.exit + +e.exit: + %res = phi i32 [ %iv.next, %loop ] + ret i32 %res +} diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml index 94a162c5f2120d..97dfc61ce1e1d0 100644 --- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml @@ -64,6 +64,18 @@ # CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:10 # CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:6 +## Test the lookup functionality for merged functions: +# RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 --merged-functions | FileCheck --check-prefix=CHECK-MERGED-LOOKUP %s +# RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 | FileCheck --check-prefix=CHECK-NORMAL-LOOKUP %s + +#### TODO: Fix non-determinism leading that is currently worked around with `{{[1-3]}}` below. + +# CHECK-MERGED-LOOKUP: Found 3 functions at address 0x0000000000000248: +# CHECK-MERGED-LOOKUP-NEXT: 0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5 +# CHECK-MERGED-LOOKUP-NEXT-NEXT: 0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5 +# CHECK-MERGED-LOOKUP-NEXT-NEXT: 0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5 + +# CHECK-NORMAL-LOOKUP: 0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5 --- !mach-o diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s index f4c4a20573c4eb..cf1cf0e98c8018 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s @@ -1891,7 +1891,7 @@ drps # CHECK-NEXT: 1 2 0.50 fmov s0, s1 # CHECK-NEXT: 1 2 0.50 fabs s2, s3 # CHECK-NEXT: 1 2 0.50 fneg s4, s5 -# CHECK-NEXT: 1 9 1.00 fsqrt s6, s7 +# CHECK-NEXT: 9 9 9.00 fsqrt s6, s7 # CHECK-NEXT: 1 3 1.00 fcvt d8, s9 # CHECK-NEXT: 1 3 1.00 fcvt h10, s11 # CHECK-NEXT: 1 3 1.00 frintn s12, s13 @@ -1904,7 +1904,7 @@ drps # CHECK-NEXT: 1 2 0.50 fmov d0, d1 # CHECK-NEXT: 1 2 0.50 fabs d2, d3 # CHECK-NEXT: 1 2 0.50 fneg d4, d5 -# CHECK-NEXT: 1 16 1.00 fsqrt d6, d7 +# CHECK-NEXT: 16 16 16.00 fsqrt d6, d7 # CHECK-NEXT: 1 3 1.00 fcvt s8, d9 # CHECK-NEXT: 1 3 1.00 fcvt h10, d11 # CHECK-NEXT: 1 3 1.00 frintn d12, d13 @@ -1917,7 +1917,7 @@ drps # CHECK-NEXT: 1 3 1.00 fcvt s26, h27 # CHECK-NEXT: 1 3 1.00 fcvt d28, h29 # CHECK-NEXT: 1 3 0.50 fmul s20, s19, s17 -# CHECK-NEXT: 1 10 1.00 fdiv s1, s2, s3 +# CHECK-NEXT: 10 10 10.00 fdiv s1, s2, s3 # CHECK-NEXT: 1 2 0.50 fadd s4, s5, s6 # CHECK-NEXT: 1 2 0.50 fsub s7, s8, s9 # CHECK-NEXT: 1 2 0.50 fmax s10, s11, s12 @@ -1926,7 +1926,7 @@ drps # CHECK-NEXT: 1 2 0.50 fminnm s19, s20, s21 # CHECK-NEXT: 1 3 0.50 fnmul s22, s23, s2 # CHECK-NEXT: 1 3 0.50 fmul d20, d19, d17 -# CHECK-NEXT: 1 15 1.00 fdiv d1, d2, d3 +# CHECK-NEXT: 15 15 15.00 fdiv d1, d2, d3 # CHECK-NEXT: 1 2 0.50 fadd d4, d5, d6 # CHECK-NEXT: 1 2 0.50 fsub d7, d8, d9 # CHECK-NEXT: 1 2 0.50 fmax d10, d11, d12 @@ -2557,7 +2557,7 @@ drps # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] -# CHECK-NEXT: 11.00 11.00 33.00 33.00 87.33 151.33 151.33 517.00 251.00 162.50 162.50 169.50 85.50 +# CHECK-NEXT: 11.00 11.00 33.00 33.00 87.33 151.33 151.33 517.00 251.00 162.50 162.50 215.50 85.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] Instructions: @@ -3075,7 +3075,7 @@ drps # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov s0, s1 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fabs s2, s3 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fneg s4, s5 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - fsqrt s6, s7 +# CHECK-NEXT: - - - - - - - - - - - 9.00 - fsqrt s6, s7 # CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt d8, s9 # CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt h10, s11 # CHECK-NEXT: - - - - - - - - - - - 1.00 - frintn s12, s13 @@ -3088,7 +3088,7 @@ drps # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov d0, d1 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fabs d2, d3 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fneg d4, d5 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - fsqrt d6, d7 +# CHECK-NEXT: - - - - - - - - - - - 16.00 - fsqrt d6, d7 # CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt s8, d9 # CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt h10, d11 # CHECK-NEXT: - - - - - - - - - - - 1.00 - frintn d12, d13 @@ -3101,7 +3101,7 @@ drps # CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt s26, h27 # CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt d28, h29 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmul s20, s19, s17 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - fdiv s1, s2, s3 +# CHECK-NEXT: - - - - - - - - - - - 10.00 - fdiv s1, s2, s3 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fadd s4, s5, s6 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fsub s7, s8, s9 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmax s10, s11, s12 @@ -3110,7 +3110,7 @@ drps # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fminnm s19, s20, s21 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fnmul s22, s23, s2 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmul d20, d19, d17 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - fdiv d1, d2, d3 +# CHECK-NEXT: - - - - - - - - - - - 15.00 - fdiv d1, d2, d3 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fadd d4, d5, d6 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fsub d7, d8, d9 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmax d10, d11, d12 diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td index d61b418d2d8439..89cd3ce6fc4138 100644 --- a/llvm/tools/llvm-gsymutil/Opts.td +++ b/llvm/tools/llvm-gsymutil/Opts.td @@ -17,7 +17,10 @@ defm convert : Eq<"convert", "Convert the specified file to the GSYM format.\nSupported files include ELF and mach-o files that will have their debug info (DWARF) and symbol table converted">; def merged_functions : - FF<"merged-functions", "Encode merged function information for functions in debug info that have matching address ranges.\nWithout this option one function per unique address range will be emitted.">; + FF<"merged-functions", "When used with --convert, encodes merged function information for functions in debug info that have matching address ranges.\n" + "Without this option one function per unique address range will be emitted.\n" + "When used with --address/--addresses-from-stdin, all merged functions for a particular address will be displayed.\n" + "Without this option only one function will be displayed.">; def dwarf_callsites : FF<"dwarf-callsites", "Load call site info from DWARF, if available">; defm callsites_yaml_file : Eq<"callsites-yaml-file", "Load call site info from YAML file. Useful for testing.">, Flags<[HelpHidden]>; diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp index aed4ae7c615fd1..654da68bb69600 100644 --- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp +++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp @@ -98,7 +98,7 @@ static uint64_t SegmentSize; static bool Quiet; static std::vector LookupAddresses; static bool LookupAddressesFromStdin; -static bool StoreMergedFunctionInfo = false; +static bool UseMergedFunctions = false; static bool LoadDwarfCallSites = false; static std::string CallSiteYamlPath; @@ -181,7 +181,7 @@ static void parseArgs(int argc, char **argv) { } LookupAddressesFromStdin = Args.hasArg(OPT_addresses_from_stdin); - StoreMergedFunctionInfo = Args.hasArg(OPT_merged_functions); + UseMergedFunctions = Args.hasArg(OPT_merged_functions); if (Args.hasArg(OPT_callsites_yaml_file_EQ)) { CallSiteYamlPath = Args.getLastArgValue(OPT_callsites_yaml_file_EQ); @@ -380,7 +380,7 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile, // functions in the first FunctionInfo with that address range. Do this right // after loading the DWARF data so we don't have to deal with functions from // the symbol table. - if (StoreMergedFunctionInfo) + if (UseMergedFunctions) Gsym.prepareMergedFunctions(Out); // Get the UUID and convert symbol table to GSYM. @@ -508,24 +508,37 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) { } static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) { - if (auto Result = Gsym.lookup(Addr)) { - // If verbose is enabled dump the full function info for the address. - if (Verbose) { - if (auto FI = Gsym.getFunctionInfo(Addr)) { - OS << "FunctionInfo for " << HEX64(Addr) << ":\n"; - Gsym.dump(OS, *FI); - OS << "\nLookupResult for " << HEX64(Addr) << ":\n"; + if (UseMergedFunctions) { + if (auto Results = Gsym.lookupAll(Addr)) { + OS << "Found " << Results->size() << " functions at address " + << HEX64(Addr) << ":\n"; + for (size_t i = 0; i < Results->size(); ++i) { + OS << " " << Results->at(i); + + if (i != Results->size() - 1) + OS << "\n"; } } - OS << Result.get(); - } else { + } else { /* UseMergedFunctions == false */ + if (auto Result = Gsym.lookup(Addr)) { + // If verbose is enabled dump the full function info for the address. + if (Verbose) { + if (auto FI = Gsym.getFunctionInfo(Addr)) { + OS << "FunctionInfo for " << HEX64(Addr) << ":\n"; + Gsym.dump(OS, *FI); + OS << "\nLookupResult for " << HEX64(Addr) << ":\n"; + } + } + OS << Result.get(); + } else { + if (Verbose) + OS << "\nLookupResult for " << HEX64(Addr) << ":\n"; + OS << HEX64(Addr) << ": "; + logAllUnhandledErrors(Result.takeError(), OS, "error: "); + } if (Verbose) - OS << "\nLookupResult for " << HEX64(Addr) << ":\n"; - OS << HEX64(Addr) << ": "; - logAllUnhandledErrors(Result.takeError(), OS, "error: "); + OS << "\n"; } - if (Verbose) - OS << "\n"; } int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) { diff --git a/llvm/tools/llvm-link/llvm-link.cpp b/llvm/tools/llvm-link/llvm-link.cpp index 34bb6ce30b7668..0f4a4d57427a57 100644 --- a/llvm/tools/llvm-link/llvm-link.cpp +++ b/llvm/tools/llvm-link/llvm-link.cpp @@ -449,9 +449,8 @@ static bool linkFiles(const char *argv0, LLVMContext &Context, Linker &L, } // Promotion - if (renameModuleForThinLTO(*M, *Index, - /*ClearDSOLocalOnDeclarations=*/false)) - return true; + renameModuleForThinLTO(*M, *Index, + /*ClearDSOLocalOnDeclarations=*/false); } if (Verbose) diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index 3ea5afce56fa34..3955d36fce896a 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -655,8 +655,8 @@ TEST(ParseArchString, RejectsConflictingExtensions) { for (StringRef Input : {"rv64i_xqcisls0p2", "rv64i_xqcia0p2", "rv64i_xqciac0p2", - "rv64i_xqcicsr0p2", "rv64i_xqcilsm0p2", "rv64i_xqcics0p2", - "rv64i_xqcicli0p2"}) { + "rv64i_xqcicsr0p2", "rv64i_xqcilsm0p2", "rv64i_xqcicm0p2", + "rv64i_xqcics0p2", "rv64i_xqcicli0p2"}) { EXPECT_THAT( toString(RISCVISAInfo::parseArchString(Input, true).takeError()), ::testing::EndsWith(" is only supported for 'rv32'")); @@ -1118,6 +1118,7 @@ Experimental extensions xqcia 0.2 xqciac 0.2 xqcicli 0.2 + xqcicm 0.2 xqcics 0.2 xqcicsr 0.2 xqcilsm 0.2 diff --git a/llvm/utils/gn/secondary/clang/include/clang/Basic/BUILD.gn b/llvm/utils/gn/secondary/clang/include/clang/Basic/BUILD.gn index 5bd4483d51abf0..d8c4d8abdfd111 100644 --- a/llvm/utils/gn/secondary/clang/include/clang/Basic/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/include/clang/Basic/BUILD.gn @@ -95,6 +95,10 @@ clang_tablegen("BuiltinsRISCV") { args = [ "-gen-clang-builtins" ] } +clang_tablegen("BuiltinsSPIRV") { + args = [ "-gen-clang-builtins" ] +} + clang_tablegen("BuiltinsX86") { args = [ "-gen-clang-builtins" ] } diff --git a/llvm/utils/gn/secondary/clang/lib/Basic/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Basic/BUILD.gn index deba0cfe8ea859..d759ff4429a922 100644 --- a/llvm/utils/gn/secondary/clang/lib/Basic/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Basic/BUILD.gn @@ -26,6 +26,7 @@ static_library("Basic") { "//clang/include/clang/Basic:Builtins", "//clang/include/clang/Basic:BuiltinsBPF", "//clang/include/clang/Basic:BuiltinsRISCV", + "//clang/include/clang/Basic:BuiltinsSPIRV", "//clang/include/clang/Basic:BuiltinsX86", "//clang/include/clang/Basic:BuiltinsX86_64", "//clang/include/clang/Basic:DiagnosticGroups", diff --git a/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn index 700c243864633c..5b0b680e078c95 100644 --- a/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn @@ -95,6 +95,7 @@ static_library("Driver") { "ToolChains/RISCVToolchain.cpp", "ToolChains/SPIRV.cpp", "ToolChains/SPIRVOpenMP.cpp", + "ToolChains/SYCL.cpp", "ToolChains/Solaris.cpp", "ToolChains/TCE.cpp", "ToolChains/UEFI.cpp", diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp index 4d3ead20fb5cd3..9e3257a62b12fb 100644 --- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp @@ -51,12 +51,14 @@ class AffineApplyExpander loc(loc) {} template - Value buildBinaryExpr(AffineBinaryOpExpr expr) { + Value buildBinaryExpr(AffineBinaryOpExpr expr, + arith::IntegerOverflowFlags overflowFlags = + arith::IntegerOverflowFlags::none) { auto lhs = visit(expr.getLHS()); auto rhs = visit(expr.getRHS()); if (!lhs || !rhs) return nullptr; - auto op = builder.create(loc, lhs, rhs); + auto op = builder.create(loc, lhs, rhs, overflowFlags); return op.getResult(); } @@ -65,7 +67,8 @@ class AffineApplyExpander } Value visitMulExpr(AffineBinaryOpExpr expr) { - return buildBinaryExpr(expr); + return buildBinaryExpr(expr, + arith::IntegerOverflowFlags::nsw); } /// Euclidean modulo operation: negative RHS is not allowed. diff --git a/mlir/test/Conversion/AffineToStandard/lower-affine-to-vector.mlir b/mlir/test/Conversion/AffineToStandard/lower-affine-to-vector.mlir index 58580a194df0c7..f2e0306073f27b 100644 --- a/mlir/test/Conversion/AffineToStandard/lower-affine-to-vector.mlir +++ b/mlir/test/Conversion/AffineToStandard/lower-affine-to-vector.mlir @@ -26,7 +26,7 @@ func.func @affine_vector_store(%arg0 : index) { // CHECK: %[[buf:.*]] = memref.alloc // CHECK: %[[val:.*]] = arith.constant dense // CHECK: %[[c_1:.*]] = arith.constant -1 : index -// CHECK-NEXT: %[[a:.*]] = arith.muli %arg0, %[[c_1]] : index +// CHECK-NEXT: %[[a:.*]] = arith.muli %arg0, %[[c_1]] overflow : index // CHECK-NEXT: %[[b:.*]] = arith.addi %{{.*}}, %[[a]] : index // CHECK-NEXT: %[[c7:.*]] = arith.constant 7 : index // CHECK-NEXT: %[[c:.*]] = arith.addi %[[b]], %[[c7]] : index diff --git a/mlir/test/Conversion/AffineToStandard/lower-affine.mlir b/mlir/test/Conversion/AffineToStandard/lower-affine.mlir index 00d7b6b8d65f67..550ea71882e144 100644 --- a/mlir/test/Conversion/AffineToStandard/lower-affine.mlir +++ b/mlir/test/Conversion/AffineToStandard/lower-affine.mlir @@ -156,7 +156,7 @@ func.func private @get_idx() -> (index) // CHECK-NEXT: %[[v0:.*]] = call @get_idx() : () -> index // CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : index // CHECK-NEXT: %[[cm1:.*]] = arith.constant -1 : index -// CHECK-NEXT: %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] : index +// CHECK-NEXT: %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] overflow : index // CHECK-NEXT: %[[c20:.*]] = arith.constant 20 : index // CHECK-NEXT: %[[v2:.*]] = arith.addi %[[v1]], %[[c20]] : index // CHECK-NEXT: %[[v3:.*]] = arith.cmpi sge, %[[v2]], %[[c0]] : index @@ -177,7 +177,7 @@ func.func @if_only() { // CHECK-NEXT: %[[v0:.*]] = call @get_idx() : () -> index // CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : index // CHECK-NEXT: %[[cm1:.*]] = arith.constant -1 : index -// CHECK-NEXT: %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] : index +// CHECK-NEXT: %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] overflow : index // CHECK-NEXT: %[[c20:.*]] = arith.constant 20 : index // CHECK-NEXT: %[[v2:.*]] = arith.addi %[[v1]], %[[c20]] : index // CHECK-NEXT: %[[v3:.*]] = arith.cmpi sge, %[[v2]], %[[c0]] : index @@ -202,7 +202,7 @@ func.func @if_else() { // CHECK-NEXT: %[[v0:.*]] = call @get_idx() : () -> index // CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : index // CHECK-NEXT: %[[cm1:.*]] = arith.constant -1 : index -// CHECK-NEXT: %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] : index +// CHECK-NEXT: %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] overflow : index // CHECK-NEXT: %[[c20:.*]] = arith.constant 20 : index // CHECK-NEXT: %[[v2:.*]] = arith.addi %[[v1]], %[[c20]] : index // CHECK-NEXT: %[[v3:.*]] = arith.cmpi sge, %[[v2]], %[[c0]] : index @@ -272,7 +272,7 @@ func.func @if_with_yield() -> (i64) { // CHECK-NEXT: %[[v0:.*]] = call @get_idx() : () -> index // CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : index // CHECK-NEXT: %[[cm1:.*]] = arith.constant -1 : index -// CHECK-NEXT: %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] : index +// CHECK-NEXT: %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] overflow : index // CHECK-NEXT: %[[v2:.*]] = arith.addi %[[v1]], %{{.*}} : index // CHECK-NEXT: %[[c1:.*]] = arith.constant 1 : index // CHECK-NEXT: %[[v3:.*]] = arith.addi %[[v2]], %[[c1]] : index @@ -316,7 +316,7 @@ func.func @if_for() { %i = call @get_idx() : () -> (index) // CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : index // CHECK-NEXT: %[[cm1:.*]] = arith.constant -1 : index -// CHECK-NEXT: %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] : index +// CHECK-NEXT: %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] overflow : index // CHECK-NEXT: %[[c20:.*]] = arith.constant 20 : index // CHECK-NEXT: %[[v2:.*]] = arith.addi %[[v1]], %[[c20]] : index // CHECK-NEXT: %[[v3:.*]] = arith.cmpi sge, %[[v2]], %[[c0]] : index @@ -371,7 +371,7 @@ func.func @if_for() { // CHECK-NEXT: %[[c1:.*]] = arith.constant 1 : index // CHECK-NEXT: for %{{.*}} = %[[c0]] to %[[c42]] step %[[c1]] { // CHECK-NEXT: %[[cm1:.*]] = arith.constant -1 : index -// CHECK-NEXT: %[[mul0:.*]] = arith.muli %{{.*}}, %[[cm1]] : index +// CHECK-NEXT: %[[mul0:.*]] = arith.muli %{{.*}}, %[[cm1]] overflow : index // CHECK-NEXT: %[[add0:.*]] = arith.addi %[[mul0]], %{{.*}} : index // CHECK-NEXT: %[[max:.*]] = arith.maxsi %{{.*}}, %[[add0]] : index // CHECK-NEXT: %[[c10:.*]] = arith.constant 10 : index @@ -448,22 +448,22 @@ func.func @affine_applies(%arg0 : index) { %one = affine.apply #map3(%symbZero)[%zero] // CHECK-NEXT: %[[c2:.*]] = arith.constant 2 : index -// CHECK-NEXT: %[[v2:.*]] = arith.muli %arg0, %[[c2]] : index +// CHECK-NEXT: %[[v2:.*]] = arith.muli %arg0, %[[c2]] overflow : index // CHECK-NEXT: %[[v3:.*]] = arith.addi %arg0, %[[v2]] : index // CHECK-NEXT: %[[c3:.*]] = arith.constant 3 : index -// CHECK-NEXT: %[[v4:.*]] = arith.muli %arg0, %[[c3]] : index +// CHECK-NEXT: %[[v4:.*]] = arith.muli %arg0, %[[c3]] overflow : index // CHECK-NEXT: %[[v5:.*]] = arith.addi %[[v3]], %[[v4]] : index // CHECK-NEXT: %[[c4:.*]] = arith.constant 4 : index -// CHECK-NEXT: %[[v6:.*]] = arith.muli %arg0, %[[c4]] : index +// CHECK-NEXT: %[[v6:.*]] = arith.muli %arg0, %[[c4]] overflow : index // CHECK-NEXT: %[[v7:.*]] = arith.addi %[[v5]], %[[v6]] : index // CHECK-NEXT: %[[c5:.*]] = arith.constant 5 : index -// CHECK-NEXT: %[[v8:.*]] = arith.muli %arg0, %[[c5]] : index +// CHECK-NEXT: %[[v8:.*]] = arith.muli %arg0, %[[c5]] overflow : index // CHECK-NEXT: %[[v9:.*]] = arith.addi %[[v7]], %[[v8]] : index // CHECK-NEXT: %[[c6:.*]] = arith.constant 6 : index -// CHECK-NEXT: %[[v10:.*]] = arith.muli %arg0, %[[c6]] : index +// CHECK-NEXT: %[[v10:.*]] = arith.muli %arg0, %[[c6]] overflow : index // CHECK-NEXT: %[[v11:.*]] = arith.addi %[[v9]], %[[v10]] : index // CHECK-NEXT: %[[c7:.*]] = arith.constant 7 : index -// CHECK-NEXT: %[[v12:.*]] = arith.muli %arg0, %[[c7]] : index +// CHECK-NEXT: %[[v12:.*]] = arith.muli %arg0, %[[c7]] overflow : index // CHECK-NEXT: %[[v13:.*]] = arith.addi %[[v11]], %[[v12]] : index %four = affine.apply #map4(%arg0, %arg0, %arg0, %arg0)[%arg0, %arg0, %arg0] return @@ -610,7 +610,7 @@ func.func @affine_store(%arg0 : index) { affine.store %1, %0[%i0 - symbol(%arg0) + 7] : memref<10xf32> } // CHECK: %[[cm1:.*]] = arith.constant -1 : index -// CHECK-NEXT: %[[a:.*]] = arith.muli %{{.*}}, %[[cm1]] : index +// CHECK-NEXT: %[[a:.*]] = arith.muli %{{.*}}, %[[cm1]] overflow : index // CHECK-NEXT: %[[b:.*]] = arith.addi %{{.*}}, %[[a]] : index // CHECK-NEXT: %[[c7:.*]] = arith.constant 7 : index // CHECK-NEXT: %[[c:.*]] = arith.addi %[[b]], %[[c7]] : index diff --git a/mlir/test/Conversion/MemRefToLLVM/expand-then-convert-to-llvm.mlir b/mlir/test/Conversion/MemRefToLLVM/expand-then-convert-to-llvm.mlir index a78db9733b7eef..1fe4217cde9827 100644 --- a/mlir/test/Conversion/MemRefToLLVM/expand-then-convert-to-llvm.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/expand-then-convert-to-llvm.mlir @@ -59,7 +59,7 @@ func.func @subview(%0 : memref<64x4xf32, strided<[4, 1], offset: 0>>, %arg0 : in // CHECK: %[[BASE:.*]] = llvm.extractvalue %[[MEMREF]][0] : !llvm.struct<(ptr, ptr, i64 // CHECK: %[[BASE_ALIGNED:.*]] = llvm.extractvalue %[[MEMREF]][1] : !llvm.struct<(ptr, ptr, i64 // CHECK: %[[STRIDE0:.*]] = llvm.mlir.constant(4 : index) : i64 - // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : i64 + // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] overflow : i64 // CHECK: %[[TMP:.*]] = builtin.unrealized_conversion_cast %[[DESCSTRIDE0]] : i64 to index // CHECK: %[[DESCSTRIDE0_V2:.*]] = builtin.unrealized_conversion_cast %[[TMP]] : index to i64 // CHECK: %[[OFF2:.*]] = llvm.add %[[DESCSTRIDE0]], %[[ARG1]] : i64 @@ -95,10 +95,10 @@ func.func @subview_non_zero_addrspace(%0 : memref<64x4xf32, strided<[4, 1], offs // CHECK: %[[BASE:.*]] = llvm.extractvalue %[[MEMREF]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[BASE_ALIGNED:.*]] = llvm.extractvalue %[[MEMREF]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[STRIDE0:.*]] = llvm.mlir.constant(4 : index) : i64 - // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : i64 + // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] overflow : i64 // CHECK: %[[TMP:.*]] = builtin.unrealized_conversion_cast %[[DESCSTRIDE0]] : i64 to index // CHECK: %[[DESCSTRIDE0_V2:.*]] = builtin.unrealized_conversion_cast %[[TMP]] : index to i64 - // CHECK: %[[OFF2:.*]] = llvm.add %[[DESCSTRIDE0]], %[[ARG1]] : i64 + // CHECK: %[[OFF2:.*]] = llvm.add %[[DESCSTRIDE0]], %[[ARG1]] : i64 // CHECK: %[[TMP:.*]] = builtin.unrealized_conversion_cast %[[OFF2]] : i64 to index // CHECK: %[[OFF2:.*]] = builtin.unrealized_conversion_cast %[[TMP]] : index to i64 // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> @@ -131,10 +131,10 @@ func.func @subview_const_size(%0 : memref<64x4xf32, strided<[4, 1], offset: 0>>, // CHECK: %[[BASE:.*]] = llvm.extractvalue %[[MEMREF]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[BASE_ALIGNED:.*]] = llvm.extractvalue %[[MEMREF]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[C4:.*]] = llvm.mlir.constant(4 : index) : i64 - // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[C4]] : i64 + // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[C4]] overflow : i64 // CHECK: %[[TMP:.*]] = builtin.unrealized_conversion_cast %[[DESCSTRIDE0]] : i64 to index // CHECK: %[[DESCSTRIDE0_V2:.*]] = builtin.unrealized_conversion_cast %[[TMP]] : index to i64 - // CHECK: %[[OFF2:.*]] = llvm.add %[[DESCSTRIDE0]], %[[ARG1]] : i64 + // CHECK: %[[OFF2:.*]] = llvm.add %[[DESCSTRIDE0]], %[[ARG1]] : i64 // CHECK: %[[TMP:.*]] = builtin.unrealized_conversion_cast %[[OFF2]] : i64 to index // CHECK: %[[OFF2:.*]] = builtin.unrealized_conversion_cast %[[TMP]] : index to i64 // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> @@ -168,8 +168,8 @@ func.func @subview_const_stride(%0 : memref<64x4xf32, strided<[4, 1], offset: 0> // CHECK: %[[BASE:.*]] = llvm.extractvalue %[[MEMREF]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[BASE_ALIGNED:.*]] = llvm.extractvalue %[[MEMREF]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[C4:.*]] = llvm.mlir.constant(4 : index) : i64 - // CHECK: %[[OFF0:.*]] = llvm.mul %[[ARG0]], %[[C4]] : i64 - // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF0]], %[[ARG1]] : i64 + // CHECK: %[[OFF0:.*]] = llvm.mul %[[ARG0]], %[[C4]] overflow : i64 + // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF0]], %[[ARG1]] : i64 // CHECK: %[[TMP:.*]] = builtin.unrealized_conversion_cast %[[OFF2]] : i64 to index // CHECK: %[[OFF2:.*]] = builtin.unrealized_conversion_cast %[[TMP]] : index to i64 // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> @@ -234,12 +234,12 @@ func.func @subview_mixed_static_dynamic(%0 : memref<64x4xf32, strided<[4, 1], of // CHECK: %[[BASE:.*]] = llvm.extractvalue %[[MEMREF]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[BASE_ALIGNED:.*]] = llvm.extractvalue %[[MEMREF]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[STRIDE0:.*]] = llvm.mlir.constant(4 : index) : i64 - // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : i64 + // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] overflow : i64 // CHECK: %[[TMP:.*]] = builtin.unrealized_conversion_cast %[[DESCSTRIDE0]] : i64 to index // CHECK: %[[DESCSTRIDE0_V2:.*]] = builtin.unrealized_conversion_cast %[[TMP]] : index to i64 - // CHECK: %[[OFF0:.*]] = llvm.mul %[[ARG1]], %[[STRIDE0]] : i64 + // CHECK: %[[OFF0:.*]] = llvm.mul %[[ARG1]], %[[STRIDE0]] overflow : i64 // CHECK: %[[BASE_OFF:.*]] = llvm.mlir.constant(8 : index) : i64 - // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF0]], %[[BASE_OFF]] : i64 + // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF0]], %[[BASE_OFF]] : i64 // CHECK: %[[TMP:.*]] = builtin.unrealized_conversion_cast %[[OFF2]] : i64 to index // CHECK: %[[OFF2:.*]] = builtin.unrealized_conversion_cast %[[TMP]] : index to i64 // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> @@ -301,7 +301,7 @@ func.func @subview_leading_operands_dynamic(%0 : memref<5x?xf32>) -> memref<3x?x // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // Compute and insert offset from 2 + dynamic value. // CHECK: %[[CST_OFF0:.*]] = llvm.mlir.constant(2 : index) : i64 - // CHECK: %[[OFF0:.*]] = llvm.mul %[[STRIDE0]], %[[CST_OFF0]] : i64 + // CHECK: %[[OFF0:.*]] = llvm.mul %[[STRIDE0]], %[[CST_OFF0]] overflow : i64 // CHECK: %[[TMP:.*]] = builtin.unrealized_conversion_cast %[[OFF0]] : i64 to index // CHECK: %[[OFF0:.*]] = builtin.unrealized_conversion_cast %[[TMP]] : index to i64 // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> @@ -425,7 +425,7 @@ func.func @collapse_shape_dynamic_with_non_identity_layout( // CHECK: %[[SIZE1:.*]] = llvm.extractvalue %[[MEM]][3, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK: %[[SIZE2:.*]] = llvm.extractvalue %[[MEM]][3, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEM]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK: %[[FINAL_SIZE1:.*]] = llvm.mul %[[SIZE1]], %[[SIZE2]] : i64 +// CHECK: %[[FINAL_SIZE1:.*]] = llvm.mul %[[SIZE1]], %[[SIZE2]] overflow : i64 // CHECK: %[[SIZE1_TO_IDX:.*]] = builtin.unrealized_conversion_cast %[[FINAL_SIZE1]] : i64 to index // CHECK: %[[FINAL_SIZE1:.*]] = builtin.unrealized_conversion_cast %[[SIZE1_TO_IDX]] : index to i64 // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> @@ -547,7 +547,7 @@ func.func @collapse_shape_dynamic(%arg0 : memref<1x2x?xf32>) -> memref<1x?xf32> // CHECK: %[[SIZE2:.*]] = llvm.extractvalue %[[MEM]][3, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEM]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) : i64 -// CHECK: %[[FINAL_SIZE1:.*]] = llvm.mul %[[SIZE2]], %[[C2]] : i64 +// CHECK: %[[FINAL_SIZE1:.*]] = llvm.mul %[[SIZE2]], %[[C2]] overflow : i64 // CHECK: %[[SIZE1_TO_IDX:.*]] = builtin.unrealized_conversion_cast %[[FINAL_SIZE1]] : i64 to index // CHECK: %[[FINAL_SIZE1:.*]] = builtin.unrealized_conversion_cast %[[SIZE1_TO_IDX]] : index to i64 // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> diff --git a/mlir/test/Dialect/LLVM/lower-to-llvm-e2e-with-target-tag.mlir b/mlir/test/Dialect/LLVM/lower-to-llvm-e2e-with-target-tag.mlir index f6d3387d99b3c3..2785b508861228 100644 --- a/mlir/test/Dialect/LLVM/lower-to-llvm-e2e-with-target-tag.mlir +++ b/mlir/test/Dialect/LLVM/lower-to-llvm-e2e-with-target-tag.mlir @@ -28,7 +28,7 @@ func.func @subview(%0 : memref<64x4xf32, strided<[4, 1], offset: 0>>, %arg0 : in // CHECK-SAME: -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64> // CHECK-DAG: %[[STRIDE0:.*]] = llvm.mlir.constant(4 : index) : i64 - // CHECK-DAG: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : i64 + // CHECK-DAG: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] overflow : i64 // CHECK-DAG: %[[OFF2:.*]] = llvm.add %[[DESCSTRIDE0]], %[[ARG1]] : i64 // CHECK-DAG: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> diff --git a/mlir/test/Dialect/LLVM/lower-to-llvm-e2e-with-top-level-named-sequence.mlir b/mlir/test/Dialect/LLVM/lower-to-llvm-e2e-with-top-level-named-sequence.mlir index a74553cc2268ef..c1f30c7eaf6430 100644 --- a/mlir/test/Dialect/LLVM/lower-to-llvm-e2e-with-top-level-named-sequence.mlir +++ b/mlir/test/Dialect/LLVM/lower-to-llvm-e2e-with-top-level-named-sequence.mlir @@ -27,7 +27,7 @@ func.func @subview(%0 : memref<64x4xf32, strided<[4, 1], offset: 0>>, %arg0 : in // CHECK-SAME: -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64> // CHECK-DAG: %[[STRIDE0:.*]] = llvm.mlir.constant(4 : index) : i64 - // CHECK-DAG: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : i64 + // CHECK-DAG: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] overflow : i64 // CHECK-DAG: %[[OFF2:.*]] = llvm.add %[[DESCSTRIDE0]], %[[ARG1]] : i64 // CHECK-DAG: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> diff --git a/mlir/test/tblgen-lsp-server/templ-arg-check.test b/mlir/test/tblgen-lsp-server/templ-arg-check.test new file mode 100644 index 00000000000000..cda9b79a1f4633 --- /dev/null +++ b/mlir/test/tblgen-lsp-server/templ-arg-check.test @@ -0,0 +1,15 @@ +// RUN: tblgen-lsp-server -lit-test < %s | FileCheck -strict-whitespace %s +{"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"tablegen","capabilities":{},"trace":"off"}} +// ----- +{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{ + "uri":"test:///foo.td", + "languageId":"tablegen", + "version":1, + "text":"class Foo;\ndef : Foo<\"\">;" +}}} +// CHECK: "method": "textDocument/publishDiagnostics", +// CHECK: "message": "Value specified for template argument 'Foo:i' is of type string; expected type int: \"\"", +// ----- +{"jsonrpc":"2.0","id":3,"method":"shutdown"} +// ----- +{"jsonrpc":"2.0","method":"exit"} diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index cb4f55a5ca924e..8624028460975d 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -341,6 +341,21 @@ gentbl( ], ) +gentbl( + name = "basic_builtins_spirv_gen", + tbl_outs = [( + "-gen-clang-builtins", + "include/clang/Basic/BuiltinsSPIRV.inc", + )], + tblgen = ":clang-tblgen", + td_file = "include/clang/Basic/BuiltinsSPIRV.td", + td_srcs = [ + "include/clang/Basic/Builtins.td", + "include/clang/Basic/BuiltinsBase.td", + "include/clang/Basic/BuiltinsSPIRV.td", + ], +) + gentbl( name = "basic_builtins_riscv_gen", tbl_outs = [( @@ -723,6 +738,7 @@ cc_library( ":basic_builtins_bpf_gen", ":basic_builtins_gen", ":basic_builtins_riscv_gen", + ":basic_builtins_spirv_gen", ":basic_builtins_x86_gen", ":basic_builtins_x86_64_gen", ":basic_internal_headers", diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 81309f1e373ac6..09811eb06ff02f 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -1395,10 +1395,6 @@ libc_support_library( hdrs = [ "src/__support/threads/linux/raw_mutex.h", ], - defines = [ - "LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY", - "LIBC_COPT_RAW_MUTEX_DEFAULT_SPIN_COUNT", - ], target_compatible_with = select({ "@platforms//os:linux": [], "//conditions:default": ["@platforms//:incompatible"], @@ -3594,13 +3590,6 @@ libc_function( ############################### string targets ############################### -no_sanitize_features = [ - "-asan", - "-msan", - "-tsan", - "-ubsan", -] - libc_support_library( name = "string_memory_utils", hdrs = [ @@ -3678,7 +3667,6 @@ libc_function( name = "memcpy", srcs = ["src/string/memcpy.cpp"], hdrs = ["src/string/memcpy.h"], - features = no_sanitize_features, weak = True, deps = [ ":__support_common", @@ -3690,7 +3678,6 @@ libc_function( name = "memset", srcs = ["src/string/memset.cpp"], hdrs = ["src/string/memset.h"], - features = no_sanitize_features, weak = True, deps = [ ":__support_common", @@ -3702,7 +3689,6 @@ libc_function( name = "memmove", srcs = ["src/string/memmove.cpp"], hdrs = ["src/string/memmove.h"], - features = no_sanitize_features, weak = True, deps = [ ":__support_common", @@ -3714,7 +3700,6 @@ libc_function( name = "mempcpy", srcs = ["src/string/mempcpy.cpp"], hdrs = ["src/string/mempcpy.h"], - features = no_sanitize_features, weak = True, deps = [ ":__support_common", @@ -3726,7 +3711,6 @@ libc_function( name = "bcopy", srcs = ["src/strings/bcopy.cpp"], hdrs = ["src/strings/bcopy.h"], - features = no_sanitize_features, deps = [ ":__support_common", ":string_memory_utils", @@ -3737,7 +3721,6 @@ libc_function( name = "memcmp", srcs = ["src/string/memcmp.cpp"], hdrs = ["src/string/memcmp.h"], - features = no_sanitize_features, weak = True, deps = [ ":__support_common", @@ -3750,7 +3733,6 @@ libc_function( name = "bcmp", srcs = ["src/strings/bcmp.cpp"], hdrs = ["src/strings/bcmp.h"], - features = no_sanitize_features, weak = True, deps = [ ":__support_common", @@ -3762,7 +3744,6 @@ libc_function( name = "bzero", srcs = ["src/strings/bzero.cpp"], hdrs = ["src/strings/bzero.h"], - features = no_sanitize_features, weak = True, deps = [ ":__support_common", @@ -3784,7 +3765,6 @@ libc_function( name = "strlen", srcs = ["src/string/strlen.cpp"], hdrs = ["src/string/strlen.h"], - features = no_sanitize_features, deps = [ ":__support_common", ":string_utils", @@ -3795,21 +3775,6 @@ libc_function( name = "strcpy", srcs = ["src/string/strcpy.cpp"], hdrs = ["src/string/strcpy.h"], - features = no_sanitize_features, - deps = [ - ":__support_common", - ":memcpy", - ":string_memory_utils", - ":string_utils", - ], -) - -# A sanitizer instrumented flavor of strcpy to be used with unittests. -libc_function( - name = "strcpy_sanitized", - testonly = 1, - srcs = ["src/string/strcpy.cpp"], - hdrs = ["src/string/strcpy.h"], deps = [ ":__support_common", ":memcpy", @@ -4465,26 +4430,6 @@ libc_support_library( ], ) -# Only used for testing. -libc_support_library( - name = "printf_mock_parser", - hdrs = ["src/stdio/printf_core/parser.h"], - local_defines = ["LIBC_COPT_MOCK_ARG_LIST"], - deps = [ - ":__support_arg_list", - ":__support_common", - ":__support_cpp_bit", - ":__support_cpp_optional", - ":__support_cpp_string_view", - ":__support_cpp_type_traits", - ":__support_ctype_utils", - ":__support_fputil_fp_bits", - ":__support_str_to_integer", - ":printf_config", - ":printf_core_structs", - ], -) - libc_support_library( name = "printf_writer", srcs = ["src/stdio/printf_core/writer.cpp"], diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel index a31c67cfcea80c..a36cf2e0cb7d90 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel @@ -23,7 +23,7 @@ libc_test( name = "strcpy_test", srcs = ["strcpy_test.cpp"], libc_function_deps = [ - "//libc:strcpy_sanitized", + "//libc:strcpy", ], )