From 7fa6d635ee5398240149ad6ffcf61ba76ed2f569 Mon Sep 17 00:00:00 2001
From: John Brawn
Date: Thu, 11 Jul 2024 14:09:36 +0100
Subject: [PATCH] [AArch64] Implement GCS ACLE intrinsics (#96903)

This adds intrinsics defined in ARM-software/acle#260

Doing this requires some changes to the GCS instruction definitions, as
these intrinsics make use of how some instructions don't modify the
input register when GCS is disabled, and they need to be correctly
marked with mayLoad/mayStore/hasSideEffects for instruction selection to
work.
---
Reviewer notes (this section is not part of the commit message):

- This copy of the patch had been collapsed onto a handful of physical
  lines. Line breaks and leading indentation were reconstructed; every
  hunk was re-counted against its @@ header and against the diffstat
  below. Spacing inside a line is kept as found, so any column alignment
  in the original files may differ; if a context line does not match,
  retry with `git apply --ignore-whitespace`.
- Text that looked like markup had been stripped and is restored here:
  `#include <arm_acle.h>` in clang/test/CodeGen/aarch64-gcs.c and the
  TableGen template parameter lists of GCSRtIn, GCSRtOut and GCSSt
  (restored from their uses; please confirm against the tree).
- The author's e-mail address is missing from the From: header in this
  copy and has not been guessed; it must be supplied before `git am`.
- NOTE(review): __gcspr() and __gcspopm() are defined with empty
  parameter lists, while the `__yield(v...` hunk context suggests the
  header otherwise spells out (void). Worth confirming in a follow-up.

 clang/include/clang/Basic/BuiltinsAArch64.def |  5 ++
 clang/lib/Headers/arm_acle.h                  | 26 +++++++++
 clang/test/CodeGen/aarch64-gcs.c              | 56 +++++++++++++++++++
 llvm/include/llvm/IR/IntrinsicsAArch64.td     | 17 ++++++
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    | 12 ++++
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   | 19 +++++--
 llvm/test/CodeGen/AArch64/gcs-intrinsics.ll   | 54 ++++++++++++++++++
 7 files changed, 185 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/CodeGen/aarch64-gcs.c
 create mode 100644 llvm/test/CodeGen/AArch64/gcs-intrinsics.ll

diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index 36bd2b69dbbcb9..473b1d4698f04a 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -49,6 +49,7 @@ BUILTIN(__builtin_arm_wfe, "v", "")
 BUILTIN(__builtin_arm_wfi, "v", "")
 BUILTIN(__builtin_arm_sev, "v", "")
 BUILTIN(__builtin_arm_sevl, "v", "")
+BUILTIN(__builtin_arm_chkfeat, "WUiWUi", "")
 
 // Like __builtin_trap but provide an 16-bit immediate reason code (which goes into `brk #N`).
 BUILTIN(__builtin_arm_trap, "vUIs", "nr")
@@ -136,6 +137,10 @@ TARGET_BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n", "ls64")
 TARGET_BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n", "ls64")
 TARGET_BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n", "ls64")
 
+// Armv9.3-A Guarded Control Stack
+TARGET_BUILTIN(__builtin_arm_gcspopm, "WUiWUi", "n", "gcs")
+TARGET_BUILTIN(__builtin_arm_gcsss, "vC*vC*", "n", "gcs")
+
 TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_BitScanForward64, "UcUNi*ULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 5785954c9171ab..1518b0c4c8428f 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -75,6 +75,14 @@ static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(v
 #define __dbg(t) __builtin_arm_dbg(t)
 #endif
 
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
+#define _CHKFEAT_GCS 1
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__chkfeat(uint64_t __features) {
+  return __builtin_arm_chkfeat(__features) ^ __features;
+}
+#endif
+
 /* 7.5 Swap */
 static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
 __swp(uint32_t __x, volatile uint32_t *__p) {
@@ -855,6 +863,24 @@ __rndrrs(uint64_t *__p) {
 }
 #endif
 
+/* 11.2 Guarded Control Stack intrinsics */
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
+static __inline__ void * __attribute__((__always_inline__, __nodebug__))
+__gcspr() {
+  return (void *)__builtin_arm_rsr64("gcspr_el0");
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("gcs")))
+__gcspopm() {
+  return __builtin_arm_gcspopm(0);
+}
+
+static __inline__ const void * __attribute__((__always_inline__, __nodebug__, target("gcs")))
+__gcsss(const void *__stack) {
+  return __builtin_arm_gcsss(__stack);
+}
+#endif
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/clang/test/CodeGen/aarch64-gcs.c b/clang/test/CodeGen/aarch64-gcs.c
new file mode 100644
index 00000000000000..767b1b8bfaf2dc
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-gcs.c
@@ -0,0 +1,56 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +gcs -emit-llvm %s -o - | FileCheck %s
+
+#include <arm_acle.h>
+
+// CHECK-LABEL: define dso_local i64 @test_chkfeat
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[__FEATURES_ADDR_I:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    store i64 1, ptr [[__FEATURES_ADDR_I]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[__FEATURES_ADDR_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.chkfeat(i64 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[__FEATURES_ADDR_I]], align 8
+// CHECK-NEXT:    [[XOR_I:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+// CHECK-NEXT:    ret i64 [[XOR_I]]
+//
+uint64_t test_chkfeat() {
+  return __chkfeat(_CHKFEAT_GCS);
+}
+
+// CHECK-LABEL: define dso_local ptr @test_gcspr
+// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.read_volatile_register.i64(metadata [[META2:![0-9]+]])
+// CHECK-NEXT:    [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr
+// CHECK-NEXT:    ret ptr [[TMP1]]
+//
+void *test_gcspr() {
+  return __gcspr();
+}
+
+// CHECK-LABEL: define dso_local i64 @test_gcspopm
+// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.aarch64.gcspopm(i64 0)
+// CHECK-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t test_gcspopm() {
+  return __gcspopm();
+}
+
+// CHECK-LABEL: define dso_local ptr @test_gcsss
+// CHECK-SAME: (ptr noundef [[P:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[__STACK_ADDR_I:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    store ptr [[TMP0]], ptr [[__STACK_ADDR_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__STACK_ADDR_I]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = call ptr @llvm.aarch64.gcsss(ptr [[TMP1]])
+// CHECK-NEXT:    ret ptr [[TMP2]]
+//
+const void *test_gcsss(const void *p) {
+  return __gcsss(p);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 65e3403fbf1524..3735bf5222fce3 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -92,6 +92,23 @@ def int_aarch64_isb : ClangBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">,
 // ordering during ISel.
 def int_aarch64_space : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], []>;
 
+//===----------------------------------------------------------------------===//
+// Guarded Control Stack
+
+def int_aarch64_chkfeat : ClangBuiltin<"__builtin_arm_chkfeat">,
+                          DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
+                                                [IntrNoMem]>;
+
+// FIXME: This should be marked as [IntrReadMem, IntrHasSideEffects], as it has
+// the side-effect of updating gcspr, but this combination doesn't work
+// correctly.
+def int_aarch64_gcspopm : ClangBuiltin<"__builtin_arm_gcspopm">,
+                          DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
+                                                []>;
+
+def int_aarch64_gcsss : ClangBuiltin<"__builtin_arm_gcsss">,
+                        DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
+
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 59cfd8d6c27d25..b9c57d1975b6f9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -4587,6 +4587,18 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     switch (IntNo) {
     default:
       break;
+    case Intrinsic::aarch64_gcsss: {
+      SDLoc DL(Node);
+      SDValue Chain = Node->getOperand(0);
+      SDValue Val = Node->getOperand(2);
+      SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
+      SDNode *SS1 =
+          CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
+      SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
+                                           MVT::Other, Zero, SDValue(SS1, 0));
+      ReplaceNode(Node, SS2);
+      return;
+    }
     case Intrinsic::aarch64_ldaxp:
     case Intrinsic::aarch64_ldxp: {
       unsigned Op =
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index bafcafb7d8d4ac..152a6c2e95b278 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1267,23 +1267,34 @@ class GCSRtIn<bits<3> op1, bits<3> op2, string mnemonic,
   let Inst{15-8} = 0b01110111;
   let Inst{7-5} = op2;
   let Predicates = [HasGCS];
+  let hasSideEffects = 1;
 }
 
+let mayStore = 1, mayLoad = 1 in
 def GCSSS1 : GCSRtIn<0b011, 0b010, "gcsss1">;
+let mayStore = 1 in
 def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">;
 
 class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic,
                list<dag> pattern = []>
-    : RtSystemI<1, (outs GPR64:$Rt), (ins), mnemonic, "\t$Rt", pattern> {
+    : RtSystemI<1, (outs GPR64:$Rt), (ins GPR64:$src), mnemonic, "\t$Rt", pattern> {
   let Inst{20-19} = 0b01;
   let Inst{18-16} = op1;
   let Inst{15-8} = 0b01110111;
   let Inst{7-5} = op2;
   let Predicates = [HasGCS];
+  let hasSideEffects = 1;
+  // The input register is unchanged when GCS is disabled, so we need it as
+  // both an input and output operand.
+  let Constraints = "$src = $Rt";
 }
 
+let mayStore = 1, mayLoad = 1 in
 def GCSSS2 : GCSRtOut<0b011, 0b011, "gcsss2">;
-def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm">;
+// FIXME: mayStore = 1 only needed to match the intrinsic definition
+let mayStore = 1, mayLoad = 1 in
+def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm",
+                       [(set GPR64:$Rt, (int_aarch64_gcspopm GPR64:$src))]>;
 def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent
 
 def GCSB_DSYNC_disable : InstAlias<"gcsb\tdsync", (HINT 19), 0>;
@@ -1292,7 +1303,8 @@ def GCSB_DSYNC : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGC
 def : TokenAlias<"DSYNC", "dsync">;
 
 let Uses = [X16], Defs = [X16], CRm = 0b0101 in {
-  def CHKFEAT : SystemNoOperands<0b000, "hint\t#40">;
+  def CHKFEAT : SystemNoOperands<0b000, "hint\t#40",
+                                 [(set X16, (int_aarch64_chkfeat X16))]>;
 }
 def : InstAlias<"chkfeat\tx16", (CHKFEAT), 0>;
 def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>;
@@ -1311,7 +1323,6 @@ class GCSSt<string mnemonic, bits<3> op>
 def GCSSTR : GCSSt<"gcsstr", 0b000>;
 def GCSSTTR : GCSSt<"gcssttr", 0b001>;
 
-
 // ARMv8.2-A Dot Product
 let Predicates = [HasDotProd] in {
 defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
diff --git a/llvm/test/CodeGen/AArch64/gcs-intrinsics.ll b/llvm/test/CodeGen/AArch64/gcs-intrinsics.ll
new file mode 100644
index 00000000000000..b2f9b4d34ac4c0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/gcs-intrinsics.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 -mattr=+gcs -verify-machineinstrs -o - %s | FileCheck %s
+
+; We call each intrinsic twice, once with the result being unused and once with
+; it being used, to check that dead code elimination is being done correctly.
+; chkfeat does not have side effects so can be eliminated, but the others do and
+; can't be eliminated.
+
+define i64 @test_chkfeat(i64 %arg) {
+; CHECK-LABEL: test_chkfeat:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x16, x0
+; CHECK-NEXT:    chkfeat x16
+; CHECK-NEXT:    mov x0, x16
+; CHECK-NEXT:    ret
+entry:
+  %0 = call i64 @llvm.aarch64.chkfeat(i64 %arg)
+  %1 = call i64 @llvm.aarch64.chkfeat(i64 %arg)
+  ret i64 %1
+}
+
+define i64 @test_gcspopm(i64 %arg) {
+; CHECK-LABEL: test_gcspopm:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x8, x0
+; CHECK-NEXT:    gcspopm x8
+; CHECK-NEXT:    gcspopm x0
+; CHECK-NEXT:    ret
+entry:
+  %0 = call i64 @llvm.aarch64.gcspopm(i64 %arg)
+  %1 = call i64 @llvm.aarch64.gcspopm(i64 %arg)
+  ret i64 %1
+}
+
+define ptr @test_gcsss(ptr %p) {
+; CHECK-LABEL: test_gcsss:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x9, xzr
+; CHECK-NEXT:    gcsss1 x0
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    gcsss2 x9
+; CHECK-NEXT:    gcsss1 x0
+; CHECK-NEXT:    gcsss2 x8
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.aarch64.gcsss(ptr %p)
+  %1 = call ptr @llvm.aarch64.gcsss(ptr %p)
+  ret ptr %1
+}
+
+declare i64 @llvm.aarch64.chkfeat(i64)
+declare i64 @llvm.aarch64.gcspopm(i64)
+declare ptr @llvm.aarch64.gcsss(ptr)