From b2aa7442399ecb655615c9c4c01a253eb0d20776 Mon Sep 17 00:00:00 2001
From: jimmyk
Date: Thu, 24 Oct 2024 09:59:51 -0400
Subject: [PATCH 1/2] Accelerate Unsafe CAS Intrinsics on AArch64

Adds support for the following recognized methods:
CompareAndExchangeObject    //JDK11
CompareAndExchangeReference //JDK17,21
CompareAndExchangeInt       //JDK11,17,21
CompareAndExchangeLong      //JDK11,17,21

The accelerated CAE code was built on top of the existing accelerated
CAS support on AArch64. The changes are similar to the support
previously added on Power, X and Z.

Cleans up isUnsafeCAS to remove the now unused comp parameter. Also
cleans up isUnsafeWithObjectArg, isVarHandleAccessMethod and
isSignaturePolymorphicMethod to remove their unused comp parameters.

Signed-off-by: jimmyk
---
 .../aarch64/codegen/J9TreeEvaluator.cpp       | 206 +++++++++++++-----
 runtime/compiler/env/j9method.cpp             |  25 +--
 runtime/compiler/env/j9method.h               |   8 +-
 runtime/compiler/il/J9Node.cpp                |   1 -
 .../compiler/optimizer/InlinerTempForJ9.cpp   |  15 +-
 runtime/compiler/optimizer/UnsafeFastPath.cpp |   4 +-
 .../optimizer/VarHandleTransformer.cpp        |   2 +-
 7 files changed, 166 insertions(+), 95 deletions(-)

diff --git a/runtime/compiler/aarch64/codegen/J9TreeEvaluator.cpp b/runtime/compiler/aarch64/codegen/J9TreeEvaluator.cpp
index 53426571186..4cc569b6296 100644
--- a/runtime/compiler/aarch64/codegen/J9TreeEvaluator.cpp
+++ b/runtime/compiler/aarch64/codegen/J9TreeEvaluator.cpp
@@ -93,6 +93,7 @@ extern void TEMPORARY_initJ9ARM64TreeEvaluatorTable(TR::CodeGenerator *cg)
    tet[TR::ResolveAndNULLCHK] = TR::TreeEvaluator::resolveAndNULLCHKEvaluator;
    }
 
+static void genDecompressPointer(TR::CodeGenerator *cg, TR::Node *node, TR::Register *ptrReg);
 static TR::InstOpCode::Mnemonic
 getStoreOpCodeFromDataType(TR::CodeGenerator *cg, TR::DataType dt, int32_t elementSize, bool useIdxReg);
@@ -4803,12 +4804,22 @@ J9::ARM64::TreeEvaluator::genArrayCopyWithArrayStoreCHK(TR::Node *node, TR::Code
 static TR::Register *
 genCAS(TR::Node *node, TR::CodeGenerator *cg, TR_ARM64ScratchRegisterManager *srm, TR::Register *objReg, TR::Register *offsetReg, intptr_t offset, bool offsetInReg, TR::Register *oldVReg, TR::Register *newVReg,
-       TR::LabelSymbol *doneLabel, int32_t oldValue, bool oldValueInReg, bool is64bit, bool casWithoutSync = false)
+       TR::LabelSymbol *doneLabel, int32_t oldValue, bool oldValueInReg, bool is64bit, bool isReference, bool isExchange, bool casWithoutSync = false)
    {
    TR::Compilation * comp = cg->comp();
    TR::Register *addrReg = srm->findOrCreateScratchRegister();
-   TR::Register *resultReg = cg->allocateRegister();
-   TR::InstOpCode::Mnemonic op;
+   TR::Register *tempReg = isExchange ? srm->findOrCreateScratchRegister() : NULL;
+   TR::Register *resultReg = NULL;
+   TR::InstOpCode::Mnemonic op = TR::InstOpCode::bad;
+
+   if (isReference && isExchange)
+      {
+      resultReg = cg->allocateCollectedReferenceRegister();
+      }
+   else
+      {
+      resultReg = cg->allocateRegister();
+      }
 
    if (offsetInReg)
@@ -4836,7 +4847,16 @@ genCAS(TR::Node *node, TR::CodeGenerator *cg, TR_ARM64ScratchRegisterManager *sr
       op = casWithoutSync ? (is64bit ? TR::InstOpCode::casx : TR::InstOpCode::casw) : (is64bit ? TR::InstOpCode::casalx : TR::InstOpCode::casalw);
       generateTrg1MemSrc1Instruction(cg, op, node, resultReg, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0), newVReg);
       generateCompareInstruction(cg, node, resultReg, oldVReg, is64bit);
-      generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);
+
+      if (!isExchange)
+         {
+         generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);
+         }
+      else if (isReference && comp->target().is64Bit() && comp->useCompressedPointers())
+         {
+         genDecompressPointer(cg, node, resultReg);
+         }
+
       if (!createDoneLabel)
          {
          generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_NE);
@@ -4871,7 +4891,16 @@ genCAS(TR::Node *node, TR::CodeGenerator *cg, TR_ARM64ScratchRegisterManager *sr
       else
          generateCompareImmInstruction(cg, node, resultReg, oldValue, is64bit);
       if (!createDoneLabel)
-         generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, resultReg, 0); // failure
+         {
+         if (!isExchange)
+            {
+            generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, resultReg, 0); // failure
+            }
+         else if (isReference && comp->target().is64Bit() && comp->useCompressedPointers())
+            {
+            genDecompressPointer(cg, node, resultReg);
+            }
+         }
       generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_NE);
 
       if (casWithoutSync)
@@ -4882,8 +4911,8 @@ genCAS(TR::Node *node, TR::CodeGenerator *cg, TR_ARM64ScratchRegisterManager *sr
          {
          op = is64bit ? TR::InstOpCode::stlxrx : TR::InstOpCode::stlxrw;
          }
-      generateTrg1MemSrc1Instruction(cg, op, node, resultReg, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0), newVReg);
-      generateCompareBranchInstruction(cg, TR::InstOpCode::cbnzx, node, resultReg, loopLabel);
+      generateTrg1MemSrc1Instruction(cg, op, node, isExchange ? tempReg : resultReg, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0), newVReg);
+      generateCompareBranchInstruction(cg, TR::InstOpCode::cbnzx, node, isExchange ? tempReg : resultReg, loopLabel);
 
       if (!casWithoutSync)
          generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, TR::InstOpCode::ish);
@@ -4891,33 +4920,44 @@ genCAS(TR::Node *node, TR::CodeGenerator *cg, TR_ARM64ScratchRegisterManager *sr
    if (createDoneLabel)
       {
       generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel);
-      generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);
+      if (!isExchange)
+         {
+         generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);
+         }
+      else if (isReference && comp->target().is64Bit() && comp->useCompressedPointers())
+         {
+         genDecompressPointer(cg, node, resultReg);
+         }
       }
    else
       {
-      generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, resultReg, 1); // success
+      if (!isExchange)
+         {
+         generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, resultReg, 1); // success
+         }
       }
    }
 
    srm->reclaimScratchRegister(addrReg);
+   srm->reclaimScratchRegister(tempReg);
 
    node->setRegister(resultReg);
    return resultReg;
    }
 
 static TR::Register *
-VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *cg, bool isLong)
+VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *cg, bool isLong, bool isExchange = false)
    {
    TR::Compilation * comp = cg->comp();
    TR::Node *firstChild = node->getFirstChild();
-   TR::Node *secondChild = node->getSecondChild();
-   TR::Node *thirdChild = node->getChild(2);
-   TR::Node *fourthChild = node->getChild(3);
-   TR::Node *fifthChild = node->getChild(4);
+   TR::Node *objNode = node->getSecondChild();
+   TR::Node *offsetNode = node->getChild(2);
+   TR::Node *oldVNode = node->getChild(3);
+   TR::Node *newVNode = node->getChild(4);
    TR::Register *offsetReg = NULL;
    TR::Register *oldVReg = NULL;
    TR::Register *newVReg = NULL;
    TR::Register *resultReg = NULL;
-   TR::Register *objReg = cg->evaluate(secondChild);
+   TR::Register *objReg = cg->evaluate(objNode);
    TR::RegisterDependencyConditions *conditions = NULL;
    TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
    intptr_t oldValue = 0;
@@ -4925,29 +4965,29 @@ VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *cg, bool isLong)
    intptr_t offset;
    bool offsetInReg = true;
 
-   if (thirdChild->getOpCode().isLoadConst() && thirdChild->getRegister() == NULL)
+   if (offsetNode->getOpCode().isLoadConst() && offsetNode->getRegister() == NULL)
       {
-      offset = (thirdChild->getOpCodeValue() == TR::iconst) ? thirdChild->getInt() : thirdChild->getLongInt();
+      offset = (offsetNode->getOpCodeValue() == TR::iconst) ? offsetNode->getInt() : offsetNode->getLongInt();
       offsetInReg = !constantIsUnsignedImm12(offset);
       }
    if (offsetInReg)
-      offsetReg = cg->evaluate(thirdChild);
+      offsetReg = cg->evaluate(offsetNode);
 
    static const bool disableLSE = feGetEnv("TR_aarch64DisableLSE") != NULL;
    const bool useLSE = comp->target().cpu.supportsFeature(OMR_FEATURE_ARM64_LSE) && (!disableLSE);
    // Obtain values to be checked for, and swapped in:
-   if ((!useLSE) && fourthChild->getOpCode().isLoadConst() && fourthChild->getRegister() == NULL)
+   if ((!useLSE) && oldVNode->getOpCode().isLoadConst() && oldVNode->getRegister() == NULL)
      {
      if (isLong)
-         oldValue = fourthChild->getLongInt();
+         oldValue = oldVNode->getLongInt();
      else
-         oldValue = fourthChild->getInt();
+         oldValue = oldVNode->getInt();
      if (constantIsUnsignedImm12(oldValue))
         oldValueInReg = false;
      }
    if (oldValueInReg)
-      oldVReg = cg->evaluate(fourthChild);
-   newVReg = cg->evaluate(fifthChild);
+      oldVReg = cg->evaluate(oldVNode);
+   newVReg = cg->evaluate(newVNode);
 
    // Determine if synchronization needed:
    bool casWithoutSync = false;
@@ -4966,7 +5006,7 @@ VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *cg, bool isLong)
    TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();
 
    // Compare and swap:
-   resultReg = genCAS(node, cg, srm, objReg, offsetReg, offset, offsetInReg, oldVReg, newVReg, NULL, oldValue, oldValueInReg, isLong, casWithoutSync);
+   resultReg = genCAS(node, cg, srm, objReg, offsetReg, offset, offsetInReg, oldVReg, newVReg, NULL, oldValue, oldValueInReg, isLong, false, isExchange, casWithoutSync);
 
    const int regnum = 3 + (oldValueInReg ? 1 : 0) + (offsetInReg ? 1 : 0) + srm->numAvailableRegisters();
    conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, regnum, cg->trMemory());
@@ -4986,26 +5026,26 @@ VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *cg, bool isLong)
    srm->stopUsingRegisters();
 
    cg->recursivelyDecReferenceCount(firstChild);
-   cg->decReferenceCount(secondChild);
+   cg->decReferenceCount(objNode);
    if (offsetInReg)
-      cg->decReferenceCount(thirdChild);
+      cg->decReferenceCount(offsetNode);
    else
-      cg->recursivelyDecReferenceCount(thirdChild);
+      cg->recursivelyDecReferenceCount(offsetNode);
 
    if (oldValueInReg)
-      cg->decReferenceCount(fourthChild);
+      cg->decReferenceCount(oldVNode);
    else
-      cg->recursivelyDecReferenceCount(fourthChild);
-   cg->decReferenceCount(fifthChild);
+      cg->recursivelyDecReferenceCount(oldVNode);
+   cg->decReferenceCount(newVNode);
    return resultReg;
    }
 
-static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenerator *cg)
+static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenerator *cg, bool isExchange = false)
    {
    TR::Compilation *comp = cg->comp();
    TR_J9VMBase *fej9 = reinterpret_cast<TR_J9VMBase *>(comp->fe());
    TR::Register *objReg, *offsetReg, *resultReg;
-   TR::Node *firstChild, *secondChild, *thirdChild, *fourthChild, *fifthChild;
+   TR::Node *firstChild, *objNode, *offsetNode, *oldVNode, *newVNode;
    TR::LabelSymbol *doneLabel;
    bool offsetInReg = true;
    intptr_t offset;
@@ -5023,23 +5063,23 @@ static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenera
     *    aload (newValueNode)
     */
    firstChild = node->getFirstChild();
-   secondChild = node->getSecondChild();
-   thirdChild = node->getChild(2);
-   fourthChild = node->getChild(3);
-   fifthChild = node->getChild(4);
+   objNode = node->getSecondChild();
+   offsetNode = node->getChild(2);
+   oldVNode = node->getChild(3);
+   newVNode = node->getChild(4);
 
-   objReg = cg->evaluate(secondChild);
+   objReg = cg->evaluate(objNode);
 
-   if (thirdChild->getOpCode().isLoadConst() && thirdChild->getRegister() == NULL)
+   if (offsetNode->getOpCode().isLoadConst() && offsetNode->getRegister() == NULL)
      {
-      offset = (thirdChild->getOpCodeValue() == TR::iconst) ? thirdChild->getInt() : thirdChild->getLongInt();
+      offset = (offsetNode->getOpCodeValue() == TR::iconst) ? offsetNode->getInt() : offsetNode->getLongInt();
      offsetInReg = !constantIsUnsignedImm12(offset);
      }
    if (offsetInReg)
-      offsetReg = cg->evaluate(thirdChild);
+      offsetReg = cg->evaluate(offsetNode);
 
-   TR::Register *oldVReg = cg->evaluate(fourthChild);
-   TR::Register *newVReg = cg->evaluate(fifthChild);
+   TR::Register *oldVReg = cg->evaluate(oldVNode);
+   TR::Register *newVReg = cg->evaluate(newVNode);
 
    doneLabel = generateLabelSymbol(cg);
    TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();
@@ -5085,7 +5125,7 @@ static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenera
      {
      if (comp->getOption(TR_TraceCG))
         {
-         traceMsg(comp, "Instruction %p throws an implicit NPE, node: %p NPE node: %p\n", faultingInstruction, node, secondChild);
+         traceMsg(comp, "Instruction %p throws an implicit NPE, node: %p NPE node: %p\n", faultingInstruction, node, objNode);
         }
      cg->setImplicitExceptionPoint(faultingInstruction);
      }
@@ -5129,26 +5169,33 @@ static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenera
    bool useShiftedOffsets = (TR::Compiler->om.compressedReferenceShiftOffset() != 0);
    if (useShiftedOffsets)
      {
-      if (!fourthChild->isNull())
+      if (!oldVNode->isNull())
         {
         oReg = srm->findOrCreateScratchRegister();
         generateLogicalShiftRightImmInstruction(cg, node, oReg, oldVReg, TR::Compiler->om.compressedReferenceShiftOffset());
         }
-      if (!fifthChild->isNull())
+      if (!newVNode->isNull())
         {
         nReg = srm->findOrCreateScratchRegister();
         generateLogicalShiftRightImmInstruction(cg, node, nReg, newVReg, TR::Compiler->om.compressedReferenceShiftOffset());
         }
      }
 
-   resultReg = genCAS(node, cg, srm, objReg, offsetReg, offset, offsetInReg, oReg, nReg, doneLabel, 0, true, !comp->useCompressedPointers());
+   resultReg = genCAS(node, cg, srm, objReg, offsetReg, offset, offsetInReg, oReg, nReg, doneLabel, 0, true, !comp->useCompressedPointers(), true, isExchange);
 
    if (useShiftedOffsets)
      {
-      srm->reclaimScratchRegister(oReg);
-      srm->reclaimScratchRegister(nReg);
+      if (!oldVNode->isNull())
+         {
+         srm->reclaimScratchRegister(oReg);
+         }
+
+      if (!newVNode->isNull())
+         {
+         srm->reclaimScratchRegister(nReg);
+         }
      }
 
-   const bool skipWrtBar = (gcMode == gc_modron_wrtbar_none) || (fifthChild->isNull() && (gcMode != gc_modron_wrtbar_always));
+   const bool skipWrtBar = (gcMode == gc_modron_wrtbar_none) || (newVNode->isNull() && (gcMode != gc_modron_wrtbar_always));
    if (!skipWrtBar)
      {
      TR::Register *wrtBarSrcReg = newVReg;
@@ -5162,7 +5209,7 @@ static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenera
         generateMovInstruction(cg, node, wrtBarSrcReg, objReg, true);
         }
 
-      const bool srcNonNull = fifthChild->isNonNull();
+      const bool srcNonNull = newVNode->isNonNull();
 
      if (doWrtBar) // generational or gencon
         {
@@ -5229,18 +5276,18 @@ static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenera
    srm->stopUsingRegisters();
 
    cg->recursivelyDecReferenceCount(firstChild);
-   cg->decReferenceCount(secondChild);
+   cg->decReferenceCount(objNode);
    if (offsetInReg)
      {
-      cg->decReferenceCount(thirdChild);
+      cg->decReferenceCount(offsetNode);
      }
    else
      {
-      cg->recursivelyDecReferenceCount(thirdChild);
+      cg->recursivelyDecReferenceCount(offsetNode);
      }
 
-   cg->decReferenceCount(fourthChild);
-   cg->decReferenceCount(fifthChild);
+   cg->decReferenceCount(oldVNode);
+   cg->decReferenceCount(newVNode);
    return resultReg;
    }
@@ -6800,6 +6847,7 @@ J9::ARM64::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&result
          break;
          }
 
+   static bool disableCAEIntrinsic = feGetEnv("TR_DisableCAEIntrinsic") != NULL;
    switch (methodSymbol->getRecognizedMethod())
      {
      case TR::java_nio_Bits_keepAlive:
@@ -6911,6 +6959,54 @@ J9::ARM64::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&result
         break;
         }
 
+      case TR::jdk_internal_misc_Unsafe_compareAndExchangeInt:
+         {
+         if ((node->isUnsafeGetPutCASCallOnNonArray() || !TR::Compiler->om.canGenerateArraylets()) && node->isSafeForCGToFastPathUnsafeCall())
+            {
+            if (!disableCAEIntrinsic)
+               {
+               resultReg = VMinlineCompareAndSwap(node, cg, false, true);
+               return true;
+               }
+            }
+         break;
+         }
+
+      case TR::jdk_internal_misc_Unsafe_compareAndExchangeLong:
+         {
+         if ((node->isUnsafeGetPutCASCallOnNonArray() || !TR::Compiler->om.canGenerateArraylets()) && node->isSafeForCGToFastPathUnsafeCall())
+            {
+            if (!disableCAEIntrinsic)
+               {
+               resultReg = VMinlineCompareAndSwap(node, cg, true, true);
+               return true;
+               }
+            }
+         break;
+         }
+
+      case TR::jdk_internal_misc_Unsafe_compareAndExchangeObject:
+         /*
+          * Starting from Java 12, compareAndExchangeObject was changed from a native call to a
+          * Java wrapper calling compareAndExchangeReference.
+          * We only want to inline the JNI native method, so add an explicit test for isNative().
+          */
+         if (!methodSymbol->isNative())
+            break;
+         /* If native, fall through. */
+      case TR::jdk_internal_misc_Unsafe_compareAndExchangeReference:
+         {
+         if ((node->isUnsafeGetPutCASCallOnNonArray() || !TR::Compiler->om.canGenerateArraylets()) && node->isSafeForCGToFastPathUnsafeCall())
+            {
+            if (!disableCAEIntrinsic)
+               {
+               resultReg = VMinlineCompareAndSwapObject(node, cg, true);
+               return true;
+               }
+            }
+         break;
+         }
+
 #ifndef OSX
      case TR::java_util_zip_CRC32_update:
         {
diff --git a/runtime/compiler/env/j9method.cpp b/runtime/compiler/env/j9method.cpp
index c2e3eef7804..4816e5f073f 100644
--- a/runtime/compiler/env/j9method.cpp
+++ b/runtime/compiler/env/j9method.cpp
@@ -5552,17 +5552,8 @@ TR_ResolvedJ9Method::isJITInternalNative()
    }
 
 bool
-TR_J9MethodBase::isUnsafeCAS(TR::Compilation * c)
+TR_J9MethodBase::isUnsafeCAS()
    {
-   /*
-    * The TR::Compilation parameter "c" is sometimes null. But, it is needed to perform a platform target check.
-    * So, if it is null, this code goes and gets comp.
-    */
-   if (NULL == c)
-      {
-      c = TR::comp();
-      }
-
    TR::RecognizedMethod rm = getRecognizedMethod();
    switch (rm)
      {
@@ -5570,10 +5561,6 @@ TR_J9MethodBase::isUnsafeCAS(TR::Compilation * c)
      case TR::jdk_internal_misc_Unsafe_compareAndExchangeLong:
      case TR::jdk_internal_misc_Unsafe_compareAndExchangeObject:
      case TR::jdk_internal_misc_Unsafe_compareAndExchangeReference:
-         {
-         TR_ASSERT_FATAL(c, "comp should not be NULL");
-         return (c->target().cpu.isPower() || c->target().cpu.isX86() || c->target().cpu.isZ());
-         }
      case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z:
      case TR::sun_misc_Unsafe_compareAndSwapLong_jlObjectJJJ_Z:
      case TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z:
@@ -5587,10 +5574,8 @@ TR_J9MethodBase::isUnsafeCAS(TR::Compilation * c)
    }
 
 bool
-//TR_ResolvedJ9Method::isUnsafeWithObjectArg(TR::Compilation * c)
-TR_J9MethodBase::isUnsafeWithObjectArg(TR::Compilation * c)
+TR_J9MethodBase::isUnsafeWithObjectArg()
    {
-   //TR::RecognizedMethod rm = TR_ResolvedMethod::getRecognizedMethod();
    TR::RecognizedMethod rm = getRecognizedMethod();
    switch (rm)
      {
@@ -5936,7 +5921,7 @@ TR_J9MethodBase::isVarHandleOperationMethod(TR::RecognizedMethod rm)
    }
 
 bool
-TR_J9MethodBase::isVarHandleAccessMethod(TR::Compilation * comp)
+TR_J9MethodBase::isVarHandleAccessMethod()
    {
    TR::RecognizedMethod rm = getMandatoryRecognizedMethod();
    switch (rm)
@@ -5981,9 +5966,9 @@ TR_J9MethodBase::isVarHandleAccessMethod(TR::Compilation * comp)
    }
 
 bool
-TR_J9MethodBase::isSignaturePolymorphicMethod(TR::Compilation * comp)
+TR_J9MethodBase::isSignaturePolymorphicMethod()
    {
-   if (isVarHandleAccessMethod(comp)) return true;
+   if (isVarHandleAccessMethod()) return true;
 
    TR::RecognizedMethod rm = getMandatoryRecognizedMethod();
    switch (rm)
diff --git a/runtime/compiler/env/j9method.h b/runtime/compiler/env/j9method.h
index f12cb69592c..29473b7f23c 100644
--- a/runtime/compiler/env/j9method.h
+++ b/runtime/compiler/env/j9method.h
@@ -123,11 +123,11 @@ class TR_J9MethodBase : public TR::Method
    static TR::DataType unsafeDataTypeForArray(TR::RecognizedMethod rm);
    static TR::DataType unsafeDataTypeForObject(TR::RecognizedMethod rm);
    static bool isVarHandleOperationMethod(TR::RecognizedMethod rm);
-   virtual bool isVarHandleAccessMethod(TR::Compilation * = NULL);
-   virtual bool isSignaturePolymorphicMethod(TR::Compilation * = NULL);
+   virtual bool isVarHandleAccessMethod();
+   virtual bool isSignaturePolymorphicMethod();
 
-   virtual bool isUnsafeWithObjectArg( TR::Compilation * comp = NULL);
-   virtual bool isUnsafeCAS(TR::Compilation * = NULL);
+   virtual bool isUnsafeWithObjectArg();
+   virtual bool isUnsafeCAS();
    virtual uint32_t numberOfExplicitParameters();
    virtual TR::DataType parmType(uint32_t parmNumber); // returns the type of the parmNumber'th parameter (0-based)
diff --git a/runtime/compiler/il/J9Node.cpp b/runtime/compiler/il/J9Node.cpp
index 9b53722dfb6..98085c81e4a 100644
--- a/runtime/compiler/il/J9Node.cpp
+++ b/runtime/compiler/il/J9Node.cpp
@@ -2164,7 +2164,6 @@ J9::Node::isUnsafeGetPutCASCallOnNonArray()
    if (!symbol)
      return false;
 
-   //TR_ASSERT(symbol->castToResolvedMethodSymbol()->getResolvedMethod()->isUnsafeWithObjectArg(), "Attempt to check flag on a method that is not JNI Unsafe that needs special care for arraylets\n");
    TR_ASSERT(symbol->getMethod()->isUnsafeWithObjectArg() || symbol->getMethod()->isUnsafeCAS(),"Attempt to check flag on a method that is not JNI Unsafe that needs special care for arraylets\n");
    return _flags.testAny(unsafeGetPutOnNonArray);
    }
diff --git a/runtime/compiler/optimizer/InlinerTempForJ9.cpp b/runtime/compiler/optimizer/InlinerTempForJ9.cpp
index b0b07adb30f..9d1032c452e 100644
--- a/runtime/compiler/optimizer/InlinerTempForJ9.cpp
+++ b/runtime/compiler/optimizer/InlinerTempForJ9.cpp
@@ -406,11 +406,7 @@ TR_J9InlinerPolicy::alwaysWorthInlining(TR_ResolvedMethod * calleeMethod, TR::No
      case TR::jdk_internal_misc_Unsafe_compareAndExchangeInt:
      case TR::jdk_internal_misc_Unsafe_compareAndExchangeLong:
      case TR::jdk_internal_misc_Unsafe_compareAndExchangeReference:
-         if (comp()->target().cpu.isPower() || comp()->target().cpu.isX86() || comp()->target().cpu.isZ())
-            {
-            return false;
-            }
-         break;
+         return false;
 
      /* In Java9 the compareAndSwap[Int|Long|Object] and copyMemory enums match
       * both sun.misc.Unsafe and jdk.internal.misc.Unsafe. The sun.misc.Unsafe
@@ -427,11 +423,6 @@ TR_J9InlinerPolicy::alwaysWorthInlining(TR_ResolvedMethod * calleeMethod, TR::No
       * failed the isInlineableJNI check and should not be force inlined.
       */
      case TR::jdk_internal_misc_Unsafe_compareAndExchangeObject:
-         if (comp()->target().cpu.isPower() || comp()->target().cpu.isX86() || comp()->target().cpu.isZ())
-            {
-            return !calleeMethod->isNative();
-            }
-         break;
      case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z:
      case TR::sun_misc_Unsafe_compareAndSwapLong_jlObjectJJJ_Z:
      case TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z:
@@ -2669,7 +2660,7 @@ TR_J9InlinerPolicy::inlineUnsafeCall(TR::ResolvedMethodSymbol *calleeSymbol, TR:
      case TR::jdk_internal_misc_Unsafe_compareAndExchangeLong:
      case TR::jdk_internal_misc_Unsafe_compareAndExchangeObject:
      case TR::jdk_internal_misc_Unsafe_compareAndExchangeReference:
-         if (disableCAEIntrinsic || !(comp()->target().cpu.isPower() || comp()->target().cpu.isX86() || comp()->target().cpu.isZ()))
+         if (disableCAEIntrinsic)
            {
            break;
            }
@@ -2744,7 +2735,7 @@ TR_J9InlinerPolicy::isInlineableJNI(TR_ResolvedMethod *method,TR::Node *callNode
         !comp->fej9()->traceableMethodsCanBeInlined()))
      return false;
 
-   if (method->convertToMethod()->isUnsafeWithObjectArg(comp) || method->convertToMethod()->isUnsafeCAS(comp))
+   if (method->convertToMethod()->isUnsafeWithObjectArg() || method->convertToMethod()->isUnsafeCAS())
      {
      // In Java9 sun/misc/Unsafe methods are simple Java wrappers to JNI
      // methods in jdk.internal, and the enum values above match both. Only
diff --git a/runtime/compiler/optimizer/UnsafeFastPath.cpp b/runtime/compiler/optimizer/UnsafeFastPath.cpp
index 2e9705f8285..73ffea609d0 100644
--- a/runtime/compiler/optimizer/UnsafeFastPath.cpp
+++ b/runtime/compiler/optimizer/UnsafeFastPath.cpp
@@ -272,7 +272,7 @@ bool TR_UnsafeFastPath::tryTransformUnsafeAtomicCallInVarHandleAccessMethod(TR::
    TR::MethodSymbol *symbol = node->getSymbol()->castToMethodSymbol();
    // Codegen will inline the call with the flag
    //
-   if (symbol->getMethod()->isUnsafeCAS(comp()))
+   if (symbol->getMethod()->isUnsafeCAS())
      {
      // codegen doesn't optimize CAS on a static field
      //
@@ -415,7 +415,7 @@ int32_t TR_UnsafeFastPath::perform()
         if (isVarHandleOperationMethod(caller) &&
            (isTransformableUnsafeAtomic(comp(), callee) ||
-             symbol->getMethod()->isUnsafeCAS(comp())))
+             symbol->getMethod()->isUnsafeCAS()))
            {
            if (tryTransformUnsafeAtomicCallInVarHandleAccessMethod(tt, caller, callee))
               {
diff --git a/runtime/compiler/optimizer/VarHandleTransformer.cpp b/runtime/compiler/optimizer/VarHandleTransformer.cpp
index c8e01f33fb5..ff896eb0b83 100644
--- a/runtime/compiler/optimizer/VarHandleTransformer.cpp
+++ b/runtime/compiler/optimizer/VarHandleTransformer.cpp
@@ -159,7 +159,7 @@ TR::RecognizedMethod TR_VarHandleTransformer::getVarHandleAccessMethod(TR::Node
      }
    else
      {
-      if (method->isVarHandleAccessMethod(comp()))
+      if (method->isVarHandleAccessMethod())
         varHandleAccessMethod = method->getMandatoryRecognizedMethod();
      }
    return varHandleAccessMethod;

From 9b6ed0304a36c6ec9344493f87d82138ee9b8ec2 Mon Sep 17 00:00:00 2001
From: jimmyk
Date: Wed, 30 Oct 2024 09:52:53 -0400
Subject: [PATCH 2/2] Update comments and remove unneeded 64-bit checks

Updates comments and removes unneeded 64-bit checks for genCAS on
AArch64.
---
 .../aarch64/codegen/J9TreeEvaluator.cpp       | 59 ++++++++++++-------
 1 file changed, 37 insertions(+), 22 deletions(-)

diff --git a/runtime/compiler/aarch64/codegen/J9TreeEvaluator.cpp b/runtime/compiler/aarch64/codegen/J9TreeEvaluator.cpp
index 4cc569b6296..c656ea012eb 100644
--- a/runtime/compiler/aarch64/codegen/J9TreeEvaluator.cpp
+++ b/runtime/compiler/aarch64/codegen/J9TreeEvaluator.cpp
@@ -4838,10 +4838,14 @@ genCAS(TR::Node *node, TR::CodeGenerator *cg, TR_ARM64ScratchRegisterManager *sr
      {
      TR_ASSERT_FATAL(oldValueInReg, "Expecting oldValue is in register if LSE is enabled");
      /*
-       * movx  resultReg, oldVReg
-       * casal resultReg, newVReg, [addrReg]
-       * cmp   resultReg, oldReg
-       * cset  resultReg, eq
+       * mov   resultReg, oldVReg
+       * casal resultReg, newVReg, [addrReg]
+       * cmp   resultReg, oldVReg
+       * #if (!isExchange)
+       *    cset resultReg, eq
+       * #else if (isReference && comp->useCompressedPointers())
+       *    lslx resultReg, resultReg, TR::Compiler->om.compressedReferenceShift()
+       * #endif
       */
      generateMovInstruction(cg, node, resultReg, oldVReg, is64bit);
      op = casWithoutSync ? (is64bit ? TR::InstOpCode::casx : TR::InstOpCode::casw) : (is64bit ? TR::InstOpCode::casalx : TR::InstOpCode::casalw);
@@ -4852,7 +4856,7 @@ genCAS(TR::Node *node, TR::CodeGenerator *cg, TR_ARM64ScratchRegisterManager *sr
         {
         generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);
         }
-      else if (isReference && comp->target().is64Bit() && comp->useCompressedPointers())
+      else if (isReference && comp->useCompressedPointers())
         {
         genDecompressPointer(cg, node, resultReg);
         }
@@ -4873,16 +4877,30 @@ genCAS(TR::Node *node, TR::CodeGenerator *cg, TR_ARM64ScratchRegisterManager *sr
      /*
       * Generating the same instruction sequence as __sync_bool_compare_and_swap
       *
-       * loop:
-       *    ldxrx  resultReg, [addrReg]
-       *    cmpx   resultReg, oldVReg
-       *    bne    done
-       *    stlxrx resultReg, newVReg, [addrReg]
-       *    cbnz   resultReg, loop
-       *    dmb    ish
-       * done:
-       *    cset   resultReg, eq
-       *
+       * loopLabel:
+       *    ldxr   resultReg, [addrReg]
+       *    cmp    resultReg, oldVReg/oldValue
+       * #if (!createDoneLabel)
+       * #if (!isExchange)
+       *    movzx  resultReg, 0
+       * #else if (isReference && comp->useCompressedPointers())
+       *    lslx   resultReg, resultReg, TR::Compiler->om.compressedReferenceShift()
+       * #endif
+       * #endif
+       *    bne    doneLabel
+       *    stlxr  tempReg, newVReg, [addrReg]
+       *    cbnz   tempReg, loopLabel
+       *    dmb    ish
+       * #if (createDoneLabel)
+       * doneLabel:
+       * #if (!isExchange)
+       *    cset   resultReg, eq
+       * #else if (isReference && comp->useCompressedPointers())
+       *    lslx   resultReg, resultReg, TR::Compiler->om.compressedReferenceShift()
+       * #endif
+       * #else if (!isExchange)
+       *    movzx  resultReg, 1
+       * #endif
       */
      op = is64bit ? TR::InstOpCode::ldxrx : TR::InstOpCode::ldxrw;
      generateTrg1MemInstruction(cg, op, node, resultReg, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0));
@@ -4896,7 +4914,7 @@ genCAS(TR::Node *node, TR::CodeGenerator *cg, TR_ARM64ScratchRegisterManager *sr
         {
         generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, resultReg, 0); // failure
         }
-      else if (isReference && comp->target().is64Bit() && comp->useCompressedPointers())
+      else if (isReference && comp->useCompressedPointers())
         {
         genDecompressPointer(cg, node, resultReg);
         }
@@ -4924,17 +4942,14 @@ genCAS(TR::Node *node, TR::CodeGenerator *cg, TR_ARM64ScratchRegisterManager *sr
         {
         generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);
         }
-      else if (isReference && comp->target().is64Bit() && comp->useCompressedPointers())
+      else if (isReference && comp->useCompressedPointers())
         {
         genDecompressPointer(cg, node, resultReg);
         }
      }
-   else
+   else if (!isExchange)
      {
-      if (!isExchange)
-         {
-         generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, resultReg, 1); // success
-         }
+      generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, resultReg, 1); // success
      }
    }
    srm->reclaimScratchRegister(addrReg);
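
For context on the semantics being accelerated: unlike compareAndSwap, which
returns a boolean success flag (hence the cset in the non-exchange path),
compareAndExchange returns the value witnessed in memory, which is why genCAS
keeps the loaded value in resultReg and, for compressed references,
decompresses it before returning. Below is a minimal Java sketch of that
contract, written against the public VarHandle API that is backed by the
jdk.internal.misc.Unsafe.compareAndExchange* methods recognized above; the
Counter class and its value field are hypothetical names used only for
illustration, not part of this change.

    import java.lang.invoke.MethodHandles;
    import java.lang.invoke.VarHandle;

    class Counter {
        volatile int value;

        static final VarHandle VALUE;
        static {
            try {
                VALUE = MethodHandles.lookup().findVarHandle(Counter.class, "value", int.class);
            } catch (ReflectiveOperationException e) {
                throw new ExceptionInInitializerError(e);
            }
        }

        int increment() {
            int seen = value;
            while (true) {
                // compareAndExchange returns the witnessed value: equal to
                // `seen` on success, or the competing update on failure.
                int witness = (int) VALUE.compareAndExchange(this, seen, seen + 1);
                if (witness == seen) {
                    return seen + 1; // exchange succeeded
                }
                seen = witness; // retry from the freshly observed value
            }
        }
    }

Because the witnessed value is the return, the failure path needs no second
load; the CAE loop above maps directly onto the ldxr/stlxr (or LSE casal)
sequences that genCAS emits, with the result register carrying the loaded
value instead of a cset flag.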