Skip to content

Commit

Permalink
Support Java and IEEE-754 SPEC w.r.t floating point min/max ops
Browse files Browse the repository at this point in the history
- +0.0 compares as strictly greater than -0.0
- Refactored xmaxxminHelper to into a more generic helper function that
  does not change NaNs
- added helper for fmin/fmax/... nodes that uses SIMD instructions when
  available that has same behaviour as xmaxxminHelper on floats
- idea is to enable other evaluators to use these helpers and deal with
  NaNs as they see fit
- omr evaluators support IEEE-754 w.r.t zeros and NaNs (returns the
  quiet NaN corresponding to the first NaN given if present)

Signed-off-by: Matthew Hall <[email protected]>
  • Loading branch information
matthewhall2 committed Oct 10, 2024
1 parent fe772f7 commit cc9736b
Show file tree
Hide file tree
Showing 2 changed files with 246 additions and 40 deletions.
250 changes: 210 additions & 40 deletions compiler/z/codegen/ControlFlowEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
#include "z/codegen/BinaryAnalyser.hpp"
#include "z/codegen/BinaryCommutativeAnalyser.hpp"
#include "z/codegen/CompareAnalyser.hpp"
#include "z/codegen/OMRTreeEvaluator.hpp"
#include "z/codegen/S390GenerateInstructions.hpp"
#include "z/codegen/S390HelperCallSnippet.hpp"
#include "z/codegen/S390Instruction.hpp"
Expand Down Expand Up @@ -323,52 +324,175 @@ generateS390Compare(TR::Node * node, TR::CodeGenerator * cg, TR::InstOpCode::Mne
return branchOpCond;
}

static TR::Register*
xmaxxminHelper(TR::Node* node, TR::CodeGenerator* cg, TR::InstOpCode::Mnemonic compareRROp, TR::InstOpCode::S390BranchCondition branchCond, TR::InstOpCode::Mnemonic moveRROp)
TR::Register *
OMR::Z::TreeEvaluator::xmaxxminHelper(TR::Node * node, TR::CodeGenerator * cg)
{
TR::Node* lhsNode = node->getChild(0);
TR::Node* rhsNode = node->getChild(1);

TR::Register* lhsReg = cg->gprClobberEvaluate(lhsNode);
TR::Register* rhsReg = cg->evaluate(rhsNode);
TR::Register * operand1 = cg->gprClobberEvaluate(node->getChild(0));
TR::Register * operand2 = cg->evaluate(node->getChild(1));

TR::LabelSymbol* cFlowRegionStart = generateLabelSymbol(cg);
TR::LabelSymbol* cFlowRegionEnd = generateLabelSymbol(cg);

generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
cFlowRegionStart->setStartInternalControlFlow();

generateRREInstruction(cg, compareRROp, node, lhsReg, rhsReg);
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, branchCond, node, cFlowRegionEnd);

//Check for NaN operands for float and double
if (node->getOpCode().isFloatingPoint())
TR::InstOpCode::Mnemonic compareRROp = TR::InstOpCode::NOP;
TR::InstOpCode::S390BranchCondition returnFirstArgCond = TR::InstOpCode::COND_NOP;
bool isMaxOp = node->getOpCode().isMax();
bool isFloatingPointOp = node->getOpCode().isFloatingPoint();
if (isFloatingPointOp)
{
compareRROp = node->getOpCode().isDouble() ? TR::InstOpCode::CDBR : TR::InstOpCode::CEBR;
returnFirstArgCond = isMaxOp ? TR::InstOpCode::COND_MASK2 : TR::InstOpCode::COND_MASK4;
}
else
{
TR_ASSERT_FATAL(node->getOpCode().isInt() || node->getOpCode().isLong(), "invalid operand type");
compareRROp = node->getOpCode().isLong() ? TR::InstOpCode::CGR : TR::InstOpCode::CR;
returnFirstArgCond = isMaxOp ? TR::InstOpCode::COND_BHR : TR::InstOpCode::COND_BLR;
}

TR::LabelSymbol* cFlowRegionEnd = generateLabelSymbol(cg);
TR::LabelSymbol* swapValues = generateLabelSymbol(cg);
generateRREInstruction(cg, compareRROp, node, operand1, operand2);
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, returnFirstArgCond, node, cFlowRegionEnd);
if (isFloatingPointOp)
{
/**
* Check for NaN operands for float and double
* Support float and double +0/-0 comparisons adhering to IEEE 754 standard
* Checking if operands are equal, then branching to equalRegion, otherwise
* fall through for NaN case handling
*/
TR::LabelSymbol* equalRegion = generateLabelSymbol(cg);
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK8, node, equalRegion);

// If first operand is NaN, then we are done, otherwise fallthrough to move second operand as result
generateRREInstruction(cg, node->getOpCode().isDouble() ? TR::InstOpCode::LTDBR : TR::InstOpCode::LTEBR, node, lhsReg, lhsReg);
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC3, node, cFlowRegionEnd);
TR::InstOpCode::Mnemonic tdcOpcode = node->getOpCode().isDouble() ? TR::InstOpCode::TCDB : TR::InstOpCode::TCEB;
generateRXEInstruction(cg, tdcOpcode, node, operand1, generateS390MemoryReference(0x00F, cg), 0); // can't use load and test here since it would set the quiet bit
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK4, node, cFlowRegionEnd);

// branch to swap label, since either second operand is NaN, or entire satisfies the alternate condition code
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, swapValues);

// code for handling +0/-0 comparisons when operands are equal
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, equalRegion);
if (node->getOpCode().isMax())
{
// For Max calls, checking if first operand is +0, then we are done, otherwise fall through for swap
generateRXEInstruction(cg, tdcOpcode, node, operand1, generateS390MemoryReference(0x800, cg), 0); // operand1 is +0 ?
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK4, node, cFlowRegionEnd); // it is +0
}
else if (node->getOpCode().isMin())
{
// For Min calls, checking if first operand is not +0, then we are done, otherwise fall through for swap
generateRXEInstruction(cg, tdcOpcode, node, operand1, generateS390MemoryReference(0x400, cg), 0); // operand1 is -0 ?
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK4, node, cFlowRegionEnd);
}
}

//Move resulting operand to lhsReg as fallthrough for alternate Condition Code
generateRREInstruction(cg, moveRROp, node, lhsReg, rhsReg);
// Move resulting operand to operand1 as fallthrough for alternate Condition Code
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, swapValues);
TR::InstOpCode::Mnemonic moveRROp = TR::InstOpCode::NOP;
if (isFloatingPointOp)
{
moveRROp = node->getOpCode().isDouble() ? TR::InstOpCode::LDR : TR::InstOpCode::LER;
}
else
{
moveRROp = node->getOpCode().isLong() ? TR::InstOpCode::LGR : TR::InstOpCode::LR;
}
generateRREInstruction(cg, moveRROp, node, operand1, operand2);

TR::RegisterDependencyConditions* deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg);
deps->addPostConditionIfNotAlreadyInserted(lhsReg, TR::RealRegister::AssignAny);
deps->addPostConditionIfNotAlreadyInserted(rhsReg, TR::RealRegister::AssignAny);
deps->addPostConditionIfNotAlreadyInserted(operand1, TR::RealRegister::AssignAny);
deps->addPostConditionIfNotAlreadyInserted(operand2, TR::RealRegister::AssignAny);

generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, deps);
cFlowRegionEnd->setEndInternalControlFlow();

node->setRegister(lhsReg);
cg->decReferenceCount(lhsNode);
cg->decReferenceCount(rhsNode);
node->setRegister(operand1);
cg->decReferenceCount(node->getChild(0));
cg->decReferenceCount(node->getChild(1));
return operand1;
}


TR::Register *
OMR::Z::TreeEvaluator::fpMinMaxVectorHelper(TR::Node *node, TR::CodeGenerator *cg)
{
TR_ASSERT_FATAL(node->getNumChildren() >= 1 || node->getNumChildren() <= 2 || node->getOpCode().isFloatingPoint(),
"node has incorrect number of children or is not floating point type");

/* ===================== Allocating Registers ===================== */

TR::Register * argumentSelectorReg = cg->allocateRegister(TR_VRF);
TR::Register * compareResultReg = cg->allocateRegister(TR_VRF);
TR::Register * zeroCheckReg = cg->allocateRegister(TR_VRF);

/* ====== LD FPR0,16(GPR5) Load a ====== */
TR::Register * operand1 = cg->fprClobberEvaluate(node->getFirstChild());

/* ====== LD FPR2, 0(GPR5) Load b ====== */
TR::Register * operand2 = cg->evaluate(node->getSecondChild());

return lhsReg;
/* ===================== Generating instructions ===================== */

const uint8_t typeMask = node->getOpCode().isDouble() ? 3 : 2;
/* ====== WFTCIDB argumentSelectorReg,operand1,X'F' a == NaN ====== */
generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, argumentSelectorReg, operand1, 0xF, 8, typeMask);

/* ====== For Max: WFCHE compareResultReg,operand1,operand2 Compare a > b ====== */
bool isMaxOp = node->getOpCode().isMax();
if(isMaxOp)
{
generateVRRcInstruction(cg, TR::InstOpCode::VFCH, node, compareResultReg, operand1, operand2, 0, 8, typeMask);
}
/* ====== For Min: WFCHE compareResultReg,operand1,operand2 Compare a < b ====== */
else
{
generateVRRcInstruction(cg, TR::InstOpCode::VFCH, node, compareResultReg, operand2, operand1, 0, 8, typeMask);
}

/* ====== VO argumentSelectorReg,argumentSelectorReg,compareResultReg (a > b) || (a == NaN) ====== */
generateVRRcInstruction(cg, TR::InstOpCode::VO, node, argumentSelectorReg, argumentSelectorReg, compareResultReg, 0, 0, 0);

/* ====== For Max: WFTCIDB compareResultReg,operand1,X'800' a == +0 ====== */
if(isMaxOp)
{
generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, compareResultReg, operand1, 0x800, 8, typeMask);
}
/* ====== For Min: WFTCIDB compareResultReg,operand1,X'400' a == -0 ====== */
else
{
generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, compareResultReg, operand1, 0x400, 8, typeMask);
}
/* ====== WFTCIDB zeroCheckReg,operand2,X'C00' b == 0 ====== */
generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, zeroCheckReg, operand2, 0xC00, 8, typeMask);

/* ====== VN compareResultReg,compareResultReg,zeroCheckReg (a == -0) && (b == 0) ====== */
generateVRRcInstruction(cg, TR::InstOpCode::VN, node, compareResultReg, compareResultReg, zeroCheckReg, 0, 0, 0);

/* ====== VO argumentSelectorReg,argumentSelectorReg,compareResultReg (a >= b) || (a == NaN) || ((a == -0) && (b == 0)) ====== */
generateVRRcInstruction(cg, TR::InstOpCode::VO, node, argumentSelectorReg, argumentSelectorReg, compareResultReg, 0, 0, 0);

/* ====== VSEL operand1,operand1,operand2,argumentSelectorReg ====== */
generateVRReInstruction(cg, TR::InstOpCode::VSEL, node, operand1, operand1, operand2, argumentSelectorReg);

/* ===================== Deallocating Registers ===================== */
cg->stopUsingRegister(operand2);
cg->stopUsingRegister(argumentSelectorReg);
cg->stopUsingRegister(compareResultReg);
cg->stopUsingRegister(zeroCheckReg);

node->setRegister(operand1);
cg->decReferenceCount(node->getFirstChild());
cg->decReferenceCount(node->getSecondChild());
return operand1;
}

static TR::Register*
imaximinHelper(TR::Node* node, TR::CodeGenerator* cg, TR::InstOpCode::Mnemonic compareRROp, TR::InstOpCode::S390BranchCondition branchCond, TR::InstOpCode::Mnemonic moveRROp)
imaximinHelper(TR::Node* node, TR::CodeGenerator* cg)
{
TR::InstOpCode::S390BranchCondition branchCond = node->getOpCode().isMax() ? TR::InstOpCode::COND_BHR : TR::InstOpCode::COND_BLR;
if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z15))
{
TR::Node* lhsNode = node->getChild(0);
Expand Down Expand Up @@ -408,13 +532,14 @@ imaximinHelper(TR::Node* node, TR::CodeGenerator* cg, TR::InstOpCode::Mnemonic c
}
else
{
return xmaxxminHelper(node, cg, compareRROp, branchCond, moveRROp);
return OMR::Z::TreeEvaluator::xmaxxminHelper(node, cg);
}
}

static TR::Register*
lmaxlminHelper(TR::Node* node, TR::CodeGenerator* cg, TR::InstOpCode::Mnemonic compareRROp, TR::InstOpCode::S390BranchCondition branchCond, TR::InstOpCode::Mnemonic moveRROp)
lmaxlminHelper(TR::Node* node, TR::CodeGenerator* cg)
{
TR::InstOpCode::S390BranchCondition branchCond = node->getOpCode().isMax() ? TR::InstOpCode::COND_BHR : TR::InstOpCode::COND_BLR;
if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z15))
{
TR::Node* lhsNode = node->getChild(0);
Expand Down Expand Up @@ -454,60 +579,105 @@ lmaxlminHelper(TR::Node* node, TR::CodeGenerator* cg, TR::InstOpCode::Mnemonic c
}
else
{
return xmaxxminHelper(node, cg, compareRROp, branchCond, moveRROp);
return OMR::Z::TreeEvaluator::xmaxxminHelper(node, cg);
}
}

TR::Register*
OMR::Z::TreeEvaluator::imaxEvaluator(TR::Node* node, TR::CodeGenerator* cg)
{
return imaximinHelper(node, cg, TR::InstOpCode::CR, TR::InstOpCode::COND_BHR, TR::InstOpCode::LR);
return imaximinHelper(node, cg);
}

TR::Register*
OMR::Z::TreeEvaluator::lmaxEvaluator(TR::Node* node, TR::CodeGenerator* cg)
{
return lmaxlminHelper(node, cg, TR::InstOpCode::CGR, TR::InstOpCode::COND_BHR, TR::InstOpCode::LGR);
return lmaxlminHelper(node, cg);
}

TR::Register*
OMR::Z::TreeEvaluator::fmaxEvaluator(TR::Node* node, TR::CodeGenerator* cg)
{
//Passing COND_MASK10 to check CC if operand 1 is already greater than, or equal to operand 2
return xmaxxminHelper(node, cg, TR::InstOpCode::CEBR, TR::InstOpCode::COND_MASK10, TR::InstOpCode::LER);
TR::Register * result = NULL;
if (cg->getSupportsVectorRegisters())
{
cg->generateDebugCounter("z13/simd/floatMax", 1, TR::DebugCounter::Free);
result = OMR::Z::TreeEvaluator::fpMinMaxVectorHelper(node, cg);
}
else
{
result = OMR::Z::TreeEvaluator::xmaxxminHelper(node, cg);
}
// load and test (result <- result) - sets qiuet bit on NaN
generateRREInstruction(cg, TR::InstOpCode::LTEBR, node, result, result);
return result;
}

TR::Register*
OMR::Z::TreeEvaluator::dmaxEvaluator(TR::Node* node, TR::CodeGenerator* cg)
{
//Passing COND_MASK10 to check CC if operand 1 is already greater than, or equal to operand 2
return xmaxxminHelper(node, cg, TR::InstOpCode::CDBR, TR::InstOpCode::COND_MASK10, TR::InstOpCode::LDR);
TR::Register * result = NULL;
if (cg->getSupportsVectorRegisters())
{
cg->generateDebugCounter("z13/simd/doubleMax", 1, TR::DebugCounter::Free);
result = OMR::Z::TreeEvaluator::fpMinMaxVectorHelper(node, cg);
}
else
{
result = OMR::Z::TreeEvaluator::xmaxxminHelper(node, cg);
}
// load and test (result <- result) - sets qiuet bit on NaN
generateRREInstruction(cg, TR::InstOpCode::LTDBR, node, result, result);
return result;
}

TR::Register *
OMR::Z::TreeEvaluator::iminEvaluator(TR::Node *node, TR::CodeGenerator *cg)
{
return imaximinHelper(node, cg, TR::InstOpCode::CR, TR::InstOpCode::COND_BLR, TR::InstOpCode::LR);
return imaximinHelper(node, cg);
}

TR::Register *
OMR::Z::TreeEvaluator::lminEvaluator(TR::Node *node, TR::CodeGenerator *cg)
{
return lmaxlminHelper(node, cg, TR::InstOpCode::CGR, TR::InstOpCode::COND_BLR, TR::InstOpCode::LGR);
return lmaxlminHelper(node, cg);
}

TR::Register*
OMR::Z::TreeEvaluator::fminEvaluator(TR::Node* node, TR::CodeGenerator* cg)
{
//Passing COND_MASK12 to check CC if operand 1 is already less than, or equal to operand 2
return xmaxxminHelper(node, cg, TR::InstOpCode::CEBR, TR::InstOpCode::COND_MASK12, TR::InstOpCode::LER);
TR::Register * reg = NULL;
TR::Register * result = NULL;
if (cg->getSupportsVectorRegisters())
{
cg->generateDebugCounter("z13/simd/floatMin", 1, TR::DebugCounter::Free);
result = OMR::Z::TreeEvaluator::fpMinMaxVectorHelper(node, cg);
}
else
{
result = OMR::Z::TreeEvaluator::xmaxxminHelper(node, cg);
}
// load and test (result <- result) - sets qiuet bit on NaN
generateRREInstruction(cg, TR::InstOpCode::LTEBR, node, result, result);
return result;
}

TR::Register*
OMR::Z::TreeEvaluator::dminEvaluator(TR::Node* node, TR::CodeGenerator* cg)
{
//Passing COND_MASK12 to check CC if operand 1 is already less than, or equal to operand 2
return xmaxxminHelper(node, cg, TR::InstOpCode::CDBR, TR::InstOpCode::COND_MASK12, TR::InstOpCode::LDR);
TR::Register * result = NULL;
if (cg->getSupportsVectorRegisters())
{
cg->generateDebugCounter("z13/simd/doubleMin", 1, TR::DebugCounter::Free);
result = OMR::Z::TreeEvaluator::fpMinMaxVectorHelper(node, cg);
}
else
{
result = OMR::Z::TreeEvaluator::xmaxxminHelper(node, cg);
}
// load and test (result <- result) - sets qiuet bit on NaN
generateRREInstruction(cg, TR::InstOpCode::LTDBR, node, result, result);
return result;
}

/**
Expand Down
36 changes: 36 additions & 0 deletions compiler/z/codegen/OMRTreeEvaluator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,42 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator
static TR::Register *MethodExitHookEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *PassThroughEvaluator(TR::Node *node, TR::CodeGenerator *cg);

/** \brief
* This is a helper function used for floating point max/min operations
* when SIMD instructions are available. +0.0 compares as strictly
* greater than -0.0, and NaNs are returned unchanged if given
* (the quiet bit is not set for Signalling NaNs).
* Generates vector instructions.
*
* \param node
* The node to evaluate
*
* \param cg
* The code generator used to generate the instructions
*
* \return
* The register containing the result of the evaluation
*
*/
static TR::Register *fpMinMaxVectorHelper(TR::Node *node, TR::CodeGenerator *cg);

/** \brief
* This is a helper function used for integral and floating point max/min operations.
* For floating points, +0.0 compares as strictly greater than -0.0, and NaNs are
* returned unchanged if given (the quiet bit is not set for Signalling NaNs).
*
* \param node
* The node to evaluate
*
* \param cg
* The code generator used to generate the instructions
*
* \return
* The register containing the result of the evaluation
*
*/
static TR::Register *xmaxxminHelper(TR::Node *node, TR::CodeGenerator *cg);

// mask evaluators
static TR::Register *mAnyTrueEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *mAllTrueEvaluator(TR::Node *node, TR::CodeGenerator *cg);
Expand Down

0 comments on commit cc9736b

Please sign in to comment.