Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Value Types: Replace arraycopy source and destination with temps #7582

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 121 additions & 35 deletions compiler/optimizer/ValuePropagationCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -922,6 +922,47 @@ bool OMR::ValuePropagation::transformUnsafeCopyMemoryCall(TR::Node *arrayCopyNod
return false;
}

/**
 * \brief Records \p currNode and every node reachable through its children in \p nodeList.
 *
 * Nodes are identified in the bit vector by their global index. A node whose
 * index is already set is skipped together with its subtree, so a subtree that
 * is reachable through more than one parent is only walked once.
 *
 * \param currNode The root of the subtree to record.
 * \param nodeList The bit vector, indexed by node global index, that is updated in place.
 */
static void setNodeOnList(TR::Node *currNode, TR_BitVector *nodeList)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May I ask you to add some Doxygen comments or at least some description of what it's doing? I know there's a bit of a description that appears where it's called, but I think it would help to have it described here.

{
// Already recorded: this node and its descendants were handled by an earlier visit
if (nodeList->isSet(currNode->getGlobalIndex()))
return;

// Mark the node before descending into its children
nodeList->set(currNode->getGlobalIndex());

for (int32_t i = 0; i < currNode->getNumChildren(); i++)
{
setNodeOnList(currNode->getChild(i), nodeList);
}
}

/**
 * \brief Walks the children of \p currNode and replaces each occurrence of
 *        \p matchingNode with a newly created load of \p symRef.
 *
 * A child is replaced when it is the very same node as \p matchingNode, or when
 * both the child and \p matchingNode are \c aload nodes whose symbol references
 * have the same reference number. A replaced child is not descended into.
 *
 * A child that is not replaced is searched recursively unless its global index
 * is set in \p nodeSkipList. Note that \p nodeSkipList only stops the descent:
 * the match test runs first, so a child that is itself on the list is still
 * replaced if it matches and its parent was reached. \p currNode itself is
 * never tested for a match; only its descendants are.
 *
 * \param currNode     The node whose children are examined.
 * \param matchingNode The node whose occurrences are to be replaced.
 * \param symRef       The symbol reference that each replacement load is created from.
 * \param nodeSkipList Bit vector, indexed by node global index, of nodes whose children must not be searched.
 * \param trace        When true, a trace message is emitted for every replacement.
 * \param comp         The compilation object passed to traceMsg.
 */
static void findAndReplaceCommonedNodeWithLoadFromTemp(TR::Node *currNode, TR::Node *matchingNode, TR::SymbolReference *symRef, TR_BitVector *nodeSkipList, bool trace, TR::Compilation *comp)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May I ask you to add some Doxygen comments or at least some description of what it's doing? I know there's a bit of a description that appears where it's called, but I think it would help to have it described here.

{
for (int32_t i = 0; i < currNode->getNumChildren(); i++)
{
TR::Node *childNode = currNode->getChild(i);

// Match either the identical node, or a different aload node of the same symbol reference
if ((childNode == matchingNode)
|| ((childNode->getOpCodeValue() == TR::aload) &&
(matchingNode->getOpCodeValue() == TR::aload) &&
childNode->getOpCode().hasSymbolReference() &&
matchingNode->getOpCode().hasSymbolReference() &&
(childNode->getSymbolReference()->getReferenceNumber() == matchingNode->getSymbolReference()->getReferenceNumber())))
{
TR::Node *loadNode = TR::Node::createLoad(symRef);

// Drop this parent's reference to the old child, then install the new load in the same child slot
childNode->recursivelyDecReferenceCount();
currNode->setAndIncChild(i, loadNode);

if (trace)
traceMsg(comp, "%s: currNode n%dn replace childNode n%dn with loadNode n%dn\n", __FUNCTION__, currNode->getGlobalIndex(), childNode->getGlobalIndex(), loadNode->getGlobalIndex());
}
// Only descend into children that are not on the skip list
else if (!nodeSkipList->isSet(childNode->getGlobalIndex()))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find it a little bit confusing that a node that is in nodeSkipList might get processed by the if that appears just before this else if if its parent was not in nodeSkipList. I wonder if it should be called skipDescendantsList or something like that. Just a thought.

{
findAndReplaceCommonedNodeWithLoadFromTemp(childNode, matchingNode, symRef, nodeSkipList, trace, comp);
}
}
}

void OMR::ValuePropagation::transformArrayCopyCall(TR::Node *node)
{
bool is64BitTarget = comp()->target().is64Bit();
Expand Down Expand Up @@ -1440,41 +1481,50 @@ void OMR::ValuePropagation::transformArrayCopyCall(TR::Node *node)
comp()->getDebug()->print(comp()->getOutFile(), comp()->getFlowGraph());
}
/*
==== Before ===
_curTree---> n9n treetop
n8n call java/lang/System.arraycopy(Ljava/lang/Object;ILjava/lang/Object;II)V
n3n aload <parm 0 [LSomeInterface;>[#419 Parm]
n4n iconst 0
n5n aload <parm 1 [LSomeInterface;>[#420 Parm]
n6n iconst 0
n7n iload TestSystemArraycopy4.ARRAY_SIZE I[#421 Static]

==== After ===
n48n astore <temp slot 3>[#429 Auto]
n3n aload <parm 0 [LSomeInterface;>[#419 Parm]
n52n astore <temp slot 5>[#431 Auto]
n5n aload <parm 1 [LSomeInterface;>[#420 Parm]
prevTT---> n56n istore <temp slot 7>[#433 Auto]
n7n iload TestSystemArraycopy4.ARRAY_SIZE I[#421 Static]
_curTree---> n9n treetop
n8n call java/lang/System.arraycopy(Ljava/lang/Object;ILjava/lang/Object;II)V
n3n ==>aload
n4n iconst 0
n5n ==>aload
n6n iconst 0
n7n ==>iload
nextTT---> ...
...
...
slowBlock---> n39n BBStart <block_-1>
n41n treetop
n42n call java/lang/System.arraycopy(Ljava/lang/Object;ILjava/lang/Object;II)V [#428 final native static Method] [flags 0x20500 0x0 ] (dontTransformArrayCopyCall )
n49n aload <temp slot 3>[#429 Auto]
n51n iconst 0
n53n aload <temp slot 5>[#431 Auto]
n55n iconst 0
n57n iload <temp slot 7>[#433 Auto]
n40n BBEnd </block_-1>
==== Before modifying for null-restricted array check ===
_curTree---> n43n treetop
n42n call java/lang/System.arraycopy(Ljava/lang/Object;ILjava/lang/Object;II)V
n257n aload <temp slot 9>[#464 Auto]
n38n iload <auto slot 4>[#423 Auto]
n39n aload <auto slot 5>[#431 Auto]
n40n iload <auto slot 4>[#423 Auto]
n41n iload <auto slot 3>[#422 Auto]
n47n areturn
n46n aload <auto slot 5>[#431 Auto]
n2n BBEnd </block_20> =====

==== After modifying for null-restricted array check ===
n328n astore <temp slot 11>[#470 Auto]
n257n aload <temp slot 9>[#464 Auto]
n330n istore <temp slot 12>[#471 Auto]
n38n iload <auto slot 4>[#423 Auto]
n332n astore <temp slot 13>[#472 Auto]
n39n aload <auto slot 5>[#431 Auto]
n334n istore <temp slot 14>[#473 Auto]
n40n iload <auto slot 4>[#423 Auto]
prevTT---> n336n istore <temp slot 15>[#474 Auto]
n41n iload <auto slot 3>[#422 Auto]
_curTree---> n43n treetop
n42n call java/lang/System.arraycopy(Ljava/lang/Object;ILjava/lang/Object;II)V
n339n aload <temp slot 11>[#470 Auto]
n38n ==>iload
n338n aload <temp slot 13>[#472 Auto]
n40n ==>iload
n41n ==>iload
nextTT---> n47n areturn
n340n aload <temp slot 13>[#472 Auto]


slowBlock---> n324n BBStart <block_27> (freq 0) (cold)
n327n treetop
n326n call java/lang/System.arraycopy(Ljava/lang/Object;ILjava/lang/Object;II)V
n329n aload <temp slot 11>[#470 Auto]
n331n iload <temp slot 12>[#471 Auto]
n333n aload <temp slot 13>[#472 Auto]
n335n iload <temp slot 14>[#473 Auto]
n337n iload <temp slot 15>[#474 Auto]
n448n goto --> block_24 BBStart at n433n
n325n BBEnd </block_27> (cold)
*/

// Create the block that contains the System.arraycopy call which will be the slow path
Expand All @@ -1492,6 +1542,30 @@ void OMR::ValuePropagation::transformArrayCopyCall(TR::Node *node)

slowBlock->append(newCallTree);

// Collect a list of all the nodes that appear before the current System.arraycopy tree.
// This should run before inserting the new store nodes.
// The purpose of this list is to prevent the source/destination nodes that are commoned
// before the System.arraycopy tree from being replaced.
// e.g.
// n1n Op1
// n2n load src
// n3n treetop
// n4n System.arraycopy
// n5n load src
// ...
// n8n Op2
// ==> n1n <--- n2n under n1n should not be replaced with the new temp because n1n appears before System.arraycopy tree
// n11n Op3
// ==> n2n <--- will be replaced with the new temp in findAndReplaceCommonedNodeWithLoadFromTemp
//
TR::TreeTop *itTT = _curTree->getEnclosingBlock()->startOfExtendedBlock()->getEntry();
TR_BitVector *nodeSkipList = new (comp()->trStackMemory()) TR_BitVector(20, comp()->trMemory(), stackAlloc, growable);
while (itTT != _curTree)
{
setNodeOnList(itTT->getNode(), nodeSkipList);
itTT = itTT->getNextTreeTop();
}

TR::Node *oldCallNode = node;
if (trace())
traceMsg(comp(),"Creating temps for children of the original call node n%dn %p. new call node n%dn %p\n", oldCallNode->getGlobalIndex(), oldCallNode, newCallNode->getGlobalIndex(), newCallNode);
Expand Down Expand Up @@ -1548,6 +1622,18 @@ void OMR::ValuePropagation::transformArrayCopyCall(TR::Node *node)
(nextTT->getNode()->getOpCodeValue() == TR::BBStart))
nextTT = nextTT->getNextTreeTop();

itTT = _curTree;
TR::TreeTop *exitTT = _curTree->getEnclosingBlock()->getEntry()->getExtendedBlockExitTreeTop();

// Replace all the commoned destination array reference and source array reference nodes with the new temps
while (itTT != exitTT)
{
TR::Node *itNode = itTT->getNode();
findAndReplaceCommonedNodeWithLoadFromTemp(itNode, dstObjNode, dstArrRefSymRef, nodeSkipList, trace(), comp());
findAndReplaceCommonedNodeWithLoadFromTemp(itNode, srcObjNode, srcArrRefSymRef, nodeSkipList, trace(), comp());
Comment on lines +1632 to +1633
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand that the problem this is fixing appeared with temporaries for the destination or source arrays, but is it conceivable that the same problem could occur with the offset or length values?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question! I think theoretically speaking the same problem could occur with the offset and length as well. I tried to tweak my unit tests to show this problem on the offset or length, but so far I haven't been able to get the expected IL trees. I'll think over the proper fix.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm able to reproduce the same problem with copy size now. The example IL trees below show that the copy size is stored to a temp #474 after the preparation of the null restricted array copy transformation. If the slow path block_23 is invoked, #474 is uninitialized.

I will expand this change to all children of arraycopy in the next commit.

...
n180n     istore  <temp slot 12>[#465  Auto] [flags 0x3 0x0 ]                                 [0x7f63f9f82800] bci=[-1,37,129] rc=0 vc=0 vn=- li=- udi=- nc=1
n32n        iload  <auto slot 3>[#421  Auto] [flags 0x3 0x0 ] (cannotOverflow )               [0x7f63f9ee5120] bci=[-1,36,129] rc=2 vc=51 vn=- li=- udi=- nc=0 flg=0x1000
n368n     istore  <temp slot 24>[#477  Auto] [flags 0x3 0x0 ]                                 [0x7f63f9f862c0] bci=[-1,36,129] rc=0 vc=0 vn=- li=- udi=- nc=1
n32n        ==>iload
n365n     ificmpne --> block_23 BBStart at n168n ()                                           [0x7f63f9f861d0] bci=[-1,0,125] rc=0 vc=0 vn=- li=- udi=- nc=2 flg=0x20
n363n       iand                                                                              [0x7f63f9f86130] bci=[-1,0,125] rc=1 vc=0 vn=- li=- udi=- nc=2
n361n         iloadi  <isClassFlags>[#340  Shadow +36] [flags 0x603 0x0 ]                     [0x7f63f9f86090] bci=[-1,0,125] rc=1 vc=0 vn=- li=- udi=- nc=1
n360n           aloadi  <vft-symbol>[#341  Shadow] [flags 0x18607 0x0 ]                       [0x7f63f9f86040] bci=[-1,0,125] rc=1 vc=0 vn=- li=- udi=- nc=1
n357n             aload  <temp slot 10>[#463  Auto] [flags 0x7 0x0 ]                          [0x7f63f9f85f50] bci=[-1,0,125] rc=1 vc=0 vn=- li=- udi=- nc=0
n362n         iconst 0x2000000 (X!=0 X>=0 )                                                   [0x7f63f9f860e0] bci=[-1,0,125] rc=1 vc=0 vn=- li=- udi=- nc=0 flg=0x104
n364n       iconst 0 (X==0 X>=0 X<=0 )                                                        [0x7f63f9f86180] bci=[-1,0,125] rc=1 vc=0 vn=- li=- udi=- nc=0 flg=0x302
n405n     BBEnd </block_14> =====                                                             [0x7f63f9f86e50] bci=[-1,0,125] rc=0 vc=0 vn=- li=- udi=- nc=0

...

n366n     BBStart <block_24> (freq 9500)                                                      [0x7f63f9f86220] bci=[-1,36,129] rc=0 vc=0 vn=- li=- udi=- nc=0
n292n     istore  <temp slot 21>[#474  Auto] [flags 0x3 0x0 ]                                 [0x7f63f9f84b00] bci=[-1,36,129] rc=0 vc=0 vn=- li=- udi=- nc=1
n372n       iload  <temp slot 24>[#477  Auto] [flags 0x3 0x0 ]                                [0x7f63f9f86400] bci=[-1,36,129] rc=1 vc=0 vn=- li=- udi=- nc=0
...

n289n     BBStart <block_17> (freq 9500)                                                      [0x7f63f9f84a10] bci=[-1,37,129] rc=0 vc=0 vn=- li=- udi=- nc=0
n36n      ireturn                                                                             [0x7f63f9ee5260] bci=[-1,41,130] rc=0 vc=51 vn=- li=- udi=- nc=1
n298n       iload  <temp slot 21>[#474  Auto] [flags 0x3 0x0 ]                                [0x7f63f9f84ce0] bci=[-1,36,129] rc=1 vc=0 vn=- li=- udi=- nc=0
n2n       BBEnd </block_17> =====                                                             [0x7f63f9ee47c0] bci=[-1,41,130] rc=0 vc=50 vn=- li=- udi=- nc=0

itTT = itTT->getNextTreeTop();
}

if (trace())
{
traceMsg(comp(), "%s: n%dn %p current block_%d slowBlock block_%d newCallTree n%dn %p prevTT n%dn %p nextTT n%dn %p\n", __FUNCTION__, node->getGlobalIndex(), node,
Expand Down