From 1ebd8d7ce2a9c13461f2b0db570b83f3e1359bcf Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Wed, 15 Aug 2018 12:11:00 -0700 Subject: [PATCH] Optimize OMRCAS8Helper This patch reduces the number of instructions used to implement OMRCAS8Helper by reducing register shuffling and removing the unnecessary stdcx that is performed when the comparison fails. Signed-off-by: Younes Manton --- util/omrutil/unix/aix/32/cas8help.s | 29 ++++++++++------------ util/omrutil/unix/linux/ppc/32/cas8help.s | 30 +++++++++-------------- 2 files changed, 25 insertions(+), 34 deletions(-) diff --git a/util/omrutil/unix/aix/32/cas8help.s b/util/omrutil/unix/aix/32/cas8help.s index 3267f92ac74..dfcfa96b8a7 100644 --- a/util/omrutil/unix/aix/32/cas8help.s +++ b/util/omrutil/unix/aix/32/cas8help.s @@ -53,6 +53,7 @@ .set r29,29 .set r30,30 .set r31,31 + .set cr0,0 .toc TOC.static: .tc .static[tc],_static[ro] .csect _static[ro] @@ -80,23 +81,19 @@ TOC.OMRCAS8Helper: .tc .OMRCAS8Helper[tc],OMRCAS8Helper[ds] # # r3 = high part of read value # r4 = low part of read value - ori r12, r3, 0 - ori r8, r4, 0 + .machine "push" + .machine "ppc64" + rldimi r4, r5, 32, 0 + rldimi r6, r7, 32, 0 loop: - .long 0x7d2060a8 # ldarx r9, 0, r12 - .long 0x79230022 # srdi r3, r9, 32 - ori r4, r9, 0 - ori r10, r8, 0 - ori r11, r6, 0 - .long 0x78aa000e # rldimi r10, r5, 32, 0 - .long 0x78eb000e # rldimi r11, r7, 32, 0 - .long 0x7c295040 # cmpl 0, 1, r9, r10 - bne fail - .long 0x7d6061ad # stdcx. r11, 0, r12 - bne loop - blr + ldarx r8, 0, r3 + cmpld cr0, r8, r4 + bne- fail + stdcx. r6, 0, r3 + bne- loop fail: - .long 0x7d2061ad # stdcx. 
r9, 0, r12 - bne loop + mr r4, r8 + srdi r3, r8, 32 blr + .machine "pop" endproc.OMRCAS8Helper: diff --git a/util/omrutil/unix/linux/ppc/32/cas8help.s b/util/omrutil/unix/linux/ppc/32/cas8help.s index 5633a758a83..f376b05cf41 100644 --- a/util/omrutil/unix/linux/ppc/32/cas8help.s +++ b/util/omrutil/unix/linux/ppc/32/cas8help.s @@ -52,6 +52,7 @@ .set r29,29 .set r30,30 .set r31,31 + .set cr0,0 .section ".rodata" .global OMRCAS8Helper .type OMRCAS8Helper@function @@ -70,22 +71,15 @@ OMRCAS8Helper: # # r3 = high part of read value # r4 = low part of read value - ori r12, r3, 0 - ori r8, r4, 0 -loop: - ldarx r9, 0, r12 - srdi r3, r9, 32 - ori r4, r9, 0 - ori r10, r8, 0 - ori r11, r6, 0 - rldimi r10, r5, 32, 0 - rldimi r11, r7, 32, 0 - cmpl cr0, 1, r9, r10 - bne fail - stdcx. r11, 0, r12 - bne loop - blr -fail: - stdcx. r9, 0, r12 - bne loop + rldimi r4, r5, 32, 0 + rldimi r6, r7, 32, 0 +0: + ldarx r8, 0, r3 + cmpld cr0, r8, r4 + bne- 1f + stdcx. r6, 0, r3 + bne- 0b +1: + mr r4, r8 + srdi r3, r8, 32 blr