diff --git a/src/engine/x86-64/X86_64Interpreter.v3 b/src/engine/x86-64/X86_64Interpreter.v3
index 2a1a0c7f..1855cbf3 100644
--- a/src/engine/x86-64/X86_64Interpreter.v3
+++ b/src/engine/x86-64/X86_64Interpreter.v3
@@ -2410,12 +2410,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) {
 		]) {
 			bindHandler(t.0);
 			load_v128_xmm0_tmp0();
-			var width = byte.view(t.2);
-			var mask = (1 << width) - 1;
-			asm.movq_r_r(r_tmp1, r_tmp0);
-			asm.and_r_i(r_tmp1, mask);
-			asm.movq_s_r(r_xmm1, r_tmp1);
-			t.1(r_xmm0, r_xmm1);
+			masm.emit_v128_shift(r_xmm0, r_tmp0, byte.view(t.2), r_tmp1, r_xmm1, t.1);
 			asm.movdqu_m_s(vsph[-2].value, r_xmm0);
 			decrementVsp();
 			endHandler();
diff --git a/src/engine/x86-64/X86_64MacroAssembler.v3 b/src/engine/x86-64/X86_64MacroAssembler.v3
index ea03ec4f..647ab3c7 100644
--- a/src/engine/x86-64/X86_64MacroAssembler.v3
+++ b/src/engine/x86-64/X86_64MacroAssembler.v3
@@ -981,6 +981,18 @@ class X86_64MacroAssembler extends MacroAssembler {
 		asm.psrlw_s_s(dst, tmp3);
 		asm.packuswb_s_s(dst, tmp2);
 	}
+	// Emits a vector shift of {dst} by the count held in {shift}, keeping only the low {width}
+	// bits of that count. The count is copied into {gtmp}, masked with (1 << width) - 1, moved
+	// into {xtmp}, and then {asm_pshfit_s_s} is called with (dst, xtmp) to emit the actual shift.
+	// {gtmp} and {xtmp} are overwritten; {shift} itself is only read.
+	def emit_v128_shift<T>(dst: X86_64Xmmr, shift: X86_64Gpr, width: byte, gtmp: X86_64Gpr, xtmp: X86_64Xmmr,
+		asm_pshfit_s_s: (X86_64Xmmr, X86_64Xmmr) -> T) {
+		var mask = (1 << width) - 1;
+		asm.movq_r_r(gtmp, shift);
+		asm.and_r_i(gtmp, mask);
+		asm.movq_s_r(xtmp, gtmp);
+		asm_pshfit_s_s(dst, xtmp);
+	}
 	def emit_i64x2_abs(dst: X86_64Xmmr, scratch: X86_64Xmmr) {
 		asm.movshdup_s_s(scratch, dst);
 		asm.psrad_i(scratch, 31);