diff --git a/src/engine/x86-64/X86_64Interpreter.v3 b/src/engine/x86-64/X86_64Interpreter.v3
index e2c9c1ea..239e041b 100644
--- a/src/engine/x86-64/X86_64Interpreter.v3
+++ b/src/engine/x86-64/X86_64Interpreter.v3
@@ -2600,22 +2600,16 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) {
 			asm.movdqu_m_s(vsph[-1].value, r_xmm0);
 			endHandler();
 		}
-		for (t in [
-			(Opcode.F32X4_CEIL, X86_64Rounding.TO_POS_INF, asm.roundps_s_m),
-			(Opcode.F32X4_FLOOR, X86_64Rounding.TO_NEG_INF, asm.roundps_s_m),
-			(Opcode.F32X4_TRUNC, X86_64Rounding.TO_ZERO, asm.roundps_s_m),
-			(Opcode.F32X4_NEAREST, X86_64Rounding.TO_NEAREST, asm.roundps_s_m),
-
-			(Opcode.F64X2_CEIL, X86_64Rounding.TO_POS_INF, asm.roundpd_s_m),
-			(Opcode.F64X2_FLOOR, X86_64Rounding.TO_NEG_INF, asm.roundpd_s_m),
-			(Opcode.F64X2_TRUNC, X86_64Rounding.TO_ZERO, asm.roundpd_s_m),
-			(Opcode.F64X2_NEAREST, X86_64Rounding.TO_NEAREST, asm.roundpd_s_m)
-		]) {
-			bindHandler(t.0);
-			t.2(r_xmm0, vsph[-1].value, t.1);
-			asm.movdqu_m_s(vsph[-1].value, r_xmm0);
-			endHandler();
-		}
+
+		genSimdUnop(Opcode.F32X4_CEIL, asm.roundps_s_s(_, _, X86_64Rounding.TO_POS_INF));
+		genSimdUnop(Opcode.F32X4_FLOOR, asm.roundps_s_s(_, _, X86_64Rounding.TO_NEG_INF));
+		genSimdUnop(Opcode.F32X4_TRUNC, asm.roundps_s_s(_, _, X86_64Rounding.TO_ZERO));
+		genSimdUnop(Opcode.F32X4_NEAREST, asm.roundps_s_s(_, _, X86_64Rounding.TO_NEAREST));
+		genSimdUnop(Opcode.F64X2_CEIL, asm.roundpd_s_s(_, _, X86_64Rounding.TO_POS_INF));
+		genSimdUnop(Opcode.F64X2_FLOOR, asm.roundpd_s_s(_, _, X86_64Rounding.TO_NEG_INF));
+		genSimdUnop(Opcode.F64X2_TRUNC, asm.roundpd_s_s(_, _, X86_64Rounding.TO_ZERO));
+		genSimdUnop(Opcode.F64X2_NEAREST, asm.roundpd_s_s(_, _, X86_64Rounding.TO_NEAREST));
+
 		// Unary operations that need masks
 		for (t in [
 			(Opcode.F32X4_NEG, masm.emit_v128_negps),
@@ -2792,6 +2786,13 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) {
 		decrementVsp();
 		endHandler();
 	}
+	def genSimdUnop<T>(opcode: Opcode, f: (X86_64Xmmr, X86_64Xmmr) -> T) {
+		bindHandler(opcode);
+		asm.movdqu_s_m(r_xmm0, vsph[-1].value);
+		f(r_xmm0, r_xmm0);
+		asm.movdqu_m_s(vsph[-1].value, r_xmm0);
+		endHandler();
+	}
 	def bindHandler(opcode: Opcode) {
 		if (FastIntTuning.handlerAlignment > 1) w.align(FastIntTuning.handlerAlignment);
 		var pos = w.atEnd().pos;
diff --git a/src/engine/x86-64/X86_64SinglePassCompiler.v3 b/src/engine/x86-64/X86_64SinglePassCompiler.v3
index 57837581..86364cbd 100644
--- a/src/engine/x86-64/X86_64SinglePassCompiler.v3
+++ b/src/engine/x86-64/X86_64SinglePassCompiler.v3
@@ -578,6 +578,10 @@ class X86_64SinglePassCompiler extends SinglePassCompiler {
 	def visit_F32X4_MIN() { do_op2_x_x_xtmp(ValueKind.V128, mmasm.emit_f32x4_min); }
 	def visit_F32X4_MAX() { do_op2_x_x_xtmp(ValueKind.V128, mmasm.emit_f32x4_max); }
 	def visit_F32X4_ABS() { do_op1_x_gtmp_xtmp(ValueKind.V128, mmasm.emit_v128_absps); }
+	def visit_F32X4_CEIL() { do_op1_x_x(ValueKind.V128, asm.roundps_s_s(_, _, X86_64Rounding.TO_POS_INF)); }
+	def visit_F32X4_FLOOR() { do_op1_x_x(ValueKind.V128, asm.roundps_s_s(_, _, X86_64Rounding.TO_NEG_INF)); }
+	def visit_F32X4_TRUNC() { do_op1_x_x(ValueKind.V128, asm.roundps_s_s(_, _, X86_64Rounding.TO_ZERO)); }
+	def visit_F32X4_NEAREST() { do_op1_x_x(ValueKind.V128, asm.roundps_s_s(_, _, X86_64Rounding.TO_NEAREST)); }

 	def visit_F64X2_ADD() { do_op2_x_x(ValueKind.V128, asm.addpd_s_s); }
 	def visit_F64X2_SUB() { do_op2_x_x(ValueKind.V128, asm.subpd_s_s); }
@@ -596,7 +600,10 @@ class X86_64SinglePassCompiler extends SinglePassCompiler {
 	def visit_F64X2_MIN() { do_op2_x_x_xtmp(ValueKind.V128, mmasm.emit_f64x2_min); }
 	def visit_F64X2_MAX() { do_op2_x_x_xtmp(ValueKind.V128, mmasm.emit_f64x2_max); }
 	def visit_F64X2_ABS() { do_op1_x_gtmp_xtmp(ValueKind.V128, mmasm.emit_v128_abspd); }
-
+	def visit_F64X2_CEIL() { do_op1_x_x(ValueKind.V128, asm.roundpd_s_s(_, _, X86_64Rounding.TO_POS_INF)); }
+	def visit_F64X2_FLOOR() { do_op1_x_x(ValueKind.V128, asm.roundpd_s_s(_, _, X86_64Rounding.TO_NEG_INF)); }
+	def visit_F64X2_TRUNC() { do_op1_x_x(ValueKind.V128, asm.roundpd_s_s(_, _, X86_64Rounding.TO_ZERO)); }
+	def visit_F64X2_NEAREST() { do_op1_x_x(ValueKind.V128, asm.roundpd_s_s(_, _, X86_64Rounding.TO_NEAREST)); }
 	def visit_V128_BITSELECT() {
 		var c = popReg();
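
Illustrative note (not part of the patch): both files rely on Virgil partial application. Writing asm.roundps_s_s(_, _, mode) binds the rounding mode up front and leaves a two-argument function value, which is what lets one helper (genSimdUnop<T> in the interpreter generator, do_op1_x_x in the single-pass compiler) serve all eight ceil/floor/trunc/nearest opcodes. Below is a minimal, self-contained sketch of that pattern using hypothetical stand-in names (Rounding, roundps, genUnop), not the Wizard APIs:

	// Hypothetical stand-ins, not Wizard code: shows how binding the rounding
	// mode of a 3-argument routine yields the 2-argument shape the helpers expect.
	enum Rounding { TO_POS_INF, TO_NEG_INF, TO_ZERO, TO_NEAREST }

	// Stand-in for an assembler routine such as roundps_s_s(dst, src, mode).
	def roundps(dst: int, src: int, mode: Rounding) -> int {
		return dst;	// a real assembler would emit the instruction here
	}
	// Stand-in for genSimdUnop<T>: accepts any (reg, reg) -> T function value.
	def genUnop<T>(f: (int, int) -> T) {
		f(0, 0);	// a real helper would load the operand, apply f, and store it back
	}
	def main() {
		// The "_" placeholders partially apply roundps, leaving an (int, int) -> int
		// function value; one binding per rounding variant.
		genUnop(roundps(_, _, Rounding.TO_POS_INF));	// ceil
		genUnop(roundps(_, _, Rounding.TO_NEG_INF));	// floor
		genUnop(roundps(_, _, Rounding.TO_ZERO));	// trunc
		genUnop(roundps(_, _, Rounding.TO_NEAREST));	// nearest
	}

Binding the mode at the call site is also what removes the old tuple table in the interpreter generator, whose third element forced every entry to share a single assembler-routine signature.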