Skip to content

Commit

Permalink
[simd/jit]: Implement floating point rounding instructions (#106 from…
Browse files Browse the repository at this point in the history
… haoyu-zc/jit-f32x4-rounding)
  • Loading branch information
titzer authored Aug 5, 2023
2 parents d941a11 + e9cc14b commit 35c6c1c
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 17 deletions.
33 changes: 17 additions & 16 deletions src/engine/x86-64/X86_64Interpreter.v3
Original file line number Diff line number Diff line change
Expand Up @@ -2600,22 +2600,16 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) {
asm.movdqu_m_s(vsph[-1].value, r_xmm0);
endHandler();
}
// Table-driven generation of the handlers for the packed floating-point rounding opcodes.
// Each tuple is (opcode, rounding mode, assembler emitter): the F32X4_* opcodes use
// asm.roundps_s_m and the F64X2_* opcodes use asm.roundpd_s_m.
for (t in [
(Opcode.F32X4_CEIL, X86_64Rounding.TO_POS_INF, asm.roundps_s_m),
(Opcode.F32X4_FLOOR, X86_64Rounding.TO_NEG_INF, asm.roundps_s_m),
(Opcode.F32X4_TRUNC, X86_64Rounding.TO_ZERO, asm.roundps_s_m),
(Opcode.F32X4_NEAREST, X86_64Rounding.TO_NEAREST, asm.roundps_s_m),

(Opcode.F64X2_CEIL, X86_64Rounding.TO_POS_INF, asm.roundpd_s_m),
(Opcode.F64X2_FLOOR, X86_64Rounding.TO_NEG_INF, asm.roundpd_s_m),
(Opcode.F64X2_TRUNC, X86_64Rounding.TO_ZERO, asm.roundpd_s_m),
(Opcode.F64X2_NEAREST, X86_64Rounding.TO_NEAREST, asm.roundpd_s_m)
]) {
// Start the handler for this opcode.
bindHandler(t.0);
// Emit the rounding instruction with r_xmm0 as destination, the vsph[-1].value operand
// as source, and the tuple's rounding mode.
// NOTE(review): vsph[-1] is presumably the top value-stack slot -- confirm against its definition.
t.2(r_xmm0, vsph[-1].value, t.1);
// Write r_xmm0 back to the same vsph[-1].value operand the source was read from.
asm.movdqu_m_s(vsph[-1].value, r_xmm0);
endHandler();
}

// Handlers for the packed floating-point rounding opcodes, one genSimdUnop call per opcode.
// Each call partially applies the register-to-register emitter (asm.roundps_s_s for F32X4_*,
// asm.roundpd_s_s for F64X2_*) with a fixed X86_64Rounding mode, leaving the two register
// parameters open for genSimdUnop to supply.
// Mode mapping: CEIL -> TO_POS_INF, FLOOR -> TO_NEG_INF, TRUNC -> TO_ZERO, NEAREST -> TO_NEAREST.
genSimdUnop(Opcode.F32X4_CEIL, asm.roundps_s_s(_, _, X86_64Rounding.TO_POS_INF));
genSimdUnop(Opcode.F32X4_FLOOR, asm.roundps_s_s(_, _, X86_64Rounding.TO_NEG_INF));
genSimdUnop(Opcode.F32X4_TRUNC, asm.roundps_s_s(_, _, X86_64Rounding.TO_ZERO));
genSimdUnop(Opcode.F32X4_NEAREST, asm.roundps_s_s(_, _, X86_64Rounding.TO_NEAREST));
genSimdUnop(Opcode.F64X2_CEIL, asm.roundpd_s_s(_, _, X86_64Rounding.TO_POS_INF));
genSimdUnop(Opcode.F64X2_FLOOR, asm.roundpd_s_s(_, _, X86_64Rounding.TO_NEG_INF));
genSimdUnop(Opcode.F64X2_TRUNC, asm.roundpd_s_s(_, _, X86_64Rounding.TO_ZERO));
genSimdUnop(Opcode.F64X2_NEAREST, asm.roundpd_s_s(_, _, X86_64Rounding.TO_NEAREST));

// Unary operations that need masks
for (t in [
(Opcode.F32X4_NEG, masm.emit_v128_negps),
Expand Down Expand Up @@ -2792,6 +2786,13 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) {
decrementVsp();
endHandler();
}
// Generates the handler for a SIMD unary opcode that operates in place on one XMM register.
//   opcode: the opcode whose handler is bound.
//   f:      emitter invoked as f(dst, src); here both arguments are r_xmm0. Its result is discarded.
// The emitted sequence loads the vsph[-1].value operand into r_xmm0, applies {f}, and stores
// r_xmm0 back to the same operand.
def genSimdUnop<T>(opcode: Opcode, f: (X86_64Xmmr, X86_64Xmmr) -> T) {
	def scratch = r_xmm0;				// single register used as both source and destination
	bindHandler(opcode);
	asm.movdqu_s_m(scratch, vsph[-1].value);	// load the operand
	f(scratch, scratch);				// apply the operation in place
	asm.movdqu_m_s(vsph[-1].value, scratch);	// store the result over the operand
	endHandler();
}
def bindHandler(opcode: Opcode) {
if (FastIntTuning.handlerAlignment > 1) w.align(FastIntTuning.handlerAlignment);
var pos = w.atEnd().pos;
Expand Down
9 changes: 8 additions & 1 deletion src/engine/x86-64/X86_64SinglePassCompiler.v3
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,10 @@ class X86_64SinglePassCompiler extends SinglePassCompiler {
// F32X4 min/max go through do_op2_x_x_xtmp and abs through do_op1_x_gtmp_xtmp, each with a
// macro-assembler (mmasm) emitter.
// NOTE(review): the helper suffixes presumably name the extra temp registers they allocate -- confirm.
def visit_F32X4_MIN() { do_op2_x_x_xtmp(ValueKind.V128, mmasm.emit_f32x4_min); }
def visit_F32X4_MAX() { do_op2_x_x_xtmp(ValueKind.V128, mmasm.emit_f32x4_max); }
def visit_F32X4_ABS() { do_op1_x_gtmp_xtmp(ValueKind.V128, mmasm.emit_v128_absps); }
// F32X4 rounding: each visitor passes asm.roundps_s_s, partially applied with a fixed
// X86_64Rounding mode, to do_op1_x_x as a two-register emitter.
// Mode mapping: CEIL -> TO_POS_INF, FLOOR -> TO_NEG_INF, TRUNC -> TO_ZERO, NEAREST -> TO_NEAREST.
def visit_F32X4_CEIL() { do_op1_x_x(ValueKind.V128, asm.roundps_s_s(_, _, X86_64Rounding.TO_POS_INF)); }
def visit_F32X4_FLOOR() { do_op1_x_x(ValueKind.V128, asm.roundps_s_s(_, _, X86_64Rounding.TO_NEG_INF)); }
def visit_F32X4_TRUNC() { do_op1_x_x(ValueKind.V128, asm.roundps_s_s(_, _, X86_64Rounding.TO_ZERO)); }
def visit_F32X4_NEAREST() { do_op1_x_x(ValueKind.V128, asm.roundps_s_s(_, _, X86_64Rounding.TO_NEAREST)); }

// F64X2 add/sub: binary V128 operations dispatched through do_op2_x_x with the assembler's
// addpd/subpd register-to-register emitters.
def visit_F64X2_ADD() { do_op2_x_x(ValueKind.V128, asm.addpd_s_s); }
def visit_F64X2_SUB() { do_op2_x_x(ValueKind.V128, asm.subpd_s_s); }
Expand All @@ -596,7 +600,10 @@ class X86_64SinglePassCompiler extends SinglePassCompiler {
// F64X2 min/max go through do_op2_x_x_xtmp and abs through do_op1_x_gtmp_xtmp, each with a
// macro-assembler (mmasm) emitter.
// NOTE(review): the helper suffixes presumably name the extra temp registers they allocate -- confirm.
def visit_F64X2_MIN() { do_op2_x_x_xtmp(ValueKind.V128, mmasm.emit_f64x2_min); }
def visit_F64X2_MAX() { do_op2_x_x_xtmp(ValueKind.V128, mmasm.emit_f64x2_max); }
def visit_F64X2_ABS() { do_op1_x_gtmp_xtmp(ValueKind.V128, mmasm.emit_v128_abspd); }

// F64X2 rounding: each visitor passes asm.roundpd_s_s, partially applied with a fixed
// X86_64Rounding mode, to do_op1_x_x as a two-register emitter.
// Mode mapping: CEIL -> TO_POS_INF, FLOOR -> TO_NEG_INF, TRUNC -> TO_ZERO, NEAREST -> TO_NEAREST.
def visit_F64X2_CEIL() { do_op1_x_x(ValueKind.V128, asm.roundpd_s_s(_, _, X86_64Rounding.TO_POS_INF)); }
def visit_F64X2_FLOOR() { do_op1_x_x(ValueKind.V128, asm.roundpd_s_s(_, _, X86_64Rounding.TO_NEG_INF)); }
def visit_F64X2_TRUNC() { do_op1_x_x(ValueKind.V128, asm.roundpd_s_s(_, _, X86_64Rounding.TO_ZERO)); }
def visit_F64X2_NEAREST() { do_op1_x_x(ValueKind.V128, asm.roundpd_s_s(_, _, X86_64Rounding.TO_NEAREST)); }

def visit_V128_BITSELECT() {
var c = popReg();
Expand Down

0 comments on commit 35c6c1c

Please sign in to comment.