Skip to content

Commit

Permalink
Merge pull request #122 from haoyu-zc/v3i-basic-arith-float
Browse files Browse the repository at this point in the history
[simd/v3i]: Implement float/double basic arithmetic instructions
  • Loading branch information
titzer authored Oct 17, 2023
2 parents f6ea019 + 60776c3 commit bc91710
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 1 deletion.
76 changes: 75 additions & 1 deletion src/engine/V3Eval.v3
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ component V3Eval {
return double.view(aa | bb);
}

// ---- v128 arithmetic -------------------------------------------------
// ---- v128 arithmetic ---------------------------------------------
// Bitwise complement of a 128-bit vector held as two u64 halves.
// Implemented as an XOR of each half against an all-ones mask.
def V128_NOT(a: (u64, u64)) -> (u64, u64) {
	var ones = (u64.max, u64.max);
	return do_vv_v_x2(a, ones, u64.^);
}
Expand Down Expand Up @@ -346,6 +346,56 @@ component V3Eval {
// Negates a vector by subtracting it from an all-zero vector with {u8.-},
// using the 16-lane binop helper (presumably one u8 lane at a time; helper not shown here).
def I8X16_NEG(a: (u64, u64)) -> (u64, u64) {
	var zero: (u64, u64) = (0, 0);
	return do_vv_v_x16(zero, a, u8.-);
}
// f32x4.add: combines {a} and {b} through the 4-lane binop helper using the
// u32-adapted float addition {F32_ADD_U}.
def F32X4_ADD(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	var sum = do_vv_v_x4(a, b, F32_ADD_U);
	return sum;
}
// f32x4.sub: combines {a} and {b} through the 4-lane binop helper using the
// u32-adapted float subtraction {F32_SUB_U}.
def F32X4_SUB(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	var difference = do_vv_v_x4(a, b, F32_SUB_U);
	return difference;
}
// f32x4.mul: combines {a} and {b} through the 4-lane binop helper using the
// u32-adapted float multiplication {F32_MUL_U}.
def F32X4_MUL(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	var product = do_vv_v_x4(a, b, F32_MUL_U);
	return product;
}
// f32x4.div: combines {a} and {b} through the 4-lane binop helper using the
// u32-adapted float division {F32_DIV_U}.
def F32X4_DIV(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	var quotient = do_vv_v_x4(a, b, F32_DIV_U);
	return quotient;
}
// f32x4.neg: applies the u32-adapted float negation {F32_NEG_U} to each of the
// four 32-bit lanes of {a}.
def F32X4_NEG(a: (u64, u64)) -> (u64, u64) {
	var negated = do_v_v_x4(a, F32_NEG_U);
	return negated;
}
// f32x4.sqrt: applies the u32-adapted float square root {F32_SQRT_U} to each of
// the four 32-bit lanes of {a}.
def F32X4_SQRT(a: (u64, u64)) -> (u64, u64) {
	var roots = do_v_v_x4(a, F32_SQRT_U);
	return roots;
}
// f64x2.add: combines the two 64-bit lanes of {a} and {b} using the
// u64-adapted double addition {F64_ADD_U}.
def F64X2_ADD(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	var sum = do_vv_v_x2(a, b, F64_ADD_U);
	return sum;
}
// f64x2.sub: combines the two 64-bit lanes of {a} and {b} using the
// u64-adapted double subtraction {F64_SUB_U}.
def F64X2_SUB(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	var difference = do_vv_v_x2(a, b, F64_SUB_U);
	return difference;
}
// f64x2.mul: combines the two 64-bit lanes of {a} and {b} using the
// u64-adapted double multiplication {F64_MUL_U}.
def F64X2_MUL(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	var product = do_vv_v_x2(a, b, F64_MUL_U);
	return product;
}
// f64x2.div: combines the two 64-bit lanes of {a} and {b} using the
// u64-adapted double division {F64_DIV_U}.
def F64X2_DIV(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	var quotient = do_vv_v_x2(a, b, F64_DIV_U);
	return quotient;
}
// f64x2.neg: applies the u64-adapted double negation {F64_NEG_U} to both
// 64-bit lanes of {a}.
def F64X2_NEG(a: (u64, u64)) -> (u64, u64) {
	var negated = do_v_v_x2(a, F64_NEG_U);
	return negated;
}
// f64x2.sqrt: applies the u64-adapted double square root {F64_SQRT_U} to both
// 64-bit lanes of {a}.
def F64X2_SQRT(a: (u64, u64)) -> (u64, u64) {
	var roots = do_v_v_x2(a, F64_SQRT_U);
	return roots;
}

// ---- v128 arithmetic helpers ----------------------------------------
// Lane-level adapters used by the v128 floating point operations above.
// Each one is a partial application of {float_binop}/{float_unop}/{double_binop}/{double_unop}
// that fixes the floating point operator and leaves the raw-bits operand(s) open, so the
// resulting function takes and returns lane bit patterns (u32 for float, u64 for double).
// 32-bit float lanes: (u32, u32) -> u32 for binops, u32 -> u32 for unops.
private def F32_ADD_U = float_binop(_, _, float.+);
private def F32_SUB_U = float_binop(_, _, float.-);
private def F32_MUL_U = float_binop(_, _, float.*);
private def F32_DIV_U = float_binop(_, _, float./);
// Negation goes through {F32_NEG}, which is defined elsewhere in this component.
private def F32_NEG_U = float_unop(_, F32_NEG);
private def F32_SQRT_U = float_unop(_, float.sqrt);
// 64-bit double lanes: (u64, u64) -> u64 for binops, u64 -> u64 for unops.
private def F64_ADD_U = double_binop(_, _, double.+);
private def F64_SUB_U = double_binop(_, _, double.-);
private def F64_MUL_U = double_binop(_, _, double.*);
private def F64_DIV_U = double_binop(_, _, double./);
// Negation goes through {F64_NEG}, which is defined elsewhere in this component.
private def F64_NEG_U = double_unop(_, F64_NEG);
private def F64_SQRT_U = double_unop(_, double.sqrt);

// ---- rounding and conversion ----------------------------------------
// i32.wrap_i64: bound directly to the {u32.view} operation specialized for u64 inputs.
// NOTE(review): presumably this keeps only the low 32 bits of the operand -- confirm against the Virgil {view} semantics.
def I32_WRAP_I64 = u32.view<u64>;
Expand Down Expand Up @@ -419,6 +469,30 @@ component V3Eval {
_ => return val;
}
}
// Adapts a floating point binop to a u32 binop: reinterprets both u32 operands
// as floats, applies {f}, and reinterprets the float result back as u32 bits.
private def float_binop(a: u32, b: u32, f: (float, float) -> float) -> u32 {
	var x = float.view(a);
	var y = float.view(b);
	var result = f(x, y);
	return u32.view(result);
}
// Adapts a floating point unop to a u32 unop: reinterprets the u32 operand as a
// float, applies {f}, and reinterprets the float result back as u32 bits.
private def float_unop(a: u32, f: float -> float) -> u32 {
	var x = float.view(a);
	var result = f(x);
	return u32.view(result);
}
// Adapts a floating point binop to a u64 binop: reinterprets both u64 operands
// as doubles, applies {f}, and reinterprets the double result back as u64 bits.
private def double_binop(a: u64, b: u64, f: (double, double) -> double) -> u64 {
	var x = double.view(a);
	var y = double.view(b);
	var result = f(x, y);
	return u64.view(result);
}
// Adapts a floating point unop to a u64 unop: reinterprets the u64 operand as a
// double, applies {f}, and reinterprets the double result back as u64 bits.
private def double_unop(a: u64, f: double -> double) -> u64 {
	var x = double.view(a);
	var result = f(x);
	return u64.view(result);
}
// Performs a 2-lane unop: applies {f} to the first and then the second u64
// element of {a} and returns the two results in the same positions.
private def do_v_v_x2(a: (u64, u64), f: (u64) -> u64) -> (u64, u64) {
	return (f(a.0), f(a.1));
}
// Performs a 4-lane unop over a 128-bit value stored as two u64 halves.
// Lanes 0 and 1 are the low and high 32 bits of {a.0}; lanes 2 and 3 are the
// low and high 32 bits of {a.1}. Each lane is passed through {f} in lane order
// and the results are packed back into the same positions.
private def do_v_v_x4(a: (u64, u64), f: (u32) -> u32) -> (u64, u64) {
	var lo = a.0;
	var hi = a.1;
	var lane0 = f(u32.view(lo));
	var lane1 = f(u32.view(lo >> 32));
	var lane2 = f(u32.view(hi));
	var lane3 = f(u32.view(hi >> 32));
	// Odd lanes occupy the upper 32 bits of each half, even lanes the lower 32 bits.
	var packed_lo: u64 = (u64.view(lane1) << 32) | lane0;
	var packed_hi: u64 = (u64.view(lane3) << 32) | lane2;
	return (packed_lo, packed_hi);
}
private def do_vv_v_x2(a: (u64, u64), b: (u64, u64), f: (u64, u64) -> u64) -> (u64, u64) { // Performs a 2-lane binop
var r0 = f(a.0, b.0);
var r1 = f(a.1, b.1);
Expand Down
12 changes: 12 additions & 0 deletions src/engine/v3/V3Interpreter.v3
Original file line number Diff line number Diff line change
Expand Up @@ -931,6 +931,18 @@ component V3Interpreter {
I8X16_ADD => do_vv_v(V3Eval.I8X16_ADD);
I8X16_SUB => do_vv_v(V3Eval.I8X16_SUB);
I8X16_NEG => do_v_v(V3Eval.I8X16_NEG);
F32X4_ADD => do_vv_v(V3Eval.F32X4_ADD);
F32X4_SUB => do_vv_v(V3Eval.F32X4_SUB);
F32X4_MUL => do_vv_v(V3Eval.F32X4_MUL);
F32X4_DIV => do_vv_v(V3Eval.F32X4_DIV);
F32X4_NEG => do_v_v(V3Eval.F32X4_NEG);
F32X4_SQRT => do_v_v(V3Eval.F32X4_SQRT);
F64X2_ADD => do_vv_v(V3Eval.F64X2_ADD);
F64X2_SUB => do_vv_v(V3Eval.F64X2_SUB);
F64X2_MUL => do_vv_v(V3Eval.F64X2_MUL);
F64X2_DIV => do_vv_v(V3Eval.F64X2_DIV);
F64X2_SQRT => do_v_v(V3Eval.F64X2_SQRT);
F64X2_NEG => do_v_v(V3Eval.F64X2_NEG);
INVALID => trap(TrapReason.INVALID_OPCODE);
CRASH_EXEC => System.error("WizengError", "crash-exec opcode executed");
CRASH_COMPILER => System.error("WizengError", "crash-compiler opcode executed");
Expand Down

0 comments on commit bc91710

Please sign in to comment.