Skip to content

Commit

Permalink
Merge pull request #121 from haoyu-zc/v3i-basic-arith
Browse files Browse the repository at this point in the history
[simd/v3i]: Implement v128 integer basic arithmetic instructions
  • Loading branch information
titzer authored Oct 17, 2023
2 parents e5e15ab + 9f04cf3 commit f6ea019
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 0 deletions.
88 changes: 88 additions & 0 deletions src/engine/V3Eval.v3
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,51 @@ component V3Eval {
var not_b = V128_NOT(b);
return V128_AND(a, not_b);
}
// i64x2.add: delegates to do_vv_v_x2 with u64.+ as the operator applied to {a} and {b}.
def I64X2_ADD(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	def lane_add = u64.+;
	return do_vv_v_x2(a, b, lane_add);
}
// i64x2.sub: delegates to do_vv_v_x2 with u64.- as the operator; {a} is the left operand.
def I64X2_SUB(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	def lane_sub = u64.-;
	return do_vv_v_x2(a, b, lane_sub);
}
// i64x2.mul: delegates to do_vv_v_x2 with u64.* as the operator applied to {a} and {b}.
def I64X2_MUL(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	def lane_mul = u64.*;
	return do_vv_v_x2(a, b, lane_mul);
}
// i64x2.neg: expressed as a subtraction, passing an all-zero vector as the left operand
// and {a} as the right operand to do_vv_v_x2 with u64.- as the operator.
def I64X2_NEG(a: (u64, u64)) -> (u64, u64) {
	def lane_sub = u64.-;
	return do_vv_v_x2((0, 0), a, lane_sub);
}
// i32x4.add: applies u32.+ to corresponding 32-bit lanes of {a} and {b} via do_vv_v_x4.
def I32X4_ADD(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	def lane_add = u32.+;
	return do_vv_v_x4(a, b, lane_add);
}
// i32x4.sub: applies u32.- to corresponding 32-bit lanes via do_vv_v_x4; {a} is the left operand.
def I32X4_SUB(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	def lane_sub = u32.-;
	return do_vv_v_x4(a, b, lane_sub);
}
// i32x4.mul: applies u32.* to corresponding 32-bit lanes of {a} and {b} via do_vv_v_x4.
def I32X4_MUL(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	def lane_mul = u32.*;
	return do_vv_v_x4(a, b, lane_mul);
}
// i32x4.neg: expressed as a subtraction, passing an all-zero vector as the left operand
// and {a} as the right operand to do_vv_v_x4 with u32.- as the lane operator.
def I32X4_NEG(a: (u64, u64)) -> (u64, u64) {
	def lane_sub = u32.-;
	return do_vv_v_x4((0, 0), a, lane_sub);
}
// i16x8.add: applies u16.+ to corresponding 16-bit lanes of {a} and {b} via do_vv_v_x8.
def I16X8_ADD(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	def lane_add = u16.+;
	return do_vv_v_x8(a, b, lane_add);
}
// i16x8.sub: applies u16.- to corresponding 16-bit lanes via do_vv_v_x8; {a} is the left operand.
def I16X8_SUB(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	def lane_sub = u16.-;
	return do_vv_v_x8(a, b, lane_sub);
}
// i16x8.mul: applies u16.* to corresponding 16-bit lanes of {a} and {b} via do_vv_v_x8.
def I16X8_MUL(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	def lane_mul = u16.*;
	return do_vv_v_x8(a, b, lane_mul);
}
// i16x8.neg: expressed as a subtraction, passing an all-zero vector as the left operand
// and {a} as the right operand to do_vv_v_x8 with u16.- as the lane operator.
def I16X8_NEG(a: (u64, u64)) -> (u64, u64) {
	def lane_sub = u16.-;
	return do_vv_v_x8((0, 0), a, lane_sub);
}
// i8x16.add: applies u8.+ to corresponding 8-bit lanes of {a} and {b} via do_vv_v_x16.
def I8X16_ADD(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	def lane_add = u8.+;
	return do_vv_v_x16(a, b, lane_add);
}
// i8x16.sub: applies u8.- to corresponding 8-bit lanes via do_vv_v_x16; {a} is the left operand.
def I8X16_SUB(a: (u64, u64), b: (u64, u64)) -> (u64, u64) {
	def lane_sub = u8.-;
	return do_vv_v_x16(a, b, lane_sub);
}
// i8x16.neg: expressed as a subtraction, passing an all-zero vector as the left operand
// and {a} as the right operand to do_vv_v_x16 with u8.- as the lane operator.
def I8X16_NEG(a: (u64, u64)) -> (u64, u64) {
	def lane_sub = u8.-;
	return do_vv_v_x16((0, 0), a, lane_sub);
}

// ---- rounding and conversion ----------------------------------------
// Binds I32_WRAP_I64 to u32.view instantiated for a u64 argument.
// NOTE(review): presumably this keeps the low 32 bits of the input; confirm against Virgil's view semantics.
def I32_WRAP_I64 = u32.view<u64>;
Expand Down Expand Up @@ -379,6 +424,49 @@ component V3Eval {
var r1 = f(a.1, b.1);
return (r0, r1);
}
// Performs a 4-lane binop: applies {f} to each pair of corresponding 32-bit lanes of
// {a} and {b} and repacks the four results into a (u64, u64) pair.
// Lanes 0 and 1 are the low and high 32 bits of element 0; lanes 2 and 3 are the
// low and high 32 bits of element 1. {f} is invoked in lane order 0, 1, 2, 3.
private def do_vv_v_x4(a: (u64, u64), b: (u64, u64), f: (u32, u32) -> u32) -> (u64, u64) {
	// u32.view of a u64 keeps only the low 32 bits, so the high lane is shifted down first.
	var lane0 = u64.view(f(u32.view(a.0), u32.view(b.0)));
	var lane1 = u64.view(f(u32.view(a.0 >> 32), u32.view(b.0 >> 32)));
	var lane2 = u64.view(f(u32.view(a.1), u32.view(b.1)));
	var lane3 = u64.view(f(u32.view(a.1 >> 32), u32.view(b.1 >> 32)));
	// Reassemble each 64-bit element from its two 32-bit lane results.
	var first = lane0 | (lane1 << 32);
	var second = lane2 | (lane3 << 32);
	return (first, second);
}
// Performs an 8-lane binop: applies {f} to each pair of corresponding 16-bit lanes of
// {a} and {b} and repacks the results into a (u64, u64) pair.
// For each bit offset 0, 16, 32, 48 the lane of element 0 is processed first, then the
// lane at the same offset of element 1.
private def do_vv_v_x8(a: (u64, u64), b: (u64, u64), f: (u16, u16) -> u16) -> (u64, u64) {
	var acc0: u64 = 0;
	var acc1: u64 = 0;
	var sh: byte = 0;
	while (sh < 64) {
		// u16.view keeps only the low 16 bits of the shifted value, isolating one lane.
		var lane0 = f(u16.view(a.0 >> sh), u16.view(b.0 >> sh));
		acc0 |= (u64.view(lane0) << sh);

		var lane1 = f(u16.view(a.1 >> sh), u16.view(b.1 >> sh));
		acc1 |= (u64.view(lane1) << sh);

		sh += 16;
	}
	return (acc0, acc1);
}
// Performs a 16-lane binop: applies {f} to each pair of corresponding 8-bit lanes of
// {a} and {b} and repacks the results into a (u64, u64) pair.
// For each bit offset 0, 8, ..., 56 the lane of element 0 is processed first, then the
// lane at the same offset of element 1.
private def do_vv_v_x16(a: (u64, u64), b: (u64, u64), f: (u8, u8) -> u8) -> (u64, u64) {
	var acc0: u64 = 0;
	var acc1: u64 = 0;
	var sh: byte = 0;
	while (sh < 64) {
		// u8.view keeps only the low 8 bits of the shifted value, isolating one lane.
		var lane0 = f(u8.view(a.0 >> sh), u8.view(b.0 >> sh));
		acc0 |= (u64.view(lane0) << sh);

		var lane1 = f(u8.view(a.1 >> sh), u8.view(b.1 >> sh));
		acc1 |= (u64.view(lane1) << sh);

		sh += 8;
	}
	return (acc0, acc1);
}
// Returns {a} unchanged when it compares equal to itself; otherwise (i.e. when the
// self-comparison fails) returns float.nan in its place.
private def canonf(a: float) -> float {
	if (a != a) return float.nan;
	return a;
}
Expand Down
15 changes: 15 additions & 0 deletions src/engine/v3/V3Interpreter.v3
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,21 @@ component V3Interpreter {
V128_XOR => do_vv_v(V3Eval.V128_XOR);
V128_BITSELECT => do_vvv_v(V3Eval.V128_BITSELECT);
V128_ANDNOT => do_vv_v(V3Eval.V128_ANDNOT);
I64X2_ADD => do_vv_v(V3Eval.I64X2_ADD);
I64X2_SUB => do_vv_v(V3Eval.I64X2_SUB);
I64X2_MUL => do_vv_v(V3Eval.I64X2_MUL);
I64X2_NEG => do_v_v(V3Eval.I64X2_NEG);
I32X4_ADD => do_vv_v(V3Eval.I32X4_ADD);
I32X4_SUB => do_vv_v(V3Eval.I32X4_SUB);
I32X4_MUL => do_vv_v(V3Eval.I32X4_MUL);
I32X4_NEG => do_v_v(V3Eval.I32X4_NEG);
I16X8_ADD => do_vv_v(V3Eval.I16X8_ADD);
I16X8_SUB => do_vv_v(V3Eval.I16X8_SUB);
I16X8_MUL => do_vv_v(V3Eval.I16X8_MUL);
I16X8_NEG => do_v_v(V3Eval.I16X8_NEG);
I8X16_ADD => do_vv_v(V3Eval.I8X16_ADD);
I8X16_SUB => do_vv_v(V3Eval.I8X16_SUB);
I8X16_NEG => do_v_v(V3Eval.I8X16_NEG);
INVALID => trap(TrapReason.INVALID_OPCODE);
CRASH_EXEC => System.error("WizengError", "crash-exec opcode executed");
CRASH_COMPILER => System.error("WizengError", "crash-compiler opcode executed");
Expand Down

0 comments on commit f6ea019

Please sign in to comment.