Skip to content

Commit

Permalink
[simd/jit]: Implement v128_load_extend instructions
Browse files Browse the repository at this point in the history
  • Loading branch information
haoyu-zc committed Aug 13, 2023
1 parent a612471 commit 49bb085
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 6 deletions.
9 changes: 6 additions & 3 deletions src/engine/x86-64/X86_64MacroAssembler.v3
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,13 @@ class X86_64MacroAssembler extends MacroAssembler {
ABS, V128 => asm.movdqu_s_m(X(dst), X86_64Addr.new(b, t.0, 1, t.1));
}
}
// Emits a single-lane load into a general-purpose register.
// Records the current source location before the memory access (so the
// access can be attributed back to the bytecode), then issues the supplied
// width-specific move {asm_mov_r_m} from {addr} into GPR {dst}.
// NOTE(review): the diff rendering interleaved the pre-change lines
// (old (base, index, offset) signature and old address computation) with the
// new definition; only the post-change definition is kept here. Address
// computation now lives in decode_memarg_addr.
def emit_v128_load_lane_r_m<T>(dst: Reg, addr: X86_64Addr, asm_mov_r_m: (X86_64Gpr, X86_64Addr) -> T) {
	recordCurSourceLoc();
	asm_mov_r_m(G(dst), addr);
}
// Builds the x86-64 addressing-mode operand for a (base, index, offset)
// memory access. Delegates to handle_large_offset to fold offsets that are
// too large to encode directly as a displacement, yielding the effective
// index register and displacement used in the returned address.
def decode_memarg_addr(base: Reg, index: Reg, offset: u32) -> X86_64Addr {
	var adjusted = handle_large_offset(index, offset);
	return X86_64Addr.new(G(base), adjusted.0, 1, adjusted.1);
}
def emit_storeb_r_r_r_i(kind: ValueKind, val: Reg, base: Reg, index: Reg, offset: u32) {
var t = handle_large_offset(index, offset);
Expand Down
30 changes: 27 additions & 3 deletions src/engine/x86-64/X86_64SinglePassCompiler.v3
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,12 @@ class X86_64SinglePassCompiler extends SinglePassCompiler {
// v128.loadN_lane: load a scalar from memory and insert it into one lane.
def visit_V128_LOAD_16_LANE(imm: MemArg, lane: byte) { visit_V128_LOAD_LANE(imm, lane, loadMemarg_w, asm.pinsrw_s_r_i); }
def visit_V128_LOAD_32_LANE(imm: MemArg, lane: byte) { visit_V128_LOAD_LANE(imm, lane, loadMemarg_d, asm.pinsrd_s_r_i); }
def visit_V128_LOAD_64_LANE(imm: MemArg, lane: byte) { visit_V128_LOAD_LANE(imm, lane, loadMemarg_q, asm.pinsrq_s_r_i); }
// v128.loadNxM_s/_u: load 64 bits and widen each lane, using the matching
// SSE4.1 packed-move-with-extension instruction (pmovsx* = sign-extend,
// pmovzx* = zero-extend) directly from memory.
def visit_V128_LOAD_8X8_S(imm: MemArg) { visit_V128_LOAD_EXTEND(imm, asm.pmovsxbw_s_m); }
def visit_V128_LOAD_8X8_U(imm: MemArg) { visit_V128_LOAD_EXTEND(imm, asm.pmovzxbw_s_m); }
def visit_V128_LOAD_16X4_S(imm: MemArg) { visit_V128_LOAD_EXTEND(imm, asm.pmovsxwd_s_m); }
def visit_V128_LOAD_16X4_U(imm: MemArg) { visit_V128_LOAD_EXTEND(imm, asm.pmovzxwd_s_m); }
def visit_V128_LOAD_32X2_S(imm: MemArg) { visit_V128_LOAD_EXTEND(imm, asm.pmovsxdq_s_m); }
def visit_V128_LOAD_32X2_U(imm: MemArg) { visit_V128_LOAD_EXTEND(imm, asm.pmovzxdq_s_m); }
// v128.loadN_zero: load a scalar into the low lane, zeroing the rest.
def visit_V128_LOAD_32_ZERO(imm: MemArg) { visit_V128_LOAD_ZERO(imm, loadMemarg_d, asm.pinsrd_s_r_i); }
def visit_V128_LOAD_64_ZERO(imm: MemArg) { visit_V128_LOAD_ZERO(imm, loadMemarg_q, asm.pinsrq_s_r_i); }
// v128.loadN_splat: load a scalar and broadcast it to every lane.
def visit_V128_LOAD_8_SPLAT(imm: MemArg) { visit_V128_LOAD_SPLAT(imm, loadMemarg_b, mmasm.emit_i8x16_splat(_, _, X(allocTmp(ValueKind.V128)))); }
Expand Down Expand Up @@ -732,7 +738,8 @@ class X86_64SinglePassCompiler extends SinglePassCompiler {
state.push(a.kindFlagsMatching(ValueKind.V128, IN_REG), a.reg, 0);
}

private def loadMemarg<T>(dst: Reg, imm: MemArg, asm_mov_r_m: (X86_64Gpr, X86_64Addr) -> T) {
// Decode memarg and return the mem address and trap reason if any
private def decodeMemarg(imm: MemArg) -> (X86_64Addr, TrapReason) {
var base_reg = regs.mem0_base;
if (imm.memory_index != 0) {
// XXX: cache the base register for memories > 0
Expand All @@ -748,12 +755,19 @@ class X86_64SinglePassCompiler extends SinglePassCompiler {
var offset = imm.offset;
if (iv.isConst()) {
var sum = u64.view(offset) + u32.view(iv.const); // fold offset calculation
if (sum > u32.max) return emitTrap(TrapReason.MEM_OUT_OF_BOUNDS); // statically OOB
if (sum > u32.max) return (null, TrapReason.MEM_OUT_OF_BOUNDS);
offset = u32.view(sum);
} else {
index_reg = ensureReg(iv, state.sp);
}
mmasm.emit_v128_load_lane_r_m(dst, base_reg, index_reg, u32.!(offset), asm_mov_r_m);
return (mmasm.decode_memarg_addr(base_reg, index_reg, u32.!(offset)), TrapReason.NONE);
}
// Utility to load a memarg into a general-purpose register.
// Decodes {imm} into an effective address; if decoding detects a statically
// out-of-bounds access, emits the trap and stops. Otherwise issues the
// supplied width-specific move {asm_mov_r_m} from that address into {dst}.
private def loadMemarg<T>(dst: Reg, imm: MemArg, asm_mov_r_m: (X86_64Gpr, X86_64Addr) -> T) {
	var decoded = decodeMemarg(imm);
	if (decoded.1 != TrapReason.NONE) return emitTrap(decoded.1);
	mmasm.emit_v128_load_lane_r_m(dst, decoded.0, asm_mov_r_m);
}
// Width-specific wrappers: load a byte / 16-bit word memarg into {dst}.
private def loadMemarg_b(dst: Reg, imm: MemArg) { loadMemarg(dst, imm, asm.q.movb_r_m); }
private def loadMemarg_w(dst: Reg, imm: MemArg) { loadMemarg(dst, imm, asm.q.movw_r_m); }
Expand Down Expand Up @@ -790,6 +804,16 @@ class X86_64SinglePassCompiler extends SinglePassCompiler {
state.push(SpcConsts.kindToFlags(kind) | IN_REG, d, 0);
}

// Compiles a v128 load-extend instruction (v128.load8x8_s/u, load16x4_s/u,
// load32x2_s/u): loads 64 bits from the decoded memarg address and widens
// each lane into the destination XMM register using the supplied
// pmovsx*/pmovzx* memory-form instruction, then pushes the V128 result.
// Fix: removed the unused `var val = allocTmp(ValueKind.I64)` (leftover from
// visit_V128_LOAD_ZERO) — the extend reads straight from memory, so it
// needlessly reserved a scratch GPR for every load-extend.
private def visit_V128_LOAD_EXTEND<T>(imm: MemArg, asm_pmov_s_m: (X86_64Xmmr, X86_64Addr) -> T) {
	var d = allocRegTos(ValueKind.V128);
	var t = decodeMemarg(imm);
	if (t.1 != TrapReason.NONE) return emitTrap(t.1);
	asm_pmov_s_m(X(d), t.0);
	state.push(KIND_V128 | IN_REG, d, 0);
}

private def visit_V128_LOAD_ZERO<T>(imm: MemArg, loadMem: (Reg, MemArg) -> void, asm_pins_s_r_i: (X86_64Xmmr, X86_64Gpr, byte) -> T) {
var val = allocTmp(ValueKind.I64);
var d = allocRegTos(ValueKind.V128);
Expand Down

0 comments on commit 49bb085

Please sign in to comment.