Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Whamm intrinsification baseline (wasm to wasm call) #222

Merged
merged 14 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/engine/Tuning.v3
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ component SpcTuning {
def postOsrTierUpThreshold = 1; // threshold after completing one OSR tierup
var intrinsifyCountProbe = true; // recognize and optimize CountProbes
var intrinsifyOperandProbe = true; // recognize and optimize OperandProbes
var intrinsifyWhammProbe = true; // recognize and optimize WhammProbes
def probeCallFreesRegs = true; // probe calls frees registers in abstract state
def runtimeCallFreesRegs = true; // runtime calls frees registers in abstract state
}
2 changes: 1 addition & 1 deletion src/engine/WasmStack.v3
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ enum StackState {
// and cached thereafter. It becomes obsolete after the frame is unwound, either because the function returned,
// or a trap or exception unwound the stack.
class FrameAccessor {
private var metaRef: FrameAccessorRef;
def var metaRef: FrameAccessorRef;

// Returns the Wasm function in this frame.
def func() -> WasmFunction;
Expand Down
3 changes: 3 additions & 0 deletions src/engine/compiler/MacroAssembler.v3
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ class MacroAssembler(valuerep: Tagging, regConfig: RegConfig) {
// Always returns 0 in this class.
def curDataBytes() -> u64 {
return 0;
}
// Hook that receives a StringBuilder {sb}; the body here is empty, so nothing is appended.
def printCodeBytes(sb: StringBuilder) {
}

// Label operations
def newLabel(create_pos: int) -> MasmLabel {
Expand Down Expand Up @@ -241,6 +243,7 @@ class MacroAssembler(valuerep: Tagging, regConfig: RegConfig) {
def emit_store_curstack_vsp(vsp: Reg);
def emit_load_curstack_vsp(vsp: Reg);
def emit_call_runtime_Probe_instr();
def emit_call_runtime_materialize_frame_accessor();
def emit_increment_CountProbe(tmp: Reg, probe: CountProbe, increment: u64);
def emit_call_OperandProbe_i_v_fire(probe: OperandProbe_i_v, value_reg: Reg);

Expand Down
82 changes: 82 additions & 0 deletions src/engine/compiler/SinglePassCompiler.v3
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,10 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl
masm.emit_debugger_breakpoint();
return;
}
x: WhammProbe => {
emitWhammProbe(x);
return;
}
}
// spill everything
state.emitSaveAll(resolver, probeSpillMode);
Expand All @@ -368,6 +372,80 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl
emit_reload_regs();
if (!probeSpillMode.free_regs) state.emitRestoreAll(resolver);
}

// saves the overhead of using a runtime call by directly invoking the wasm function associated with the whamm probe
// Emits an inline call to the Wasm function behind {probe}. The value stack is spilled, one frame slot
// per entry of {probe.sig} is filled starting at {state.sp}, and the function's target code is then
// called directly with VSP computed past those argument slots.
def emitWhammProbe(probe: WhammProbe) {
	// spill entire value stack.
	state.emitSaveAll(resolver, probeSpillMode);
	// Fetch the field offsets before the argument loop: the FrameAccessor case reads
	// {offsets.X86_64FrameAccessor_metaRef}, so the declaration must precede its first use.
	var offsets = masm.getOffsets();
	// set up args and push to frame slots.
	var whamm_sig = probe.sig;
	for (i < whamm_sig.length) {
		// Argument {i} is written to the slot {i} positions above the current stack pointer.
		var slot_addr = masm.slotAddr(state.sp + u32.view(i));
		match(whamm_sig[i]) {
			FrameAccessor => {
				// check if we have a frame accessor already
				var label_id = it.pc + it.func.orig_bytecode.length;
				var cont_label = masm.newLabel(label_id);
				masm.emit_mov_r_m(ValueKind.REF, regs.scratch, frame.accessor_slot);
				masm.emit_br_r(regs.scratch, MasmBrCond.REF_NONNULL, cont_label);

				// special case: requires runtime call to materialize FrameAccessor object
				masm.emit_call_runtime_materialize_frame_accessor();
				// Reload the accessor slot after the runtime call so both paths reach the label
				// with the accessor in the scratch register.
				masm.emit_mov_r_m(ValueKind.REF, regs.scratch, frame.accessor_slot);

				// move result to mem slot
				masm.bindLabel(cont_label);
				masm.emit_mov_m_m(ValueKind.REF, slot_addr, MasmAddr(regs.scratch, offsets.X86_64FrameAccessor_metaRef));
			}
			Val(val) => {
				// Flatten the constant into one or two 64-bit words that are stored as immediates.
				var is_v128 = false;
				var low: u64, high: u64;
				match (val) {
					I31(v) => low = v;
					I32(v) => low = v;
					I64(v) => low = v;
					F32(v) => low = v;
					F64(v) => low = v;
					V128(l, h) => {
						low = l;
						high = h;
						is_v128 = true;
					}
					// NOTE(review): this embeds the object's current address as an immediate — confirm
					// the object cannot be moved or reclaimed while the generated code is live.
					Ref(obj) => low = u64.view(Pointer.atObject(obj) - Pointer.NULL);
				}
				masm.emit_mov_m_d(slot_addr, low);
				if (is_v128) {
					masm.emit_mov_m_d(slot_addr.plus(8), high);
				}
			}
			// NOTE(review): both copies below use ValueKind.REF regardless of the declared type of
			// the operand/local — confirm that width is sufficient for every value kind.
			Operand(_, operand_idx) => {
				masm.emit_mov_m_m(ValueKind.REF, slot_addr, masm.slotAddr(state.sp + u32.view(operand_idx) - 1));
			}
			Local(_, local_idx) => {
				masm.emit_mov_m_m(ValueKind.REF, slot_addr, masm.slotAddr(u32.view(local_idx)));
			}
		}
	}
	var whamm_instance = probe.func.instance;
	var func_id = probe.func.decl.func_index;

	var vsp_reg = allocTmpFixed(ValueKind.REF, regs.vsp);
	var func_reg = allocTmpFixed(ValueKind.REF, regs.func_arg);
	var tmp = allocTmp(ValueKind.REF);

	// Load the target code/entrypoint.
	masm.emit_mov_r_l(func_reg, Pointer.atObject(whamm_instance.functions[func_id]) - Pointer.NULL);
	masm.emit_mov_r_m(ValueKind.REF, tmp, MasmAddr(func_reg, offsets.WasmFunction_decl));
	masm.emit_mov_r_m(ValueKind.REF, tmp, MasmAddr(tmp, offsets.FuncDecl_target_code));
	// adjust vsp_reg to compute the "true" VSP, accounting for args to WhammProbe's WasmFunction
	emit_compute_vsp(vsp_reg, state.sp + u32.view(whamm_sig.length));
	// Call to the entrypoint.
	masm.emit_call_r(tmp);
	emit_reload_regs();
	if (!probeSpillMode.free_regs) state.emitRestoreAll(resolver);
}

// Handles the CRASH_EXEC visit by delegating to {masm.emit_intentional_crash()}.
def visit_CRASH_EXEC() {
masm.emit_intentional_crash();
}
Expand Down Expand Up @@ -540,6 +618,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl
state.popArgsAndPushResults(func.sig);
}
}

def emitMoveTailCallArgs(sig: SigDecl) {
var p = sig.params, count = u32.!(p.length);
var base = state.sp - count;
Expand Down Expand Up @@ -1457,6 +1536,9 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl
var pc = it.pc - start_pos;
instrTracer.instr_width = Opcodes.longestName + 1;
instrTracer.putPcAndInstr(OUT, module, func, pc, orig);
OUT.puts("JIT code: ");
masm.printCodeBytes(OUT);
OUT.ln();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this makes debugging easier, since we can see the exact assembly generated for each opcode

}
}
// Different branch instructions have different repush
Expand Down
2 changes: 2 additions & 0 deletions src/engine/x86-64/V3Offsets.v3
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class V3Offsets {
private def wf = WasmFunction.new(i, decl);
private def mem = X86_64Memory.new(null);
private def vs = X86_64Stack.new(2u * 4096u);
private def acc = X86_64FrameAccessor.new(vs, Pointer.NULL, decl);
private def ha = HeapArray.new(null, []);
private def cnt = CountProbe.new();

Expand Down Expand Up @@ -56,6 +57,7 @@ class V3Offsets {
def X86_64Stack_func = int.view(Pointer.atField(vs.func) - Pointer.atObject(vs));
def X86_64Stack_parent_rsp_ptr = int.view(Pointer.atField(vs.parent_rsp_ptr) - Pointer.atObject(vs));
def X86_64Stack_parent = int.view(Pointer.atField(vs.parent) - Pointer.atObject(vs));
def X86_64FrameAccessor_metaRef = int.view(Pointer.atField(acc.metaRef) - Pointer.atObject(acc));

def WasmFunction_typeId = Pointer.atObject(wf).load<int>();

Expand Down
13 changes: 12 additions & 1 deletion src/engine/x86-64/X86_64MacroAssembler.v3
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,19 @@ class X86_64MacroAssembler extends MacroAssembler {
var jump_tables: Vector<(int, Array<X86_64Label>)>;
var offsets: V3Offsets;
var trap_stubs: X86_64SpcTrapsStub;
var last_code_print_at = 0;

new(w, regConfig: RegConfig) super(Target.tagging, regConfig) {
scratch = G(regConfig.scratch);
}

def printCodeBytes(sb: StringBuilder) {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This adds a little more local state; how about keeping track of the offset of the bytecode start in the SPC? Partially because we should consider saving that for use at runtime, e.g. to compute the PC at frame walking instead of storing it as is done now.

var w_offset = w.end();
while (last_code_print_at < w_offset) {
sb.put1("%x ", w.data[last_code_print_at]);
last_code_print_at++;
}
sb.ln();
}
// Returns the writer's current end position, converted to a u64.
def curCodeBytes() -> u64 {
	var end_offset = w.end();
	return u64.!(end_offset);
}
Expand Down Expand Up @@ -761,6 +769,9 @@ class X86_64MacroAssembler extends MacroAssembler {
// Emits a runtime call targeting {RT.runtime_PROBE_instr} via {emit_call_runtime}.
def emit_call_runtime_Probe_instr() {
emit_call_runtime(RT.runtime_PROBE_instr);
}
// Emits a runtime call targeting {RT.runtime_materialize_frame_accessor} via {emit_call_runtime}.
def emit_call_runtime_materialize_frame_accessor() {
emit_call_runtime(RT.runtime_materialize_frame_accessor);
}
private def emit_call_runtime<P, R>(closure: P -> R) {
var ptr = CiRuntime.unpackClosure<X86_64Interpreter, P, R>(closure).0;
// Do an absolute call into the runtime
Expand Down
5 changes: 5 additions & 0 deletions src/engine/x86-64/X86_64Runtime.v3
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ component X86_64Runtime {
if (ret != null) return stack.throw(ret);
return ret;
}
def runtime_materialize_frame_accessor() {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about calling this runtime_getFrameAccessorMetaRef() and returning the metaref, since it's needed by the caller?

var rsp = CiRuntime.callerSp();
var frame = TargetFrame(rsp);
frame.getFrameAccessor().getMetaRef();
}
def runtime_TRAP(func: WasmFunction, pc: int, reason: TrapReason) -> Throwable {
var rsp = CiRuntime.callerSp();
var stack = curStack.setRsp(rsp);
Expand Down
7 changes: 7 additions & 0 deletions src/engine/x86-64/X86_64Target.v3
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ component Target {
if (Trace.compiler) {
Trace.OUT.put2("func[%d].target_code: break *0x%x", f.func_index, addr - Pointer.NULL)
.put2(" disass 0x%x, 0x%x", addr - Pointer.NULL, end - Pointer.NULL).ln();
var cur_byte = addr;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about guarding this behind a new trace flag --trace-asm?

Trace.OUT.puts("JIT code: ");
while (cur_byte < end) {
Trace.OUT.put1("%x ", cur_byte.load<u8>());
cur_byte++;
}
Trace.OUT.ln();
}
f.target_code = TargetCode(addr);
Debug.afterCompile(f, u64.view(addr - Pointer.NULL));
Expand Down
20 changes: 0 additions & 20 deletions src/monitors/WhammMonitor.v3
Original file line number Diff line number Diff line change
Expand Up @@ -131,23 +131,3 @@ class WhammMonitor(whamm: Module) extends Monitor {
return (instance, instRef);
}
}

// A probe that adapts a Wasm function to be called by the engine-internal probing mechanism.
class WhammProbe(func: WasmFunction, sig: Array<WhammArg>) extends Probe {
	// Argument buffer created once per probe and refilled on every firing; the shared
	// {Values.NONE} is used when the signature has no entries.
	private def args = if(sig.length == 0, Values.NONE, Array<Value>.new(sig.length));

	// Builds one argument per signature entry from the firing location, calls {func} with
	// them, and always lets execution continue.
	def fire(loc: DynamicLoc) -> Resumption {
		for (n < sig.length) {
			match (sig[n]) {
				FrameAccessor => args[n] = Value.Ref(loc.frame.getFrameAccessor().getMetaRef());
				Val(constant) => args[n] = constant;
				Operand(_, operand_idx) => args[n] = loc.frame.getFrameAccessor().getOperand(operand_idx);
				Local(_, local_idx) => args[n] = loc.frame.getFrameAccessor().getLocal(local_idx);
			}
		}
		Execute.call(func, args); // XXX: runs on a new stack
		return Resumption.Continue;
	}
}
20 changes: 20 additions & 0 deletions src/util/Whamm.v3
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,26 @@ component Whamm {
}
}

// A probe that adapts a Wasm function to be called by the engine-internal probing mechanism.
class WhammProbe(func: WasmFunction, sig: Array<WhammArg>) extends Probe {
	// Argument buffer created once per probe and refilled on every firing; the shared
	// {Values.NONE} is used when the signature has no entries.
	private def args = if(sig.length == 0, Values.NONE, Array<Value>.new(sig.length));

	// Builds one argument per signature entry from the firing location, calls {func} with
	// them, and always lets execution continue.
	def fire(loc: DynamicLoc) -> Resumption {
		for (n < sig.length) {
			match (sig[n]) {
				FrameAccessor => args[n] = Value.Ref(loc.frame.getFrameAccessor().getMetaRef());
				Val(constant) => args[n] = constant;
				Operand(_, operand_idx) => args[n] = loc.frame.getFrameAccessor().getOperand(operand_idx);
				Local(_, local_idx) => args[n] = loc.frame.getFrameAccessor().getLocal(local_idx);
			}
		}
		Execute.call(func, args); // XXX: runs on a new stack
		return Resumption.Continue;
	}
}

def parseParam0(r: TextReader) -> WhammParam {
var i = r.star_rel(0, isAlphaOrUnderscore);
var id = r.data[r.pos ... i];
Expand Down
Loading