From 0343305e1b5a748479cadd16584a981ed401fac2 Mon Sep 17 00:00:00 2001 From: Enrico Deiana Date: Tue, 9 Apr 2024 21:35:58 -0700 Subject: [PATCH] i#6662 public traces, part 1: synthetic ISA (#6691) A synthetic ISA that has the purpose of preserving register dependencies and giving hints on the type of operation an instruction performs. This PR implements the encoding/decoding functionalities for this new ISA, which we call #DR_ISA_REGDEPS. Note that being a synthetic ISA, some routines that work on instructions coming from an actual ISA (such as #DR_ISA_AMD64) are not supported (e.g., decode_sizeof()). Currently we support: - instr_convert_to_isa_regdeps(): to convert an #instr_t of an actual ISA to a #DR_ISA_REGDEPS #instr_t. - instr_encode() and instr_encode_to_copy(): to encode a #DR_ISA_REGDEPS #instr_t into a sequence of contiguous bytes. - decode() and decode_from_copy(): to decode an encoded #DR_ISA_REGDEPS instruction into an #instr_t. A #DR_ISA_REGDEPS #instr_t contains the following information: - categories: composed of #dr_instr_category_t values, they indicate the type of operation performed (e.g., a load, a store, a floating point math operation, a branch, etc.). Note that categories are composable, hence more than one category can be set. This information can be obtained using instr_get_category(). - arithmetic flags: we don't distinguish between different flags, we only report if at least one arithmetic flag was read (all arithmetic flags will be set to read) and/or written (all arithmetic flags will be set to written). This information can be obtained using instr_get_arith_flags(). - number of source and destination operands: we only consider register operands. This information can be obtained using instr_num_srcs() and instr_num_dsts(). - source operation size: the size of the largest source operand the instruction operates on. This information can be obtained by accessing the #instr_t operation_size field. 
- list of register operand identifiers: they are contained in #opnd_t lists, separated into source and destination. Note that these #reg_id_t identifiers are virtual and it should not be assumed that they belong to any DR_REG_ enum value of any specific architecture. These identifiers are meant for tracking register dependencies with respect to other #DR_ISA_REGDEPS instructions only. These lists can be obtained by walking the #instr_t operands with instr_get_dst() and instr_get_src(). - ISA mode: is always #DR_ISA_REGDEPS. This information can be obtained using instr_get_isa_mode(). - encoding bytes: an array of bytes containing the #DR_ISA_REGDEPS #instr_t encoding. Note that this information is present only for decoded instructions (i.e., #instr_t generated by decode() or decode_from_copy()). This information can be obtained using instr_get_raw_bits(). - length: the length of the encoded instruction in bytes. Note that this information is present only for decoded instructions (i.e., #instr_t generated by decode() or decode_from_copy()). This information can be obtained by accessing the #instr_t length field. Note that all routines that operate on #instr_t and #opnd_t are also supported for #DR_ISA_REGDEPS instructions. However, querying information outside of those described above (e.g., the instruction opcode with instr_get_opcode()) will return the zeroed value set by instr_create() or instr_init() when the #instr_t was created (e.g., instr_get_opcode() would return OP_INVALID). 
--- api/docs/release.dox | 4 + core/CMakeLists.txt | 2 + core/ir/aarch64/codec.c | 10 + core/ir/aarch64/decode.c | 3 +- core/ir/aarch64/instr.c | 4 +- core/ir/arm/decode.c | 12 +- core/ir/arm/instr.c | 2 +- core/ir/encode_api.h | 88 ++++++- core/ir/encode_shared.c | 29 ++- core/ir/instr_api.h | 28 +- core/ir/instr_shared.c | 137 ++++++++++ core/ir/isa_regdeps/decode.c | 141 ++++++++++ core/ir/isa_regdeps/decode.h | 41 +++ core/ir/isa_regdeps/encode.c | 143 +++++++++++ core/ir/isa_regdeps/encode.h | 41 +++ core/ir/isa_regdeps/encoding_common.h | 130 ++++++++++ core/ir/riscv64/codec.c | 10 + core/ir/riscv64/decode.c | 3 +- core/ir/riscv64/instr.c | 4 +- core/ir/x86/decode.c | 14 +- core/ir/x86/instr.c | 4 +- suite/tests/CMakeLists.txt | 3 + suite/tests/api/ir_regdeps.c | 356 ++++++++++++++++++++++++++ suite/tests/api/ir_regdeps.expect | 1 + 24 files changed, 1184 insertions(+), 26 deletions(-) create mode 100644 core/ir/isa_regdeps/decode.c create mode 100644 core/ir/isa_regdeps/decode.h create mode 100644 core/ir/isa_regdeps/encode.c create mode 100644 core/ir/isa_regdeps/encode.h create mode 100644 core/ir/isa_regdeps/encoding_common.h create mode 100644 suite/tests/api/ir_regdeps.c create mode 100644 suite/tests/api/ir_regdeps.expect diff --git a/api/docs/release.dox b/api/docs/release.dox index cc044060dbd..2321dab8a4e 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -219,6 +219,10 @@ Further non-compatibility-affecting changes include: is set to true by default to match the existing behavior of the invariant checker. - Added a new instr API instr_is_xrstor() that tells whether an instruction is any variant of the x86 xrstor opcode. + - Added a new #dr_isa_mode_t: #DR_ISA_REGDEPS, which is a synthetic ISA with the main + purpose of preserving register dependencies. + - Added instr_convert_to_isa_regdeps() API that converts an #instr_t from a real ISA + (e.g., #DR_ISA_AMD64) to the #DR_ISA_REGDEPS synthetic ISA. 
************************************************** diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 3c1fd304e47..7146c53ca6e 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -279,6 +279,8 @@ set(DECODER_SRCS ir/${ARCH_NAME}/decode.c ir/encode_shared.c ir/${ARCH_NAME}/encode.c + ir/isa_regdeps/encode.c + ir/isa_regdeps/decode.c ir/disassemble_shared.c ir/${ARCH_NAME}/disassemble.c ir/ir_utils_shared.c diff --git a/core/ir/aarch64/codec.c b/core/ir/aarch64/codec.c index ffe676cc09a..e1364d6555f 100644 --- a/core/ir/aarch64/codec.c +++ b/core/ir/aarch64/codec.c @@ -40,9 +40,11 @@ #include #include "../globals.h" +#include "../isa_regdeps/decode.h" #include "arch.h" #include "decode.h" #include "disassemble.h" +#include "encode_api.h" #include "instr.h" #include "instr_create_shared.h" @@ -9721,6 +9723,14 @@ decode_category(uint encoding, instr_t *instr) byte * decode_common(dcontext_t *dcontext, byte *pc, byte *orig_pc, instr_t *instr) { + /* #DR_ISA_REGDEPS synthetic ISA has its own decoder. + * XXX i#1684: when DR can be built with full dynamic architecture selection we won't + * need to pollute the decoding of other architectures with this synthetic ISA special + * case. 
+ */ + if (dr_get_isa_mode(dcontext) == DR_ISA_REGDEPS) + return decode_isa_regdeps(dcontext, pc, instr); + byte *next_pc = pc + 4; uint enc = *(uint *)pc; uint eflags = 0; diff --git a/core/ir/aarch64/decode.c b/core/ir/aarch64/decode.c index e056a7ef9d3..9e23c11c7d7 100644 --- a/core/ir/aarch64/decode.c +++ b/core/ir/aarch64/decode.c @@ -32,6 +32,7 @@ */ #include "../globals.h" +#include "encode_api.h" #include "instr.h" #include "decode.h" #include "decode_fast.h" /* ensure we export decode_next_pc, decode_sizeof */ @@ -41,7 +42,7 @@ bool is_isa_mode_legal(dr_isa_mode_t mode) { - return (mode == DR_ISA_ARM_A64); + return (mode == DR_ISA_ARM_A64 || mode == DR_ISA_REGDEPS); } app_pc diff --git a/core/ir/aarch64/instr.c b/core/ir/aarch64/instr.c index 80e6917fa65..0d02a8d0642 100644 --- a/core/ir/aarch64/instr.c +++ b/core/ir/aarch64/instr.c @@ -47,9 +47,9 @@ bool instr_set_isa_mode(instr_t *instr, dr_isa_mode_t mode) { - if (mode != DR_ISA_ARM_A64) + if (mode != DR_ISA_ARM_A64 && mode != DR_ISA_REGDEPS) return false; - instr->isa_mode = DR_ISA_ARM_A64; + instr->isa_mode = mode; return true; } diff --git a/core/ir/arm/decode.c b/core/ir/arm/decode.c index 8fbf85eed83..46de0943af9 100644 --- a/core/ir/arm/decode.c +++ b/core/ir/arm/decode.c @@ -31,6 +31,8 @@ */ #include "../globals.h" +#include "../isa_regdeps/decode.h" +#include "encode_api.h" #include "instr.h" #include "decode.h" #include "decode_private.h" @@ -172,7 +174,7 @@ decode_in_it_block(decode_state_t *state, app_pc pc, decode_info_t *di) bool is_isa_mode_legal(dr_isa_mode_t mode) { - return (mode == DR_ISA_ARM_THUMB || DR_ISA_ARM_A32); + return (mode == DR_ISA_ARM_THUMB || mode == DR_ISA_ARM_A32 || mode == DR_ISA_REGDEPS); } /* We need to call canonicalize_pc_target() on all next_tag-writing @@ -2428,6 +2430,14 @@ decode_opcode(dcontext_t *dcontext, byte *pc, instr_t *instr) static byte * decode_common(dcontext_t *dcontext, byte *pc, byte *orig_pc, instr_t *instr) { + /* #DR_ISA_REGDEPS synthetic ISA 
has its own decoder. + * XXX i#1684: when DR can be built with full dynamic architecture selection we won't + * need to pollute the decoding of other architectures with this synthetic ISA special + * case. + */ + if (dr_get_isa_mode(dcontext) == DR_ISA_REGDEPS) + return decode_isa_regdeps(dcontext, pc, instr); + const instr_info_t *info = &invalid_instr; decode_info_t di; byte *next_pc; diff --git a/core/ir/arm/instr.c b/core/ir/arm/instr.c index 30994552a45..6247eedf5c2 100644 --- a/core/ir/arm/instr.c +++ b/core/ir/arm/instr.c @@ -43,7 +43,7 @@ bool instr_set_isa_mode(instr_t *instr, dr_isa_mode_t mode) { - if (mode != DR_ISA_ARM_THUMB && mode != DR_ISA_ARM_A32) { + if (mode != DR_ISA_ARM_THUMB && mode != DR_ISA_ARM_A32 && mode != DR_ISA_REGDEPS) { return false; } instr->isa_mode = mode; diff --git a/core/ir/encode_api.h b/core/ir/encode_api.h index be9a94c79ae..3b900081de9 100644 --- a/core/ir/encode_api.h +++ b/core/ir/encode_api.h @@ -44,13 +44,87 @@ /** Specifies which processor mode to use when decoding or encoding. */ typedef enum _dr_isa_mode_t { - DR_ISA_IA32, /**< IA-32 (Intel/AMD 32-bit mode). */ - DR_ISA_X86 = DR_ISA_IA32, /**< Alias for DR_ISA_IA32. */ - DR_ISA_AMD64, /**< AMD64 (Intel/AMD 64-bit mode). */ - DR_ISA_ARM_A32, /**< ARM A32 (AArch32 ARM). */ - DR_ISA_ARM_THUMB, /**< Thumb (ARM T32). */ - DR_ISA_ARM_A64, /**< ARM A64 (AArch64). */ - DR_ISA_RV64IMAFDC, /**< RISC-V (rv64imafdc). */ + /** + * IA-32 (Intel/AMD 32-bit mode). + */ + DR_ISA_IA32, + /** + * Alias for DR_ISA_IA32. + */ + DR_ISA_X86 = DR_ISA_IA32, + /** + * AMD64 (Intel/AMD 64-bit mode). + */ + DR_ISA_AMD64, + /** + * ARM A32 (AArch32 ARM). + */ + DR_ISA_ARM_A32, + /** + * Thumb (ARM T32). + */ + DR_ISA_ARM_THUMB, + /** + * ARM A64 (AArch64). + */ + DR_ISA_ARM_A64, + /** + * RISC-V (rv64imafdc). + */ + DR_ISA_RV64IMAFDC, + /** + * A synthetic ISA that has the purpose of preserving register dependencies and giving + * hints on the type of operation an instruction performs. 
+ * + * Being a synthetic ISA, some routines that work on instructions coming from an + * actual ISA (such as #DR_ISA_AMD64) are not supported (e.g., decode_sizeof()). + * + * Currently we support: + * - instr_convert_to_isa_regdeps(): to convert an #instr_t of an actual ISA to a + * #DR_ISA_REGDEPS #instr_t. + * - instr_encode() and instr_encode_to_copy(): to encode a #DR_ISA_REGDEPS #instr_t + * into a sequence of contiguous bytes. + * - decode() and decode_from_copy(): to decode an encoded #DR_ISA_REGDEPS instruction + * into an #instr_t. + * + * A #DR_ISA_REGDEPS #instr_t contains the following information: + * - categories: composed of #dr_instr_category_t values, they indicate the type of + * operation performed (e.g., a load, a store, a floating point math operation, a + * branch, etc.). Note that categories are composable, hence more than one category + * can be set. This information can be obtained using instr_get_category(). + * - arithmetic flags: we don't distinguish between different flags, we only report if + * at least one arithmetic flag was read (all arithmetic flags will be set to read) + * and/or written (all arithmetic flags will be set to written). This information + * can be obtained using instr_get_arith_flags(). + * - number of source and destination operands: we only consider register operands. + * This information can be obtained using instr_num_srcs() and instr_num_dsts(). + * - source operation size: the size of the largest source operand the instruction operates on. + * This information can be obtained by accessing the #instr_t operation_size field. + * - list of register operand identifiers: they are contained in #opnd_t lists, + * separated into source and destination. Note that these #reg_id_t identifiers are + * virtual and it should not be assumed that they belong to any DR_REG_ enum value + * of any specific architecture. 
These identifiers are meant for tracking register + * dependencies with respect to other #DR_ISA_REGDEPS instructions only. These + * lists can be obtained by walking the #instr_t operands with instr_get_dst() and + * instr_get_src(). + * - ISA mode: is always #DR_ISA_REGDEPS. This information can be obtained using + * instr_get_isa_mode(). + * - encoding bytes: an array of bytes containing the #DR_ISA_REGDEPS #instr_t + * encoding. Note that this information is present only for decoded instructions + * (i.e., #instr_t generated by decode() or decode_from_copy()). This information + * can be obtained using instr_get_raw_bits(). + * - length: the length of the encoded instruction in bytes. Note that this + * information is present only for decoded instructions (i.e., #instr_t generated by + * decode() or decode_from_copy()). This information can be obtained by accessing + * the #instr_t length field. + * + * Note that all routines that operate on #instr_t and #opnd_t are also supported for + * #DR_ISA_REGDEPS instructions. However, querying information outside of those + * described above (e.g., the instruction opcode with instr_get_opcode()) will return + * the zeroed value set by instr_create() or instr_init() when the #instr_t was + * created (e.g., instr_get_opcode() would return OP_INVALID). 
+ */ + DR_ISA_REGDEPS, } dr_isa_mode_t; DR_API diff --git a/core/ir/encode_shared.c b/core/ir/encode_shared.c index dbdf875f49b..c48f40e3a83 100644 --- a/core/ir/encode_shared.c +++ b/core/ir/encode_shared.c @@ -38,6 +38,7 @@ /* encode_shared.c -- cross-platform encodingn routines */ #include "../globals.h" +#include "isa_regdeps/encode.h" #include "arch.h" #include "instr.h" #include "decode.h" @@ -111,11 +112,29 @@ get_encoding_info(instr_t *instr) return info; } +static byte * +instr_encode_common(dcontext_t *dcontext, instr_t *instr, byte *copy_pc, byte *final_pc, + bool check_reachable, + bool *has_instr_opnds /*OUT OPTIONAL*/ + _IF_DEBUG(bool assert_reachable)) +{ + /* #DR_ISA_REGDEPS synthetic ISA has its own encoder. + * XXX i#1684: when DR can be built with full dynamic architecture selection we won't + * need to pollute the encoding of other architectures with this synthetic ISA special + * case. + */ + if (instr_get_isa_mode(instr) == DR_ISA_REGDEPS) + return encode_isa_regdeps(dcontext, instr, copy_pc); + + return instr_encode_arch(dcontext, instr, copy_pc, final_pc, check_reachable, + has_instr_opnds _IF_DEBUG(assert_reachable)); +} + /* completely ignores reachability and predication failures */ byte * instr_encode_ignore_reachability(dcontext_t *dcontext, instr_t *instr, byte *pc) { - return instr_encode_arch(dcontext, instr, pc, pc, false, NULL _IF_DEBUG(false)); + return instr_encode_common(dcontext, instr, pc, pc, false, NULL _IF_DEBUG(false)); } /* just like instr_encode but doesn't assert on reachability or predication failures */ @@ -123,16 +142,16 @@ byte * instr_encode_check_reachability(dcontext_t *dcontext, instr_t *instr, byte *pc, bool *has_instr_opnds /*OUT OPTIONAL*/) { - return instr_encode_arch(dcontext, instr, pc, pc, true, - has_instr_opnds _IF_DEBUG(false)); + return instr_encode_common(dcontext, instr, pc, pc, true, + has_instr_opnds _IF_DEBUG(false)); } byte * instr_encode_to_copy(void *drcontext, instr_t *instr, byte *copy_pc, 
byte *final_pc) { dcontext_t *dcontext = (dcontext_t *)drcontext; - return instr_encode_arch(dcontext, instr, copy_pc, final_pc, true, - NULL _IF_DEBUG(true)); + return instr_encode_common(dcontext, instr, copy_pc, final_pc, true, + NULL _IF_DEBUG(true)); } byte * diff --git a/core/ir/instr_api.h b/core/ir/instr_api.h index b68f9706178..c9844564b5a 100644 --- a/core/ir/instr_api.h +++ b/core/ir/instr_api.h @@ -298,10 +298,19 @@ struct _instr_t { uint opcode; + union { # ifdef X86 - /* PR 251479: offset into instr's raw bytes of rip-relative 4-byte displacement */ - byte rip_rel_pos; + /* Offset into instr's raw bytes of rip-relative 4-byte displacement. + * This field is valid when instr_t isa_mode is DR_ISA_X86. + */ + byte rip_rel_pos; # endif + /* Size of source data (i.e., read) a DR_ISA_REGDEPS instruction operates on. + * This field is valid when instr_t isa_mode is DR_ISA_REGDEPS. + * Note that opnd_size_t is an alias of byte. + */ + opnd_size_t operation_size; + }; /* we dynamically allocate dst and src arrays b/c x86 instrs can have * up to 8 of each of them, but most have <=2 dsts and <=3 srcs, and we @@ -2096,6 +2105,21 @@ DR_API instr_t * instr_convert_short_meta_jmp_to_long(void *drcontext, instrlist_t *ilist, instr_t *instr); +DR_API +/** + * Converts a real ISA (e.g., #DR_ISA_AMD64) instruction \p instr_real_isa into a + * #DR_ISA_REGDEPS instruction and stores it into \p instr_regdeps_isa. + * Assumes \p instr_regdeps_isa has been allocated by the caller (e.g., using + * instr_create()). + * Assumes \p instr_real_isa is a fully-decoded or synthesized instruction of a real ISA + * with valid operand information. + * \note \p instr_regdeps_isa will contain only the information of a #DR_ISA_REGDEPS + * synthetic instruction. 
+ */ +void +instr_convert_to_isa_regdeps(void *drcontext, instr_t *instr_real_isa, + instr_t *instr_regdeps_isa); + DR_API /** * Given \p eflags, returns whether or not the conditional branch, \p diff --git a/core/ir/instr_shared.c b/core/ir/instr_shared.c index c7f76766bd3..500e3d37386 100644 --- a/core/ir/instr_shared.c +++ b/core/ir/instr_shared.c @@ -52,6 +52,7 @@ #define INSTR_INLINE extern inline #include "../globals.h" +#include "isa_regdeps/encoding_common.h" #include "instr.h" #include "arch.h" #include "../link.h" @@ -2991,6 +2992,142 @@ instr_uses_fp_reg(instr_t *instr) return false; } +void +instr_convert_to_isa_regdeps(void *drcontext, instr_t *instr_real_isa, + instr_t *instr_regdeps_isa) +{ + /* Retrieve number of register destination operands from real ISA instruction. + * Note that a destination operand that is a memory reference should have its + * registers (if any) counted as source operands, since they are being read. + * We use [src|dst]_reg_used to keep track of registers we've seen and avoid + * duplicates. + */ + bool src_reg_used[REGDEPS_MAX_NUM_REGS]; + memset(src_reg_used, 0, sizeof(src_reg_used)); + uint num_srcs = 0; + bool dst_reg_used[REGDEPS_MAX_NUM_REGS]; + memset(dst_reg_used, 0, sizeof(dst_reg_used)); + uint num_dsts = 0; + uint instr_real_num_dsts = (uint)instr_num_dsts(instr_real_isa); + for (uint dst_index = 0; dst_index < instr_real_num_dsts; ++dst_index) { + opnd_t dst_opnd = instr_get_dst(instr_real_isa, dst_index); + uint num_regs_used_by_opnd = (uint)opnd_num_regs_used(dst_opnd); + if (opnd_is_memory_reference(dst_opnd)) { + for (uint opnd_index = 0; opnd_index < num_regs_used_by_opnd; ++opnd_index) { + reg_id_t reg = opnd_get_reg_used(dst_opnd, opnd_index); + /* Map sub-registers to their containing register. 
+ */ + reg_id_t reg_canonical = reg_to_pointer_sized(reg); + if (!src_reg_used[reg_canonical]) { + ++num_srcs; + src_reg_used[reg_canonical] = true; + } + } + } else { + for (uint opnd_index = 0; opnd_index < num_regs_used_by_opnd; ++opnd_index) { + reg_id_t reg = opnd_get_reg_used(dst_opnd, opnd_index); + /* Map sub-registers to their containing register. + */ + reg_id_t reg_canonical = reg_to_pointer_sized(reg); + if (!dst_reg_used[reg_canonical]) { + ++num_dsts; + dst_reg_used[reg_canonical] = true; + } + } + } + } + + /* We use max_src_opnd_size_bytes to keep track of the size of the largest source + * operand. This variable counts the number of bytes instead of using opnd_size_t to + * avoid relying on OPSZ_ enum values. Later on we convert max_src_opnd_size_bytes to + * its corresponding OPSZ_ enum value and store it into operation_size. + */ + uint max_src_opnd_size_bytes = 0; + /* Retrieve number of register source operands from real ISA instruction. + */ + uint instr_real_num_srcs = (uint)instr_num_srcs(instr_real_isa); + for (uint i = 0; i < instr_real_num_srcs; ++i) { + opnd_t src_opnd = instr_get_src(instr_real_isa, i); + opnd_size_t opnd_size = opnd_get_size(src_opnd); + uint opnd_size_bytes = opnd_size_in_bytes(opnd_size); + if (opnd_size_bytes > max_src_opnd_size_bytes) + max_src_opnd_size_bytes = opnd_size_bytes; + uint num_regs_used_by_opnd = (uint)opnd_num_regs_used(src_opnd); + for (uint opnd_index = 0; opnd_index < num_regs_used_by_opnd; ++opnd_index) { + reg_id_t reg = opnd_get_reg_used(src_opnd, opnd_index); + /* Map sub-registers to their containing register. + */ + reg_id_t reg_canonical = reg_to_pointer_sized(reg); + if (!src_reg_used[reg_canonical]) { + ++num_srcs; + src_reg_used[reg_canonical] = true; + } + } + } + + /* Declare num of source and destination operands valid in the converted instruction. + */ + instr_set_num_opnds(drcontext, instr_regdeps_isa, num_dsts, num_srcs); + + /* Retrieve arithmetic flags from real ISA instruction. 
+ * If the real ISA instruction reads or writes one or more arithmetic flags, all + * arithmetic flags will be set to read or written in the converted instruction. + * Note that this operation can trigger additional encoding and decoding of + * instr_real_isa, depending on its decoding level. + */ + uint eflags_instr_real = instr_get_arith_flags(instr_real_isa, DR_QUERY_DEFAULT); + uint eflags_instr_regdeps = 0; + if (TESTANY(EFLAGS_WRITE_ARITH, eflags_instr_real)) + eflags_instr_regdeps |= EFLAGS_WRITE_ARITH; + if (TESTANY(EFLAGS_READ_ARITH, eflags_instr_real)) + eflags_instr_regdeps |= EFLAGS_READ_ARITH; + instr_regdeps_isa->eflags = eflags_instr_regdeps; + instr_set_arith_flags_valid(instr_regdeps_isa, true); + + /* Retrieve category of real ISA instruction and set it directly as the category of + * the converted instruction. No changes needed here. + */ + instr_set_category(instr_regdeps_isa, instr_get_category(instr_real_isa)); + + /* Convert max_src_opnd_size_bytes from number of bytes to opnd_size_t (which holds + * OPSZ_ enum values). + */ + instr_regdeps_isa->operation_size = opnd_size_from_bytes(max_src_opnd_size_bytes); + + /* Set the source and destination register operands for the converted instruction. + */ + if (num_dsts > 0) { + uint reg_counter = 0; + for (uint reg = 0; reg < REGDEPS_MAX_NUM_REGS; ++reg) { + if (dst_reg_used[reg]) { + opnd_t dst_opnd = opnd_create_reg((reg_id_t)reg); + instr_set_dst(instr_regdeps_isa, reg_counter, dst_opnd); + ++reg_counter; + } + } + } + + if (num_srcs > 0) { + uint reg_counter = 0; + for (uint reg = 0; reg < REGDEPS_MAX_NUM_REGS; ++reg) { + if (src_reg_used[reg]) { + opnd_t src_opnd = opnd_create_reg((reg_id_t)reg); + instr_set_src(instr_regdeps_isa, reg_counter, src_opnd); + ++reg_counter; + } + } + } + + /* Declare converted instruction operands to be valid. + * Must be done after instr_allocate_raw_bits(), which sets operands as invalid. + * NOTE(review): no instr_allocate_raw_bits() call is visible in this routine; this + * ordering remark looks carried over from decode_isa_regdeps() -- confirm. 
+ */ + instr_set_operands_valid(instr_regdeps_isa, true); + + /* Set converted instruction ISA mode to be DR_ISA_REGDEPS. + */ + instr_set_isa_mode(instr_regdeps_isa, DR_ISA_REGDEPS); +} + /* We place these here rather than in mangle_shared.c to avoid the work of * linking mangle_shared.c into drdecodelib. */ diff --git a/core/ir/isa_regdeps/decode.c b/core/ir/isa_regdeps/decode.c new file mode 100644 index 00000000000..23794089f79 --- /dev/null +++ b/core/ir/isa_regdeps/decode.c @@ -0,0 +1,141 @@ +/* ********************************************************** + * Copyright (c) 2024 Google, Inc. All rights reserved. + * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of VMware, Inc. nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. 
OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +/* decode.c -- a decoder for DR_ISA_REGDEPS instructions */ + +#include "decode.h" + +#include "../globals.h" +#include "encode_api.h" +#include "encoding_common.h" +#include "instr_api.h" +#include "opnd_api.h" + +/* Decodes the raw bytes of an encoded instruction \p encoded_instr into DR instruction + * representation \p instr. + * Returns the next instruction's PC. + * The encoding scheme followed is described in #core/ir/isa_regdeps/encoding_common.h. + */ +byte * +decode_isa_regdeps(dcontext_t *dcontext, byte *encoded_instr, instr_t *instr) +{ + /* Interpret the first 4 bytes of encoded_instr (which are always present) as a uint + * for easier retrieving of category, eflags, #src, and #dst values. + * We can do this safely because encoded_instr is 4 bytes aligned. + */ + ASSERT(ALIGNED(encoded_instr, REGDEPS_ALIGN_BYTES)); + uint encoding_header = *((uint *)encoded_instr); + + /* Decode number of register destination operands. + */ + uint num_dsts = encoding_header & REGDEPS_DST_OPND_MASK; + + /* Decode number of register source operands. + */ + uint num_srcs = (encoding_header & REGDEPS_SRC_OPND_MASK) >> REGDEPS_SRC_OPND_SHIFT; + + instr_set_num_opnds(dcontext, instr, num_dsts, num_srcs); + + /* Decode arithmetic flags. 
+ */ + uint eflags = (encoding_header & REGDEPS_FLAGS_MASK) >> REGDEPS_FLAGS_SHIFT; + uint eflags_instr = 0; + if (TESTANY(REGDEPS_INSTR_WRITES_ARITH, eflags)) + eflags_instr |= EFLAGS_WRITE_ARITH; + if (TESTANY(REGDEPS_INSTR_READS_ARITH, eflags)) + eflags_instr |= EFLAGS_READ_ARITH; + instr->eflags = eflags_instr; + + /* Declare the eflags to be valid. + * This is needed in order to retrieve their value without trying to compute it again. + */ + instr_set_arith_flags_valid(instr, true); + + /* Decode instruction category. + */ + uint category = (encoding_header & REGDEPS_CATEGORY_MASK) >> REGDEPS_CATEGORY_SHIFT; + instr_set_category(instr, category); + + /* Decode operation size, if there are any operands. + */ + uint num_opnds = num_dsts + num_srcs; + opnd_size_t max_opnd_size = OPSZ_0; + if (num_opnds > 0) + max_opnd_size = (opnd_size_t)encoded_instr[REGDEPS_OP_SIZE_INDEX]; + instr->operation_size = max_opnd_size; + + /* Decode register destination operands, if present. + */ + for (uint i = 0; i < num_dsts; ++i) { + reg_id_t dst = (reg_id_t)encoded_instr[i + REGDEPS_OPND_INDEX]; + opnd_t dst_opnd = opnd_create_reg((reg_id_t)dst); + instr_set_dst(instr, i, dst_opnd); + } + + /* Decode register source operands, if present. + */ + for (uint i = 0; i < num_srcs; ++i) { + reg_id_t src = (reg_id_t)encoded_instr[i + REGDEPS_OPND_INDEX + num_dsts]; + opnd_t src_opnd = opnd_create_reg((reg_id_t)src); + instr_set_src(instr, i, src_opnd); + } + + /* Compute instruction length including bytes for padding to reach 4 bytes alignment. + * Account for 1 additional byte containing max register operand size, if there are + * any operands. + */ + uint num_opnd_bytes = num_opnds > 0 ? num_opnds + 1 : 0; + uint length = + ALIGN_FORWARD(REGDEPS_HEADER_BYTES + num_opnd_bytes, REGDEPS_ALIGN_BYTES); + instr->length = length; + + /* Allocate space to save encoding in the bytes field of instr_t. We use it to avoid + * unnecessary encoding. 
+ */ + instr_allocate_raw_bits(dcontext, instr, length); + + /* Declare the operands to be valid. + */ + instr_set_operands_valid(instr, true); + + /* Set decoded instruction ISA mode to be synthetic. + */ + instr_set_isa_mode(instr, DR_ISA_REGDEPS); + + /* Copy encoding to bytes field of instr_t. + */ + instr_set_raw_bytes(instr, encoded_instr, length); + + /* Compute next instruction's PC as: current PC + instruction length. + */ + return encoded_instr + length; +} diff --git a/core/ir/isa_regdeps/decode.h b/core/ir/isa_regdeps/decode.h new file mode 100644 index 00000000000..d234115b893 --- /dev/null +++ b/core/ir/isa_regdeps/decode.h @@ -0,0 +1,41 @@ +/* ********************************************************** + * Copyright (c) 2024 Google, Inc. All rights reserved. + * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of VMware, Inc. nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. 
OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +#ifndef _REGDEPS_DECODE_H_ +#define _REGDEPS_DECODE_H_ 1 + +#include "../globals.h" + +byte * +decode_isa_regdeps(dcontext_t *dcontext, byte *encoded_instr, instr_t *instr); + +#endif /* _REGDEPS_DECODE_H_ */ diff --git a/core/ir/isa_regdeps/encode.c b/core/ir/isa_regdeps/encode.c new file mode 100644 index 00000000000..d26588f94d4 --- /dev/null +++ b/core/ir/isa_regdeps/encode.c @@ -0,0 +1,143 @@ +/* ********************************************************** + * Copyright (c) 2024 Google, Inc. All rights reserved. + * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of VMware, Inc. nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +/* encode.c -- an encoder for DR_ISA_REGDEPS instructions */ + +#include "encode.h" + +#include "../globals.h" +#include "encode_api.h" +#include "encoding_common.h" +#include "instr_api.h" +#include "opnd_api.h" + +/* Encodes DR instruction representation \p instr into raw bytes \p encoded_instr. + * Returns the next instruction's PC, or NULL if \p instr has too many operands. + * The encoding scheme followed is described in #core/ir/isa_regdeps/encoding_common.h. + */ +byte * +encode_isa_regdeps(dcontext_t *dcontext, instr_t *instr, byte *encoded_instr) +{ + /* Check if the instruction we're encoding already has encoding in its bytes field. + * If so, just copy that encoding in encoded_instr. + */ + if (instr_raw_bits_valid(instr)) { + memcpy(encoded_instr, instr->bytes, instr->length); + return encoded_instr + instr->length; + } + + /* Use a local uint variable for easier setting of category, eflags, #src, and #dst + * values. + */ + uint encoding_header = 0; + + /* Encode number of register destination operands (i.e., written registers). + */ + uint num_dsts = (uint)instr_num_dsts(instr); + encoding_header |= num_dsts; + + /* Encode number of register source operands (i.e., read registers). 
+ */ + uint num_srcs = (uint)instr_num_srcs(instr); + encoding_header |= (num_srcs << REGDEPS_SRC_OPND_SHIFT); + + /* Check that the number of operands is <= 8 to catch x86 corner cases we might have + * missed; we bail out with NULL before writing any byte of encoded_instr. + */ + uint num_opnds = num_dsts + num_srcs; + if (num_opnds > REGDEPS_MAX_NUM_OPNDS) { + SYSLOG_INTERNAL_WARNING("DR_ISA_REGDEPS instruction has %u number of operands.\n " + "We only support encoding of max %u operands.", + num_opnds, (uint)REGDEPS_MAX_NUM_OPNDS); + return NULL; + } + + /* Encode arithmetic flags. + */ + ASSERT(instr_arith_flags_valid(instr)); + uint eflags_instr = instr_get_arith_flags(instr, DR_QUERY_DEFAULT); + uint eflags = 0; + if (TESTANY(EFLAGS_WRITE_ARITH, eflags_instr)) + eflags |= REGDEPS_INSTR_WRITES_ARITH; + if (TESTANY(EFLAGS_READ_ARITH, eflags_instr)) + eflags |= REGDEPS_INSTR_READS_ARITH; + encoding_header |= (eflags << REGDEPS_FLAGS_SHIFT); + + /* Encode instruction category. + */ + uint category = instr_get_category(instr); + encoding_header |= (category << REGDEPS_CATEGORY_SHIFT); + + /* Copy header encoding back into encoded_instr output. + */ + *((uint *)encoded_instr) = encoding_header; + + /* Encode register destination operands, one byte each, if present. + */ + for (uint dst_index = 0; dst_index < num_dsts; ++dst_index) { + opnd_t dst_opnd = instr_get_dst(instr, dst_index); + uint num_regs_used_by_opnd = (uint)opnd_num_regs_used(dst_opnd); + for (uint opnd_index = 0; opnd_index < num_regs_used_by_opnd; ++opnd_index) { + reg_id_t reg = opnd_get_reg_used(dst_opnd, opnd_index); + encoded_instr[dst_index + REGDEPS_OPND_INDEX] = (byte)reg; + } + } + + /* Encode register source operands, one byte each after the dsts, if present. 
+ */ + for (uint src_index = 0; src_index < num_srcs; ++src_index) { + opnd_t src_opnd = instr_get_src(instr, src_index); + uint num_regs_used_by_opnd = (uint)opnd_num_regs_used(src_opnd); + for (uint opnd_index = 0; opnd_index < num_regs_used_by_opnd; ++opnd_index) { + reg_id_t reg = opnd_get_reg_used(src_opnd, opnd_index); + encoded_instr[src_index + REGDEPS_OPND_INDEX + num_dsts] = (byte)reg; + } + } + + /* Encode the operation size, if there is at least one operand. + */ + opnd_size_t max_opnd_size = instr->operation_size; + if (num_opnds > 0) + encoded_instr[REGDEPS_OP_SIZE_INDEX] = (byte)max_opnd_size; + + /* Compute instruction length including bytes for padding to reach 4 bytes alignment. + * Account for 1 additional byte containing the operation size, if there are + * any operands. + */ + uint num_opnd_bytes = num_opnds > 0 ? num_opnds + 1 : 0; + uint length = + ALIGN_FORWARD(REGDEPS_HEADER_BYTES + num_opnd_bytes, REGDEPS_ALIGN_BYTES); + + /* Compute next instruction's PC as: current PC + instruction length. + */ + return encoded_instr + length; +} diff --git a/core/ir/isa_regdeps/encode.h b/core/ir/isa_regdeps/encode.h new file mode 100644 index 00000000000..b93ce20ab87 --- /dev/null +++ b/core/ir/isa_regdeps/encode.h @@ -0,0 +1,41 @@ +/* ********************************************************** + * Copyright (c) 2024 Google, Inc. All rights reserved. + * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution.
+ * + * * Neither the name of VMware, Inc. nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +#ifndef _REGDEPS_ENCODE_H_ +#define _REGDEPS_ENCODE_H_ 1 + +#include "../globals.h" + +byte * +encode_isa_regdeps(dcontext_t *dcontext, instr_t *instr, byte *encoded_instr); + +#endif /* _REGDEPS_ENCODE_H_ */ diff --git a/core/ir/isa_regdeps/encoding_common.h b/core/ir/isa_regdeps/encoding_common.h new file mode 100644 index 00000000000..31fc77149ca --- /dev/null +++ b/core/ir/isa_regdeps/encoding_common.h @@ -0,0 +1,130 @@ +/* ********************************************************** + * Copyright (c) 2024 Google, Inc. All rights reserved. + * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of VMware, Inc. nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +#ifndef _REGDEPS_ENCODING_COMMON_H_ +#define _REGDEPS_ENCODING_COMMON_H_ + +/* Here we describe the encoding scheme for the Synthetic ISA that is enforced in decode.c + * and encode.c. + * + * Encoded instructions are 4 byte aligned. + * + * All instruction encodings begin with the following 4 header bytes, which follow this + * scheme: + * |----------------------| |--| |----| |----| + * 31.. ..10 9,8 7..4 3..0 + * category eflags #src #dst + * + * - 22 bits, category: it's a high level representation of the opcode of an instruction. + * Each bit represents one category following #dr_instr_category_t. 
Note that an + * instruction can belong to more than one category, hence multiple bits can be set; + * - 2 bits, eflags: most significant bit set to 1 indicates the instruction reads at + * least one arithmetic flag; least significant bit set to 1 indicates the instruction + * writes at least one arithmetic flag; + * - 4 bits, #src: number of source operands (read) that are registers. Registers used in + * memory reference operands of the instruction we are encoding (regardless of whether + * they are source or destination operands) are considered as source operands in the + * encoded instruction because they are always read; + * - 4 bits, #dst: number of destination operands (written) that are registers. + * + * We assume these encoded values to be little-endian. Note that we are only interested + * in register dependencies, hence operands that are not registers, such as immediates or + * memory references, are not present. + * + * Following the 4 header bytes are the bytes for the operation size and for the encoding + * of register operands, if any are present. + * + * The first byte contains the operation size encoded as an OPSZ_ enum value. The + * operation size is the size of the largest source operand, regardless of it being a + * register, a memory reference, or an immediate. + * + * Following the operation size are the register operand IDs. Each register operand is 1 + * byte. The destination operands go first, followed by the source operands. An + * instruction can have up to 8 operands (sources + destinations). 
Note that, because of + * 4 byte alignment, the length of encoded instructions will include padding and is as + * follows: + * - instructions with no operands have only the 4 header bytes (no operation size byte + * nor operand-related bytes); + * - instructions with 1 to 3 operands have a length of 8 bytes (4 header bytes + 1 byte + * for operation size + 3 operand-related/padding bytes); + * - instructions with 4 to 7 operands have a length of 12 bytes; + * - instructions with 8 operands have the maximum length of 16 bytes. + * + * For example, an instruction with 4 operands (1 dst, 3 src) has a length of 12 bytes and + * would be encoded as: + * |----------------------| |--| |----| |----| + * 31.. ..10 9,8 7..4 3..0 + * category eflags #src #dst + * |--------| |--------| |--------| |--------| + * 31.. ..24 23.. ..16 15.. ..8 7.. ..0 + * src_op1 src_op0 dst_op0 op_size + * |--------| |--------| |--------| |--------| + * 31.. ..24 23.. ..16 15.. ..8 7.. ..0 + * padding padding padding src_op2 + * + * Because of 4 byte alignment, the last 3 bytes [31.. ..8] are padding and are undefined + * (i.e., it cannot be assumed that they have been zeroed-out or contain any meaningful + * value). 
+ */ + +#define REGDEPS_CATEGORY_BITS 22 +#define REGDEPS_FLAGS_BITS 2 +#define REGDEPS_NUM_OPND_BITS 4 + +#define REGDEPS_SRC_OPND_SHIFT REGDEPS_NUM_OPND_BITS +#define REGDEPS_FLAGS_SHIFT (2 * REGDEPS_NUM_OPND_BITS) +#define REGDEPS_CATEGORY_SHIFT (2 * REGDEPS_NUM_OPND_BITS + REGDEPS_FLAGS_BITS) + +#define REGDEPS_DST_OPND_MASK ((1U << REGDEPS_NUM_OPND_BITS) - 1) +#define REGDEPS_SRC_OPND_MASK \ + (((1U << REGDEPS_NUM_OPND_BITS) - 1) << REGDEPS_SRC_OPND_SHIFT) +#define REGDEPS_FLAGS_MASK (((1U << REGDEPS_FLAGS_BITS) - 1) << REGDEPS_FLAGS_SHIFT) +#define REGDEPS_CATEGORY_MASK \ + (((1U << REGDEPS_CATEGORY_BITS) - 1) << REGDEPS_CATEGORY_SHIFT) + +#define REGDEPS_INSTR_WRITES_ARITH 0x1 +#define REGDEPS_INSTR_READS_ARITH 0x2 + +#define REGDEPS_HEADER_BYTES 4 +#define REGDEPS_OP_SIZE_INDEX REGDEPS_HEADER_BYTES +#define REGDEPS_OPND_INDEX (REGDEPS_OP_SIZE_INDEX + 1) + +#define REGDEPS_ALIGN_BYTES 4 + +#define REGDEPS_MAX_NUM_OPNDS 8 + +/* Defines the maximum number of non-overlapping registers for any architecture we + * currently support. Currently AARCH64 has the highest number: 198. We round it to 256. + */ +#define REGDEPS_MAX_NUM_REGS 256 + +#endif /* _REGDEPS_ENCODING_COMMON_H_ */ diff --git a/core/ir/riscv64/codec.c b/core/ir/riscv64/codec.c index a0a31e889f6..2977cac8126 100644 --- a/core/ir/riscv64/codec.c +++ b/core/ir/riscv64/codec.c @@ -32,7 +32,9 @@ #include #include "../globals.h" +#include "../isa_regdeps/decode.h" #include "codec.h" +#include "encode_api.h" #include "trie.h" /* RISC-V extended instruction information structure. @@ -1483,6 +1485,14 @@ get_instruction_info(uint opc) byte * decode_common(dcontext_t *dcontext, byte *pc, byte *orig_pc, instr_t *instr) { + /* #DR_ISA_REGDEPS synthetic ISA has its own decoder. + * XXX i#1684: when DR can be built with full dynamic architecture selection we won't + * need to pollute the decoding of other architectures with this synthetic ISA special + * case.
+ */ + if (dr_get_isa_mode(dcontext) == DR_ISA_REGDEPS) + return decode_isa_regdeps(dcontext, pc, instr); + /* Decode instruction width from the opcode. */ int width = instruction_width(*(uint16_t *)pc); /* Start assuming a compressed instruction. Code memory should be 2b aligned. */ diff --git a/core/ir/riscv64/decode.c b/core/ir/riscv64/decode.c index 4677e16bda8..32a635c0867 100644 --- a/core/ir/riscv64/decode.c +++ b/core/ir/riscv64/decode.c @@ -31,6 +31,7 @@ */ #include "../globals.h" +#include "encode_api.h" #include "instr.h" #include "decode.h" #include "codec.h" @@ -38,7 +39,7 @@ bool is_isa_mode_legal(dr_isa_mode_t mode) { - return (mode == DR_ISA_RV64IMAFDC); + return (mode == DR_ISA_RV64IMAFDC || mode == DR_ISA_REGDEPS); } app_pc diff --git a/core/ir/riscv64/instr.c b/core/ir/riscv64/instr.c index 31e3a3882af..5f7c41bb4fb 100644 --- a/core/ir/riscv64/instr.c +++ b/core/ir/riscv64/instr.c @@ -42,9 +42,9 @@ bool instr_set_isa_mode(instr_t *instr, dr_isa_mode_t mode) { - if (mode != DR_ISA_RV64IMAFDC) + if (mode != DR_ISA_RV64IMAFDC && mode != DR_ISA_REGDEPS) return false; - instr->isa_mode = DR_ISA_RV64IMAFDC; + instr->isa_mode = mode; return true; } diff --git a/core/ir/x86/decode.c b/core/ir/x86/decode.c index f01137e3f4a..caceafd62c2 100644 --- a/core/ir/x86/decode.c +++ b/core/ir/x86/decode.c @@ -38,7 +38,9 @@ /* decode.c -- a full x86 decoder */ #include "../globals.h" +#include "../isa_regdeps/decode.h" #include "arch.h" +#include "encode_api.h" #include "instr.h" #include "decode.h" #include "decode_fast.h" @@ -127,9 +129,9 @@ bool is_isa_mode_legal(dr_isa_mode_t mode) { #ifdef X64 - return (mode == DR_ISA_IA32 || mode == DR_ISA_AMD64); + return (mode == DR_ISA_IA32 || mode == DR_ISA_AMD64 || mode == DR_ISA_REGDEPS); #else - return (mode == DR_ISA_IA32); + return (mode == DR_ISA_IA32 || mode == DR_ISA_REGDEPS); #endif } @@ -2579,6 +2581,14 @@ check_is_variable_size(opnd_t op) static byte * decode_common(dcontext_t *dcontext, byte *pc, byte 
*orig_pc, instr_t *instr) { + /* #DR_ISA_REGDEPS synthetic ISA has its own decoder. + * XXX i#1684: when DR can be built with full dynamic architecture selection we won't + * need to pollute the decoding of other architectures with this synthetic ISA special + * case. + */ + if (dr_get_isa_mode(dcontext) == DR_ISA_REGDEPS) + return decode_isa_regdeps(dcontext, pc, instr); + const instr_info_t *info; decode_info_t di; byte *next_pc; diff --git a/core/ir/x86/instr.c b/core/ir/x86/instr.c index 31438fa047d..b611a44241d 100644 --- a/core/ir/x86/instr.c +++ b/core/ir/x86/instr.c @@ -77,10 +77,10 @@ bool instr_set_isa_mode(instr_t *instr, dr_isa_mode_t mode) { #ifdef X64 - if (mode != DR_ISA_IA32 && mode != DR_ISA_AMD64) + if (mode != DR_ISA_IA32 && mode != DR_ISA_AMD64 && mode != DR_ISA_REGDEPS) return false; #else - if (mode != DR_ISA_IA32) + if (mode != DR_ISA_IA32 && mode != DR_ISA_REGDEPS) return false; #endif instr->isa_mode = mode; diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 59c267d5d6d..9f0764c0c9a 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -2036,6 +2036,9 @@ if (NOT ANDROID) endif (AARCH64) endif () +# test synthetic DR_ISA_REGDEPS encoding/decoding +tobuild_api(api.ir_regdeps api/ir_regdeps.c "" "" OFF OFF OFF) + # test static decoder library tobuild_api(api.ir-static api/ir_${sfx}.c "" "" ON OFF OFF) if (AARCH64) diff --git a/suite/tests/api/ir_regdeps.c b/suite/tests/api/ir_regdeps.c new file mode 100644 index 00000000000..b8e08b8221d --- /dev/null +++ b/suite/tests/api/ir_regdeps.c @@ -0,0 +1,356 @@ +/* ********************************************************** + * Copyright (c) 2024 Google, Inc. All rights reserved. 
+ * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of VMware, Inc. nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +#include "configure.h" +#include "dr_api.h" +#include "tools.h" + +#define ASSERT(x) \ + ((void)((!(x)) ? (dr_fprintf(STDERR, "ASSERT FAILURE: %s:%d: %s\n", __FILE__, \ + __LINE__, #x), \ + dr_abort(), 0) \ + : 0)) + +/* We are not exporting the defines in core/ir/isa_regdeps/encoding_common.h, so we + * redefine DR_ISA_REGDEPS alignment requirement here. 
+ */ +#define REGDEPS_ALIGN_BYTES 4 + +static bool +instr_has_only_register_operands(instr_t *instr) +{ + uint num_dsts = (uint)instr_num_dsts(instr); + for (uint dst_index = 0; dst_index < num_dsts; ++dst_index) { + opnd_t dst_opnd = instr_get_dst(instr, dst_index); + if (!opnd_is_reg(dst_opnd)) + return false; + } + + uint num_srcs = (uint)instr_num_srcs(instr); + for (uint src_index = 0; src_index < num_srcs; ++src_index) { + opnd_t src_opnd = instr_get_src(instr, src_index); + if (!opnd_is_reg(src_opnd)) + return false; + } + + return true; +} + +static void +test_instr_encode_decode_synthetic(void *dc, instr_t *instr) +{ + /* Encoded synthetic ISA instructions require 4 byte alignment. + * The largest synthetic encoded instruction has 16 bytes. + */ + byte ALIGN_VAR(REGDEPS_ALIGN_BYTES) bytes[16]; + + /* Convert a real ISA instruction to a synthetic ISA (DR_ISA_REGDEPS) instruction. + */ + instr_t *instr_synthetic_converted = instr_create(dc); + instr_convert_to_isa_regdeps(dc, instr, instr_synthetic_converted); + + /* Check that the converted instruction only has register operands. + */ + ASSERT(instr_has_only_register_operands(instr_synthetic_converted)); + + /* Check that we do not have an opcode for the converted instruction. + */ + ASSERT(instr_get_opcode(instr_synthetic_converted) == OP_INVALID); + + /* Encode the synthetic instruction. + */ + byte *next_pc_encode = instr_encode(dc, instr_synthetic_converted, bytes); + ASSERT(next_pc_encode != NULL); + + /* Create an instruction where we can decode the previously encoded synthetic + * instruction. + */ + instr_t *instr_synthetic_decoded = instr_create(dc); + + dr_isa_mode_t old_isa_mode; + /* DR uses dcontext_t ISA mode to decode instructions. + * Since we are decoding synthetic instructions, we set it to DR_ISA_REGDEPS. + */ + dr_set_isa_mode(dc, DR_ISA_REGDEPS, &old_isa_mode); + /* Decode the encoded synthetic instruction bytes into instr_synthetic_decoded.
+ */ + byte *next_pc_decode = decode(dc, bytes, instr_synthetic_decoded); + dr_set_isa_mode(dc, old_isa_mode, NULL); + ASSERT(next_pc_decode != NULL); + ASSERT(next_pc_encode == next_pc_decode); + /* Check that encode and decode stayed within the bytes buffer. + */ + ASSERT((next_pc_encode - bytes) <= sizeof(bytes)); + ASSERT((next_pc_decode - bytes) <= sizeof(bytes)); + /* Check that the two synthetic instructions are the same. + */ + ASSERT(instr_same(instr_synthetic_converted, instr_synthetic_decoded)); + + instr_destroy(dc, instr); + instr_destroy(dc, instr_synthetic_converted); + instr_destroy(dc, instr_synthetic_decoded); +} + +#ifdef X86_64 +static void +test_instr_create_encode_decode_synthetic_x86_64(void *dc) +{ + byte buf[128]; + instr_t *instr; + + instr = INSTR_CREATE_push(dc, opnd_create_reg(SEG_FS)); + /* Instructions generated by INSTR_CREATE_ or XINST_CREATE_ are not fully decoded.
+ */ + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + + instr = INSTR_CREATE_pop(dc, opnd_create_reg(SEG_FS)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + + opnd_t abs_addr = opnd_create_abs_addr((void *)0xdeadbeefdeadbeef, OPSZ_8); + instr = INSTR_CREATE_mov_ld(dc, opnd_create_reg(DR_REG_RAX), abs_addr); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + + instr = INSTR_CREATE_cmps_1(dc); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + + instr = INSTR_CREATE_maskmovq(dc, opnd_create_reg(DR_REG_MM0), + opnd_create_reg(DR_REG_MM1)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + + instr = + INSTR_CREATE_xchg(dc, opnd_create_reg(DR_REG_R8D), opnd_create_reg(DR_REG_EAX)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + + instr = INSTR_CREATE_add(dc, opnd_create_reg(DR_REG_RAX), OPND_CREATE_INT32(42)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + + instr_t *tgt = INSTR_CREATE_mov_imm(dc, opnd_create_reg(DR_REG_XAX), + opnd_create_immed_int(0xdeadbeef, OPSZ_PTR)); + instr = INSTR_CREATE_jmp_ind(dc, opnd_create_mem_instr(tgt, 2, OPSZ_PTR)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + instr_destroy(dc, tgt); + + instr = + INSTR_CREATE_bsf(dc, opnd_create_reg(DR_REG_EAX), opnd_create_reg(DR_REG_ECX)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, 
instr); + test_instr_encode_decode_synthetic(dc, instr); +} +#endif + +#ifdef ARM +static void +test_instr_create_encode_decode_synthetic_arm(void *dc) +{ + byte buf[128]; + instr_t *instr; + + instr = INSTR_CREATE_lsls(dc, opnd_create_reg(DR_REG_R0), opnd_create_reg(DR_REG_R1), + OPND_CREATE_INT(4)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + + instr = INSTR_CREATE_sel(dc, opnd_create_reg(DR_REG_R0), opnd_create_reg(DR_REG_R1), + opnd_create_reg(DR_REG_R1)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + + instr = INSTR_CREATE_movs(dc, opnd_create_reg(DR_REG_R0), OPND_CREATE_INT(4)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + + instr = INSTR_CREATE_movs(dc, opnd_create_reg(DR_REG_R0), opnd_create_reg(DR_REG_R1)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); +} +#endif + +#ifdef AARCH64 +static void +test_instr_create_encode_decode_synthetic_aarch64(void *dc) +{ + byte buf[128]; + instr_t *instr; + + instr = INSTR_CREATE_add(dc, opnd_create_reg(DR_REG_X0), opnd_create_reg(DR_REG_SP), + opnd_create_reg(DR_REG_X1)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + + instr = INSTR_CREATE_sub(dc, opnd_create_reg(DR_REG_X0), opnd_create_reg(DR_REG_SP), + opnd_create_reg(DR_REG_X1)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + + instr = + INSTR_CREATE_adds_imm(dc, opnd_create_reg(DR_REG_W0), opnd_create_reg(DR_REG_W1), + opnd_create_immed_int(0, OPSZ_12b), OPND_CREATE_INT8(0)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + 
decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + + instr = INSTR_CREATE_adc(dc, opnd_create_reg(DR_REG_W0), opnd_create_reg(DR_REG_W1), + opnd_create_reg(DR_REG_W2)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + + instr = INSTR_CREATE_ldpsw( + dc, opnd_create_reg(DR_REG_X1), opnd_create_reg(DR_REG_X2), + opnd_create_reg(DR_REG_X0), + opnd_create_base_disp_aarch64(DR_REG_X0, DR_REG_NULL, 0, false, 4, 0, OPSZ_8), + OPND_CREATE_INT(4)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); +} +#endif + +#ifdef RISCV64 +static void +test_instr_create_encode_decode_synthetic_riscv64(void *dc) +{ + byte buf[128]; + instr_t *instr; + + instr = + INSTR_CREATE_lwu(dc, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(DR_REG_X31, DR_REG_NULL, 0, 0, OPSZ_4)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + + instr = INSTR_CREATE_sw( + dc, opnd_create_base_disp(DR_REG_X31, DR_REG_NULL, 0, (1 << 11) - 1, OPSZ_4), + opnd_create_reg(DR_REG_X0)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + + instr = INSTR_CREATE_flw(dc, opnd_create_reg(DR_REG_F0), + opnd_create_base_disp(DR_REG_A1, DR_REG_NULL, 0, 0, OPSZ_4)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + + instr = + INSTR_CREATE_lr_d(dc, opnd_create_reg(DR_REG_X0), + opnd_create_base_disp(DR_REG_X31, DR_REG_NULL, 0, 0, OPSZ_8), + opnd_create_immed_int(0b10, OPSZ_2b)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); + + instr = INSTR_CREATE_fmadd_d(dc, opnd_create_reg(DR_REG_F31), + 
opnd_create_immed_int(0b000, OPSZ_3b), + opnd_create_reg(DR_REG_F0), opnd_create_reg(DR_REG_F2), + opnd_create_reg(DR_REG_F3)); + instr_encode(dc, instr, buf); + instr_reset(dc, instr); + decode(dc, buf, instr); + test_instr_encode_decode_synthetic(dc, instr); +} +#endif + +int +main(int argc, char *argv[]) +{ + void *dcontext = dr_standalone_init(); + ASSERT(!dr_running_under_dynamorio()); + +#ifdef X86_64 + test_instr_create_encode_decode_synthetic_x86_64(dcontext); +#endif + +#ifdef ARM + test_instr_create_encode_decode_synthetic_arm(dcontext); +#endif + +#ifdef AARCH64 + test_instr_create_encode_decode_synthetic_aarch64(dcontext); +#endif + +#ifdef RISCV64 + test_instr_create_encode_decode_synthetic_riscv64(dcontext); +#endif + + print("All DR_ISA_REGDEPS tests are done.\n"); + dr_standalone_exit(); + return 0; +} diff --git a/suite/tests/api/ir_regdeps.expect b/suite/tests/api/ir_regdeps.expect new file mode 100644 index 00000000000..c430108c53a --- /dev/null +++ b/suite/tests/api/ir_regdeps.expect @@ -0,0 +1 @@ +All DR_ISA_REGDEPS tests are done.