From a0b86cc401ca4f50ebe6545d5349ecb34ed3a822 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Fri, 5 Apr 2024 12:24:14 -0400 Subject: [PATCH] i#6758: Fix AArch64 FP state at fcache events and attach (#6757) Fixes the slot used to save and restore FP regs at fcache enter and return events. PR #6725 adjusted the slots used during signal handling in core/unix/signal_linux_aarch64.c but did not adjust the same in fcache enter/return and attach events. Prior to that PR, the FP regs were simply stored in a contiguous manner in signal handling code and fcache enter/return routines (instead of in their respective dr_simd_t struct member), which was a bit confusing. The mismatch between slot usage in signal handling and fcache enter/return code caused failures in the signalNNN1 tests on some systems. Verified that those tests pass with this fix. Also fixes the same issue in save_priv_mcontext_helper which is used in the dr_app_start API. Unit tests for this scenario will be added as part of #6759. Issue: #5036, #6755, #5365 Fixes #6758 --- core/arch/aarch64/aarch64.asm | 48 ++++++++++++++++++++++++++-------- core/arch/aarch64/emit_utils.c | 39 +++++++++++++-------------- 2 files changed, 55 insertions(+), 32 deletions(-) diff --git a/core/arch/aarch64/aarch64.asm b/core/arch/aarch64/aarch64.asm index bc8ec7af5f4..7d770c2dfe1 100644 --- a/core/arch/aarch64/aarch64.asm +++ b/core/arch/aarch64/aarch64.asm @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2019-2023 Google, Inc. All rights reserved. + * Copyright (c) 2019-2024 Google, Inc. All rights reserved. * Copyright (c) 2016 ARM Limited. All rights reserved. * **********************************************************/ @@ -226,16 +226,42 @@ save_priv_mcontext_helper: str w2, [x0, #(16 * ARG_SZ*2 + 12)] str w3, [x0, #(16 * ARG_SZ*2 + 16)] add x4, x0, #simd_OFFSET - st1 {v0.2d-v3.2d}, [x4], #64 - st1 {v4.2d-v7.2d}, [x4], #64 - st1 {v8.2d-v11.2d}, [x4], #64 - st1 {v12.2d-v15.2d}, [x4], #64 - st1 {v16.2d-v19.2d}, [x4], #64 - st1 {v20.2d-v23.2d}, [x4], #64 - st1 {v24.2d-v27.2d}, [x4], #64 - st1 {v28.2d-v31.2d}, [x4], #64 - /* TODO i#5365: Save Z/P regs as well? Will require runtime check of - * ID_AA64PFR0_EL1 for FEAT_SVE. + + /* Registers Q0-Q31 map directly to registers V0-V31. */ + str q0, [x4], #64 + str q1, [x4], #64 + str q2, [x4], #64 + str q3, [x4], #64 + str q4, [x4], #64 + str q5, [x4], #64 + str q6, [x4], #64 + str q7, [x4], #64 + str q8, [x4], #64 + str q9, [x4], #64 + str q10, [x4], #64 + str q11, [x4], #64 + str q12, [x4], #64 + str q13, [x4], #64 + str q14, [x4], #64 + str q15, [x4], #64 + str q16, [x4], #64 + str q17, [x4], #64 + str q18, [x4], #64 + str q19, [x4], #64 + str q20, [x4], #64 + str q21, [x4], #64 + str q22, [x4], #64 + str q23, [x4], #64 + str q24, [x4], #64 + str q25, [x4], #64 + str q26, [x4], #64 + str q27, [x4], #64 + str q28, [x4], #64 + str q29, [x4], #64 + str q30, [x4], #64 + str q31, [x4], #64 + /* TODO i#5365, i#5036: Save Z/P regs as well? Will require runtime + * check of ID_AA64PFR0_EL1 for FEAT_SVE. */ ret diff --git a/core/arch/aarch64/emit_utils.c b/core/arch/aarch64/emit_utils.c index ba5066ee0ad..cd7656f950f 100644 --- a/core/arch/aarch64/emit_utils.c +++ b/core/arch/aarch64/emit_utils.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2014-2021 Google, Inc. All rights reserved. + * Copyright (c) 2014-2024 Google, Inc. All rights reserved. * Copyright (c) 2016 ARM Limited. All rights reserved. * **********************************************************/ @@ -567,13 +567,12 @@ append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) XINST_CREATE_add_2src(dcontext, opnd_create_reg(DR_REG_X1), opnd_create_reg(REG_DCXT), OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, simd)))); - for (i = 0; i < 32; i += 2) { - /* ldp q(i), q(i + 1), [x1, #(i * 16)] */ + for (i = 0; i < 32; i++) { + /* ldr q(i), [x1, #(i * sizeof(dr_simd_t))] */ APP(ilist, - INSTR_CREATE_ldp( - dcontext, opnd_create_reg(DR_REG_Q0 + i), - opnd_create_reg(DR_REG_Q0 + i + 1), - opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, i * 16, OPSZ_32))); + INSTR_CREATE_ldr(dcontext, opnd_create_reg(DR_REG_Q0 + i), + opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, + i * sizeof(dr_simd_t), OPSZ_16))); } if (proc_has_feature(FEATURE_SVE)) { for (i = 0; i < 32; i++) { @@ -588,7 +587,7 @@ append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) INSTR_CREATE_ldr( dcontext, opnd_create_reg(DR_REG_Z0 + i), opnd_create_base_disp( - DR_REG_X1, DR_REG_NULL, 0, i * proc_get_vector_length_bytes(), + DR_REG_X1, DR_REG_NULL, 0, i * sizeof(dr_simd_t), opnd_size_from_bytes(proc_get_vector_length_bytes())))); } /* add x1, x(dcxt), #(offset svep) */ @@ -605,8 +604,7 @@ append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) INSTR_CREATE_ldr( dcontext, opnd_create_reg(DR_REG_P0 + i), opnd_create_base_disp( - DR_REG_X1, DR_REG_NULL, 0, - i * (proc_get_vector_length_bytes() / 8), + DR_REG_X1, DR_REG_NULL, 0, i * sizeof(dr_simd_t), opnd_size_from_bytes(proc_get_vector_length_bytes() / 8)))); } /* There is no load instruction for the first-fault register (FFR). Use @@ -633,7 +631,7 @@ append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) INSTR_CREATE_ldr( dcontext, opnd_create_reg(DR_REG_P15), opnd_create_base_disp( - DR_REG_X1, DR_REG_NULL, 0, 15 * (proc_get_vector_length_bytes() / 8), + DR_REG_X1, DR_REG_NULL, 0, 15 * sizeof(dr_simd_t), opnd_size_from_bytes(proc_get_vector_length_bytes() / 8)))); } } @@ -778,18 +776,18 @@ append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) XINST_CREATE_add_2src(dcontext, opnd_create_reg(DR_REG_X1), opnd_create_reg(REG_DCXT), OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, simd)))); - for (i = 0; i < 32; i += 2) { - /* stp q(i), q(i + 1), [x1, #(i * 16)] + for (i = 0; i < 32; i++) { + /* str q(i), [x1, #(i * sizeof(dr_simd_t))] * From the AArch64 manual: * "The signed immediate byte offset is a multiple of 16 in the range * -1024 to 1008, defaulting to 0 and encoded in the imm7 field as * /16." */ APP(ilist, - INSTR_CREATE_stp( - dcontext, - opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, i * 16, OPSZ_32), - opnd_create_reg(DR_REG_Q0 + i), opnd_create_reg(DR_REG_Q0 + i + 1))); + INSTR_CREATE_str(dcontext, + opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, + i * sizeof(dr_simd_t), OPSZ_16), + opnd_create_reg(DR_REG_Q0 + i))); } if (proc_has_feature(FEATURE_SVE)) { for (i = 0; i < 32; i++) { @@ -803,7 +801,7 @@ append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) INSTR_CREATE_str( dcontext, opnd_create_base_disp( - DR_REG_X1, DR_REG_NULL, 0, i * proc_get_vector_length_bytes(), + DR_REG_X1, DR_REG_NULL, 0, i * sizeof(dr_simd_t), opnd_size_from_bytes(proc_get_vector_length_bytes())), opnd_create_reg(DR_REG_Z0 + i))); } @@ -818,8 +816,7 @@ append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) INSTR_CREATE_str( dcontext, opnd_create_base_disp( - DR_REG_X1, DR_REG_NULL, 0, - i * (proc_get_vector_length_bytes() / 8), + DR_REG_X1, DR_REG_NULL, 0, i * sizeof(dr_simd_t), opnd_size_from_bytes(proc_get_vector_length_bytes() / 8)), opnd_create_reg(DR_REG_P0 + i))); } @@ -848,7 +845,7 @@ append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) INSTR_CREATE_ldr( dcontext, opnd_create_reg(DR_REG_P15), opnd_create_base_disp( - DR_REG_X1, DR_REG_NULL, 0, 15 * (proc_get_vector_length_bytes() / 8), + DR_REG_X1, DR_REG_NULL, 0, 15 * sizeof(dr_simd_t), opnd_size_from_bytes(proc_get_vector_length_bytes() / 8)))); } }