From 8c441586503b8d424eaffe1c9152bd1a6f66d3c0 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Thu, 4 Apr 2024 20:23:06 -0400 Subject: [PATCH 1/4] Fix AArch64 fp reg state save/restore Fixes the slot used to save and restore FP regs at fcache enter and return events. PR #6725 adjusted the slots used during signal handling in core/unix/signal_linux_aarch64.c but did not adjust the same in fcache enter/exit and attach events. Prior to that the FP regs were simply stored in a continuous manner in signal handling code, and fcache enter/exit routines. The mismatch between slot usage caused failueres in the signalNNN1 tests on some systems. --- core/arch/aarch64/aarch64.asm | 48 ++++++++++++++++++++++++++-------- core/arch/aarch64/emit_utils.c | 29 ++++++++++++-------- 2 files changed, 55 insertions(+), 22 deletions(-) diff --git a/core/arch/aarch64/aarch64.asm b/core/arch/aarch64/aarch64.asm index bc8ec7af5f4..7d770c2dfe1 100644 --- a/core/arch/aarch64/aarch64.asm +++ b/core/arch/aarch64/aarch64.asm @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2019-2023 Google, Inc. All rights reserved. + * Copyright (c) 2019-2024 Google, Inc. All rights reserved. * Copyright (c) 2016 ARM Limited. All rights reserved. * **********************************************************/ @@ -226,16 +226,42 @@ save_priv_mcontext_helper: str w2, [x0, #(16 * ARG_SZ*2 + 12)] str w3, [x0, #(16 * ARG_SZ*2 + 16)] add x4, x0, #simd_OFFSET - st1 {v0.2d-v3.2d}, [x4], #64 - st1 {v4.2d-v7.2d}, [x4], #64 - st1 {v8.2d-v11.2d}, [x4], #64 - st1 {v12.2d-v15.2d}, [x4], #64 - st1 {v16.2d-v19.2d}, [x4], #64 - st1 {v20.2d-v23.2d}, [x4], #64 - st1 {v24.2d-v27.2d}, [x4], #64 - st1 {v28.2d-v31.2d}, [x4], #64 - /* TODO i#5365: Save Z/P regs as well? Will require runtime check of - * ID_AA64PFR0_EL1 for FEAT_SVE. + + /* Registers Q0-Q31 map directly to registers V0-V31. */ + str q0, [x4], #64 + str q1, [x4], #64 + str q2, [x4], #64 + str q3, [x4], #64 + str q4, [x4], #64 + str q5, [x4], #64 + str q6, [x4], #64 + str q7, [x4], #64 + str q8, [x4], #64 + str q9, [x4], #64 + str q10, [x4], #64 + str q11, [x4], #64 + str q12, [x4], #64 + str q13, [x4], #64 + str q14, [x4], #64 + str q15, [x4], #64 + str q16, [x4], #64 + str q17, [x4], #64 + str q18, [x4], #64 + str q19, [x4], #64 + str q20, [x4], #64 + str q21, [x4], #64 + str q22, [x4], #64 + str q23, [x4], #64 + str q24, [x4], #64 + str q25, [x4], #64 + str q26, [x4], #64 + str q27, [x4], #64 + str q28, [x4], #64 + str q29, [x4], #64 + str q30, [x4], #64 + str q31, [x4], #64 + /* TODO i#5365, i#5036: Save Z/P regs as well? Will require runtime + * check of ID_AA64PFR0_EL1 for FEAT_SVE. */ ret diff --git a/core/arch/aarch64/emit_utils.c b/core/arch/aarch64/emit_utils.c index ba5066ee0ad..d8304cd919c 100644 --- a/core/arch/aarch64/emit_utils.c +++ b/core/arch/aarch64/emit_utils.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2014-2021 Google, Inc. All rights reserved. + * Copyright (c) 2014-2024 Google, Inc. All rights reserved. * Copyright (c) 2016 ARM Limited. All rights reserved. * **********************************************************/ @@ -567,15 +567,18 @@ append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) XINST_CREATE_add_2src(dcontext, opnd_create_reg(DR_REG_X1), opnd_create_reg(REG_DCXT), OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, simd)))); - for (i = 0; i < 32; i += 2) { - /* ldp q(i), q(i + 1), [x1, #(i * 16)] */ + ASSERT(sizeof(dr_simd_t) == 64); + for (i = 0; i < 32; i ++) { + /* ldr q(i), [x1, #(i * sizeof(dr_simd_t))] */ APP(ilist, - INSTR_CREATE_ldp( + INSTR_CREATE_ldr( dcontext, opnd_create_reg(DR_REG_Q0 + i), - opnd_create_reg(DR_REG_Q0 + i + 1), - opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, i * 16, OPSZ_32))); + opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, i * 64, OPSZ_16))); } if (proc_has_feature(FEATURE_SVE)) { + /* TODO i#5036: Ensure that the following uses the correct offset for the + * dr_simd_t element corresponding to each vector reg. + */ for (i = 0; i < 32; i++) { /* ldr z(i), [x1, #(i mul vl)] * From the SVE manual: @@ -778,20 +781,24 @@ append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) XINST_CREATE_add_2src(dcontext, opnd_create_reg(DR_REG_X1), opnd_create_reg(REG_DCXT), OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, simd)))); - for (i = 0; i < 32; i += 2) { - /* stp q(i), q(i + 1), [x1, #(i * 16)] + ASSERT(sizeof(dr_simd_t) == 64); + for (i = 0; i < 32; i++) { + /* str q(i), [x1, #(i * sizeof(dr_simd_t))] * From the AArch64 manual: * "The signed immediate byte offset is a multiple of 16 in the range * -1024 to 1008, defaulting to 0 and encoded in the imm7 field as * /16." */ APP(ilist, - INSTR_CREATE_stp( + INSTR_CREATE_str( dcontext, - opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, i * 16, OPSZ_32), - opnd_create_reg(DR_REG_Q0 + i), opnd_create_reg(DR_REG_Q0 + i + 1))); + opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, i * 64, OPSZ_16), + opnd_create_reg(DR_REG_Q0 + i))); } if (proc_has_feature(FEATURE_SVE)) { + /* TODO i#5036: Ensure that the following uses the correct offset for the + * dr_simd_t element corresponding to each vector reg. + */ for (i = 0; i < 32; i++) { /* str z(i), [x1, #(i mul vl)] * "Store a vector register to a memory address generated by a From 257704fae1fbd3be091ee5ebc3ad253507e6b2dc Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Thu, 4 Apr 2024 20:26:17 -0400 Subject: [PATCH 2/4] Clang format --- core/arch/aarch64/emit_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/arch/aarch64/emit_utils.c b/core/arch/aarch64/emit_utils.c index d8304cd919c..395da96f2c8 100644 --- a/core/arch/aarch64/emit_utils.c +++ b/core/arch/aarch64/emit_utils.c @@ -568,7 +568,7 @@ append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) opnd_create_reg(REG_DCXT), OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, simd)))); ASSERT(sizeof(dr_simd_t) == 64); - for (i = 0; i < 32; i ++) { + for (i = 0; i < 32; i++) { /* ldr q(i), [x1, #(i * sizeof(dr_simd_t))] */ APP(ilist, INSTR_CREATE_ldr( From 73d3f366e856e62bd742abd9f856ab5c121c0cad Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Thu, 4 Apr 2024 23:59:25 -0400 Subject: [PATCH 3/4] Fix spill slot used for sve --- core/arch/aarch64/emit_utils.c | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/core/arch/aarch64/emit_utils.c b/core/arch/aarch64/emit_utils.c index 395da96f2c8..61d8a637caf 100644 --- a/core/arch/aarch64/emit_utils.c +++ b/core/arch/aarch64/emit_utils.c @@ -567,18 +567,14 @@ append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) XINST_CREATE_add_2src(dcontext, opnd_create_reg(DR_REG_X1), opnd_create_reg(REG_DCXT), OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, simd)))); - ASSERT(sizeof(dr_simd_t) == 64); for (i = 0; i < 32; i++) { /* ldr q(i), [x1, #(i * sizeof(dr_simd_t))] */ APP(ilist, - INSTR_CREATE_ldr( - dcontext, opnd_create_reg(DR_REG_Q0 + i), - opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, i * 64, OPSZ_16))); + INSTR_CREATE_ldr(dcontext, opnd_create_reg(DR_REG_Q0 + i), + opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, + i * sizeof(dr_simd_t), OPSZ_16))); } if (proc_has_feature(FEATURE_SVE)) { - /* TODO i#5036: Ensure that the following uses the correct offset for the - * dr_simd_t element corresponding to each vector reg. - */ for (i = 0; i < 32; i++) { /* ldr z(i), [x1, #(i mul vl)] * From the SVE manual: @@ -591,7 +587,7 @@ append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) INSTR_CREATE_ldr( dcontext, opnd_create_reg(DR_REG_Z0 + i), opnd_create_base_disp( - DR_REG_X1, DR_REG_NULL, 0, i * proc_get_vector_length_bytes(), + DR_REG_X1, DR_REG_NULL, 0, i * sizeof(dr_simd_t), opnd_size_from_bytes(proc_get_vector_length_bytes())))); } /* add x1, x(dcxt), #(offset svep) */ @@ -608,8 +604,7 @@ append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) INSTR_CREATE_ldr( dcontext, opnd_create_reg(DR_REG_P0 + i), opnd_create_base_disp( - DR_REG_X1, DR_REG_NULL, 0, - i * (proc_get_vector_length_bytes() / 8), + DR_REG_X1, DR_REG_NULL, 0, i * sizeof(dr_simd_t), opnd_size_from_bytes(proc_get_vector_length_bytes() / 8)))); } /* There is no load instruction for the first-fault register (FFR). Use @@ -781,7 +776,6 @@ append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) XINST_CREATE_add_2src(dcontext, opnd_create_reg(DR_REG_X1), opnd_create_reg(REG_DCXT), OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, simd)))); - ASSERT(sizeof(dr_simd_t) == 64); for (i = 0; i < 32; i++) { /* str q(i), [x1, #(i * sizeof(dr_simd_t))] * From the AArch64 manual: @@ -790,15 +784,12 @@ append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) * /16." */ APP(ilist, - INSTR_CREATE_str( - dcontext, - opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, i * 64, OPSZ_16), - opnd_create_reg(DR_REG_Q0 + i))); + INSTR_CREATE_str(dcontext, + opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, + i * sizeof(dr_simd_t), OPSZ_16), + opnd_create_reg(DR_REG_Q0 + i))); } if (proc_has_feature(FEATURE_SVE)) { - /* TODO i#5036: Ensure that the following uses the correct offset for the - * dr_simd_t element corresponding to each vector reg. - */ for (i = 0; i < 32; i++) { /* str z(i), [x1, #(i mul vl)] * "Store a vector register to a memory address generated by a @@ -810,7 +801,7 @@ append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) INSTR_CREATE_str( dcontext, opnd_create_base_disp( - DR_REG_X1, DR_REG_NULL, 0, i * proc_get_vector_length_bytes(), + DR_REG_X1, DR_REG_NULL, 0, i * sizeof(dr_simd_t), opnd_size_from_bytes(proc_get_vector_length_bytes())), opnd_create_reg(DR_REG_Z0 + i))); } @@ -825,8 +816,7 @@ append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) INSTR_CREATE_str( dcontext, opnd_create_base_disp( - DR_REG_X1, DR_REG_NULL, 0, - i * (proc_get_vector_length_bytes() / 8), + DR_REG_X1, DR_REG_NULL, 0, i * sizeof(dr_simd_t), opnd_size_from_bytes(proc_get_vector_length_bytes() / 8)), opnd_create_reg(DR_REG_P0 + i))); } From 89e0698ac59b5b00cbe7e88e7f0be236ca6a4d72 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Fri, 5 Apr 2024 06:11:14 -0400 Subject: [PATCH 4/4] update other instances --- core/arch/aarch64/emit_utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/arch/aarch64/emit_utils.c b/core/arch/aarch64/emit_utils.c index 61d8a637caf..cd7656f950f 100644 --- a/core/arch/aarch64/emit_utils.c +++ b/core/arch/aarch64/emit_utils.c @@ -631,7 +631,7 @@ append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) INSTR_CREATE_ldr( dcontext, opnd_create_reg(DR_REG_P15), opnd_create_base_disp( - DR_REG_X1, DR_REG_NULL, 0, 15 * (proc_get_vector_length_bytes() / 8), + DR_REG_X1, DR_REG_NULL, 0, 15 * sizeof(dr_simd_t), opnd_size_from_bytes(proc_get_vector_length_bytes() / 8)))); } } @@ -845,7 +845,7 @@ append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) INSTR_CREATE_ldr( dcontext, opnd_create_reg(DR_REG_P15), opnd_create_base_disp( - DR_REG_X1, DR_REG_NULL, 0, 15 * (proc_get_vector_length_bytes() / 8), + DR_REG_X1, DR_REG_NULL, 0, 15 * sizeof(dr_simd_t), opnd_size_from_bytes(proc_get_vector_length_bytes() / 8)))); } }