From c26ce09343b083e7f260d72e2c7eb9c7d2ba07d2 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Tue, 23 Apr 2024 21:53:02 +0800 Subject: [PATCH] Make sure allocated buffers live through GC When allocating an `RMatch` instance, make sure the buffers allocated for `registers.{beg,end}` fields are properly rooted so that they live through GC if allocating `RMatch` triggers a GC. Fixes: https://github.com/mmtk/ruby/issues/67 --- re.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ regexec.c | 21 +++++++++++++++++---- 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/re.c b/re.c index 202425756ec3a3..b315e58e2b15a6 100644 --- a/re.c +++ b/re.c @@ -1122,6 +1122,16 @@ match_init_copy(VALUE obj, VALUE orig) RB_OBJ_WRITE(obj, &RMATCH(obj)->regexp, RMATCH(orig)->regexp); rm = RMATCH_EXT(obj); + +#if USE_MMTK + if (rb_mmtk_enabled_p()) { + // The rb_reg_region_copy below may write to `obj` (`rm->registers.{beg,end}`). + // We apply write barrier here. It's probably not necessary because the `RB_OBJ_WRITE` + // above executes the same object-remembering operation. + rb_gc_writebarrier_remember(obj); + } +#endif + if (rb_reg_region_copy(&rm->regs, RMATCH_REGS(orig))) rb_memerror(); @@ -1501,6 +1511,15 @@ match_set_string(VALUE m, VALUE string, long pos, long len) RB_OBJ_WRITE(match, &RMATCH(match)->str, string); RB_OBJ_WRITE(match, &RMATCH(match)->regexp, Qnil); + +#if USE_MMTK + if (rb_mmtk_enabled_p()) { + // The onig_region_resize below may write to `m` (`rmatch->registers.{beg,end}`). + // We apply write barrier here. It's probably not necessary because the `RB_OBJ_WRITE` + // above executes the same object-remembering operation. + rb_gc_writebarrier_remember(m); + } +#endif int err = onig_region_resize(&rmatch->regs, 1); if (err) rb_memerror(); rmatch->regs.beg[0] = pos; @@ -1825,10 +1844,35 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back return ONIG_MISMATCH; } +#if USE_MMTK + VALUE root_beg; + VALUE root_end; + if (rb_mmtk_enabled_p()) { + // When using MMTk, the `beg` and `end` fields of `re_registers` point to heap objects, + // but are interior pointers. The conservative stack scanner will not recognize interior + // pointers as object references. We compute the pointers to the beginning of those + // objects and use RB_GC_GUARD to keep them on the stack so that even if the `match_alloc` + // invocation triggers GC, the `beg` and `end` will still be kept alive. + root_beg = (VALUE)rb_mmtk_chars_to_strbuf((char*)args.regs.beg); + root_end = (VALUE)rb_mmtk_chars_to_strbuf((char*)args.regs.end); + } else { + root_beg = root_end = Qnil; + } +#endif + + // MMTk note: `match_alloc` may trigger GC. VALUE match = match_alloc(rb_cMatch); rb_matchext_t *rm = RMATCH_EXT(match); rm->regs = args.regs; +#if USE_MMTK + // Guard `root_beg` and `root_end` until here. Now that `args.regs` has been assigned to a + // field of `match`, the conservative stack scanner will pick up the `match` variable, and + // `gc_mark_children` will take care of the interior pointers when scanning the `T_MATCH`. + RB_GC_GUARD(root_beg); + RB_GC_GUARD(root_end); +#endif + if (set_backref_str) { RB_OBJ_WRITE(match, &RMATCH(match)->str, rb_str_new4(str)); } diff --git a/regexec.c b/regexec.c index 6de6444b28a74e..1044bc6e8ed820 100644 --- a/regexec.c +++ b/regexec.c @@ -889,9 +889,10 @@ onig_region_clear(OnigRegion* region) #if USE_MMTK static OnigPosition* -rb_mmtk_onig_position_array_alloc(size_t len) +rb_mmtk_onig_position_array_alloc(size_t len, VALUE *root_var) { rb_mmtk_strbuf_t *strbuf = rb_mmtk_new_strbuf(len * sizeof(OnigPosition)); + *root_var = (VALUE)strbuf; OnigPosition *result = (OnigPosition*)rb_mmtk_strbuf_to_chars(strbuf); return result; } @@ -931,13 +932,25 @@ onig_region_resize(OnigRegion* region, int n) } #if USE_MMTK } else { - OnigPosition *new_beg = rb_mmtk_onig_position_array_alloc(n); - OnigPosition *new_end = rb_mmtk_onig_position_array_alloc(n); - // TODO: Use write barrier when writing these fields. + VALUE root_beg; + VALUE root_end; + OnigPosition *new_beg = rb_mmtk_onig_position_array_alloc(n, &root_beg); // May trigger GC. + OnigPosition *new_end = rb_mmtk_onig_position_array_alloc(n, &root_end); // May trigger GC. + + // About write barriers: The following assignments may write to fields of `RMatch` or local + // variables on the stack, depending on the call site. This file `regexec.c` knows nothing + // about the Ruby-level `RMatch` object, so we can't apply write barriers here. The caller + // should apply `rb_gc_writebarrier_remember` to the `RMatch` object if applicable. region->beg = new_beg; region->end = new_end; + // Currently MMTk panicks when out of memory. We can customize the OOM handling in the // `mmtk-ruby` binding so that it returns NULL instead. + + // Prevent new_beg and `new_end from being collected. Actually we only need to guard + // `root_beg`, but we do the same for `root_end` just for consistency. + RB_GC_GUARD(root_beg); + RB_GC_GUARD(root_end); } #endif