Skip to content

Commit

Permalink
Make sure allocated buffers live through GC
Browse files Browse the repository at this point in the history
When allocating an `RMatch` instance, make sure the buffers allocated
for `registers.{beg,end}` fields are properly rooted so that they live
through GC if allocating `RMatch` triggers a GC.
  • Loading branch information
wks committed Apr 23, 2024
1 parent 83e1407 commit 767b301
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 4 deletions.
44 changes: 44 additions & 0 deletions re.c
Original file line number Diff line number Diff line change
Expand Up @@ -1122,6 +1122,16 @@ match_init_copy(VALUE obj, VALUE orig)
RB_OBJ_WRITE(obj, &RMATCH(obj)->regexp, RMATCH(orig)->regexp);

rm = RMATCH_EXT(obj);

#if USE_MMTK
if (rb_mmtk_enabled_p()) {
// The rb_reg_region_copy below may write to `obj` (`rm->registers.{beg,end}`).
// We apply write barrier here. It's probably not necessary because the `RB_OBJ_WRITE`
// above executes the same object-remembering operation.
rb_gc_writebarrier_remember(obj);
}
#endif

if (rb_reg_region_copy(&rm->regs, RMATCH_REGS(orig)))
rb_memerror();

Expand Down Expand Up @@ -1501,6 +1511,15 @@ match_set_string(VALUE m, VALUE string, long pos, long len)

RB_OBJ_WRITE(match, &RMATCH(match)->str, string);
RB_OBJ_WRITE(match, &RMATCH(match)->regexp, Qnil);

#if USE_MMTK
if (rb_mmtk_enabled_p()) {
// The onig_region_resize below may write to `m` (`rmatch->registers.{beg,end}`).
// We apply write barrier here. It's probably not necessary because the `RB_OBJ_WRITE`
// above executes the same object-remembering operation.
rb_gc_writebarrier_remember(m);
}
#endif
int err = onig_region_resize(&rmatch->regs, 1);
if (err) rb_memerror();
rmatch->regs.beg[0] = pos;
Expand Down Expand Up @@ -1825,10 +1844,35 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back
return ONIG_MISMATCH;
}

#if USE_MMTK
VALUE root_beg;
VALUE root_end;
if (rb_mmtk_enabled_p()) {
// When using MMTk, the `beg` and `end` fields of `re_registers` point to heap objects,
// but are interior pointers. The conservative stack scanner will not recognize interior
// pointers as object references. We compute the pointers to the beginning of those
// objects and use RB_GC_GUARD to keep them on the stack so that even if the `match_alloc`
// invocation triggers GC, the `beg` and `end` will still be kept alive.
root_beg = (VALUE)rb_mmtk_chars_to_strbuf((char*)args.regs.beg);
root_end = (VALUE)rb_mmtk_chars_to_strbuf((char*)args.regs.end);
} else {
root_beg = root_end = Qnil;
}
#endif

// MMTk note: `match_alloc` may trigger GC.
VALUE match = match_alloc(rb_cMatch);
rb_matchext_t *rm = RMATCH_EXT(match);
rm->regs = args.regs;

#if USE_MMTK
// Guard `root_beg` and `root_end` until here. Now that `args.regs` has been assigned to a
// field of `match`, the conservative stack scanner will pick up the `match` variable, and
// `gc_mark_children` will take care of the interior pointers when scanning the `T_MATCH`.
RB_GC_GUARD(root_beg);
RB_GC_GUARD(root_end);
#endif

if (set_backref_str) {
RB_OBJ_WRITE(match, &RMATCH(match)->str, rb_str_new4(str));
}
Expand Down
21 changes: 17 additions & 4 deletions regexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -889,9 +889,10 @@ onig_region_clear(OnigRegion* region)

#if USE_MMTK
static OnigPosition*
rb_mmtk_onig_position_array_alloc(size_t len)
rb_mmtk_onig_position_array_alloc(size_t len, VALUE *root_var)
{
rb_mmtk_strbuf_t *strbuf = rb_mmtk_new_strbuf(len * sizeof(OnigPosition));
*root_var = (VALUE)strbuf;
OnigPosition *result = (OnigPosition*)rb_mmtk_strbuf_to_chars(strbuf);
return result;
}
Expand Down Expand Up @@ -931,13 +932,25 @@ onig_region_resize(OnigRegion* region, int n)
}
#if USE_MMTK
} else {
OnigPosition *new_beg = rb_mmtk_onig_position_array_alloc(n);
OnigPosition *new_end = rb_mmtk_onig_position_array_alloc(n);
// TODO: Use write barrier when writing these fields.
VALUE root_beg;
VALUE root_end;
OnigPosition *new_beg = rb_mmtk_onig_position_array_alloc(n, &root_beg); // May trigger GC.
OnigPosition *new_end = rb_mmtk_onig_position_array_alloc(n, &root_end); // May trigger GC.

// About write barriers: The following assignments may write to fields of `RMatch` or local
// variables on the stack, depending on the call site. This file `regexec.c` knows nothing
// about the Ruby-level `RMatch` object, so we can't apply write barriers here. The caller
// should apply `rb_gc_writebarrier_remember` to the `RMatch` object if applicable.
region->beg = new_beg;
region->end = new_end;

// Currently MMTk panicks when out of memory. We can customize the OOM handling in the
// `mmtk-ruby` binding so that it returns NULL instead.

// Prevent new_beg and `new_end from being collected. Actually we only need to guard
// `root_beg`, but we do the same for `root_end` just for consistency.
RB_GC_GUARD(root_beg);
RB_GC_GUARD(root_end);
}
#endif

Expand Down

0 comments on commit 767b301

Please sign in to comment.