From da5e57a3cf1a5969a6d5b05cf371f613d21f42f7 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Fri, 23 Aug 2024 17:58:08 +0800
Subject: [PATCH] Support heap traversal

We use the object enumeration feature in mmtk-core to implement heap
traversal. This enables `ObjectSpace.each_object` as well as the
`TracePoint` utility which needs heap traversal to enumerate objects and
set hooks.

The array of object references is built with malloc/realloc. A failure
to allocate or grow it is recorded while enumerating and reported with
`rb_memerror` once the enumeration has finished. A non-zero return value
from the callback stops the traversal.

Test cases involving each_object and TracePoint are no longer excluded.
The TestTracepointObj test case remains excluded because we have not
implemented querying GC statistics from TracePoint.
---
 gc/default.c                                | 136 ++++++++++++++++++++
 internal/mmtk.h                             |   6 ++
 internal/mmtk_support.h                     |   1 +
 mmtk_support.c                              |   8 ++
 test/.excludes-mmtk/TestISeq.rb             |   6 --
 test/.excludes-mmtk/TestObjectSpace.rb      |   3 -
 test/.excludes-mmtk/TestRubyOptimization.rb |   1 -
 test/.excludes-mmtk/TestSetTraceFunc.rb     |   1 -
 test/.excludes-mmtk/TestTracepointObj.rb    |   2 +-
 9 files changed, 152 insertions(+), 12 deletions(-)
 delete mode 100644 test/.excludes-mmtk/TestISeq.rb
 delete mode 100644 test/.excludes-mmtk/TestObjectSpace.rb
 delete mode 100644 test/.excludes-mmtk/TestRubyOptimization.rb
 delete mode 100644 test/.excludes-mmtk/TestSetTraceFunc.rb

diff --git a/gc/default.c b/gc/default.c
index 85d9219aae837b..da61f9dc36df14 100644
--- a/gc/default.c
+++ b/gc/default.c
@@ -3207,9 +3207,145 @@ objspace_each_objects(rb_objspace_t *objspace, each_obj_callback *callback, void
     objspace_each_exec(protected, &each_obj_data);
 }
 
+#if USE_MMTK
+// State shared between `rb_mmtk_each_objects_safe` and its enumeration callback.
+struct rb_mmtk_build_obj_array_data {
+    VALUE **array_ptr; // Points to the malloc-ed array pointer; updated when the array grows.
+    size_t len;        // Number of elements in use.
+    size_t capa;       // Number of elements allocated.
+    bool failed;       // Set when the array could not be grown.
+};
+
+// Callback for `mmtk_enumerate_objects`. Appends `object` to the array described by `data`,
+// doubling the array when it is full. Allocation and GC are not allowed while
+// `mmtk_enumerate_objects` is running, so a failure to grow the array is only recorded in
+// `failed` here and is reported by the caller after the enumeration has finished.
+void
+rb_mmtk_build_obj_array_i(MMTk_ObjectReference object, void *data)
+{
+    struct rb_mmtk_build_obj_array_data *build_array_data = (struct rb_mmtk_build_obj_array_data*)data;
+
+    // Once growing has failed, ignore the remaining objects.
+    if (build_array_data->failed) {
+        return;
+    }
+
+    VALUE *array = *build_array_data->array_ptr;
+    size_t len = build_array_data->len;
+    size_t capa = build_array_data->capa;
+    if (len == capa) {
+        // Refuse to grow if `sizeof(VALUE) * new_capa` would overflow.
+        if (capa > SIZE_MAX / sizeof(VALUE) / 2) {
+            build_array_data->failed = true;
+            return;
+        }
+        size_t new_capa = capa * 2;
+        VALUE *new_array = (VALUE*)realloc(array, sizeof(VALUE) * new_capa);
+        if (new_array == NULL) {
+            // `realloc` leaves the old block valid on failure, so keep pointing at it.
+            build_array_data->failed = true;
+            return;
+        }
+        *build_array_data->array_ptr = new_array;
+        build_array_data->capa = new_capa;
+        array = new_array;
+    }
+
+    RUBY_ASSERT(build_array_data->len < build_array_data->capa);
+
+    array[len] = (VALUE)object;
+    build_array_data->len = len + 1;
+}
+
+// Call `callback` once for every object reported by `mmtk_enumerate_objects`. The object
+// references are first collected into a malloc-ed array rooted by a tmpbuf, so `callback` may
+// trigger GC. Calls `rb_memerror` if the array cannot be allocated or grown.
+void
+rb_mmtk_each_objects_safe(each_obj_callback *callback, void *data)
+{
+    // Allocate a tmpbuf object. It's OK if it triggers GC now.
+    volatile VALUE tmpbuf = rb_imemo_tmpbuf_auto_free_pointer();
+
+    // Build an array of object references.
+    const size_t initial_capacity = 512;
+    // We must not trigger GC while running `mmtk_enumerate_objects`,
+    // so we use `malloc` directly.
+    // It will be realloced as we add more objects.
+    VALUE *array = (VALUE*)malloc(sizeof(VALUE) * initial_capacity);
+    if (array == NULL) {
+        rb_memerror();
+    }
+    struct rb_mmtk_build_obj_array_data build_array_data = {
+        .array_ptr = &array,
+        .len = 0,
+        .capa = initial_capacity,
+        .failed = false,
+    };
+
+    // No GC from now on.
+    mmtk_enumerate_objects(rb_mmtk_build_obj_array_i, &build_array_data);
+
+    if (build_array_data.failed) {
+        // The array could not be grown. The enumeration is over, so it is safe to raise.
+        free(array);
+        rb_memerror();
+    }
+
+    // Root the array.
+    rb_imemo_tmpbuf_set_ptr(tmpbuf, array);
+    ((rb_imemo_tmpbuf_t*)tmpbuf)->cnt = build_array_data.len;
+    // GC is OK from now on.
+
+    // Inform the VM about malloc memory usage.
+    // Since elements of `array` are rooted by `tmpbuf`, it is safe to trigger GC.
+    // The GC won't free any object because we have just rooted every object.
+    // But the GC may adjust the threshold for triggering the next GC.
+    rb_gc_adjust_memory_usage(sizeof(VALUE) * build_array_data.capa);
+
+    RUBY_DEBUG_LOG("Begin enumerating %zu objects\n", build_array_data.len);
+
+    // Now enumerate objects.
+    // If GC is triggered in `callback`, `tmpbuf` will keep elements of `array` alive.
+    for (size_t i = 0; i < build_array_data.len; i++) {
+        volatile VALUE object = array[i];
+        size_t object_size = rb_mmtk_get_object_size(object);
+        uintptr_t object_end = object + object_size;
+
+        RUBY_DEBUG_LOG("Enumerating object: %p\n", (void*)object);
+        int stop = callback((void*)object, (void*)object_end, object_size, data);
+        RB_GC_GUARD(object);
+
+        // Clear the element so that it no longer pins the object if it dies.
+        array[i] = 0;
+
+        // A non-zero return value from `callback` asks us to stop iterating.
+        if (stop) {
+            break;
+        }
+    }
+
+    RUBY_DEBUG_LOG("End enumerating %zu objects\n", build_array_data.len);
+
+    // Explicitly free `array` because we know it is no longer used.
+    // Don't wait for GC to free it because `free()` is a bottleneck during GC.
+    // Adjust memory usage accordingly.
+    rb_imemo_tmpbuf_set_ptr(tmpbuf, NULL);
+    ((rb_imemo_tmpbuf_t*)tmpbuf)->cnt = 0;
+    free(array);
+    rb_gc_adjust_memory_usage(-(ssize_t)(sizeof(VALUE) * build_array_data.capa));
+
+    RB_GC_GUARD(tmpbuf);
+}
+#endif
+
 void
 rb_gc_impl_each_objects(void *objspace_ptr, each_obj_callback *callback, void *data)
 {
+    WHEN_USING_MMTK({
+        rb_mmtk_each_objects_safe(callback, data);
+        return;
+    })
+
     objspace_each_objects(objspace_ptr, callback, data, TRUE);
 }
 
diff --git a/internal/mmtk.h b/internal/mmtk.h
index 0bc2621bfedbf6..6231694c8f99fa 100644
--- a/internal/mmtk.h
+++ b/internal/mmtk.h
@@ -254,4 +254,10 @@ bool mmtk_is_object_wb_unprotected(MMTk_ObjectReference object);
 
 void mmtk_object_reference_write_post(MMTk_Mutator *mutator, MMTk_ObjectReference object);
 
+/**
+ * Enumerate objects. This function will call `callback(object, data)` for each object. It has
+ * undefined behavior if allocation or GC happens while this function is running.
+ */
+void mmtk_enumerate_objects(void (*callback)(MMTk_ObjectReference, void*), void *data);
+
 #endif /* MMTK_H */
diff --git a/internal/mmtk_support.h b/internal/mmtk_support.h
index 9101ee4f8eb9bf..358f8c47c86678 100644
--- a/internal/mmtk_support.h
+++ b/internal/mmtk_support.h
@@ -49,6 +49,7 @@ void rb_mmtk_destroy_mutator(MMTk_VMMutatorThread cur_thread, bool at_fork);
 // Object layout
 size_t rb_mmtk_prefix_size(void);
 size_t rb_mmtk_suffix_size(void);
+size_t rb_mmtk_get_object_size(VALUE object);
 
 // Allocation
 VALUE rb_mmtk_alloc_obj(size_t mmtk_alloc_size, size_t size_pool_size, size_t prefix_size);
diff --git a/mmtk_support.c b/mmtk_support.c
index 69bb12b2134962..e50799787305b2 100644
--- a/mmtk_support.c
+++ b/mmtk_support.c
@@ -395,6 +395,14 @@ rb_mmtk_suffix_size(void)
     return ruby_binding_options.suffix_size;
 }
 
+// Return the `size_t` stored `sizeof(VALUE)` bytes before `object`; callers use it as the object size.
+size_t
+rb_mmtk_get_object_size(VALUE object)
+{
+    return *(size_t*)(object - sizeof(VALUE));
+}
+
+
 ////////////////////////////////////////////////////////////////////////////////
 // Allocation
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/test/.excludes-mmtk/TestISeq.rb b/test/.excludes-mmtk/TestISeq.rb
deleted file mode 100644
index f177fce8757ba7..00000000000000
--- a/test/.excludes-mmtk/TestISeq.rb
+++ /dev/null
@@ -1,6 +0,0 @@
-exclude(:test_to_binary_line_tracepoint, "tracepoints not implemented")
-exclude(:test_to_binary_class_tracepoint, "tracepoints not implemented")
-exclude(:test_to_binary_end_tracepoint, "tracepoints not implemented")
-exclude(:test_to_binary_return_tracepoint, "tracepoints not implemented")
-exclude(:test_to_binary_b_call_tracepoint, "tracepoints not implemented")
-exclude(:test_to_binary_b_return_tracepoint, "tracepoints not implemented")
diff --git a/test/.excludes-mmtk/TestObjectSpace.rb b/test/.excludes-mmtk/TestObjectSpace.rb
deleted file mode 100644
index abe43e10423f37..00000000000000
--- a/test/.excludes-mmtk/TestObjectSpace.rb
+++ /dev/null
@@ -1,3 +0,0 @@
-exclude(:test_each_object_singleton_class, "obj space not implemented")
-exclude(:test_each_object_enumerator, "obj space not implemented")
-exclude(:test_each_object, "obj space not implemented")
diff --git a/test/.excludes-mmtk/TestRubyOptimization.rb b/test/.excludes-mmtk/TestRubyOptimization.rb
deleted file mode 100644
index 805256dfd7ac84..00000000000000
--- a/test/.excludes-mmtk/TestRubyOptimization.rb
+++ /dev/null
@@ -1 +0,0 @@
-exclude(:test_trace_optimized_methods, "tracepoints not supported")
diff --git a/test/.excludes-mmtk/TestSetTraceFunc.rb b/test/.excludes-mmtk/TestSetTraceFunc.rb
deleted file mode 100644
index e62554b38e1b0b..00000000000000
--- a/test/.excludes-mmtk/TestSetTraceFunc.rb
+++ /dev/null
@@ -1 +0,0 @@
-exclude(/test_/, "tracepoints are not supported")
diff --git a/test/.excludes-mmtk/TestTracepointObj.rb b/test/.excludes-mmtk/TestTracepointObj.rb
index e62554b38e1b0b..2e6970d2490f3a 100644
--- a/test/.excludes-mmtk/TestTracepointObj.rb
+++ b/test/.excludes-mmtk/TestTracepointObj.rb
@@ -1 +1 @@
-exclude(/test_/, "tracepoints are not supported")
+exclude(/test_/, "tracepoints do not support GC stats yet")