From fe1772df5207119125f7b600636923f345d4d3f8 Mon Sep 17 00:00:00 2001 From: Luis Eduardo de Souza Amorim Date: Thu, 9 May 2024 00:06:58 +0000 Subject: [PATCH 1/8] Processing gc preserve regions differently, transitively pinning roots from them --- src/gc.c | 10 ++++++ src/jl_exported_funcs.inc | 2 ++ src/julia.h | 4 +++ src/llvm-alloc-opt.cpp | 58 ++++++++++++++++---------------- src/llvm-final-gc-lowering.cpp | 12 ++++--- src/llvm-late-gc-lowering.cpp | 60 ++++++++++++++++++++++++++++++++-- src/llvm-pass-helpers.cpp | 30 +++++++++++++++++ src/llvm-pass-helpers.h | 4 +++ src/mmtk-gc.c | 43 ++++++++++++++++++++++-- 9 files changed, 186 insertions(+), 37 deletions(-) diff --git a/src/gc.c b/src/gc.c index a8b032a540c24..5d6360d2280c3 100644 --- a/src/gc.c +++ b/src/gc.c @@ -3423,6 +3423,16 @@ JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT jl_unreachable(); } +JL_DLLEXPORT void jl_gc_preserve_begin_hook(int n, ...) JL_NOTSAFEPOINT +{ + jl_unreachable(); +} + +JL_DLLEXPORT void jl_gc_preserve_end_hook(void) JL_NOTSAFEPOINT +{ + jl_unreachable(); +} + JL_DLLEXPORT void jl_gc_wb2_noinline(const void *parent, const void *ptr) JL_NOTSAFEPOINT { jl_unreachable(); diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 32ec4866792f8..094e69b976eaa 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -191,6 +191,8 @@ XX(jl_gc_pool_alloc_instrumented) \ XX(jl_gc_queue_multiroot) \ XX(jl_gc_queue_root) \ + XX(jl_gc_preserve_begin_hook) \ + XX(jl_gc_preserve_end_hook) \ XX(jl_gc_wb1_noinline) \ XX(jl_gc_wb2_noinline) \ XX(jl_gc_wb_binding_noinline) \ diff --git a/src/julia.h b/src/julia.h index 1c4cef4886901..16a52f92a0ad1 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2082,6 +2082,10 @@ typedef struct _jl_task_t { int8_t threadpoolid; // saved gc stack top for context switches jl_gcframe_t *gcstack; +#ifdef MMTK_GC + // GC stack of objects that need to be transitively pinned + jl_gcframe_t *tpin_gcstack; +#endif size_t world_age; // quick lookup for current ptls jl_ptls_t ptls; // == jl_all_tls_states[tid] diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index c04a5cd3af625..3c3da0e83f52e 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -55,22 +55,22 @@ namespace { static void removeGCPreserve(CallInst *call, Instruction *val) { - ++RemovedGCPreserve; - auto replace = Constant::getNullValue(val->getType()); - call->replaceUsesOfWith(val, replace); - call->setAttributes(AttributeList()); - for (auto &arg: call->args()) { - if (!isa(arg.get())) { - return; - } - } - while (!call->use_empty()) { - auto end = cast(*call->user_begin()); - // gc_preserve_end returns void. - assert(end->use_empty()); - end->eraseFromParent(); - } - call->eraseFromParent(); + // ++RemovedGCPreserve; + // auto replace = Constant::getNullValue(val->getType()); + // call->replaceUsesOfWith(val, replace); + // call->setAttributes(AttributeList()); + // for (auto &arg: call->args()) { + // if (!isa(arg.get())) { + // return; + // } + // } + // while (!call->use_empty()) { + // auto end = cast(*call->user_begin()); + // // gc_preserve_end returns void. + // assert(end->use_empty()); + // end->eraseFromParent(); + // } + // call->eraseFromParent(); } /** @@ -653,15 +653,15 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) return; } // Also remove the preserve intrinsics so that it can be better optimized. - if (pass.gc_preserve_begin_func == callee) { - if (has_ref) { - call->replaceUsesOfWith(orig_i, buff); - } - else { - removeGCPreserve(call, orig_i); - } - return; - } + // if (pass.gc_preserve_begin_func == callee) { + // if (has_ref) { + // call->replaceUsesOfWith(orig_i, buff); + // } + // else { + // removeGCPreserve(call, orig_i); + // } + // return; + // } if (pass.write_barrier_func == callee || pass.write_barrier_binding_func == callee) { ++RemovedWriteBarriers; @@ -761,10 +761,10 @@ void Optimizer::removeAlloc(CallInst *orig_inst) } else if (auto call = dyn_cast(user)) { auto callee = call->getCalledOperand(); - if (pass.gc_preserve_begin_func == callee) { - removeGCPreserve(call, orig_i); - return; - } + // if (pass.gc_preserve_begin_func == callee) { + // removeGCPreserve(call, orig_i); + // return; + // } if (pass.typeof_func == callee) { ++RemovedTypeofs; call->replaceAllUsesWith(tag); diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 990bd92f3b499..74e69d9d6fa9f 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -52,6 +52,8 @@ struct FinalLowerGC: private JuliaPassContext { Function *bigAllocFunc; Function *allocTypedFunc; #ifdef MMTK_GC + Function *gcPreserveBeginHookFunc; + Function *gcPreserveEndHookFunc; Function *writeBarrier1Func; Function *writeBarrier2Func; Function *writeBarrierBindingFunc; @@ -145,7 +147,7 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F) IRBuilder<> builder(target->getContext()); builder.SetInsertPoint(&*(++BasicBlock::iterator(target))); StoreInst *inst = builder.CreateAlignedStore( - ConstantInt::get(getSizeTy(F.getContext()), JL_GC_ENCODE_PUSHARGS(nRoots)), + ConstantInt::get(getSizeTy(F.getContext()), JL_GC_ENCODE_PUSHARGS_NO_TPIN(nRoots)), builder.CreateBitCast( builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 0), getSizeTy(F.getContext())->getPointerTo()), @@ -407,12 +409,14 @@ bool FinalLowerGC::doInitialization(Module &M) { bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc); allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped); #ifdef MMTK_GC + gcPreserveBeginHookFunc = getOrDeclare(jl_well_known::GCPreserveBeginHook); + gcPreserveEndHookFunc = getOrDeclare(jl_well_known::GCPreserveEndHook); writeBarrier1Func = getOrDeclare(jl_well_known::GCWriteBarrier1); writeBarrier2Func = getOrDeclare(jl_well_known::GCWriteBarrier2); writeBarrierBindingFunc = getOrDeclare(jl_well_known::GCWriteBarrierBinding); writeBarrier1SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier1Slow); writeBarrier2SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier2Slow); - GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc}; + GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, gcPreserveBeginHookFunc, gcPreserveEndHookFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc}; #else GlobalValue *functionList[] = {queueRootFunc, queueBindingFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc}; #endif @@ -432,8 +436,8 @@ bool FinalLowerGC::doInitialization(Module &M) { bool FinalLowerGC::doFinalization(Module &M) { #ifdef MMTK_GC - GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc}; - queueRootFunc = poolAllocFunc = bigAllocFunc = writeBarrier1Func = writeBarrier2Func = writeBarrierBindingFunc = writeBarrier1SlowFunc = writeBarrier2SlowFunc = nullptr; + GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, gcPreserveBeginHookFunc, gcPreserveEndHookFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc}; + queueRootFunc = poolAllocFunc = bigAllocFunc = gcPreserveBeginHookFunc = gcPreserveEndHookFunc = writeBarrier1Func = writeBarrier2Func = writeBarrierBindingFunc = writeBarrier1SlowFunc = writeBarrier2SlowFunc = nullptr; #else GlobalValue *functionList[] = {queueRootFunc, queueBindingFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc}; queueRootFunc = queueBindingFunc = poolAllocFunc = bigAllocFunc = allocTypedFunc = nullptr; diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index b76f4c38227f2..a1fab8b66088d 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2308,9 +2308,65 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { continue; } Value *callee = CI->getCalledOperand(); - if (callee && (callee == gc_flush_func || callee == gc_preserve_begin_func - || callee == gc_preserve_end_func)) { + if (callee && (callee == gc_flush_func)) { /* No replacement */ + } else if (callee && (callee == gc_preserve_begin_func)) { + /* Replace with a call to the hook functions */ + // Initialize an IR builder. + IRBuilder<> builder(CI); + CallInst *newI; + + builder.SetCurrentDebugLocation(CI->getDebugLoc()); + size_t nargs = 0; + State S2(F); + + std::vector args; + for (Use &U : CI->args()) { + Value *V = U; + if (isa(V)) + continue; + if (isa(V->getType())) { + if (isSpecialPtr(V->getType())) { + int Num = Number(S2, V); + if (Num >= 0) { + nargs++; + Value *Val = GetPtrForNumber(S2, Num, CI); + args.push_back(Val); + } + } + } else { + std::vector Nums = NumberAll(S2, V); + for (int Num : Nums) { + if (Num < 0) + continue; + Value *Val = GetPtrForNumber(S2, Num, CI); + args.push_back(Val); + nargs++; + } + } + } + args.insert(args.begin(), ConstantInt::get(T_size, nargs)); + + ArrayRef args_llvm = ArrayRef(args); + + newI = builder.CreateCall(getOrDeclare(jl_well_known::GCPreserveBeginHook), args_llvm ); + + // newI->setAttributes(newI->getCalledFunction()->getAttributes()); + // newI->takeName(CI); + + CI->replaceAllUsesWith(newI); + } else if (callee && (callee == gc_preserve_end_func)) { + /* Replace with a call to the hook functions */ + // Initialize an IR builder. + IRBuilder<> builder(CI); + CallInst *newI; + builder.SetCurrentDebugLocation(CI->getDebugLoc()); + newI = builder.CreateCall(getOrDeclare(jl_well_known::GCPreserveEndHook), {}); + + // newI->setAttributes(newI->getCalledFunction()->getAttributes()); + // newI->takeName(CI); + + CI->replaceAllUsesWith(newI); } else if (pointer_from_objref_func != nullptr && callee == pointer_from_objref_func) { auto *obj = CI->getOperand(0); auto *ASCI = new AddrSpaceCastInst(obj, JuliaType::get_pjlvalue_ty(obj->getContext()), "", CI); diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index 73d765f61e856..8d2e0c2d14ca5 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -334,6 +334,8 @@ namespace jl_well_known { static const char *GC_QUEUE_BINDING_NAME = XSTR(jl_gc_queue_binding); static const char *GC_ALLOC_TYPED_NAME = XSTR(jl_gc_alloc_typed); #ifdef MMTK_GC + static const char *GC_PRESERVE_BEGIN_HOOK_NAME = XSTR(jl_gc_preserve_begin_hook); + static const char *GC_PRESERVE_END_HOOK_NAME = XSTR(jl_gc_preserve_end_hook); static const char *GC_WB_1_NAME = XSTR(jl_gc_wb1_noinline); static const char *GC_WB_2_NAME = XSTR(jl_gc_wb2_noinline); static const char *GC_WB_BINDING_NAME = XSTR(jl_gc_wb_binding_noinline); @@ -424,6 +426,34 @@ namespace jl_well_known { }); #ifdef MMTK_GC + const WellKnownFunctionDescription GCPreserveBeginHook( + GC_PRESERVE_BEGIN_HOOK_NAME, + [](const JuliaPassContext &context) { + auto func = Function::Create( + FunctionType::get( + Type::getVoidTy(context.getLLVMContext()), + { T_size_t(context) }, + true), + Function::ExternalLinkage, + GC_PRESERVE_BEGIN_HOOK_NAME); + + func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + return func; + }); + + const WellKnownFunctionDescription GCPreserveEndHook( + GC_PRESERVE_END_HOOK_NAME, + [](const JuliaPassContext &context) { + auto func = Function::Create( + FunctionType::get( + Type::getVoidTy(context.getLLVMContext()), + { }, + false), + Function::ExternalLinkage, + GC_PRESERVE_END_HOOK_NAME); + func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + return func; + }); const WellKnownFunctionDescription GCWriteBarrier1( GC_WB_1_NAME, [](const JuliaPassContext &context) { diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h index 21aaed8e9ba75..1e672433be596 100644 --- a/src/llvm-pass-helpers.h +++ b/src/llvm-pass-helpers.h @@ -134,6 +134,8 @@ namespace jl_intrinsics { extern const IntrinsicDescription safepoint; #ifdef MMTK_GC + extern const IntrinsicDescription gcPreserveBeginHook; + extern const IntrinsicDescription gcPreserveEndHook; extern const IntrinsicDescription writeBarrier1; extern const IntrinsicDescription writeBarrier2; extern const IntrinsicDescription writeBarrierBinding; @@ -168,6 +170,8 @@ namespace jl_well_known { extern const WellKnownFunctionDescription GCAllocTyped; #ifdef MMTK_GC + extern const WellKnownFunctionDescription GCPreserveBeginHook; + extern const WellKnownFunctionDescription GCPreserveEndHook; extern const WellKnownFunctionDescription GCWriteBarrier1; extern const WellKnownFunctionDescription GCWriteBarrier2; extern const WellKnownFunctionDescription GCWriteBarrierBinding; diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 3666ecd21bf15..6d50b447954c1 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -391,7 +391,7 @@ void jl_gc_init(void) double min_size = strtod(min_size_gb, &p); min_heap_size = (long) 1024 * 1024 * 1024 * min_size; } else { - min_heap_size = (long) 1024 * 1024 * 1024 * 1; + min_heap_size = default_collect_interval; } // default max heap currently set as 30 Gb @@ -404,7 +404,7 @@ void jl_gc_init(void) double max_size = strtod(max_size_gb, &p); max_heap_size = (long) 1024 * 1024 * 1024 * max_size; } else { - max_heap_size = (long) uv_get_free_memory() * 60 / 100; + max_heap_size = (long) uv_get_free_memory() * 70 / 100; } // Assert that the number of stock GC threads is 0; MMTK uses the number of threads in jl_options.ngcthreads @@ -569,6 +569,45 @@ JL_DLLEXPORT void jl_gc_array_ptr_copy(jl_array_t *dest, void **dest_p, jl_array mmtk_memory_region_copy(&ptls->mmtk_mutator, jl_array_owner(src), src_p, jl_array_owner(dest), dest_p, n); } +#define jl_p_tpin_gcstack (jl_current_task->tpin_gcstack) + +#define JL_GC_PUSHARGS_TPIN_ROOT_OBJS(rts_var,n) \ + rts_var = ((jl_value_t**)malloc(((n)+2)*sizeof(jl_value_t*)))+2; \ + ((void**)rts_var)[-2] = (void*)JL_GC_ENCODE_PUSHARGS(n); \ + ((void**)rts_var)[-1] = jl_p_tpin_gcstack; \ + memset((void*)rts_var, 0, (n)*sizeof(jl_value_t*)); \ + jl_p_tpin_gcstack = (jl_gcframe_t*)&(((void**)rts_var)[-2]); \ + +#define JL_GC_POP_TPIN_ROOT_OBJS() \ + jl_gcframe_t *curr = jl_p_tpin_gcstack; \ + if(curr) { \ + (jl_p_tpin_gcstack = jl_p_tpin_gcstack->prev); \ + free(curr); \ + } + +// Add each argument as a tpin root object. +// However, we cannot use JL_GC_PUSH and JL_GC_POP since the slots should live +// beyond this function. Instead, we maintain a tpin stack by mallocing/freeing +// the frames for each of the preserve regions we encounter +JL_DLLEXPORT void jl_gc_preserve_begin_hook(int n, ...) JL_NOTSAFEPOINT +{ + jl_value_t** frame; + JL_GC_PUSHARGS_TPIN_ROOT_OBJS(frame, n); + if (n == 0) return; + + va_list args; + va_start(args, n); + for (int i = 0; i < n; i++) { + frame[i] = va_arg(args, jl_value_t *); + } + va_end(args); +} + +JL_DLLEXPORT void jl_gc_preserve_end_hook(void) JL_NOTSAFEPOINT +{ + JL_GC_POP_TPIN_ROOT_OBJS(); +} + // No inline write barrier -- only used for debugging JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT { From 480b89d8ce85ee1030aefa35804b6c9e68883913 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 11 Jun 2024 03:25:29 +0000 Subject: [PATCH 2/8] Fixing merge conflict --- src/mmtk-gc.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 4dfe4a652c0c9..b59d13bfa66bb 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -393,11 +393,7 @@ void jl_gc_init(void) double min_size = strtod(min_size_gb, &p); min_heap_size = (long) 1024 * 1024 * 1024 * min_size; } else { -<<<<<<< HEAD - min_heap_size = default_collect_interval; -======= min_heap_size = 0; ->>>>>>> v1.9.2+RAI } if (max_size_def != NULL) { @@ -409,11 +405,7 @@ void jl_gc_init(void) double max_size = strtod(max_size_gb, &p); max_heap_size = (long) 1024 * 1024 * 1024 * max_size; } else { -<<<<<<< HEAD - max_heap_size = (long) uv_get_free_memory() * 70 / 100; -======= max_heap_size = 0; ->>>>>>> v1.9.2+RAI } // Assert that the number of stock GC threads is 0; MMTK uses the number of threads in jl_options.ngcthreads From ed66d4ba1af8c3e143f910bcac51486a0a8ab787 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 11 Jun 2024 05:39:55 +0000 Subject: [PATCH 3/8] Cleanup --- src/llvm-alloc-opt.cpp | 34 ---------------------------------- src/llvm-late-gc-lowering.cpp | 6 ------ 2 files changed, 40 deletions(-) diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index 3c3da0e83f52e..a276da08929f0 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -53,26 +53,6 @@ STATISTIC(RemovedGCPreserve, "Total number of GC preserve instructions removed") namespace { -static void removeGCPreserve(CallInst *call, Instruction *val) -{ - // ++RemovedGCPreserve; - // auto replace = Constant::getNullValue(val->getType()); - // call->replaceUsesOfWith(val, replace); - // call->setAttributes(AttributeList()); - // for (auto &arg: call->args()) { - // if (!isa(arg.get())) { - // return; - // } - // } - // while (!call->use_empty()) { - // auto end = cast(*call->user_begin()); - // // gc_preserve_end returns void. - // assert(end->use_empty()); - // end->eraseFromParent(); - // } - // call->eraseFromParent(); -} - /** * Promote `julia.gc_alloc_obj` which do not have escaping root to a alloca. * Uses that are not considered to escape the object (i.e. heap address) includes, @@ -652,16 +632,6 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) call->eraseFromParent(); return; } - // Also remove the preserve intrinsics so that it can be better optimized. - // if (pass.gc_preserve_begin_func == callee) { - // if (has_ref) { - // call->replaceUsesOfWith(orig_i, buff); - // } - // else { - // removeGCPreserve(call, orig_i); - // } - // return; - // } if (pass.write_barrier_func == callee || pass.write_barrier_binding_func == callee) { ++RemovedWriteBarriers; @@ -761,10 +731,6 @@ void Optimizer::removeAlloc(CallInst *orig_inst) } else if (auto call = dyn_cast(user)) { auto callee = call->getCalledOperand(); - // if (pass.gc_preserve_begin_func == callee) { - // removeGCPreserve(call, orig_i); - // return; - // } if (pass.typeof_func == callee) { ++RemovedTypeofs; call->replaceAllUsesWith(tag); diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index a1fab8b66088d..083f03ed48942 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2351,9 +2351,6 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { newI = builder.CreateCall(getOrDeclare(jl_well_known::GCPreserveBeginHook), args_llvm ); - // newI->setAttributes(newI->getCalledFunction()->getAttributes()); - // newI->takeName(CI); - CI->replaceAllUsesWith(newI); } else if (callee && (callee == gc_preserve_end_func)) { /* Replace with a call to the hook functions */ @@ -2363,9 +2360,6 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { builder.SetCurrentDebugLocation(CI->getDebugLoc()); newI = builder.CreateCall(getOrDeclare(jl_well_known::GCPreserveEndHook), {}); - // newI->setAttributes(newI->getCalledFunction()->getAttributes()); - // newI->takeName(CI); - CI->replaceAllUsesWith(newI); } else if (pointer_from_objref_func != nullptr && callee == pointer_from_objref_func) { auto *obj = CI->getOperand(0); From 0558e64792ace533e7dccacb903ed03d2b5a51c3 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 11 Jun 2024 06:14:26 +0000 Subject: [PATCH 4/8] Removing code that fails llvm assertion --- src/llvm-late-gc-lowering.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 083f03ed48942..1bd556c0f8640 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2314,7 +2314,6 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { /* Replace with a call to the hook functions */ // Initialize an IR builder. IRBuilder<> builder(CI); - CallInst *newI; builder.SetCurrentDebugLocation(CI->getDebugLoc()); size_t nargs = 0; @@ -2348,19 +2347,13 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { args.insert(args.begin(), ConstantInt::get(T_size, nargs)); ArrayRef args_llvm = ArrayRef(args); - - newI = builder.CreateCall(getOrDeclare(jl_well_known::GCPreserveBeginHook), args_llvm ); - - CI->replaceAllUsesWith(newI); + builder.CreateCall(getOrDeclare(jl_well_known::GCPreserveBeginHook), args_llvm ); } else if (callee && (callee == gc_preserve_end_func)) { /* Replace with a call to the hook functions */ // Initialize an IR builder. IRBuilder<> builder(CI); - CallInst *newI; builder.SetCurrentDebugLocation(CI->getDebugLoc()); - newI = builder.CreateCall(getOrDeclare(jl_well_known::GCPreserveEndHook), {}); - - CI->replaceAllUsesWith(newI); + builder.CreateCall(getOrDeclare(jl_well_known::GCPreserveEndHook), {}); } else if (pointer_from_objref_func != nullptr && callee == pointer_from_objref_func) { auto *obj = CI->getOperand(0); auto *ASCI = new AddrSpaceCastInst(obj, JuliaType::get_pjlvalue_ty(obj->getContext()), "", CI); From c4c04d31574154e23ee0c78b07a7335d8a2f1843 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 20 Jun 2024 05:37:21 +0000 Subject: [PATCH 5/8] Restore optimisation that removes GCPreserve calls --- src/llvm-alloc-opt.cpp | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index a276da08929f0..c04a5cd3af625 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -53,6 +53,26 @@ STATISTIC(RemovedGCPreserve, "Total number of GC preserve instructions removed") namespace { +static void removeGCPreserve(CallInst *call, Instruction *val) +{ + ++RemovedGCPreserve; + auto replace = Constant::getNullValue(val->getType()); + call->replaceUsesOfWith(val, replace); + call->setAttributes(AttributeList()); + for (auto &arg: call->args()) { + if (!isa(arg.get())) { + return; + } + } + while (!call->use_empty()) { + auto end = cast(*call->user_begin()); + // gc_preserve_end returns void. + assert(end->use_empty()); + end->eraseFromParent(); + } + call->eraseFromParent(); +} + /** * Promote `julia.gc_alloc_obj` which do not have escaping root to a alloca. * Uses that are not considered to escape the object (i.e. heap address) includes, @@ -632,6 +652,16 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) call->eraseFromParent(); return; } + // Also remove the preserve intrinsics so that it can be better optimized. + if (pass.gc_preserve_begin_func == callee) { + if (has_ref) { + call->replaceUsesOfWith(orig_i, buff); + } + else { + removeGCPreserve(call, orig_i); + } + return; + } if (pass.write_barrier_func == callee || pass.write_barrier_binding_func == callee) { ++RemovedWriteBarriers; @@ -731,6 +761,10 @@ void Optimizer::removeAlloc(CallInst *orig_inst) } else if (auto call = dyn_cast(user)) { auto callee = call->getCalledOperand(); + if (pass.gc_preserve_begin_func == callee) { + removeGCPreserve(call, orig_i); + return; + } if (pass.typeof_func == callee) { ++RemovedTypeofs; call->replaceAllUsesWith(tag); From 1b3d0defb72663433fae4f28f3a0812f5e1b880c Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 20 Jun 2024 05:37:35 +0000 Subject: [PATCH 6/8] Clarifying code about pushing objects as gc preserve roots --- src/julia.h | 5 +++-- src/mmtk-gc.c | 26 +++++++++++++++++--------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/julia.h b/src/julia.h index c75d4eca39d89..b23f2bc561502 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2111,8 +2111,9 @@ typedef struct _jl_task_t { // saved gc stack top for context switches jl_gcframe_t *gcstack; #ifdef MMTK_GC - // GC stack of objects that need to be transitively pinned - jl_gcframe_t *tpin_gcstack; + // GC stack of objects from gc preserve regions + // These must always be transitively pinned + jl_gcframe_t *gcpreserve_stack; #endif size_t world_age; // quick lookup for current ptls diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index b59d13bfa66bb..e0363c8e70386 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -570,19 +570,27 @@ JL_DLLEXPORT void jl_gc_array_ptr_copy(jl_array_t *dest, void **dest_p, jl_array mmtk_memory_region_copy(&ptls->mmtk_mutator, jl_array_owner(src), src_p, jl_array_owner(dest), dest_p, n); } -#define jl_p_tpin_gcstack (jl_current_task->tpin_gcstack) +#define jl_p_gcpreserve_stack (jl_current_task->gcpreserve_stack) -#define JL_GC_PUSHARGS_TPIN_ROOT_OBJS(rts_var,n) \ +// This macro currently uses malloc instead of alloca because this function will exit +// after pushing the roots into the gc_preserve_stack, which means that the preserve_begin function's +// stack frame will be destroyed (together with its alloca variables). When we support lowering this code +// inside the same function that is doing the preserve_begin/preserve_end calls we should be able to simple use allocas. +// Note also that we use a separate stack for gc preserve roots to avoid the possibility of calling free +// on a stack that has been allocated with alloca instead of malloc, which could happen depending on the order in which +// JL_GC_POP() and jl_gc_preserve_end_hook() occurs. + +#define JL_GC_PUSHARGS_PRESERVE_ROOT_OBJS(rts_var,n) \ rts_var = ((jl_value_t**)malloc(((n)+2)*sizeof(jl_value_t*)))+2; \ ((void**)rts_var)[-2] = (void*)JL_GC_ENCODE_PUSHARGS(n); \ - ((void**)rts_var)[-1] = jl_p_tpin_gcstack; \ + ((void**)rts_var)[-1] = jl_p_gcpreserve_stack; \ memset((void*)rts_var, 0, (n)*sizeof(jl_value_t*)); \ - jl_p_tpin_gcstack = (jl_gcframe_t*)&(((void**)rts_var)[-2]); \ + jl_p_gcpreserve_stack = (jl_gcframe_t*)&(((void**)rts_var)[-2]); \ -#define JL_GC_POP_TPIN_ROOT_OBJS() \ - jl_gcframe_t *curr = jl_p_tpin_gcstack; \ +#define JL_GC_POP_PRESERVE_ROOT_OBJS() \ + jl_gcframe_t *curr = jl_p_gcpreserve_stack; \ if(curr) { \ - (jl_p_tpin_gcstack = jl_p_tpin_gcstack->prev); \ + (jl_p_gcpreserve_stack = jl_p_gcpreserve_stack->prev); \ free(curr); \ } @@ -593,7 +601,7 @@ JL_DLLEXPORT void jl_gc_array_ptr_copy(jl_array_t *dest, void **dest_p, jl_array JL_DLLEXPORT void jl_gc_preserve_begin_hook(int n, ...) JL_NOTSAFEPOINT { jl_value_t** frame; - JL_GC_PUSHARGS_TPIN_ROOT_OBJS(frame, n); + JL_GC_PUSHARGS_PRESERVE_ROOT_OBJS(frame, n); if (n == 0) return; va_list args; @@ -606,7 +614,7 @@ JL_DLLEXPORT void jl_gc_preserve_begin_hook(int n, ...) JL_NOTSAFEPOINT JL_DLLEXPORT void jl_gc_preserve_end_hook(void) JL_NOTSAFEPOINT { - JL_GC_POP_TPIN_ROOT_OBJS(); + JL_GC_POP_PRESERVE_ROOT_OBJS(); } // No inline write barrier -- only used for debugging From 83d551b1fe214ef0d6c80797e5347778d68a93f4 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 24 Jun 2024 01:40:32 +0000 Subject: [PATCH 7/8] Fixing stock build --- src/Makefile | 2 +- src/llvm-late-gc-lowering.cpp | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 545e4e4ecb2e8..c1c0d350f36c3 100644 --- a/src/Makefile +++ b/src/Makefile @@ -48,7 +48,7 @@ SRCS := \ jltypes gf typemap smallintset ast builtins module interpreter symbol \ dlload sys init task array staticdata toplevel jl_uv datatype \ simplevector runtime_intrinsics precompile jloptions mtarraylist \ - threading partr stackwalk gc-common gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \ + threading partr stackwalk gc-common gc gc-debug gc-pages gc-stacks gc-alloc-profiler gc-page-profiler method \ mmtk-gc jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \ crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 1bd556c0f8640..a6bacff36323f 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2308,6 +2308,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { continue; } Value *callee = CI->getCalledOperand(); +#ifdef MMTK_GC if (callee && (callee == gc_flush_func)) { /* No replacement */ } else if (callee && (callee == gc_preserve_begin_func)) { @@ -2354,6 +2355,11 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { IRBuilder<> builder(CI); builder.SetCurrentDebugLocation(CI->getDebugLoc()); builder.CreateCall(getOrDeclare(jl_well_known::GCPreserveEndHook), {}); +#else + if (callee && (callee == gc_flush_func || callee == gc_preserve_begin_func + || callee == gc_preserve_end_func)) { + /* No replacement */ +#endif } else if (pointer_from_objref_func != nullptr && callee == pointer_from_objref_func) { auto *obj = CI->getOperand(0); auto *ASCI = new AddrSpaceCastInst(obj, JuliaType::get_pjlvalue_ty(obj->getContext()), "", CI); From 01f2f2406b48b9592ab5626bd50bad10989d0247 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 24 Jun 2024 03:46:59 +0000 Subject: [PATCH 8/8] Skip gc-page-profiler.c when using mmtk --- src/gc-page-profiler.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gc-page-profiler.c b/src/gc-page-profiler.c index 5af1c3d014770..1c189af01aa0f 100644 --- a/src/gc-page-profiler.c +++ b/src/gc-page-profiler.c @@ -1,5 +1,7 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license +#ifndef MMTK_GC + #include "gc-page-profiler.h" #ifdef __cplusplus @@ -165,3 +167,5 @@ JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream) #ifdef __cplusplus } #endif + +#endif // !MMTK_GC