Commit

Apply refactorings
udesou committed Feb 13, 2025
1 parent e8ebb0f commit 7ea2484
Showing 9 changed files with 288 additions and 76 deletions.
6 changes: 4 additions & 2 deletions src/gc-interface.h
@@ -216,6 +216,10 @@ struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty, unsigned align) JL_NOTSAF
// The GC may use that information to, for instance, determine that such objects should
// be treated as marked and as belonging to the old generation in nursery collections.
void jl_gc_notify_image_load(const char* img_data, size_t len);
// This function notifies the GC of the memory range that is allocated for the boot image.
// The GC may use that information to, for instance, determine that all objects in that memory range should
// be treated as marked and as belonging to the old generation in nursery collections.
void jl_gc_notify_image_alloc(const char* img_data, size_t len);
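For reference, a minimal sketch of what a non-stock GC might do with this hook; the range-recording helper below is hypothetical and not part of this commit:

void jl_gc_notify_image_alloc(const char* img_data, size_t len)
{
    // Hypothetical helper: remember [img_data, img_data + len) so objects inside
    // the image range can later be treated as marked/old without per-object work.
    gc_record_image_range((uintptr_t)img_data, (uintptr_t)img_data + (uintptr_t)len);
}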

// ========================================================================= //
// Runtime Write-Barriers
@@ -254,13 +258,11 @@ STATIC_INLINE void jl_gc_wb_knownold(const void *parent, const void *ptr) JL_NOT
// per field of the object being copied, but may be special-cased for performance reasons.
STATIC_INLINE void jl_gc_multi_wb(const void *parent,
const struct _jl_value_t *ptr) JL_NOTSAFEPOINT;

// Write-barrier function that must be used after copying fields of elements of one genericmemory object
// into another. It should be semantically equivalent to triggering multiple write barriers – one
// per field of the object being copied, but may be special-cased for performance reasons.
STATIC_INLINE void jl_gc_wb_genericmemory_copy_ptr(const struct _jl_value_t *owner, struct _jl_genericmemory_t *src, char* src_p,
size_t n, struct _jl_datatype_t *dt) JL_NOTSAFEPOINT;

// Similar to jl_gc_wb_genericmemory_copy but must be used when copying *boxed* elements of a genericmemory
// object. Note that this barrier also performs the copying unlike jl_gc_wb_genericmemory_copy_ptr.
// The parameters src_p, dest_p and n will be modified and will contain information about
99 changes: 95 additions & 4 deletions src/gc-mmtk.c
@@ -1,5 +1,6 @@
#include "gc-common.h"
#include "gc-tls-mmtk.h"
#include "gc-wb-mmtk.h"
#include "mmtkMutator.h"
#include "threading.h"

@@ -861,10 +862,22 @@ STATIC_INLINE void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t
return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (uintptr_t)allocator->limit, size, align, offset, 1);
}

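// Atomically sets the log bit for `obj` in MMTk's side metadata. Once the bit is set,
// writes that use `obj` as the parent take the write-barrier slow path (see mmtk_gc_wb_fast).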
inline void mmtk_set_side_metadata(const void* side_metadata_base, void* obj) {
intptr_t addr = (intptr_t) obj;
uint8_t* meta_addr = (uint8_t*) side_metadata_base + (addr >> 6);
intptr_t shift = (addr >> 3) & 0b111;
while(1) {
uint8_t old_val = *meta_addr;
uint8_t new_val = old_val | (1 << shift);
if (jl_atomic_cmpswap((_Atomic(uint8_t)*)meta_addr, &old_val, new_val)) {
break;
}
}
}
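As a worked example of the side-metadata addressing used above (one log bit per 8-byte granule, so one metadata byte covers 64 bytes of heap), a small self-contained sanity check; the concrete address is made up for illustration:

#include <assert.h>
#include <stdint.h>

static void side_metadata_addressing_example(void)
{
    uintptr_t addr = 0x10048;                  // example object address
    size_t byte_index = addr >> 6;             // metadata byte: one per 64 bytes of heap
    unsigned bit_index = (addr >> 3) & 0b111;  // bit within that byte: one per 8-byte granule
    assert(byte_index == 0x401);
    assert(bit_index == 1);
}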

STATIC_INLINE void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) {
// FIXME: Similarly, for now, we do nothing
// but when supporting moving, this is where we set the valid object (VO) bit
// and log (old gen) bit
if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
mmtk_set_side_metadata(MMTK_SIDE_LOG_BIT_BASE_ADDRESS, obj);
}
}

JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, size_t align, void *ty)
@@ -1128,7 +1141,9 @@ _Atomic(int) gc_stack_free_idx = 0;

JL_DLLEXPORT void jl_gc_queue_root(const struct _jl_value_t *ptr) JL_NOTSAFEPOINT
{
mmtk_unreachable();
jl_task_t *ct = jl_current_task;
jl_ptls_t ptls = ct->ptls;
mmtk_object_reference_write_slow(&ptls->gc_tls.mmtk_mutator, ptr, (const void*) 0);
}

JL_DLLEXPORT void jl_gc_queue_multiroot(const struct _jl_value_t *root, const void *stored,
@@ -1210,6 +1225,82 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
return NULL;
}

JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destdata,
jl_genericmemory_t *src, char* srcdata,
size_t n) JL_NOTSAFEPOINT
{
jl_datatype_t *dt = (jl_datatype_t*)jl_typetagof(dest);
if (dt != (jl_datatype_t*)jl_typetagof(src))
jl_exceptionf(jl_argumenterror_type, "jl_genericmemory_copyto requires source and dest to have same type");
const jl_datatype_layout_t *layout = dt->layout;
if (layout->flags.arrayelem_isboxed) {
_Atomic(void*) * dest_p = (_Atomic(void*)*)destdata;
_Atomic(void*) * src_p = (_Atomic(void*)*)srcdata;
jl_value_t *owner = jl_genericmemory_owner(dest);
jl_gc_wb(owner, NULL);
if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) {
jl_value_t *src_owner = jl_genericmemory_owner(src);
ssize_t done = 0;
if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) {
if (dest_p < src_p || dest_p > src_p + n) {
for (; done < n; done++) { // copy forwards
void *val = jl_atomic_load_relaxed(src_p + done);
jl_atomic_store_release(dest_p + done, val);
// `val` is young or old-unmarked
if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
jl_gc_queue_root(owner);
break;
}
}
src_p += done;
dest_p += done;
} else {
for (; done < n; done++) { // copy backwards
void *val = jl_atomic_load_relaxed(src_p + n - done - 1);
jl_atomic_store_release(dest_p + n - done - 1, val);
// `val` is young or old-unmarked
if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
jl_gc_queue_root(owner);
break;
}
}
}
n -= done;
}
}
return memmove_refs(dest_p, src_p, n);
}
size_t elsz = layout->size;
char *src_p = srcdata;
int isbitsunion = layout->flags.arrayelem_isunion;
if (isbitsunion) {
char *sourcetypetagdata = jl_genericmemory_typetagdata(src);
char *desttypetagdata = jl_genericmemory_typetagdata(dest);
memmove(desttypetagdata+(size_t)destdata, sourcetypetagdata+(size_t)srcdata, n);
srcdata = (char*)src->ptr + elsz*(size_t)srcdata;
destdata = (char*)dest->ptr + elsz*(size_t)destdata;
}
if (layout->first_ptr != -1) {
memmove_refs((_Atomic(void*)*)destdata, (_Atomic(void*)*)srcdata, n * elsz / sizeof(void*));
jl_value_t *owner = jl_genericmemory_owner(dest);
if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) {
jl_value_t *src_owner = jl_genericmemory_owner(src);
if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) {
dt = (jl_datatype_t*)jl_tparam1(dt);
for (size_t done = 0; done < n; done++) { // copy forwards
char* s = (char*)src_p+done*elsz;
if (*((jl_value_t**)s+layout->first_ptr) != NULL)
jl_gc_queue_multiroot(owner, s, dt);
}
}
}
}
else {
memmove(destdata, srcdata, n * elsz);
}
}
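A minimal caller sketch for the boxed-element case above; illustrative only, assuming both memories share the same boxed element layout and are kept rooted by the caller:

// Copy the first n boxed slots of `src` into `dest`.
static void copy_boxed_prefix(jl_genericmemory_t *dest, jl_genericmemory_t *src, size_t n)
{
    // For boxed layouts, destdata/srcdata are raw pointers to the first slot to copy.
    jl_genericmemory_copyto(dest, (char*)dest->ptr, src, (char*)src->ptr, n);
}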


#ifdef __cplusplus
}
#endif
5 changes: 5 additions & 0 deletions src/gc-stock.c
@@ -4071,6 +4071,11 @@ void jl_gc_notify_image_load(const char* img_data, size_t len)
// Do nothing
}

void jl_gc_notify_image_alloc(const char* img_data, size_t len)
{
// Do nothing
}

JL_DLLEXPORT const char* jl_gc_active_impl(void) {
return "Built with stock GC";
}
40 changes: 39 additions & 1 deletion src/gc-wb-mmtk.h
@@ -11,19 +11,57 @@
extern "C" {
#endif

extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr);
extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr);

#define MMTK_OBJECT_BARRIER (1)
// StickyImmix needs a write barrier; Immix does not.
#ifdef MMTK_PLAN_IMMIX
#define MMTK_NEEDS_WRITE_BARRIER (0)
#endif
#ifdef MMTK_PLAN_STICKYIMMIX
#define MMTK_NEEDS_WRITE_BARRIER (1)
#endif

// GC write barriers

// TODO: implement these functions for MMTk
// Directly call into MMTk for write barrier (debugging only)
STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
jl_task_t *ct = jl_current_task;
jl_ptls_t ptls = ct->ptls;
mmtk_object_reference_write_post(&ptls->gc_tls.mmtk_mutator, parent, ptr);
}

// Inlined fastpath
STATIC_INLINE void mmtk_gc_wb_fast(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
intptr_t addr = (intptr_t) (void*) parent;
uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6);
intptr_t shift = (addr >> 3) & 0b111;
uint8_t byte_val = *meta_addr;
if (((byte_val >> shift) & 1) == 1) {
jl_task_t *ct = jl_current_task;
jl_ptls_t ptls = ct->ptls;
mmtk_object_reference_write_slow(&ptls->gc_tls.mmtk_mutator, parent, ptr);
}
}
}

STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
mmtk_gc_wb_fast(parent, ptr);
}

STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
{
mmtk_gc_wb_fast(ptr, (void*)0);
}

STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
{
mmtk_gc_wb_fast(parent, (void*)0);
}
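For context, a minimal sketch of how the runtime is expected to use these barriers after a field store; the helper below is illustrative and not part of this commit:

// Store a heap reference into a field of `parent`, then notify the GC.
STATIC_INLINE void store_field_with_wb(jl_value_t *parent, jl_value_t **slot, jl_value_t *val)
{
    *slot = val;
    // The fast path checks parent's side-metadata log bit; the slow path calls into MMTk.
    jl_gc_wb(parent, val);
}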


1 change: 1 addition & 0 deletions src/julia.h
@@ -66,6 +66,7 @@

typedef struct _jl_taggedvalue_t jl_taggedvalue_t;
typedef struct _jl_tls_states_t *jl_ptls_t;
typedef struct _jl_genericmemory_t jl_genericmemory_t;

#ifdef JL_LIBRARY_EXPORTS
#include "uv.h"
25 changes: 25 additions & 0 deletions src/llvm-gc-interface-passes.h
@@ -413,4 +413,29 @@ struct FinalLowerGC: private JuliaPassContext {
void lowerSafepoint(CallInst *target, Function &F);
};

// Returns the refinement list recorded for `V` in the late-GC-lowering analysis state,
// or nullptr if `V` was not numbered or its refinement list is empty.
static SmallVector<int, 1> *FindRefinements(Value *V, State *S)
{
if (!S)
return nullptr;
auto it = S->AllPtrNumbering.find(V);
if (it == S->AllPtrNumbering.end())
return nullptr;
auto rit = S->Refinements.find(it->second);
return rit != S->Refinements.end() && !rit->second.empty() ? &rit->second : nullptr;
}

inline bool IsPermRooted(Value *V, State *S)
{
if (isa<Constant>(V))
return true;
if (auto *RefinePtr = FindRefinements(V, S))
return RefinePtr->size() == 1 && (*RefinePtr)[0] == -2;
return false;
}

#endif // LLVM_GC_PASSES_H
75 changes: 75 additions & 0 deletions src/llvm-late-gc-lowering-mmtk.cpp
@@ -94,3 +94,78 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
}
return target;
}

void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified) {
auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
for (auto CI : WriteBarriers) {
auto parent = CI->getArgOperand(0);
if (std::all_of(CI->op_begin() + 1, CI->op_end(),
[parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) {
CI->eraseFromParent();
continue;
}
if (CFGModified) {
*CFGModified = true;
}

IRBuilder<> builder(CI);
builder.SetCurrentDebugLocation(CI->getDebugLoc());

// FIXME: Currently we call the write barrier with the src object (parent).
// This works fine for the object barrier used by generational plans (such as StickyImmix), which does not use the target object at all.
// But for other MMTk plans, we need to be careful.
const bool INLINE_WRITE_BARRIER = true;
if (CI->getCalledOperand() == write_barrier_func) {
if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
if (INLINE_WRITE_BARRIER) {
auto i8_ty = Type::getInt8Ty(F.getContext());
auto intptr_ty = T_size;

// intptr_t addr = (intptr_t) (void*) src;
// uint8_t* meta_addr = (uint8_t*) (SIDE_METADATA_BASE_ADDRESS + (addr >> 6));
intptr_t metadata_base_address = reinterpret_cast<intptr_t>(MMTK_SIDE_LOG_BIT_BASE_ADDRESS);
auto metadata_base_val = ConstantInt::get(intptr_ty, metadata_base_address);
auto metadata_base_ptr = ConstantExpr::getIntToPtr(metadata_base_val, PointerType::get(i8_ty, 0));

auto parent_val = builder.CreatePtrToInt(parent, intptr_ty);
auto shr = builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 6));
auto metadata_ptr = builder.CreateGEP(i8_ty, metadata_base_ptr, shr);

// intptr_t shift = (addr >> 3) & 0b111;
auto shift = builder.CreateAnd(builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 3)), ConstantInt::get(intptr_ty, 7));
auto shift_i8 = builder.CreateTruncOrBitCast(shift, i8_ty);

// uint8_t byte_val = *meta_addr;
auto load_i8 = builder.CreateAlignedLoad(i8_ty, metadata_ptr, Align());

// if (((byte_val >> shift) & 1) == 1) {
auto shifted_load_i8 = builder.CreateLShr(load_i8, shift_i8);
auto masked = builder.CreateAnd(shifted_load_i8, ConstantInt::get(i8_ty, 1));
auto is_unlogged = builder.CreateICmpEQ(masked, ConstantInt::get(i8_ty, 1));

// object_reference_write_slow_call((void*) src, (void*) slot, (void*) target);
MDBuilder MDB(F.getContext());
SmallVector<uint32_t, 2> Weights{1, 9};
if (S) {
if (!S->DT) {
S->DT = &GetDT();
}
DomTreeUpdater dtu = DomTreeUpdater(S->DT, llvm::DomTreeUpdater::UpdateStrategy::Lazy);
auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, CI, false, MDB.createBranchWeights(Weights), &dtu);
builder.SetInsertPoint(mayTriggerSlowpath);
} else {
auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, CI, false, MDB.createBranchWeights(Weights));
builder.SetInsertPoint(mayTriggerSlowpath);
}
builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), { parent });
} else {
Function *wb_func = getOrDeclare(jl_intrinsics::queueGCRoot);
builder.CreateCall(wb_func, { parent });
}
}
} else {
assert(false);
}
CI->eraseFromParent();
}
}
44 changes: 44 additions & 0 deletions src/llvm-late-gc-lowering-stock.cpp
@@ -7,3 +7,47 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
// Do nothing for the stock GC
return target;
}

void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified) {
auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
for (auto CI : WriteBarriers) {
auto parent = CI->getArgOperand(0);
if (std::all_of(CI->op_begin() + 1, CI->op_end(),
[parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) {
CI->eraseFromParent();
continue;
}
if (CFGModified) {
*CFGModified = true;
}

IRBuilder<> builder(CI);
builder.SetCurrentDebugLocation(CI->getDebugLoc());
auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), GC_OLD_MARKED, "parent_bits");
auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED), "parent_old_marked");
auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
builder.SetInsertPoint(mayTrigTerm);
mayTrigTerm->getParent()->setName("may_trigger_wb");
Value *anyChldNotMarked = NULL;
for (unsigned i = 1; i < CI->arg_size(); i++) {
Value *child = CI->getArgOperand(i);
Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), GC_MARKED, "child_bit");
Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0), "child_not_marked");
anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
}
assert(anyChldNotMarked); // handled by all_of test above
MDBuilder MDB(parent->getContext());
SmallVector<uint32_t, 2> Weights{1, 9};
auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
MDB.createBranchWeights(Weights));
trigTerm->getParent()->setName("trigger_wb");
builder.SetInsertPoint(trigTerm);
if (CI->getCalledOperand() == write_barrier_func) {
builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
}
else {
assert(false);
}
CI->eraseFromParent();
}
}
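For readability, an illustrative C equivalent of the check this pass inlines (not part of the commit): queue the parent as a root only if it is old-marked and at least one stored child is not marked.

static void stock_write_barrier_equivalent(jl_value_t *parent, jl_value_t **children, size_t n)
{
    // children are assumed non-NULL, as in the emitted IR
    if (jl_astaggedvalue(parent)->bits.gc == GC_OLD_MARKED) {          // "may_trigger_wb"
        for (size_t i = 0; i < n; i++) {
            if (!(jl_astaggedvalue(children[i])->bits.gc & GC_MARKED)) {
                jl_gc_queue_root(parent);                              // "trigger_wb"
                break;
            }
        }
    }
}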