From 2713ff1b8810faed52fd3f1774165d53c919b8fd Mon Sep 17 00:00:00 2001
From: Eduardo Souza
Date: Tue, 4 Feb 2025 03:49:12 +0000
Subject: [PATCH] Moving write barrier away from genericmemory.c

---
Review note: jl_gc_wb_genericmemory_copy_boxed receives dest_p/src_p by value, so
it cannot hand advanced pointers back to jl_genericmemory_copyto. The forward branch
therefore completes the copy itself (leaving *n == 0) instead of breaking early.

 src/gc-interface.h  |  15 ++++++
 src/genericmemory.c |  43 +---------------
 src/julia.h         | 122 ++++++++++++++++++++++++++++++++------------
 3 files changed, 106 insertions(+), 74 deletions(-)

diff --git a/src/gc-interface.h b/src/gc-interface.h
index 826e91355b17a4..f47410f6f728a8 100644
--- a/src/gc-interface.h
+++ b/src/gc-interface.h
@@ -8,6 +8,7 @@
 #define JL_GC_INTERFACE_H
 
 #include "dtypes.h"
+#include "julia_atomics.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -17,6 +18,7 @@ struct _jl_tls_states_t;
 struct _jl_value_t;
 struct _jl_weakref_t;
 struct _jl_datatype_t;
+struct _jl_genericmemory_t;
 
 // ========================================================================= //
 // GC Metrics
@@ -250,6 +252,19 @@ STATIC_INLINE void jl_gc_wb_knownold(const void *parent, const void *ptr) JL_NOT
 STATIC_INLINE void jl_gc_multi_wb(const void *parent,
                                   const struct _jl_value_t *ptr) JL_NOTSAFEPOINT;
 
+// Write-barrier function that must be used after copying fields of elements of genericmemory objects
+// into another. It should be semantically equivalent to triggering multiple write barriers – one
+// per field of the object being copied, but may be special-cased for performance reasons.
+STATIC_INLINE void jl_gc_wb_genericmemory_copy_ptr(const struct _jl_value_t *owner, struct _jl_genericmemory_t *src, char* src_p,
+                                                    size_t n, struct _jl_datatype_t *dt) JL_NOTSAFEPOINT;
+
+// Similar to jl_gc_wb_genericmemory_copy_ptr but must be used when copying *boxed* elements of a
+// genericmemory object. Unlike jl_gc_wb_genericmemory_copy_ptr, this barrier may itself copy elements.
+// src_p and dest_p are passed by value and are not updated for the caller; only *n is reduced, so on
+// return the first *n elements are still uncopied and the caller copies them using memmove_refs.
+STATIC_INLINE void jl_gc_wb_genericmemory_copy_boxed(const struct _jl_value_t *owner, _Atomic(void*) * dest_p,
+                                                      struct _jl_genericmemory_t *src, _Atomic(void*) * src_p,
+                                                      size_t* n) JL_NOTSAFEPOINT;
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/genericmemory.c b/src/genericmemory.c
index e435ec3b63c9f1..b455a2fb362741 100644
--- a/src/genericmemory.c
+++ b/src/genericmemory.c
@@ -235,36 +235,7 @@ JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destda
         _Atomic(void*) * dest_p = (_Atomic(void*)*)destdata;
         _Atomic(void*) * src_p = (_Atomic(void*)*)srcdata;
         jl_value_t *owner = jl_genericmemory_owner(dest);
-        if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) {
-            jl_value_t *src_owner = jl_genericmemory_owner(src);
-            ssize_t done = 0;
-            if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) {
-                if (dest_p < src_p || dest_p > src_p + n) {
-                    for (; done < n; done++) { // copy forwards
-                        void *val = jl_atomic_load_relaxed(src_p + done);
-                        jl_atomic_store_release(dest_p + done, val);
-                        // `val` is young or old-unmarked
-                        if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
-                            jl_gc_queue_root(owner);
-                            break;
-                        }
-                    }
-                    src_p += done;
-                    dest_p += done;
-                } else {
-                    for (; done < n; done++) { // copy backwards
-                        void *val = jl_atomic_load_relaxed(src_p + n - done - 1);
-                        jl_atomic_store_release(dest_p + n - done - 1, val);
-                        // `val` is young or old-unmarked
-                        if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
-                            jl_gc_queue_root(owner);
-                            break;
-                        }
-                    }
-                }
-                n -= done;
-            }
-        }
+        jl_gc_wb_genericmemory_copy_boxed(owner, dest_p, src, src_p, &n);
         return memmove_refs(dest_p, src_p, n);
     }
     size_t elsz = layout->size;
@@ -280,17 +251,7 @@ JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destda
     if (layout->first_ptr != -1) {
         memmove_refs((_Atomic(void*)*)destdata, (_Atomic(void*)*)srcdata, n * elsz / sizeof(void*));
         jl_value_t *owner = jl_genericmemory_owner(dest);
-        if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) {
-            jl_value_t *src_owner = jl_genericmemory_owner(src);
-            if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) {
-                dt = (jl_datatype_t*)jl_tparam1(dt);
-                for (size_t done = 0; done < n; done++) { // copy forwards
-                    char* s = (char*)src_p+done*elsz;
-                    if (*((jl_value_t**)s+layout->first_ptr) != NULL)
-                        jl_gc_queue_multiroot(owner, s, dt);
-                }
-            }
-        }
+        jl_gc_wb_genericmemory_copy_ptr(owner, src, src_p, n, dt);
     }
     else {
         memmove(destdata, srcdata, n * elsz);
diff --git a/src/julia.h b/src/julia.h
index a80a69049ccb2e..3b7ab6281a1d9a 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -162,7 +162,7 @@ typedef struct {
     // jl_value_t *data[];
 } jl_svec_t;
 
-JL_EXTENSION typedef struct {
+JL_EXTENSION typedef struct _jl_genericmemory_t {
     JL_DATA_TYPE
     size_t length;
     void *ptr;
@@ -1137,38 +1137,6 @@ JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz);
 // thread-local allocator of the current thread.
 JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value);
 
-// GC write barriers
-
-STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
-{
-    // parent and ptr isa jl_value_t*
-    if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 /* GC_OLD_MARKED */ && // parent is old and not in remset
-                   (jl_astaggedvalue(ptr)->bits.gc & 1 /* GC_MARKED */) == 0)) // ptr is young
-        jl_gc_queue_root((jl_value_t*)parent);
-}
-
-STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
-{
-    // if ptr is old
-    if (__unlikely(jl_astaggedvalue(ptr)->bits.gc == 3 /* GC_OLD_MARKED */)) {
-        jl_gc_queue_root((jl_value_t*)ptr);
-    }
-}
-
-STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
-{
-    // 3 == GC_OLD_MARKED
-    // ptr is an immutable object
-    if (__likely(jl_astaggedvalue(parent)->bits.gc != 3))
-        return; // parent is young or in remset
-    if (__likely(jl_astaggedvalue(ptr)->bits.gc == 3))
-        return; // ptr is old and not in remset (thus it does not point to young)
-    jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(ptr);
-    const jl_datatype_layout_t *ly = dt->layout;
-    if (ly->npointers)
-        jl_gc_queue_multiroot((jl_value_t*)parent, ptr, dt);
-}
-
 JL_DLLEXPORT void jl_gc_safepoint(void);
 JL_DLLEXPORT int jl_safepoint_suspend_thread(int tid, int waitstate);
 JL_DLLEXPORT void jl_safepoint_suspend_all_threads(struct _jl_task_t *ct);
@@ -1287,6 +1255,94 @@ STATIC_INLINE jl_value_t *jl_genericmemory_ptr_set(
 }
 #endif
 
+// GC write barriers
+
+STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
+{
+    // parent and ptr isa jl_value_t*
+    if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 /* GC_OLD_MARKED */ && // parent is old and not in remset
+                   (jl_astaggedvalue(ptr)->bits.gc & 1 /* GC_MARKED */) == 0)) // ptr is young
+        jl_gc_queue_root((jl_value_t*)parent);
+}
+
+STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
+{
+    // if ptr is old
+    if (__unlikely(jl_astaggedvalue(ptr)->bits.gc == 3 /* GC_OLD_MARKED */)) {
+        jl_gc_queue_root((jl_value_t*)ptr);
+    }
+}
+
+STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
+{
+    // 3 == GC_OLD_MARKED
+    // ptr is an immutable object
+    if (__likely(jl_astaggedvalue(parent)->bits.gc != 3))
+        return; // parent is young or in remset
+    if (__likely(jl_astaggedvalue(ptr)->bits.gc == 3))
+        return; // ptr is old and not in remset (thus it does not point to young)
+    jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(ptr);
+    const jl_datatype_layout_t *ly = dt->layout;
+    if (ly->npointers)
+        jl_gc_queue_multiroot((jl_value_t*)parent, ptr, dt);
+}
+
+STATIC_INLINE jl_value_t *jl_genericmemory_owner(jl_genericmemory_t *m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
+
+STATIC_INLINE void jl_gc_wb_genericmemory_copy_boxed(const jl_value_t *dest_owner, _Atomic(void*) * dest_p,
+                                                      jl_genericmemory_t *src, _Atomic(void*) * src_p,
+                                                      size_t* n) JL_NOTSAFEPOINT
+{
+    if (__unlikely(jl_astaggedvalue(dest_owner)->bits.gc == 3 /* GC_OLD_MARKED */ )) {
+        jl_value_t *src_owner = jl_genericmemory_owner(src);
+        size_t done = 0;
+        if (jl_astaggedvalue(src_owner)->bits.gc != 3 /* GC_OLD_MARKED */) {
+            if (dest_p < src_p || dest_p > src_p + (*n)) {
+                // src_p/dest_p are by-value: finish the forward copy here so *n ends at 0
+                int queued = 0;
+                for (; done < (*n); done++) { // copy forwards
+                    void *val = jl_atomic_load_relaxed(src_p + done);
+                    jl_atomic_store_release(dest_p + done, val);
+                    // `val` is young or old-unmarked
+                    if (!queued && val && !(jl_astaggedvalue(val)->bits.gc & 1 /* GC_MARKED */)) {
+                        jl_gc_queue_root(dest_owner);
+                        queued = 1;
+                    }
+                }
+            }
+            else {
+                for (; done < (*n); done++) { // copy backwards
+                    void *val = jl_atomic_load_relaxed(src_p + (*n) - done - 1);
+                    jl_atomic_store_release(dest_p + (*n) - done - 1, val);
+                    // `val` is young or old-unmarked
+                    if (val && !(jl_astaggedvalue(val)->bits.gc & 1 /* GC_MARKED */)) {
+                        jl_gc_queue_root(dest_owner);
+                        break;
+                    }
+                }
+            }
+            (*n) -= done;
+        }
+    }
+}
+
+STATIC_INLINE void jl_gc_wb_genericmemory_copy_ptr(const jl_value_t *owner, jl_genericmemory_t *src, char* src_p,
+                                                    size_t n, jl_datatype_t *dt) JL_NOTSAFEPOINT
+{
+    if (__unlikely(jl_astaggedvalue(owner)->bits.gc == 3 /* GC_OLD_MARKED */)) {
+        jl_value_t *src_owner = jl_genericmemory_owner(src);
+        size_t elsz = dt->layout->size;
+        if (jl_astaggedvalue(src_owner)->bits.gc != 3 /* GC_OLD_MARKED */) {
+            dt = (jl_datatype_t*)jl_tparam1(dt);
+            for (size_t done = 0; done < n; done++) { // copy forwards
+                char* s = (char*)src_p+done*elsz;
+                if (*((jl_value_t**)s+dt->layout->first_ptr) != NULL)
+                    jl_gc_queue_multiroot(owner, s, dt);
+            }
+        }
+    }
+}
+
 STATIC_INLINE uint8_t jl_memory_uint8_ref(void *m, size_t i) JL_NOTSAFEPOINT
 {
     jl_genericmemory_t *m_ = (jl_genericmemory_t*)m;