Skip to content

Commit

Permalink
Make late_gc_lowering more robust (#57380)
Browse files Browse the repository at this point in the history
There are cases where we optimize the SRet more than the pass expected
so try and handle those. I'm tryin to get a test for this, this is
separated from #52850 to make
merging both easier

---------

Co-authored-by: Jameson Nash <[email protected]>
  • Loading branch information
gbaraldi and vtjnash authored Feb 17, 2025
1 parent e331deb commit 7c89aba
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 36 deletions.
56 changes: 45 additions & 11 deletions src/llvm-final-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,21 +161,26 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
target->replaceAllUsesWith(newI);
target->eraseFromParent();
}
bool FinalLowerGC::shouldRunFinalGC(Function &F)
{
bool should_run = 0;
should_run |= getOrNull(jl_intrinsics ::newGCFrame) != nullptr;
should_run |= getOrNull(jl_intrinsics ::getGCFrameSlot) != nullptr;
should_run |= getOrNull(jl_intrinsics ::pushGCFrame) != nullptr;
should_run |= getOrNull(jl_intrinsics ::popGCFrame) != nullptr;
should_run |= getOrNull(jl_intrinsics ::GCAllocBytes) != nullptr;
should_run |= getOrNull(jl_intrinsics ::queueGCRoot) != nullptr;
should_run |= getOrNull(jl_intrinsics ::safepoint) != nullptr;
return should_run;
}

bool FinalLowerGC::runOnFunction(Function &F)
{
initAll(*F.getParent());
if (!pgcstack_getter && !adoptthread_func) {
LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << "\n");
return false;
}

// Look for a call to 'julia.get_pgcstack'.
pgcstack = getPGCstack(F);
if (!pgcstack) {
LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << " no pgcstack\n");
return false;
}
if (!shouldRunFinalGC(F))
goto verify_skip;

LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");
queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot);
smallAllocFunc = getOrDeclare(jl_well_known::GCSmallAlloc);
Expand Down Expand Up @@ -212,8 +217,37 @@ bool FinalLowerGC::runOnFunction(Function &F)
#undef LOWER_INTRINSIC
}
}

return true;
// Verify that skipping was in fact correct
verify_skip:
#ifdef JL_VERIFY_PASSES
for (auto &BB : F) {
for (auto &I : make_early_inc_range(BB)) {
auto *CI = dyn_cast<CallInst>(&I);
if (!CI)
continue;

Value *callee = CI->getCalledOperand();
assert(callee);
auto IS_INTRINSIC = [&](auto intrinsic) {
auto intrinsic2 = getOrNull(intrinsic);
if (intrinsic2 == callee) {
errs() << "Final-GC-lowering didn't eliminate all intrinsics'" << F.getName() << "', dumping entire module!\n\n";
errs() << *F.getParent() << "\n";
abort();
}
};
IS_INTRINSIC(jl_intrinsics::newGCFrame);
IS_INTRINSIC(jl_intrinsics::pushGCFrame);
IS_INTRINSIC(jl_intrinsics::popGCFrame);
IS_INTRINSIC(jl_intrinsics::getGCFrameSlot);
IS_INTRINSIC(jl_intrinsics::GCAllocBytes);
IS_INTRINSIC(jl_intrinsics::queueGCRoot);
IS_INTRINSIC(jl_intrinsics::safepoint);
}
}
#endif
return false;
}

PreservedAnalyses FinalLowerGCPass::run(Function &F, FunctionAnalysisManager &AM)
Expand Down
2 changes: 2 additions & 0 deletions src/llvm-gc-interface-passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,8 @@ struct FinalLowerGC: private JuliaPassContext {

// Lowers a `julia.safepoint` intrinsic.
void lowerSafepoint(CallInst *target, Function &F);
// Check if the pass should be run
bool shouldRunFinalGC(Function &F);
};

#endif // LLVM_GC_PASSES_H
82 changes: 57 additions & 25 deletions src/llvm-late-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1114,6 +1114,47 @@ void LateLowerGCFrame::FixUpRefinements(ArrayRef<int> PHINumbers, State &S)
}
}

// Look through instructions to find all possible allocas that might become the sret argument
static SmallSetVector<AllocaInst *, 8> FindSretAllocas(Value* SRetArg) {
SmallSetVector<AllocaInst *, 8> allocas;
if (AllocaInst *OneSRet = dyn_cast<AllocaInst>(SRetArg)) {
allocas.insert(OneSRet); // Found it directly
} else {
SmallSetVector<Value *, 8> worklist;
worklist.insert(SRetArg);
while (!worklist.empty()) {
Value *V = worklist.pop_back_val();
if (AllocaInst *Alloca = dyn_cast<AllocaInst>(V->stripInBoundsOffsets())) {
allocas.insert(Alloca); // Found a candidate
} else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
for (Value *Incoming : Phi->incoming_values()) {
worklist.insert(Incoming);
}
} else if (SelectInst *SI = dyn_cast<SelectInst>(SRetArg)) {
auto TrueBranch = SI->getTrueValue();
auto FalseBranch = SI->getFalseValue();
if (TrueBranch && FalseBranch) {
worklist.insert(TrueBranch);
worklist.insert(FalseBranch);
} else {
llvm_dump(SI);
assert(false && "Malformed Select");
}
} else {
llvm_dump(V);
assert(false && "Unexpected SRet argument");
}
}
}
assert(allocas.size() > 0);
assert(std::all_of(allocas.begin(), allocas.end(), [&] (AllocaInst* SRetAlloca) JL_NOTSAFEPOINT {
return (SRetAlloca->getArraySize() == allocas[0]->getArraySize() &&
SRetAlloca->getAllocatedType() == allocas[0]->getAllocatedType());
}
));
return allocas;
}

State LateLowerGCFrame::LocalScan(Function &F) {
State S(F);
SmallVector<int, 8> PHINumbers;
Expand Down Expand Up @@ -1165,46 +1206,35 @@ State LateLowerGCFrame::LocalScan(Function &F) {
Type *ElT = getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType();
auto tracked = CountTrackedPointers(ElT, true);
if (tracked.count) {
AllocaInst *SRet = dyn_cast<AllocaInst>((CI->arg_begin()[0])->stripInBoundsOffsets());
assert(SRet);
{
SmallSetVector<AllocaInst *, 8> allocas = FindSretAllocas((CI->arg_begin()[0])->stripInBoundsOffsets());
// We know that with the right optimizations we can forward a sret directly from an argument
// This hasn't been seen without adding IPO effects to julia functions but it's possible we need to handle that too
// If they are tracked.all we can just pass through but if they have a roots bundle it's possible we need to emit some copies ¯\_(ツ)_/¯
for (AllocaInst *SRet : allocas) {
if (!(SRet->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) {
assert(!tracked.derived);
if (tracked.all) {
S.ArrayAllocas[SRet] = tracked.count * cast<ConstantInt>(SRet->getArraySize())->getZExtValue();
}
else {
Value *arg1 = (CI->arg_begin()[1])->stripInBoundsOffsets();
SmallSetVector<AllocaInst *, 8> gc_allocas = FindSretAllocas(arg1);
AllocaInst *SRet_gc = nullptr;
if (PHINode *Phi = dyn_cast<PHINode>(arg1)) {
for (Value *V : Phi->incoming_values()) {
if (AllocaInst *Alloca = dyn_cast<AllocaInst>(V->stripInBoundsOffsets())) {
if (SRet_gc == nullptr) {
SRet_gc = Alloca;
} else if (SRet_gc == Alloca) {
continue;
} else {
llvm_dump(Alloca);
llvm_dump(SRet_gc);
assert(false && "Allocas in Phi node should match");
}
} else {
llvm_dump(V->stripInBoundsOffsets());
assert(false && "Expected alloca");
}
}
} else {
SRet_gc = dyn_cast<AllocaInst>(arg1);
if (gc_allocas.size() == 1) {
SRet_gc = gc_allocas.pop_back_val();
}
if (!SRet_gc) {
else {
llvm_dump(CI);
llvm_dump(arg1);
assert(false && "Expected alloca");
for (AllocaInst *Alloca : gc_allocas) {
llvm_dump(Alloca);
}
assert(false && "Expected single alloca");
}
Type *ElT = SRet_gc->getAllocatedType();
if (!(SRet_gc->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) {
S.ArrayAllocas[SRet_gc] = tracked.count * cast<ConstantInt>(SRet_gc->getArraySize())->getZExtValue();
}
break; // Found our gc roots
}
}
}
Expand Down Expand Up @@ -1401,6 +1431,8 @@ State LateLowerGCFrame::LocalScan(Function &F) {
return S;
}



static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef<unsigned> Idxs, IRBuilder<> &irbuilder) {
Type *T_int32 = Type::getInt32Ty(V->getContext());
if (isptr) {
Expand Down
49 changes: 49 additions & 0 deletions test/llvmpasses/late-lower-gc-sret.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
; This file is a part of Julia. License is MIT: https://julialang.org/license

; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s

declare ptr @julia.get_pgcstack()

declare swiftcc void @sret_call(ptr noalias nocapture noundef nonnull sret([3 x ptr addrspace(10)]), ptr nonnull swiftself, ptr addrspace(10) nonnull)

define hidden swiftcc nonnull ptr addrspace(10) @sret_select(ptr nonnull swiftself %0, ptr addrspace(10) noundef nonnull align 8 dereferenceable(88) %1, i1 %unpredictable) {
; CHECK-LABEL: @sret_select
; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 6)
; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 3)
; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0)
; CHECK: %pgcstack = call ptr @julia.get_pgcstack()
; CHECK: call void @julia.push_gc_frame(ptr %gcframe, i32 6)
%pgcstack = call ptr @julia.get_pgcstack()
%3 = alloca [3 x i64], align 8
%4 = alloca [3 x i64], align 8
%5 = select i1 %unpredictable, ptr %3, ptr %4
call swiftcc void @sret_call(ptr noalias nocapture noundef nonnull sret([3 x ptr addrspace(10)]) %5, ptr nonnull swiftself %0, ptr addrspace(10) nonnull %1)
; CHECK: call void @julia.pop_gc_frame(ptr %gcframe)
ret ptr addrspace(10) %1
}

define hidden swiftcc nonnull ptr addrspace(10) @sret_phi(ptr nonnull swiftself %0, ptr addrspace(10) noundef nonnull align 8 dereferenceable(88) %1, i1 %unpredictable) {
top:
; CHECK-LABEL: @sret_phi
; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 6)
; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 3)
; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0)
; CHECK: %pgcstack = call ptr @julia.get_pgcstack()
; CHECK: call void @julia.push_gc_frame(ptr %gcframe, i32 6)
%pgcstack = call ptr @julia.get_pgcstack()
%2 = alloca [3 x i64], align 8
%3 = alloca [3 x i64], align 8
br i1 %unpredictable, label %true, label %false

true: ; preds = %top
br label %ret

false: ; preds = %top
br label %ret

ret: ; preds = %false, %true
%4 = phi ptr [ %2, %true ], [ %3, %false ]
call swiftcc void @sret_call(ptr noalias nocapture noundef nonnull sret([3 x ptr addrspace(10)]) %4, ptr nonnull swiftself %0, ptr addrspace(10) nonnull %1)
; CHECK: call void @julia.pop_gc_frame(ptr %gcframe)
ret ptr addrspace(10) %1
}

0 comments on commit 7c89aba

Please sign in to comment.