diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 76dcd944890ab..50bdd2251ab42 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -161,21 +161,26 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) target->replaceAllUsesWith(newI); target->eraseFromParent(); } +bool FinalLowerGC::shouldRunFinalGC(Function &F) +{ + bool should_run = 0; + should_run |= getOrNull(jl_intrinsics ::newGCFrame) != nullptr; + should_run |= getOrNull(jl_intrinsics ::getGCFrameSlot) != nullptr; + should_run |= getOrNull(jl_intrinsics ::pushGCFrame) != nullptr; + should_run |= getOrNull(jl_intrinsics ::popGCFrame) != nullptr; + should_run |= getOrNull(jl_intrinsics ::GCAllocBytes) != nullptr; + should_run |= getOrNull(jl_intrinsics ::queueGCRoot) != nullptr; + should_run |= getOrNull(jl_intrinsics ::safepoint) != nullptr; + return should_run; +} bool FinalLowerGC::runOnFunction(Function &F) { initAll(*F.getParent()); - if (!pgcstack_getter && !adoptthread_func) { - LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << "\n"); - return false; - } - - // Look for a call to 'julia.get_pgcstack'. pgcstack = getPGCstack(F); - if (!pgcstack) { - LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << " no pgcstack\n"); - return false; - } + if (!shouldRunFinalGC(F)) + goto verify_skip; + LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n"); queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot); smallAllocFunc = getOrDeclare(jl_well_known::GCSmallAlloc); @@ -212,8 +217,37 @@ bool FinalLowerGC::runOnFunction(Function &F) #undef LOWER_INTRINSIC } } - return true; + // Verify that skipping was in fact correct + verify_skip: + #ifdef JL_VERIFY_PASSES + for (auto &BB : F) { + for (auto &I : make_early_inc_range(BB)) { + auto *CI = dyn_cast(&I); + if (!CI) + continue; + + Value *callee = CI->getCalledOperand(); + assert(callee); + auto IS_INTRINSIC = [&](auto intrinsic) { + auto intrinsic2 = getOrNull(intrinsic); + if (intrinsic2 == callee) { + errs() << "Final-GC-lowering didn't eliminate all intrinsics'" << F.getName() << "', dumping entire module!\n\n"; + errs() << *F.getParent() << "\n"; + abort(); + } + }; + IS_INTRINSIC(jl_intrinsics::newGCFrame); + IS_INTRINSIC(jl_intrinsics::pushGCFrame); + IS_INTRINSIC(jl_intrinsics::popGCFrame); + IS_INTRINSIC(jl_intrinsics::getGCFrameSlot); + IS_INTRINSIC(jl_intrinsics::GCAllocBytes); + IS_INTRINSIC(jl_intrinsics::queueGCRoot); + IS_INTRINSIC(jl_intrinsics::safepoint); + } + } + #endif + return false; } PreservedAnalyses FinalLowerGCPass::run(Function &F, FunctionAnalysisManager &AM) diff --git a/src/llvm-gc-interface-passes.h b/src/llvm-gc-interface-passes.h index 7b2a4bb033203..b899cd9494673 100644 --- a/src/llvm-gc-interface-passes.h +++ b/src/llvm-gc-interface-passes.h @@ -411,6 +411,8 @@ struct FinalLowerGC: private JuliaPassContext { // Lowers a `julia.safepoint` intrinsic. void lowerSafepoint(CallInst *target, Function &F); + // Check if the pass should be run + bool shouldRunFinalGC(Function &F); }; #endif // LLVM_GC_PASSES_H diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 7d6fba65a79e7..81d636b58c527 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -1114,6 +1114,47 @@ void LateLowerGCFrame::FixUpRefinements(ArrayRef PHINumbers, State &S) } } +// Look through instructions to find all possible allocas that might become the sret argument +static SmallSetVector FindSretAllocas(Value* SRetArg) { + SmallSetVector allocas; + if (AllocaInst *OneSRet = dyn_cast(SRetArg)) { + allocas.insert(OneSRet); // Found it directly + } else { + SmallSetVector worklist; + worklist.insert(SRetArg); + while (!worklist.empty()) { + Value *V = worklist.pop_back_val(); + if (AllocaInst *Alloca = dyn_cast(V->stripInBoundsOffsets())) { + allocas.insert(Alloca); // Found a candidate + } else if (PHINode *Phi = dyn_cast(V)) { + for (Value *Incoming : Phi->incoming_values()) { + worklist.insert(Incoming); + } + } else if (SelectInst *SI = dyn_cast(SRetArg)) { + auto TrueBranch = SI->getTrueValue(); + auto FalseBranch = SI->getFalseValue(); + if (TrueBranch && FalseBranch) { + worklist.insert(TrueBranch); + worklist.insert(FalseBranch); + } else { + llvm_dump(SI); + assert(false && "Malformed Select"); + } + } else { + llvm_dump(V); + assert(false && "Unexpected SRet argument"); + } + } + } + assert(allocas.size() > 0); + assert(std::all_of(allocas.begin(), allocas.end(), [&] (AllocaInst* SRetAlloca) JL_NOTSAFEPOINT { + return (SRetAlloca->getArraySize() == allocas[0]->getArraySize() && + SRetAlloca->getAllocatedType() == allocas[0]->getAllocatedType()); + } + )); + return allocas; +} + State LateLowerGCFrame::LocalScan(Function &F) { State S(F); SmallVector PHINumbers; @@ -1165,9 +1206,11 @@ State LateLowerGCFrame::LocalScan(Function &F) { Type *ElT = getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType(); auto tracked = CountTrackedPointers(ElT, true); if (tracked.count) { - AllocaInst *SRet = dyn_cast((CI->arg_begin()[0])->stripInBoundsOffsets()); - assert(SRet); - { + SmallSetVector allocas = FindSretAllocas((CI->arg_begin()[0])->stripInBoundsOffsets()); + // We know that with the right optimizations we can forward a sret directly from an argument + // This hasn't been seen without adding IPO effects to julia functions but it's possible we need to handle that too + // If they are tracked.all we can just pass through but if they have a roots bundle it's possible we need to emit some copies ¯\_(ツ)_/¯ + for (AllocaInst *SRet : allocas) { if (!(SRet->isStaticAlloca() && isa(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) { assert(!tracked.derived); if (tracked.all) { @@ -1175,36 +1218,23 @@ State LateLowerGCFrame::LocalScan(Function &F) { } else { Value *arg1 = (CI->arg_begin()[1])->stripInBoundsOffsets(); + SmallSetVector gc_allocas = FindSretAllocas(arg1); AllocaInst *SRet_gc = nullptr; - if (PHINode *Phi = dyn_cast(arg1)) { - for (Value *V : Phi->incoming_values()) { - if (AllocaInst *Alloca = dyn_cast(V->stripInBoundsOffsets())) { - if (SRet_gc == nullptr) { - SRet_gc = Alloca; - } else if (SRet_gc == Alloca) { - continue; - } else { - llvm_dump(Alloca); - llvm_dump(SRet_gc); - assert(false && "Allocas in Phi node should match"); - } - } else { - llvm_dump(V->stripInBoundsOffsets()); - assert(false && "Expected alloca"); - } - } - } else { - SRet_gc = dyn_cast(arg1); + if (gc_allocas.size() == 1) { + SRet_gc = gc_allocas.pop_back_val(); } - if (!SRet_gc) { + else { llvm_dump(CI); - llvm_dump(arg1); - assert(false && "Expected alloca"); + for (AllocaInst *Alloca : gc_allocas) { + llvm_dump(Alloca); + } + assert(false && "Expected single alloca"); } Type *ElT = SRet_gc->getAllocatedType(); if (!(SRet_gc->isStaticAlloca() && isa(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) { S.ArrayAllocas[SRet_gc] = tracked.count * cast(SRet_gc->getArraySize())->getZExtValue(); } + break; // Found our gc roots } } } @@ -1401,6 +1431,8 @@ State LateLowerGCFrame::LocalScan(Function &F) { return S; } + + static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef Idxs, IRBuilder<> &irbuilder) { Type *T_int32 = Type::getInt32Ty(V->getContext()); if (isptr) { diff --git a/test/llvmpasses/late-lower-gc-sret.ll b/test/llvmpasses/late-lower-gc-sret.ll new file mode 100644 index 0000000000000..0c31edb3494d1 --- /dev/null +++ b/test/llvmpasses/late-lower-gc-sret.ll @@ -0,0 +1,49 @@ +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s + +declare ptr @julia.get_pgcstack() + +declare swiftcc void @sret_call(ptr noalias nocapture noundef nonnull sret([3 x ptr addrspace(10)]), ptr nonnull swiftself, ptr addrspace(10) nonnull) + +define hidden swiftcc nonnull ptr addrspace(10) @sret_select(ptr nonnull swiftself %0, ptr addrspace(10) noundef nonnull align 8 dereferenceable(88) %1, i1 %unpredictable) { + ; CHECK-LABEL: @sret_select + ; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 6) + ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 3) + ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0) + ; CHECK: %pgcstack = call ptr @julia.get_pgcstack() + ; CHECK: call void @julia.push_gc_frame(ptr %gcframe, i32 6) + %pgcstack = call ptr @julia.get_pgcstack() + %3 = alloca [3 x i64], align 8 + %4 = alloca [3 x i64], align 8 + %5 = select i1 %unpredictable, ptr %3, ptr %4 + call swiftcc void @sret_call(ptr noalias nocapture noundef nonnull sret([3 x ptr addrspace(10)]) %5, ptr nonnull swiftself %0, ptr addrspace(10) nonnull %1) + ; CHECK: call void @julia.pop_gc_frame(ptr %gcframe) + ret ptr addrspace(10) %1 +} + +define hidden swiftcc nonnull ptr addrspace(10) @sret_phi(ptr nonnull swiftself %0, ptr addrspace(10) noundef nonnull align 8 dereferenceable(88) %1, i1 %unpredictable) { +top: + ; CHECK-LABEL: @sret_phi + ; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 6) + ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 3) + ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0) + ; CHECK: %pgcstack = call ptr @julia.get_pgcstack() + ; CHECK: call void @julia.push_gc_frame(ptr %gcframe, i32 6) + %pgcstack = call ptr @julia.get_pgcstack() + %2 = alloca [3 x i64], align 8 + %3 = alloca [3 x i64], align 8 + br i1 %unpredictable, label %true, label %false + +true: ; preds = %top + br label %ret + +false: ; preds = %top + br label %ret + +ret: ; preds = %false, %true + %4 = phi ptr [ %2, %true ], [ %3, %false ] + call swiftcc void @sret_call(ptr noalias nocapture noundef nonnull sret([3 x ptr addrspace(10)]) %4, ptr nonnull swiftself %0, ptr addrspace(10) nonnull %1) + ; CHECK: call void @julia.pop_gc_frame(ptr %gcframe) + ret ptr addrspace(10) %1 +}