Skip to content
This repository has been archived by the owner on Jan 20, 2024. It is now read-only.

Commit

Permalink
[Flang][OpenMP][MLIR] Create lifetime markers for allocations only us…
Browse files Browse the repository at this point in the history
…ed within OpenMP loop regions

By creating `llvm.lifetime.start` and `llvm.lifetime.end` markers for outside
allocations around the body of the loop produced for `omp.wsloop` and
`omp.simdloop` MLIR operations, later uses of the LLVM `CodeExtractor` class to
potentially outline the body of these loops into independent functions will be
able to see the reduced scope of use of these allocations and sink them into
the outlined function's body rather than unnecessarily passing them as
arguments. This can also help later optimization stages detect cases where
allocations for loop indices are redundant.
  • Loading branch information
skatrak committed Sep 27, 2023
1 parent ccde72e commit ff9c6db
Show file tree
Hide file tree
Showing 2 changed files with 165 additions and 6 deletions.
87 changes: 87 additions & 0 deletions flang/test/Lower/OpenMP/loop-lifetime.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
! This test checks the insertion of lifetime information for loop indices of
! OpenMP loop operations.
! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm -fopenmp %s -o - | FileCheck %s

! CHECK-LABEL: define void @wsloop_i32
subroutine wsloop_i32()
! Worksharing loop (`!$omp do`) whose index is a default-kind integer.
! The directives below expect i32 allocas captured as LASTITER, LB, UB, STRIDE
! and I (in any order), with I later used as the operand of 4-byte lifetime
! markers.
! CHECK-DAG: %[[LASTITER:.*]] = alloca i32
! CHECK-DAG: %[[LB:.*]] = alloca i32
! CHECK-DAG: %[[UB:.*]] = alloca i32
! CHECK-DAG: %[[STRIDE:.*]] = alloca i32
! CHECK-DAG: %[[I:.*]] = alloca i32
integer :: i

! Expected shape of the generated loop body:
!  - the lifetime start of I is immediately followed by a branch to the body
!    block;
!  - no further label-like line appears between the body block's label and
!    its branch to the continuation block;
!  - the lifetime end of I is the first instruction of the continuation block.
! CHECK: call void @llvm.lifetime.start.p0(i64 4, ptr %[[I]])
! CHECK-NEXT: br label %[[WSLOOP_BLOCK:.*]]
! CHECK: [[WSLOOP_BLOCK]]:
! CHECK-NOT: {{^.*}}:
! CHECK: br label %[[CONT_BLOCK:.*]]
! CHECK: [[CONT_BLOCK]]:
! CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %[[I]])
!$omp do
do i = 1, 10
print *, i
end do
!$omp end do
end subroutine

! CHECK-LABEL: define void @wsloop_i64
subroutine wsloop_i64()
! Worksharing loop (`!$omp do`) whose index is declared `integer*8`.
! The directives below expect LB, UB, STRIDE and I to be i64 allocas while
! LASTITER remains an i32 alloca; I is later used as the operand of 8-byte
! lifetime markers.
! CHECK-DAG: %[[LASTITER:.*]] = alloca i32
! CHECK-DAG: %[[LB:.*]] = alloca i64
! CHECK-DAG: %[[UB:.*]] = alloca i64
! CHECK-DAG: %[[STRIDE:.*]] = alloca i64
! CHECK-DAG: %[[I:.*]] = alloca i64
integer*8 :: i

! Expected shape of the generated loop body:
!  - the lifetime start of I is immediately followed by a branch to the body
!    block;
!  - no further label-like line appears between the body block's label and
!    its branch to the continuation block;
!  - the lifetime end of I is the first instruction of the continuation block.
! CHECK: call void @llvm.lifetime.start.p0(i64 8, ptr %[[I]])
! CHECK-NEXT: br label %[[WSLOOP_BLOCK:.*]]
! CHECK: [[WSLOOP_BLOCK]]:
! CHECK-NOT: {{^.*}}:
! CHECK: br label %[[CONT_BLOCK:.*]]
! CHECK: [[CONT_BLOCK]]:
! CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %[[I]])
!$omp do
do i = 1, 10
print *, i
end do
!$omp end do
end subroutine

! CHECK-LABEL: define void @simdloop_i32
subroutine simdloop_i32()
! SIMD loop (`!$omp simd`) whose index is a default-kind integer. Only the
! i32 alloca for the index is captured (as I); it is later used as the
! operand of 4-byte lifetime markers.
! CHECK: %[[I:.*]] = alloca i32
integer :: i

! Expected shape of the generated loop body:
!  - the lifetime start of I is immediately followed by a branch to the body
!    block;
!  - no further label-like line appears between the body block's label and
!    its branch to the continuation block;
!  - the lifetime end of I is the first instruction of the continuation block.
! CHECK: call void @llvm.lifetime.start.p0(i64 4, ptr %[[I]])
! CHECK-NEXT: br label %[[SIMDLOOP_BLOCK:.*]]
! CHECK: [[SIMDLOOP_BLOCK]]:
! CHECK-NOT: {{^.*}}:
! CHECK: br label %[[CONT_BLOCK:.*]]
! CHECK: [[CONT_BLOCK]]:
! CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %[[I]])
!$omp simd
do i=1, 9
print *, i
end do
!$omp end simd
end subroutine

! CHECK-LABEL: define void @simdloop_i64
subroutine simdloop_i64()
! SIMD loop (`!$omp simd`) whose index is declared `integer*8`. Only the i64
! alloca for the index is captured (as I); it is later used as the operand of
! 8-byte lifetime markers.
! CHECK: %[[I:.*]] = alloca i64
integer*8 :: i

! Expected shape of the generated loop body:
!  - the lifetime start of I is immediately followed by a branch to the body
!    block;
!  - no further label-like line appears between the body block's label and
!    its branch to the continuation block;
!  - the lifetime end of I is the first instruction of the continuation block.
! CHECK: call void @llvm.lifetime.start.p0(i64 8, ptr %[[I]])
! CHECK-NEXT: br label %[[SIMDLOOP_BLOCK:.*]]
! CHECK: [[SIMDLOOP_BLOCK]]:
! CHECK-NOT: {{^.*}}:
! CHECK: br label %[[CONT_BLOCK:.*]]
! CHECK: [[CONT_BLOCK]]:
! CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %[[I]])
!$omp simd
do i=1, 9
print *, i
end do
!$omp end simd
end subroutine
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,50 @@ static void collectReductionInfo(
}
}

/// Collect into \p allocasToSink the already-translated `llvm::AllocaInst`s
/// that back `llvm.alloca` operations defined outside of \p region but whose
/// address is used exclusively by operations nested under the region's parent
/// operation.
///
/// Candidates are discovered through the destination addresses of the
/// `llvm.store` operations found in \p region. An allocation is skipped when
/// its address has no LLVM IR mapping to an `alloca` instruction in
/// \p moduleTranslation, or when its allocated type reports a primitive size
/// of zero bits (i.e. it is not a primitive type).
static void getSinkableAllocas(LLVM::ModuleTranslation &moduleTranslation,
                               Region &region,
                               SetVector<llvm::AllocaInst *> &allocasToSink) {
  Operation *parentOp = region.getParentOp();

  // True when every user of `addr` is the parent operation of the region or
  // is nested somewhere inside of it.
  auto hasOnlyInternalUses = [parentOp](Value addr) {
    for (auto &use : addr.getUses())
      if (!parentOp->isAncestor(use.getOwner()))
        return false;
    return true;
  };

  for (auto store : region.getOps<LLVM::StoreOp>()) {
    Value addr = store.getAddr();
    Operation *definingOp = addr.getDefiningOp();

    // Only addresses produced by an llvm.alloca that lives outside of the
    // region's parent operation are of interest.
    bool definedByOuterAlloca = isa_and_present<LLVM::AllocaOp>(definingOp) &&
                                !parentOp->isAncestor(definingOp);
    if (!definedByOuterAlloca)
      continue;

    // The address must already be translated to an LLVM IR alloca
    // instruction.
    llvm::Value *translatedAddr = moduleTranslation.lookupValue(addr);
    if (!isa_and_present<llvm::AllocaInst>(translatedAddr))
      continue;

    // Types without a primitive size are not handled.
    auto *allocaInst = cast<llvm::AllocaInst>(translatedAddr);
    if (allocaInst->getAllocatedType()->getPrimitiveSizeInBits() == 0)
      continue;

    // Finally, the allocation must not be visible outside of the region.
    if (hasOnlyInternalUses(addr))
      allocasToSink.insert(allocaInst);
  }
}

/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
Expand Down Expand Up @@ -850,6 +894,9 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
// Set up the source location value for OpenMP runtime.
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

SetVector<llvm::AllocaInst *> allocasToSink;
getSinkableAllocas(moduleTranslation, loop.getRegion(), allocasToSink);

// Generator of the canonical loop body.
// TODO: support error propagation in OpenMPIRBuilder and use it instead of
// relying on captured variables.
Expand All @@ -869,10 +916,21 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
if (loopInfos.size() != loop.getNumLoops() - 1)
return;

// Convert the body of the loop.
// Convert the body of the loop, adding lifetime markers to allocations that
// can be sunk into the new block.
builder.restoreIP(ip);
convertOmpOpRegions(loop.getRegion(), "omp.wsloop.region", builder,
moduleTranslation, bodyGenStatus);
for (auto *alloca : allocasToSink) {
unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8;
builder.CreateLifetimeStart(alloca, builder.getInt64(size));
}
llvm::BasicBlock *cont =
convertOmpOpRegions(loop.getRegion(), "omp.wsloop.region", builder,
moduleTranslation, bodyGenStatus);
builder.SetInsertPoint(cont, cont->begin());
for (auto *alloca : allocasToSink) {
unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8;
builder.CreateLifetimeEnd(alloca, builder.getInt64(size));
}
};

// Delegate actual loop construction to the OpenMP IRBuilder.
Expand Down Expand Up @@ -1091,6 +1149,9 @@ convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,

llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

SetVector<llvm::AllocaInst *> allocasToSink;
getSinkableAllocas(moduleTranslation, loop.getRegion(), allocasToSink);

// Generator of the canonical loop body.
// TODO: support error propagation in OpenMPIRBuilder and use it instead of
// relying on captured variables.
Expand All @@ -1110,10 +1171,21 @@ convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,
if (loopInfos.size() != loop.getNumLoops() - 1)
return;

// Convert the body of the loop.
// Convert the body of the loop, adding lifetime markers to allocations that
// can be sunk into the new block.
builder.restoreIP(ip);
convertOmpOpRegions(loop.getRegion(), "omp.simdloop.region", builder,
moduleTranslation, bodyGenStatus);
for (auto *alloca : allocasToSink) {
unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8;
builder.CreateLifetimeStart(alloca, builder.getInt64(size));
}
llvm::BasicBlock *cont =
convertOmpOpRegions(loop.getRegion(), "omp.simdloop.region", builder,
moduleTranslation, bodyGenStatus);
builder.SetInsertPoint(cont, cont->begin());
for (auto *alloca : allocasToSink) {
unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8;
builder.CreateLifetimeEnd(alloca, builder.getInt64(size));
}
};

// Delegate actual loop construction to the OpenMP IRBuilder.
Expand Down

0 comments on commit ff9c6db

Please sign in to comment.