Skip to content

Commit

Permalink
[Offload] Introduce the offload sanitizer (initially for traps)
Browse files Browse the repository at this point in the history
This is the first commit for a new "OffloadSanitizer" that is designed
to work well on GPUs. To keep the commit small, only traps are sanitized
and we only report information about the encountering thread. It is also
restricted to AMD GPUs for now, though that is not a conceptual
requirement.

The communication between the instrumented device code and the runtime
is performed via host initialized pinned memory. If an error is
detected, one encountering thread will set up this sanitizer environment
and a hardware trap is executed to end the kernel. The host trap handler
can check the sanitizer environment to determine if the trap was issued
by the sanitizer code or not. If so, we report the reason (for now only
that a trap was encountered), the encountering thread id, and the PC.
  • Loading branch information
jdoerfert committed Nov 15, 2024
1 parent 691bd18 commit d9f7a1c
Show file tree
Hide file tree
Showing 19 changed files with 545 additions and 26 deletions.
27 changes: 27 additions & 0 deletions llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
//===- Transforms/Instrumentation/OffloadSanitizer.h ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Pass to instrument offload code in order to detect errors and communicate
// them to the LLVM/Offload runtimes.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H
#define LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H

#include "llvm/IR/PassManager.h"

namespace llvm {

/// Module pass that instruments offload code in order to detect errors and
/// communicate them to the LLVM/Offload runtimes.
class OffloadSanitizerPass : public PassInfoMixin<OffloadSanitizerPass> {
public:
/// Run the instrumentation on module \p M and report which analyses are
/// preserved afterwards.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
} // end namespace llvm

#endif // LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H
1 change: 1 addition & 0 deletions llvm/lib/Passes/PassBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@
#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
#include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"
#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
#include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Passes/PassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
MODULE_PASS("no-op-module", NoOpModulePass())
MODULE_PASS("nsan", NumericalStabilitySanitizerPass())
MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass())
MODULE_PASS("offload-sanitizer", OffloadSanitizerPass())
MODULE_PASS("openmp-opt", OpenMPOptPass())
MODULE_PASS("openmp-opt-postlink",
OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink))
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/FlattenCFG.h"
Expand Down Expand Up @@ -448,6 +449,11 @@ static cl::opt<bool>
cl::desc("Enable AMDGPUAttributorPass"),
cl::init(true), cl::Hidden);

// Hidden opt-in flag (defaults to false). When set, the full LTO "last"
// extension point callback registered in registerPassBuilderCallbacks adds
// the OffloadSanitizerPass to the module pipeline.
static cl::opt<bool>
EnableOffloadSanitizer("amdgpu-enable-offload-sanitizer",
cl::desc("Enable the offload sanitizer"),
cl::init(false), cl::Hidden);

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
Expand Down Expand Up @@ -823,6 +829,9 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {

PB.registerFullLinkTimeOptimizationLastEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
if (EnableOffloadSanitizer)
PM.addPass(OffloadSanitizerPass());

// We want to support the -lto-partitions=N option as "best effort".
// For that, we need to lower LDS earlier in the pipeline before the
// module is partitioned for codegen.
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ add_llvm_target(AMDGPUCodeGen
Core
GlobalISel
HipStdPar
Instrumentation
IPO
IRPrinter
Instrumentation
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Instrumentation/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ add_llvm_component_library(LLVMInstrumentation
MemProfiler.cpp
MemorySanitizer.cpp
NumericalStabilitySanitizer.cpp
OffloadSanitizer.cpp
IndirectCallPromotion.cpp
InstrOrderFile.cpp
InstrProfiling.cpp
Expand Down
160 changes: 160 additions & 0 deletions llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
//===-- OffloadSanitizer.cpp - Offload sanitizer --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
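//
// This file implements the offload sanitizer instrumentation. For now it only
// handles non-continuable trap intrinsics: a call to __offload_san_trap_info,
// taking the current program counter, is inserted in front of each of them.
//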
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"

#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

using namespace llvm;

#define DEBUG_TYPE "offload-sanitizer"

namespace {

/// Implementation helper that walks a module and inserts the offload
/// sanitizer runtime calls. One instance is created per pass invocation.
class OffloadSanitizerImpl final {
public:
  OffloadSanitizerImpl(Module &M, FunctionAnalysisManager &FAM)
      : M(M), FAM(FAM), Ctx(M.getContext()) {}

  /// Entry point: visit every function of the module. The returned flag is
  /// what the pass uses to decide whether analyses are invalidated.
  bool instrument();

private:
  /// Filter deciding whether \p Fn is visited at all.
  bool shouldInstrumentFunction(Function &Fn);

  /// Collect the interesting instructions of \p Fn and instrument them.
  bool instrumentFunction(Function &Fn);

  /// Instrument the trap intrinsic calls collected in \p TrapCalls.
  bool instrumentTrapInstructions(SmallVectorImpl<IntrinsicInst *> &TrapCalls);

  /// Return the cached callee \p FC. If it has not been set yet, it is first
  /// populated through Module::getOrInsertFunction using a non-variadic
  /// function type built from \p RetTy and \p ArgTys.
  FunctionCallee getOrCreateFn(FunctionCallee &FC, StringRef Name, Type *RetTy,
                               ArrayRef<Type *> ArgTys) {
    if (FC)
      return FC;
    FunctionType *FnTy = FunctionType::get(RetTy, ArgTys, /*isVarArg=*/false);
    FC = M.getOrInsertFunction(Name, FnTy);
    return FC;
  }

  /// void __offload_san_trap_info(Int64Ty);
  FunctionCallee TrapInfoFn;
  FunctionCallee getTrapInfoFn() {
    Type *ParamTys[] = {/*PC*/ Int64Ty};
    return getOrCreateFn(TrapInfoFn, "__offload_san_trap_info", VoidTy,
                         ParamTys);
  }

  /// Emit a call to \p Callee at the insertion point of \p IRB and remember
  /// it in the Calls list.
  CallInst *createCall(IRBuilder<> &IRB, FunctionCallee Callee,
                       ArrayRef<Value *> Args = std::nullopt,
                       const Twine &Name = "") {
    CallInst *NewCall = IRB.CreateCall(Callee, Args, Name);
    Calls.push_back(NewCall);
    return NewCall;
  }
  /// All calls emitted through createCall, in creation order.
  SmallVector<CallInst *> Calls;

  /// Emit a call to the llvm.amdgcn.s.getpc intrinsic; the i64 result is
  /// named "PC".
  Value *getPC(IRBuilder<> &IRB) {
    Value *ProgramCounter = IRB.CreateIntrinsic(
        Int64Ty, Intrinsic::amdgcn_s_getpc, {}, nullptr, "PC");
    return ProgramCounter;
  }

  Module &M;
  FunctionAnalysisManager &FAM;
  LLVMContext &Ctx;

  // Commonly used types. These are initialized from M and Ctx, so they have
  // to stay declared after those members.
  Type *VoidTy = Type::getVoidTy(Ctx);
  Type *IntptrTy = M.getDataLayout().getIntPtrType(Ctx);
  PointerType *PtrTy = PointerType::getUnqual(Ctx);
  IntegerType *Int8Ty = Type::getInt8Ty(Ctx);
  IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
  IntegerType *Int64Ty = Type::getInt64Ty(Ctx);

  const DataLayout &DL = M.getDataLayout();
};

} // end anonymous namespace

/// Decide whether \p Fn is eligible for instrumentation.
///
/// Declarations are skipped, as are functions whose name contains "ompx" or
/// "__kmpc" or starts with "rpc_", and functions carrying the
/// disable_sanitizer_instrumentation attribute.
bool OffloadSanitizerImpl::shouldInstrumentFunction(Function &Fn) {
  // Nothing to instrument without a body.
  if (Fn.isDeclaration())
    return false;

  const StringRef FnName = Fn.getName();
  const bool IsExcludedByName = FnName.contains("ompx") ||
                                FnName.contains("__kmpc") ||
                                FnName.starts_with("rpc_");
  if (IsExcludedByName)
    return false;

  // Honor the explicit opt-out attribute.
  if (Fn.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
    return false;
  return true;
}

/// Insert a call to the trap-info runtime function, passing the current
/// program counter, directly in front of every trap in \p TrapCalls.
///
/// \returns true if at least one call was inserted, i.e., the IR was modified.
bool OffloadSanitizerImpl::instrumentTrapInstructions(
    SmallVectorImpl<IntrinsicInst *> &TrapCalls) {
  for (IntrinsicInst *II : TrapCalls) {
    // A builder positioned at the trap emits the PC query and the runtime
    // call before it.
    IRBuilder<> IRB(II);
    createCall(IRB, getTrapInfoFn(), {getPC(IRB)});
  }

  // Every collected trap is instrumented unconditionally, so the IR changed
  // exactly when the list was non-empty. Previously this always returned
  // false, which let the pass report all analyses as preserved even though
  // new calls had been added.
  return !TrapCalls.empty();
}

/// Instrument a single function.
///
/// Functions rejected by shouldInstrumentFunction are left alone. Otherwise
/// all non-continuable trap intrinsic calls are gathered first and then
/// handed to instrumentTrapInstructions, so no instructions are inserted
/// while the function is being iterated.
///
/// \returns the result of instrumentTrapInstructions, or false if \p Fn was
/// skipped.
bool OffloadSanitizerImpl::instrumentFunction(Function &Fn) {
  if (!shouldInstrumentFunction(Fn))
    return false;

  SmallVector<IntrinsicInst *> TrapCalls;
  for (Instruction &Inst : instructions(Fn)) {
    // Only intrinsic calls are of interest; everything else is ignored.
    auto *Intr = dyn_cast<IntrinsicInst>(&Inst);
    if (!Intr)
      continue;
    if (Intr->isNonContinuableTrap())
      TrapCalls.push_back(Intr);
  }

  return instrumentTrapInstructions(TrapCalls);
}

bool OffloadSanitizerImpl::instrument() {
bool Changed = false;

for (Function &Fn : M)
Changed |= instrumentFunction(Fn);

removeFromUsedLists(M, [&](Constant *C) {
if (!C->getName().starts_with("__offload_san"))
return false;
return Changed = true;
});

return Changed;
}

/// New pass manager entry point: instrument \p M and report all analyses as
/// preserved if the implementation made no change, none otherwise.
PreservedAnalyses OffloadSanitizerPass::run(Module &M,
                                            ModuleAnalysisManager &AM) {
  auto &FAMProxy = AM.getResult<FunctionAnalysisManagerModuleProxy>(M);
  FunctionAnalysisManager &FAM = FAMProxy.getManager();

  OffloadSanitizerImpl Impl(M, FAM);
  const bool Changed = Impl.instrument();
  if (Changed) {
    // Dump the instrumented module when debugging this pass.
    LLVM_DEBUG(M.dump());
    return PreservedAnalyses::none();
  }
  return PreservedAnalyses::all();
}
79 changes: 79 additions & 0 deletions llvm/test/Instrumentation/OffloadSanitizer/basic.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
target triple = "amdgcn-amd-amdhsa"

; Test basic offload sanitizer trap instrumentation.

; RUN: opt < %s -passes=offload-sanitizer -S | FileCheck --check-prefixes=CHECK %s

; A trap followed by a regular return: the PC query and the call to
; __offload_san_trap_info are expected directly in front of the trap.
define void @test_trap1() {
; CHECK-LABEL: define void @test_trap1() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
; CHECK-NEXT: call void @__offload_san_trap_info(i64 [[PC]])
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: ret void
;
entry:
call void @llvm.trap()
ret void
}

; Same as test_trap1, but the trap is followed by unreachable instead of a
; return.
define void @test_trap2() {
; CHECK-LABEL: define void @test_trap2() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
; CHECK-NEXT: call void @__offload_san_trap_info(i64 [[PC]])
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: unreachable
;
entry:
call void @llvm.trap()
unreachable
}

; The trap sits in only one successor of a conditional branch: the
; instrumentation is expected in that block, the other block stays unchanged.
define void @test_trap3(i1 %c) {
; CHECK-LABEL: define void @test_trap3(
; CHECK-SAME: i1 [[C:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
; CHECK: [[T]]:
; CHECK-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
; CHECK-NEXT: call void @__offload_san_trap_info(i64 [[PC]])
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: unreachable
; CHECK: [[F]]:
; CHECK-NEXT: ret void
;
entry:
br i1 %c, label %t ,label %f
t:
call void @llvm.trap()
unreachable
f:
ret void
}

; llvm.ubsantrap is expected to be instrumented the same way as llvm.trap.
define void @test_ubsantrap(i1 %c) {
; CHECK-LABEL: define void @test_ubsantrap(
; CHECK-SAME: i1 [[C:%.*]]) {
; CHECK-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
; CHECK-NEXT: call void @__offload_san_trap_info(i64 [[PC]])
; CHECK-NEXT: call void @llvm.ubsantrap(i8 42)
; CHECK-NEXT: unreachable
;
call void @llvm.ubsantrap(i8 42)
unreachable
}

; Negative test: a function carrying disable_sanitizer_instrumentation is
; expected to be left untouched, so no runtime call appears before the trap.
define void @test_trap_no_san_attr(i1 %c) disable_sanitizer_instrumentation {
; __attribute__((disable_sanitizer_instrumentation))
; CHECK-LABEL: define void @test_trap_no_san_attr(
; CHECK-SAME: i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: ret void
;
call void @llvm.trap()
ret void
}
1 change: 1 addition & 0 deletions offload/DeviceRTL/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ set(src_files
${source_directory}/Parallelism.cpp
${source_directory}/Profiling.cpp
${source_directory}/Reduction.cpp
${source_directory}/Sanitizer.cpp
${source_directory}/State.cpp
${source_directory}/Synchronization.cpp
${source_directory}/Tasking.cpp
Expand Down
Loading

0 comments on commit d9f7a1c

Please sign in to comment.