-
Notifications
You must be signed in to change notification settings - Fork 12k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Offload] Introduce the offload sanitizer (initially for traps) #101417
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-compiler-rt-sanitizer
Author: Johannes Doerfert (jdoerfert)
Changes: This is the first commit for a new "OffloadSanitizer" that is designed to work well on GPUs. [...] The communication between the instrumented device code and the runtime is performed via host initialized pinned memory. [...]
Patch is 43.06 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101417.diff
28 Files Affected:
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.h b/llvm/include/llvm/Frontend/OpenMP/OMP.h
index 6f7a39acac1d3..f081015db0b0b 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
namespace llvm::omp {
ArrayRef<Directive> getLeafConstructs(Directive D);
@@ -30,6 +31,14 @@ Directive getCompoundConstruct(ArrayRef<Directive> Parts);
bool isLeafConstruct(Directive D);
bool isCompositeConstruct(Directive D);
bool isCombinedConstruct(Directive D);
+
+/// Create a nicer version of a function name for humans to look at.
+std::string prettityFunctionName(StringRef FunctionName);
+
+/// Deconstruct an OpenMP kernel name into the parent function name and the line
+/// number.
+std::string deconstructOpenMPKernelName(StringRef KernelName, unsigned &LineNo);
+
} // namespace llvm::omp
#endif // LLVM_FRONTEND_OPENMP_OMP_H
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 1614d5716d28c..9cb311834907b 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -196,6 +196,9 @@ class OpenMPIRBuilderConfig {
/// Data structure to contain the information needed to uniquely identify
/// a target entry.
struct TargetRegionEntryInfo {
+ /// The prefix used for kernel names.
+ static constexpr const char *KernelNamePrefix = "__omp_offloading_";
+
std::string ParentName;
unsigned DeviceID;
unsigned FileID;
diff --git a/llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h
new file mode 100644
index 0000000000000..6935b7dc390c4
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h
@@ -0,0 +1,27 @@
+//===- Transforms/Instrumentation/OffloadSanitizer.h ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass to instrument offload code in order to detect errors and communicate
+// them to the LLVM/Offload runtimes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class OffloadSanitizerPass : public PassInfoMixin<OffloadSanitizerPass> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H
diff --git a/llvm/lib/Frontend/OpenMP/CMakeLists.txt b/llvm/lib/Frontend/OpenMP/CMakeLists.txt
index 67aedf5c2b61a..82d2a9ae7c533 100644
--- a/llvm/lib/Frontend/OpenMP/CMakeLists.txt
+++ b/llvm/lib/Frontend/OpenMP/CMakeLists.txt
@@ -17,6 +17,7 @@ add_llvm_component_library(LLVMFrontendOpenMP
TargetParser
TransformUtils
Analysis
+ Demangle
MC
Scalar
BitReader
diff --git a/llvm/lib/Frontend/OpenMP/OMP.cpp b/llvm/lib/Frontend/OpenMP/OMP.cpp
index c1556ff3c74d7..b54cc90a14d83 100644
--- a/llvm/lib/Frontend/OpenMP/OMP.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMP.cpp
@@ -10,13 +10,19 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Demangle/Demangle.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/StringSaver.h"
#include <algorithm>
+#include <cstdio>
#include <iterator>
+#include <string>
#include <type_traits>
using namespace llvm;
@@ -186,4 +192,55 @@ bool isCombinedConstruct(Directive D) {
// Otherwise directive-name is a combined construct.
return !getLeafConstructs(D).empty() && !isCompositeConstruct(D);
}
+
+/// Turn a symbol name into something friendlier for diagnostics.
+///
+/// Names ending in ".internalized" are printed as the base name followed by
+/// " (internalized)". Names that deconstructOpenMPKernelName can split are
+/// printed as "omp target in <parent> @ <line> (<symbol>)". Any other name is
+/// returned unchanged.
+std::string prettityFunctionName(StringRef FunctionName) {
+  // An internalized copy keeps the original name and only appends a suffix.
+  const StringRef InternalizedSuffix = ".internalized";
+  if (FunctionName.ends_with(InternalizedSuffix)) {
+    StringRef BaseName = FunctionName.drop_back(InternalizedSuffix.size());
+    return BaseName.str() + " (internalized)";
+  }
+
+  // The line number starts at zero; if it is still zero afterwards the name
+  // is not treated as an OpenMP kernel and is returned verbatim.
+  unsigned KernelLine = 0;
+  std::string Parent = deconstructOpenMPKernelName(FunctionName, KernelLine);
+  if (KernelLine == 0)
+    return FunctionName.str();
+
+  std::string Pretty = "omp target in ";
+  Pretty += Parent;
+  Pretty += " @ ";
+  Pretty += std::to_string(KernelLine);
+  Pretty += " (";
+  Pretty += FunctionName.str();
+  Pretty += ")";
+  return Pretty;
+}
+
+/// Split an OpenMP offload kernel name into its parent function and line.
+///
+/// Expected naming scheme:
+///   __omp_offloading_<hex_hash1>_<hex_hash2>_<name>_l<line>_[<count>_]<suffix>
+///
+/// \param KernelName the kernel symbol name to take apart.
+/// \param [out] LineNo receives <line> when the name could be parsed. The
+///        caller in this file pre-initializes it to 0 to detect failure.
+/// \returns the demangled <name>, or an empty string if \p KernelName does not
+///          match the scheme.
+std::string deconstructOpenMPKernelName(StringRef KernelName,
+                                        unsigned &LineNo) {
+  // Only handle functions with an OpenMP kernel prefix for now.
+  // KernelNamePrefix is a `const char *`, so its length must come from the
+  // string contents; `sizeof` on it would yield the size of a pointer.
+  StringRef PrettyName = KernelName;
+  if (!PrettyName.consume_front(TargetRegionEntryInfo::KernelNamePrefix))
+    return "";
+
+  // Skip "<hex_hash1>_" and "<hex_hash2>_". Hex digits never contain an
+  // underscore, so the next two underscores delimit exactly these fields.
+  for (int Field = 0; Field < 2; ++Field) {
+    size_t Underscore = PrettyName.find('_');
+    if (Underscore == StringRef::npos)
+      return "";
+    PrettyName = PrettyName.drop_front(Underscore + 1);
+  }
+
+  // The parent name may itself contain "_l", so use the last occurrence as
+  // the start of the line marker.
+  size_t LineIdx = PrettyName.rfind("_l");
+  if (LineIdx == StringRef::npos)
+    return "";
+
+  // consumeInteger returns true on failure (no leading decimal digits).
+  if (PrettyName.drop_front(LineIdx + 2).consumeInteger(10, LineNo))
+    return "";
+  return demangle(PrettyName.take_front(LineIdx));
+}
} // namespace llvm::omp
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 77e350e7276ab..3f8e64315849e 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -8581,7 +8581,7 @@ void TargetRegionEntryInfo::getTargetRegionEntryFnName(
SmallVectorImpl<char> &Name, StringRef ParentName, unsigned DeviceID,
unsigned FileID, unsigned Line, unsigned Count) {
raw_svector_ostream OS(Name);
- OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
+ OS << KernelNamePrefix << llvm::format("%x", DeviceID)
<< llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
if (Count)
OS << "_" << Count;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index c8fb68d1c0b0c..a10357f8e584c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -60,6 +60,7 @@
#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"
+#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
@@ -380,6 +381,11 @@ static cl::opt<bool> EnableHipStdPar(
cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false),
cl::Hidden);
+static cl::opt<bool>
+ EnableOffloadSanitizer("amdgpu-enable-offload-sanitizer",
+ cl::desc("Enable the offload sanitizer"),
+ cl::init(false), cl::Hidden);
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
@@ -744,6 +750,9 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerFullLinkTimeOptimizationLastEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
+ if (EnableOffloadSanitizer)
+ PM.addPass(OffloadSanitizerPass());
+
// We want to support the -lto-partitions=N option as "best effort".
// For that, we need to lower LDS earlier in the pipeline before the
// module is partitioned for codegen.
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 671caf8484cd9..008102372d852 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -185,6 +185,7 @@ add_llvm_target(AMDGPUCodeGen
Core
GlobalISel
HipStdPar
+ Instrumentation
IPO
IRPrinter
MC
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
index 4e3f9e27e0c34..8db9f795fd8e9 100644
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_component_library(LLVMInstrumentation
MemProfiler.cpp
MemorySanitizer.cpp
NumericalStabilitySanitizer.cpp
+ OffloadSanitizer.cpp
IndirectCallPromotion.cpp
Instrumentation.cpp
InstrOrderFile.cpp
diff --git a/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
new file mode 100644
index 0000000000000..a24fdc477a063
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
@@ -0,0 +1,160 @@
+//===-- OffloadSanitizer.cpp - Offload sanitizer --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "offload-sanitizer"
+
+namespace {
+
+/// Module-level driver that inserts calls into the offload sanitizer runtime.
+class OffloadSanitizerImpl final {
+public:
+  OffloadSanitizerImpl(Module &M, FunctionAnalysisManager &FAM)
+      : M(M), FAM(FAM), Ctx(M.getContext()) {}
+
+  /// Instrument the whole module; the result reports whether it changed.
+  bool instrument();
+
+private:
+  bool shouldInstrumentFunction(Function &Fn);
+  bool instrumentFunction(Function &Fn);
+  bool instrumentTrapInstructions(SmallVectorImpl<IntrinsicInst *> &TrapCalls);
+
+  /// Return \p FC, declaring the function \p Name in the module first if the
+  /// cached callee is still unset.
+  FunctionCallee getOrCreateFn(FunctionCallee &FC, StringRef Name, Type *RetTy,
+                               ArrayRef<Type *> ArgTys) {
+    if (FC)
+      return FC;
+    FunctionType *FnTy = FunctionType::get(RetTy, ArgTys, /*isVarArg=*/false);
+    FC = M.getOrInsertFunction(Name, FnTy);
+    return FC;
+  }
+
+  /// void __offload_san_trap_info(Int64Ty);
+  FunctionCallee TrapInfoFn;
+  FunctionCallee getTrapInfoFn() {
+    Type *PCTy = Int64Ty;
+    return getOrCreateFn(TrapInfoFn, "__offload_san_trap_info", VoidTy, {PCTy});
+  }
+
+  /// Emit a call through \p IRB and remember it in Calls.
+  CallInst *createCall(IRBuilder<> &IRB, FunctionCallee Callee,
+                       ArrayRef<Value *> Args = std::nullopt,
+                       const Twine &Name = "") {
+    CallInst *NewCall = IRB.CreateCall(Callee, Args, Name);
+    Calls.push_back(NewCall);
+    return NewCall;
+  }
+  /// Every call created via createCall, in creation order.
+  SmallVector<CallInst *> Calls;
+
+  /// Emit the amdgcn_s_getpc intrinsic, producing a 64-bit value named "PC".
+  Value *getPC(IRBuilder<> &IRB) {
+    return IRB.CreateIntrinsic(Int64Ty, Intrinsic::amdgcn_s_getpc, {}, nullptr,
+                               "PC");
+  }
+
+  Module &M;
+  FunctionAnalysisManager &FAM;
+  LLVMContext &Ctx;
+
+  // Frequently used types, looked up once per module.
+  Type *VoidTy = Type::getVoidTy(Ctx);
+  Type *IntptrTy = M.getDataLayout().getIntPtrType(Ctx);
+  PointerType *PtrTy = PointerType::getUnqual(Ctx);
+  IntegerType *Int8Ty = Type::getInt8Ty(Ctx);
+  IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
+  IntegerType *Int64Ty = Type::getInt64Ty(Ctx);
+
+  const DataLayout &DL = M.getDataLayout();
+};
+
+} // end anonymous namespace
+
+/// Decide whether \p Fn is eligible for instrumentation.
+///
+/// Declarations, functions whose name contains "ompx" or "__kmpc" or starts
+/// with "rpc_", and functions carrying the DisableSanitizerInstrumentation
+/// attribute are left alone.
+bool OffloadSanitizerImpl::shouldInstrumentFunction(Function &Fn) {
+  if (Fn.isDeclaration())
+    return false;
+
+  StringRef Name = Fn.getName();
+  bool IsExcludedByName = Name.contains("ompx") || Name.contains("__kmpc") ||
+                          Name.starts_with("rpc_");
+  if (IsExcludedByName)
+    return false;
+
+  if (Fn.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
+    return false;
+  return true;
+}
+
+/// Insert a call to __offload_san_trap_info(PC) in front of every trap in
+/// \p TrapCalls.
+///
+/// \returns true if at least one call was inserted, i.e. the IR was modified.
+bool OffloadSanitizerImpl::instrumentTrapInstructions(
+    SmallVectorImpl<IntrinsicInst *> &TrapCalls) {
+  for (IntrinsicInst *II : TrapCalls) {
+    // The builder is positioned at the trap, so both the PC read and the
+    // runtime call are emitted right before it.
+    IRBuilder<> IRB(II);
+    createCall(IRB, getTrapInfoFn(), {getPC(IRB)});
+  }
+  // Each listed trap got new instructions in front of it. Reporting this is
+  // required so the pass does not claim that all analyses are preserved.
+  return !TrapCalls.empty();
+}
+
+/// Collect all llvm.trap intrinsic calls in \p Fn and pass them on to
+/// instrumentTrapInstructions. Functions rejected by shouldInstrumentFunction
+/// are skipped.
+///
+/// \returns whatever instrumentTrapInstructions reports, or false if \p Fn was
+///          skipped.
+bool OffloadSanitizerImpl::instrumentFunction(Function &Fn) {
+  if (!shouldInstrumentFunction(Fn))
+    return false;
+
+  // Gather first and instrument afterwards, so no instruction is inserted
+  // while the function is still being walked.
+  SmallVector<IntrinsicInst *> TrapCalls;
+  for (Instruction &I : instructions(Fn)) {
+    auto *II = dyn_cast<IntrinsicInst>(&I);
+    if (II && II->getIntrinsicID() == Intrinsic::trap)
+      TrapCalls.push_back(II);
+  }
+
+  return instrumentTrapInstructions(TrapCalls);
+}
+
+/// Instrument every function of the module and remove all constants whose
+/// name starts with "__offload_san" from the module's used lists.
+///
+/// \returns true if a function was reported as changed or a used-list entry
+///          matched the prefix.
+bool OffloadSanitizerImpl::instrument() {
+  bool Changed = false;
+  for (Function &Fn : M) {
+    if (instrumentFunction(Fn))
+      Changed = true;
+  }
+
+  // The predicate doubles as change tracker: a match means an entry is
+  // dropped from a used list.
+  auto IsSanitizerSymbol = [&Changed](Constant *C) {
+    bool Matches = C->getName().starts_with("__offload_san");
+    Changed |= Matches;
+    return Matches;
+  };
+  removeFromUsedLists(M, IsSanitizerSymbol);
+
+  return Changed;
+}
+
+/// Pass entry point: run the offload sanitizer instrumentation on \p M.
+/// All analyses are reported as preserved if nothing changed, none otherwise.
+PreservedAnalyses OffloadSanitizerPass::run(Module &M,
+                                            ModuleAnalysisManager &AM) {
+  auto &FAMProxy = AM.getResult<FunctionAnalysisManagerModuleProxy>(M);
+  FunctionAnalysisManager &FAM = FAMProxy.getManager();
+
+  OffloadSanitizerImpl Impl(M, FAM);
+  const bool Changed = Impl.instrument();
+  if (Changed) {
+    LLVM_DEBUG(M.dump());
+    return PreservedAnalyses::none();
+  }
+  return PreservedAnalyses::all();
+}
diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 7818c8d752599..8535c5ee981b2 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -94,6 +94,7 @@ set(src_files
${source_directory}/Misc.cpp
${source_directory}/Parallelism.cpp
${source_directory}/Reduction.cpp
+ ${source_directory}/Sanitizer.cpp
${source_directory}/State.cpp
${source_directory}/Synchronization.cpp
${source_directory}/Tasking.cpp
diff --git a/offload/DeviceRTL/include/Utils.h b/offload/DeviceRTL/include/Utils.h
index 82e2397b5958b..2e7767808b721 100644
--- a/offload/DeviceRTL/include/Utils.h
+++ b/offload/DeviceRTL/include/Utils.h
@@ -29,6 +29,9 @@ int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, int32_t Width);
uint64_t ballotSync(uint64_t Mask, int32_t Pred);
+/// Terminate the execution of this warp.
+void terminateWarp();
+
/// Return \p LowBits and \p HighBits packed into a single 64 bit value.
uint64_t pack(uint32_t LowBits, uint32_t HighBits);
diff --git a/offload/DeviceRTL/src/Sanitizer.cpp b/offload/DeviceRTL/src/Sanitizer.cpp
new file mode 100644
index 0000000000000..cf0a983f62395
--- /dev/null
+++ b/offload/DeviceRTL/src/Sanitizer.cpp
@@ -0,0 +1,95 @@
+//===------ Sanitizer.cpp - Track allocation for sanitizer checks ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mapping.h"
+#include "Shared/Environment.h"
+#include "Synchronization.h"
+#include "Types.h"
+#include "Utils.h"
+
+using namespace ompx;
+
+#define _SAN_ATTRS \
+ [[clang::disable_sanitizer_instrumentation, gnu::used, gnu::retain]]
+#define _SAN_ENTRY_ATTRS [[gnu::flatten, gnu::always_inline]] _SAN_ATTRS
+
+#pragma omp begin declare target device_type(nohost)
+
+[[gnu::visibility("protected")]] _SAN_ATTRS SanitizerEnvironmentTy
+ *__sanitizer_environment_ptr;
+
+namespace {
+
+/// Try to claim the sanitizer environment by CAS-ing its error code from NONE
+/// to \p ErrorCode. The claim is never released (nothing here resets it).
+_SAN_ATTRS bool
+getSanitizerEnvironmentLock(SanitizerEnvironmentTy &SE,
+ SanitizerEnvironmentTy::ErrorCodeTy ErrorCode) {
+ return atomic::cas(SE.getErrorCodeLocation(), SanitizerEnvironmentTy::NONE,
+ ErrorCode, atomic::OrderingTy::seq_cst,
+ atomic::OrderingTy::seq_cst);
+}
+
+/// Spin until IsInitialized is observed as set, then trap. Deliberately not
+/// inlined; the wait gives one thread time to set up the sanitizer environment.
+[[clang::noinline]] _SAN_ATTRS void spinWait(SanitizerEnvironmentTy &SE) {
+ while (!atomic::load(&SE.IsInitialized, atomic::OrderingTy::aquire))
+ ;
+ __builtin_trap();
+}
+
+_SAN_ATTRS
+void setLocation(SanitizerEnvironmentTy &SE, uint64_t PC) {
+ for (int I = 0; I < 3; ++I) {
+ SE.ThreadId[I] = mapping::getThreadIdInBlock(I);
+ SE.BlockId[I] = mapping::getBlockIdInKernel(I);
+ }
+ SE.PC = PC;
+
+ // This is the last step to initialize the sanitizer environment; the trap
+ // itself happens in spinWait. Flush the memory writes and signal readiness.
+ fence::system(atomic::OrderingTy::release);
+ atomic::store(&SE.IsInitialized, 1, atomic::OrderingTy::release);
+}
+
+_SAN_ATTRS
+void raiseExecutionError(SanitizerEnvironmentTy::ErrorCodeTy ErrorCode,
+ uint64_t PC) {
+ SanitizerEnvironmentTy &SE = *__sanitizer_environment_ptr;
+ bool HasLock = getSanitizerEnvironmentLock(SE, ErrorCode);
+
+ // If no thread of this warp has the lock, end execution gracefully.
+ bool AnyThreadHasLock = utils::ballotSync(lanes::All, HasLock);
+ if (!AnyThreadHasLock)
+ utils::terminateWarp();
+
+ // One thread will set the location information and signal to the rest of
+ // the warp that the actual trap can be executed now.
+ if (HasLock)
+ setLocation(SE, PC);
+
+ synchronize::warp(lanes::All);
+
+ // Every thread that gets here waits in spinWait until the environment is
+ // marked initialized; only the lock holder wrote to it, which avoids a race.
+ // The trap at the end of spinWait will end the program for all threads.
+ spinWait(SE);
+}
+
+} // namespace
+
+extern "C" {
+
+_SAN_ENTRY_ATTRS void __offload_san_trap_info(uint64_t PC) {
+ raiseExecutionError(SanitizerEnvironmentTy::TRAP, PC);
+}
+}
+
+#pragma omp end declare target
diff --git a/offload/DeviceRTL/src/Utils.cpp b/offload/DeviceRTL/src/Utils.cpp
index 53cc803234867..ae6bcf80e348f 100644
--- a/offload/DeviceRTL/src/Utils.cpp
+++ b/offload/DeviceRTL/src/Utils.cpp
@@ -38,6 +38,7 @@ int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
int32_t Width);
uint64_t ballotSync(uint64_t Mask, int32_t Pred);
+void terminateWarp();
/// AMDGCN Implementation
///
@@ -63,6 +64,8 @@ uint64_t ballotSync(uint64_t Mask, int32_t Pred) {
return Mask & __builtin_amdgcn_ballot_w64(Pred);
}
+void terminateWarp() { __builtin_amdgcn_endpgm(); }
+
bool isSharedMemPtr(const void *Ptr) {
return __builtin_amdgcn_is_shared(
(const __attribute__((address_space(0))) void *)Ptr);
@@ -90,6 +93,8 @@ uint64_t ballotSync(uint64_t Mask, int32_t Pred) {
return __nvvm_vote_ballot_sync(static_cast<uint32_t>(Mask), Pred);
}
+void terminateWarp() { __nvvm_exit(); }
+
bool isSharedMemPtr(const void *Ptr) { return __nvvm_isspacep_shared(Ptr); }
#pragma omp end declare variant
@@ -126,6 +131,8 @@ uint64_t utils::ballotSync(uint64_t Mask, int32_t Pred) {
return impl::ballotSync(Mask, Pred);
}
+void utils::terminateWarp() { return impl::terminateWarp(); }
+
bool utils::isSharedMemPtr(void *Ptr) { return impl::isSharedMemPtr(Ptr); }
exte...
[truncated]
|
You can test this locally with the following command:
git-clang-format --diff 691bd184e628bac8a2d7385dba1057cfcd844689 494e271e4c39e3ea669560363ca87a782453b29a --extensions c,h,cpp -- llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp offload/DeviceRTL/src/Sanitizer.cpp offload/test/sanitizer/kernel_trap_all.c llvm/lib/Passes/PassBuilder.cpp llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp offload/include/Shared/Environment.h offload/plugins-nextgen/common/include/ErrorReporting.h offload/plugins-nextgen/common/include/PluginInterface.h offload/plugins-nextgen/common/src/PluginInterface.cpp offload/test/sanitizer/kernel_trap.c offload/test/sanitizer/kernel_trap.cpp offload/test/sanitizer/kernel_trap_async.c offload/test/sanitizer/kernel_trap_many.c
View the diff from clang-format here.
diff --git a/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
index adae3fcb16..6519558c19 100644
--- a/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
@@ -119,7 +119,6 @@ bool OffloadSanitizerImpl::instrumentFunction(Function &Fn) {
}
}
-
return Changed;
}
|
5f6098f
to
c130a25
Compare
5206248
to
2459bef
Compare
acdb2a4
to
548d26d
Compare
This is the first commit for a new "OffloadSanitizer" that is designed to work well on GPUs. To keep the commit small, only traps are sanitized and we only report information about the encountering thread. It is also restricted to AMD GPUs for now, though that is not a conceptual requirement. The communication between the instrumented device code and the runtime is performed via host-initialized pinned memory. If an error is detected, one encountering thread will set up this sanitizer environment and a hardware trap is executed to end the kernel. The host trap handler can check the sanitizer environment to determine if the trap was issued by the sanitizer code or not. If so, we report the reason (for now only that a trap was encountered), the encountering thread id, and the PC.
548d26d
to
494e271
Compare
This is the first commit for a new "OffloadSanitizer" that is designed
to work well on GPUs. To keep the commit small, only traps are sanitized
and we only report information about the encountering thread. It is also
restricted to AMD GPUs for now, though that is not a conceptual requirement.
The communication between the instrumented device code and the runtime
is performed via host initialized pinned memory. If an error is
detected, one encountering thread will set up this sanitizer environment
and a hardware trap is executed to end the kernel. The host trap handler
can check the sanitizer environment to determine if the trap was issued
by the sanitizer code or not. If so, we report the reason (for now only
that a trap was encountered), the encountering thread id, and the PC.