From 548d26d7b75454e24021fc852d9aed538729e6cd Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 24 Jul 2024 16:03:19 -0700 Subject: [PATCH] [Offload] Introduce the offload sanitizer (initially for traps) This is the first commit for a new "OffloadSanitizer" that is designed to work well on GPUs. To keep the commit small, only traps are sanitized and we only report information about the encountering thread. It is also restricted to AMD GPUs for now, though that is not a conceptual requirement. The communication between the instrumented device code and the runtime is performed via host initialized pinned memory. If an error is detected, one encountering thread will setup this sanitizer environment and a hardware trap is executed to end the kernel. The host trap handler can check the sanitizer environment to determine if the trap was issued by the sanitizer code or not. If so, we report the reason (for now only that a trap was encountered), the encountering thread id, and the PC. --- .../Instrumentation/OffloadSanitizer.h | 27 ++++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 9 ++ llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 + .../Transforms/Instrumentation/CMakeLists.txt | 1 + .../Instrumentation/OffloadSanitizer.cpp | 150 ++++++++++++++++++ .../Instrumentation/OffloadSanitizer/basic.ll | 79 +++++++++ offload/DeviceRTL/CMakeLists.txt | 1 + offload/DeviceRTL/src/Sanitizer.cpp | 96 +++++++++++ offload/include/Shared/Environment.h | 25 +++ .../common/include/ErrorReporting.h | 46 +++++- .../common/include/PluginInterface.h | 7 + .../common/src/PluginInterface.cpp | 20 +++ offload/test/sanitizer/kernel_trap.c | 24 ++- offload/test/sanitizer/kernel_trap.cpp | 13 +- offload/test/sanitizer/kernel_trap_all.c | 31 ++++ offload/test/sanitizer/kernel_trap_async.c | 14 +- offload/test/sanitizer/kernel_trap_many.c | 15 +- 19 files changed, 535 insertions(+), 26 deletions(-) create mode 100644 llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h create mode 100644 llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp create mode 100644 llvm/test/Instrumentation/OffloadSanitizer/basic.ll create mode 100644 offload/DeviceRTL/src/Sanitizer.cpp create mode 100644 offload/test/sanitizer/kernel_trap_all.c diff --git a/llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h new file mode 100644 index 00000000000000..6935b7dc390c40 --- /dev/null +++ b/llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h @@ -0,0 +1,27 @@ +//===- Transforms/Instrumentation/OffloadSanitizer.h ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Pass to instrument offload code in order to detect errors and communicate +// them to the LLVM/Offload runtimes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class OffloadSanitizerPass : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index df7c9a4fbb9387..5cd9894943fc95 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -208,6 +208,7 @@ #include "llvm/Transforms/Instrumentation/MemProfiler.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" #include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h" +#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h" #include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h" #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index da179a6610afd5..8d5e0c80f5863c 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -106,6 +106,7 @@ MODULE_PASS("name-anon-globals", NameAnonGlobalPass()) MODULE_PASS("no-op-module", NoOpModulePass()) MODULE_PASS("nsan", NumericalStabilitySanitizerPass()) MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass()) +MODULE_PASS("offload-sanitizer", OffloadSanitizerPass()) MODULE_PASS("openmp-opt", OpenMPOptPass()) MODULE_PASS("openmp-opt-postlink", OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink)) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 603339e200dde9..0b481897e337ab 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -74,6 +74,7 @@ #include "llvm/Transforms/IPO/ExpandVariadics.h" #include "llvm/Transforms/IPO/GlobalDCE.h" #include "llvm/Transforms/IPO/Internalize.h" +#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/Transforms/Scalar/FlattenCFG.h" @@ -448,6 +449,11 @@ static cl::opt cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden); +static cl::opt + EnableOffloadSanitizer("amdgpu-enable-offload-sanitizer", + cl::desc("Enable the offload sanitizer"), + cl::init(false), cl::Hidden); + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { // Register the target RegisterTargetMachine X(getTheR600Target()); @@ -823,6 +829,9 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { PB.registerFullLinkTimeOptimizationLastEPCallback( [this](ModulePassManager &PM, OptimizationLevel Level) { + if (EnableOffloadSanitizer) + PM.addPass(OffloadSanitizerPass()); + // We want to support the -lto-partitions=N option as "best effort". // For that, we need to lower LDS earlier in the pipeline before the // module is partitioned for codegen. diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index fed29c3e14aae2..d74f484a172048 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -186,6 +186,7 @@ add_llvm_target(AMDGPUCodeGen Core GlobalISel HipStdPar + Instrumentation IPO IRPrinter Instrumentation diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt index 3e3c3eced4bb9c..e3f34277f5be3b 100644 --- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_component_library(LLVMInstrumentation MemProfiler.cpp MemorySanitizer.cpp NumericalStabilitySanitizer.cpp + OffloadSanitizer.cpp IndirectCallPromotion.cpp InstrOrderFile.cpp InstrProfiling.cpp diff --git a/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp new file mode 100644 index 00000000000000..adae3fcb16ffa8 --- /dev/null +++ b/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp @@ -0,0 +1,150 @@ +//===-- OffloadSanitizer.cpp - Offload sanitizer --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h" + +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Value.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "offload-sanitizer" + +namespace { + +class OffloadSanitizerImpl final { +public: + OffloadSanitizerImpl(Module &M, FunctionAnalysisManager &FAM) + : M(M), FAM(FAM), Ctx(M.getContext()) {} + + bool instrument(); + +private: + bool shouldInstrumentFunction(Function &Fn); + bool instrumentFunction(Function &Fn); + bool instrumentTrapInstruction(IntrinsicInst &II); + + FunctionCallee getOrCreateFn(FunctionCallee &FC, StringRef Name, Type *RetTy, + ArrayRef ArgTys) { + if (!FC) { + auto *NewAllocationFnTy = FunctionType::get(RetTy, ArgTys, false); + FC = M.getOrInsertFunction(Name, NewAllocationFnTy); + } + return FC; + } + + /// void __offload_san_trap_info(Int64Ty); + FunctionCallee TrapInfoFn; + FunctionCallee getTrapInfoFn() { + return getOrCreateFn(TrapInfoFn, "__offload_san_trap_info", VoidTy, + {/*PC*/ Int64Ty}); + } + + CallInst *createCall(IRBuilder<> &IRB, FunctionCallee Callee, + ArrayRef Args = std::nullopt, + const Twine &Name = "") { + Calls.push_back(IRB.CreateCall(Callee, Args, Name)); + return Calls.back(); + } + SmallVector Calls; + + Value *getPC(IRBuilder<> &IRB) { + return IRB.CreateIntrinsic(Int64Ty, Intrinsic::amdgcn_s_getpc, {}, nullptr, + "PC"); + } + + Module &M; + FunctionAnalysisManager &FAM; + LLVMContext &Ctx; + + Type *VoidTy = Type::getVoidTy(Ctx); + Type *IntptrTy = M.getDataLayout().getIntPtrType(Ctx); + PointerType *PtrTy = PointerType::getUnqual(Ctx); + IntegerType *Int8Ty = Type::getInt8Ty(Ctx); + IntegerType *Int32Ty = Type::getInt32Ty(Ctx); + IntegerType *Int64Ty = Type::getInt64Ty(Ctx); + + const DataLayout &DL = M.getDataLayout(); +}; + +} // end anonymous namespace + +bool OffloadSanitizerImpl::shouldInstrumentFunction(Function &Fn) { + if (Fn.isDeclaration()) + return false; + return !Fn.hasFnAttribute(Attribute::DisableSanitizerInstrumentation); +} + +bool OffloadSanitizerImpl::instrumentTrapInstruction(IntrinsicInst &II) { + IRBuilder<> IRB(&II); + createCall(IRB, getTrapInfoFn(), {getPC(IRB)}); + return true; +} + +bool OffloadSanitizerImpl::instrumentFunction(Function &Fn) { + if (!shouldInstrumentFunction(Fn)) + return false; + + bool Changed = false; + for (auto &I : instructions(Fn)) { + switch (I.getOpcode()) { + case Instruction::Call: { + auto &CI = cast(I); + if (auto *II = dyn_cast(&CI)) + if (II->isNonContinuableTrap()) + Changed |= instrumentTrapInstruction(*II); + break; + } + default: + break; + } + } + + + return Changed; +} + +bool OffloadSanitizerImpl::instrument() { + bool Changed = false; + + for (Function &Fn : M) + Changed |= instrumentFunction(Fn); + + removeFromUsedLists(M, [&](Constant *C) { + if (!C->getName().starts_with("__offload_san")) + return false; + return Changed = true; + }); + + return Changed; +} + +PreservedAnalyses OffloadSanitizerPass::run(Module &M, + ModuleAnalysisManager &AM) { + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + OffloadSanitizerImpl Impl(M, FAM); + if (!Impl.instrument()) + return PreservedAnalyses::all(); + LLVM_DEBUG(M.dump()); + return PreservedAnalyses::none(); +} diff --git a/llvm/test/Instrumentation/OffloadSanitizer/basic.ll b/llvm/test/Instrumentation/OffloadSanitizer/basic.ll new file mode 100644 index 00000000000000..f8c863e9714b23 --- /dev/null +++ b/llvm/test/Instrumentation/OffloadSanitizer/basic.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +target triple = "amdgcn-amd-amdhsa" + +; Test basic offload sanitizer trap instrumentation. + +; RUN: opt < %s -passes=offload-sanitizer -S | FileCheck --check-prefixes=CHECK %s + +define void @test_trap1() { +; CHECK-LABEL: define void @test_trap1() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: call void @__offload_san_trap_info(i64 [[PC]]) +; CHECK-NEXT: call void @llvm.trap() +; CHECK-NEXT: ret void +; +entry: + call void @llvm.trap() + ret void +} + +define void @test_trap2() { +; CHECK-LABEL: define void @test_trap2() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: call void @__offload_san_trap_info(i64 [[PC]]) +; CHECK-NEXT: call void @llvm.trap() +; CHECK-NEXT: unreachable +; +entry: + call void @llvm.trap() + unreachable +} + +define void @test_trap3(i1 %c) { +; CHECK-LABEL: define void @test_trap3( +; CHECK-SAME: i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C]], label %[[T:.*]], label %[[F:.*]] +; CHECK: [[T]]: +; CHECK-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: call void @__offload_san_trap_info(i64 [[PC]]) +; CHECK-NEXT: call void @llvm.trap() +; CHECK-NEXT: unreachable +; CHECK: [[F]]: +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %t ,label %f +t: + call void @llvm.trap() + unreachable +f: + ret void +} + +define void @test_ubsantrap(i1 %c) { +; CHECK-LABEL: define void @test_ubsantrap( +; CHECK-SAME: i1 [[C:%.*]]) { +; CHECK-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: call void @__offload_san_trap_info(i64 [[PC]]) +; CHECK-NEXT: call void @llvm.ubsantrap(i8 42) +; CHECK-NEXT: unreachable +; + call void @llvm.ubsantrap(i8 42) + unreachable +} + +define void @test_trap_no_san_attr(i1 %c) disable_sanitizer_instrumentation { +; __attribute__((disable_sanitizer_instrumentation)) +; CHECK-LABEL: define void @test_trap_no_san_attr( +; CHECK-SAME: i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: call void @llvm.trap() +; CHECK-NEXT: ret void +; + call void @llvm.trap() + ret void +} diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt index 96cb79b7d071c5..8d20e3a396226e 100644 --- a/offload/DeviceRTL/CMakeLists.txt +++ b/offload/DeviceRTL/CMakeLists.txt @@ -96,6 +96,7 @@ set(src_files ${source_directory}/Parallelism.cpp ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp + ${source_directory}/Sanitizer.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp ${source_directory}/Tasking.cpp diff --git a/offload/DeviceRTL/src/Sanitizer.cpp b/offload/DeviceRTL/src/Sanitizer.cpp new file mode 100644 index 00000000000000..d524eed0d9a930 --- /dev/null +++ b/offload/DeviceRTL/src/Sanitizer.cpp @@ -0,0 +1,96 @@ +//===------ Sanitizer.cpp - Track allocation for sanitizer checks ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#include "DeviceTypes.h" +#include "DeviceUtils.h" +#include "Mapping.h" +#include "Shared/Environment.h" +#include "Synchronization.h" +#include "gpuintrin.h" + +using namespace ompx; + +#define _SAN_ATTRS \ + [[clang::disable_sanitizer_instrumentation, gnu::used, gnu::retain]] +#define _SAN_ENTRY_ATTRS [[gnu::flatten, gnu::always_inline]] _SAN_ATTRS + +#pragma omp begin declare target device_type(nohost) + +[[gnu::visibility("protected")]] _SAN_ATTRS SanitizerEnvironmentTy + *__sanitizer_environment_ptr; + +namespace { + +/// Helper to lock the sanitizer environment. While we never unlock it, this +/// allows us to have a no-op "side effect" in the spin-wait function below. +_SAN_ATTRS bool +getSanitizerEnvironmentLock(SanitizerEnvironmentTy &SE, + SanitizerEnvironmentTy::ErrorCodeTy ErrorCode) { + return atomic::cas(SE.getErrorCodeLocation(), SanitizerEnvironmentTy::NONE, + ErrorCode, atomic::OrderingTy::seq_cst, + atomic::OrderingTy::seq_cst); +} + +/// The spin-wait function should not be inlined, it's a catch all to give one +/// thread time to setup the sanitizer environment. +[[clang::noinline]] _SAN_ATTRS void spinWait(SanitizerEnvironmentTy &SE) { + while (!atomic::load(&SE.IsInitialized, atomic::OrderingTy::aquire)) + ; + __builtin_trap(); +} + +_SAN_ATTRS +void setLocation(SanitizerEnvironmentTy &SE, uint64_t PC) { + for (int I = 0; I < 3; ++I) { + SE.ThreadId[I] = mapping::getThreadIdInBlock(I); + SE.BlockId[I] = mapping::getBlockIdInKernel(I); + } + SE.PC = PC; + + // This is the last step to initialize the sanitizer environment, time to + // trap via the spinWait. Flush the memory writes and signal for the end. + fence::system(atomic::OrderingTy::release); + atomic::store(&SE.IsInitialized, 1, atomic::OrderingTy::release); +} + +_SAN_ATTRS +void raiseExecutionError(SanitizerEnvironmentTy::ErrorCodeTy ErrorCode, + uint64_t PC) { + SanitizerEnvironmentTy &SE = *__sanitizer_environment_ptr; + bool HasLock = getSanitizerEnvironmentLock(SE, ErrorCode); + + // If no thread of this warp has the lock, end execution gracefully. + bool AnyThreadHasLock = utils::ballotSync(lanes::All, HasLock); + if (!AnyThreadHasLock) + __gpu_exit(); + + // One thread will set the location information and signal that the rest of + // the wapr that the actual trap can be executed now. + if (HasLock) + setLocation(SE, PC); + + synchronize::warp(lanes::All); + + // This is not the first thread that encountered the trap, to avoid a race + // on the sanitizer environment, this thread is simply going to spin-wait. + // The trap above will end the program for all threads. + spinWait(SE); +} + +} // namespace + +extern "C" { + +_SAN_ENTRY_ATTRS void __offload_san_trap_info(uint64_t PC) { + raiseExecutionError(SanitizerEnvironmentTy::TRAP, PC); +} +} + +#pragma omp end declare target diff --git a/offload/include/Shared/Environment.h b/offload/include/Shared/Environment.h index 147583c209fc3e..6aab284f6c9743 100644 --- a/offload/include/Shared/Environment.h +++ b/offload/include/Shared/Environment.h @@ -105,4 +105,29 @@ struct KernelLaunchEnvironmentTy { void *ReductionBuffer = nullptr; }; +/// The environment used to communicate sanitizer information from the device to +/// the host. +struct SanitizerEnvironmentTy { + enum ErrorCodeTy : uint8_t { + NONE = 0, + TRAP, + LAST = TRAP, + } ErrorCode; + + /// Flag to indicate the environment has been initialized fully. + uint8_t IsInitialized; + + /// Return the error code location for use in an atomic compare-and-swap. + uint8_t *getErrorCodeLocation() { + return reinterpret_cast(&ErrorCode); + } + + /// Thread info + /// { + uint32_t ThreadId[3]; + uint32_t BlockId[3]; + uint64_t PC; + /// } +}; + #endif // OMPTARGET_SHARED_ENVIRONMENT_H diff --git a/offload/plugins-nextgen/common/include/ErrorReporting.h b/offload/plugins-nextgen/common/include/ErrorReporting.h index 8478977a8f86af..8207512e41fd39 100644 --- a/offload/plugins-nextgen/common/include/ErrorReporting.h +++ b/offload/plugins-nextgen/common/include/ErrorReporting.h @@ -12,6 +12,7 @@ #define OFFLOAD_PLUGINS_NEXTGEN_COMMON_ERROR_REPORTING_H #include "PluginInterface.h" +#include "Shared/Environment.h" #include "Shared/EnvironmentVar.h" #include "llvm/ADT/STLExtras.h" @@ -105,6 +106,15 @@ class ErrorReporter { print(BoldRed, Format, Args...); print("\n"); } + + /// Print \p Format, instantiated with \p Args to stderr, but colored with + /// a banner. + template + [[gnu::format(__printf__, 1, 2)]] static void + reportWarning(const char *Format, ArgsTy &&...Args) { + print(Yellow, "WARNING: "); + print(Yellow, Format, Args...); + } #pragma clang diagnostic pop static void reportError(const char *Str) { reportError("%s", Str); } @@ -115,6 +125,13 @@ class ErrorReporter { print(Color, "%s", Str.str().c_str()); } + static void reportLocation(SanitizerEnvironmentTy &SE) { + print(BoldLightPurple, + "Triggered by thread <%u,%u,%u> block <%u,%u,%u> PC %p\n", + SE.ThreadId[0], SE.ThreadId[1], SE.ThreadId[2], SE.BlockId[0], + SE.BlockId[1], SE.BlockId[2], (void *)SE.PC); + } + /// Pretty print a stack trace. static void reportStackTrace(StringRef StackTrace) { if (StackTrace.empty()) @@ -280,6 +297,16 @@ class ErrorReporter { std::function AsyncInfoWrapperMatcher) { assert(AsyncInfoWrapperMatcher && "A matcher is required"); + SanitizerEnvironmentTy *SE = nullptr; + for (auto &It : Device.SanitizerEnvironmentMap) { + if (It.second->ErrorCode == SanitizerEnvironmentTy::NONE) + continue; + if (SE) + reportWarning( + "Multiple errors encountered, information might be inaccurate."); + SE = It.second; + } + uint32_t Idx = 0; for (uint32_t I = 0, E = KTIR.size(); I < E; ++I) { auto KTI = KTIR.getKernelTraceInfo(I); @@ -298,7 +325,24 @@ class ErrorReporter { llvm::omp::prettifyFunctionName(KTI.Kernel->getName()); reportError("Kernel '%s'", PrettyKernelName.c_str()); } - reportError("execution interrupted by hardware trap instruction"); + assert((!SE || SE->ErrorCode != SanitizerEnvironmentTy::NONE) && + "Unexpected sanitizer environment"); + if (!SE) { + reportError("execution stopped, reason is unknown"); + print(Yellow, "Compile with '-mllvm -amdgpu-enable-offload-sanitizer' " + "improved diagnosis\n"); + } else { + switch (SE->ErrorCode) { + case SanitizerEnvironmentTy::TRAP: + reportError("execution interrupted by hardware trap instruction"); + break; + default: + reportError( + "execution stopped, reason is unknown due to invalid error code"); + } + + reportLocation(*SE); + } if (KTI.AsyncInfo && (AsyncInfoWrapperMatcher(*KTI.AsyncInfo))) { if (!KTI.LaunchTrace.empty()) reportStackTrace(KTI.LaunchTrace); diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 41cc0f286a581f..38da73528a1599 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -712,6 +712,11 @@ struct GenericDeviceTy : public DeviceAllocatorTy { Error setupDeviceMemoryPool(GenericPluginTy &Plugin, DeviceImageTy &Image, uint64_t PoolSize); + /// Setup the sanitizer environment to receive sanitizer information from the + /// device. + Error setupSanitizerEnvironment(GenericPluginTy &Plugin, + DeviceImageTy &Image); + // Setup the RPC server for this device if needed. This may not run on some // plugins like the CPU targets. By default, it will not be executed so it is // up to the target to override this using the shouldSetupRPCServer function. @@ -931,6 +936,8 @@ struct GenericDeviceTy : public DeviceAllocatorTy { /// Allocate and construct a kernel object. virtual Expected constructKernel(const char *Name) = 0; + DenseMap SanitizerEnvironmentMap; + /// Reference to the underlying plugin that created this device. GenericPluginTy &Plugin; diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index 25b815b7f96694..33c9fa04797ce2 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -927,6 +927,9 @@ GenericDeviceTy::loadBinary(GenericPluginTy &Plugin, return std::move(Err); } + if (auto Err = setupSanitizerEnvironment(Plugin, *Image)) + return std::move(Err); + if (auto Err = setupRPCServer(Plugin, *Image)) return std::move(Err); @@ -1032,6 +1035,23 @@ Error GenericDeviceTy::setupDeviceMemoryPool(GenericPluginTy &Plugin, return GHandler.writeGlobalToDevice(*this, Image, DevEnvGlobal); } +Error GenericDeviceTy::setupSanitizerEnvironment(GenericPluginTy &Plugin, + DeviceImageTy &Image) { + GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler(); + if (!GHandler.isSymbolInImage(*this, Image, "__sanitizer_environment_ptr")) + return Plugin::success(); + + auto *&SanitizerEnvironment = SanitizerEnvironmentMap[&Image]; + SanitizerEnvironment = reinterpret_cast(allocate( + sizeof(*SanitizerEnvironment), &SanitizerEnvironment, TARGET_ALLOC_HOST)); + memset(SanitizerEnvironment, '\0', sizeof(SanitizerEnvironmentTy)); + + GlobalTy SanitizerEnvironmentGlobal("__sanitizer_environment_ptr", + sizeof(SanitizerEnvironment), + &SanitizerEnvironment); + return GHandler.writeGlobalToDevice(*this, Image, SanitizerEnvironmentGlobal); +} + Error GenericDeviceTy::setupRPCServer(GenericPluginTy &Plugin, DeviceImageTy &Image) { // The plugin either does not need an RPC server or it is unavailible. diff --git a/offload/test/sanitizer/kernel_trap.c b/offload/test/sanitizer/kernel_trap.c index 91c4c7229159bc..d9162b454425ca 100644 --- a/offload/test/sanitizer/kernel_trap.c +++ b/offload/test/sanitizer/kernel_trap.c @@ -1,11 +1,14 @@ // clang-format off // RUN: %libomptarget-compile-generic -// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NDEBG -// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NDEBG,NOSAN +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN // RUN: %libomptarget-compile-generic -g -// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG -// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG,NOSAN +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN +// RUN: %libomptarget-compile-generic -g -mllvm -amdgpu-enable-offload-sanitizer +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT,TRACE,DEBUG +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT // clang-format on // UNSUPPORTED: nvptx64-nvidia-cuda @@ -27,17 +30,22 @@ int main(void) { #pragma omp target { } -#pragma omp target +#pragma omp target teams num_teams(32) thread_limit(128) { - __builtin_trap(); +#pragma omp parallel + if (omp_get_team_num() == 17 && omp_get_thread_num() == 42) + __builtin_trap(); } #pragma omp target { } } // clang-format off -// CHECK: OFFLOAD ERROR: Kernel 'omp target in main @ 30 (__omp_offloading_{{.*}}_main_l30)' -// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction +// CHECK: OFFLOAD ERROR: Kernel 'omp target in main @ 33 (__omp_offloading_{{.*}}_main_l33)' +// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown +// NOSAN: Compile with '-mllvm -amdgpu-enable-offload-sanitizer' improved diagnosis +// SANIT: OFFLOAD ERROR: execution interrupted by hardware trap instruction +// SANIT: Triggered by thread <42,0,0> block <17,0,0> PC 0x{{.*}} // TRACE: launchKernel // NDEBG: main // DEBUG: main {{.*}}kernel_trap.c: diff --git a/offload/test/sanitizer/kernel_trap.cpp b/offload/test/sanitizer/kernel_trap.cpp index c67b3857fabba1..e950e6cba4129e 100644 --- a/offload/test/sanitizer/kernel_trap.cpp +++ b/offload/test/sanitizer/kernel_trap.cpp @@ -1,11 +1,11 @@ // clang-format off // RUN: %libomptarget-compilexx-generic -// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NDEBG -// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK -// RUN: %libomptarget-compilexx-generic -g -// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG -// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN +// RUN: %libomptarget-compilexx-generic -g -mllvm -amdgpu-enable-offload-sanitizer +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT // clang-format on // UNSUPPORTED: nvptx64-nvidia-cuda @@ -43,7 +43,8 @@ int main(void) { // clang-format off // CHECK: OFFLOAD ERROR: Kernel 'omp target in void cxx_function_name(int, S*) @ [[LINE:[0-9]+]] (__omp_offloading_{{.*}}__Z17cxx_function_nameI1SEviPT__l[[LINE]])' -// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction +// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown +// SANIT: OFFLOAD ERROR: execution interrupted by hardware trap instruction // TRACE: launchKernel // NDEBG: cxx_function_name(int, S*) // NDEBG: main diff --git a/offload/test/sanitizer/kernel_trap_all.c b/offload/test/sanitizer/kernel_trap_all.c new file mode 100644 index 00000000000000..379ca8362aa83d --- /dev/null +++ b/offload/test/sanitizer/kernel_trap_all.c @@ -0,0 +1,31 @@ + +// clang-format off +// RUN: %libomptarget-compile-generic -g -mllvm -amdgpu-enable-offload-sanitizer +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK + +// UNSUPPORTED: nvptx64-nvidia-cuda +// UNSUPPORTED: nvptx64-nvidia-cuda-LTO +// UNSUPPORTED: aarch64-unknown-linux-gnu +// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-LTO +// UNSUPPORTED: s390x-ibm-linux-gnu +// UNSUPPORTED: s390x-ibm-linux-gnu-LTO + +#include + +int main(void) { + +#pragma omp target teams + { +#pragma omp parallel + __builtin_trap(); + } +} +// CHECK: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l20) +// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction +// CHECK: Triggered by thread <{{[0-9]*}},0,0> block <{{[0-9]*}},0,0> PC 0x{{.*}} +// TRACE: launchKernel +// NDEBG: main +// DEBUG: main {{.*}}kernel_trap_all.c: diff --git a/offload/test/sanitizer/kernel_trap_async.c b/offload/test/sanitizer/kernel_trap_async.c index 391ff0c7dcaa4e..ec516a924e1eea 100644 --- a/offload/test/sanitizer/kernel_trap_async.c +++ b/offload/test/sanitizer/kernel_trap_async.c @@ -1,11 +1,11 @@ // clang-format off // RUN: %libomptarget-compileopt-generic -// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE -// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK -// RUN: %libomptarget-compileopt-generic -g -// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG -// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NOSAN +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN +// RUN: %libomptarget-compileopt-generic -g -mllvm -amdgpu-enable-offload-sanitizer +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG,SANIT +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT // clang-format on // UNSUPPORTED: nvptx64-nvidia-cuda @@ -36,7 +36,9 @@ int main(void) { // clang-format off // CHECK: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l30) -// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction +// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown +// NOSAN: Compile with '-mllvm -amdgpu-enable-offload-sanitizer' improved diagnosis +// SANIT: OFFLOAD ERROR: execution interrupted by hardware trap instruction // TRACE: launchKernel // DEBUG: kernel_trap_async.c: // clang-format on diff --git a/offload/test/sanitizer/kernel_trap_many.c b/offload/test/sanitizer/kernel_trap_many.c index f2e63794168b2b..e6a0ed65f5ec8d 100644 --- a/offload/test/sanitizer/kernel_trap_many.c +++ b/offload/test/sanitizer/kernel_trap_many.c @@ -1,8 +1,10 @@ // clang-format off // RUN: %libomptarget-compile-generic -// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=24 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,NDEBG +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=24 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,NDEBG,NOSAN // RUN: %libomptarget-compile-generic -g -// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=16 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,DEBUG +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=16 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,DEBUG,NOSAN +// RUN: %libomptarget-compile-generic -g -mllvm -amdgpu-enable-offload-sanitizer +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=SANIT,TRACE,DEBUG // clang-format on // UNSUPPORTED: nvptx64-nvidia-cuda @@ -23,13 +25,16 @@ int main(void) { { } } -#pragma omp target +#pragma omp target thread_limit(1) { __builtin_trap(); } } -// TRACE: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l26) -// TRACE: OFFLOAD ERROR: execution interrupted by hardware trap instruction +// TRACE: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l28) +// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown +// NOSAN: Compile with '-mllvm -amdgpu-enable-offload-sanitizer' improved +// diagnosis SANIT: OFFLOAD ERROR: execution interrupted by hardware trap +// instruction SANIT: Triggered by thread <0,0,0> block <0,0,0> PC 0x{{.*}} // TRACE: launchKernel // NDEBG: main // DEBUG: main {{.*}}kernel_trap_many.c: