diff --git a/llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h new file mode 100644 index 00000000000000..6935b7dc390c40 --- /dev/null +++ b/llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h @@ -0,0 +1,27 @@ +//===- Transforms/Instrumentation/OffloadSanitizer.h ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Pass to instrument offload code in order to detect errors and communicate +// them to the LLVM/Offload runtimes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class OffloadSanitizerPass : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index df7c9a4fbb9387..5cd9894943fc95 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -208,6 +208,7 @@ #include "llvm/Transforms/Instrumentation/MemProfiler.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" #include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h" +#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h" #include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h" #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index da179a6610afd5..8d5e0c80f5863c 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -106,6 +106,7 @@ MODULE_PASS("name-anon-globals", NameAnonGlobalPass()) MODULE_PASS("no-op-module", NoOpModulePass()) MODULE_PASS("nsan", NumericalStabilitySanitizerPass()) MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass()) +MODULE_PASS("offload-sanitizer", OffloadSanitizerPass()) MODULE_PASS("openmp-opt", OpenMPOptPass()) MODULE_PASS("openmp-opt-postlink", OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink)) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 603339e200dde9..0b481897e337ab 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -74,6 +74,7 @@ #include "llvm/Transforms/IPO/ExpandVariadics.h" #include "llvm/Transforms/IPO/GlobalDCE.h" #include "llvm/Transforms/IPO/Internalize.h" +#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/Transforms/Scalar/FlattenCFG.h" @@ -448,6 +449,11 @@ static cl::opt cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden); +static cl::opt + EnableOffloadSanitizer("amdgpu-enable-offload-sanitizer", + cl::desc("Enable the offload sanitizer"), + cl::init(false), cl::Hidden); + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { // Register the target RegisterTargetMachine X(getTheR600Target()); @@ -823,6 +829,9 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { PB.registerFullLinkTimeOptimizationLastEPCallback( [this](ModulePassManager &PM, OptimizationLevel Level) { + if (EnableOffloadSanitizer) + PM.addPass(OffloadSanitizerPass()); + // We want to support the -lto-partitions=N option as "best effort". // For that, we need to lower LDS earlier in the pipeline before the // module is partitioned for codegen. diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index fed29c3e14aae2..d74f484a172048 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -186,6 +186,7 @@ add_llvm_target(AMDGPUCodeGen Core GlobalISel HipStdPar + Instrumentation IPO IRPrinter Instrumentation diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt index 3e3c3eced4bb9c..e3f34277f5be3b 100644 --- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_component_library(LLVMInstrumentation MemProfiler.cpp MemorySanitizer.cpp NumericalStabilitySanitizer.cpp + OffloadSanitizer.cpp IndirectCallPromotion.cpp InstrOrderFile.cpp InstrProfiling.cpp diff --git a/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp new file mode 100644 index 00000000000000..adae3fcb16ffa8 --- /dev/null +++ b/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp @@ -0,0 +1,150 @@ +//===-- OffloadSanitizer.cpp - Offload sanitizer --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h" + +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Value.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "offload-sanitizer" + +namespace { + +class OffloadSanitizerImpl final { +public: + OffloadSanitizerImpl(Module &M, FunctionAnalysisManager &FAM) + : M(M), FAM(FAM), Ctx(M.getContext()) {} + + bool instrument(); + +private: + bool shouldInstrumentFunction(Function &Fn); + bool instrumentFunction(Function &Fn); + bool instrumentTrapInstruction(IntrinsicInst &II); + + FunctionCallee getOrCreateFn(FunctionCallee &FC, StringRef Name, Type *RetTy, + ArrayRef ArgTys) { + if (!FC) { + auto *NewAllocationFnTy = FunctionType::get(RetTy, ArgTys, false); + FC = M.getOrInsertFunction(Name, NewAllocationFnTy); + } + return FC; + } + + /// void __offload_san_trap_info(Int64Ty); + FunctionCallee TrapInfoFn; + FunctionCallee getTrapInfoFn() { + return getOrCreateFn(TrapInfoFn, "__offload_san_trap_info", VoidTy, + {/*PC*/ Int64Ty}); + } + + CallInst *createCall(IRBuilder<> &IRB, FunctionCallee Callee, + ArrayRef Args = std::nullopt, + const Twine &Name = "") { + Calls.push_back(IRB.CreateCall(Callee, Args, Name)); + return Calls.back(); + } + SmallVector Calls; + + Value *getPC(IRBuilder<> &IRB) { + return IRB.CreateIntrinsic(Int64Ty, Intrinsic::amdgcn_s_getpc, {}, nullptr, + "PC"); + } + + Module &M; + FunctionAnalysisManager &FAM; + LLVMContext &Ctx; + + Type *VoidTy = Type::getVoidTy(Ctx); + Type *IntptrTy = M.getDataLayout().getIntPtrType(Ctx); + PointerType *PtrTy = PointerType::getUnqual(Ctx); + IntegerType *Int8Ty = Type::getInt8Ty(Ctx); + IntegerType *Int32Ty = Type::getInt32Ty(Ctx); + IntegerType *Int64Ty = Type::getInt64Ty(Ctx); + + const DataLayout &DL = M.getDataLayout(); +}; + +} // end anonymous namespace + +bool OffloadSanitizerImpl::shouldInstrumentFunction(Function &Fn) { + if (Fn.isDeclaration()) + return false; + return !Fn.hasFnAttribute(Attribute::DisableSanitizerInstrumentation); +} + +bool OffloadSanitizerImpl::instrumentTrapInstruction(IntrinsicInst &II) { + IRBuilder<> IRB(&II); + createCall(IRB, getTrapInfoFn(), {getPC(IRB)}); + return true; +} + +bool OffloadSanitizerImpl::instrumentFunction(Function &Fn) { + if (!shouldInstrumentFunction(Fn)) + return false; + + bool Changed = false; + for (auto &I : instructions(Fn)) { + switch (I.getOpcode()) { + case Instruction::Call: { + auto &CI = cast(I); + if (auto *II = dyn_cast(&CI)) + if (II->isNonContinuableTrap()) + Changed |= instrumentTrapInstruction(*II); + break; + } + default: + break; + } + } + + + return Changed; +} + +bool OffloadSanitizerImpl::instrument() { + bool Changed = false; + + for (Function &Fn : M) + Changed |= instrumentFunction(Fn); + + removeFromUsedLists(M, [&](Constant *C) { + if (!C->getName().starts_with("__offload_san")) + return false; + return Changed = true; + }); + + return Changed; +} + +PreservedAnalyses OffloadSanitizerPass::run(Module &M, + ModuleAnalysisManager &AM) { + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + OffloadSanitizerImpl Impl(M, FAM); + if (!Impl.instrument()) + return PreservedAnalyses::all(); + LLVM_DEBUG(M.dump()); + return PreservedAnalyses::none(); +} diff --git a/llvm/test/Instrumentation/OffloadSanitizer/basic.ll b/llvm/test/Instrumentation/OffloadSanitizer/basic.ll new file mode 100644 index 00000000000000..c7bbbafd9f0580 --- /dev/null +++ b/llvm/test/Instrumentation/OffloadSanitizer/basic.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +target triple = "amdgcn-amd-amdhsa" + +; Test basic offload sanitizer trap instrumentation. + +; RUN: opt < %s -passes=offload-sanitizer -S | FileCheck --check-prefixes=CHECK %s + +define void @test_trap1() { +; CHECK-LABEL: define void @test_trap1() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: call void @__offload_san_trap_info(i64 [[PC]]) +; CHECK-NEXT: call void @llvm.trap() +; CHECK-NEXT: ret void +; +entry: + call void @llvm.trap() + ret void +} + +define void @test_trap2() { +; CHECK-LABEL: define void @test_trap2() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: call void @__offload_san_trap_info(i64 [[PC]]) +; CHECK-NEXT: call void @llvm.trap() +; CHECK-NEXT: unreachable +; +entry: + call void @llvm.trap() + unreachable +} + +define void @test_trap3(i1 %c) { +; CHECK-LABEL: define void @test_trap3( +; CHECK-SAME: i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C]], label %[[T:.*]], label %[[F:.*]] +; CHECK: [[T]]: +; CHECK-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: call void @__offload_san_trap_info(i64 [[PC]]) +; CHECK-NEXT: call void @llvm.trap() +; CHECK-NEXT: unreachable +; CHECK: [[F]]: +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %t, label %f +t: + call void @llvm.trap() + unreachable +f: + ret void +} + +define void @test_ubsantrap(i1 %c) { +; CHECK-LABEL: define void @test_ubsantrap( +; CHECK-SAME: i1 [[C:%.*]]) { +; CHECK-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: call void @__offload_san_trap_info(i64 [[PC]]) +; CHECK-NEXT: call void @llvm.ubsantrap(i8 42) +; CHECK-NEXT: unreachable +; + call void @llvm.ubsantrap(i8 42) + unreachable +} + +define void @test_trap_no_san_attr(i1 %c) disable_sanitizer_instrumentation { +; __attribute__((disable_sanitizer_instrumentation)) +; CHECK-LABEL: define void @test_trap_no_san_attr( +; CHECK-SAME: i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: call void @llvm.trap() +; CHECK-NEXT: ret void +; + call void @llvm.trap() + ret void +} diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt index 96cb79b7d071c5..8d20e3a396226e 100644 --- a/offload/DeviceRTL/CMakeLists.txt +++ b/offload/DeviceRTL/CMakeLists.txt @@ -96,6 +96,7 @@ set(src_files ${source_directory}/Parallelism.cpp ${source_directory}/Profiling.cpp ${source_directory}/Reduction.cpp + ${source_directory}/Sanitizer.cpp ${source_directory}/State.cpp ${source_directory}/Synchronization.cpp ${source_directory}/Tasking.cpp diff --git a/offload/DeviceRTL/src/Sanitizer.cpp b/offload/DeviceRTL/src/Sanitizer.cpp new file mode 100644 index 00000000000000..d524eed0d9a930 --- /dev/null +++ b/offload/DeviceRTL/src/Sanitizer.cpp @@ -0,0 +1,96 @@ +//===------ Sanitizer.cpp - Track allocation for sanitizer checks ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#include "DeviceTypes.h" +#include "DeviceUtils.h" +#include "Mapping.h" +#include "Shared/Environment.h" +#include "Synchronization.h" +#include "gpuintrin.h" + +using namespace ompx; + +#define _SAN_ATTRS \ + [[clang::disable_sanitizer_instrumentation, gnu::used, gnu::retain]] +#define _SAN_ENTRY_ATTRS [[gnu::flatten, gnu::always_inline]] _SAN_ATTRS + +#pragma omp begin declare target device_type(nohost) + +[[gnu::visibility("protected")]] _SAN_ATTRS SanitizerEnvironmentTy + *__sanitizer_environment_ptr; + +namespace { + +/// Helper to lock the sanitizer environment. While we never unlock it, this +/// allows us to have a no-op "side effect" in the spin-wait function below. +_SAN_ATTRS bool +getSanitizerEnvironmentLock(SanitizerEnvironmentTy &SE, + SanitizerEnvironmentTy::ErrorCodeTy ErrorCode) { + return atomic::cas(SE.getErrorCodeLocation(), SanitizerEnvironmentTy::NONE, + ErrorCode, atomic::OrderingTy::seq_cst, + atomic::OrderingTy::seq_cst); +} + +/// The spin-wait function should not be inlined, it's a catch all to give one +/// thread time to setup the sanitizer environment. +[[clang::noinline]] _SAN_ATTRS void spinWait(SanitizerEnvironmentTy &SE) { + while (!atomic::load(&SE.IsInitialized, atomic::OrderingTy::aquire)) + ; + __builtin_trap(); +} + +_SAN_ATTRS +void setLocation(SanitizerEnvironmentTy &SE, uint64_t PC) { + for (int I = 0; I < 3; ++I) { + SE.ThreadId[I] = mapping::getThreadIdInBlock(I); + SE.BlockId[I] = mapping::getBlockIdInKernel(I); + } + SE.PC = PC; + + // This is the last step to initialize the sanitizer environment, time to + // trap via the spinWait. Flush the memory writes and signal for the end. + fence::system(atomic::OrderingTy::release); + atomic::store(&SE.IsInitialized, 1, atomic::OrderingTy::release); +} + +_SAN_ATTRS +void raiseExecutionError(SanitizerEnvironmentTy::ErrorCodeTy ErrorCode, + uint64_t PC) { + SanitizerEnvironmentTy &SE = *__sanitizer_environment_ptr; + bool HasLock = getSanitizerEnvironmentLock(SE, ErrorCode); + + // If no thread of this warp has the lock, end execution gracefully. + bool AnyThreadHasLock = utils::ballotSync(lanes::All, HasLock); + if (!AnyThreadHasLock) + __gpu_exit(); + + // One thread will set the location information and signal that the rest of + // the wapr that the actual trap can be executed now. + if (HasLock) + setLocation(SE, PC); + + synchronize::warp(lanes::All); + + // This is not the first thread that encountered the trap, to avoid a race + // on the sanitizer environment, this thread is simply going to spin-wait. + // The trap above will end the program for all threads. + spinWait(SE); +} + +} // namespace + +extern "C" { + +_SAN_ENTRY_ATTRS void __offload_san_trap_info(uint64_t PC) { + raiseExecutionError(SanitizerEnvironmentTy::TRAP, PC); +} +} + +#pragma omp end declare target diff --git a/offload/include/Shared/Environment.h b/offload/include/Shared/Environment.h index 147583c209fc3e..6aab284f6c9743 100644 --- a/offload/include/Shared/Environment.h +++ b/offload/include/Shared/Environment.h @@ -105,4 +105,29 @@ struct KernelLaunchEnvironmentTy { void *ReductionBuffer = nullptr; }; +/// The environment used to communicate sanitizer information from the device to +/// the host. +struct SanitizerEnvironmentTy { + enum ErrorCodeTy : uint8_t { + NONE = 0, + TRAP, + LAST = TRAP, + } ErrorCode; + + /// Flag to indicate the environment has been initialized fully. + uint8_t IsInitialized; + + /// Return the error code location for use in an atomic compare-and-swap. + uint8_t *getErrorCodeLocation() { + return reinterpret_cast(&ErrorCode); + } + + /// Thread info + /// { + uint32_t ThreadId[3]; + uint32_t BlockId[3]; + uint64_t PC; + /// } +}; + #endif // OMPTARGET_SHARED_ENVIRONMENT_H diff --git a/offload/plugins-nextgen/common/include/ErrorReporting.h b/offload/plugins-nextgen/common/include/ErrorReporting.h index 8478977a8f86af..8207512e41fd39 100644 --- a/offload/plugins-nextgen/common/include/ErrorReporting.h +++ b/offload/plugins-nextgen/common/include/ErrorReporting.h @@ -12,6 +12,7 @@ #define OFFLOAD_PLUGINS_NEXTGEN_COMMON_ERROR_REPORTING_H #include "PluginInterface.h" +#include "Shared/Environment.h" #include "Shared/EnvironmentVar.h" #include "llvm/ADT/STLExtras.h" @@ -105,6 +106,15 @@ class ErrorReporter { print(BoldRed, Format, Args...); print("\n"); } + + /// Print \p Format, instantiated with \p Args to stderr, but colored with + /// a banner. + template + [[gnu::format(__printf__, 1, 2)]] static void + reportWarning(const char *Format, ArgsTy &&...Args) { + print(Yellow, "WARNING: "); + print(Yellow, Format, Args...); + } #pragma clang diagnostic pop static void reportError(const char *Str) { reportError("%s", Str); } @@ -115,6 +125,13 @@ class ErrorReporter { print(Color, "%s", Str.str().c_str()); } + static void reportLocation(SanitizerEnvironmentTy &SE) { + print(BoldLightPurple, + "Triggered by thread <%u,%u,%u> block <%u,%u,%u> PC %p\n", + SE.ThreadId[0], SE.ThreadId[1], SE.ThreadId[2], SE.BlockId[0], + SE.BlockId[1], SE.BlockId[2], (void *)SE.PC); + } + /// Pretty print a stack trace. static void reportStackTrace(StringRef StackTrace) { if (StackTrace.empty()) @@ -280,6 +297,16 @@ class ErrorReporter { std::function AsyncInfoWrapperMatcher) { assert(AsyncInfoWrapperMatcher && "A matcher is required"); + SanitizerEnvironmentTy *SE = nullptr; + for (auto &It : Device.SanitizerEnvironmentMap) { + if (It.second->ErrorCode == SanitizerEnvironmentTy::NONE) + continue; + if (SE) + reportWarning( + "Multiple errors encountered, information might be inaccurate."); + SE = It.second; + } + uint32_t Idx = 0; for (uint32_t I = 0, E = KTIR.size(); I < E; ++I) { auto KTI = KTIR.getKernelTraceInfo(I); @@ -298,7 +325,24 @@ class ErrorReporter { llvm::omp::prettifyFunctionName(KTI.Kernel->getName()); reportError("Kernel '%s'", PrettyKernelName.c_str()); } - reportError("execution interrupted by hardware trap instruction"); + assert((!SE || SE->ErrorCode != SanitizerEnvironmentTy::NONE) && + "Unexpected sanitizer environment"); + if (!SE) { + reportError("execution stopped, reason is unknown"); + print(Yellow, "Compile with '-mllvm -amdgpu-enable-offload-sanitizer' " + "improved diagnosis\n"); + } else { + switch (SE->ErrorCode) { + case SanitizerEnvironmentTy::TRAP: + reportError("execution interrupted by hardware trap instruction"); + break; + default: + reportError( + "execution stopped, reason is unknown due to invalid error code"); + } + + reportLocation(*SE); + } if (KTI.AsyncInfo && (AsyncInfoWrapperMatcher(*KTI.AsyncInfo))) { if (!KTI.LaunchTrace.empty()) reportStackTrace(KTI.LaunchTrace); diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 41cc0f286a581f..38da73528a1599 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -712,6 +712,11 @@ struct GenericDeviceTy : public DeviceAllocatorTy { Error setupDeviceMemoryPool(GenericPluginTy &Plugin, DeviceImageTy &Image, uint64_t PoolSize); + /// Setup the sanitizer environment to receive sanitizer information from the + /// device. + Error setupSanitizerEnvironment(GenericPluginTy &Plugin, + DeviceImageTy &Image); + // Setup the RPC server for this device if needed. This may not run on some // plugins like the CPU targets. By default, it will not be executed so it is // up to the target to override this using the shouldSetupRPCServer function. @@ -931,6 +936,8 @@ struct GenericDeviceTy : public DeviceAllocatorTy { /// Allocate and construct a kernel object. virtual Expected constructKernel(const char *Name) = 0; + DenseMap SanitizerEnvironmentMap; + /// Reference to the underlying plugin that created this device. GenericPluginTy &Plugin; diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index 25b815b7f96694..33c9fa04797ce2 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -927,6 +927,9 @@ GenericDeviceTy::loadBinary(GenericPluginTy &Plugin, return std::move(Err); } + if (auto Err = setupSanitizerEnvironment(Plugin, *Image)) + return std::move(Err); + if (auto Err = setupRPCServer(Plugin, *Image)) return std::move(Err); @@ -1032,6 +1035,23 @@ Error GenericDeviceTy::setupDeviceMemoryPool(GenericPluginTy &Plugin, return GHandler.writeGlobalToDevice(*this, Image, DevEnvGlobal); } +Error GenericDeviceTy::setupSanitizerEnvironment(GenericPluginTy &Plugin, + DeviceImageTy &Image) { + GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler(); + if (!GHandler.isSymbolInImage(*this, Image, "__sanitizer_environment_ptr")) + return Plugin::success(); + + auto *&SanitizerEnvironment = SanitizerEnvironmentMap[&Image]; + SanitizerEnvironment = reinterpret_cast(allocate( + sizeof(*SanitizerEnvironment), &SanitizerEnvironment, TARGET_ALLOC_HOST)); + memset(SanitizerEnvironment, '\0', sizeof(SanitizerEnvironmentTy)); + + GlobalTy SanitizerEnvironmentGlobal("__sanitizer_environment_ptr", + sizeof(SanitizerEnvironment), + &SanitizerEnvironment); + return GHandler.writeGlobalToDevice(*this, Image, SanitizerEnvironmentGlobal); +} + Error GenericDeviceTy::setupRPCServer(GenericPluginTy &Plugin, DeviceImageTy &Image) { // The plugin either does not need an RPC server or it is unavailible. diff --git a/offload/test/sanitizer/kernel_trap.c b/offload/test/sanitizer/kernel_trap.c index 91c4c7229159bc..d9162b454425ca 100644 --- a/offload/test/sanitizer/kernel_trap.c +++ b/offload/test/sanitizer/kernel_trap.c @@ -1,11 +1,14 @@ // clang-format off // RUN: %libomptarget-compile-generic -// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NDEBG -// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NDEBG,NOSAN +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN // RUN: %libomptarget-compile-generic -g -// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG -// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG,NOSAN +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN +// RUN: %libomptarget-compile-generic -g -mllvm -amdgpu-enable-offload-sanitizer +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT,TRACE,DEBUG +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT // clang-format on // UNSUPPORTED: nvptx64-nvidia-cuda @@ -27,17 +30,22 @@ int main(void) { #pragma omp target { } -#pragma omp target +#pragma omp target teams num_teams(32) thread_limit(128) { - __builtin_trap(); +#pragma omp parallel + if (omp_get_team_num() == 17 && omp_get_thread_num() == 42) + __builtin_trap(); } #pragma omp target { } } // clang-format off -// CHECK: OFFLOAD ERROR: Kernel 'omp target in main @ 30 (__omp_offloading_{{.*}}_main_l30)' -// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction +// CHECK: OFFLOAD ERROR: Kernel 'omp target in main @ 33 (__omp_offloading_{{.*}}_main_l33)' +// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown +// NOSAN: Compile with '-mllvm -amdgpu-enable-offload-sanitizer' improved diagnosis +// SANIT: OFFLOAD ERROR: execution interrupted by hardware trap instruction +// SANIT: Triggered by thread <42,0,0> block <17,0,0> PC 0x{{.*}} // TRACE: launchKernel // NDEBG: main // DEBUG: main {{.*}}kernel_trap.c: diff --git a/offload/test/sanitizer/kernel_trap.cpp b/offload/test/sanitizer/kernel_trap.cpp index c67b3857fabba1..e950e6cba4129e 100644 --- a/offload/test/sanitizer/kernel_trap.cpp +++ b/offload/test/sanitizer/kernel_trap.cpp @@ -1,11 +1,11 @@ // clang-format off // RUN: %libomptarget-compilexx-generic -// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NDEBG -// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK -// RUN: %libomptarget-compilexx-generic -g -// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG -// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN +// RUN: %libomptarget-compilexx-generic -g -mllvm -amdgpu-enable-offload-sanitizer +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT // clang-format on // UNSUPPORTED: nvptx64-nvidia-cuda @@ -43,7 +43,8 @@ int main(void) { // clang-format off // CHECK: OFFLOAD ERROR: Kernel 'omp target in void cxx_function_name(int, S*) @ [[LINE:[0-9]+]] (__omp_offloading_{{.*}}__Z17cxx_function_nameI1SEviPT__l[[LINE]])' -// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction +// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown +// SANIT: OFFLOAD ERROR: execution interrupted by hardware trap instruction // TRACE: launchKernel // NDEBG: cxx_function_name(int, S*) // NDEBG: main diff --git a/offload/test/sanitizer/kernel_trap_all.c b/offload/test/sanitizer/kernel_trap_all.c new file mode 100644 index 00000000000000..379ca8362aa83d --- /dev/null +++ b/offload/test/sanitizer/kernel_trap_all.c @@ -0,0 +1,31 @@ + +// clang-format off +// RUN: %libomptarget-compile-generic -g -mllvm -amdgpu-enable-offload-sanitizer +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK + +// UNSUPPORTED: nvptx64-nvidia-cuda +// UNSUPPORTED: nvptx64-nvidia-cuda-LTO +// UNSUPPORTED: aarch64-unknown-linux-gnu +// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-LTO +// UNSUPPORTED: s390x-ibm-linux-gnu +// UNSUPPORTED: s390x-ibm-linux-gnu-LTO + +#include + +int main(void) { + +#pragma omp target teams + { +#pragma omp parallel + __builtin_trap(); + } +} +// CHECK: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l20) +// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction +// CHECK: Triggered by thread <{{[0-9]*}},0,0> block <{{[0-9]*}},0,0> PC 0x{{.*}} +// TRACE: launchKernel +// NDEBG: main +// DEBUG: main {{.*}}kernel_trap_all.c: diff --git a/offload/test/sanitizer/kernel_trap_async.c b/offload/test/sanitizer/kernel_trap_async.c index 391ff0c7dcaa4e..ec516a924e1eea 100644 --- a/offload/test/sanitizer/kernel_trap_async.c +++ b/offload/test/sanitizer/kernel_trap_async.c @@ -1,11 +1,11 @@ // clang-format off // RUN: %libomptarget-compileopt-generic -// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE -// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK -// RUN: %libomptarget-compileopt-generic -g -// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG -// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NOSAN +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN +// RUN: %libomptarget-compileopt-generic -g -mllvm -amdgpu-enable-offload-sanitizer +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG,SANIT +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT // clang-format on // UNSUPPORTED: nvptx64-nvidia-cuda @@ -36,7 +36,9 @@ int main(void) { // clang-format off // CHECK: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l30) -// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction +// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown +// NOSAN: Compile with '-mllvm -amdgpu-enable-offload-sanitizer' improved diagnosis +// SANIT: OFFLOAD ERROR: execution interrupted by hardware trap instruction // TRACE: launchKernel // DEBUG: kernel_trap_async.c: // clang-format on diff --git a/offload/test/sanitizer/kernel_trap_many.c b/offload/test/sanitizer/kernel_trap_many.c index f2e63794168b2b..e6a0ed65f5ec8d 100644 --- a/offload/test/sanitizer/kernel_trap_many.c +++ b/offload/test/sanitizer/kernel_trap_many.c @@ -1,8 +1,10 @@ // clang-format off // RUN: %libomptarget-compile-generic -// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=24 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,NDEBG +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=24 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,NDEBG,NOSAN // RUN: %libomptarget-compile-generic -g -// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=16 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,DEBUG +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=16 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,DEBUG,NOSAN +// RUN: %libomptarget-compile-generic -g -mllvm -amdgpu-enable-offload-sanitizer +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=SANIT,TRACE,DEBUG // clang-format on // UNSUPPORTED: nvptx64-nvidia-cuda @@ -23,13 +25,16 @@ int main(void) { { } } -#pragma omp target +#pragma omp target thread_limit(1) { __builtin_trap(); } } -// TRACE: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l26) -// TRACE: OFFLOAD ERROR: execution interrupted by hardware trap instruction +// TRACE: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l28) +// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown +// NOSAN: Compile with '-mllvm -amdgpu-enable-offload-sanitizer' improved +// diagnosis SANIT: OFFLOAD ERROR: execution interrupted by hardware trap +// instruction SANIT: Triggered by thread <0,0,0> block <0,0,0> PC 0x{{.*}} // TRACE: launchKernel // NDEBG: main // DEBUG: main {{.*}}kernel_trap_many.c: