Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
xmrig committed Apr 1, 2019
2 parents 722a631 + 4630892 commit 9b61fb8
Show file tree
Hide file tree
Showing 38 changed files with 266 additions and 136 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# v2.14.2
- [#260](https://github.com/xmrig/xmrig-nvidia/issues/260) :warning: For the `cn/r` algorithm, the only supported version of CUDA is 10.1.
- [#253](https://github.com/xmrig/xmrig-nvidia/pull/253) Fixed NVRTC DLL copy when building the miner.
- [#255](https://github.com/xmrig/xmrig-nvidia/pull/255) Fixed CUDA8 support and added memory size display in summary.

# v2.14.1
- [#246](https://github.com/xmrig/xmrig-nvidia/issues/246) Fixed compatibility with old GPUs (compute capability < 3.5).

Expand Down
13 changes: 5 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -293,14 +293,11 @@ add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURC
target_link_libraries(${CMAKE_PROJECT_NAME} xmrig-cuda ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${LIBS} ${EXTRA_LIBS} ${CPUID_LIB})

if (WIN32)
if (CUDA_VERSION_MAJOR EQUAL 10)
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc64_100_0.dll" $<TARGET_FILE_DIR:xmrig-nvidia>)
else()
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc64_${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}.dll" $<TARGET_FILE_DIR:xmrig-nvidia>)
endif()
file(GLOB NVRTCDLL "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc64*.dll")
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different "${NVRTCDLL}" $<TARGET_FILE_DIR:xmrig-nvidia>)

file(GLOB NVRTCBUILTINDLL "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc-builtins64*.dll")
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc-builtins64_${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}.dll" $<TARGET_FILE_DIR:xmrig-nvidia>)
COMMAND ${CMAKE_COMMAND} -E copy_if_different "${NVRTCBUILTINDLL}" $<TARGET_FILE_DIR:xmrig-nvidia>)
endif()
16 changes: 13 additions & 3 deletions cmake/CUDA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ set(DEFAULT_CUDA_ARCH "30;50")

# Fermi GPUs are only supported with CUDA < 9.0
if (CUDA_VERSION VERSION_LESS 9.0)
list(APPEND DEFAULT_CUDA_ARCH "20")
list(APPEND DEFAULT_CUDA_ARCH "20;21")
endif()

# add Pascal support for CUDA >= 8.0
Expand Down Expand Up @@ -61,6 +61,7 @@ foreach(CUDA_ARCH_ELEM ${CUDA_ARCH})
"Use '20' (for compute architecture 2.0) or higher.")
endif()
endforeach()
list(SORT CUDA_ARCH)

option(CUDA_SHOW_REGISTER "Show registers used for each kernel and compute architecture" OFF)
option(CUDA_KEEP_FILES "Keep all intermediate files that are generated during internal compilation steps" OFF)
Expand Down Expand Up @@ -89,11 +90,20 @@ elseif("${CUDA_COMPILER}" STREQUAL "nvcc")
if (CUDA_VERSION VERSION_LESS 8.0)
add_definitions(-D_FORCE_INLINES)
add_definitions(-D_MWAITXINTRIN_H_INCLUDED)
elseif(CUDA_VERSION VERSION_LESS 9.0)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Wno-deprecated-gpu-targets")
endif()
foreach(CUDA_ARCH_ELEM ${CUDA_ARCH})
# set flags to create device code for the given architecture
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
"-Wno-deprecated-gpu-targets --generate-code arch=compute_${CUDA_ARCH_ELEM},code=sm_${CUDA_ARCH_ELEM} --generate-code arch=compute_${CUDA_ARCH_ELEM},code=compute_${CUDA_ARCH_ELEM}")
if("${CUDA_ARCH_ELEM}" STREQUAL "21")
# "2.1" actually does run faster when compiled as itself, versus in "2.0" compatible mode
# strange virtual code type on top of compute_20, with no compute_21 (so the normal rule fails)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
"--generate-code arch=compute_20,code=sm_21")
else()
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
"--generate-code arch=compute_${CUDA_ARCH_ELEM},code=sm_${CUDA_ARCH_ELEM} --generate-code arch=compute_${CUDA_ARCH_ELEM},code=compute_${CUDA_ARCH_ELEM}")
endif()
endforeach()

# give each thread an independent default stream
Expand Down
2 changes: 1 addition & 1 deletion src/Mem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ MemInfo Mem::create(cryptonight_ctx **ctx, xmrig::Algo algorithm, size_t count)

uint8_t* p = reinterpret_cast<uint8_t*>(allocateExecutableMemory(0x4000));
c->generated_code = reinterpret_cast<cn_mainloop_fun_ms_abi>(p);
c->generated_code_double = reinterpret_cast<cn_mainloop_double_fun_ms_abi>(p + 0x2000);
c->generated_code_double = reinterpret_cast<cn_mainloop_fun_ms_abi>(p + 0x2000);

c->generated_code_data.variant = xmrig::VARIANT_MAX;
c->generated_code_data.height = (uint64_t)(-1);
Expand Down
13 changes: 9 additions & 4 deletions src/Summary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <[email protected]>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <[email protected]>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2019 Spudz76 <https://github.com/Spudz76>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <[email protected]>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -65,10 +67,11 @@ static void print_algo(xmrig::Config *config)

static void print_gpu(xmrig::Config *config)
{
constexpr size_t byteToMiB = 1024u * 1024u;
for (const xmrig::IThread *t : config->threads()) {
auto thread = static_cast<const CudaThread *>(t);
Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("GPU #%-8zu") YELLOW("PCI:%04x:%02x:%02x") GREEN(" %s @ %d/%d MHz") " \x1B[1;30m%dx%d %dx%d arch:%d%d SMX:%d"
: " * GPU #%-8zuPCI:%04x:%02x:%02x %s @ %d/%d MHz %dx%d %dx%d arch:%d%d SMX:%d",
Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("GPU #%-8zu") YELLOW("PCI:%04x:%02x:%02x") GREEN(" %s @ %d/%d MHz") " \x1B[1;30m%dx%d %dx%d arch:%d%d SMX:%d MEM:%zu/%zu MiB"
: " * GPU #%-8zuPCI:%04x:%02x:%02x %s @ %d/%d MHz %dx%d %dx%d arch:%d%d SMX:%d MEM:%zu/%zu MiB",
thread->index(),
thread->pciDomainID(),
thread->pciBusID(),
Expand All @@ -82,7 +85,9 @@ static void print_gpu(xmrig::Config *config)
thread->bsleep(),
thread->arch()[0],
thread->arch()[1],
thread->smx()
thread->smx(),
thread->memoryFree() / byteToMiB,
thread->memoryTotal() / byteToMiB
);
}
}
Expand Down
16 changes: 8 additions & 8 deletions src/crypto/CryptoNight.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,22 +55,22 @@ bool CryptoNight::hash(const xmrig::Job &job, xmrig::JobResult &result, cryptoni
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr;
xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm = nullptr;

xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr;
xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr;

xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ivybridge_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ryzen_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_bulldozer_asm = nullptr;
xmrig::CpuThread::cn_mainloop_double_fun cn_zls_double_mainloop_sandybridge_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm = nullptr;

xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ivybridge_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ryzen_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_bulldozer_asm = nullptr;
xmrig::CpuThread::cn_mainloop_double_fun cn_double_double_mainloop_sandybridge_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr;

template<typename T, typename U>
static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t mask)
Expand Down Expand Up @@ -116,22 +116,22 @@ static void patchAsmVariants()
cn_half_mainloop_ivybridge_asm = reinterpret_cast<CpuThread::cn_mainloop_fun> (base + 0x0000);
cn_half_mainloop_ryzen_asm = reinterpret_cast<CpuThread::cn_mainloop_fun> (base + 0x1000);
cn_half_mainloop_bulldozer_asm = reinterpret_cast<CpuThread::cn_mainloop_fun> (base + 0x2000);
cn_half_double_mainloop_sandybridge_asm = reinterpret_cast<CpuThread::cn_mainloop_double_fun> (base + 0x3000);
cn_half_double_mainloop_sandybridge_asm = reinterpret_cast<CpuThread::cn_mainloop_fun> (base + 0x3000);

cn_trtl_mainloop_ivybridge_asm = reinterpret_cast<CpuThread::cn_mainloop_fun> (base + 0x4000);
cn_trtl_mainloop_ryzen_asm = reinterpret_cast<CpuThread::cn_mainloop_fun> (base + 0x5000);
cn_trtl_mainloop_bulldozer_asm = reinterpret_cast<CpuThread::cn_mainloop_fun> (base + 0x6000);
cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<CpuThread::cn_mainloop_double_fun> (base + 0x7000);
cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<CpuThread::cn_mainloop_fun> (base + 0x7000);

cn_zls_mainloop_ivybridge_asm = reinterpret_cast<CpuThread::cn_mainloop_fun> (base + 0x8000);
cn_zls_mainloop_ryzen_asm = reinterpret_cast<CpuThread::cn_mainloop_fun> (base + 0x9000);
cn_zls_mainloop_bulldozer_asm = reinterpret_cast<CpuThread::cn_mainloop_fun> (base + 0xA000);
cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast<CpuThread::cn_mainloop_double_fun> (base + 0xB000);
cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast<CpuThread::cn_mainloop_fun> (base + 0xB000);

cn_double_mainloop_ivybridge_asm = reinterpret_cast<CpuThread::cn_mainloop_fun> (base + 0xC000);
cn_double_mainloop_ryzen_asm = reinterpret_cast<CpuThread::cn_mainloop_fun> (base + 0xD000);
cn_double_mainloop_bulldozer_asm = reinterpret_cast<CpuThread::cn_mainloop_fun> (base + 0xE000);
cn_double_double_mainloop_sandybridge_asm = reinterpret_cast<CpuThread::cn_mainloop_double_fun> (base + 0xF000);
cn_double_double_mainloop_sandybridge_asm = reinterpret_cast<CpuThread::cn_mainloop_fun> (base + 0xF000);

patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, CRYPTONIGHT_HALF_ITER, CRYPTONIGHT_MASK);
patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, CRYPTONIGHT_HALF_ITER, CRYPTONIGHT_MASK);
Expand Down
10 changes: 4 additions & 6 deletions src/crypto/CryptoNight.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#include "crypto/CryptoNight_constants.h"


#ifdef _MSC_VER
#if defined _MSC_VER || defined XMRIG_ARM
#define ABI_ATTRIBUTE
#else
#define ABI_ATTRIBUTE __attribute__((ms_abi))
Expand All @@ -44,16 +44,14 @@ struct cryptonight_ctx;

namespace xmrig {
namespace CpuThread {
typedef void(*cn_mainloop_fun)(cryptonight_ctx*);
typedef void(*cn_mainloop_double_fun)(cryptonight_ctx*, cryptonight_ctx*);
typedef void(*cn_mainloop_fun)(cryptonight_ctx**);
}

class Job;
class JobResult;
}

typedef void(*cn_mainloop_fun_ms_abi)(cryptonight_ctx*) ABI_ATTRIBUTE;
typedef void(*cn_mainloop_double_fun_ms_abi)(cryptonight_ctx*, cryptonight_ctx*) ABI_ATTRIBUTE;
typedef void(*cn_mainloop_fun_ms_abi)(cryptonight_ctx**) ABI_ATTRIBUTE;

struct cryptonight_r_data {
int variant;
Expand All @@ -70,7 +68,7 @@ struct cryptonight_ctx {
const uint32_t* saes_table;

cn_mainloop_fun_ms_abi generated_code;
cn_mainloop_double_fun_ms_abi generated_code_double;
cn_mainloop_fun_ms_abi generated_code_double;
cryptonight_r_data generated_code_data;
cryptonight_r_data generated_code_double_data;
};
Expand Down
26 changes: 26 additions & 0 deletions src/crypto/CryptoNight_constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,32 @@ template<> inline constexpr Variant cn_base_variant<VARIANT_ZLS>() { return V
template<> inline constexpr Variant cn_base_variant<VARIANT_DOUBLE>() { return VARIANT_2; }


// Non-template overload of cn_base_variant: returns the base variant for a
// variant value supplied as a function argument. The result is always one of
// VARIANT_0, VARIANT_1, VARIANT_GPU or VARIANT_2.
inline Variant cn_base_variant(Variant variant)
{
    // Variants grouped under VARIANT_0.
    if (variant == VARIANT_0 || variant == VARIANT_XHV || variant == VARIANT_XAO) {
        return VARIANT_0;
    }

    // Variants grouped under VARIANT_1.
    if (variant == VARIANT_1 || variant == VARIANT_TUBE || variant == VARIANT_XTL ||
        variant == VARIANT_MSR || variant == VARIANT_RTO) {
        return VARIANT_1;
    }

    // VARIANT_GPU is its own base variant.
    if (variant == VARIANT_GPU) {
        return VARIANT_GPU;
    }

    // Every value not listed above falls back to VARIANT_2.
    return VARIANT_2;
}


template<Variant variant> inline constexpr bool cn_is_cryptonight_r() { return false; }
template<> inline constexpr bool cn_is_cryptonight_r<VARIANT_WOW>() { return true; }
template<> inline constexpr bool cn_is_cryptonight_r<VARIANT_4>() { return true; }
Expand Down
Loading

0 comments on commit 9b61fb8

Please sign in to comment.