Merge branch 'dev'

xmrig · Apr 1, 2019 · 9b61fb8 · 9b61fb8
2 parents 722a631 + 4630892
commit 9b61fb8
Show file tree

Hide file tree

Showing 38 changed files with 266 additions and 136 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,8 @@
+# v2.14.2
+- [#260](https://github.com/xmrig/xmrig-nvidia/issues/260) :warning: For the `cn/r` algorithm, the only supported CUDA version is 10.1.
+- [#253](https://github.com/xmrig/xmrig-nvidia/pull/253) Fixed NVRTC DLL copy when building the miner.
+- [#255](https://github.com/xmrig/xmrig-nvidia/pull/255) Fixed CUDA8 support and added memory size display in summary.
+
 # v2.14.1
 - [#246](https://github.com/xmrig/xmrig-nvidia/issues/246) Fixed compatibility with old GPUs (compute capability < 3.5).
 

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -293,14 +293,11 @@ add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURC
 target_link_libraries(${CMAKE_PROJECT_NAME} xmrig-cuda ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${LIBS} ${EXTRA_LIBS} ${CPUID_LIB})
 
 if (WIN32)
-    if (CUDA_VERSION_MAJOR EQUAL 10)
-      add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD
-          COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc64_100_0.dll" $<TARGET_FILE_DIR:xmrig-nvidia>)
-    else()
-        add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD
-            COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc64_${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}.dll" $<TARGET_FILE_DIR:xmrig-nvidia>)
-    endif()
+    file(GLOB NVRTCDLL "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc64*.dll")  # GLOB yields a list; it is expanded unquoted below so each match is its own argument
+    add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E copy_if_different ${NVRTCDLL} $<TARGET_FILE_DIR:xmrig-nvidia> VERBATIM)
 
+    file(GLOB NVRTCBUILTINDLL "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc-builtins64*.dll")  # GLOB yields a list; it is expanded unquoted below so each match is its own argument
     add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD
-        COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc-builtins64_${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}.dll" $<TARGET_FILE_DIR:xmrig-nvidia>)
+        COMMAND ${CMAKE_COMMAND} -E copy_if_different ${NVRTCBUILTINDLL} $<TARGET_FILE_DIR:xmrig-nvidia> VERBATIM)
 endif()
diff --git a/cmake/CUDA.cmake b/cmake/CUDA.cmake
@@ -32,7 +32,7 @@ set(DEFAULT_CUDA_ARCH "30;50")
 
 # Fermi GPUs are only supported with CUDA < 9.0
 if (CUDA_VERSION VERSION_LESS 9.0)
-    list(APPEND DEFAULT_CUDA_ARCH "20")
+    list(APPEND DEFAULT_CUDA_ARCH "20;21")
 endif()
 
 # add Pascal support for CUDA >= 8.0
@@ -61,6 +61,7 @@ foreach(CUDA_ARCH_ELEM ${CUDA_ARCH})
                             "Use '20' (for compute architecture 2.0) or higher.")
     endif()
 endforeach()
+list(SORT CUDA_ARCH)
 
 option(CUDA_SHOW_REGISTER "Show registers used for each kernel and compute architecture" OFF)
 option(CUDA_KEEP_FILES "Keep all intermediate files that are generated during internal compilation steps" OFF)
@@ -89,11 +90,20 @@ elseif("${CUDA_COMPILER}" STREQUAL "nvcc")
     if (CUDA_VERSION VERSION_LESS 8.0)
         add_definitions(-D_FORCE_INLINES)
         add_definitions(-D_MWAITXINTRIN_H_INCLUDED)
+    elseif(CUDA_VERSION VERSION_LESS 9.0)
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Wno-deprecated-gpu-targets")
     endif()
     foreach(CUDA_ARCH_ELEM ${CUDA_ARCH})
         # set flags to create device code for the given architecture
-        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
-            "-Wno-deprecated-gpu-targets --generate-code arch=compute_${CUDA_ARCH_ELEM},code=sm_${CUDA_ARCH_ELEM} --generate-code arch=compute_${CUDA_ARCH_ELEM},code=compute_${CUDA_ARCH_ELEM}")
+        if("${CUDA_ARCH_ELEM}" STREQUAL "21")
+            # sm_21 actually runs faster when compiled as itself than in sm_20-compatible mode.
+            # There is no compute_21 virtual architecture: sm_21 sits on top of compute_20, so the generic rule in the else() branch fails.
+            set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
+                    "--generate-code arch=compute_20,code=sm_21")
+        else()
+            set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
+                    "--generate-code arch=compute_${CUDA_ARCH_ELEM},code=sm_${CUDA_ARCH_ELEM} --generate-code arch=compute_${CUDA_ARCH_ELEM},code=compute_${CUDA_ARCH_ELEM}")
+        endif()
     endforeach()
 
     # give each thread an independent default stream

diff --git a/src/Mem.cpp b/src/Mem.cpp
@@ -53,7 +53,7 @@ MemInfo Mem::create(cryptonight_ctx **ctx, xmrig::Algo algorithm, size_t count)
 
         uint8_t* p = reinterpret_cast<uint8_t*>(allocateExecutableMemory(0x4000));
         c->generated_code  = reinterpret_cast<cn_mainloop_fun_ms_abi>(p);
-        c->generated_code_double = reinterpret_cast<cn_mainloop_double_fun_ms_abi>(p + 0x2000);
+        c->generated_code_double = reinterpret_cast<cn_mainloop_fun_ms_abi>(p + 0x2000);
 
         c->generated_code_data.variant = xmrig::VARIANT_MAX;
         c->generated_code_data.height = (uint64_t)(-1);

diff --git a/src/Summary.cpp b/src/Summary.cpp
@@ -5,7 +5,9 @@
  * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
  * Copyright 2016      Jay D Dee   <[email protected]>
  * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2016-2018 XMRig       <https://github.com/xmrig>, <[email protected]>
+ * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
+ * Copyright 2019      Spudz76     <https://github.com/Spudz76>
+ * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <[email protected]>
  *
  *   This program is free software: you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -65,10 +67,11 @@ static void print_algo(xmrig::Config *config)
 
 static void print_gpu(xmrig::Config *config)
 {
+    constexpr size_t byteToMiB = 1024u * 1024u;
     for (const xmrig::IThread *t : config->threads()) {
         auto thread = static_cast<const CudaThread *>(t);
-        Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("GPU #%-8zu") YELLOW("PCI:%04x:%02x:%02x") GREEN(" %s @ %d/%d MHz") " \x1B[1;30m%dx%d %dx%d arch:%d%d SMX:%d"
-                                          : " * GPU #%-8zuPCI:%04x:%02x:%02x %s @ %d/%d MHz %dx%d %dx%d arch:%d%d SMX:%d",
+        Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("GPU #%-8zu") YELLOW("PCI:%04x:%02x:%02x") GREEN(" %s @ %d/%d MHz") " \x1B[1;30m%dx%d %dx%d arch:%d%d SMX:%d MEM:%zu/%zu MiB"
+                                          : " * GPU #%-8zuPCI:%04x:%02x:%02x %s @ %d/%d MHz %dx%d %dx%d arch:%d%d SMX:%d MEM:%zu/%zu MiB",
                        thread->index(),
                        thread->pciDomainID(),
                        thread->pciBusID(),
@@ -82,7 +85,9 @@ static void print_gpu(xmrig::Config *config)
                        thread->bsleep(),
                        thread->arch()[0],
                        thread->arch()[1],
-                       thread->smx()
+                       thread->smx(),
+                       thread->memoryFree() / byteToMiB,
+                       thread->memoryTotal() / byteToMiB
         );
     }
 }

diff --git a/src/crypto/CryptoNight.cpp b/src/crypto/CryptoNight.cpp
@@ -55,22 +55,22 @@ bool CryptoNight::hash(const xmrig::Job &job, xmrig::JobResult &result, cryptoni
 xmrig::CpuThread::cn_mainloop_fun        cn_half_mainloop_ivybridge_asm             = nullptr;
 xmrig::CpuThread::cn_mainloop_fun        cn_half_mainloop_ryzen_asm                 = nullptr;
 xmrig::CpuThread::cn_mainloop_fun        cn_half_mainloop_bulldozer_asm             = nullptr;
-xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm    = nullptr;
+xmrig::CpuThread::cn_mainloop_fun        cn_half_double_mainloop_sandybridge_asm    = nullptr;
 
 xmrig::CpuThread::cn_mainloop_fun        cn_trtl_mainloop_ivybridge_asm             = nullptr;
 xmrig::CpuThread::cn_mainloop_fun        cn_trtl_mainloop_ryzen_asm                 = nullptr;
 xmrig::CpuThread::cn_mainloop_fun        cn_trtl_mainloop_bulldozer_asm             = nullptr;
-xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm    = nullptr;
+xmrig::CpuThread::cn_mainloop_fun        cn_trtl_double_mainloop_sandybridge_asm    = nullptr;
 
 xmrig::CpuThread::cn_mainloop_fun        cn_zls_mainloop_ivybridge_asm              = nullptr;
 xmrig::CpuThread::cn_mainloop_fun        cn_zls_mainloop_ryzen_asm                  = nullptr;
 xmrig::CpuThread::cn_mainloop_fun        cn_zls_mainloop_bulldozer_asm              = nullptr;
-xmrig::CpuThread::cn_mainloop_double_fun cn_zls_double_mainloop_sandybridge_asm     = nullptr;
+xmrig::CpuThread::cn_mainloop_fun        cn_zls_double_mainloop_sandybridge_asm     = nullptr;
 
 xmrig::CpuThread::cn_mainloop_fun        cn_double_mainloop_ivybridge_asm           = nullptr;
 xmrig::CpuThread::cn_mainloop_fun        cn_double_mainloop_ryzen_asm               = nullptr;
 xmrig::CpuThread::cn_mainloop_fun        cn_double_mainloop_bulldozer_asm           = nullptr;
-xmrig::CpuThread::cn_mainloop_double_fun cn_double_double_mainloop_sandybridge_asm  = nullptr;
+xmrig::CpuThread::cn_mainloop_fun        cn_double_double_mainloop_sandybridge_asm  = nullptr;
 
 template<typename T, typename U>
 static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t mask)
@@ -116,22 +116,22 @@ static void patchAsmVariants()
     cn_half_mainloop_ivybridge_asm              = reinterpret_cast<CpuThread::cn_mainloop_fun>         (base + 0x0000);
     cn_half_mainloop_ryzen_asm                  = reinterpret_cast<CpuThread::cn_mainloop_fun>         (base + 0x1000);
     cn_half_mainloop_bulldozer_asm              = reinterpret_cast<CpuThread::cn_mainloop_fun>         (base + 0x2000);
-    cn_half_double_mainloop_sandybridge_asm     = reinterpret_cast<CpuThread::cn_mainloop_double_fun>  (base + 0x3000);
+    cn_half_double_mainloop_sandybridge_asm     = reinterpret_cast<CpuThread::cn_mainloop_fun>         (base + 0x3000);
 
     cn_trtl_mainloop_ivybridge_asm              = reinterpret_cast<CpuThread::cn_mainloop_fun>         (base + 0x4000);
     cn_trtl_mainloop_ryzen_asm                  = reinterpret_cast<CpuThread::cn_mainloop_fun>         (base + 0x5000);
     cn_trtl_mainloop_bulldozer_asm              = reinterpret_cast<CpuThread::cn_mainloop_fun>         (base + 0x6000);
-    cn_trtl_double_mainloop_sandybridge_asm     = reinterpret_cast<CpuThread::cn_mainloop_double_fun>  (base + 0x7000);
+    cn_trtl_double_mainloop_sandybridge_asm     = reinterpret_cast<CpuThread::cn_mainloop_fun>         (base + 0x7000);
 
     cn_zls_mainloop_ivybridge_asm               = reinterpret_cast<CpuThread::cn_mainloop_fun>         (base + 0x8000);
     cn_zls_mainloop_ryzen_asm                   = reinterpret_cast<CpuThread::cn_mainloop_fun>         (base + 0x9000);
     cn_zls_mainloop_bulldozer_asm               = reinterpret_cast<CpuThread::cn_mainloop_fun>         (base + 0xA000);
-    cn_zls_double_mainloop_sandybridge_asm      = reinterpret_cast<CpuThread::cn_mainloop_double_fun>  (base + 0xB000);
+    cn_zls_double_mainloop_sandybridge_asm      = reinterpret_cast<CpuThread::cn_mainloop_fun>         (base + 0xB000);
 
     cn_double_mainloop_ivybridge_asm            = reinterpret_cast<CpuThread::cn_mainloop_fun>         (base + 0xC000);
     cn_double_mainloop_ryzen_asm                = reinterpret_cast<CpuThread::cn_mainloop_fun>         (base + 0xD000);
     cn_double_mainloop_bulldozer_asm            = reinterpret_cast<CpuThread::cn_mainloop_fun>         (base + 0xE000);
-    cn_double_double_mainloop_sandybridge_asm   = reinterpret_cast<CpuThread::cn_mainloop_double_fun>  (base + 0xF000);
+    cn_double_double_mainloop_sandybridge_asm   = reinterpret_cast<CpuThread::cn_mainloop_fun>         (base + 0xF000);
 
     patchCode(cn_half_mainloop_ivybridge_asm,            cnv2_mainloop_ivybridge_asm,           CRYPTONIGHT_HALF_ITER,   CRYPTONIGHT_MASK);
     patchCode(cn_half_mainloop_ryzen_asm,                cnv2_mainloop_ryzen_asm,               CRYPTONIGHT_HALF_ITER,   CRYPTONIGHT_MASK);

diff --git a/src/crypto/CryptoNight.h b/src/crypto/CryptoNight.h
@@ -34,7 +34,7 @@
 #include "crypto/CryptoNight_constants.h"
 
 
-#ifdef _MSC_VER
+#if defined _MSC_VER || defined XMRIG_ARM
 #define ABI_ATTRIBUTE
 #else
 #define ABI_ATTRIBUTE __attribute__((ms_abi))
@@ -44,16 +44,14 @@ struct cryptonight_ctx;
 
 namespace xmrig {
     namespace CpuThread {
-        typedef void(*cn_mainloop_fun)(cryptonight_ctx*);
-        typedef void(*cn_mainloop_double_fun)(cryptonight_ctx*, cryptonight_ctx*);
+        typedef void(*cn_mainloop_fun)(cryptonight_ctx**);
     }
 
     class Job;
     class JobResult;
 }
 
-typedef void(*cn_mainloop_fun_ms_abi)(cryptonight_ctx*) ABI_ATTRIBUTE;
-typedef void(*cn_mainloop_double_fun_ms_abi)(cryptonight_ctx*, cryptonight_ctx*) ABI_ATTRIBUTE;
+typedef void(*cn_mainloop_fun_ms_abi)(cryptonight_ctx**) ABI_ATTRIBUTE;
 
 struct cryptonight_r_data {
     int variant;
@@ -70,7 +68,7 @@ struct cryptonight_ctx {
     const uint32_t* saes_table;
 
     cn_mainloop_fun_ms_abi generated_code;
-    cn_mainloop_double_fun_ms_abi generated_code_double;
+    cn_mainloop_fun_ms_abi generated_code_double;
     cryptonight_r_data generated_code_data;
     cryptonight_r_data generated_code_double_data;
 };

diff --git a/src/crypto/CryptoNight_constants.h b/src/crypto/CryptoNight_constants.h
@@ -215,6 +215,32 @@ template<> inline constexpr Variant cn_base_variant<VARIANT_ZLS>()    { return V
 template<> inline constexpr Variant cn_base_variant<VARIANT_DOUBLE>() { return VARIANT_2; }
 
 
+inline Variant cn_base_variant(Variant variant)
+{
+    switch (variant) {
+    case VARIANT_0:
+    case VARIANT_XHV:
+    case VARIANT_XAO:
+        return VARIANT_0;
+
+    case VARIANT_1:
+    case VARIANT_TUBE:
+    case VARIANT_XTL:
+    case VARIANT_MSR:
+    case VARIANT_RTO:
+        return VARIANT_1;
+
+    case VARIANT_GPU:
+        return VARIANT_GPU;
+
+    default:
+        break;
+    }
+
+    return VARIANT_2;
+}
+
+
 template<Variant variant> inline constexpr bool cn_is_cryptonight_r() { return false; }
 template<> inline constexpr bool cn_is_cryptonight_r<VARIANT_WOW>()   { return true; }
 template<> inline constexpr bool cn_is_cryptonight_r<VARIANT_4>()     { return true; }