From ced6999839030323e4f4deb3bf6339115ae13f09 Mon Sep 17 00:00:00 2001 From: XMRig Date: Thu, 14 Mar 2019 15:06:15 +0700 Subject: [PATCH 1/6] Unified ASM functions signature --- src/Mem.cpp | 2 +- src/crypto/CryptoNight.cpp | 16 ++--- src/crypto/CryptoNight.h | 10 ++- src/crypto/CryptoNight_constants.h | 26 +++++++ src/crypto/CryptoNight_x86.h | 70 +++++++++---------- .../asm/CryptonightR_soft_aes_template.inc | 2 + .../CryptonightR_soft_aes_template_win.inc | 2 + src/crypto/asm/CryptonightR_template.inc | 5 ++ src/crypto/asm/CryptonightR_template_win.inc | 5 ++ .../asm/CryptonightWOW_soft_aes_template.inc | 2 + .../CryptonightWOW_soft_aes_template_win.inc | 2 + src/crypto/asm/CryptonightWOW_template.inc | 5 ++ .../asm/CryptonightWOW_template_win.inc | 5 ++ .../cn2/cnv2_double_main_loop_sandybridge.inc | 3 + .../asm/cn2/cnv2_main_loop_bulldozer.inc | 2 + .../asm/cn2/cnv2_main_loop_ivybridge.inc | 2 + src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc | 2 + .../asm/cn2/cnv2_rwz_double_main_loop.inc | 3 + src/crypto/asm/cn2/cnv2_rwz_main_loop.inc | 2 + src/crypto/asm/cn_main_loop.S | 2 - .../CryptonightR_soft_aes_template_win.inc | 2 + .../asm/win64/CryptonightR_template_win.inc | 5 ++ .../CryptonightWOW_soft_aes_template_win.inc | 2 + .../asm/win64/CryptonightWOW_template_win.inc | 5 ++ .../cn2/cnv2_double_main_loop_sandybridge.inc | 3 + .../win64/cn2/cnv2_main_loop_bulldozer.inc | 2 + .../win64/cn2/cnv2_main_loop_ivybridge.inc | 2 + .../asm/win64/cn2/cnv2_main_loop_ryzen.inc | 2 + .../win64/cn2/cnv2_rwz_double_main_loop.inc | 3 + .../asm/win64/cn2/cnv2_rwz_main_loop.inc | 2 + 30 files changed, 144 insertions(+), 52 deletions(-) diff --git a/src/Mem.cpp b/src/Mem.cpp index 4fa794d6..01a2157b 100644 --- a/src/Mem.cpp +++ b/src/Mem.cpp @@ -53,7 +53,7 @@ MemInfo Mem::create(cryptonight_ctx **ctx, xmrig::Algo algorithm, size_t count) uint8_t* p = reinterpret_cast(allocateExecutableMemory(0x4000)); c->generated_code = reinterpret_cast(p); - c->generated_code_double = reinterpret_cast(p + 0x2000); + c->generated_code_double = reinterpret_cast(p + 0x2000); c->generated_code_data.variant = xmrig::VARIANT_MAX; c->generated_code_data.height = (uint64_t)(-1); diff --git a/src/crypto/CryptoNight.cpp b/src/crypto/CryptoNight.cpp index 35ce910f..74a47f3e 100644 --- a/src/crypto/CryptoNight.cpp +++ b/src/crypto/CryptoNight.cpp @@ -55,22 +55,22 @@ bool CryptoNight::hash(const xmrig::Job &job, xmrig::JobResult &result, cryptoni xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ivybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ryzen_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_double_fun cn_zls_double_mainloop_sandybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ivybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ryzen_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_double_fun cn_double_double_mainloop_sandybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr; template static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t mask) @@ -116,22 +116,22 @@ static void patchAsmVariants() cn_half_mainloop_ivybridge_asm = reinterpret_cast (base + 0x0000); cn_half_mainloop_ryzen_asm = reinterpret_cast (base + 0x1000); cn_half_mainloop_bulldozer_asm = reinterpret_cast (base + 0x2000); - cn_half_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0x3000); + cn_half_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0x3000); cn_trtl_mainloop_ivybridge_asm = reinterpret_cast (base + 0x4000); cn_trtl_mainloop_ryzen_asm = reinterpret_cast (base + 0x5000); cn_trtl_mainloop_bulldozer_asm = reinterpret_cast (base + 0x6000); - cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0x7000); + cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0x7000); cn_zls_mainloop_ivybridge_asm = reinterpret_cast (base + 0x8000); cn_zls_mainloop_ryzen_asm = reinterpret_cast (base + 0x9000); cn_zls_mainloop_bulldozer_asm = reinterpret_cast (base + 0xA000); - cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0xB000); + cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0xB000); cn_double_mainloop_ivybridge_asm = reinterpret_cast (base + 0xC000); cn_double_mainloop_ryzen_asm = reinterpret_cast (base + 0xD000); cn_double_mainloop_bulldozer_asm = reinterpret_cast (base + 0xE000); - cn_double_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0xF000); + cn_double_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0xF000); patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, CRYPTONIGHT_HALF_ITER, CRYPTONIGHT_MASK); patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, CRYPTONIGHT_HALF_ITER, CRYPTONIGHT_MASK); diff --git a/src/crypto/CryptoNight.h b/src/crypto/CryptoNight.h index aacd9da1..91c7fe01 100644 --- a/src/crypto/CryptoNight.h +++ b/src/crypto/CryptoNight.h @@ -34,7 +34,7 @@ #include "crypto/CryptoNight_constants.h" -#ifdef _MSC_VER +#if defined _MSC_VER || defined XMRIG_ARM #define ABI_ATTRIBUTE #else #define ABI_ATTRIBUTE __attribute__((ms_abi)) @@ -44,16 +44,14 @@ struct cryptonight_ctx; namespace xmrig { namespace CpuThread { - typedef void(*cn_mainloop_fun)(cryptonight_ctx*); - typedef void(*cn_mainloop_double_fun)(cryptonight_ctx*, cryptonight_ctx*); + typedef void(*cn_mainloop_fun)(cryptonight_ctx**); } class Job; class JobResult; } -typedef void(*cn_mainloop_fun_ms_abi)(cryptonight_ctx*) ABI_ATTRIBUTE; -typedef void(*cn_mainloop_double_fun_ms_abi)(cryptonight_ctx*, cryptonight_ctx*) ABI_ATTRIBUTE; +typedef void(*cn_mainloop_fun_ms_abi)(cryptonight_ctx**) ABI_ATTRIBUTE; struct cryptonight_r_data { int variant; @@ -70,7 +68,7 @@ struct cryptonight_ctx { const uint32_t* saes_table; cn_mainloop_fun_ms_abi generated_code; - cn_mainloop_double_fun_ms_abi generated_code_double; + cn_mainloop_fun_ms_abi generated_code_double; cryptonight_r_data generated_code_data; cryptonight_r_data generated_code_double_data; }; diff --git a/src/crypto/CryptoNight_constants.h b/src/crypto/CryptoNight_constants.h index 58a3915f..1bc06a3b 100644 --- a/src/crypto/CryptoNight_constants.h +++ b/src/crypto/CryptoNight_constants.h @@ -215,6 +215,32 @@ template<> inline constexpr Variant cn_base_variant() { return V template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } +inline Variant cn_base_variant(Variant variant) +{ + switch (variant) { + case VARIANT_0: + case VARIANT_XHV: + case VARIANT_XAO: + return VARIANT_0; + + case VARIANT_1: + case VARIANT_TUBE: + case VARIANT_XTL: + case VARIANT_MSR: + case VARIANT_RTO: + return VARIANT_1; + + case VARIANT_GPU: + return VARIANT_GPU; + + default: + break; + } + + return VARIANT_2; +} + + template inline constexpr bool cn_is_cryptonight_r() { return false; } template<> inline constexpr bool cn_is_cryptonight_r() { return true; } template<> inline constexpr bool cn_is_cryptonight_r() { return true; } diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h index 8b9ea783..202b662a 100644 --- a/src/crypto/CryptoNight_x86.h +++ b/src/crypto/CryptoNight_x86.h @@ -590,7 +590,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si } ctx[0]->saes_table = (const uint32_t*)saes_table; - ctx[0]->generated_code(ctx[0]); + ctx[0]->generated_code(ctx); } else { #endif @@ -750,32 +750,32 @@ inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_ #ifndef XMRIG_NO_ASM -extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1); -extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1); +extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx **ctx); extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ivybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ryzen_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_double_fun cn_zls_double_mainloop_sandybridge_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ivybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ryzen_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_double_fun cn_double_double_mainloop_sandybridge_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm; void wow_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); @@ -824,64 +824,64 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_ if (VARIANT == xmrig::VARIANT_2) { if (ASM == xmrig::ASM_INTEL) { - cnv2_mainloop_ivybridge_asm(ctx[0]); + cnv2_mainloop_ivybridge_asm(ctx); } else if (ASM == xmrig::ASM_RYZEN) { - cnv2_mainloop_ryzen_asm(ctx[0]); + cnv2_mainloop_ryzen_asm(ctx); } else { - cnv2_mainloop_bulldozer_asm(ctx[0]); + cnv2_mainloop_bulldozer_asm(ctx); } } else if (VARIANT == xmrig::VARIANT_HALF) { if (ASM == xmrig::ASM_INTEL) { - cn_half_mainloop_ivybridge_asm(ctx[0]); + cn_half_mainloop_ivybridge_asm(ctx); } else if (ASM == xmrig::ASM_RYZEN) { - cn_half_mainloop_ryzen_asm(ctx[0]); + cn_half_mainloop_ryzen_asm(ctx); } else { - cn_half_mainloop_bulldozer_asm(ctx[0]); + cn_half_mainloop_bulldozer_asm(ctx); } } else if (VARIANT == xmrig::VARIANT_TRTL) { if (ASM == xmrig::ASM_INTEL) { - cn_trtl_mainloop_ivybridge_asm(ctx[0]); + cn_trtl_mainloop_ivybridge_asm(ctx); } else if (ASM == xmrig::ASM_RYZEN) { - cn_trtl_mainloop_ryzen_asm(ctx[0]); + cn_trtl_mainloop_ryzen_asm(ctx); } else { - cn_trtl_mainloop_bulldozer_asm(ctx[0]); + cn_trtl_mainloop_bulldozer_asm(ctx); } } else if (VARIANT == xmrig::VARIANT_RWZ) { - cnv2_rwz_mainloop_asm(ctx[0]); + cnv2_rwz_mainloop_asm(ctx); } else if (VARIANT == xmrig::VARIANT_ZLS) { if (ASM == xmrig::ASM_INTEL) { - cn_zls_mainloop_ivybridge_asm(ctx[0]); + cn_zls_mainloop_ivybridge_asm(ctx); } else if (ASM == xmrig::ASM_RYZEN) { - cn_zls_mainloop_ryzen_asm(ctx[0]); + cn_zls_mainloop_ryzen_asm(ctx); } else { - cn_zls_mainloop_bulldozer_asm(ctx[0]); + cn_zls_mainloop_bulldozer_asm(ctx); } } else if (VARIANT == xmrig::VARIANT_DOUBLE) { if (ASM == xmrig::ASM_INTEL) { - cn_double_mainloop_ivybridge_asm(ctx[0]); + cn_double_mainloop_ivybridge_asm(ctx); } else if (ASM == xmrig::ASM_RYZEN) { - cn_double_mainloop_ryzen_asm(ctx[0]); + cn_double_mainloop_ryzen_asm(ctx); } else { - cn_double_mainloop_bulldozer_asm(ctx[0]); + cn_double_mainloop_bulldozer_asm(ctx); } } else if (xmrig::cn_is_cryptonight_r()) { - ctx[0]->generated_code(ctx[0]); + ctx[0]->generated_code(ctx); } cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state)); @@ -910,25 +910,25 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_ cn_explode_scratchpad(reinterpret_cast<__m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory)); if (VARIANT == xmrig::VARIANT_2) { - cnv2_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + cnv2_double_mainloop_sandybridge_asm(ctx); } else if (VARIANT == xmrig::VARIANT_HALF) { - cn_half_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + cn_half_double_mainloop_sandybridge_asm(ctx); } else if (VARIANT == xmrig::VARIANT_TRTL) { - cn_trtl_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + cn_trtl_double_mainloop_sandybridge_asm(ctx); } else if (VARIANT == xmrig::VARIANT_RWZ) { - cnv2_rwz_double_mainloop_asm(ctx[0], ctx[1]); + cnv2_rwz_double_mainloop_asm(ctx); } else if (VARIANT == xmrig::VARIANT_ZLS) { - cn_zls_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + cn_zls_double_mainloop_sandybridge_asm(ctx); } else if (VARIANT == xmrig::VARIANT_DOUBLE) { - cn_double_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + cn_double_double_mainloop_sandybridge_asm(ctx); } else if (xmrig::cn_is_cryptonight_r()) { - ctx[0]->generated_code_double(ctx[0], ctx[1]); + ctx[0]->generated_code_double(ctx); } cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state)); diff --git a/src/crypto/asm/CryptonightR_soft_aes_template.inc b/src/crypto/asm/CryptonightR_soft_aes_template.inc index 40c7874d..e9e1bb4f 100644 --- a/src/crypto/asm/CryptonightR_soft_aes_template.inc +++ b/src/crypto/asm/CryptonightR_soft_aes_template.inc @@ -6,6 +6,8 @@ PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_end) ALIGN(64) FN_PREFIX(CryptonightR_soft_aes_template_part1): + mov rcx, [rcx] + mov QWORD PTR [rsp+8], rcx push rbx push rbp diff --git a/src/crypto/asm/CryptonightR_soft_aes_template_win.inc b/src/crypto/asm/CryptonightR_soft_aes_template_win.inc index d771f69c..589192ca 100644 --- a/src/crypto/asm/CryptonightR_soft_aes_template_win.inc +++ b/src/crypto/asm/CryptonightR_soft_aes_template_win.inc @@ -6,6 +6,8 @@ PUBLIC CryptonightR_soft_aes_template_end ALIGN(64) CryptonightR_soft_aes_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+8], rcx push rbx push rbp diff --git a/src/crypto/asm/CryptonightR_template.inc b/src/crypto/asm/CryptonightR_template.inc index 8ecab724..61b6b985 100644 --- a/src/crypto/asm/CryptonightR_template.inc +++ b/src/crypto/asm/CryptonightR_template.inc @@ -12,6 +12,8 @@ PUBLIC FN_PREFIX(CryptonightR_template_double_end) ALIGN(64) FN_PREFIX(CryptonightR_template_part1): + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi @@ -183,6 +185,9 @@ FN_PREFIX(CryptonightR_template_end): ALIGN(64) FN_PREFIX(CryptonightR_template_double_part1): + mov rdx, [rcx+8] + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/CryptonightR_template_win.inc b/src/crypto/asm/CryptonightR_template_win.inc index a170f2d2..1bb89eb1 100644 --- a/src/crypto/asm/CryptonightR_template_win.inc +++ b/src/crypto/asm/CryptonightR_template_win.inc @@ -12,6 +12,8 @@ PUBLIC CryptonightR_template_double_end ALIGN(64) CryptonightR_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi @@ -183,6 +185,9 @@ CryptonightR_template_end: ALIGN(64) CryptonightR_template_double_part1: + mov rdx, [rcx+8] + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/CryptonightWOW_soft_aes_template.inc b/src/crypto/asm/CryptonightWOW_soft_aes_template.inc index feea3949..53b7016a 100644 --- a/src/crypto/asm/CryptonightWOW_soft_aes_template.inc +++ b/src/crypto/asm/CryptonightWOW_soft_aes_template.inc @@ -6,6 +6,8 @@ PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_end) ALIGN(64) FN_PREFIX(CryptonightWOW_soft_aes_template_part1): + mov rcx, [rcx] + mov QWORD PTR [rsp+8], rcx push rbx push rbp diff --git a/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc b/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc index 6ebad99f..b3202b78 100644 --- a/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc +++ b/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc @@ -6,6 +6,8 @@ PUBLIC CryptonightWOW_soft_aes_template_end ALIGN(64) CryptonightWOW_soft_aes_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+8], rcx push rbx push rbp diff --git a/src/crypto/asm/CryptonightWOW_template.inc b/src/crypto/asm/CryptonightWOW_template.inc index 7183a659..82d455f6 100644 --- a/src/crypto/asm/CryptonightWOW_template.inc +++ b/src/crypto/asm/CryptonightWOW_template.inc @@ -12,6 +12,8 @@ PUBLIC FN_PREFIX(CryptonightWOW_template_double_end) ALIGN(64) FN_PREFIX(CryptonightWOW_template_part1): + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi @@ -165,6 +167,9 @@ FN_PREFIX(CryptonightWOW_template_end): ALIGN(64) FN_PREFIX(CryptonightWOW_template_double_part1): + mov rdx, [rcx+8] + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/CryptonightWOW_template_win.inc b/src/crypto/asm/CryptonightWOW_template_win.inc index c5652e27..644c01f1 100644 --- a/src/crypto/asm/CryptonightWOW_template_win.inc +++ b/src/crypto/asm/CryptonightWOW_template_win.inc @@ -12,6 +12,8 @@ PUBLIC CryptonightWOW_template_double_end ALIGN(64) CryptonightWOW_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi @@ -165,6 +167,9 @@ CryptonightWOW_template_end: ALIGN(64) CryptonightWOW_template_double_part1: + mov rdx, [rcx+8] + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc index aa5101a8..1710cac7 100644 --- a/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc +++ b/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc @@ -1,3 +1,6 @@ + mov rdx, [rcx+8] + mov rcx, [rcx] + mov rax, rsp push rbx push rbp diff --git a/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc b/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc index c764501d..b881b669 100644 --- a/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc +++ b/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi diff --git a/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc index 06f1d28b..863673de 100644 --- a/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc +++ b/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc b/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc index 5dbf5917..8ccc5e17 100644 --- a/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc +++ b/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi diff --git a/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc b/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc index d2d87173..d9bfc9c1 100644 --- a/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc +++ b/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc @@ -1,3 +1,6 @@ + mov rdx, [rcx+8] + mov rcx, [rcx] + mov rax, rsp push rbx push rbp diff --git a/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc b/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc index 021f787e..b59c02d6 100644 --- a/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc +++ b/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/cn_main_loop.S b/src/crypto/asm/cn_main_loop.S index 347f0e08..7aed6c20 100644 --- a/src/crypto/asm/cn_main_loop.S +++ b/src/crypto/asm/cn_main_loop.S @@ -49,7 +49,6 @@ ALIGN(64) FN_PREFIX(cnv2_double_mainloop_sandybridge_asm): sub rsp, 48 mov rcx, rdi - mov rdx, rsi #include "cn2/cnv2_double_main_loop_sandybridge.inc" add rsp, 48 ret 0 @@ -68,7 +67,6 @@ ALIGN(64) FN_PREFIX(cnv2_rwz_double_mainloop_asm): sub rsp, 48 mov rcx, rdi - mov rdx, rsi #include "cn2/cnv2_rwz_double_main_loop.inc" add rsp, 48 ret 0 diff --git a/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc b/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc index d6d393a9..6898a604 100644 --- a/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc +++ b/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc @@ -6,6 +6,8 @@ PUBLIC CryptonightR_soft_aes_template_end ALIGN(64) CryptonightR_soft_aes_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+8], rcx push rbx push rbp diff --git a/src/crypto/asm/win64/CryptonightR_template_win.inc b/src/crypto/asm/win64/CryptonightR_template_win.inc index 60ee3441..d24eedaa 100644 --- a/src/crypto/asm/win64/CryptonightR_template_win.inc +++ b/src/crypto/asm/win64/CryptonightR_template_win.inc @@ -12,6 +12,8 @@ PUBLIC CryptonightR_template_double_end ALIGN(64) CryptonightR_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi @@ -183,6 +185,9 @@ CryptonightR_template_end: ALIGN(64) CryptonightR_template_double_part1: + mov rdx, [rcx+8] + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc b/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc index 68209036..1c73f77c 100644 --- a/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc +++ b/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc @@ -6,6 +6,8 @@ PUBLIC CryptonightWOW_soft_aes_template_end ALIGN(64) CryptonightWOW_soft_aes_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+8], rcx push rbx push rbp diff --git a/src/crypto/asm/win64/CryptonightWOW_template_win.inc b/src/crypto/asm/win64/CryptonightWOW_template_win.inc index 9db2cf39..55c8c8df 100644 --- a/src/crypto/asm/win64/CryptonightWOW_template_win.inc +++ b/src/crypto/asm/win64/CryptonightWOW_template_win.inc @@ -12,6 +12,8 @@ PUBLIC CryptonightWOW_template_double_end ALIGN(64) CryptonightWOW_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi @@ -165,6 +167,9 @@ CryptonightWOW_template_end: ALIGN(64) CryptonightWOW_template_double_part1: + mov rdx, [rcx+8] + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc index 05af9393..85077a20 100644 --- a/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc +++ b/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc @@ -1,3 +1,6 @@ + mov rdx, [rcx+8] + mov rcx, [rcx] + mov rax, rsp push rbx push rbp diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc index 03a36f48..f17017a0 100644 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc +++ b/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc index 77e28f80..a12ac35c 100644 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc +++ b/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc index 7e5c127f..044235d8 100644 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc +++ b/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi diff --git a/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc b/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc index 69ca8793..97fb691b 100644 --- a/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc +++ b/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc @@ -1,3 +1,6 @@ + mov rdx, [rcx+8] + mov rcx, [rcx] + mov rax, rsp push rbx push rbp diff --git a/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc b/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc index 99317730..e2b7a5fc 100644 --- a/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc +++ b/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi From 96caf2345be34982686e8086b9bad99b296060fe Mon Sep 17 00:00:00 2001 From: Tony Butler Date: Mon, 18 Mar 2019 11:19:19 -0600 Subject: [PATCH 2/6] Repair NVRTC DLL copy phase for all CUDA versions forever --- CMakeLists.txt | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fef7d491..7cebb691 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -293,14 +293,11 @@ add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURC target_link_libraries(${CMAKE_PROJECT_NAME} xmrig-cuda ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${LIBS} ${EXTRA_LIBS} ${CPUID_LIB}) if (WIN32) - if (CUDA_VERSION_MAJOR EQUAL 10) - add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc64_100_0.dll" $) - else() - add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc64_${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}.dll" $) - endif() + file(GLOB NVRTCDLL "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc64*.dll") + add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different "${NVRTCDLL}" $) + file(GLOB NVRTCBUILTINDLL "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc-builtins64*.dll") add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc-builtins64_${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}.dll" $) + COMMAND ${CMAKE_COMMAND} -E copy_if_different "${NVRTCBUILTINDLL}" $) endif() From 4570e5d23b6a4f9aba15d500d5b230856f499aa4 Mon Sep 17 00:00:00 2001 From: Tony Butler Date: Wed, 20 Mar 2019 13:57:16 -0600 Subject: [PATCH 3/6] CUDA8 would crash on init randomly but constantly: moving the memory-size-check to before the cudaGetDeviceProperties fixes it; Bonus, now the Summary line shows GPU memory sizes at startup --- cmake/CUDA.cmake | 16 ++++++++++++--- src/Summary.cpp | 11 ++++++---- src/nvidia/cryptonight.h | 2 ++ src/nvidia/cuda_extra.cu | 42 ++++++++++++++++++++------------------ src/workers/CudaThread.cpp | 6 ++++++ src/workers/CudaThread.h | 8 ++++++-- 6 files changed, 56 insertions(+), 29 deletions(-) diff --git a/cmake/CUDA.cmake b/cmake/CUDA.cmake index 9cef12a5..b8d70c78 100644 --- a/cmake/CUDA.cmake +++ b/cmake/CUDA.cmake @@ -32,7 +32,7 @@ set(DEFAULT_CUDA_ARCH "30;50") # Fermi GPUs are only supported with CUDA < 9.0 if (CUDA_VERSION VERSION_LESS 9.0) - list(APPEND DEFAULT_CUDA_ARCH "20") + list(APPEND DEFAULT_CUDA_ARCH "20 21") endif() # add Pascal support for CUDA >= 8.0 @@ -61,6 +61,7 @@ foreach(CUDA_ARCH_ELEM ${CUDA_ARCH}) "Use '20' (for compute architecture 2.0) or higher.") endif() endforeach() +list(SORT CUDA_ARCH) option(CUDA_SHOW_REGISTER "Show registers used for each kernel and compute architecture" OFF) option(CUDA_KEEP_FILES "Keep all intermediate files that are generated during internal compilation steps" OFF) @@ -89,11 +90,20 @@ elseif("${CUDA_COMPILER}" STREQUAL "nvcc") if (CUDA_VERSION VERSION_LESS 8.0) add_definitions(-D_FORCE_INLINES) add_definitions(-D_MWAITXINTRIN_H_INCLUDED) + elseif(CUDA_VERSION VERSION_LESS 9.0) + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Wno-deprecated-gpu-targets") endif() foreach(CUDA_ARCH_ELEM ${CUDA_ARCH}) # set flags to create device code for the given architecture - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} - "-Wno-deprecated-gpu-targets --generate-code arch=compute_${CUDA_ARCH_ELEM},code=sm_${CUDA_ARCH_ELEM} --generate-code arch=compute_${CUDA_ARCH_ELEM},code=compute_${CUDA_ARCH_ELEM}") + if("${CUDA_ARCH_ELEM}" STREQUAL "21") + # "2.1" actually does run faster when compiled as itself, versus in "2.0" compatible mode + # strange virtual code type on top of compute_20, with no compute_21 (so the normal rule fails) + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} + "--generate-code arch=compute_20,code=sm_21") + else() + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} + "--generate-code arch=compute_${CUDA_ARCH_ELEM},code=sm_${CUDA_ARCH_ELEM} --generate-code arch=compute_${CUDA_ARCH_ELEM},code=compute_${CUDA_ARCH_ELEM}") + endif() endforeach() # give each thread an independent default stream diff --git a/src/Summary.cpp b/src/Summary.cpp index 44b6d6f8..26526d5a 100644 --- a/src/Summary.cpp +++ b/src/Summary.cpp @@ -65,10 +65,11 @@ static void print_algo(xmrig::Config *config) static void print_gpu(xmrig::Config *config) { + constexpr size_t byteToMiB = 1024u * 1024u; for (const xmrig::IThread *t : config->threads()) { - auto thread = static_cast(t); - Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("GPU #%-8zu") YELLOW("PCI:%04x:%02x:%02x") GREEN(" %s @ %d/%d MHz") " \x1B[1;30m%dx%d %dx%d arch:%d%d SMX:%d" - : " * GPU #%-8zuPCI:%04x:%02x:%02x %s @ %d/%d MHz %dx%d %dx%d arch:%d%d SMX:%d", + auto thread = dynamic_cast(t); + Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("GPU #%-8zu") YELLOW("PCI:%04x:%02x:%02x") GREEN(" %s @ %d/%d MHz") " \x1B[1;30m%dx%d %dx%d arch:%d%d SMX:%d MEM:%zu/%zu MiB" + : " * GPU #%-8zuPCI:%04x:%02x:%02x %s @ %d/%d MHz %dx%d %dx%d arch:%d%d SMX:%d MEM:%zu/%zu MiB", thread->index(), thread->pciDomainID(), thread->pciBusID(), @@ -82,7 +83,9 @@ static void print_gpu(xmrig::Config *config) thread->bsleep(), thread->arch()[0], thread->arch()[1], - thread->smx() + thread->smx(), + thread->memoryFree() / byteToMiB, + thread->memoryTotal() / byteToMiB ); } } diff --git a/src/nvidia/cryptonight.h b/src/nvidia/cryptonight.h index 673c7ae8..10bd2807 100644 --- a/src/nvidia/cryptonight.h +++ b/src/nvidia/cryptonight.h @@ -50,6 +50,8 @@ typedef struct { int device_bsleep; int device_clockRate; int device_memoryClockRate; + size_t device_memoryTotal; + size_t device_memoryFree; uint32_t device_pciBusID; uint32_t device_pciDeviceID; uint32_t device_pciDomainID; diff --git a/src/nvidia/cuda_extra.cu b/src/nvidia/cuda_extra.cu index 7c53ad49..c6a7e623 100644 --- a/src/nvidia/cuda_extra.cu +++ b/src/nvidia/cuda_extra.cu @@ -529,6 +529,28 @@ int cuda_get_deviceinfo(nvid_ctx* ctx, xmrig::Algo algo, bool isCNv2) return 1; } + // a device must be selected to get the right memory usage later on + if (cudaSetDevice(ctx->device_id) != cudaSuccess) { + printf("WARNING: NVIDIA GPU %d: cannot be selected.\n", ctx->device_id); + return 2; + } + + // trigger that a context on the gpu will be allocated + int* tmp; + if (cudaMalloc(&tmp, 256) != cudaSuccess) { + printf("WARNING: NVIDIA GPU %d: context cannot be created.\n", ctx->device_id); + return 3; + } + + size_t freeMemory = 0; + size_t totalMemory = 0; + + CUDA_CHECK(ctx->device_id, cudaMemGetInfo(&freeMemory, &totalMemory)); + CUDA_CHECK(ctx->device_id, cudaFree(tmp)); + CUDA_CHECK(ctx->device_id, cudaDeviceReset()); + ctx->device_memoryFree = freeMemory; + ctx->device_memoryTotal = totalMemory; + cudaDeviceProp props; err = cudaGetDeviceProperties(&props, ctx->device_id); if (err != cudaSuccess) { @@ -593,26 +615,6 @@ int cuda_get_deviceinfo(nvid_ctx* ctx, xmrig::Algo algo, bool isCNv2) maxMemUsage = size_t(1024u) * byteToMiB; } - // a device must be selected to get the right memory usage later on - if (cudaSetDevice(ctx->device_id) != cudaSuccess) { - printf("WARNING: NVIDIA GPU %d: cannot be selected.\n", ctx->device_id); - return 2; - } - - // trigger that a context on the gpu will be allocated - int* tmp; - if (cudaMalloc(&tmp, 256) != cudaSuccess) { - printf("WARNING: NVIDIA GPU %d: context cannot be created.\n", ctx->device_id); - return 3; - } - - size_t freeMemory = 0; - size_t totalMemory = 0; - - CUDA_CHECK(ctx->device_id, cudaMemGetInfo(&freeMemory, &totalMemory)); - CUDA_CHECK(ctx->device_id, cudaFree(tmp)); - CUDA_CHECK(ctx->device_id, cudaDeviceReset()); - const size_t hashMemSize = xmrig::cn_select_memory(algo); # ifdef _WIN32 /* We use in windows bfactor (split slow kernel into smaller parts) to avoid diff --git a/src/workers/CudaThread.cpp b/src/workers/CudaThread.cpp index d322aa88..6f3de186 100644 --- a/src/workers/CudaThread.cpp +++ b/src/workers/CudaThread.cpp @@ -35,6 +35,8 @@ CudaThread::CudaThread() : m_bsleep(0), m_clockRate(0), m_memoryClockRate(0), + m_memoryTotal(0), + m_memoryFree(0), m_nvmlId(-1), m_smx(0), m_threads(0), @@ -58,6 +60,8 @@ CudaThread::CudaThread(const nvid_ctx &ctx, int64_t affinity, xmrig::Algo algori m_bsleep(ctx.device_bsleep), m_clockRate(ctx.device_clockRate), m_memoryClockRate(ctx.device_memoryClockRate), + m_memoryTotal(ctx.device_memoryTotal), + m_memoryFree(ctx.device_memoryFree), m_nvmlId(-1), m_smx(ctx.device_mpcount), m_threads(ctx.device_threads), @@ -145,6 +149,8 @@ bool CudaThread::init(xmrig::Algo algorithm) m_clockRate = ctx.device_clockRate; m_memoryClockRate = ctx.device_memoryClockRate; + m_memoryTotal = ctx.device_memoryTotal; + m_memoryFree = ctx.device_memoryFree; m_pciBusID = ctx.device_pciBusID; m_pciDeviceID = ctx.device_pciDeviceID; m_pciDomainID = ctx.device_pciDomainID; diff --git a/src/workers/CudaThread.h b/src/workers/CudaThread.h index a9a00076..58603464 100644 --- a/src/workers/CudaThread.h +++ b/src/workers/CudaThread.h @@ -48,6 +48,8 @@ class CudaThread : public xmrig::IThread inline int bsleep() const { return m_bsleep; } inline int clockRate() const { return m_clockRate; } inline int memoryClockRate() const { return m_memoryClockRate; } + inline size_t memoryTotal() const { return m_memoryTotal; } + inline size_t memoryFree() const { return m_memoryFree; } inline int nvmlId() const { return m_nvmlId; } inline int smx() const { return m_smx; } inline int threads() const { return m_threads; } @@ -75,8 +77,8 @@ class CudaThread : public xmrig::IThread inline void setSyncMode(uint32_t syncMode) { m_syncMode = syncMode > 3 ? 3 : syncMode; } protected: -# ifdef APP_DEBUG - void print() const override; +# ifdef APP_DEBUG + void print() const override; # endif # ifndef XMRIG_NO_API @@ -99,6 +101,8 @@ class CudaThread : public xmrig::IThread int64_t m_affinity; size_t m_index; size_t m_threadId; + size_t m_memoryTotal; + size_t m_memoryFree; uint32_t m_pciBusID; uint32_t m_pciDeviceID; uint32_t m_pciDomainID; From 8913809f3e48cd7d6f709cf4b60f775b065026c2 Mon Sep 17 00:00:00 2001 From: XMRig Date: Thu, 21 Mar 2019 15:01:32 +0700 Subject: [PATCH 4/6] After PR cleanup, added copyright, reverted static_cast, class member sorting. --- src/Summary.cpp | 6 +++-- src/nvidia/cryptonight.h | 46 +++++++++++++++++++------------------ src/nvidia/cuda_extra.cu | 47 +++++++++++++++++++------------------- src/workers/CudaThread.cpp | 12 ++++++---- src/workers/CudaThread.h | 8 ++++--- 5 files changed, 64 insertions(+), 55 deletions(-) diff --git a/src/Summary.cpp b/src/Summary.cpp index 26526d5a..769cd314 100644 --- a/src/Summary.cpp +++ b/src/Summary.cpp @@ -5,7 +5,9 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , - * Copyright 2016-2018 XMRig , + * Copyright 2018-2019 SChernykh + * Copyright 2019 Spudz76 + * Copyright 2016-2019 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -67,7 +69,7 @@ static void print_gpu(xmrig::Config *config) { constexpr size_t byteToMiB = 1024u * 1024u; for (const xmrig::IThread *t : config->threads()) { - auto thread = dynamic_cast(t); + auto thread = static_cast(t); Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("GPU #%-8zu") YELLOW("PCI:%04x:%02x:%02x") GREEN(" %s @ %d/%d MHz") " \x1B[1;30m%dx%d %dx%d arch:%d%d SMX:%d MEM:%zu/%zu MiB" : " * GPU #%-8zuPCI:%04x:%02x:%02x %s @ %d/%d MHz %dx%d %dx%d arch:%d%d SMX:%d MEM:%zu/%zu MiB", thread->index(), diff --git a/src/nvidia/cryptonight.h b/src/nvidia/cryptonight.h index 10bd2807..3eceb1a3 100644 --- a/src/nvidia/cryptonight.h +++ b/src/nvidia/cryptonight.h @@ -1,26 +1,28 @@ /* XMRig -* Copyright 2010 Jeff Garzik -* Copyright 2012-2014 pooler -* Copyright 2014 Lucas Jones -* Copyright 2014-2016 Wolf9466 -* Copyright 2016 Jay D Dee -* Copyright 2017-2018 XMR-Stak , -* Copyright 2018 Lee Clagett -* Copyright 2016-2018 XMRig , -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation, either version 3 of the License, or -* (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program. If not, see . -*/ + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018 Lee Clagett + * Copyright 2018-2019 SChernykh + * Copyright 2019 Spudz76 + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ #pragma once diff --git a/src/nvidia/cuda_extra.cu b/src/nvidia/cuda_extra.cu index c6a7e623..a53377e8 100644 --- a/src/nvidia/cuda_extra.cu +++ b/src/nvidia/cuda_extra.cu @@ -1,27 +1,28 @@ /* XMRig -* Copyright 2010 Jeff Garzik -* Copyright 2012-2014 pooler -* Copyright 2014 Lucas Jones -* Copyright 2014-2016 Wolf9466 -* Copyright 2016 Jay D Dee -* Copyright 2017-2018 XMR-Stak , -* Copyright 2018 Lee Clagett -* Copyright 2016-2018 XMRig , -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation, either version 3 of the License, or -* (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program. If not, see . -*/ - + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018 Lee Clagett + * Copyright 2018-2019 SChernykh + * Copyright 2019 Spudz76 + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ #include #include diff --git a/src/workers/CudaThread.cpp b/src/workers/CudaThread.cpp index 6f3de186..9f169e50 100644 --- a/src/workers/CudaThread.cpp +++ b/src/workers/CudaThread.cpp @@ -5,7 +5,9 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , - * Copyright 2016-2018 XMRig , + * Copyright 2018-2019 SChernykh + * Copyright 2019 Spudz76 + * Copyright 2016-2019 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -35,13 +37,13 @@ CudaThread::CudaThread() : m_bsleep(0), m_clockRate(0), m_memoryClockRate(0), - m_memoryTotal(0), - m_memoryFree(0), m_nvmlId(-1), m_smx(0), m_threads(0), m_affinity(-1), m_index(0), + m_memoryFree(0), + m_memoryTotal(0), m_threadId(0), m_pciBusID(0), m_pciDeviceID(0), @@ -60,13 +62,13 @@ CudaThread::CudaThread(const nvid_ctx &ctx, int64_t affinity, xmrig::Algo algori m_bsleep(ctx.device_bsleep), m_clockRate(ctx.device_clockRate), m_memoryClockRate(ctx.device_memoryClockRate), - m_memoryTotal(ctx.device_memoryTotal), - m_memoryFree(ctx.device_memoryFree), m_nvmlId(-1), m_smx(ctx.device_mpcount), m_threads(ctx.device_threads), m_affinity(affinity), m_index(static_cast(ctx.device_id)), + m_memoryFree(ctx.device_memoryFree), + m_memoryTotal(ctx.device_memoryTotal), m_threadId(0), m_pciBusID(ctx.device_pciBusID), m_pciDeviceID(ctx.device_pciDeviceID), diff --git a/src/workers/CudaThread.h b/src/workers/CudaThread.h index 58603464..f6636686 100644 --- a/src/workers/CudaThread.h +++ b/src/workers/CudaThread.h @@ -5,7 +5,9 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , - * Copyright 2016-2018 XMRig , + * Copyright 2018-2019 SChernykh + * Copyright 2019 Spudz76 + * Copyright 2016-2019 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -100,9 +102,9 @@ class CudaThread : public xmrig::IThread int m_threads; int64_t m_affinity; size_t m_index; - size_t m_threadId; - size_t m_memoryTotal; size_t m_memoryFree; + size_t m_memoryTotal; + size_t m_threadId; uint32_t m_pciBusID; uint32_t m_pciDeviceID; uint32_t m_pciDomainID; From c68451b7c5e690348bf7e5920daf3f800a9fd0b8 Mon Sep 17 00:00:00 2001 From: XMRig Date: Sat, 23 Mar 2019 01:09:17 +0700 Subject: [PATCH 5/6] Fix CUDA 8.0 compile issue. --- cmake/CUDA.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/CUDA.cmake b/cmake/CUDA.cmake index b8d70c78..f5239568 100644 --- a/cmake/CUDA.cmake +++ b/cmake/CUDA.cmake @@ -32,7 +32,7 @@ set(DEFAULT_CUDA_ARCH "30;50") # Fermi GPUs are only supported with CUDA < 9.0 if (CUDA_VERSION VERSION_LESS 9.0) - list(APPEND DEFAULT_CUDA_ARCH "20 21") + list(APPEND DEFAULT_CUDA_ARCH "20;21") endif() # add Pascal support for CUDA >= 8.0 From 46308928166153d77514a6c31a3b0bc0b8b82ad7 Mon Sep 17 00:00:00 2001 From: xmrig Date: Mon, 1 Apr 2019 20:42:59 +0700 Subject: [PATCH 6/6] Update CHANGELOG.md --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6352060..1de1d7b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# v2.14.2 +- [#260](https://github.com/xmrig/xmrig-nvidia/issues/260) :warning: For `cn/r` algorithm only supported version of CUDA is 10.1. +- [#253](https://github.com/xmrig/xmrig-nvidia/pull/253) Fixed NVRTC dll copy when build miner. +- [#255](https://github.com/xmrig/xmrig-nvidia/pull/255) Fixed CUDA8 support and added memory size display in summary. + # v2.14.1 - [#246](https://github.com/xmrig/xmrig-nvidia/issues/246) Fixed compatibility with old GPUs (compute capability < 3.5).