diff --git a/CHANGELOG.md b/CHANGELOG.md index d94e86e0..3243dba5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +# v2.12.0-beta +- [#235](https://github.com/xmrig/xmrig-nvidia/pull/235) Added support for new algorithm `cryptonight/wow`, short alias `cn/wow` (also known as CryptonightR), for upcoming [Wownero](http://wownero.org) fork on February 14. + # v2.11.0 - [#928](https://github.com/xmrig/xmrig/issues/928) Added support for new algorithm `cryptonight/gpu`, short alias `cn/gpu` (original name `cryptonight-gpu`), for upcoming [Ryo currency](https://ryo-currency.com) fork on February 14. diff --git a/CMakeLists.txt b/CMakeLists.txt index 0bef1a22..2ba358c5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,7 @@ option(WITH_CN_GPU "CryptoNight-GPU support" ON) option(WITH_HTTPD "HTTP REST API" ON) option(WITH_DEBUG_LOG "Enable debug log output" OFF) option(WITH_TLS "Enable OpenSSL support" ON) +option(WITH_ASM "Enable ASM PoW implementations" ON) option(BUILD_STATIC "Build static binary" OFF) option(ARM_TARGET "Force use specific ARM target 8 or 7" 0) @@ -122,9 +123,11 @@ set(SOURCES src/common/Platform.cpp src/core/Config.cpp src/core/Controller.cpp + src/Mem.cpp src/net/Network.cpp src/net/strategies/DonateStrategy.cpp src/nvidia/CudaCLI.cpp + src/nvidia/CudaCryptonightR_gen.cpp src/Summary.cpp src/workers/CudaWorker.cpp src/workers/CudaThread.cpp @@ -142,11 +145,17 @@ set(SOURCES_CRYPTO src/crypto/CryptoNight.cpp ) +if (WITH_ASM) + set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/asm/CryptonightR_template.h) + set(SOURCES_CRYPTO "${SOURCES_CRYPTO}" src/crypto/CryptonightR_gen.cpp) +endif() + if (WIN32) set(SOURCES_OS res/app.rc src/App_win.cpp src/common/Platform_win.cpp + src/Mem_win.cpp ) add_definitions(/DWIN32) @@ -155,11 +164,13 @@ elseif (APPLE) set(SOURCES_OS src/App_unix.cpp src/common/Platform_mac.cpp + src/Mem_unix.cpp ) else() set(SOURCES_OS src/App_unix.cpp src/common/Platform_unix.cpp + src/Mem_unix.cpp ) set(EXTRA_LIBS 
pthread rt dl) @@ -169,7 +180,6 @@ add_definitions(/D__STDC_FORMAT_MACROS) add_definitions(/DUNICODE) add_definitions(/DXMRIG_NVIDIA_PROJECT) add_definitions(/DXMRIG_NO_LIBCPUID) -add_definitions(/DXMRIG_NO_ASM) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") @@ -205,6 +215,7 @@ endif() include(cmake/OpenSSL.cmake) include(cmake/cn-gpu.cmake) +include(cmake/asm.cmake) CHECK_INCLUDE_FILE (syslog.h HAVE_SYSLOG_H) if (HAVE_SYSLOG_H) @@ -250,5 +261,18 @@ if (WITH_DEBUG_LOG) add_definitions(/DAPP_DEBUG) endif() -add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_NVML} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES} ${CN_GPU_SOURCES}) -target_link_libraries(${CMAKE_PROJECT_NAME} xmrig-cuda ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${LIBS} ${EXTRA_LIBS} ${CPUID_LIB}) +add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_NVML} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES} ${CN_GPU_SOURCES} ${XMRIG_ASM_SOURCES}) +target_link_libraries(${CMAKE_PROJECT_NAME} xmrig-cuda ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${LIBS} ${EXTRA_LIBS} ${CPUID_LIB}) + +if (WIN32) + if (CUDA_VERSION_MAJOR EQUAL 10) + add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc64_100_0.dll" $) + else() + add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc64_${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}.dll" $) + endif() + + add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc-builtins64_${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}.dll" $) +endif() diff --git a/cmake/CUDA.cmake b/cmake/CUDA.cmake index 45642c4f..7b19c1c9 100644 --- 
a/cmake/CUDA.cmake +++ b/cmake/CUDA.cmake @@ -23,7 +23,10 @@ list(APPEND CMAKE_PREFIX_PATH "$ENV{CMAKE_PREFIX_PATH}") set(CUDA_STATIC ON) find_package(CUDA 7.5 REQUIRED) -set(LIBS ${LIBS} ${CUDA_LIBRARIES}) +find_library(CUDA_LIB libcuda cuda HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib64" "${LIBCUDA_LIBRARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" /usr/lib64 /usr/local/cuda/lib64) +find_library(CUDA_NVRTC_LIB libnvrtc nvrtc HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib64" "${LIBNVRTC_LIBRARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" /usr/lib64 /usr/local/cuda/lib64) + +set(LIBS ${LIBS} ${CUDA_LIBRARIES} ${CUDA_LIB} ${CUDA_NVRTC_LIB}) set(DEFAULT_CUDA_ARCH "30;50") diff --git a/cmake/asm.cmake b/cmake/asm.cmake new file mode 100644 index 00000000..1e0d8308 --- /dev/null +++ b/cmake/asm.cmake @@ -0,0 +1,45 @@ +if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) + set(XMRIG_ASM_LIBRARY "xmrig-asm") + + if (CMAKE_C_COMPILER_ID MATCHES MSVC) + enable_language(ASM_MASM) + + if (MSVC_TOOLSET_VERSION GREATER_EQUAL 141) + set(XMRIG_ASM_FILE + "src/crypto/asm/cn_main_loop.asm" + "src/crypto/asm/CryptonightR_template.asm" + ) + else() + set(XMRIG_ASM_FILE + "src/crypto/asm/win64/cn_main_loop.asm" + "src/crypto/asm/win64/CryptonightR_template.asm" + ) + endif() + + set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY ASM_MASM) + else() + enable_language(ASM) + + if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU) + set(XMRIG_ASM_FILE + "src/crypto/asm/win64/cn_main_loop.S" + "src/crypto/asm/win64/CryptonightR_template.S" + ) + else() + set(XMRIG_ASM_FILE + "src/crypto/asm/cn_main_loop.S" + "src/crypto/asm/CryptonightR_template.S" + ) + endif() + + set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY C) + endif() + + add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILE}) + set(XMRIG_ASM_SOURCES "") + set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C) +else() + set(XMRIG_ASM_SOURCES "") + set(XMRIG_ASM_LIBRARY "") + add_definitions(/DXMRIG_NO_ASM) +endif() diff --git a/src/App.cpp 
b/src/App.cpp index 52582e2a..c784add3 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -24,6 +24,7 @@ #include #include +#include #include "api/Api.h" @@ -67,6 +68,8 @@ App::App(int argc, char **argv) : uv_signal_init(uv_default_loop(), &m_sigHUP); uv_signal_init(uv_default_loop(), &m_sigINT); uv_signal_init(uv_default_loop(), &m_sigTERM); + + cuInit(0); } diff --git a/src/Mem.cpp b/src/Mem.cpp new file mode 100644 index 00000000..1aa2f018 --- /dev/null +++ b/src/Mem.cpp @@ -0,0 +1,75 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018 Lee Clagett + * Copyright 2018-2019 SChernykh + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + + +#include "common/utils/mm_malloc.h" +#include "crypto/CryptoNight.h" +#include "crypto/CryptoNight_constants.h" +#include "Mem.h" + + +bool Mem::m_enabled = true; +int Mem::m_flags = 0; + + +MemInfo Mem::create(cryptonight_ctx **ctx, xmrig::Algo algorithm, size_t count) +{ + using namespace xmrig; + + MemInfo info; + info.size = cn_select_memory(algorithm) * count; + + constexpr const size_t align_size = 2 * 1024 * 1024; + info.size = ((info.size + align_size - 1) / align_size) * align_size; + info.pages = info.size / align_size; + + allocate(info, m_enabled); + + for (size_t i = 0; i < count; ++i) { + cryptonight_ctx *c = static_cast(_mm_malloc(sizeof(cryptonight_ctx), 4096)); + c->memory = info.memory + (i * cn_select_memory(algorithm)); + + uint8_t* p = reinterpret_cast(allocateExecutableMemory(0x4000)); + c->generated_code = reinterpret_cast(p); + c->generated_code_double = reinterpret_cast(p + 0x2000); + c->generated_code_height = (uint64_t)(-1); + c->generated_code_double_height = (uint64_t)(-1); + + ctx[i] = c; + } + + return info; +} + + +void Mem::release(cryptonight_ctx **ctx, size_t count, MemInfo &info) +{ + release(info); + + for (size_t i = 0; i < count; ++i) { + _mm_free(ctx[i]); + } +} + diff --git a/src/Mem.h b/src/Mem.h new file mode 100644 index 00000000..9e39e963 --- /dev/null +++ b/src/Mem.h @@ -0,0 +1,78 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018 Lee Clagett + * Copyright 2018-2019 SChernykh + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef XMRIG_MEM_H +#define XMRIG_MEM_H + + +#include +#include + + +#include "common/xmrig.h" + + +struct cryptonight_ctx; + + +struct MemInfo +{ + alignas(16) uint8_t *memory; + + size_t hugePages; + size_t pages; + size_t size; +}; + + +class Mem +{ +public: + enum Flags { + HugepagesAvailable = 1, + HugepagesEnabled = 2, + Lock = 4 + }; + + static MemInfo create(cryptonight_ctx **ctx, xmrig::Algo algorithm, size_t count); + static void init(bool enabled); + static void release(cryptonight_ctx **ctx, size_t count, MemInfo &info); + + static void *allocateExecutableMemory(size_t size); + static void protectExecutableMemory(void *p, size_t size); + static void flushInstructionCache(void *p, size_t size); + + static inline bool isHugepagesAvailable() { return (m_flags & HugepagesAvailable) != 0; } + +private: + static void allocate(MemInfo &info, bool enabled); + static void release(MemInfo &info); + + static int m_flags; + static bool m_enabled; +}; + + +#endif /* XMRIG_MEM_H */ diff --git a/src/Mem_unix.cpp b/src/Mem_unix.cpp new file mode 100644 index 00000000..833c200c --- /dev/null +++ b/src/Mem_unix.cpp @@ -0,0 +1,114 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018 Lee Clagett + * Copyright 2018-2019 SChernykh + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software 
Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include <stdlib.h>
+#include <sys/mman.h>
+
+
+#include "common/log/Log.h"
+#include "common/utils/mm_malloc.h"
+#include "common/xmrig.h"
+#include "crypto/CryptoNight.h"
+#include "Mem.h"
+
+
+/**
+ * Records whether huge-page allocation should be attempted by later
+ * Mem::create() calls. On this platform nothing else is initialised.
+ */
+void Mem::init(bool enabled)
+{
+    m_enabled = enabled;
+}
+
+
+/**
+ * Allocates info.size bytes and stores the pointer in info.memory.
+ *
+ * When `enabled` is true an anonymous huge-page mapping is tried first; on
+ * success info.hugePages is set to info.pages and the region is mlock()ed on
+ * a best-effort basis (the Lock flag records that the lock succeeded).
+ * When `enabled` is false, or the mapping fails, the memory comes from
+ * _mm_malloc() with 4096-byte alignment and info.hugePages stays 0.
+ *
+ * @param info    in: size/pages; out: memory/hugePages.
+ * @param enabled whether to attempt the huge-page mapping.
+ */
+void Mem::allocate(MemInfo &info, bool enabled)
+{
+    info.hugePages = 0;
+
+    if (!enabled) {
+        info.memory = static_cast<uint8_t*>(_mm_malloc(info.size, 4096));
+
+        return;
+    }
+
+#   if defined(__APPLE__)
+    // The superpage request is passed in the fd position here
+    // (presumably the Darwin convention for anonymous mappings -- TODO confirm).
+    info.memory = static_cast<uint8_t*>(mmap(0, info.size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0));
+#   elif defined(__FreeBSD__)
+    info.memory = static_cast<uint8_t*>(mmap(0, info.size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0));
+#   else
+    // Anonymous mappings should pass fd = -1; some implementations require it.
+    info.memory = static_cast<uint8_t*>(mmap(0, info.size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0));
+#   endif
+
+    if (info.memory == MAP_FAILED) {
+        // Huge pages are unavailable: fall back to the plain aligned allocation.
+        allocate(info, false);
+
+        return;
+    }
+
+    info.hugePages = info.pages;
+
+    // madvise() advice values are an enumeration, not a bit mask, so OR-ing
+    // them yields a single (different) advice. Issue each hint separately.
+    const int rcRandom   = madvise(info.memory, info.size, MADV_RANDOM);
+    const int rcWillNeed = madvise(info.memory, info.size, MADV_WILLNEED);
+    if (rcRandom != 0 || rcWillNeed != 0) {
+        LOG_ERR("madvise failed");
+    }
+
+    if (mlock(info.memory, info.size) == 0) {
+        m_flags |= Lock;
+    }
+}
+
+
+/**
+ * Frees memory obtained from Mem::allocate(): munmap() (after munlock() when
+ * the Lock flag is set) for huge-page mappings, _mm_free() otherwise.
+ */
+void Mem::release(MemInfo &info)
+{
+    if (info.hugePages) {
+        if (m_flags & Lock) {
+            munlock(info.memory, info.size);
+        }
+
+        munmap(info.memory, info.size);
+    }
+    else {
+        _mm_free(info.memory);
+    }
+}
+
+
+// Maps `size` bytes of anonymous memory that is readable, writable and executable.
+void *Mem::allocateExecutableMemory(size_t size)
+{
+#   if defined(__APPLE__)
+    return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANON, -1, 0); +# else + return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +# endif +} + + +void Mem::protectExecutableMemory(void *p, size_t size) +{ + mprotect(p, size, PROT_READ | PROT_EXEC); +} + + +void Mem::flushInstructionCache(void *p, size_t size) +{ +# ifndef __FreeBSD__ + __builtin___clear_cache(reinterpret_cast(p), reinterpret_cast(p) + size); +# endif +} diff --git a/src/Mem_win.cpp b/src/Mem_win.cpp new file mode 100644 index 00000000..27c1348b --- /dev/null +++ b/src/Mem_win.cpp @@ -0,0 +1,204 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018 Lee Clagett + * Copyright 2018-2019 SChernykh + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +#include +#include +#include +#include + + +#include "common/log/Log.h" +#include "common/utils/mm_malloc.h" +#include "common/xmrig.h" +#include "crypto/CryptoNight.h" +#include "crypto/CryptoNight_constants.h" +#include "Mem.h" + + +/***************************************************************** +SetLockPagesPrivilege: a function to obtain or +release the privilege of locking physical pages. 
+
+Inputs:
+
+HANDLE hProcess: Handle for the process for which the
+privilege is needed
+
+BOOL bEnable: Enable (TRUE) or disable?
+
+NOTE: the implementation below takes no parameters; it always works on
+the current process token and always enables the privilege.
+
+Return value: TRUE indicates success, FALSE failure.
+
+*****************************************************************/
+/**
+ * AWE Example: https://msdn.microsoft.com/en-us/library/windows/desktop/aa366531(v=vs.85).aspx
+ * Creating a File Mapping Using Large Pages: https://msdn.microsoft.com/en-us/library/aa366543(VS.85).aspx
+ */
+static BOOL SetLockPagesPrivilege() {
+    HANDLE token;
+
+    if (OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token) != TRUE) {
+        return FALSE;
+    }
+
+    TOKEN_PRIVILEGES tp;
+    tp.PrivilegeCount = 1;
+    tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+
+    BOOL ok = FALSE;
+    if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &(tp.Privileges[0].Luid)) == TRUE) {
+        const BOOL rc = AdjustTokenPrivileges(token, FALSE, (PTOKEN_PRIVILEGES) &tp, 0, NULL, NULL);
+
+        // Read GetLastError() before any other API call: the adjustment only
+        // counts as successful when it also reports ERROR_SUCCESS.
+        ok = (rc == TRUE && GetLastError() == ERROR_SUCCESS) ? TRUE : FALSE;
+    }
+
+    // Single exit point so the token handle is closed on failure paths too.
+    CloseHandle(token);
+
+    return ok;
+}
+
+
+/**
+ * Wraps `string` in an LSA_UNICODE_STRING without copying it: Buffer points
+ * at the caller's characters, Length excludes and MaximumLength includes the
+ * terminating character. The length is taken with wcslen(), so `string` must
+ * be a wide string.
+ */
+static LSA_UNICODE_STRING StringToLsaUnicodeString(LPCTSTR string) {
+    LSA_UNICODE_STRING lsaString;
+
+    DWORD dwLen = (DWORD) wcslen(string);
+    lsaString.Buffer = (LPWSTR) string;
+    lsaString.Length = (USHORT)((dwLen) * sizeof(WCHAR));
+    lsaString.MaximumLength = (USHORT)((dwLen + 1) * sizeof(WCHAR));
+    return lsaString;
+}
+
+
+/**
+ * Reads the current user's TOKEN_USER from the process token and asks the
+ * LSA policy to add the SE_LOCK_MEMORY_NAME account right to that SID.
+ * Returns TRUE when the right was added, FALSE otherwise.
+ */
+static BOOL ObtainLockPagesPrivilege() {
+    HANDLE token;
+    PTOKEN_USER user = NULL;
+
+    if (OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &token) == TRUE) {
+        DWORD size = 0;
+
+        // First call only reports the required buffer size.
+        GetTokenInformation(token, TokenUser, NULL, 0, &size);
+        if (size) {
+            user = (PTOKEN_USER) LocalAlloc(LPTR, size);
+        }
+
+        // Query only into a real buffer, and discard the buffer if the query
+        // fails so a zero-filled TOKEN_USER is never handed to the LSA.
+        if (user && !GetTokenInformation(token, TokenUser, user, size, &size)) {
+            LocalFree(user);
+            user = NULL;
+        }
+
+        CloseHandle(token);
+    }
+
+    if (!user) {
+        return FALSE;
+    }
+
+    LSA_HANDLE handle;
+    LSA_OBJECT_ATTRIBUTES attributes;
+    ZeroMemory(&attributes, sizeof(attributes));
+
+    BOOL result = FALSE;
+    if
(LsaOpenPolicy(NULL, &attributes, POLICY_ALL_ACCESS, &handle) == 0) { + LSA_UNICODE_STRING str = StringToLsaUnicodeString(_T(SE_LOCK_MEMORY_NAME)); + + if (LsaAddAccountRights(handle, user->User.Sid, &str, 1) == 0) { + LOG_NOTICE("Huge pages support was successfully enabled, but reboot required to use it"); + result = TRUE; + } + + LsaClose(handle); + } + + LocalFree(user); + return result; +} + + +static BOOL TrySetLockPagesPrivilege() { + if (SetLockPagesPrivilege()) { + return TRUE; + } + + return ObtainLockPagesPrivilege() && SetLockPagesPrivilege(); +} + + +void Mem::init(bool enabled) +{ + m_enabled = enabled; + + if (enabled && TrySetLockPagesPrivilege()) { + m_flags |= HugepagesAvailable; + } +} + + +void Mem::allocate(MemInfo &info, bool enabled) +{ + info.hugePages = 0; + + if (!enabled) { + info.memory = static_cast(_mm_malloc(info.size, 4096)); + + return; + } + + info.memory = static_cast(VirtualAlloc(nullptr, info.size, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE)); + if (info.memory) { + info.hugePages = info.pages; + + return; + } + + allocate(info, false); +} + + +void Mem::release(MemInfo &info) +{ + if (info.hugePages) { + VirtualFree(info.memory, 0, MEM_RELEASE); + } + else { + _mm_free(info.memory); + } +} + + +void *Mem::allocateExecutableMemory(size_t size) +{ + return VirtualAlloc(0, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); +} + + +void Mem::protectExecutableMemory(void *p, size_t size) +{ + DWORD oldProtect; + VirtualProtect(p, size, PAGE_EXECUTE_READ, &oldProtect); +} + + +void Mem::flushInstructionCache(void *p, size_t size) +{ + ::FlushInstructionCache(GetCurrentProcess(), p, size); +} diff --git a/src/common/crypto/Algorithm.cpp b/src/common/crypto/Algorithm.cpp index 7bd01364..0d360a40 100644 --- a/src/common/crypto/Algorithm.cpp +++ b/src/common/crypto/Algorithm.cpp @@ -64,6 +64,7 @@ static AlgoData const algorithms[] = { { "cryptonight/2", "cn/2", xmrig::CRYPTONIGHT, xmrig::VARIANT_2 }, { 
"cryptonight/half", "cn/half", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF }, { "cryptonight/xtlv9", "cn/xtlv9", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF }, + { "cryptonight/wow", "cn/wow", xmrig::CRYPTONIGHT, xmrig::VARIANT_WOW }, # ifndef XMRIG_NO_AEON { "cryptonight-lite", "cn-lite", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_AUTO }, @@ -127,7 +128,8 @@ static const char *variants[] = { "2", "half", "trtl", - "gpu" + "gpu", + "wow", }; diff --git a/src/common/net/Client.cpp b/src/common/net/Client.cpp index 8458b1e2..36ddba31 100644 --- a/src/common/net/Client.cpp +++ b/src/common/net/Client.cpp @@ -218,6 +218,10 @@ int64_t Client::submit(const JobResult &result) } # endif + if (m_job.algorithm().variant() == xmrig::VARIANT_WOW && m_job.id() != result.jobId) { + return -1; + } + using namespace rapidjson; # ifdef XMRIG_PROXY_PROJECT @@ -354,6 +358,14 @@ bool Client::parseJob(const rapidjson::Value ¶ms, int *code) } } + if (params.HasMember("height")) { + const rapidjson::Value &variant = params["height"]; + + if (variant.IsUint64()) { + job.setHeight(variant.GetUint64()); + } + } + if (!verifyAlgorithm(job.algorithm())) { *code = 6; diff --git a/src/common/net/Job.cpp b/src/common/net/Job.cpp index 7da2ed83..ee76f732 100644 --- a/src/common/net/Job.cpp +++ b/src/common/net/Job.cpp @@ -31,7 +31,7 @@ #include "common/net/Job.h" -static inline unsigned char hf_hex2bin(char c, bool &err) +unsigned char hf_hex2bin(char c, bool &err) { if (c >= '0' && c <= '9') { return c - '0'; @@ -48,7 +48,7 @@ static inline unsigned char hf_hex2bin(char c, bool &err) } -static inline char hf_bin2hex(unsigned char c) +char hf_bin2hex(unsigned char c) { if (c <= 0x9) { return '0' + c; @@ -66,7 +66,8 @@ Job::Job() : m_size(0), m_diff(0), m_target(0), - m_blob() + m_blob(), + m_height(0) { } @@ -80,6 +81,7 @@ Job::Job(int poolId, bool nicehash, const xmrig::Algorithm &algorithm, const xmr m_diff(0), m_target(0), m_blob(), + m_height(0), m_algorithm(algorithm), m_clientId(clientId) { @@ -132,6 
+134,9 @@ bool Job::setBlob(const char *blob) else if (m_algorithm.variant() == xmrig::VARIANT_MSR && m_blob[0] >= 8) { m_algorithm.setVariant(xmrig::VARIANT_HALF); } + else if (m_algorithm.variant() == xmrig::VARIANT_WOW && m_blob[0] < 11) { + m_algorithm.setVariant(xmrig::VARIANT_2); + } } # ifdef XMRIG_PROXY_PROJECT @@ -195,6 +200,12 @@ void Job::setAlgorithm(const char *algo) } +void Job::setHeight(uint64_t height) +{ + m_height = height; +} + + bool Job::fromHex(const char* in, unsigned int len, unsigned char* out) { bool error = false; diff --git a/src/common/net/Job.h b/src/common/net/Job.h index 398e99ae..4fdac3ad 100644 --- a/src/common/net/Job.h +++ b/src/common/net/Job.h @@ -50,6 +50,7 @@ class Job bool setBlob(const char *blob); bool setTarget(const char *target); void setAlgorithm(const char *algo); + void setHeight(uint64_t height); inline bool isNicehash() const { return m_nicehash; } inline bool isValid() const { return m_size > 0 && m_diff > 0; } @@ -65,6 +66,7 @@ class Job inline uint32_t *nonce() { return reinterpret_cast(m_blob + 39); } inline uint32_t diff() const { return static_cast(m_diff); } inline uint64_t target() const { return m_target; } + inline uint64_t height() const { return m_height; } inline void reset() { m_size = 0; m_diff = 0; } inline void setClientId(const xmrig::Id &id) { m_clientId = id; } inline void setPoolId(int poolId) { m_poolId = poolId; } @@ -100,6 +102,7 @@ class Job uint64_t m_diff; uint64_t m_target; uint8_t m_blob[kMaxBlobSize]; + uint64_t m_height; xmrig::Algorithm m_algorithm; xmrig::Id m_clientId; xmrig::Id m_id; diff --git a/src/common/net/Pool.cpp b/src/common/net/Pool.cpp index ad1ac663..585e4596 100644 --- a/src/common/net/Pool.cpp +++ b/src/common/net/Pool.cpp @@ -412,6 +412,7 @@ void Pool::rebuild() m_algorithms.push_back(m_algorithm); # ifndef XMRIG_PROXY_PROJECT + addVariant(xmrig::VARIANT_WOW); addVariant(xmrig::VARIANT_2); addVariant(xmrig::VARIANT_1); addVariant(xmrig::VARIANT_0); diff --git 
a/src/common/xmrig.h b/src/common/xmrig.h index 09bba08b..c861d11c 100644 --- a/src/common/xmrig.h +++ b/src/common/xmrig.h @@ -74,6 +74,7 @@ enum Variant { VARIANT_HALF = 9, // CryptoNight variant 2 with half iterations (Masari/Stellite) VARIANT_TRTL = 10, // CryptoNight Turtle (TRTL) VARIANT_GPU = 11, // CryptoNight-GPU (Ryo) + VARIANT_WOW = 12, // CryptoNightR (Wownero) VARIANT_MAX }; diff --git a/src/crypto/CryptoNight.cpp b/src/crypto/CryptoNight.cpp index fb9777c7..903bf2d9 100644 --- a/src/crypto/CryptoNight.cpp +++ b/src/crypto/CryptoNight.cpp @@ -25,11 +25,13 @@ #include +#include #include "common/cpu/Cpu.h" +#include "common/log/Log.h" #include "common/net/Job.h" -#include "common/utils/mm_malloc.h" +#include "Mem.h" #include "crypto/CryptoNight.h" #include "crypto/CryptoNight_test.h" #include "crypto/CryptoNight_x86.h" @@ -40,25 +42,99 @@ alignas(16) cryptonight_ctx *CryptoNight::m_ctx = nullptr; xmrig::Algo CryptoNight::m_algorithm = xmrig::CRYPTONIGHT; xmrig::AlgoVerify CryptoNight::m_av = xmrig::VERIFY_HW_AES; - bool CryptoNight::hash(const Job &job, JobResult &result, cryptonight_ctx *ctx) { - fn(job.algorithm().variant())(job.blob(), job.size(), result.result, &ctx); + fn(job.algorithm().variant())(job.blob(), job.size(), result.result, &ctx, job.height()); return *reinterpret_cast(result.result + 24) < job.target(); } +#ifndef XMRIG_NO_ASM +xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr; +xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm = nullptr; + +xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm; +xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm; +xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm; +xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm; + +template 
+static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t mask)
+{
+    // Copies the machine code of the routine at `src` into `dst` and rewrites
+    // its embedded constants.
+    //
+    //   dst        - writable destination buffer, large enough for the routine.
+    //   src        - routine to copy; its end is marked by the 32-bit value 0xDEADC0DE.
+    //   iterations - replaces every 32-bit occurrence of xmrig::CRYPTONIGHT_ITER.
+    //   mask       - replaces every 32-bit occurrence of xmrig::CRYPTONIGHT_MASK.
+    const uint8_t* p = reinterpret_cast<const uint8_t*>(src);
+
+    // Workaround for Visual Studio placing trampoline in debug builds.
+    // 0xE9 is a relative jump: follow its 32-bit displacement to the real body.
+#   if defined(_MSC_VER)
+    if (p[0] == 0xE9) {
+        p += *(int32_t*)(p + 1) + 5;
+    }
+#   endif
+
+    // Measure the routine up to and including the end marker.
+    size_t size = 0;
+    while (*(uint32_t*)(p + size) != 0xDEADC0DE) {
+        ++size;
+    }
+    size += sizeof(uint32_t);
+
+    // Copy from the resolved address `p`, not from `src`: when a trampoline
+    // was followed above, `src` still points at the jump stub, and `size`
+    // was measured relative to `p`.
+    memcpy((void*)dst, (const void*)p, size);
+
+    uint8_t* patched_data = reinterpret_cast<uint8_t*>(dst);
+    for (size_t i = 0; i + sizeof(uint32_t) <= size; ++i) {
+        switch (*(uint32_t*)(patched_data + i)) {
+        case xmrig::CRYPTONIGHT_ITER:
+            *(uint32_t*)(patched_data + i) = iterations;
+            break;
+
+        case xmrig::CRYPTONIGHT_MASK:
+            *(uint32_t*)(patched_data + i) = mask;
+            break;
+        }
+    }
+}
+
+// Builds the "half" and "trtl" main-loop variants by copying the cnv2 routines
+// into one executable allocation (one 0x1000-byte slot per variant) and
+// patching their iteration count and mask.
+static void patchAsmVariants()
+{
+    const int allocation_size = 65536;
+    uint8_t *base = static_cast<uint8_t *>(Mem::allocateExecutableMemory(allocation_size));
+
+    cn_half_mainloop_ivybridge_asm          = reinterpret_cast<xmrig::CpuThread::cn_mainloop_fun>        (base + 0x0000);
+    cn_half_mainloop_ryzen_asm              = reinterpret_cast<xmrig::CpuThread::cn_mainloop_fun>        (base + 0x1000);
+    cn_half_mainloop_bulldozer_asm          = reinterpret_cast<xmrig::CpuThread::cn_mainloop_fun>        (base + 0x2000);
+    cn_half_double_mainloop_sandybridge_asm = reinterpret_cast<xmrig::CpuThread::cn_mainloop_double_fun> (base + 0x3000);
+
+    cn_trtl_mainloop_ivybridge_asm          = reinterpret_cast<xmrig::CpuThread::cn_mainloop_fun>        (base + 0x4000);
+    cn_trtl_mainloop_ryzen_asm              = reinterpret_cast<xmrig::CpuThread::cn_mainloop_fun>        (base + 0x5000);
+    cn_trtl_mainloop_bulldozer_asm          = reinterpret_cast<xmrig::CpuThread::cn_mainloop_fun>        (base + 0x6000);
+    cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<xmrig::CpuThread::cn_mainloop_double_fun> (base + 0x7000);
+
+    patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
+    patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
+    patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
+    patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm,
xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); + + patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); + patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); + patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); + patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); + + Mem::protectExecutableMemory(base, allocation_size); + Mem::flushInstructionCache(base, allocation_size); +} +#endif + bool CryptoNight::init(xmrig::Algo algorithm) { +#ifndef XMRIG_NO_ASM + patchAsmVariants(); +#endif + m_algorithm = algorithm; m_av = xmrig::Cpu::info()->hasAES() ? xmrig::VERIFY_HW_AES : xmrig::VERIFY_SOFT_AES; - const bool valid = selfTest(); - freeCtx(m_ctx); - m_ctx = nullptr; - - return valid; + return selfTest(); } @@ -91,11 +167,19 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif cryptonight_single_hash, cryptonight_single_hash, +# ifdef XMRIG_NO_ASM cryptonight_single_hash, - cryptonight_single_hash, +# else + cryptonight_single_hash_asm, +# endif + cryptonight_single_hash, +# ifdef XMRIG_NO_ASM cryptonight_single_hash, - cryptonight_single_hash, +# else + cryptonight_single_hash_asm, +# endif + cryptonight_single_hash, nullptr, nullptr, // VARIANT_TRTL @@ -106,6 +190,13 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, // VARIANT_GPU # endif +# ifdef XMRIG_NO_ASM + cryptonight_single_hash, +# else + cryptonight_single_hash_asm, +# endif + cryptonight_single_hash, + # ifndef XMRIG_NO_AEON cryptonight_single_hash, cryptonight_single_hash, @@ -123,6 +214,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, // 
VARIANT_HALF nullptr, nullptr, // VARIANT_TRTL nullptr, nullptr, // VARIANT_GPU + nullptr, nullptr, // VARIANT_WOW # else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, @@ -130,6 +222,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, # endif # ifndef XMRIG_NO_SUMO @@ -153,6 +246,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, // VARIANT_HALF nullptr, nullptr, // VARIANT_TRTL nullptr, nullptr, // VARIANT_GPU + nullptr, nullptr, // VARIANT_WOW # else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, @@ -160,6 +254,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, # endif # ifndef XMRIG_NO_CN_PICO nullptr, nullptr, // VARIANT_0 @@ -173,10 +268,15 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, // VARIANT_2 nullptr, nullptr, // VARIANT_HALF +# ifdef XMRIG_NO_ASM cryptonight_single_hash, - cryptonight_single_hash, +# else + cryptonight_single_hash_asm, +# endif + cryptonight_single_hash, nullptr, nullptr, // VARIANT_GPU + nullptr, nullptr, // VARIANT_WOW #else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, @@ -184,6 +284,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, # endif }; @@ -204,38 +305,23 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif } -cryptonight_ctx *CryptoNight::createCtx(xmrig::Algo algorithm) -{ - cryptonight_ctx *ctx = 
static_cast(_mm_malloc(sizeof(cryptonight_ctx), 32)); - ctx->memory = static_cast(_mm_malloc(xmrig::cn_select_memory(algorithm), 32)); - - return ctx; -} - - -void CryptoNight::freeCtx(cryptonight_ctx *ctx) -{ - _mm_free(ctx->memory); - _mm_free(ctx); -} - - bool CryptoNight::selfTest() { using namespace xmrig; - m_ctx = createCtx(m_algorithm); - if (!m_ctx) { - return false; - } + MemInfo info = Mem::create(&m_ctx, m_algorithm, 1); if (m_algorithm == xmrig::CRYPTONIGHT) { - return verify(VARIANT_0, test_output_v0) && - verify(VARIANT_1, test_output_v1) && - verify(VARIANT_2, test_output_v2) && - verify(VARIANT_XTL, test_output_xtl) && - verify(VARIANT_MSR, test_output_msr) && - verify(VARIANT_XAO, test_output_xao) && - verify(VARIANT_RTO, test_output_rto) && + if (!verify2(VARIANT_WOW, test_input_WOW)) { + LOG_WARN("CryptonightR (Wownero) self-test failed"); + return false; + } + return verify(VARIANT_0, test_output_v0) && + verify(VARIANT_1, test_output_v1) && + verify(VARIANT_2, test_output_v2) && + verify(VARIANT_XTL, test_output_xtl) && + verify(VARIANT_MSR, test_output_msr) && + verify(VARIANT_XAO, test_output_xao) && + verify(VARIANT_RTO, test_output_rto) && # ifndef XMRIG_NO_CN_GPU verify(VARIANT_GPU, test_output_gpu) && # endif @@ -280,6 +366,60 @@ bool CryptoNight::verify(xmrig::Variant variant, const uint8_t *referenceValue) return false; } - func(test_input, 76, output, &m_ctx); + func(test_input, 76, output, &m_ctx, 0); + return memcmp(output, referenceValue, 32) == 0; } + +bool CryptoNight::verify2(xmrig::Variant variant, const char *test_data) +{ + cn_hash_fun func = fn(variant); + if (!func) { + return false; + } + + std::stringstream s(test_data); + std::string expected_hex; + std::string input_hex; + uint64_t height; + while (!s.eof()) + { + uint8_t referenceValue[32]; + uint8_t input[256]; + + s >> expected_hex; + s >> input_hex; + s >> height; + + if ((expected_hex.length() != 64) || (input_hex.length() > 512)) + { + return false; + } + + bool 
err = false; + + for (int i = 0; i < 32; ++i) + { + referenceValue[i] = (hf_hex2bin(expected_hex[i * 2], err) << 4) + hf_hex2bin(expected_hex[i * 2 + 1], err); + } + + const size_t input_len = input_hex.length() / 2; + for (size_t i = 0; i < input_len; ++i) + { + input[i] = (hf_hex2bin(input_hex[i * 2], err) << 4) + hf_hex2bin(input_hex[i * 2 + 1], err); + } + + if (err) + { + return false; + } + + uint8_t hash[32]; + func(input, input_len, hash, &m_ctx, height); + if (memcmp(hash, referenceValue, sizeof(hash)) != 0) + { + return false; + } + } + return true; +} diff --git a/src/crypto/CryptoNight.h b/src/crypto/CryptoNight.h index 9f2a6de4..166130e7 100644 --- a/src/crypto/CryptoNight.h +++ b/src/crypto/CryptoNight.h @@ -34,9 +34,31 @@ #include "crypto/CryptoNight_constants.h" +#ifdef _MSC_VER +#define ABI_ATTRIBUTE +#else +#define ABI_ATTRIBUTE __attribute__((ms_abi)) +#endif + +struct cryptonight_ctx; + +namespace xmrig { + namespace CpuThread { + typedef void(*cn_mainloop_fun)(cryptonight_ctx*); + typedef void(*cn_mainloop_double_fun)(cryptonight_ctx*, cryptonight_ctx*); + } +} + +typedef void(*cn_mainloop_fun_ms_abi)(cryptonight_ctx*) ABI_ATTRIBUTE; +typedef void(*cn_mainloop_double_fun_ms_abi)(cryptonight_ctx*, cryptonight_ctx*) ABI_ATTRIBUTE; + struct cryptonight_ctx { - uint8_t state[224]; - uint8_t *memory; + alignas(16) uint8_t state[224]; + alignas(16) uint8_t *memory; + cn_mainloop_fun_ms_abi generated_code; + cn_mainloop_double_fun_ms_abi generated_code_double; + uint64_t generated_code_height; + uint64_t generated_code_double_height; }; @@ -47,19 +69,18 @@ class JobResult; class CryptoNight { public: - typedef void (*cn_hash_fun)(const uint8_t *input, size_t size, uint8_t *output, cryptonight_ctx **ctx); + typedef void (*cn_hash_fun)(const uint8_t *input, size_t size, uint8_t *output, cryptonight_ctx **ctx, uint64_t height); static inline cn_hash_fun fn(xmrig::Variant variant) { return fn(m_algorithm, m_av, variant); } static bool hash(const Job &job, 
JobResult &result, cryptonight_ctx *ctx); static bool init(xmrig::Algo algorithm); static cn_hash_fun fn(xmrig::Algo algorithm, xmrig::AlgoVerify av, xmrig::Variant variant); - static cryptonight_ctx *createCtx(xmrig::Algo algorithm); - static void freeCtx(cryptonight_ctx *ctx); private: static bool selfTest(); static bool verify(xmrig::Variant variant, const uint8_t *referenceValue); + static bool verify2(xmrig::Variant variant, const char *test_data); alignas(16) static cryptonight_ctx *m_ctx; static xmrig::Algo m_algorithm; diff --git a/src/crypto/CryptoNight_constants.h b/src/crypto/CryptoNight_constants.h index f7a9542b..00563d2d 100644 --- a/src/crypto/CryptoNight_constants.h +++ b/src/crypto/CryptoNight_constants.h @@ -126,6 +126,7 @@ template inline constexpr uint32_t cn_select_iter() template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } +template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_HALF_ITER; } template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_HALF_ITER; } @@ -192,6 +193,7 @@ template<> inline constexpr Variant cn_base_variant() { return VA template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } template<> inline constexpr Variant cn_base_variant() { return VARIANT_GPU; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } } /* namespace xmrig */ diff --git a/src/crypto/CryptoNight_monero.h b/src/crypto/CryptoNight_monero.h index 20fcfacb..9c26ae5f 100644 --- a/src/crypto/CryptoNight_monero.h +++ b/src/crypto/CryptoNight_monero.h 
@@ -147,4 +147,32 @@ vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \ } while (0) #endif + +#define SWAP32LE(x) x +#define SWAP64LE(x) x +#define hash_extra_blake(data, length, hash) blake256_hash((uint8_t*)(hash), (uint8_t*)(data), (length)) + +#include "variant4_random_math.h" + +#define VARIANT4_RANDOM_MATH_INIT(part) \ + uint32_t r##part[8]; \ + struct V4_Instruction code##part[256]; \ + if (VARIANT == xmrig::VARIANT_WOW) { \ + r##part[0] = (uint32_t)(h##part[12]); \ + r##part[1] = (uint32_t)(h##part[12] >> 32); \ + r##part[2] = (uint32_t)(h##part[13]); \ + r##part[3] = (uint32_t)(h##part[13] >> 32); \ + } \ + v4_random_math_init(code##part, height); + +#define VARIANT4_RANDOM_MATH(part, al, ah, cl, bx0, bx1) \ + if (VARIANT == xmrig::VARIANT_WOW) { \ + cl ^= (r##part[0] + r##part[1]) | ((uint64_t)(r##part[2] + r##part[3]) << 32); \ + r##part[4] = static_cast(al); \ + r##part[5] = static_cast(ah); \ + r##part[6] = static_cast(_mm_cvtsi128_si32(bx0)); \ + r##part[7] = static_cast(_mm_cvtsi128_si32(bx1)); \ + v4_random_math(code##part, r##part); \ + } + #endif /* XMRIG_CRYPTONIGHT_MONERO_H */ diff --git a/src/crypto/CryptoNight_test.h b/src/crypto/CryptoNight_test.h index 31a52dc5..d3da28c1 100644 --- a/src/crypto/CryptoNight_test.h +++ b/src/crypto/CryptoNight_test.h @@ -58,6 +58,18 @@ const static uint8_t test_input[380] = { 0xCF, 0x50, 0x29, 0x6A, 0x07, 0x0B, 0x93, 0x8F, 0x8F, 0xA8, 0x10, 0x04 }; +const static char* test_input_WOW = R"===( +9d47bf4c41b7e8e727e681715acb47fa1677cdba9ca7bcb05ad8cc8abd5daa66 5468697320697320612074657374205468697320697320612074657374205468697320697320612074657374 1806260 +0d4a495cb844a3ca8ba4edb8e6bcf829ef1c06d9cdea2b62ca46c2a21b8b0a79 4c6f72656d20697073756d20646f6c6f722073697420616d65742c20636f6e73656374657475722061646970697363696e67 1806261 +a1d6d848b5c5915fccd2f64cf216c6b1a02cf7c77bc80d8d4e51b419e88ff0dd 
656c69742c2073656420646f20656975736d6f642074656d706f7220696e6369646964756e74207574206c61626f7265 1806262 +af3a8544a0221a148c2ac90484b19861e3afca33fe17021efb8ad6496b567915 657420646f6c6f7265206d61676e6120616c697175612e20557420656e696d206164206d696e696d2076656e69616d2c 1806263 +313399e0963ae8a99dab8af66d343e097dae0c0feb08dbc43ccdafef5515f413 71756973206e6f737472756420657865726369746174696f6e20756c6c616d636f206c61626f726973206e697369 1806264 +6021c6ef90bff9ae94a7506d623d3a7a86c1756d655f50dd558f716d64622a34 757420616c697175697020657820656120636f6d6d6f646f20636f6e7365717561742e20447569732061757465 1806265 +2b13000535f3db5f9b9b84a65c4351f386cd2cdedebb8c3ad2eab086e6a3fee5 697275726520646f6c6f7220696e20726570726568656e646572697420696e20766f6c7570746174652076656c6974 1806266 +fc0e1dad8e895749dc90eb690bc1ba059a1cd772afaaf65a106bf9e5e6b80503 657373652063696c6c756d20646f6c6f726520657520667567696174206e756c6c612070617269617475722e 1806267 +b60b0afe144deff7d903ed2d5545e77ebe66a3c51fee7016eeb8fee9eb630c0f 4578636570746575722073696e74206f6363616563617420637570696461746174206e6f6e2070726f6964656e742c 1806268 +64774b27e7d5fec862fc4c0c13ac6bf09123b6f05bb0e4b75c97f379a2b3a679 73756e7420696e2063756c706120717569206f666669636961206465736572756e74206d6f6c6c697420616e696d20696420657374206c61626f72756d2e 1806269 +)==="; // "cn/0" const static uint8_t test_output_v0[160] = { @@ -79,7 +91,7 @@ const static uint8_t test_output_v1[160] = { 0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9, 0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9, 0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D, - 0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22, + 0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22, 0xE7, 0x8C, 0x5A, 0x6E, 0x38, 0x30, 0x68, 0x4A, 0x73, 0xFC, 0x1B, 0xC6, 0x6D, 
0xFC, 0x8D, 0x98, 0xB4, 0xC2, 0x23, 0x39, 0xAD, 0xE0, 0x9D, 0xF6, 0x6D, 0x8C, 0x6A, 0xAA, 0xF9, 0xB2, 0xE3, 0x4C, 0xB6, 0x90, 0x6C, 0xE6, 0x15, 0x5E, 0x46, 0x07, 0x9C, 0xB2, 0x6B, 0xAC, 0x3B, 0xAC, 0x1A, 0xDE, @@ -274,6 +286,8 @@ const static uint8_t test_output_pico_trtl[160] = { }; #endif +unsigned char hf_hex2bin(char c, bool &err); +char hf_bin2hex(unsigned char c); #ifndef XMRIG_NO_CN_GPU // "cn/gpu" diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h index 1ae7b795..b6969571 100644 --- a/src/crypto/CryptoNight_x86.h +++ b/src/crypto/CryptoNight_x86.h @@ -480,7 +480,7 @@ static inline void cryptonight_monero_tweak(uint64_t* mem_out, const uint8_t* l, template -inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx) +inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) { constexpr size_t MASK = xmrig::cn_select_mask(); constexpr size_t ITERATIONS = xmrig::cn_select_iter(); @@ -504,6 +504,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si VARIANT1_INIT(0); VARIANT2_INIT(0); VARIANT2_SET_ROUNDING_MODE(); + VARIANT4_RANDOM_MATH_INIT(0); uint64_t al0 = h0[0] ^ h0[4]; uint64_t ah0 = h0[1] ^ h0[5]; @@ -525,7 +526,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si else if (SOFT_AES) { cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0); } - else { + else { cx = _mm_aesenc_si128(cx, ax0); } @@ -542,12 +543,17 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si ch = ((uint64_t*) &l0[idx0 & MASK])[1]; if (BASE == xmrig::VARIANT_2) { - VARIANT2_INTEGER_MATH(0, cl, cx); - lo = __umul128(idx0, cl, &hi); - VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo); + if (VARIANT == xmrig::VARIANT_WOW) { + VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, 
bx1); + } else { + VARIANT2_INTEGER_MATH(0, cl, cx); + } } - else { - lo = __umul128(idx0, cl, &hi); + + lo = __umul128(idx0, cl, &hi); + + if (BASE == xmrig::VARIANT_2) { + VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo); } al0 += hi; @@ -605,7 +611,7 @@ void cn_gpu_inner_ssse3(const uint8_t *spad, uint8_t *lpad); template -inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx) +inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) { constexpr size_t MASK = xmrig::CRYPTONIGHT_GPU_MASK; constexpr size_t ITERATIONS = xmrig::cn_select_iter(); @@ -640,7 +646,7 @@ inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_ extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx); extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx); extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx *ctx0, cryptonight_ctx *ctx1); +extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1); extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm; @@ -652,12 +658,23 @@ extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm; extern xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm; +void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); +void v4_64_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); +void v4_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); 
+void v4_64_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); template -inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx) +inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) { constexpr size_t MEM = xmrig::cn_select_memory(); + if ((VARIANT == xmrig::VARIANT_WOW) && (height != ctx[0]->generated_code_height)) { + V4_Instruction code[256]; + const int code_size = v4_random_math_init(code, height); + v4_compile_code(code, code_size, reinterpret_cast(ctx[0]->generated_code), ASM); + ctx[0]->generated_code_height = height; + } + xmrig::keccak(input, size, ctx[0]->state); cn_explode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory)); @@ -694,6 +711,9 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_ cn_trtl_mainloop_bulldozer_asm(ctx[0]); } } + else if (VARIANT == xmrig::VARIANT_WOW) { + ctx[0]->generated_code(ctx[0]); + } cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state)); xmrig::keccakf(reinterpret_cast(ctx[0]->state), 24); @@ -702,10 +722,17 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_ template -inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx) +inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) { constexpr size_t MEM = xmrig::cn_select_memory(); + if ((VARIANT == xmrig::VARIANT_WOW) && (height != ctx[0]->generated_code_double_height)) { + V4_Instruction code[256]; + const int code_size = 
v4_random_math_init(code, height); + v4_compile_code_double(code, code_size, reinterpret_cast(ctx[0]->generated_code_double), ASM); + ctx[0]->generated_code_double_height = height; + } + xmrig::keccak(input, size, ctx[0]->state); xmrig::keccak(input + size, size, ctx[1]->state); @@ -721,6 +748,9 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_ else if (VARIANT == xmrig::VARIANT_TRTL) { cn_trtl_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); } + else if (VARIANT == xmrig::VARIANT_WOW) { + ctx[0]->generated_code_double(ctx[0], ctx[1]); + } cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state)); cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state)); @@ -735,7 +765,7 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_ template -inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx) +inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) { constexpr size_t MASK = xmrig::cn_select_mask(); constexpr size_t ITERATIONS = xmrig::cn_select_iter(); @@ -760,6 +790,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si VARIANT2_INIT(0); VARIANT2_INIT(1); VARIANT2_SET_ROUNDING_MODE(); + VARIANT4_RANDOM_MATH_INIT(0); + VARIANT4_RANDOM_MATH_INIT(1); cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0); cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1); @@ -815,11 +847,17 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si ch = ((uint64_t*) &l0[idx0 & MASK])[1]; if (BASE == xmrig::VARIANT_2) { - VARIANT2_INTEGER_MATH(0, cl, cx0); - lo = __umul128(idx0, cl, &hi); + if (VARIANT == xmrig::VARIANT_WOW) { + VARIANT4_RANDOM_MATH(0, al0, ah0, cl, 
bx00, bx01); + } else { + VARIANT2_INTEGER_MATH(0, cl, cx0); + } + } + + lo = __umul128(idx0, cl, &hi); + + if (BASE == xmrig::VARIANT_2) { VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo); - } else { - lo = __umul128(idx0, cl, &hi); } al0 += hi; @@ -857,11 +895,17 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si ch = ((uint64_t*) &l1[idx1 & MASK])[1]; if (BASE == xmrig::VARIANT_2) { - VARIANT2_INTEGER_MATH(1, cl, cx1); - lo = __umul128(idx1, cl, &hi); + if (VARIANT == xmrig::VARIANT_WOW) { + VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11); + } else { + VARIANT2_INTEGER_MATH(1, cl, cx1); + } + } + + lo = __umul128(idx1, cl, &hi); + + if (BASE == xmrig::VARIANT_2) { VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo); - } else { - lo = __umul128(idx1, cl, &hi); } al1 += hi; @@ -946,11 +990,17 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si #define CN_STEP4(part, a, b0, b1, c, l, mc, ptr, idx) \ if (BASE == xmrig::VARIANT_2) { \ - VARIANT2_INTEGER_MATH(part, cl##part, c); \ - lo = __umul128(idx, cl##part, &hi); \ + if (VARIANT == xmrig::VARIANT_WOW) { \ + const uint64_t al = _mm_cvtsi128_si64(a); \ + const uint64_t ah = _mm_cvtsi128_si64(_mm_srli_si128(a, 8)); \ + VARIANT4_RANDOM_MATH(part, al, ah, cl##part, b0, b1); \ + } else { \ + VARIANT2_INTEGER_MATH(part, cl##part, c); \ + } \ + } \ + lo = __umul128(idx, cl##part, &hi); \ + if (BASE == xmrig::VARIANT_2) { \ VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo); \ - } else { \ - lo = __umul128(idx, cl##part, &hi); \ } \ a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \ \ @@ -1000,11 +1050,12 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si __m128i ax##n = _mm_set_epi64x(h##n[1] ^ h##n[5], h##n[0] ^ h##n[4]); \ __m128i bx##n##0 = _mm_set_epi64x(h##n[3] ^ h##n[7], h##n[2] ^ h##n[6]); \ __m128i bx##n##1 = _mm_set_epi64x(h##n[9] ^ h##n[11], h##n[8] ^ h##n[10]); \ - __m128i cx##n = 
_mm_setzero_si128(); + __m128i cx##n = _mm_setzero_si128(); \ + VARIANT4_RANDOM_MATH_INIT(n); template -inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx) +inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) { constexpr size_t MASK = xmrig::cn_select_mask(); constexpr size_t ITERATIONS = xmrig::cn_select_iter(); @@ -1068,7 +1119,7 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si template -inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx) +inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) { constexpr size_t MASK = xmrig::cn_select_mask(); constexpr size_t ITERATIONS = xmrig::cn_select_iter(); @@ -1141,7 +1192,7 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size template -inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx) +inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) { constexpr size_t MASK = xmrig::cn_select_mask(); constexpr size_t ITERATIONS = xmrig::cn_select_iter(); diff --git a/src/crypto/CryptonightR_gen.cpp b/src/crypto/CryptonightR_gen.cpp new file mode 100644 index 00000000..38225955 --- /dev/null +++ b/src/crypto/CryptonightR_gen.cpp @@ -0,0 +1,132 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * 
Copyright 2018 Lee Clagett
+ * Copyright 2018-2019 SChernykh
+ * Copyright 2016-2019 XMRig ,
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#include <cstring>
+#include "crypto/CryptoNight_monero.h"
+
+typedef void(*void_func)();
+
+#include "crypto/asm/CryptonightR_template.h"
+#include "Mem.h"
+
+#if !defined XMRIG_ARM && !defined XMRIG_NO_ASM
+// Appends the machine code in [p1, p2) to the buffer at p and advances p; an empty or reversed range is skipped.
+static inline void add_code(uint8_t* &p, void (*p1)(), void (*p2)())
+{
+    const ptrdiff_t size = reinterpret_cast<const uint8_t*>(p2) - reinterpret_cast<const uint8_t*>(p1);
+    if (size > 0) {
+        memcpy(p, reinterpret_cast<const void*>(p1), size);
+        p += size;
+    }
+}
+// Emits one random-math program at p by copying a template per instruction; stops at RET (code_size is not consulted).
+static inline void add_random_math(uint8_t* &p, const V4_Instruction* code, int code_size, const void_func* instructions, const void_func* instructions_mov, bool is_64_bit, xmrig::Assembly ASM)
+{
+    uint32_t prev_rot_src = (uint32_t)(-1);
+
+    for (int i = 0;; ++i) {
+        const V4_Instruction inst = code[i];
+        if (inst.opcode == RET) {
+            break;
+        }
+
+        uint8_t opcode = (inst.opcode == MUL) ? inst.opcode : (inst.opcode + 2);
+        uint8_t dst_index = inst.dst_index;
+        uint8_t src_index = inst.src_index;
+        // c packs opcode | dst | src into one byte: the index of this instruction's template in both tables.
+        const uint32_t a = inst.dst_index;
+        const uint32_t b = inst.src_index;
+        const uint8_t c = opcode | (dst_index << V4_OPCODE_BITS) | (src_index << (V4_OPCODE_BITS + V4_DST_INDEX_BITS));
+        // A rotate first gets its instructions_mov template, unless the previous rotate already used the same source.
+        switch (inst.opcode) {
+        case ROR:
+        case ROL:
+            if (b != prev_rot_src) {
+                prev_rot_src = b;
+                add_code(p, instructions_mov[c], instructions_mov[c + 1]);
+            }
+            break;
+        }
+        // Writing to the remembered rotate source invalidates it, so the next rotate emits its mov template again.
+        if (a == prev_rot_src) {
+            prev_rot_src = (uint32_t)(-1);
+        }
+
+        void_func begin = instructions[c];
+        // Compare, do not assign: `ASM = ...` overwrote ASM and made this branch independent of the selected CPU.
+        if ((ASM == xmrig::ASM_BULLDOZER) && (inst.opcode == MUL) && !is_64_bit) {
+            // AMD Bulldozer has latency 4 for 32-bit IMUL and 6 for 64-bit IMUL
+            // Always use 32-bit IMUL for AMD Bulldozer in 32-bit mode - skip prefix 0x48 and change 0x49 to 0x41
+            uint8_t* prefix = reinterpret_cast<uint8_t*>(begin);
+
+            if (*prefix == 0x49) {
+                *(p++) = 0x41;
+            }
+
+            begin = reinterpret_cast<void_func>(prefix + 1);
+        }
+
+        add_code(p, begin, instructions[c + 1]);
+        // ADD carries a 32-bit constant: patch inst.C into the tail of the template just copied (presumably its immediate).
+        if (inst.opcode == ADD) {
+            *(uint32_t*)(p - sizeof(uint32_t) - (is_64_bit ? 3 : 0)) = inst.C;
+            if (is_64_bit) {
+                prev_rot_src = (uint32_t)(-1);
+            }
+        }
+    }
+}
+// Assembles the single-hash main loop into machine_code: part1, random math, part2 (offset patched), part3; then flushes.
+void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
+{
+    uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
+    uint8_t* p = p0;
+
+    add_code(p, CryptonightR_template_part1, CryptonightR_template_part2);
+    add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+    add_code(p, CryptonightR_template_part2, CryptonightR_template_part3);
+    *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1)) - (p - p0)); // offset from here back to the mainloop label
+    add_code(p, CryptonightR_template_part3, CryptonightR_template_end);
+
+    Mem::flushInstructionCache(machine_code, p - p0);
+}
+// Double-hash variant: the same random-math program is emitted twice, once after part1 and once after part2.
+void v4_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
+{
+    uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
+    uint8_t* p = p0;
+
+    add_code(p, CryptonightR_template_double_part1, CryptonightR_template_double_part2);
+    add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+    add_code(p, CryptonightR_template_double_part2, CryptonightR_template_double_part3);
+    add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+    add_code(p, CryptonightR_template_double_part3, CryptonightR_template_double_part4);
+    *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_template_double_mainloop) - ((const uint8_t*)CryptonightR_template_double_part1)) - (p - p0)); // offset from here back to the mainloop label
+    add_code(p, CryptonightR_template_double_part4, CryptonightR_template_double_end);
+
+    Mem::flushInstructionCache(machine_code, p - p0);
+}
+
+#endif
diff --git a/src/crypto/asm/CryptonightR_template.S b/src/crypto/asm/CryptonightR_template.S
new file mode 100644
index 00000000..e8478beb
--- /dev/null
+++ b/src/crypto/asm/CryptonightR_template.S
@@ -0,0 +1,1592 @@
+#ifdef __APPLE__
+# define ALIGN(x) .align 6
+#else
+# define ALIGN(x) .align 64
+#endif
+.intel_syntax noprefix +#ifdef __APPLE__ +# define FN_PREFIX(fn) _ ## fn +.text +#else +# define FN_PREFIX(fn) fn +.section .text +#endif + +#define PUBLIC .global + +PUBLIC FN_PREFIX(CryptonightR_instruction0) +PUBLIC FN_PREFIX(CryptonightR_instruction1) +PUBLIC FN_PREFIX(CryptonightR_instruction2) +PUBLIC FN_PREFIX(CryptonightR_instruction3) +PUBLIC FN_PREFIX(CryptonightR_instruction4) +PUBLIC FN_PREFIX(CryptonightR_instruction5) +PUBLIC FN_PREFIX(CryptonightR_instruction6) +PUBLIC FN_PREFIX(CryptonightR_instruction7) +PUBLIC FN_PREFIX(CryptonightR_instruction8) +PUBLIC FN_PREFIX(CryptonightR_instruction9) +PUBLIC FN_PREFIX(CryptonightR_instruction10) +PUBLIC FN_PREFIX(CryptonightR_instruction11) +PUBLIC FN_PREFIX(CryptonightR_instruction12) +PUBLIC FN_PREFIX(CryptonightR_instruction13) +PUBLIC FN_PREFIX(CryptonightR_instruction14) +PUBLIC FN_PREFIX(CryptonightR_instruction15) +PUBLIC FN_PREFIX(CryptonightR_instruction16) +PUBLIC FN_PREFIX(CryptonightR_instruction17) +PUBLIC FN_PREFIX(CryptonightR_instruction18) +PUBLIC FN_PREFIX(CryptonightR_instruction19) +PUBLIC FN_PREFIX(CryptonightR_instruction20) +PUBLIC FN_PREFIX(CryptonightR_instruction21) +PUBLIC FN_PREFIX(CryptonightR_instruction22) +PUBLIC FN_PREFIX(CryptonightR_instruction23) +PUBLIC FN_PREFIX(CryptonightR_instruction24) +PUBLIC FN_PREFIX(CryptonightR_instruction25) +PUBLIC FN_PREFIX(CryptonightR_instruction26) +PUBLIC FN_PREFIX(CryptonightR_instruction27) +PUBLIC FN_PREFIX(CryptonightR_instruction28) +PUBLIC FN_PREFIX(CryptonightR_instruction29) +PUBLIC FN_PREFIX(CryptonightR_instruction30) +PUBLIC FN_PREFIX(CryptonightR_instruction31) +PUBLIC FN_PREFIX(CryptonightR_instruction32) +PUBLIC FN_PREFIX(CryptonightR_instruction33) +PUBLIC FN_PREFIX(CryptonightR_instruction34) +PUBLIC FN_PREFIX(CryptonightR_instruction35) +PUBLIC FN_PREFIX(CryptonightR_instruction36) +PUBLIC FN_PREFIX(CryptonightR_instruction37) +PUBLIC FN_PREFIX(CryptonightR_instruction38) +PUBLIC FN_PREFIX(CryptonightR_instruction39) 
+PUBLIC FN_PREFIX(CryptonightR_instruction40) +PUBLIC FN_PREFIX(CryptonightR_instruction41) +PUBLIC FN_PREFIX(CryptonightR_instruction42) +PUBLIC FN_PREFIX(CryptonightR_instruction43) +PUBLIC FN_PREFIX(CryptonightR_instruction44) +PUBLIC FN_PREFIX(CryptonightR_instruction45) +PUBLIC FN_PREFIX(CryptonightR_instruction46) +PUBLIC FN_PREFIX(CryptonightR_instruction47) +PUBLIC FN_PREFIX(CryptonightR_instruction48) +PUBLIC FN_PREFIX(CryptonightR_instruction49) +PUBLIC FN_PREFIX(CryptonightR_instruction50) +PUBLIC FN_PREFIX(CryptonightR_instruction51) +PUBLIC FN_PREFIX(CryptonightR_instruction52) +PUBLIC FN_PREFIX(CryptonightR_instruction53) +PUBLIC FN_PREFIX(CryptonightR_instruction54) +PUBLIC FN_PREFIX(CryptonightR_instruction55) +PUBLIC FN_PREFIX(CryptonightR_instruction56) +PUBLIC FN_PREFIX(CryptonightR_instruction57) +PUBLIC FN_PREFIX(CryptonightR_instruction58) +PUBLIC FN_PREFIX(CryptonightR_instruction59) +PUBLIC FN_PREFIX(CryptonightR_instruction60) +PUBLIC FN_PREFIX(CryptonightR_instruction61) +PUBLIC FN_PREFIX(CryptonightR_instruction62) +PUBLIC FN_PREFIX(CryptonightR_instruction63) +PUBLIC FN_PREFIX(CryptonightR_instruction64) +PUBLIC FN_PREFIX(CryptonightR_instruction65) +PUBLIC FN_PREFIX(CryptonightR_instruction66) +PUBLIC FN_PREFIX(CryptonightR_instruction67) +PUBLIC FN_PREFIX(CryptonightR_instruction68) +PUBLIC FN_PREFIX(CryptonightR_instruction69) +PUBLIC FN_PREFIX(CryptonightR_instruction70) +PUBLIC FN_PREFIX(CryptonightR_instruction71) +PUBLIC FN_PREFIX(CryptonightR_instruction72) +PUBLIC FN_PREFIX(CryptonightR_instruction73) +PUBLIC FN_PREFIX(CryptonightR_instruction74) +PUBLIC FN_PREFIX(CryptonightR_instruction75) +PUBLIC FN_PREFIX(CryptonightR_instruction76) +PUBLIC FN_PREFIX(CryptonightR_instruction77) +PUBLIC FN_PREFIX(CryptonightR_instruction78) +PUBLIC FN_PREFIX(CryptonightR_instruction79) +PUBLIC FN_PREFIX(CryptonightR_instruction80) +PUBLIC FN_PREFIX(CryptonightR_instruction81) +PUBLIC FN_PREFIX(CryptonightR_instruction82) +PUBLIC 
FN_PREFIX(CryptonightR_instruction83) +PUBLIC FN_PREFIX(CryptonightR_instruction84) +PUBLIC FN_PREFIX(CryptonightR_instruction85) +PUBLIC FN_PREFIX(CryptonightR_instruction86) +PUBLIC FN_PREFIX(CryptonightR_instruction87) +PUBLIC FN_PREFIX(CryptonightR_instruction88) +PUBLIC FN_PREFIX(CryptonightR_instruction89) +PUBLIC FN_PREFIX(CryptonightR_instruction90) +PUBLIC FN_PREFIX(CryptonightR_instruction91) +PUBLIC FN_PREFIX(CryptonightR_instruction92) +PUBLIC FN_PREFIX(CryptonightR_instruction93) +PUBLIC FN_PREFIX(CryptonightR_instruction94) +PUBLIC FN_PREFIX(CryptonightR_instruction95) +PUBLIC FN_PREFIX(CryptonightR_instruction96) +PUBLIC FN_PREFIX(CryptonightR_instruction97) +PUBLIC FN_PREFIX(CryptonightR_instruction98) +PUBLIC FN_PREFIX(CryptonightR_instruction99) +PUBLIC FN_PREFIX(CryptonightR_instruction100) +PUBLIC FN_PREFIX(CryptonightR_instruction101) +PUBLIC FN_PREFIX(CryptonightR_instruction102) +PUBLIC FN_PREFIX(CryptonightR_instruction103) +PUBLIC FN_PREFIX(CryptonightR_instruction104) +PUBLIC FN_PREFIX(CryptonightR_instruction105) +PUBLIC FN_PREFIX(CryptonightR_instruction106) +PUBLIC FN_PREFIX(CryptonightR_instruction107) +PUBLIC FN_PREFIX(CryptonightR_instruction108) +PUBLIC FN_PREFIX(CryptonightR_instruction109) +PUBLIC FN_PREFIX(CryptonightR_instruction110) +PUBLIC FN_PREFIX(CryptonightR_instruction111) +PUBLIC FN_PREFIX(CryptonightR_instruction112) +PUBLIC FN_PREFIX(CryptonightR_instruction113) +PUBLIC FN_PREFIX(CryptonightR_instruction114) +PUBLIC FN_PREFIX(CryptonightR_instruction115) +PUBLIC FN_PREFIX(CryptonightR_instruction116) +PUBLIC FN_PREFIX(CryptonightR_instruction117) +PUBLIC FN_PREFIX(CryptonightR_instruction118) +PUBLIC FN_PREFIX(CryptonightR_instruction119) +PUBLIC FN_PREFIX(CryptonightR_instruction120) +PUBLIC FN_PREFIX(CryptonightR_instruction121) +PUBLIC FN_PREFIX(CryptonightR_instruction122) +PUBLIC FN_PREFIX(CryptonightR_instruction123) +PUBLIC FN_PREFIX(CryptonightR_instruction124) +PUBLIC FN_PREFIX(CryptonightR_instruction125) 
+PUBLIC FN_PREFIX(CryptonightR_instruction126) +PUBLIC FN_PREFIX(CryptonightR_instruction127) +PUBLIC FN_PREFIX(CryptonightR_instruction128) +PUBLIC FN_PREFIX(CryptonightR_instruction129) +PUBLIC FN_PREFIX(CryptonightR_instruction130) +PUBLIC FN_PREFIX(CryptonightR_instruction131) +PUBLIC FN_PREFIX(CryptonightR_instruction132) +PUBLIC FN_PREFIX(CryptonightR_instruction133) +PUBLIC FN_PREFIX(CryptonightR_instruction134) +PUBLIC FN_PREFIX(CryptonightR_instruction135) +PUBLIC FN_PREFIX(CryptonightR_instruction136) +PUBLIC FN_PREFIX(CryptonightR_instruction137) +PUBLIC FN_PREFIX(CryptonightR_instruction138) +PUBLIC FN_PREFIX(CryptonightR_instruction139) +PUBLIC FN_PREFIX(CryptonightR_instruction140) +PUBLIC FN_PREFIX(CryptonightR_instruction141) +PUBLIC FN_PREFIX(CryptonightR_instruction142) +PUBLIC FN_PREFIX(CryptonightR_instruction143) +PUBLIC FN_PREFIX(CryptonightR_instruction144) +PUBLIC FN_PREFIX(CryptonightR_instruction145) +PUBLIC FN_PREFIX(CryptonightR_instruction146) +PUBLIC FN_PREFIX(CryptonightR_instruction147) +PUBLIC FN_PREFIX(CryptonightR_instruction148) +PUBLIC FN_PREFIX(CryptonightR_instruction149) +PUBLIC FN_PREFIX(CryptonightR_instruction150) +PUBLIC FN_PREFIX(CryptonightR_instruction151) +PUBLIC FN_PREFIX(CryptonightR_instruction152) +PUBLIC FN_PREFIX(CryptonightR_instruction153) +PUBLIC FN_PREFIX(CryptonightR_instruction154) +PUBLIC FN_PREFIX(CryptonightR_instruction155) +PUBLIC FN_PREFIX(CryptonightR_instruction156) +PUBLIC FN_PREFIX(CryptonightR_instruction157) +PUBLIC FN_PREFIX(CryptonightR_instruction158) +PUBLIC FN_PREFIX(CryptonightR_instruction159) +PUBLIC FN_PREFIX(CryptonightR_instruction160) +PUBLIC FN_PREFIX(CryptonightR_instruction161) +PUBLIC FN_PREFIX(CryptonightR_instruction162) +PUBLIC FN_PREFIX(CryptonightR_instruction163) +PUBLIC FN_PREFIX(CryptonightR_instruction164) +PUBLIC FN_PREFIX(CryptonightR_instruction165) +PUBLIC FN_PREFIX(CryptonightR_instruction166) +PUBLIC FN_PREFIX(CryptonightR_instruction167) +PUBLIC 
FN_PREFIX(CryptonightR_instruction168) +PUBLIC FN_PREFIX(CryptonightR_instruction169) +PUBLIC FN_PREFIX(CryptonightR_instruction170) +PUBLIC FN_PREFIX(CryptonightR_instruction171) +PUBLIC FN_PREFIX(CryptonightR_instruction172) +PUBLIC FN_PREFIX(CryptonightR_instruction173) +PUBLIC FN_PREFIX(CryptonightR_instruction174) +PUBLIC FN_PREFIX(CryptonightR_instruction175) +PUBLIC FN_PREFIX(CryptonightR_instruction176) +PUBLIC FN_PREFIX(CryptonightR_instruction177) +PUBLIC FN_PREFIX(CryptonightR_instruction178) +PUBLIC FN_PREFIX(CryptonightR_instruction179) +PUBLIC FN_PREFIX(CryptonightR_instruction180) +PUBLIC FN_PREFIX(CryptonightR_instruction181) +PUBLIC FN_PREFIX(CryptonightR_instruction182) +PUBLIC FN_PREFIX(CryptonightR_instruction183) +PUBLIC FN_PREFIX(CryptonightR_instruction184) +PUBLIC FN_PREFIX(CryptonightR_instruction185) +PUBLIC FN_PREFIX(CryptonightR_instruction186) +PUBLIC FN_PREFIX(CryptonightR_instruction187) +PUBLIC FN_PREFIX(CryptonightR_instruction188) +PUBLIC FN_PREFIX(CryptonightR_instruction189) +PUBLIC FN_PREFIX(CryptonightR_instruction190) +PUBLIC FN_PREFIX(CryptonightR_instruction191) +PUBLIC FN_PREFIX(CryptonightR_instruction192) +PUBLIC FN_PREFIX(CryptonightR_instruction193) +PUBLIC FN_PREFIX(CryptonightR_instruction194) +PUBLIC FN_PREFIX(CryptonightR_instruction195) +PUBLIC FN_PREFIX(CryptonightR_instruction196) +PUBLIC FN_PREFIX(CryptonightR_instruction197) +PUBLIC FN_PREFIX(CryptonightR_instruction198) +PUBLIC FN_PREFIX(CryptonightR_instruction199) +PUBLIC FN_PREFIX(CryptonightR_instruction200) +PUBLIC FN_PREFIX(CryptonightR_instruction201) +PUBLIC FN_PREFIX(CryptonightR_instruction202) +PUBLIC FN_PREFIX(CryptonightR_instruction203) +PUBLIC FN_PREFIX(CryptonightR_instruction204) +PUBLIC FN_PREFIX(CryptonightR_instruction205) +PUBLIC FN_PREFIX(CryptonightR_instruction206) +PUBLIC FN_PREFIX(CryptonightR_instruction207) +PUBLIC FN_PREFIX(CryptonightR_instruction208) +PUBLIC FN_PREFIX(CryptonightR_instruction209) +PUBLIC 
FN_PREFIX(CryptonightR_instruction210) +PUBLIC FN_PREFIX(CryptonightR_instruction211) +PUBLIC FN_PREFIX(CryptonightR_instruction212) +PUBLIC FN_PREFIX(CryptonightR_instruction213) +PUBLIC FN_PREFIX(CryptonightR_instruction214) +PUBLIC FN_PREFIX(CryptonightR_instruction215) +PUBLIC FN_PREFIX(CryptonightR_instruction216) +PUBLIC FN_PREFIX(CryptonightR_instruction217) +PUBLIC FN_PREFIX(CryptonightR_instruction218) +PUBLIC FN_PREFIX(CryptonightR_instruction219) +PUBLIC FN_PREFIX(CryptonightR_instruction220) +PUBLIC FN_PREFIX(CryptonightR_instruction221) +PUBLIC FN_PREFIX(CryptonightR_instruction222) +PUBLIC FN_PREFIX(CryptonightR_instruction223) +PUBLIC FN_PREFIX(CryptonightR_instruction224) +PUBLIC FN_PREFIX(CryptonightR_instruction225) +PUBLIC FN_PREFIX(CryptonightR_instruction226) +PUBLIC FN_PREFIX(CryptonightR_instruction227) +PUBLIC FN_PREFIX(CryptonightR_instruction228) +PUBLIC FN_PREFIX(CryptonightR_instruction229) +PUBLIC FN_PREFIX(CryptonightR_instruction230) +PUBLIC FN_PREFIX(CryptonightR_instruction231) +PUBLIC FN_PREFIX(CryptonightR_instruction232) +PUBLIC FN_PREFIX(CryptonightR_instruction233) +PUBLIC FN_PREFIX(CryptonightR_instruction234) +PUBLIC FN_PREFIX(CryptonightR_instruction235) +PUBLIC FN_PREFIX(CryptonightR_instruction236) +PUBLIC FN_PREFIX(CryptonightR_instruction237) +PUBLIC FN_PREFIX(CryptonightR_instruction238) +PUBLIC FN_PREFIX(CryptonightR_instruction239) +PUBLIC FN_PREFIX(CryptonightR_instruction240) +PUBLIC FN_PREFIX(CryptonightR_instruction241) +PUBLIC FN_PREFIX(CryptonightR_instruction242) +PUBLIC FN_PREFIX(CryptonightR_instruction243) +PUBLIC FN_PREFIX(CryptonightR_instruction244) +PUBLIC FN_PREFIX(CryptonightR_instruction245) +PUBLIC FN_PREFIX(CryptonightR_instruction246) +PUBLIC FN_PREFIX(CryptonightR_instruction247) +PUBLIC FN_PREFIX(CryptonightR_instruction248) +PUBLIC FN_PREFIX(CryptonightR_instruction249) +PUBLIC FN_PREFIX(CryptonightR_instruction250) +PUBLIC FN_PREFIX(CryptonightR_instruction251) +PUBLIC 
FN_PREFIX(CryptonightR_instruction252) +PUBLIC FN_PREFIX(CryptonightR_instruction253) +PUBLIC FN_PREFIX(CryptonightR_instruction254) +PUBLIC FN_PREFIX(CryptonightR_instruction255) +PUBLIC FN_PREFIX(CryptonightR_instruction256) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov0) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov1) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov2) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov3) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov4) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov5) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov6) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov7) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov8) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov9) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov10) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov11) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov12) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov13) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov14) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov15) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov16) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov17) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov18) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov19) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov20) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov21) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov22) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov23) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov24) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov25) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov26) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov27) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov28) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov29) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov30) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov31) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov32) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov33) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov34) +PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov35) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov36) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov37) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov38) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov39) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov40) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov41) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov42) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov43) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov44) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov45) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov46) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov47) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov48) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov49) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov50) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov51) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov52) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov53) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov54) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov55) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov56) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov57) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov58) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov59) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov60) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov61) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov62) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov63) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov64) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov65) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov66) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov67) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov68) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov69) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov70) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov71) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov72) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov73) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov74) +PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov75) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov76) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov77) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov78) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov79) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov80) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov81) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov82) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov83) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov84) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov85) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov86) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov87) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov88) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov89) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov90) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov91) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov92) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov93) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov94) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov95) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov96) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov97) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov98) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov99) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov100) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov101) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov102) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov103) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov104) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov105) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov106) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov107) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov108) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov109) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov110) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov111) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov112) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov113) +PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov114) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov115) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov116) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov117) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov118) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov119) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov120) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov121) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov122) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov123) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov124) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov125) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov126) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov127) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov128) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov129) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov130) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov131) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov132) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov133) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov134) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov135) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov136) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov137) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov138) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov139) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov140) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov141) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov142) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov143) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov144) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov145) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov146) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov147) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov148) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov149) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov150) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov151) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov152) +PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov153) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov154) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov155) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov156) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov157) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov158) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov159) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov160) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov161) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov162) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov163) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov164) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov165) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov166) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov167) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov168) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov169) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov170) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov171) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov172) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov173) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov174) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov175) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov176) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov177) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov178) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov179) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov180) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov181) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov182) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov183) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov184) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov185) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov186) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov187) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov188) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov189) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov190) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov191) +PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov192) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov193) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov194) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov195) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov196) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov197) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov198) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov199) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov200) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov201) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov202) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov203) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov204) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov205) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov206) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov207) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov208) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov209) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov210) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov211) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov212) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov213) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov214) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov215) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov216) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov217) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov218) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov219) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov220) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov221) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov222) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov223) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov224) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov225) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov226) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov227) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov228) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov229) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov230) +PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov231) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov232) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov233) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov234) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov235) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov236) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov237) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov238) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov239) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov240) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov241) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov242) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov243) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov244) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov245) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov246) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov247) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov248) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov249) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov250) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov251) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov252) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov253) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov254) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov255) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov256) + +#include "CryptonightR_template.inc" + +FN_PREFIX(CryptonightR_instruction0): + imul rbx, rbx +FN_PREFIX(CryptonightR_instruction1): + imul rbx, rbx +FN_PREFIX(CryptonightR_instruction2): + imul rbx, rbx +FN_PREFIX(CryptonightR_instruction3): + add rbx, rbx + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction4): + sub rbx, rbx +FN_PREFIX(CryptonightR_instruction5): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction6): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction7): + xor rbx, rbx +FN_PREFIX(CryptonightR_instruction8): + imul rsi, rbx +FN_PREFIX(CryptonightR_instruction9): + imul rsi, rbx +FN_PREFIX(CryptonightR_instruction10): + imul rsi, rbx 
+FN_PREFIX(CryptonightR_instruction11): + add rsi, rbx + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction12): + sub rsi, rbx +FN_PREFIX(CryptonightR_instruction13): + ror esi, cl +FN_PREFIX(CryptonightR_instruction14): + rol esi, cl +FN_PREFIX(CryptonightR_instruction15): + xor rsi, rbx +FN_PREFIX(CryptonightR_instruction16): + imul rdi, rbx +FN_PREFIX(CryptonightR_instruction17): + imul rdi, rbx +FN_PREFIX(CryptonightR_instruction18): + imul rdi, rbx +FN_PREFIX(CryptonightR_instruction19): + add rdi, rbx + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction20): + sub rdi, rbx +FN_PREFIX(CryptonightR_instruction21): + ror edi, cl +FN_PREFIX(CryptonightR_instruction22): + rol edi, cl +FN_PREFIX(CryptonightR_instruction23): + xor rdi, rbx +FN_PREFIX(CryptonightR_instruction24): + imul rbp, rbx +FN_PREFIX(CryptonightR_instruction25): + imul rbp, rbx +FN_PREFIX(CryptonightR_instruction26): + imul rbp, rbx +FN_PREFIX(CryptonightR_instruction27): + add rbp, rbx + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction28): + sub rbp, rbx +FN_PREFIX(CryptonightR_instruction29): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction30): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction31): + xor rbp, rbx +FN_PREFIX(CryptonightR_instruction32): + imul rbx, rsi +FN_PREFIX(CryptonightR_instruction33): + imul rbx, rsi +FN_PREFIX(CryptonightR_instruction34): + imul rbx, rsi +FN_PREFIX(CryptonightR_instruction35): + add rbx, rsi + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction36): + sub rbx, rsi +FN_PREFIX(CryptonightR_instruction37): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction38): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction39): + xor rbx, rsi +FN_PREFIX(CryptonightR_instruction40): + imul rsi, rsi +FN_PREFIX(CryptonightR_instruction41): + imul rsi, rsi +FN_PREFIX(CryptonightR_instruction42): + imul rsi, rsi +FN_PREFIX(CryptonightR_instruction43): + add rsi, rsi + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction44): + sub rsi, rsi 
+FN_PREFIX(CryptonightR_instruction45): + ror esi, cl +FN_PREFIX(CryptonightR_instruction46): + rol esi, cl +FN_PREFIX(CryptonightR_instruction47): + xor rsi, rsi +FN_PREFIX(CryptonightR_instruction48): + imul rdi, rsi +FN_PREFIX(CryptonightR_instruction49): + imul rdi, rsi +FN_PREFIX(CryptonightR_instruction50): + imul rdi, rsi +FN_PREFIX(CryptonightR_instruction51): + add rdi, rsi + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction52): + sub rdi, rsi +FN_PREFIX(CryptonightR_instruction53): + ror edi, cl +FN_PREFIX(CryptonightR_instruction54): + rol edi, cl +FN_PREFIX(CryptonightR_instruction55): + xor rdi, rsi +FN_PREFIX(CryptonightR_instruction56): + imul rbp, rsi +FN_PREFIX(CryptonightR_instruction57): + imul rbp, rsi +FN_PREFIX(CryptonightR_instruction58): + imul rbp, rsi +FN_PREFIX(CryptonightR_instruction59): + add rbp, rsi + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction60): + sub rbp, rsi +FN_PREFIX(CryptonightR_instruction61): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction62): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction63): + xor rbp, rsi +FN_PREFIX(CryptonightR_instruction64): + imul rbx, rdi +FN_PREFIX(CryptonightR_instruction65): + imul rbx, rdi +FN_PREFIX(CryptonightR_instruction66): + imul rbx, rdi +FN_PREFIX(CryptonightR_instruction67): + add rbx, rdi + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction68): + sub rbx, rdi +FN_PREFIX(CryptonightR_instruction69): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction70): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction71): + xor rbx, rdi +FN_PREFIX(CryptonightR_instruction72): + imul rsi, rdi +FN_PREFIX(CryptonightR_instruction73): + imul rsi, rdi +FN_PREFIX(CryptonightR_instruction74): + imul rsi, rdi +FN_PREFIX(CryptonightR_instruction75): + add rsi, rdi + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction76): + sub rsi, rdi +FN_PREFIX(CryptonightR_instruction77): + ror esi, cl +FN_PREFIX(CryptonightR_instruction78): + rol esi, cl +FN_PREFIX(CryptonightR_instruction79): 
+ xor rsi, rdi +FN_PREFIX(CryptonightR_instruction80): + imul rdi, rdi +FN_PREFIX(CryptonightR_instruction81): + imul rdi, rdi +FN_PREFIX(CryptonightR_instruction82): + imul rdi, rdi +FN_PREFIX(CryptonightR_instruction83): + add rdi, rdi + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction84): + sub rdi, rdi +FN_PREFIX(CryptonightR_instruction85): + ror edi, cl +FN_PREFIX(CryptonightR_instruction86): + rol edi, cl +FN_PREFIX(CryptonightR_instruction87): + xor rdi, rdi +FN_PREFIX(CryptonightR_instruction88): + imul rbp, rdi +FN_PREFIX(CryptonightR_instruction89): + imul rbp, rdi +FN_PREFIX(CryptonightR_instruction90): + imul rbp, rdi +FN_PREFIX(CryptonightR_instruction91): + add rbp, rdi + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction92): + sub rbp, rdi +FN_PREFIX(CryptonightR_instruction93): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction94): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction95): + xor rbp, rdi +FN_PREFIX(CryptonightR_instruction96): + imul rbx, rbp +FN_PREFIX(CryptonightR_instruction97): + imul rbx, rbp +FN_PREFIX(CryptonightR_instruction98): + imul rbx, rbp +FN_PREFIX(CryptonightR_instruction99): + add rbx, rbp + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction100): + sub rbx, rbp +FN_PREFIX(CryptonightR_instruction101): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction102): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction103): + xor rbx, rbp +FN_PREFIX(CryptonightR_instruction104): + imul rsi, rbp +FN_PREFIX(CryptonightR_instruction105): + imul rsi, rbp +FN_PREFIX(CryptonightR_instruction106): + imul rsi, rbp +FN_PREFIX(CryptonightR_instruction107): + add rsi, rbp + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction108): + sub rsi, rbp +FN_PREFIX(CryptonightR_instruction109): + ror esi, cl +FN_PREFIX(CryptonightR_instruction110): + rol esi, cl +FN_PREFIX(CryptonightR_instruction111): + xor rsi, rbp +FN_PREFIX(CryptonightR_instruction112): + imul rdi, rbp +FN_PREFIX(CryptonightR_instruction113): + imul rdi, rbp 
+FN_PREFIX(CryptonightR_instruction114): + imul rdi, rbp +FN_PREFIX(CryptonightR_instruction115): + add rdi, rbp + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction116): + sub rdi, rbp +FN_PREFIX(CryptonightR_instruction117): + ror edi, cl +FN_PREFIX(CryptonightR_instruction118): + rol edi, cl +FN_PREFIX(CryptonightR_instruction119): + xor rdi, rbp +FN_PREFIX(CryptonightR_instruction120): + imul rbp, rbp +FN_PREFIX(CryptonightR_instruction121): + imul rbp, rbp +FN_PREFIX(CryptonightR_instruction122): + imul rbp, rbp +FN_PREFIX(CryptonightR_instruction123): + add rbp, rbp + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction124): + sub rbp, rbp +FN_PREFIX(CryptonightR_instruction125): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction126): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction127): + xor rbp, rbp +FN_PREFIX(CryptonightR_instruction128): + imul rbx, rsp +FN_PREFIX(CryptonightR_instruction129): + imul rbx, rsp +FN_PREFIX(CryptonightR_instruction130): + imul rbx, rsp +FN_PREFIX(CryptonightR_instruction131): + add rbx, rsp + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction132): + sub rbx, rsp +FN_PREFIX(CryptonightR_instruction133): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction134): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction135): + xor rbx, rsp +FN_PREFIX(CryptonightR_instruction136): + imul rsi, rsp +FN_PREFIX(CryptonightR_instruction137): + imul rsi, rsp +FN_PREFIX(CryptonightR_instruction138): + imul rsi, rsp +FN_PREFIX(CryptonightR_instruction139): + add rsi, rsp + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction140): + sub rsi, rsp +FN_PREFIX(CryptonightR_instruction141): + ror esi, cl +FN_PREFIX(CryptonightR_instruction142): + rol esi, cl +FN_PREFIX(CryptonightR_instruction143): + xor rsi, rsp +FN_PREFIX(CryptonightR_instruction144): + imul rdi, rsp +FN_PREFIX(CryptonightR_instruction145): + imul rdi, rsp +FN_PREFIX(CryptonightR_instruction146): + imul rdi, rsp +FN_PREFIX(CryptonightR_instruction147): + add rdi, rsp + 
add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction148): + sub rdi, rsp +FN_PREFIX(CryptonightR_instruction149): + ror edi, cl +FN_PREFIX(CryptonightR_instruction150): + rol edi, cl +FN_PREFIX(CryptonightR_instruction151): + xor rdi, rsp +FN_PREFIX(CryptonightR_instruction152): + imul rbp, rsp +FN_PREFIX(CryptonightR_instruction153): + imul rbp, rsp +FN_PREFIX(CryptonightR_instruction154): + imul rbp, rsp +FN_PREFIX(CryptonightR_instruction155): + add rbp, rsp + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction156): + sub rbp, rsp +FN_PREFIX(CryptonightR_instruction157): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction158): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction159): + xor rbp, rsp +FN_PREFIX(CryptonightR_instruction160): + imul rbx, r15 +FN_PREFIX(CryptonightR_instruction161): + imul rbx, r15 +FN_PREFIX(CryptonightR_instruction162): + imul rbx, r15 +FN_PREFIX(CryptonightR_instruction163): + add rbx, r15 + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction164): + sub rbx, r15 +FN_PREFIX(CryptonightR_instruction165): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction166): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction167): + xor rbx, r15 +FN_PREFIX(CryptonightR_instruction168): + imul rsi, r15 +FN_PREFIX(CryptonightR_instruction169): + imul rsi, r15 +FN_PREFIX(CryptonightR_instruction170): + imul rsi, r15 +FN_PREFIX(CryptonightR_instruction171): + add rsi, r15 + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction172): + sub rsi, r15 +FN_PREFIX(CryptonightR_instruction173): + ror esi, cl +FN_PREFIX(CryptonightR_instruction174): + rol esi, cl +FN_PREFIX(CryptonightR_instruction175): + xor rsi, r15 +FN_PREFIX(CryptonightR_instruction176): + imul rdi, r15 +FN_PREFIX(CryptonightR_instruction177): + imul rdi, r15 +FN_PREFIX(CryptonightR_instruction178): + imul rdi, r15 +FN_PREFIX(CryptonightR_instruction179): + add rdi, r15 + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction180): + sub rdi, r15 
+FN_PREFIX(CryptonightR_instruction181): + ror edi, cl +FN_PREFIX(CryptonightR_instruction182): + rol edi, cl +FN_PREFIX(CryptonightR_instruction183): + xor rdi, r15 +FN_PREFIX(CryptonightR_instruction184): + imul rbp, r15 +FN_PREFIX(CryptonightR_instruction185): + imul rbp, r15 +FN_PREFIX(CryptonightR_instruction186): + imul rbp, r15 +FN_PREFIX(CryptonightR_instruction187): + add rbp, r15 + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction188): + sub rbp, r15 +FN_PREFIX(CryptonightR_instruction189): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction190): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction191): + xor rbp, r15 +FN_PREFIX(CryptonightR_instruction192): + imul rbx, rax +FN_PREFIX(CryptonightR_instruction193): + imul rbx, rax +FN_PREFIX(CryptonightR_instruction194): + imul rbx, rax +FN_PREFIX(CryptonightR_instruction195): + add rbx, rax + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction196): + sub rbx, rax +FN_PREFIX(CryptonightR_instruction197): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction198): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction199): + xor rbx, rax +FN_PREFIX(CryptonightR_instruction200): + imul rsi, rax +FN_PREFIX(CryptonightR_instruction201): + imul rsi, rax +FN_PREFIX(CryptonightR_instruction202): + imul rsi, rax +FN_PREFIX(CryptonightR_instruction203): + add rsi, rax + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction204): + sub rsi, rax +FN_PREFIX(CryptonightR_instruction205): + ror esi, cl +FN_PREFIX(CryptonightR_instruction206): + rol esi, cl +FN_PREFIX(CryptonightR_instruction207): + xor rsi, rax +FN_PREFIX(CryptonightR_instruction208): + imul rdi, rax +FN_PREFIX(CryptonightR_instruction209): + imul rdi, rax +FN_PREFIX(CryptonightR_instruction210): + imul rdi, rax +FN_PREFIX(CryptonightR_instruction211): + add rdi, rax + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction212): + sub rdi, rax +FN_PREFIX(CryptonightR_instruction213): + ror edi, cl +FN_PREFIX(CryptonightR_instruction214): + rol edi, cl 
+FN_PREFIX(CryptonightR_instruction215): + xor rdi, rax +FN_PREFIX(CryptonightR_instruction216): + imul rbp, rax +FN_PREFIX(CryptonightR_instruction217): + imul rbp, rax +FN_PREFIX(CryptonightR_instruction218): + imul rbp, rax +FN_PREFIX(CryptonightR_instruction219): + add rbp, rax + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction220): + sub rbp, rax +FN_PREFIX(CryptonightR_instruction221): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction222): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction223): + xor rbp, rax +FN_PREFIX(CryptonightR_instruction224): + imul rbx, rdx +FN_PREFIX(CryptonightR_instruction225): + imul rbx, rdx +FN_PREFIX(CryptonightR_instruction226): + imul rbx, rdx +FN_PREFIX(CryptonightR_instruction227): + add rbx, rdx + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction228): + sub rbx, rdx +FN_PREFIX(CryptonightR_instruction229): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction230): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction231): + xor rbx, rdx +FN_PREFIX(CryptonightR_instruction232): + imul rsi, rdx +FN_PREFIX(CryptonightR_instruction233): + imul rsi, rdx +FN_PREFIX(CryptonightR_instruction234): + imul rsi, rdx +FN_PREFIX(CryptonightR_instruction235): + add rsi, rdx + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction236): + sub rsi, rdx +FN_PREFIX(CryptonightR_instruction237): + ror esi, cl +FN_PREFIX(CryptonightR_instruction238): + rol esi, cl +FN_PREFIX(CryptonightR_instruction239): + xor rsi, rdx +FN_PREFIX(CryptonightR_instruction240): + imul rdi, rdx +FN_PREFIX(CryptonightR_instruction241): + imul rdi, rdx +FN_PREFIX(CryptonightR_instruction242): + imul rdi, rdx +FN_PREFIX(CryptonightR_instruction243): + add rdi, rdx + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction244): + sub rdi, rdx +FN_PREFIX(CryptonightR_instruction245): + ror edi, cl +FN_PREFIX(CryptonightR_instruction246): + rol edi, cl +FN_PREFIX(CryptonightR_instruction247): + xor rdi, rdx +FN_PREFIX(CryptonightR_instruction248): + imul rbp, rdx 
+FN_PREFIX(CryptonightR_instruction249): + imul rbp, rdx +FN_PREFIX(CryptonightR_instruction250): + imul rbp, rdx +FN_PREFIX(CryptonightR_instruction251): + add rbp, rdx + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction252): + sub rbp, rdx +FN_PREFIX(CryptonightR_instruction253): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction254): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction255): + xor rbp, rdx +FN_PREFIX(CryptonightR_instruction256): + imul rbx, rbx +FN_PREFIX(CryptonightR_instruction_mov0): + +FN_PREFIX(CryptonightR_instruction_mov1): + +FN_PREFIX(CryptonightR_instruction_mov2): + +FN_PREFIX(CryptonightR_instruction_mov3): + +FN_PREFIX(CryptonightR_instruction_mov4): + +FN_PREFIX(CryptonightR_instruction_mov5): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov6): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov7): + +FN_PREFIX(CryptonightR_instruction_mov8): + +FN_PREFIX(CryptonightR_instruction_mov9): + +FN_PREFIX(CryptonightR_instruction_mov10): + +FN_PREFIX(CryptonightR_instruction_mov11): + +FN_PREFIX(CryptonightR_instruction_mov12): + +FN_PREFIX(CryptonightR_instruction_mov13): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov14): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov15): + +FN_PREFIX(CryptonightR_instruction_mov16): + +FN_PREFIX(CryptonightR_instruction_mov17): + +FN_PREFIX(CryptonightR_instruction_mov18): + +FN_PREFIX(CryptonightR_instruction_mov19): + +FN_PREFIX(CryptonightR_instruction_mov20): + +FN_PREFIX(CryptonightR_instruction_mov21): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov22): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov23): + +FN_PREFIX(CryptonightR_instruction_mov24): + +FN_PREFIX(CryptonightR_instruction_mov25): + +FN_PREFIX(CryptonightR_instruction_mov26): + +FN_PREFIX(CryptonightR_instruction_mov27): + +FN_PREFIX(CryptonightR_instruction_mov28): + +FN_PREFIX(CryptonightR_instruction_mov29): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov30): + mov rcx, rbx 
+FN_PREFIX(CryptonightR_instruction_mov31): + +FN_PREFIX(CryptonightR_instruction_mov32): + +FN_PREFIX(CryptonightR_instruction_mov33): + +FN_PREFIX(CryptonightR_instruction_mov34): + +FN_PREFIX(CryptonightR_instruction_mov35): + +FN_PREFIX(CryptonightR_instruction_mov36): + +FN_PREFIX(CryptonightR_instruction_mov37): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov38): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov39): + +FN_PREFIX(CryptonightR_instruction_mov40): + +FN_PREFIX(CryptonightR_instruction_mov41): + +FN_PREFIX(CryptonightR_instruction_mov42): + +FN_PREFIX(CryptonightR_instruction_mov43): + +FN_PREFIX(CryptonightR_instruction_mov44): + +FN_PREFIX(CryptonightR_instruction_mov45): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov46): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov47): + +FN_PREFIX(CryptonightR_instruction_mov48): + +FN_PREFIX(CryptonightR_instruction_mov49): + +FN_PREFIX(CryptonightR_instruction_mov50): + +FN_PREFIX(CryptonightR_instruction_mov51): + +FN_PREFIX(CryptonightR_instruction_mov52): + +FN_PREFIX(CryptonightR_instruction_mov53): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov54): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov55): + +FN_PREFIX(CryptonightR_instruction_mov56): + +FN_PREFIX(CryptonightR_instruction_mov57): + +FN_PREFIX(CryptonightR_instruction_mov58): + +FN_PREFIX(CryptonightR_instruction_mov59): + +FN_PREFIX(CryptonightR_instruction_mov60): + +FN_PREFIX(CryptonightR_instruction_mov61): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov62): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov63): + +FN_PREFIX(CryptonightR_instruction_mov64): + +FN_PREFIX(CryptonightR_instruction_mov65): + +FN_PREFIX(CryptonightR_instruction_mov66): + +FN_PREFIX(CryptonightR_instruction_mov67): + +FN_PREFIX(CryptonightR_instruction_mov68): + +FN_PREFIX(CryptonightR_instruction_mov69): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov70): + mov rcx, rdi 
+FN_PREFIX(CryptonightR_instruction_mov71): + +FN_PREFIX(CryptonightR_instruction_mov72): + +FN_PREFIX(CryptonightR_instruction_mov73): + +FN_PREFIX(CryptonightR_instruction_mov74): + +FN_PREFIX(CryptonightR_instruction_mov75): + +FN_PREFIX(CryptonightR_instruction_mov76): + +FN_PREFIX(CryptonightR_instruction_mov77): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov78): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov79): + +FN_PREFIX(CryptonightR_instruction_mov80): + +FN_PREFIX(CryptonightR_instruction_mov81): + +FN_PREFIX(CryptonightR_instruction_mov82): + +FN_PREFIX(CryptonightR_instruction_mov83): + +FN_PREFIX(CryptonightR_instruction_mov84): + +FN_PREFIX(CryptonightR_instruction_mov85): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov86): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov87): + +FN_PREFIX(CryptonightR_instruction_mov88): + +FN_PREFIX(CryptonightR_instruction_mov89): + +FN_PREFIX(CryptonightR_instruction_mov90): + +FN_PREFIX(CryptonightR_instruction_mov91): + +FN_PREFIX(CryptonightR_instruction_mov92): + +FN_PREFIX(CryptonightR_instruction_mov93): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov94): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov95): + +FN_PREFIX(CryptonightR_instruction_mov96): + +FN_PREFIX(CryptonightR_instruction_mov97): + +FN_PREFIX(CryptonightR_instruction_mov98): + +FN_PREFIX(CryptonightR_instruction_mov99): + +FN_PREFIX(CryptonightR_instruction_mov100): + +FN_PREFIX(CryptonightR_instruction_mov101): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov102): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov103): + +FN_PREFIX(CryptonightR_instruction_mov104): + +FN_PREFIX(CryptonightR_instruction_mov105): + +FN_PREFIX(CryptonightR_instruction_mov106): + +FN_PREFIX(CryptonightR_instruction_mov107): + +FN_PREFIX(CryptonightR_instruction_mov108): + +FN_PREFIX(CryptonightR_instruction_mov109): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov110): + mov rcx, rbp 
+FN_PREFIX(CryptonightR_instruction_mov111): + +FN_PREFIX(CryptonightR_instruction_mov112): + +FN_PREFIX(CryptonightR_instruction_mov113): + +FN_PREFIX(CryptonightR_instruction_mov114): + +FN_PREFIX(CryptonightR_instruction_mov115): + +FN_PREFIX(CryptonightR_instruction_mov116): + +FN_PREFIX(CryptonightR_instruction_mov117): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov118): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov119): + +FN_PREFIX(CryptonightR_instruction_mov120): + +FN_PREFIX(CryptonightR_instruction_mov121): + +FN_PREFIX(CryptonightR_instruction_mov122): + +FN_PREFIX(CryptonightR_instruction_mov123): + +FN_PREFIX(CryptonightR_instruction_mov124): + +FN_PREFIX(CryptonightR_instruction_mov125): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov126): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov127): + +FN_PREFIX(CryptonightR_instruction_mov128): + +FN_PREFIX(CryptonightR_instruction_mov129): + +FN_PREFIX(CryptonightR_instruction_mov130): + +FN_PREFIX(CryptonightR_instruction_mov131): + +FN_PREFIX(CryptonightR_instruction_mov132): + +FN_PREFIX(CryptonightR_instruction_mov133): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov134): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov135): + +FN_PREFIX(CryptonightR_instruction_mov136): + +FN_PREFIX(CryptonightR_instruction_mov137): + +FN_PREFIX(CryptonightR_instruction_mov138): + +FN_PREFIX(CryptonightR_instruction_mov139): + +FN_PREFIX(CryptonightR_instruction_mov140): + +FN_PREFIX(CryptonightR_instruction_mov141): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov142): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov143): + +FN_PREFIX(CryptonightR_instruction_mov144): + +FN_PREFIX(CryptonightR_instruction_mov145): + +FN_PREFIX(CryptonightR_instruction_mov146): + +FN_PREFIX(CryptonightR_instruction_mov147): + +FN_PREFIX(CryptonightR_instruction_mov148): + +FN_PREFIX(CryptonightR_instruction_mov149): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov150): + 
mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov151): + +FN_PREFIX(CryptonightR_instruction_mov152): + +FN_PREFIX(CryptonightR_instruction_mov153): + +FN_PREFIX(CryptonightR_instruction_mov154): + +FN_PREFIX(CryptonightR_instruction_mov155): + +FN_PREFIX(CryptonightR_instruction_mov156): + +FN_PREFIX(CryptonightR_instruction_mov157): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov158): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov159): + +FN_PREFIX(CryptonightR_instruction_mov160): + +FN_PREFIX(CryptonightR_instruction_mov161): + +FN_PREFIX(CryptonightR_instruction_mov162): + +FN_PREFIX(CryptonightR_instruction_mov163): + +FN_PREFIX(CryptonightR_instruction_mov164): + +FN_PREFIX(CryptonightR_instruction_mov165): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov166): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov167): + +FN_PREFIX(CryptonightR_instruction_mov168): + +FN_PREFIX(CryptonightR_instruction_mov169): + +FN_PREFIX(CryptonightR_instruction_mov170): + +FN_PREFIX(CryptonightR_instruction_mov171): + +FN_PREFIX(CryptonightR_instruction_mov172): + +FN_PREFIX(CryptonightR_instruction_mov173): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov174): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov175): + +FN_PREFIX(CryptonightR_instruction_mov176): + +FN_PREFIX(CryptonightR_instruction_mov177): + +FN_PREFIX(CryptonightR_instruction_mov178): + +FN_PREFIX(CryptonightR_instruction_mov179): + +FN_PREFIX(CryptonightR_instruction_mov180): + +FN_PREFIX(CryptonightR_instruction_mov181): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov182): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov183): + +FN_PREFIX(CryptonightR_instruction_mov184): + +FN_PREFIX(CryptonightR_instruction_mov185): + +FN_PREFIX(CryptonightR_instruction_mov186): + +FN_PREFIX(CryptonightR_instruction_mov187): + +FN_PREFIX(CryptonightR_instruction_mov188): + +FN_PREFIX(CryptonightR_instruction_mov189): + mov rcx, r15 
+FN_PREFIX(CryptonightR_instruction_mov190): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov191): + +FN_PREFIX(CryptonightR_instruction_mov192): + +FN_PREFIX(CryptonightR_instruction_mov193): + +FN_PREFIX(CryptonightR_instruction_mov194): + +FN_PREFIX(CryptonightR_instruction_mov195): + +FN_PREFIX(CryptonightR_instruction_mov196): + +FN_PREFIX(CryptonightR_instruction_mov197): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov198): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov199): + +FN_PREFIX(CryptonightR_instruction_mov200): + +FN_PREFIX(CryptonightR_instruction_mov201): + +FN_PREFIX(CryptonightR_instruction_mov202): + +FN_PREFIX(CryptonightR_instruction_mov203): + +FN_PREFIX(CryptonightR_instruction_mov204): + +FN_PREFIX(CryptonightR_instruction_mov205): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov206): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov207): + +FN_PREFIX(CryptonightR_instruction_mov208): + +FN_PREFIX(CryptonightR_instruction_mov209): + +FN_PREFIX(CryptonightR_instruction_mov210): + +FN_PREFIX(CryptonightR_instruction_mov211): + +FN_PREFIX(CryptonightR_instruction_mov212): + +FN_PREFIX(CryptonightR_instruction_mov213): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov214): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov215): + +FN_PREFIX(CryptonightR_instruction_mov216): + +FN_PREFIX(CryptonightR_instruction_mov217): + +FN_PREFIX(CryptonightR_instruction_mov218): + +FN_PREFIX(CryptonightR_instruction_mov219): + +FN_PREFIX(CryptonightR_instruction_mov220): + +FN_PREFIX(CryptonightR_instruction_mov221): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov222): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov223): + +FN_PREFIX(CryptonightR_instruction_mov224): + +FN_PREFIX(CryptonightR_instruction_mov225): + +FN_PREFIX(CryptonightR_instruction_mov226): + +FN_PREFIX(CryptonightR_instruction_mov227): + +FN_PREFIX(CryptonightR_instruction_mov228): + +FN_PREFIX(CryptonightR_instruction_mov229): + 
mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov230): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov231): + +FN_PREFIX(CryptonightR_instruction_mov232): + +FN_PREFIX(CryptonightR_instruction_mov233): + +FN_PREFIX(CryptonightR_instruction_mov234): + +FN_PREFIX(CryptonightR_instruction_mov235): + +FN_PREFIX(CryptonightR_instruction_mov236): + +FN_PREFIX(CryptonightR_instruction_mov237): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov238): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov239): + +FN_PREFIX(CryptonightR_instruction_mov240): + +FN_PREFIX(CryptonightR_instruction_mov241): + +FN_PREFIX(CryptonightR_instruction_mov242): + +FN_PREFIX(CryptonightR_instruction_mov243): + +FN_PREFIX(CryptonightR_instruction_mov244): + +FN_PREFIX(CryptonightR_instruction_mov245): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov246): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov247): + +FN_PREFIX(CryptonightR_instruction_mov248): + +FN_PREFIX(CryptonightR_instruction_mov249): + +FN_PREFIX(CryptonightR_instruction_mov250): + +FN_PREFIX(CryptonightR_instruction_mov251): + +FN_PREFIX(CryptonightR_instruction_mov252): + +FN_PREFIX(CryptonightR_instruction_mov253): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov254): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov255): + +FN_PREFIX(CryptonightR_instruction_mov256): diff --git a/src/crypto/asm/CryptonightR_template.asm b/src/crypto/asm/CryptonightR_template.asm new file mode 100644 index 00000000..ec8ad5af --- /dev/null +++ b/src/crypto/asm/CryptonightR_template.asm @@ -0,0 +1,1582 @@ +; Auto-generated file, do not edit + +_TEXT_CN_TEMPLATE SEGMENT PAGE READ EXECUTE +PUBLIC CryptonightR_instruction0 +PUBLIC CryptonightR_instruction1 +PUBLIC CryptonightR_instruction2 +PUBLIC CryptonightR_instruction3 +PUBLIC CryptonightR_instruction4 +PUBLIC CryptonightR_instruction5 +PUBLIC CryptonightR_instruction6 +PUBLIC CryptonightR_instruction7 +PUBLIC CryptonightR_instruction8 +PUBLIC 
CryptonightR_instruction9 +PUBLIC CryptonightR_instruction10 +PUBLIC CryptonightR_instruction11 +PUBLIC CryptonightR_instruction12 +PUBLIC CryptonightR_instruction13 +PUBLIC CryptonightR_instruction14 +PUBLIC CryptonightR_instruction15 +PUBLIC CryptonightR_instruction16 +PUBLIC CryptonightR_instruction17 +PUBLIC CryptonightR_instruction18 +PUBLIC CryptonightR_instruction19 +PUBLIC CryptonightR_instruction20 +PUBLIC CryptonightR_instruction21 +PUBLIC CryptonightR_instruction22 +PUBLIC CryptonightR_instruction23 +PUBLIC CryptonightR_instruction24 +PUBLIC CryptonightR_instruction25 +PUBLIC CryptonightR_instruction26 +PUBLIC CryptonightR_instruction27 +PUBLIC CryptonightR_instruction28 +PUBLIC CryptonightR_instruction29 +PUBLIC CryptonightR_instruction30 +PUBLIC CryptonightR_instruction31 +PUBLIC CryptonightR_instruction32 +PUBLIC CryptonightR_instruction33 +PUBLIC CryptonightR_instruction34 +PUBLIC CryptonightR_instruction35 +PUBLIC CryptonightR_instruction36 +PUBLIC CryptonightR_instruction37 +PUBLIC CryptonightR_instruction38 +PUBLIC CryptonightR_instruction39 +PUBLIC CryptonightR_instruction40 +PUBLIC CryptonightR_instruction41 +PUBLIC CryptonightR_instruction42 +PUBLIC CryptonightR_instruction43 +PUBLIC CryptonightR_instruction44 +PUBLIC CryptonightR_instruction45 +PUBLIC CryptonightR_instruction46 +PUBLIC CryptonightR_instruction47 +PUBLIC CryptonightR_instruction48 +PUBLIC CryptonightR_instruction49 +PUBLIC CryptonightR_instruction50 +PUBLIC CryptonightR_instruction51 +PUBLIC CryptonightR_instruction52 +PUBLIC CryptonightR_instruction53 +PUBLIC CryptonightR_instruction54 +PUBLIC CryptonightR_instruction55 +PUBLIC CryptonightR_instruction56 +PUBLIC CryptonightR_instruction57 +PUBLIC CryptonightR_instruction58 +PUBLIC CryptonightR_instruction59 +PUBLIC CryptonightR_instruction60 +PUBLIC CryptonightR_instruction61 +PUBLIC CryptonightR_instruction62 +PUBLIC CryptonightR_instruction63 +PUBLIC CryptonightR_instruction64 +PUBLIC CryptonightR_instruction65 +PUBLIC 
CryptonightR_instruction66 +PUBLIC CryptonightR_instruction67 +PUBLIC CryptonightR_instruction68 +PUBLIC CryptonightR_instruction69 +PUBLIC CryptonightR_instruction70 +PUBLIC CryptonightR_instruction71 +PUBLIC CryptonightR_instruction72 +PUBLIC CryptonightR_instruction73 +PUBLIC CryptonightR_instruction74 +PUBLIC CryptonightR_instruction75 +PUBLIC CryptonightR_instruction76 +PUBLIC CryptonightR_instruction77 +PUBLIC CryptonightR_instruction78 +PUBLIC CryptonightR_instruction79 +PUBLIC CryptonightR_instruction80 +PUBLIC CryptonightR_instruction81 +PUBLIC CryptonightR_instruction82 +PUBLIC CryptonightR_instruction83 +PUBLIC CryptonightR_instruction84 +PUBLIC CryptonightR_instruction85 +PUBLIC CryptonightR_instruction86 +PUBLIC CryptonightR_instruction87 +PUBLIC CryptonightR_instruction88 +PUBLIC CryptonightR_instruction89 +PUBLIC CryptonightR_instruction90 +PUBLIC CryptonightR_instruction91 +PUBLIC CryptonightR_instruction92 +PUBLIC CryptonightR_instruction93 +PUBLIC CryptonightR_instruction94 +PUBLIC CryptonightR_instruction95 +PUBLIC CryptonightR_instruction96 +PUBLIC CryptonightR_instruction97 +PUBLIC CryptonightR_instruction98 +PUBLIC CryptonightR_instruction99 +PUBLIC CryptonightR_instruction100 +PUBLIC CryptonightR_instruction101 +PUBLIC CryptonightR_instruction102 +PUBLIC CryptonightR_instruction103 +PUBLIC CryptonightR_instruction104 +PUBLIC CryptonightR_instruction105 +PUBLIC CryptonightR_instruction106 +PUBLIC CryptonightR_instruction107 +PUBLIC CryptonightR_instruction108 +PUBLIC CryptonightR_instruction109 +PUBLIC CryptonightR_instruction110 +PUBLIC CryptonightR_instruction111 +PUBLIC CryptonightR_instruction112 +PUBLIC CryptonightR_instruction113 +PUBLIC CryptonightR_instruction114 +PUBLIC CryptonightR_instruction115 +PUBLIC CryptonightR_instruction116 +PUBLIC CryptonightR_instruction117 +PUBLIC CryptonightR_instruction118 +PUBLIC CryptonightR_instruction119 +PUBLIC CryptonightR_instruction120 +PUBLIC CryptonightR_instruction121 +PUBLIC 
CryptonightR_instruction122 +PUBLIC CryptonightR_instruction123 +PUBLIC CryptonightR_instruction124 +PUBLIC CryptonightR_instruction125 +PUBLIC CryptonightR_instruction126 +PUBLIC CryptonightR_instruction127 +PUBLIC CryptonightR_instruction128 +PUBLIC CryptonightR_instruction129 +PUBLIC CryptonightR_instruction130 +PUBLIC CryptonightR_instruction131 +PUBLIC CryptonightR_instruction132 +PUBLIC CryptonightR_instruction133 +PUBLIC CryptonightR_instruction134 +PUBLIC CryptonightR_instruction135 +PUBLIC CryptonightR_instruction136 +PUBLIC CryptonightR_instruction137 +PUBLIC CryptonightR_instruction138 +PUBLIC CryptonightR_instruction139 +PUBLIC CryptonightR_instruction140 +PUBLIC CryptonightR_instruction141 +PUBLIC CryptonightR_instruction142 +PUBLIC CryptonightR_instruction143 +PUBLIC CryptonightR_instruction144 +PUBLIC CryptonightR_instruction145 +PUBLIC CryptonightR_instruction146 +PUBLIC CryptonightR_instruction147 +PUBLIC CryptonightR_instruction148 +PUBLIC CryptonightR_instruction149 +PUBLIC CryptonightR_instruction150 +PUBLIC CryptonightR_instruction151 +PUBLIC CryptonightR_instruction152 +PUBLIC CryptonightR_instruction153 +PUBLIC CryptonightR_instruction154 +PUBLIC CryptonightR_instruction155 +PUBLIC CryptonightR_instruction156 +PUBLIC CryptonightR_instruction157 +PUBLIC CryptonightR_instruction158 +PUBLIC CryptonightR_instruction159 +PUBLIC CryptonightR_instruction160 +PUBLIC CryptonightR_instruction161 +PUBLIC CryptonightR_instruction162 +PUBLIC CryptonightR_instruction163 +PUBLIC CryptonightR_instruction164 +PUBLIC CryptonightR_instruction165 +PUBLIC CryptonightR_instruction166 +PUBLIC CryptonightR_instruction167 +PUBLIC CryptonightR_instruction168 +PUBLIC CryptonightR_instruction169 +PUBLIC CryptonightR_instruction170 +PUBLIC CryptonightR_instruction171 +PUBLIC CryptonightR_instruction172 +PUBLIC CryptonightR_instruction173 +PUBLIC CryptonightR_instruction174 +PUBLIC CryptonightR_instruction175 +PUBLIC CryptonightR_instruction176 +PUBLIC 
CryptonightR_instruction177 +PUBLIC CryptonightR_instruction178 +PUBLIC CryptonightR_instruction179 +PUBLIC CryptonightR_instruction180 +PUBLIC CryptonightR_instruction181 +PUBLIC CryptonightR_instruction182 +PUBLIC CryptonightR_instruction183 +PUBLIC CryptonightR_instruction184 +PUBLIC CryptonightR_instruction185 +PUBLIC CryptonightR_instruction186 +PUBLIC CryptonightR_instruction187 +PUBLIC CryptonightR_instruction188 +PUBLIC CryptonightR_instruction189 +PUBLIC CryptonightR_instruction190 +PUBLIC CryptonightR_instruction191 +PUBLIC CryptonightR_instruction192 +PUBLIC CryptonightR_instruction193 +PUBLIC CryptonightR_instruction194 +PUBLIC CryptonightR_instruction195 +PUBLIC CryptonightR_instruction196 +PUBLIC CryptonightR_instruction197 +PUBLIC CryptonightR_instruction198 +PUBLIC CryptonightR_instruction199 +PUBLIC CryptonightR_instruction200 +PUBLIC CryptonightR_instruction201 +PUBLIC CryptonightR_instruction202 +PUBLIC CryptonightR_instruction203 +PUBLIC CryptonightR_instruction204 +PUBLIC CryptonightR_instruction205 +PUBLIC CryptonightR_instruction206 +PUBLIC CryptonightR_instruction207 +PUBLIC CryptonightR_instruction208 +PUBLIC CryptonightR_instruction209 +PUBLIC CryptonightR_instruction210 +PUBLIC CryptonightR_instruction211 +PUBLIC CryptonightR_instruction212 +PUBLIC CryptonightR_instruction213 +PUBLIC CryptonightR_instruction214 +PUBLIC CryptonightR_instruction215 +PUBLIC CryptonightR_instruction216 +PUBLIC CryptonightR_instruction217 +PUBLIC CryptonightR_instruction218 +PUBLIC CryptonightR_instruction219 +PUBLIC CryptonightR_instruction220 +PUBLIC CryptonightR_instruction221 +PUBLIC CryptonightR_instruction222 +PUBLIC CryptonightR_instruction223 +PUBLIC CryptonightR_instruction224 +PUBLIC CryptonightR_instruction225 +PUBLIC CryptonightR_instruction226 +PUBLIC CryptonightR_instruction227 +PUBLIC CryptonightR_instruction228 +PUBLIC CryptonightR_instruction229 +PUBLIC CryptonightR_instruction230 +PUBLIC CryptonightR_instruction231 +PUBLIC 
CryptonightR_instruction232 +PUBLIC CryptonightR_instruction233 +PUBLIC CryptonightR_instruction234 +PUBLIC CryptonightR_instruction235 +PUBLIC CryptonightR_instruction236 +PUBLIC CryptonightR_instruction237 +PUBLIC CryptonightR_instruction238 +PUBLIC CryptonightR_instruction239 +PUBLIC CryptonightR_instruction240 +PUBLIC CryptonightR_instruction241 +PUBLIC CryptonightR_instruction242 +PUBLIC CryptonightR_instruction243 +PUBLIC CryptonightR_instruction244 +PUBLIC CryptonightR_instruction245 +PUBLIC CryptonightR_instruction246 +PUBLIC CryptonightR_instruction247 +PUBLIC CryptonightR_instruction248 +PUBLIC CryptonightR_instruction249 +PUBLIC CryptonightR_instruction250 +PUBLIC CryptonightR_instruction251 +PUBLIC CryptonightR_instruction252 +PUBLIC CryptonightR_instruction253 +PUBLIC CryptonightR_instruction254 +PUBLIC CryptonightR_instruction255 +PUBLIC CryptonightR_instruction256 +PUBLIC CryptonightR_instruction_mov0 +PUBLIC CryptonightR_instruction_mov1 +PUBLIC CryptonightR_instruction_mov2 +PUBLIC CryptonightR_instruction_mov3 +PUBLIC CryptonightR_instruction_mov4 +PUBLIC CryptonightR_instruction_mov5 +PUBLIC CryptonightR_instruction_mov6 +PUBLIC CryptonightR_instruction_mov7 +PUBLIC CryptonightR_instruction_mov8 +PUBLIC CryptonightR_instruction_mov9 +PUBLIC CryptonightR_instruction_mov10 +PUBLIC CryptonightR_instruction_mov11 +PUBLIC CryptonightR_instruction_mov12 +PUBLIC CryptonightR_instruction_mov13 +PUBLIC CryptonightR_instruction_mov14 +PUBLIC CryptonightR_instruction_mov15 +PUBLIC CryptonightR_instruction_mov16 +PUBLIC CryptonightR_instruction_mov17 +PUBLIC CryptonightR_instruction_mov18 +PUBLIC CryptonightR_instruction_mov19 +PUBLIC CryptonightR_instruction_mov20 +PUBLIC CryptonightR_instruction_mov21 +PUBLIC CryptonightR_instruction_mov22 +PUBLIC CryptonightR_instruction_mov23 +PUBLIC CryptonightR_instruction_mov24 +PUBLIC CryptonightR_instruction_mov25 +PUBLIC CryptonightR_instruction_mov26 +PUBLIC CryptonightR_instruction_mov27 +PUBLIC 
CryptonightR_instruction_mov28 +PUBLIC CryptonightR_instruction_mov29 +PUBLIC CryptonightR_instruction_mov30 +PUBLIC CryptonightR_instruction_mov31 +PUBLIC CryptonightR_instruction_mov32 +PUBLIC CryptonightR_instruction_mov33 +PUBLIC CryptonightR_instruction_mov34 +PUBLIC CryptonightR_instruction_mov35 +PUBLIC CryptonightR_instruction_mov36 +PUBLIC CryptonightR_instruction_mov37 +PUBLIC CryptonightR_instruction_mov38 +PUBLIC CryptonightR_instruction_mov39 +PUBLIC CryptonightR_instruction_mov40 +PUBLIC CryptonightR_instruction_mov41 +PUBLIC CryptonightR_instruction_mov42 +PUBLIC CryptonightR_instruction_mov43 +PUBLIC CryptonightR_instruction_mov44 +PUBLIC CryptonightR_instruction_mov45 +PUBLIC CryptonightR_instruction_mov46 +PUBLIC CryptonightR_instruction_mov47 +PUBLIC CryptonightR_instruction_mov48 +PUBLIC CryptonightR_instruction_mov49 +PUBLIC CryptonightR_instruction_mov50 +PUBLIC CryptonightR_instruction_mov51 +PUBLIC CryptonightR_instruction_mov52 +PUBLIC CryptonightR_instruction_mov53 +PUBLIC CryptonightR_instruction_mov54 +PUBLIC CryptonightR_instruction_mov55 +PUBLIC CryptonightR_instruction_mov56 +PUBLIC CryptonightR_instruction_mov57 +PUBLIC CryptonightR_instruction_mov58 +PUBLIC CryptonightR_instruction_mov59 +PUBLIC CryptonightR_instruction_mov60 +PUBLIC CryptonightR_instruction_mov61 +PUBLIC CryptonightR_instruction_mov62 +PUBLIC CryptonightR_instruction_mov63 +PUBLIC CryptonightR_instruction_mov64 +PUBLIC CryptonightR_instruction_mov65 +PUBLIC CryptonightR_instruction_mov66 +PUBLIC CryptonightR_instruction_mov67 +PUBLIC CryptonightR_instruction_mov68 +PUBLIC CryptonightR_instruction_mov69 +PUBLIC CryptonightR_instruction_mov70 +PUBLIC CryptonightR_instruction_mov71 +PUBLIC CryptonightR_instruction_mov72 +PUBLIC CryptonightR_instruction_mov73 +PUBLIC CryptonightR_instruction_mov74 +PUBLIC CryptonightR_instruction_mov75 +PUBLIC CryptonightR_instruction_mov76 +PUBLIC CryptonightR_instruction_mov77 +PUBLIC CryptonightR_instruction_mov78 +PUBLIC 
CryptonightR_instruction_mov79 +PUBLIC CryptonightR_instruction_mov80 +PUBLIC CryptonightR_instruction_mov81 +PUBLIC CryptonightR_instruction_mov82 +PUBLIC CryptonightR_instruction_mov83 +PUBLIC CryptonightR_instruction_mov84 +PUBLIC CryptonightR_instruction_mov85 +PUBLIC CryptonightR_instruction_mov86 +PUBLIC CryptonightR_instruction_mov87 +PUBLIC CryptonightR_instruction_mov88 +PUBLIC CryptonightR_instruction_mov89 +PUBLIC CryptonightR_instruction_mov90 +PUBLIC CryptonightR_instruction_mov91 +PUBLIC CryptonightR_instruction_mov92 +PUBLIC CryptonightR_instruction_mov93 +PUBLIC CryptonightR_instruction_mov94 +PUBLIC CryptonightR_instruction_mov95 +PUBLIC CryptonightR_instruction_mov96 +PUBLIC CryptonightR_instruction_mov97 +PUBLIC CryptonightR_instruction_mov98 +PUBLIC CryptonightR_instruction_mov99 +PUBLIC CryptonightR_instruction_mov100 +PUBLIC CryptonightR_instruction_mov101 +PUBLIC CryptonightR_instruction_mov102 +PUBLIC CryptonightR_instruction_mov103 +PUBLIC CryptonightR_instruction_mov104 +PUBLIC CryptonightR_instruction_mov105 +PUBLIC CryptonightR_instruction_mov106 +PUBLIC CryptonightR_instruction_mov107 +PUBLIC CryptonightR_instruction_mov108 +PUBLIC CryptonightR_instruction_mov109 +PUBLIC CryptonightR_instruction_mov110 +PUBLIC CryptonightR_instruction_mov111 +PUBLIC CryptonightR_instruction_mov112 +PUBLIC CryptonightR_instruction_mov113 +PUBLIC CryptonightR_instruction_mov114 +PUBLIC CryptonightR_instruction_mov115 +PUBLIC CryptonightR_instruction_mov116 +PUBLIC CryptonightR_instruction_mov117 +PUBLIC CryptonightR_instruction_mov118 +PUBLIC CryptonightR_instruction_mov119 +PUBLIC CryptonightR_instruction_mov120 +PUBLIC CryptonightR_instruction_mov121 +PUBLIC CryptonightR_instruction_mov122 +PUBLIC CryptonightR_instruction_mov123 +PUBLIC CryptonightR_instruction_mov124 +PUBLIC CryptonightR_instruction_mov125 +PUBLIC CryptonightR_instruction_mov126 +PUBLIC CryptonightR_instruction_mov127 +PUBLIC CryptonightR_instruction_mov128 +PUBLIC 
CryptonightR_instruction_mov129 +PUBLIC CryptonightR_instruction_mov130 +PUBLIC CryptonightR_instruction_mov131 +PUBLIC CryptonightR_instruction_mov132 +PUBLIC CryptonightR_instruction_mov133 +PUBLIC CryptonightR_instruction_mov134 +PUBLIC CryptonightR_instruction_mov135 +PUBLIC CryptonightR_instruction_mov136 +PUBLIC CryptonightR_instruction_mov137 +PUBLIC CryptonightR_instruction_mov138 +PUBLIC CryptonightR_instruction_mov139 +PUBLIC CryptonightR_instruction_mov140 +PUBLIC CryptonightR_instruction_mov141 +PUBLIC CryptonightR_instruction_mov142 +PUBLIC CryptonightR_instruction_mov143 +PUBLIC CryptonightR_instruction_mov144 +PUBLIC CryptonightR_instruction_mov145 +PUBLIC CryptonightR_instruction_mov146 +PUBLIC CryptonightR_instruction_mov147 +PUBLIC CryptonightR_instruction_mov148 +PUBLIC CryptonightR_instruction_mov149 +PUBLIC CryptonightR_instruction_mov150 +PUBLIC CryptonightR_instruction_mov151 +PUBLIC CryptonightR_instruction_mov152 +PUBLIC CryptonightR_instruction_mov153 +PUBLIC CryptonightR_instruction_mov154 +PUBLIC CryptonightR_instruction_mov155 +PUBLIC CryptonightR_instruction_mov156 +PUBLIC CryptonightR_instruction_mov157 +PUBLIC CryptonightR_instruction_mov158 +PUBLIC CryptonightR_instruction_mov159 +PUBLIC CryptonightR_instruction_mov160 +PUBLIC CryptonightR_instruction_mov161 +PUBLIC CryptonightR_instruction_mov162 +PUBLIC CryptonightR_instruction_mov163 +PUBLIC CryptonightR_instruction_mov164 +PUBLIC CryptonightR_instruction_mov165 +PUBLIC CryptonightR_instruction_mov166 +PUBLIC CryptonightR_instruction_mov167 +PUBLIC CryptonightR_instruction_mov168 +PUBLIC CryptonightR_instruction_mov169 +PUBLIC CryptonightR_instruction_mov170 +PUBLIC CryptonightR_instruction_mov171 +PUBLIC CryptonightR_instruction_mov172 +PUBLIC CryptonightR_instruction_mov173 +PUBLIC CryptonightR_instruction_mov174 +PUBLIC CryptonightR_instruction_mov175 +PUBLIC CryptonightR_instruction_mov176 +PUBLIC CryptonightR_instruction_mov177 +PUBLIC CryptonightR_instruction_mov178 +PUBLIC 
CryptonightR_instruction_mov179 +PUBLIC CryptonightR_instruction_mov180 +PUBLIC CryptonightR_instruction_mov181 +PUBLIC CryptonightR_instruction_mov182 +PUBLIC CryptonightR_instruction_mov183 +PUBLIC CryptonightR_instruction_mov184 +PUBLIC CryptonightR_instruction_mov185 +PUBLIC CryptonightR_instruction_mov186 +PUBLIC CryptonightR_instruction_mov187 +PUBLIC CryptonightR_instruction_mov188 +PUBLIC CryptonightR_instruction_mov189 +PUBLIC CryptonightR_instruction_mov190 +PUBLIC CryptonightR_instruction_mov191 +PUBLIC CryptonightR_instruction_mov192 +PUBLIC CryptonightR_instruction_mov193 +PUBLIC CryptonightR_instruction_mov194 +PUBLIC CryptonightR_instruction_mov195 +PUBLIC CryptonightR_instruction_mov196 +PUBLIC CryptonightR_instruction_mov197 +PUBLIC CryptonightR_instruction_mov198 +PUBLIC CryptonightR_instruction_mov199 +PUBLIC CryptonightR_instruction_mov200 +PUBLIC CryptonightR_instruction_mov201 +PUBLIC CryptonightR_instruction_mov202 +PUBLIC CryptonightR_instruction_mov203 +PUBLIC CryptonightR_instruction_mov204 +PUBLIC CryptonightR_instruction_mov205 +PUBLIC CryptonightR_instruction_mov206 +PUBLIC CryptonightR_instruction_mov207 +PUBLIC CryptonightR_instruction_mov208 +PUBLIC CryptonightR_instruction_mov209 +PUBLIC CryptonightR_instruction_mov210 +PUBLIC CryptonightR_instruction_mov211 +PUBLIC CryptonightR_instruction_mov212 +PUBLIC CryptonightR_instruction_mov213 +PUBLIC CryptonightR_instruction_mov214 +PUBLIC CryptonightR_instruction_mov215 +PUBLIC CryptonightR_instruction_mov216 +PUBLIC CryptonightR_instruction_mov217 +PUBLIC CryptonightR_instruction_mov218 +PUBLIC CryptonightR_instruction_mov219 +PUBLIC CryptonightR_instruction_mov220 +PUBLIC CryptonightR_instruction_mov221 +PUBLIC CryptonightR_instruction_mov222 +PUBLIC CryptonightR_instruction_mov223 +PUBLIC CryptonightR_instruction_mov224 +PUBLIC CryptonightR_instruction_mov225 +PUBLIC CryptonightR_instruction_mov226 +PUBLIC CryptonightR_instruction_mov227 +PUBLIC CryptonightR_instruction_mov228 +PUBLIC 
CryptonightR_instruction_mov229 +PUBLIC CryptonightR_instruction_mov230 +PUBLIC CryptonightR_instruction_mov231 +PUBLIC CryptonightR_instruction_mov232 +PUBLIC CryptonightR_instruction_mov233 +PUBLIC CryptonightR_instruction_mov234 +PUBLIC CryptonightR_instruction_mov235 +PUBLIC CryptonightR_instruction_mov236 +PUBLIC CryptonightR_instruction_mov237 +PUBLIC CryptonightR_instruction_mov238 +PUBLIC CryptonightR_instruction_mov239 +PUBLIC CryptonightR_instruction_mov240 +PUBLIC CryptonightR_instruction_mov241 +PUBLIC CryptonightR_instruction_mov242 +PUBLIC CryptonightR_instruction_mov243 +PUBLIC CryptonightR_instruction_mov244 +PUBLIC CryptonightR_instruction_mov245 +PUBLIC CryptonightR_instruction_mov246 +PUBLIC CryptonightR_instruction_mov247 +PUBLIC CryptonightR_instruction_mov248 +PUBLIC CryptonightR_instruction_mov249 +PUBLIC CryptonightR_instruction_mov250 +PUBLIC CryptonightR_instruction_mov251 +PUBLIC CryptonightR_instruction_mov252 +PUBLIC CryptonightR_instruction_mov253 +PUBLIC CryptonightR_instruction_mov254 +PUBLIC CryptonightR_instruction_mov255 +PUBLIC CryptonightR_instruction_mov256 + +INCLUDE CryptonightR_template_win.inc + +CryptonightR_instruction0: + imul rbx, rbx +CryptonightR_instruction1: + imul rbx, rbx +CryptonightR_instruction2: + imul rbx, rbx +CryptonightR_instruction3: + add rbx, rbx + add rbx, 2147483647 +CryptonightR_instruction4: + sub rbx, rbx +CryptonightR_instruction5: + ror ebx, cl +CryptonightR_instruction6: + rol ebx, cl +CryptonightR_instruction7: + xor rbx, rbx +CryptonightR_instruction8: + imul rsi, rbx +CryptonightR_instruction9: + imul rsi, rbx +CryptonightR_instruction10: + imul rsi, rbx +CryptonightR_instruction11: + add rsi, rbx + add rsi, 2147483647 +CryptonightR_instruction12: + sub rsi, rbx +CryptonightR_instruction13: + ror esi, cl +CryptonightR_instruction14: + rol esi, cl +CryptonightR_instruction15: + xor rsi, rbx +CryptonightR_instruction16: + imul rdi, rbx +CryptonightR_instruction17: + imul rdi, rbx 
+CryptonightR_instruction18: + imul rdi, rbx +CryptonightR_instruction19: + add rdi, rbx + add rdi, 2147483647 +CryptonightR_instruction20: + sub rdi, rbx +CryptonightR_instruction21: + ror edi, cl +CryptonightR_instruction22: + rol edi, cl +CryptonightR_instruction23: + xor rdi, rbx +CryptonightR_instruction24: + imul rbp, rbx +CryptonightR_instruction25: + imul rbp, rbx +CryptonightR_instruction26: + imul rbp, rbx +CryptonightR_instruction27: + add rbp, rbx + add rbp, 2147483647 +CryptonightR_instruction28: + sub rbp, rbx +CryptonightR_instruction29: + ror ebp, cl +CryptonightR_instruction30: + rol ebp, cl +CryptonightR_instruction31: + xor rbp, rbx +CryptonightR_instruction32: + imul rbx, rsi +CryptonightR_instruction33: + imul rbx, rsi +CryptonightR_instruction34: + imul rbx, rsi +CryptonightR_instruction35: + add rbx, rsi + add rbx, 2147483647 +CryptonightR_instruction36: + sub rbx, rsi +CryptonightR_instruction37: + ror ebx, cl +CryptonightR_instruction38: + rol ebx, cl +CryptonightR_instruction39: + xor rbx, rsi +CryptonightR_instruction40: + imul rsi, rsi +CryptonightR_instruction41: + imul rsi, rsi +CryptonightR_instruction42: + imul rsi, rsi +CryptonightR_instruction43: + add rsi, rsi + add rsi, 2147483647 +CryptonightR_instruction44: + sub rsi, rsi +CryptonightR_instruction45: + ror esi, cl +CryptonightR_instruction46: + rol esi, cl +CryptonightR_instruction47: + xor rsi, rsi +CryptonightR_instruction48: + imul rdi, rsi +CryptonightR_instruction49: + imul rdi, rsi +CryptonightR_instruction50: + imul rdi, rsi +CryptonightR_instruction51: + add rdi, rsi + add rdi, 2147483647 +CryptonightR_instruction52: + sub rdi, rsi +CryptonightR_instruction53: + ror edi, cl +CryptonightR_instruction54: + rol edi, cl +CryptonightR_instruction55: + xor rdi, rsi +CryptonightR_instruction56: + imul rbp, rsi +CryptonightR_instruction57: + imul rbp, rsi +CryptonightR_instruction58: + imul rbp, rsi +CryptonightR_instruction59: + add rbp, rsi + add rbp, 2147483647 
+CryptonightR_instruction60: + sub rbp, rsi +CryptonightR_instruction61: + ror ebp, cl +CryptonightR_instruction62: + rol ebp, cl +CryptonightR_instruction63: + xor rbp, rsi +CryptonightR_instruction64: + imul rbx, rdi +CryptonightR_instruction65: + imul rbx, rdi +CryptonightR_instruction66: + imul rbx, rdi +CryptonightR_instruction67: + add rbx, rdi + add rbx, 2147483647 +CryptonightR_instruction68: + sub rbx, rdi +CryptonightR_instruction69: + ror ebx, cl +CryptonightR_instruction70: + rol ebx, cl +CryptonightR_instruction71: + xor rbx, rdi +CryptonightR_instruction72: + imul rsi, rdi +CryptonightR_instruction73: + imul rsi, rdi +CryptonightR_instruction74: + imul rsi, rdi +CryptonightR_instruction75: + add rsi, rdi + add rsi, 2147483647 +CryptonightR_instruction76: + sub rsi, rdi +CryptonightR_instruction77: + ror esi, cl +CryptonightR_instruction78: + rol esi, cl +CryptonightR_instruction79: + xor rsi, rdi +CryptonightR_instruction80: + imul rdi, rdi +CryptonightR_instruction81: + imul rdi, rdi +CryptonightR_instruction82: + imul rdi, rdi +CryptonightR_instruction83: + add rdi, rdi + add rdi, 2147483647 +CryptonightR_instruction84: + sub rdi, rdi +CryptonightR_instruction85: + ror edi, cl +CryptonightR_instruction86: + rol edi, cl +CryptonightR_instruction87: + xor rdi, rdi +CryptonightR_instruction88: + imul rbp, rdi +CryptonightR_instruction89: + imul rbp, rdi +CryptonightR_instruction90: + imul rbp, rdi +CryptonightR_instruction91: + add rbp, rdi + add rbp, 2147483647 +CryptonightR_instruction92: + sub rbp, rdi +CryptonightR_instruction93: + ror ebp, cl +CryptonightR_instruction94: + rol ebp, cl +CryptonightR_instruction95: + xor rbp, rdi +CryptonightR_instruction96: + imul rbx, rbp +CryptonightR_instruction97: + imul rbx, rbp +CryptonightR_instruction98: + imul rbx, rbp +CryptonightR_instruction99: + add rbx, rbp + add rbx, 2147483647 +CryptonightR_instruction100: + sub rbx, rbp +CryptonightR_instruction101: + ror ebx, cl +CryptonightR_instruction102: + rol 
ebx, cl +CryptonightR_instruction103: + xor rbx, rbp +CryptonightR_instruction104: + imul rsi, rbp +CryptonightR_instruction105: + imul rsi, rbp +CryptonightR_instruction106: + imul rsi, rbp +CryptonightR_instruction107: + add rsi, rbp + add rsi, 2147483647 +CryptonightR_instruction108: + sub rsi, rbp +CryptonightR_instruction109: + ror esi, cl +CryptonightR_instruction110: + rol esi, cl +CryptonightR_instruction111: + xor rsi, rbp +CryptonightR_instruction112: + imul rdi, rbp +CryptonightR_instruction113: + imul rdi, rbp +CryptonightR_instruction114: + imul rdi, rbp +CryptonightR_instruction115: + add rdi, rbp + add rdi, 2147483647 +CryptonightR_instruction116: + sub rdi, rbp +CryptonightR_instruction117: + ror edi, cl +CryptonightR_instruction118: + rol edi, cl +CryptonightR_instruction119: + xor rdi, rbp +CryptonightR_instruction120: + imul rbp, rbp +CryptonightR_instruction121: + imul rbp, rbp +CryptonightR_instruction122: + imul rbp, rbp +CryptonightR_instruction123: + add rbp, rbp + add rbp, 2147483647 +CryptonightR_instruction124: + sub rbp, rbp +CryptonightR_instruction125: + ror ebp, cl +CryptonightR_instruction126: + rol ebp, cl +CryptonightR_instruction127: + xor rbp, rbp +CryptonightR_instruction128: + imul rbx, rsp +CryptonightR_instruction129: + imul rbx, rsp +CryptonightR_instruction130: + imul rbx, rsp +CryptonightR_instruction131: + add rbx, rsp + add rbx, 2147483647 +CryptonightR_instruction132: + sub rbx, rsp +CryptonightR_instruction133: + ror ebx, cl +CryptonightR_instruction134: + rol ebx, cl +CryptonightR_instruction135: + xor rbx, rsp +CryptonightR_instruction136: + imul rsi, rsp +CryptonightR_instruction137: + imul rsi, rsp +CryptonightR_instruction138: + imul rsi, rsp +CryptonightR_instruction139: + add rsi, rsp + add rsi, 2147483647 +CryptonightR_instruction140: + sub rsi, rsp +CryptonightR_instruction141: + ror esi, cl +CryptonightR_instruction142: + rol esi, cl +CryptonightR_instruction143: + xor rsi, rsp +CryptonightR_instruction144: + 
imul rdi, rsp +CryptonightR_instruction145: + imul rdi, rsp +CryptonightR_instruction146: + imul rdi, rsp +CryptonightR_instruction147: + add rdi, rsp + add rdi, 2147483647 +CryptonightR_instruction148: + sub rdi, rsp +CryptonightR_instruction149: + ror edi, cl +CryptonightR_instruction150: + rol edi, cl +CryptonightR_instruction151: + xor rdi, rsp +CryptonightR_instruction152: + imul rbp, rsp +CryptonightR_instruction153: + imul rbp, rsp +CryptonightR_instruction154: + imul rbp, rsp +CryptonightR_instruction155: + add rbp, rsp + add rbp, 2147483647 +CryptonightR_instruction156: + sub rbp, rsp +CryptonightR_instruction157: + ror ebp, cl +CryptonightR_instruction158: + rol ebp, cl +CryptonightR_instruction159: + xor rbp, rsp +CryptonightR_instruction160: + imul rbx, r15 +CryptonightR_instruction161: + imul rbx, r15 +CryptonightR_instruction162: + imul rbx, r15 +CryptonightR_instruction163: + add rbx, r15 + add rbx, 2147483647 +CryptonightR_instruction164: + sub rbx, r15 +CryptonightR_instruction165: + ror ebx, cl +CryptonightR_instruction166: + rol ebx, cl +CryptonightR_instruction167: + xor rbx, r15 +CryptonightR_instruction168: + imul rsi, r15 +CryptonightR_instruction169: + imul rsi, r15 +CryptonightR_instruction170: + imul rsi, r15 +CryptonightR_instruction171: + add rsi, r15 + add rsi, 2147483647 +CryptonightR_instruction172: + sub rsi, r15 +CryptonightR_instruction173: + ror esi, cl +CryptonightR_instruction174: + rol esi, cl +CryptonightR_instruction175: + xor rsi, r15 +CryptonightR_instruction176: + imul rdi, r15 +CryptonightR_instruction177: + imul rdi, r15 +CryptonightR_instruction178: + imul rdi, r15 +CryptonightR_instruction179: + add rdi, r15 + add rdi, 2147483647 +CryptonightR_instruction180: + sub rdi, r15 +CryptonightR_instruction181: + ror edi, cl +CryptonightR_instruction182: + rol edi, cl +CryptonightR_instruction183: + xor rdi, r15 +CryptonightR_instruction184: + imul rbp, r15 +CryptonightR_instruction185: + imul rbp, r15 
+CryptonightR_instruction186: + imul rbp, r15 +CryptonightR_instruction187: + add rbp, r15 + add rbp, 2147483647 +CryptonightR_instruction188: + sub rbp, r15 +CryptonightR_instruction189: + ror ebp, cl +CryptonightR_instruction190: + rol ebp, cl +CryptonightR_instruction191: + xor rbp, r15 +CryptonightR_instruction192: + imul rbx, rax +CryptonightR_instruction193: + imul rbx, rax +CryptonightR_instruction194: + imul rbx, rax +CryptonightR_instruction195: + add rbx, rax + add rbx, 2147483647 +CryptonightR_instruction196: + sub rbx, rax +CryptonightR_instruction197: + ror ebx, cl +CryptonightR_instruction198: + rol ebx, cl +CryptonightR_instruction199: + xor rbx, rax +CryptonightR_instruction200: + imul rsi, rax +CryptonightR_instruction201: + imul rsi, rax +CryptonightR_instruction202: + imul rsi, rax +CryptonightR_instruction203: + add rsi, rax + add rsi, 2147483647 +CryptonightR_instruction204: + sub rsi, rax +CryptonightR_instruction205: + ror esi, cl +CryptonightR_instruction206: + rol esi, cl +CryptonightR_instruction207: + xor rsi, rax +CryptonightR_instruction208: + imul rdi, rax +CryptonightR_instruction209: + imul rdi, rax +CryptonightR_instruction210: + imul rdi, rax +CryptonightR_instruction211: + add rdi, rax + add rdi, 2147483647 +CryptonightR_instruction212: + sub rdi, rax +CryptonightR_instruction213: + ror edi, cl +CryptonightR_instruction214: + rol edi, cl +CryptonightR_instruction215: + xor rdi, rax +CryptonightR_instruction216: + imul rbp, rax +CryptonightR_instruction217: + imul rbp, rax +CryptonightR_instruction218: + imul rbp, rax +CryptonightR_instruction219: + add rbp, rax + add rbp, 2147483647 +CryptonightR_instruction220: + sub rbp, rax +CryptonightR_instruction221: + ror ebp, cl +CryptonightR_instruction222: + rol ebp, cl +CryptonightR_instruction223: + xor rbp, rax +CryptonightR_instruction224: + imul rbx, rdx +CryptonightR_instruction225: + imul rbx, rdx +CryptonightR_instruction226: + imul rbx, rdx +CryptonightR_instruction227: + add 
rbx, rdx + add rbx, 2147483647 +CryptonightR_instruction228: + sub rbx, rdx +CryptonightR_instruction229: + ror ebx, cl +CryptonightR_instruction230: + rol ebx, cl +CryptonightR_instruction231: + xor rbx, rdx +CryptonightR_instruction232: + imul rsi, rdx +CryptonightR_instruction233: + imul rsi, rdx +CryptonightR_instruction234: + imul rsi, rdx +CryptonightR_instruction235: + add rsi, rdx + add rsi, 2147483647 +CryptonightR_instruction236: + sub rsi, rdx +CryptonightR_instruction237: + ror esi, cl +CryptonightR_instruction238: + rol esi, cl +CryptonightR_instruction239: + xor rsi, rdx +CryptonightR_instruction240: + imul rdi, rdx +CryptonightR_instruction241: + imul rdi, rdx +CryptonightR_instruction242: + imul rdi, rdx +CryptonightR_instruction243: + add rdi, rdx + add rdi, 2147483647 +CryptonightR_instruction244: + sub rdi, rdx +CryptonightR_instruction245: + ror edi, cl +CryptonightR_instruction246: + rol edi, cl +CryptonightR_instruction247: + xor rdi, rdx +CryptonightR_instruction248: + imul rbp, rdx +CryptonightR_instruction249: + imul rbp, rdx +CryptonightR_instruction250: + imul rbp, rdx +CryptonightR_instruction251: + add rbp, rdx + add rbp, 2147483647 +CryptonightR_instruction252: + sub rbp, rdx +CryptonightR_instruction253: + ror ebp, cl +CryptonightR_instruction254: + rol ebp, cl +CryptonightR_instruction255: + xor rbp, rdx +CryptonightR_instruction256: + imul rbx, rbx +CryptonightR_instruction_mov0: + +CryptonightR_instruction_mov1: + +CryptonightR_instruction_mov2: + +CryptonightR_instruction_mov3: + +CryptonightR_instruction_mov4: + +CryptonightR_instruction_mov5: + mov rcx, rbx +CryptonightR_instruction_mov6: + mov rcx, rbx +CryptonightR_instruction_mov7: + +CryptonightR_instruction_mov8: + +CryptonightR_instruction_mov9: + +CryptonightR_instruction_mov10: + +CryptonightR_instruction_mov11: + +CryptonightR_instruction_mov12: + +CryptonightR_instruction_mov13: + mov rcx, rbx +CryptonightR_instruction_mov14: + mov rcx, rbx 
+CryptonightR_instruction_mov15: + +CryptonightR_instruction_mov16: + +CryptonightR_instruction_mov17: + +CryptonightR_instruction_mov18: + +CryptonightR_instruction_mov19: + +CryptonightR_instruction_mov20: + +CryptonightR_instruction_mov21: + mov rcx, rbx +CryptonightR_instruction_mov22: + mov rcx, rbx +CryptonightR_instruction_mov23: + +CryptonightR_instruction_mov24: + +CryptonightR_instruction_mov25: + +CryptonightR_instruction_mov26: + +CryptonightR_instruction_mov27: + +CryptonightR_instruction_mov28: + +CryptonightR_instruction_mov29: + mov rcx, rbx +CryptonightR_instruction_mov30: + mov rcx, rbx +CryptonightR_instruction_mov31: + +CryptonightR_instruction_mov32: + +CryptonightR_instruction_mov33: + +CryptonightR_instruction_mov34: + +CryptonightR_instruction_mov35: + +CryptonightR_instruction_mov36: + +CryptonightR_instruction_mov37: + mov rcx, rsi +CryptonightR_instruction_mov38: + mov rcx, rsi +CryptonightR_instruction_mov39: + +CryptonightR_instruction_mov40: + +CryptonightR_instruction_mov41: + +CryptonightR_instruction_mov42: + +CryptonightR_instruction_mov43: + +CryptonightR_instruction_mov44: + +CryptonightR_instruction_mov45: + mov rcx, rsi +CryptonightR_instruction_mov46: + mov rcx, rsi +CryptonightR_instruction_mov47: + +CryptonightR_instruction_mov48: + +CryptonightR_instruction_mov49: + +CryptonightR_instruction_mov50: + +CryptonightR_instruction_mov51: + +CryptonightR_instruction_mov52: + +CryptonightR_instruction_mov53: + mov rcx, rsi +CryptonightR_instruction_mov54: + mov rcx, rsi +CryptonightR_instruction_mov55: + +CryptonightR_instruction_mov56: + +CryptonightR_instruction_mov57: + +CryptonightR_instruction_mov58: + +CryptonightR_instruction_mov59: + +CryptonightR_instruction_mov60: + +CryptonightR_instruction_mov61: + mov rcx, rsi +CryptonightR_instruction_mov62: + mov rcx, rsi +CryptonightR_instruction_mov63: + +CryptonightR_instruction_mov64: + +CryptonightR_instruction_mov65: + +CryptonightR_instruction_mov66: + 
+CryptonightR_instruction_mov67: + +CryptonightR_instruction_mov68: + +CryptonightR_instruction_mov69: + mov rcx, rdi +CryptonightR_instruction_mov70: + mov rcx, rdi +CryptonightR_instruction_mov71: + +CryptonightR_instruction_mov72: + +CryptonightR_instruction_mov73: + +CryptonightR_instruction_mov74: + +CryptonightR_instruction_mov75: + +CryptonightR_instruction_mov76: + +CryptonightR_instruction_mov77: + mov rcx, rdi +CryptonightR_instruction_mov78: + mov rcx, rdi +CryptonightR_instruction_mov79: + +CryptonightR_instruction_mov80: + +CryptonightR_instruction_mov81: + +CryptonightR_instruction_mov82: + +CryptonightR_instruction_mov83: + +CryptonightR_instruction_mov84: + +CryptonightR_instruction_mov85: + mov rcx, rdi +CryptonightR_instruction_mov86: + mov rcx, rdi +CryptonightR_instruction_mov87: + +CryptonightR_instruction_mov88: + +CryptonightR_instruction_mov89: + +CryptonightR_instruction_mov90: + +CryptonightR_instruction_mov91: + +CryptonightR_instruction_mov92: + +CryptonightR_instruction_mov93: + mov rcx, rdi +CryptonightR_instruction_mov94: + mov rcx, rdi +CryptonightR_instruction_mov95: + +CryptonightR_instruction_mov96: + +CryptonightR_instruction_mov97: + +CryptonightR_instruction_mov98: + +CryptonightR_instruction_mov99: + +CryptonightR_instruction_mov100: + +CryptonightR_instruction_mov101: + mov rcx, rbp +CryptonightR_instruction_mov102: + mov rcx, rbp +CryptonightR_instruction_mov103: + +CryptonightR_instruction_mov104: + +CryptonightR_instruction_mov105: + +CryptonightR_instruction_mov106: + +CryptonightR_instruction_mov107: + +CryptonightR_instruction_mov108: + +CryptonightR_instruction_mov109: + mov rcx, rbp +CryptonightR_instruction_mov110: + mov rcx, rbp +CryptonightR_instruction_mov111: + +CryptonightR_instruction_mov112: + +CryptonightR_instruction_mov113: + +CryptonightR_instruction_mov114: + +CryptonightR_instruction_mov115: + +CryptonightR_instruction_mov116: + +CryptonightR_instruction_mov117: + mov rcx, rbp 
+CryptonightR_instruction_mov118: + mov rcx, rbp +CryptonightR_instruction_mov119: + +CryptonightR_instruction_mov120: + +CryptonightR_instruction_mov121: + +CryptonightR_instruction_mov122: + +CryptonightR_instruction_mov123: + +CryptonightR_instruction_mov124: + +CryptonightR_instruction_mov125: + mov rcx, rbp +CryptonightR_instruction_mov126: + mov rcx, rbp +CryptonightR_instruction_mov127: + +CryptonightR_instruction_mov128: + +CryptonightR_instruction_mov129: + +CryptonightR_instruction_mov130: + +CryptonightR_instruction_mov131: + +CryptonightR_instruction_mov132: + +CryptonightR_instruction_mov133: + mov rcx, rsp +CryptonightR_instruction_mov134: + mov rcx, rsp +CryptonightR_instruction_mov135: + +CryptonightR_instruction_mov136: + +CryptonightR_instruction_mov137: + +CryptonightR_instruction_mov138: + +CryptonightR_instruction_mov139: + +CryptonightR_instruction_mov140: + +CryptonightR_instruction_mov141: + mov rcx, rsp +CryptonightR_instruction_mov142: + mov rcx, rsp +CryptonightR_instruction_mov143: + +CryptonightR_instruction_mov144: + +CryptonightR_instruction_mov145: + +CryptonightR_instruction_mov146: + +CryptonightR_instruction_mov147: + +CryptonightR_instruction_mov148: + +CryptonightR_instruction_mov149: + mov rcx, rsp +CryptonightR_instruction_mov150: + mov rcx, rsp +CryptonightR_instruction_mov151: + +CryptonightR_instruction_mov152: + +CryptonightR_instruction_mov153: + +CryptonightR_instruction_mov154: + +CryptonightR_instruction_mov155: + +CryptonightR_instruction_mov156: + +CryptonightR_instruction_mov157: + mov rcx, rsp +CryptonightR_instruction_mov158: + mov rcx, rsp +CryptonightR_instruction_mov159: + +CryptonightR_instruction_mov160: + +CryptonightR_instruction_mov161: + +CryptonightR_instruction_mov162: + +CryptonightR_instruction_mov163: + +CryptonightR_instruction_mov164: + +CryptonightR_instruction_mov165: + mov rcx, r15 +CryptonightR_instruction_mov166: + mov rcx, r15 +CryptonightR_instruction_mov167: + 
+CryptonightR_instruction_mov168: + +CryptonightR_instruction_mov169: + +CryptonightR_instruction_mov170: + +CryptonightR_instruction_mov171: + +CryptonightR_instruction_mov172: + +CryptonightR_instruction_mov173: + mov rcx, r15 +CryptonightR_instruction_mov174: + mov rcx, r15 +CryptonightR_instruction_mov175: + +CryptonightR_instruction_mov176: + +CryptonightR_instruction_mov177: + +CryptonightR_instruction_mov178: + +CryptonightR_instruction_mov179: + +CryptonightR_instruction_mov180: + +CryptonightR_instruction_mov181: + mov rcx, r15 +CryptonightR_instruction_mov182: + mov rcx, r15 +CryptonightR_instruction_mov183: + +CryptonightR_instruction_mov184: + +CryptonightR_instruction_mov185: + +CryptonightR_instruction_mov186: + +CryptonightR_instruction_mov187: + +CryptonightR_instruction_mov188: + +CryptonightR_instruction_mov189: + mov rcx, r15 +CryptonightR_instruction_mov190: + mov rcx, r15 +CryptonightR_instruction_mov191: + +CryptonightR_instruction_mov192: + +CryptonightR_instruction_mov193: + +CryptonightR_instruction_mov194: + +CryptonightR_instruction_mov195: + +CryptonightR_instruction_mov196: + +CryptonightR_instruction_mov197: + mov rcx, rax +CryptonightR_instruction_mov198: + mov rcx, rax +CryptonightR_instruction_mov199: + +CryptonightR_instruction_mov200: + +CryptonightR_instruction_mov201: + +CryptonightR_instruction_mov202: + +CryptonightR_instruction_mov203: + +CryptonightR_instruction_mov204: + +CryptonightR_instruction_mov205: + mov rcx, rax +CryptonightR_instruction_mov206: + mov rcx, rax +CryptonightR_instruction_mov207: + +CryptonightR_instruction_mov208: + +CryptonightR_instruction_mov209: + +CryptonightR_instruction_mov210: + +CryptonightR_instruction_mov211: + +CryptonightR_instruction_mov212: + +CryptonightR_instruction_mov213: + mov rcx, rax +CryptonightR_instruction_mov214: + mov rcx, rax +CryptonightR_instruction_mov215: + +CryptonightR_instruction_mov216: + +CryptonightR_instruction_mov217: + +CryptonightR_instruction_mov218: + 
+CryptonightR_instruction_mov219: + +CryptonightR_instruction_mov220: + +CryptonightR_instruction_mov221: + mov rcx, rax +CryptonightR_instruction_mov222: + mov rcx, rax +CryptonightR_instruction_mov223: + +CryptonightR_instruction_mov224: + +CryptonightR_instruction_mov225: + +CryptonightR_instruction_mov226: + +CryptonightR_instruction_mov227: + +CryptonightR_instruction_mov228: + +CryptonightR_instruction_mov229: + mov rcx, rdx +CryptonightR_instruction_mov230: + mov rcx, rdx +CryptonightR_instruction_mov231: + +CryptonightR_instruction_mov232: + +CryptonightR_instruction_mov233: + +CryptonightR_instruction_mov234: + +CryptonightR_instruction_mov235: + +CryptonightR_instruction_mov236: + +CryptonightR_instruction_mov237: + mov rcx, rdx +CryptonightR_instruction_mov238: + mov rcx, rdx +CryptonightR_instruction_mov239: + +CryptonightR_instruction_mov240: + +CryptonightR_instruction_mov241: + +CryptonightR_instruction_mov242: + +CryptonightR_instruction_mov243: + +CryptonightR_instruction_mov244: + +CryptonightR_instruction_mov245: + mov rcx, rdx +CryptonightR_instruction_mov246: + mov rcx, rdx +CryptonightR_instruction_mov247: + +CryptonightR_instruction_mov248: + +CryptonightR_instruction_mov249: + +CryptonightR_instruction_mov250: + +CryptonightR_instruction_mov251: + +CryptonightR_instruction_mov252: + +CryptonightR_instruction_mov253: + mov rcx, rdx +CryptonightR_instruction_mov254: + mov rcx, rdx +CryptonightR_instruction_mov255: + +CryptonightR_instruction_mov256: + +_TEXT_CN_TEMPLATE ENDS +END diff --git a/src/crypto/asm/CryptonightR_template.h b/src/crypto/asm/CryptonightR_template.h new file mode 100644 index 00000000..182c6870 --- /dev/null +++ b/src/crypto/asm/CryptonightR_template.h @@ -0,0 +1,1050 @@ +// Auto-generated file, do not edit + +extern "C" +{ + void CryptonightR_template_part1(); + void CryptonightR_template_mainloop(); + void CryptonightR_template_part2(); + void CryptonightR_template_part3(); + void CryptonightR_template_end(); + void 
CryptonightR_template_double_part1(); + void CryptonightR_template_double_mainloop(); + void CryptonightR_template_double_part2(); + void CryptonightR_template_double_part3(); + void CryptonightR_template_double_part4(); + void CryptonightR_template_double_end(); + void CryptonightR_instruction0(); + void CryptonightR_instruction1(); + void CryptonightR_instruction2(); + void CryptonightR_instruction3(); + void CryptonightR_instruction4(); + void CryptonightR_instruction5(); + void CryptonightR_instruction6(); + void CryptonightR_instruction7(); + void CryptonightR_instruction8(); + void CryptonightR_instruction9(); + void CryptonightR_instruction10(); + void CryptonightR_instruction11(); + void CryptonightR_instruction12(); + void CryptonightR_instruction13(); + void CryptonightR_instruction14(); + void CryptonightR_instruction15(); + void CryptonightR_instruction16(); + void CryptonightR_instruction17(); + void CryptonightR_instruction18(); + void CryptonightR_instruction19(); + void CryptonightR_instruction20(); + void CryptonightR_instruction21(); + void CryptonightR_instruction22(); + void CryptonightR_instruction23(); + void CryptonightR_instruction24(); + void CryptonightR_instruction25(); + void CryptonightR_instruction26(); + void CryptonightR_instruction27(); + void CryptonightR_instruction28(); + void CryptonightR_instruction29(); + void CryptonightR_instruction30(); + void CryptonightR_instruction31(); + void CryptonightR_instruction32(); + void CryptonightR_instruction33(); + void CryptonightR_instruction34(); + void CryptonightR_instruction35(); + void CryptonightR_instruction36(); + void CryptonightR_instruction37(); + void CryptonightR_instruction38(); + void CryptonightR_instruction39(); + void CryptonightR_instruction40(); + void CryptonightR_instruction41(); + void CryptonightR_instruction42(); + void CryptonightR_instruction43(); + void CryptonightR_instruction44(); + void CryptonightR_instruction45(); + void CryptonightR_instruction46(); + void 
CryptonightR_instruction47(); + void CryptonightR_instruction48(); + void CryptonightR_instruction49(); + void CryptonightR_instruction50(); + void CryptonightR_instruction51(); + void CryptonightR_instruction52(); + void CryptonightR_instruction53(); + void CryptonightR_instruction54(); + void CryptonightR_instruction55(); + void CryptonightR_instruction56(); + void CryptonightR_instruction57(); + void CryptonightR_instruction58(); + void CryptonightR_instruction59(); + void CryptonightR_instruction60(); + void CryptonightR_instruction61(); + void CryptonightR_instruction62(); + void CryptonightR_instruction63(); + void CryptonightR_instruction64(); + void CryptonightR_instruction65(); + void CryptonightR_instruction66(); + void CryptonightR_instruction67(); + void CryptonightR_instruction68(); + void CryptonightR_instruction69(); + void CryptonightR_instruction70(); + void CryptonightR_instruction71(); + void CryptonightR_instruction72(); + void CryptonightR_instruction73(); + void CryptonightR_instruction74(); + void CryptonightR_instruction75(); + void CryptonightR_instruction76(); + void CryptonightR_instruction77(); + void CryptonightR_instruction78(); + void CryptonightR_instruction79(); + void CryptonightR_instruction80(); + void CryptonightR_instruction81(); + void CryptonightR_instruction82(); + void CryptonightR_instruction83(); + void CryptonightR_instruction84(); + void CryptonightR_instruction85(); + void CryptonightR_instruction86(); + void CryptonightR_instruction87(); + void CryptonightR_instruction88(); + void CryptonightR_instruction89(); + void CryptonightR_instruction90(); + void CryptonightR_instruction91(); + void CryptonightR_instruction92(); + void CryptonightR_instruction93(); + void CryptonightR_instruction94(); + void CryptonightR_instruction95(); + void CryptonightR_instruction96(); + void CryptonightR_instruction97(); + void CryptonightR_instruction98(); + void CryptonightR_instruction99(); + void CryptonightR_instruction100(); + void 
CryptonightR_instruction101(); + void CryptonightR_instruction102(); + void CryptonightR_instruction103(); + void CryptonightR_instruction104(); + void CryptonightR_instruction105(); + void CryptonightR_instruction106(); + void CryptonightR_instruction107(); + void CryptonightR_instruction108(); + void CryptonightR_instruction109(); + void CryptonightR_instruction110(); + void CryptonightR_instruction111(); + void CryptonightR_instruction112(); + void CryptonightR_instruction113(); + void CryptonightR_instruction114(); + void CryptonightR_instruction115(); + void CryptonightR_instruction116(); + void CryptonightR_instruction117(); + void CryptonightR_instruction118(); + void CryptonightR_instruction119(); + void CryptonightR_instruction120(); + void CryptonightR_instruction121(); + void CryptonightR_instruction122(); + void CryptonightR_instruction123(); + void CryptonightR_instruction124(); + void CryptonightR_instruction125(); + void CryptonightR_instruction126(); + void CryptonightR_instruction127(); + void CryptonightR_instruction128(); + void CryptonightR_instruction129(); + void CryptonightR_instruction130(); + void CryptonightR_instruction131(); + void CryptonightR_instruction132(); + void CryptonightR_instruction133(); + void CryptonightR_instruction134(); + void CryptonightR_instruction135(); + void CryptonightR_instruction136(); + void CryptonightR_instruction137(); + void CryptonightR_instruction138(); + void CryptonightR_instruction139(); + void CryptonightR_instruction140(); + void CryptonightR_instruction141(); + void CryptonightR_instruction142(); + void CryptonightR_instruction143(); + void CryptonightR_instruction144(); + void CryptonightR_instruction145(); + void CryptonightR_instruction146(); + void CryptonightR_instruction147(); + void CryptonightR_instruction148(); + void CryptonightR_instruction149(); + void CryptonightR_instruction150(); + void CryptonightR_instruction151(); + void CryptonightR_instruction152(); + void 
CryptonightR_instruction153(); + void CryptonightR_instruction154(); + void CryptonightR_instruction155(); + void CryptonightR_instruction156(); + void CryptonightR_instruction157(); + void CryptonightR_instruction158(); + void CryptonightR_instruction159(); + void CryptonightR_instruction160(); + void CryptonightR_instruction161(); + void CryptonightR_instruction162(); + void CryptonightR_instruction163(); + void CryptonightR_instruction164(); + void CryptonightR_instruction165(); + void CryptonightR_instruction166(); + void CryptonightR_instruction167(); + void CryptonightR_instruction168(); + void CryptonightR_instruction169(); + void CryptonightR_instruction170(); + void CryptonightR_instruction171(); + void CryptonightR_instruction172(); + void CryptonightR_instruction173(); + void CryptonightR_instruction174(); + void CryptonightR_instruction175(); + void CryptonightR_instruction176(); + void CryptonightR_instruction177(); + void CryptonightR_instruction178(); + void CryptonightR_instruction179(); + void CryptonightR_instruction180(); + void CryptonightR_instruction181(); + void CryptonightR_instruction182(); + void CryptonightR_instruction183(); + void CryptonightR_instruction184(); + void CryptonightR_instruction185(); + void CryptonightR_instruction186(); + void CryptonightR_instruction187(); + void CryptonightR_instruction188(); + void CryptonightR_instruction189(); + void CryptonightR_instruction190(); + void CryptonightR_instruction191(); + void CryptonightR_instruction192(); + void CryptonightR_instruction193(); + void CryptonightR_instruction194(); + void CryptonightR_instruction195(); + void CryptonightR_instruction196(); + void CryptonightR_instruction197(); + void CryptonightR_instruction198(); + void CryptonightR_instruction199(); + void CryptonightR_instruction200(); + void CryptonightR_instruction201(); + void CryptonightR_instruction202(); + void CryptonightR_instruction203(); + void CryptonightR_instruction204(); + void 
CryptonightR_instruction205(); + void CryptonightR_instruction206(); + void CryptonightR_instruction207(); + void CryptonightR_instruction208(); + void CryptonightR_instruction209(); + void CryptonightR_instruction210(); + void CryptonightR_instruction211(); + void CryptonightR_instruction212(); + void CryptonightR_instruction213(); + void CryptonightR_instruction214(); + void CryptonightR_instruction215(); + void CryptonightR_instruction216(); + void CryptonightR_instruction217(); + void CryptonightR_instruction218(); + void CryptonightR_instruction219(); + void CryptonightR_instruction220(); + void CryptonightR_instruction221(); + void CryptonightR_instruction222(); + void CryptonightR_instruction223(); + void CryptonightR_instruction224(); + void CryptonightR_instruction225(); + void CryptonightR_instruction226(); + void CryptonightR_instruction227(); + void CryptonightR_instruction228(); + void CryptonightR_instruction229(); + void CryptonightR_instruction230(); + void CryptonightR_instruction231(); + void CryptonightR_instruction232(); + void CryptonightR_instruction233(); + void CryptonightR_instruction234(); + void CryptonightR_instruction235(); + void CryptonightR_instruction236(); + void CryptonightR_instruction237(); + void CryptonightR_instruction238(); + void CryptonightR_instruction239(); + void CryptonightR_instruction240(); + void CryptonightR_instruction241(); + void CryptonightR_instruction242(); + void CryptonightR_instruction243(); + void CryptonightR_instruction244(); + void CryptonightR_instruction245(); + void CryptonightR_instruction246(); + void CryptonightR_instruction247(); + void CryptonightR_instruction248(); + void CryptonightR_instruction249(); + void CryptonightR_instruction250(); + void CryptonightR_instruction251(); + void CryptonightR_instruction252(); + void CryptonightR_instruction253(); + void CryptonightR_instruction254(); + void CryptonightR_instruction255(); + void CryptonightR_instruction256(); + void 
CryptonightR_instruction_mov0(); + void CryptonightR_instruction_mov1(); + void CryptonightR_instruction_mov2(); + void CryptonightR_instruction_mov3(); + void CryptonightR_instruction_mov4(); + void CryptonightR_instruction_mov5(); + void CryptonightR_instruction_mov6(); + void CryptonightR_instruction_mov7(); + void CryptonightR_instruction_mov8(); + void CryptonightR_instruction_mov9(); + void CryptonightR_instruction_mov10(); + void CryptonightR_instruction_mov11(); + void CryptonightR_instruction_mov12(); + void CryptonightR_instruction_mov13(); + void CryptonightR_instruction_mov14(); + void CryptonightR_instruction_mov15(); + void CryptonightR_instruction_mov16(); + void CryptonightR_instruction_mov17(); + void CryptonightR_instruction_mov18(); + void CryptonightR_instruction_mov19(); + void CryptonightR_instruction_mov20(); + void CryptonightR_instruction_mov21(); + void CryptonightR_instruction_mov22(); + void CryptonightR_instruction_mov23(); + void CryptonightR_instruction_mov24(); + void CryptonightR_instruction_mov25(); + void CryptonightR_instruction_mov26(); + void CryptonightR_instruction_mov27(); + void CryptonightR_instruction_mov28(); + void CryptonightR_instruction_mov29(); + void CryptonightR_instruction_mov30(); + void CryptonightR_instruction_mov31(); + void CryptonightR_instruction_mov32(); + void CryptonightR_instruction_mov33(); + void CryptonightR_instruction_mov34(); + void CryptonightR_instruction_mov35(); + void CryptonightR_instruction_mov36(); + void CryptonightR_instruction_mov37(); + void CryptonightR_instruction_mov38(); + void CryptonightR_instruction_mov39(); + void CryptonightR_instruction_mov40(); + void CryptonightR_instruction_mov41(); + void CryptonightR_instruction_mov42(); + void CryptonightR_instruction_mov43(); + void CryptonightR_instruction_mov44(); + void CryptonightR_instruction_mov45(); + void CryptonightR_instruction_mov46(); + void CryptonightR_instruction_mov47(); + void CryptonightR_instruction_mov48(); + void 
CryptonightR_instruction_mov49(); + void CryptonightR_instruction_mov50(); + void CryptonightR_instruction_mov51(); + void CryptonightR_instruction_mov52(); + void CryptonightR_instruction_mov53(); + void CryptonightR_instruction_mov54(); + void CryptonightR_instruction_mov55(); + void CryptonightR_instruction_mov56(); + void CryptonightR_instruction_mov57(); + void CryptonightR_instruction_mov58(); + void CryptonightR_instruction_mov59(); + void CryptonightR_instruction_mov60(); + void CryptonightR_instruction_mov61(); + void CryptonightR_instruction_mov62(); + void CryptonightR_instruction_mov63(); + void CryptonightR_instruction_mov64(); + void CryptonightR_instruction_mov65(); + void CryptonightR_instruction_mov66(); + void CryptonightR_instruction_mov67(); + void CryptonightR_instruction_mov68(); + void CryptonightR_instruction_mov69(); + void CryptonightR_instruction_mov70(); + void CryptonightR_instruction_mov71(); + void CryptonightR_instruction_mov72(); + void CryptonightR_instruction_mov73(); + void CryptonightR_instruction_mov74(); + void CryptonightR_instruction_mov75(); + void CryptonightR_instruction_mov76(); + void CryptonightR_instruction_mov77(); + void CryptonightR_instruction_mov78(); + void CryptonightR_instruction_mov79(); + void CryptonightR_instruction_mov80(); + void CryptonightR_instruction_mov81(); + void CryptonightR_instruction_mov82(); + void CryptonightR_instruction_mov83(); + void CryptonightR_instruction_mov84(); + void CryptonightR_instruction_mov85(); + void CryptonightR_instruction_mov86(); + void CryptonightR_instruction_mov87(); + void CryptonightR_instruction_mov88(); + void CryptonightR_instruction_mov89(); + void CryptonightR_instruction_mov90(); + void CryptonightR_instruction_mov91(); + void CryptonightR_instruction_mov92(); + void CryptonightR_instruction_mov93(); + void CryptonightR_instruction_mov94(); + void CryptonightR_instruction_mov95(); + void CryptonightR_instruction_mov96(); + void 
CryptonightR_instruction_mov97(); + void CryptonightR_instruction_mov98(); + void CryptonightR_instruction_mov99(); + void CryptonightR_instruction_mov100(); + void CryptonightR_instruction_mov101(); + void CryptonightR_instruction_mov102(); + void CryptonightR_instruction_mov103(); + void CryptonightR_instruction_mov104(); + void CryptonightR_instruction_mov105(); + void CryptonightR_instruction_mov106(); + void CryptonightR_instruction_mov107(); + void CryptonightR_instruction_mov108(); + void CryptonightR_instruction_mov109(); + void CryptonightR_instruction_mov110(); + void CryptonightR_instruction_mov111(); + void CryptonightR_instruction_mov112(); + void CryptonightR_instruction_mov113(); + void CryptonightR_instruction_mov114(); + void CryptonightR_instruction_mov115(); + void CryptonightR_instruction_mov116(); + void CryptonightR_instruction_mov117(); + void CryptonightR_instruction_mov118(); + void CryptonightR_instruction_mov119(); + void CryptonightR_instruction_mov120(); + void CryptonightR_instruction_mov121(); + void CryptonightR_instruction_mov122(); + void CryptonightR_instruction_mov123(); + void CryptonightR_instruction_mov124(); + void CryptonightR_instruction_mov125(); + void CryptonightR_instruction_mov126(); + void CryptonightR_instruction_mov127(); + void CryptonightR_instruction_mov128(); + void CryptonightR_instruction_mov129(); + void CryptonightR_instruction_mov130(); + void CryptonightR_instruction_mov131(); + void CryptonightR_instruction_mov132(); + void CryptonightR_instruction_mov133(); + void CryptonightR_instruction_mov134(); + void CryptonightR_instruction_mov135(); + void CryptonightR_instruction_mov136(); + void CryptonightR_instruction_mov137(); + void CryptonightR_instruction_mov138(); + void CryptonightR_instruction_mov139(); + void CryptonightR_instruction_mov140(); + void CryptonightR_instruction_mov141(); + void CryptonightR_instruction_mov142(); + void CryptonightR_instruction_mov143(); + void 
CryptonightR_instruction_mov144(); + void CryptonightR_instruction_mov145(); + void CryptonightR_instruction_mov146(); + void CryptonightR_instruction_mov147(); + void CryptonightR_instruction_mov148(); + void CryptonightR_instruction_mov149(); + void CryptonightR_instruction_mov150(); + void CryptonightR_instruction_mov151(); + void CryptonightR_instruction_mov152(); + void CryptonightR_instruction_mov153(); + void CryptonightR_instruction_mov154(); + void CryptonightR_instruction_mov155(); + void CryptonightR_instruction_mov156(); + void CryptonightR_instruction_mov157(); + void CryptonightR_instruction_mov158(); + void CryptonightR_instruction_mov159(); + void CryptonightR_instruction_mov160(); + void CryptonightR_instruction_mov161(); + void CryptonightR_instruction_mov162(); + void CryptonightR_instruction_mov163(); + void CryptonightR_instruction_mov164(); + void CryptonightR_instruction_mov165(); + void CryptonightR_instruction_mov166(); + void CryptonightR_instruction_mov167(); + void CryptonightR_instruction_mov168(); + void CryptonightR_instruction_mov169(); + void CryptonightR_instruction_mov170(); + void CryptonightR_instruction_mov171(); + void CryptonightR_instruction_mov172(); + void CryptonightR_instruction_mov173(); + void CryptonightR_instruction_mov174(); + void CryptonightR_instruction_mov175(); + void CryptonightR_instruction_mov176(); + void CryptonightR_instruction_mov177(); + void CryptonightR_instruction_mov178(); + void CryptonightR_instruction_mov179(); + void CryptonightR_instruction_mov180(); + void CryptonightR_instruction_mov181(); + void CryptonightR_instruction_mov182(); + void CryptonightR_instruction_mov183(); + void CryptonightR_instruction_mov184(); + void CryptonightR_instruction_mov185(); + void CryptonightR_instruction_mov186(); + void CryptonightR_instruction_mov187(); + void CryptonightR_instruction_mov188(); + void CryptonightR_instruction_mov189(); + void CryptonightR_instruction_mov190(); + void 
CryptonightR_instruction_mov191(); + void CryptonightR_instruction_mov192(); + void CryptonightR_instruction_mov193(); + void CryptonightR_instruction_mov194(); + void CryptonightR_instruction_mov195(); + void CryptonightR_instruction_mov196(); + void CryptonightR_instruction_mov197(); + void CryptonightR_instruction_mov198(); + void CryptonightR_instruction_mov199(); + void CryptonightR_instruction_mov200(); + void CryptonightR_instruction_mov201(); + void CryptonightR_instruction_mov202(); + void CryptonightR_instruction_mov203(); + void CryptonightR_instruction_mov204(); + void CryptonightR_instruction_mov205(); + void CryptonightR_instruction_mov206(); + void CryptonightR_instruction_mov207(); + void CryptonightR_instruction_mov208(); + void CryptonightR_instruction_mov209(); + void CryptonightR_instruction_mov210(); + void CryptonightR_instruction_mov211(); + void CryptonightR_instruction_mov212(); + void CryptonightR_instruction_mov213(); + void CryptonightR_instruction_mov214(); + void CryptonightR_instruction_mov215(); + void CryptonightR_instruction_mov216(); + void CryptonightR_instruction_mov217(); + void CryptonightR_instruction_mov218(); + void CryptonightR_instruction_mov219(); + void CryptonightR_instruction_mov220(); + void CryptonightR_instruction_mov221(); + void CryptonightR_instruction_mov222(); + void CryptonightR_instruction_mov223(); + void CryptonightR_instruction_mov224(); + void CryptonightR_instruction_mov225(); + void CryptonightR_instruction_mov226(); + void CryptonightR_instruction_mov227(); + void CryptonightR_instruction_mov228(); + void CryptonightR_instruction_mov229(); + void CryptonightR_instruction_mov230(); + void CryptonightR_instruction_mov231(); + void CryptonightR_instruction_mov232(); + void CryptonightR_instruction_mov233(); + void CryptonightR_instruction_mov234(); + void CryptonightR_instruction_mov235(); + void CryptonightR_instruction_mov236(); + void CryptonightR_instruction_mov237(); + void 
CryptonightR_instruction_mov238(); + void CryptonightR_instruction_mov239(); + void CryptonightR_instruction_mov240(); + void CryptonightR_instruction_mov241(); + void CryptonightR_instruction_mov242(); + void CryptonightR_instruction_mov243(); + void CryptonightR_instruction_mov244(); + void CryptonightR_instruction_mov245(); + void CryptonightR_instruction_mov246(); + void CryptonightR_instruction_mov247(); + void CryptonightR_instruction_mov248(); + void CryptonightR_instruction_mov249(); + void CryptonightR_instruction_mov250(); + void CryptonightR_instruction_mov251(); + void CryptonightR_instruction_mov252(); + void CryptonightR_instruction_mov253(); + void CryptonightR_instruction_mov254(); + void CryptonightR_instruction_mov255(); + void CryptonightR_instruction_mov256(); +} + +/* Lookup table of the 257 CryptonightR_instructionN symbols, listed in ascending order so that instructions[N] is CryptonightR_instructionN for N = 0..256. NOTE(review): the entries look like position markers inside assembly code rather than functions meant to be called - confirm against the code that reads this table. */ const void_func instructions[257] = { + CryptonightR_instruction0, + CryptonightR_instruction1, + CryptonightR_instruction2, + CryptonightR_instruction3, + CryptonightR_instruction4, + CryptonightR_instruction5, + CryptonightR_instruction6, + CryptonightR_instruction7, + CryptonightR_instruction8, + CryptonightR_instruction9, + CryptonightR_instruction10, + CryptonightR_instruction11, + CryptonightR_instruction12, + CryptonightR_instruction13, + CryptonightR_instruction14, + CryptonightR_instruction15, + CryptonightR_instruction16, + CryptonightR_instruction17, + CryptonightR_instruction18, + CryptonightR_instruction19, + CryptonightR_instruction20, + CryptonightR_instruction21, + CryptonightR_instruction22, + CryptonightR_instruction23, + CryptonightR_instruction24, + CryptonightR_instruction25, + CryptonightR_instruction26, + CryptonightR_instruction27, + CryptonightR_instruction28, + CryptonightR_instruction29, + CryptonightR_instruction30, + CryptonightR_instruction31, + CryptonightR_instruction32, + CryptonightR_instruction33, + CryptonightR_instruction34, + CryptonightR_instruction35, + CryptonightR_instruction36, + CryptonightR_instruction37, + CryptonightR_instruction38, + 
CryptonightR_instruction39, + CryptonightR_instruction40, + CryptonightR_instruction41, + CryptonightR_instruction42, + CryptonightR_instruction43, + CryptonightR_instruction44, + CryptonightR_instruction45, + CryptonightR_instruction46, + CryptonightR_instruction47, + CryptonightR_instruction48, + CryptonightR_instruction49, + CryptonightR_instruction50, + CryptonightR_instruction51, + CryptonightR_instruction52, + CryptonightR_instruction53, + CryptonightR_instruction54, + CryptonightR_instruction55, + CryptonightR_instruction56, + CryptonightR_instruction57, + CryptonightR_instruction58, + CryptonightR_instruction59, + CryptonightR_instruction60, + CryptonightR_instruction61, + CryptonightR_instruction62, + CryptonightR_instruction63, + CryptonightR_instruction64, + CryptonightR_instruction65, + CryptonightR_instruction66, + CryptonightR_instruction67, + CryptonightR_instruction68, + CryptonightR_instruction69, + CryptonightR_instruction70, + CryptonightR_instruction71, + CryptonightR_instruction72, + CryptonightR_instruction73, + CryptonightR_instruction74, + CryptonightR_instruction75, + CryptonightR_instruction76, + CryptonightR_instruction77, + CryptonightR_instruction78, + CryptonightR_instruction79, + CryptonightR_instruction80, + CryptonightR_instruction81, + CryptonightR_instruction82, + CryptonightR_instruction83, + CryptonightR_instruction84, + CryptonightR_instruction85, + CryptonightR_instruction86, + CryptonightR_instruction87, + CryptonightR_instruction88, + CryptonightR_instruction89, + CryptonightR_instruction90, + CryptonightR_instruction91, + CryptonightR_instruction92, + CryptonightR_instruction93, + CryptonightR_instruction94, + CryptonightR_instruction95, + CryptonightR_instruction96, + CryptonightR_instruction97, + CryptonightR_instruction98, + CryptonightR_instruction99, + CryptonightR_instruction100, + CryptonightR_instruction101, + CryptonightR_instruction102, + CryptonightR_instruction103, + CryptonightR_instruction104, + 
CryptonightR_instruction105, + CryptonightR_instruction106, + CryptonightR_instruction107, + CryptonightR_instruction108, + CryptonightR_instruction109, + CryptonightR_instruction110, + CryptonightR_instruction111, + CryptonightR_instruction112, + CryptonightR_instruction113, + CryptonightR_instruction114, + CryptonightR_instruction115, + CryptonightR_instruction116, + CryptonightR_instruction117, + CryptonightR_instruction118, + CryptonightR_instruction119, + CryptonightR_instruction120, + CryptonightR_instruction121, + CryptonightR_instruction122, + CryptonightR_instruction123, + CryptonightR_instruction124, + CryptonightR_instruction125, + CryptonightR_instruction126, + CryptonightR_instruction127, + CryptonightR_instruction128, + CryptonightR_instruction129, + CryptonightR_instruction130, + CryptonightR_instruction131, + CryptonightR_instruction132, + CryptonightR_instruction133, + CryptonightR_instruction134, + CryptonightR_instruction135, + CryptonightR_instruction136, + CryptonightR_instruction137, + CryptonightR_instruction138, + CryptonightR_instruction139, + CryptonightR_instruction140, + CryptonightR_instruction141, + CryptonightR_instruction142, + CryptonightR_instruction143, + CryptonightR_instruction144, + CryptonightR_instruction145, + CryptonightR_instruction146, + CryptonightR_instruction147, + CryptonightR_instruction148, + CryptonightR_instruction149, + CryptonightR_instruction150, + CryptonightR_instruction151, + CryptonightR_instruction152, + CryptonightR_instruction153, + CryptonightR_instruction154, + CryptonightR_instruction155, + CryptonightR_instruction156, + CryptonightR_instruction157, + CryptonightR_instruction158, + CryptonightR_instruction159, + CryptonightR_instruction160, + CryptonightR_instruction161, + CryptonightR_instruction162, + CryptonightR_instruction163, + CryptonightR_instruction164, + CryptonightR_instruction165, + CryptonightR_instruction166, + CryptonightR_instruction167, + CryptonightR_instruction168, + 
CryptonightR_instruction169, + CryptonightR_instruction170, + CryptonightR_instruction171, + CryptonightR_instruction172, + CryptonightR_instruction173, + CryptonightR_instruction174, + CryptonightR_instruction175, + CryptonightR_instruction176, + CryptonightR_instruction177, + CryptonightR_instruction178, + CryptonightR_instruction179, + CryptonightR_instruction180, + CryptonightR_instruction181, + CryptonightR_instruction182, + CryptonightR_instruction183, + CryptonightR_instruction184, + CryptonightR_instruction185, + CryptonightR_instruction186, + CryptonightR_instruction187, + CryptonightR_instruction188, + CryptonightR_instruction189, + CryptonightR_instruction190, + CryptonightR_instruction191, + CryptonightR_instruction192, + CryptonightR_instruction193, + CryptonightR_instruction194, + CryptonightR_instruction195, + CryptonightR_instruction196, + CryptonightR_instruction197, + CryptonightR_instruction198, + CryptonightR_instruction199, + CryptonightR_instruction200, + CryptonightR_instruction201, + CryptonightR_instruction202, + CryptonightR_instruction203, + CryptonightR_instruction204, + CryptonightR_instruction205, + CryptonightR_instruction206, + CryptonightR_instruction207, + CryptonightR_instruction208, + CryptonightR_instruction209, + CryptonightR_instruction210, + CryptonightR_instruction211, + CryptonightR_instruction212, + CryptonightR_instruction213, + CryptonightR_instruction214, + CryptonightR_instruction215, + CryptonightR_instruction216, + CryptonightR_instruction217, + CryptonightR_instruction218, + CryptonightR_instruction219, + CryptonightR_instruction220, + CryptonightR_instruction221, + CryptonightR_instruction222, + CryptonightR_instruction223, + CryptonightR_instruction224, + CryptonightR_instruction225, + CryptonightR_instruction226, + CryptonightR_instruction227, + CryptonightR_instruction228, + CryptonightR_instruction229, + CryptonightR_instruction230, + CryptonightR_instruction231, + CryptonightR_instruction232, + 
CryptonightR_instruction233, + CryptonightR_instruction234, + CryptonightR_instruction235, + CryptonightR_instruction236, + CryptonightR_instruction237, + CryptonightR_instruction238, + CryptonightR_instruction239, + CryptonightR_instruction240, + CryptonightR_instruction241, + CryptonightR_instruction242, + CryptonightR_instruction243, + CryptonightR_instruction244, + CryptonightR_instruction245, + CryptonightR_instruction246, + CryptonightR_instruction247, + CryptonightR_instruction248, + CryptonightR_instruction249, + CryptonightR_instruction250, + CryptonightR_instruction251, + CryptonightR_instruction252, + CryptonightR_instruction253, + CryptonightR_instruction254, + CryptonightR_instruction255, + CryptonightR_instruction256, +}; + +/* Companion table for the CryptonightR_instruction_movN symbols declared above: instructions_mov[N] is CryptonightR_instruction_movN for N = 0..256, i.e. the same indexing as instructions[]. NOTE(review): how the two tables are paired by their consumer is not visible here - confirm against the code that reads them. */ const void_func instructions_mov[257] = { + CryptonightR_instruction_mov0, + CryptonightR_instruction_mov1, + CryptonightR_instruction_mov2, + CryptonightR_instruction_mov3, + CryptonightR_instruction_mov4, + CryptonightR_instruction_mov5, + CryptonightR_instruction_mov6, + CryptonightR_instruction_mov7, + CryptonightR_instruction_mov8, + CryptonightR_instruction_mov9, + CryptonightR_instruction_mov10, + CryptonightR_instruction_mov11, + CryptonightR_instruction_mov12, + CryptonightR_instruction_mov13, + CryptonightR_instruction_mov14, + CryptonightR_instruction_mov15, + CryptonightR_instruction_mov16, + CryptonightR_instruction_mov17, + CryptonightR_instruction_mov18, + CryptonightR_instruction_mov19, + CryptonightR_instruction_mov20, + CryptonightR_instruction_mov21, + CryptonightR_instruction_mov22, + CryptonightR_instruction_mov23, + CryptonightR_instruction_mov24, + CryptonightR_instruction_mov25, + CryptonightR_instruction_mov26, + CryptonightR_instruction_mov27, + CryptonightR_instruction_mov28, + CryptonightR_instruction_mov29, + CryptonightR_instruction_mov30, + CryptonightR_instruction_mov31, + CryptonightR_instruction_mov32, + CryptonightR_instruction_mov33, + CryptonightR_instruction_mov34, + 
CryptonightR_instruction_mov35, + CryptonightR_instruction_mov36, + CryptonightR_instruction_mov37, + CryptonightR_instruction_mov38, + CryptonightR_instruction_mov39, + CryptonightR_instruction_mov40, + CryptonightR_instruction_mov41, + CryptonightR_instruction_mov42, + CryptonightR_instruction_mov43, + CryptonightR_instruction_mov44, + CryptonightR_instruction_mov45, + CryptonightR_instruction_mov46, + CryptonightR_instruction_mov47, + CryptonightR_instruction_mov48, + CryptonightR_instruction_mov49, + CryptonightR_instruction_mov50, + CryptonightR_instruction_mov51, + CryptonightR_instruction_mov52, + CryptonightR_instruction_mov53, + CryptonightR_instruction_mov54, + CryptonightR_instruction_mov55, + CryptonightR_instruction_mov56, + CryptonightR_instruction_mov57, + CryptonightR_instruction_mov58, + CryptonightR_instruction_mov59, + CryptonightR_instruction_mov60, + CryptonightR_instruction_mov61, + CryptonightR_instruction_mov62, + CryptonightR_instruction_mov63, + CryptonightR_instruction_mov64, + CryptonightR_instruction_mov65, + CryptonightR_instruction_mov66, + CryptonightR_instruction_mov67, + CryptonightR_instruction_mov68, + CryptonightR_instruction_mov69, + CryptonightR_instruction_mov70, + CryptonightR_instruction_mov71, + CryptonightR_instruction_mov72, + CryptonightR_instruction_mov73, + CryptonightR_instruction_mov74, + CryptonightR_instruction_mov75, + CryptonightR_instruction_mov76, + CryptonightR_instruction_mov77, + CryptonightR_instruction_mov78, + CryptonightR_instruction_mov79, + CryptonightR_instruction_mov80, + CryptonightR_instruction_mov81, + CryptonightR_instruction_mov82, + CryptonightR_instruction_mov83, + CryptonightR_instruction_mov84, + CryptonightR_instruction_mov85, + CryptonightR_instruction_mov86, + CryptonightR_instruction_mov87, + CryptonightR_instruction_mov88, + CryptonightR_instruction_mov89, + CryptonightR_instruction_mov90, + CryptonightR_instruction_mov91, + CryptonightR_instruction_mov92, + 
CryptonightR_instruction_mov93, + CryptonightR_instruction_mov94, + CryptonightR_instruction_mov95, + CryptonightR_instruction_mov96, + CryptonightR_instruction_mov97, + CryptonightR_instruction_mov98, + CryptonightR_instruction_mov99, + CryptonightR_instruction_mov100, + CryptonightR_instruction_mov101, + CryptonightR_instruction_mov102, + CryptonightR_instruction_mov103, + CryptonightR_instruction_mov104, + CryptonightR_instruction_mov105, + CryptonightR_instruction_mov106, + CryptonightR_instruction_mov107, + CryptonightR_instruction_mov108, + CryptonightR_instruction_mov109, + CryptonightR_instruction_mov110, + CryptonightR_instruction_mov111, + CryptonightR_instruction_mov112, + CryptonightR_instruction_mov113, + CryptonightR_instruction_mov114, + CryptonightR_instruction_mov115, + CryptonightR_instruction_mov116, + CryptonightR_instruction_mov117, + CryptonightR_instruction_mov118, + CryptonightR_instruction_mov119, + CryptonightR_instruction_mov120, + CryptonightR_instruction_mov121, + CryptonightR_instruction_mov122, + CryptonightR_instruction_mov123, + CryptonightR_instruction_mov124, + CryptonightR_instruction_mov125, + CryptonightR_instruction_mov126, + CryptonightR_instruction_mov127, + CryptonightR_instruction_mov128, + CryptonightR_instruction_mov129, + CryptonightR_instruction_mov130, + CryptonightR_instruction_mov131, + CryptonightR_instruction_mov132, + CryptonightR_instruction_mov133, + CryptonightR_instruction_mov134, + CryptonightR_instruction_mov135, + CryptonightR_instruction_mov136, + CryptonightR_instruction_mov137, + CryptonightR_instruction_mov138, + CryptonightR_instruction_mov139, + CryptonightR_instruction_mov140, + CryptonightR_instruction_mov141, + CryptonightR_instruction_mov142, + CryptonightR_instruction_mov143, + CryptonightR_instruction_mov144, + CryptonightR_instruction_mov145, + CryptonightR_instruction_mov146, + CryptonightR_instruction_mov147, + CryptonightR_instruction_mov148, + CryptonightR_instruction_mov149, + 
CryptonightR_instruction_mov150, + CryptonightR_instruction_mov151, + CryptonightR_instruction_mov152, + CryptonightR_instruction_mov153, + CryptonightR_instruction_mov154, + CryptonightR_instruction_mov155, + CryptonightR_instruction_mov156, + CryptonightR_instruction_mov157, + CryptonightR_instruction_mov158, + CryptonightR_instruction_mov159, + CryptonightR_instruction_mov160, + CryptonightR_instruction_mov161, + CryptonightR_instruction_mov162, + CryptonightR_instruction_mov163, + CryptonightR_instruction_mov164, + CryptonightR_instruction_mov165, + CryptonightR_instruction_mov166, + CryptonightR_instruction_mov167, + CryptonightR_instruction_mov168, + CryptonightR_instruction_mov169, + CryptonightR_instruction_mov170, + CryptonightR_instruction_mov171, + CryptonightR_instruction_mov172, + CryptonightR_instruction_mov173, + CryptonightR_instruction_mov174, + CryptonightR_instruction_mov175, + CryptonightR_instruction_mov176, + CryptonightR_instruction_mov177, + CryptonightR_instruction_mov178, + CryptonightR_instruction_mov179, + CryptonightR_instruction_mov180, + CryptonightR_instruction_mov181, + CryptonightR_instruction_mov182, + CryptonightR_instruction_mov183, + CryptonightR_instruction_mov184, + CryptonightR_instruction_mov185, + CryptonightR_instruction_mov186, + CryptonightR_instruction_mov187, + CryptonightR_instruction_mov188, + CryptonightR_instruction_mov189, + CryptonightR_instruction_mov190, + CryptonightR_instruction_mov191, + CryptonightR_instruction_mov192, + CryptonightR_instruction_mov193, + CryptonightR_instruction_mov194, + CryptonightR_instruction_mov195, + CryptonightR_instruction_mov196, + CryptonightR_instruction_mov197, + CryptonightR_instruction_mov198, + CryptonightR_instruction_mov199, + CryptonightR_instruction_mov200, + CryptonightR_instruction_mov201, + CryptonightR_instruction_mov202, + CryptonightR_instruction_mov203, + CryptonightR_instruction_mov204, + CryptonightR_instruction_mov205, + CryptonightR_instruction_mov206, + 
CryptonightR_instruction_mov207, + CryptonightR_instruction_mov208, + CryptonightR_instruction_mov209, + CryptonightR_instruction_m​ov210, + CryptonightR_instruction_mov211, + CryptonightR_instruction_mov212, + CryptonightR_instruction_mov213, + CryptonightR_instruction_mov214, + CryptonightR_instruction_mov215, + CryptonightR_instruction_mov216, + CryptonightR_instruction_mov217, + CryptonightR_instruction_mov218, + CryptonightR_instruction_mov219, + CryptonightR_instruction_mov220, + CryptonightR_instruction_mov221, + CryptonightR_instruction_mov222, + CryptonightR_instruction_mov223, + CryptonightR_instruction_mov224, + CryptonightR_instruction_mov225, + CryptonightR_instruction_mov226, + CryptonightR_instruction_mov227, + CryptonightR_instruction_mov228, + CryptonightR_instruction_mov229, + CryptonightR_instruction_mov230, + CryptonightR_instruction_mov231, + CryptonightR_instruction_mov232, + CryptonightR_instruction_mov233, + CryptonightR_instruction_mov234, + CryptonightR_instruction_mov235, + CryptonightR_instruction_mov236, + CryptonightR_instruction_mov237, + CryptonightR_instruction_mov238, + CryptonightR_instruction_mov239, + CryptonightR_instruction_mov240, + CryptonightR_instruction_mov241, + CryptonightR_instruction_mov242, + CryptonightR_instruction_mov243, + CryptonightR_instruction_mov244, + CryptonightR_instruction_mov245, + CryptonightR_instruction_mov246, + CryptonightR_instruction_mov247, + CryptonightR_instruction_mov248, + CryptonightR_instruction_mov249, + CryptonightR_instruction_mov250, + CryptonightR_instruction_mov251, + CryptonightR_instruction_mov252, + CryptonightR_instruction_mov253, + CryptonightR_instruction_mov254, + CryptonightR_instruction_mov255, + CryptonightR_instruction_mov256, +}; diff --git a/src/crypto/asm/CryptonightR_template.inc b/src/crypto/asm/CryptonightR_template.inc new file mode 100644 index 00000000..468eb87b --- /dev/null +++ b/src/crypto/asm/CryptonightR_template.inc @@ -0,0 +1,478 @@ +PUBLIC 
FN_PREFIX(CryptonightR_template_part1) +PUBLIC FN_PREFIX(CryptonightR_template_mainloop) +PUBLIC FN_PREFIX(CryptonightR_template_part2) +PUBLIC FN_PREFIX(CryptonightR_template_part3) +PUBLIC FN_PREFIX(CryptonightR_template_end) +PUBLIC FN_PREFIX(CryptonightR_template_double_part1) +PUBLIC FN_PREFIX(CryptonightR_template_double_mainloop) +PUBLIC FN_PREFIX(CryptonightR_template_double_part2) +PUBLIC FN_PREFIX(CryptonightR_template_double_part3) +PUBLIC FN_PREFIX(CryptonightR_template_double_part4) +PUBLIC FN_PREFIX(CryptonightR_template_double_end) + +FN_PREFIX(CryptonightR_template_part1): /* Prologue of the single-hash template: stores rbx, rbp and rsi at rsp+16, rsp+24 and rsp+32, pushes r10-r15 and rdi, reserves 64 bytes and spills xmm6-xmm9 into them; part3 undoes all of it. rcx is the only argument register read. NOTE(review): the layout looks like the Win64 calling convention (home space above the return address) - confirm. */ + mov QWORD PTR [rsp+16], rbx + mov QWORD PTR [rsp+24], rbp + mov QWORD PTR [rsp+32], rsi + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push rdi + sub rsp, 64 + mov r12, rcx + mov r8, QWORD PTR [r12+32] + mov rdx, r12 + xor r8, QWORD PTR [r12] + mov r15, QWORD PTR [r12+40] + mov r9, r8 + xor r15, QWORD PTR [r12+8] + mov r11, QWORD PTR [r12+224] + mov r12, QWORD PTR [r12+56] + xor r12, QWORD PTR [rdx+24] + mov rax, QWORD PTR [rdx+48] + xor rax, QWORD PTR [rdx+16] + movaps XMMWORD PTR [rsp+48], xmm6 + movq xmm0, r12 + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+16], xmm8 + movaps XMMWORD PTR [rsp], xmm9 + mov r12, QWORD PTR [rdx+88] + xor r12, QWORD PTR [rdx+72] + movq xmm6, rax + mov rax, QWORD PTR [rdx+80] + xor rax, QWORD PTR [rdx+64] + punpcklqdq xmm6, xmm0 + and r9d, 2097136 /* 2097136 is 0x1FFFF0: a 16-byte-aligned offset below 2 MiB, later added to the pointer held in r11 */ + movq xmm0, r12 + movq xmm7, rax + punpcklqdq xmm7, xmm0 + mov r10d, r9d + movq xmm9, rsp /* park the real stack pointer in xmm9; rsp is used as an ordinary data register until part3 reloads it */ + mov rsp, r8 + mov r8d, 524288 /* loop counter, decremented by the dec r8d / jnz pair that closes part2 */ + + mov ebx, [rdx+96] + mov esi, [rdx+100] + mov edi, [rdx+104] + mov ebp, [rdx+108] + + ALIGN(64) +FN_PREFIX(CryptonightR_template_mainloop): + movdqa xmm5, XMMWORD PTR [r9+r11] + movq xmm0, r15 + movq xmm4, rsp + punpcklqdq xmm4, xmm0 + lea rdx, QWORD PTR [r9+r11] + + aesenc xmm5, xmm4 + movd r10d, xmm5 + and r10d, 2097136 + + mov r12d, r9d + mov eax, r9d + xor r9d, 48 + xor r12d, 16 + xor eax, 32 + movdqu xmm0, XMMWORD PTR [r9+r11] + movdqu xmm2, 
XMMWORD PTR [r12+r11] + movdqu xmm1, XMMWORD PTR [rax+r11] + paddq xmm0, xmm7 + paddq xmm2, xmm6 + paddq xmm1, xmm4 + movdqu XMMWORD PTR [r12+r11], xmm0 + movq r12, xmm5 + movdqu XMMWORD PTR [rax+r11], xmm2 + movdqu XMMWORD PTR [r9+r11], xmm1 + + movdqa xmm0, xmm5 + pxor xmm0, xmm6 + movdqu XMMWORD PTR [rdx], xmm0 + + lea r13d, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or r13, rdx + + xor r13, QWORD PTR [r10+r11] + mov r14, QWORD PTR [r10+r11+8] + + movd eax, xmm6 + movd edx, xmm7 + +FN_PREFIX(CryptonightR_template_part2): /* The eax and edx values loaded just before this label are overwritten below (mov rax, r13 / mul r12) without being read here. NOTE(review): presumably generated code is inserted at this label by the code generator - confirm. */ + mov rax, r13 + mul r12 + movq xmm0, rax + movq xmm3, rdx + punpcklqdq xmm3, xmm0 + + mov r9d, r10d + mov r12d, r10d + xor r9d, 16 + xor r12d, 32 + xor r10d, 48 + movdqa xmm1, XMMWORD PTR [r12+r11] + xor rdx, QWORD PTR [r12+r11] + xor rax, QWORD PTR [r11+r12+8] + movdqa xmm2, XMMWORD PTR [r9+r11] + pxor xmm3, xmm2 + paddq xmm7, XMMWORD PTR [r10+r11] + paddq xmm1, xmm4 + paddq xmm3, xmm6 + movdqu XMMWORD PTR [r9+r11], xmm7 + movdqu XMMWORD PTR [r12+r11], xmm3 + movdqu XMMWORD PTR [r10+r11], xmm1 + + movdqa xmm7, xmm6 + add r15, rax + add rsp, rdx + xor r10, 48 + mov QWORD PTR [r10+r11], rsp + xor rsp, r13 + mov r9d, esp + mov QWORD PTR [r10+r11+8], r15 + and r9d, 2097136 + xor r15, r14 + movdqa xmm6, xmm5 + dec r8d + jnz FN_PREFIX(CryptonightR_template_mainloop) + +FN_PREFIX(CryptonightR_template_part3): /* Epilogue: reload rsp from xmm9, then restore what part1 saved; 136, 144 and 152 are the rsp+16, rsp+24 and rsp+32 entry slots shifted by 56 bytes of pushes and the 64-byte reservation. */ + movq rsp, xmm9 + + mov rbx, QWORD PTR [rsp+136] + mov rbp, QWORD PTR [rsp+144] + mov rsi, QWORD PTR [rsp+152] + movaps xmm6, XMMWORD PTR [rsp+48] + movaps xmm7, XMMWORD PTR [rsp+32] + movaps xmm8, XMMWORD PTR [rsp+16] + movaps xmm9, XMMWORD PTR [rsp] + add rsp, 64 + pop rdi + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + ret 0 +FN_PREFIX(CryptonightR_template_end): + +ALIGN(64) +FN_PREFIX(CryptonightR_template_double_part1): /* Prologue of the two-hash template: the structures addressed by rcx and rdx are read at the same field offsets; stores rbx at rsp+24, pushes rbp, rsi, rdi and r12-r15, reserves 320 bytes and spills xmm6-xmm15 at rsp+160..rsp+304. */ + mov QWORD PTR [rsp+24], rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 320 + mov r14, QWORD PTR [rcx+32] + mov r8, rcx + xor r14, QWORD PTR [rcx] + mov r12, 
QWORD PTR [rcx+40] + mov ebx, r14d + mov rsi, QWORD PTR [rcx+224] + and ebx, 2097136 + xor r12, QWORD PTR [rcx+8] + mov rcx, QWORD PTR [rcx+56] + xor rcx, QWORD PTR [r8+24] + mov rax, QWORD PTR [r8+48] + xor rax, QWORD PTR [r8+16] + mov r15, QWORD PTR [rdx+32] + xor r15, QWORD PTR [rdx] + movq xmm0, rcx + mov rcx, QWORD PTR [r8+88] + xor rcx, QWORD PTR [r8+72] + mov r13, QWORD PTR [rdx+40] + mov rdi, QWORD PTR [rdx+224] + xor r13, QWORD PTR [rdx+8] + movaps XMMWORD PTR [rsp+160], xmm6 + movaps XMMWORD PTR [rsp+176], xmm7 + movaps XMMWORD PTR [rsp+192], xmm8 + movaps XMMWORD PTR [rsp+208], xmm9 + movaps XMMWORD PTR [rsp+224], xmm10 + movaps XMMWORD PTR [rsp+240], xmm11 + movaps XMMWORD PTR [rsp+256], xmm12 + movaps XMMWORD PTR [rsp+272], xmm13 + movaps XMMWORD PTR [rsp+288], xmm14 + movaps XMMWORD PTR [rsp+304], xmm15 + movq xmm7, rax + mov rax, QWORD PTR [r8+80] + xor rax, QWORD PTR [r8+64] + + movaps xmm1, XMMWORD PTR [rdx+96] + movaps xmm2, XMMWORD PTR [r8+96] + movaps XMMWORD PTR [rsp], xmm1 + movaps XMMWORD PTR [rsp+16], xmm2 + + mov r8d, r15d + punpcklqdq xmm7, xmm0 + movq xmm0, rcx + mov rcx, QWORD PTR [rdx+56] + xor rcx, QWORD PTR [rdx+24] + movq xmm9, rax + mov QWORD PTR [rsp+128], rsi + mov rax, QWORD PTR [rdx+48] + xor rax, QWORD PTR [rdx+16] + punpcklqdq xmm9, xmm0 + movq xmm0, rcx + mov rcx, QWORD PTR [rdx+88] + xor rcx, QWORD PTR [rdx+72] + movq xmm8, rax + mov QWORD PTR [rsp+136], rdi + mov rax, QWORD PTR [rdx+80] + xor rax, QWORD PTR [rdx+64] + punpcklqdq xmm8, xmm0 + and r8d, 2097136 + movq xmm0, rcx + mov r11d, 524288 /* loop counter, decremented by dec r11d / jnz at the end of double_part3 */ + movq xmm10, rax + punpcklqdq xmm10, xmm0 + + movq xmm14, QWORD PTR [rsp+128] + movq xmm15, QWORD PTR [rsp+136] + + ALIGN(64) +FN_PREFIX(CryptonightR_template_double_mainloop): + movdqu xmm6, XMMWORD PTR [rbx+rsi] + movq xmm0, r12 + mov ecx, ebx + movq xmm3, r14 + punpcklqdq xmm3, xmm0 + xor ebx, 16 + aesenc xmm6, xmm3 + movq rdx, xmm6 + movq xmm4, r15 + movdqu xmm0, XMMWORD PTR [rbx+rsi] + xor ebx, 48 + paddq xmm0, xmm7 + movdqu 
xmm1, XMMWORD PTR [rbx+rsi] + movdqu XMMWORD PTR [rbx+rsi], xmm0 + paddq xmm1, xmm3 + xor ebx, 16 + mov eax, ebx + xor rax, 32 + movdqu xmm0, XMMWORD PTR [rbx+rsi] + movdqu XMMWORD PTR [rbx+rsi], xmm1 + paddq xmm0, xmm9 + movdqu XMMWORD PTR [rax+rsi], xmm0 + movdqa xmm0, xmm6 + pxor xmm0, xmm7 + movdqu XMMWORD PTR [rcx+rsi], xmm0 + mov esi, edx + movdqu xmm5, XMMWORD PTR [r8+rdi] + and esi, 2097136 + mov ecx, r8d + movq xmm0, r13 + punpcklqdq xmm4, xmm0 + xor r8d, 16 + aesenc xmm5, xmm4 + movdqu xmm0, XMMWORD PTR [r8+rdi] + xor r8d, 48 + paddq xmm0, xmm8 + movdqu xmm1, XMMWORD PTR [r8+rdi] + movdqu XMMWORD PTR [r8+rdi], xmm0 + paddq xmm1, xmm4 + xor r8d, 16 + mov eax, r8d + xor rax, 32 + movdqu xmm0, XMMWORD PTR [r8+rdi] + movdqu XMMWORD PTR [r8+rdi], xmm1 + paddq xmm0, xmm10 + movdqu XMMWORD PTR [rax+rdi], xmm0 + movdqa xmm0, xmm5 + pxor xmm0, xmm8 + movdqu XMMWORD PTR [rcx+rdi], xmm0 + movq rdi, xmm5 + movq rcx, xmm14 + mov ebp, edi + mov r8, QWORD PTR [rcx+rsi] + mov r10, QWORD PTR [rcx+rsi+8] + lea r9, QWORD PTR [rcx+rsi] + xor esi, 16 + + movq xmm0, rsp + movq xmm1, rsi + movq xmm2, rdi + movq xmm11, rbp + movq xmm12, r15 + movq xmm13, rdx + mov [rsp+112], rcx + + mov ebx, DWORD PTR [rsp+16] + mov esi, DWORD PTR [rsp+20] + mov edi, DWORD PTR [rsp+24] + mov ebp, DWORD PTR [rsp+28] + + lea eax, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or rax, rdx + xor r8, rax + + movd esp, xmm3 + pextrd r15d, xmm3, 2 + movd eax, xmm7 + movd edx, xmm9 + +FN_PREFIX(CryptonightR_template_double_part2): /* rsp, rsi, rdi, rbp, r15 and rdx were stashed in xmm0, xmm1, xmm2, xmm11, xmm12 and xmm13 (and rcx at rsp+112) just before this label and are reloaded right after it; the four dwords at rsp+16..rsp+28 are loaded into ebx, esi, edi, ebp before it and stored back after it. NOTE(review): presumably generated code is inserted at this label - confirm against the code generator. */ + + movq rsp, xmm0 + mov DWORD PTR [rsp+16], ebx + mov DWORD PTR [rsp+20], esi + mov DWORD PTR [rsp+24], edi + mov DWORD PTR [rsp+28], ebp + + movq rsi, xmm1 + movq rdi, xmm2 + movq rbp, xmm11 + movq r15, xmm12 + movq rdx, xmm13 + mov rcx, [rsp+112] + + mov rbx, r8 + mov rax, r8 + mul rdx + and ebp, 2097136 + mov r8, rax + movq xmm1, rdx + movq xmm0, r8 + punpcklqdq xmm1, xmm0 + pxor xmm1, XMMWORD PTR [rcx+rsi] + xor esi, 48 + paddq xmm1, xmm7 + movdqu xmm2, XMMWORD PTR 
[rsi+rcx] + xor rdx, QWORD PTR [rsi+rcx] + paddq xmm2, xmm3 + xor r8, QWORD PTR [rsi+rcx+8] + movdqu XMMWORD PTR [rsi+rcx], xmm1 + xor esi, 16 + mov eax, esi + mov rsi, rcx + movdqu xmm0, XMMWORD PTR [rax+rcx] + movdqu XMMWORD PTR [rax+rcx], xmm2 + paddq xmm0, xmm9 + add r12, r8 + xor rax, 32 + add r14, rdx + movdqa xmm9, xmm7 + movdqa xmm7, xmm6 + movdqu XMMWORD PTR [rax+rcx], xmm0 + mov QWORD PTR [r9+8], r12 + xor r12, r10 + mov QWORD PTR [r9], r14 + movq rcx, xmm15 + xor r14, rbx + mov r10d, ebp + mov ebx, r14d + xor ebp, 16 + and ebx, 2097136 + mov r8, QWORD PTR [r10+rcx] + mov r9, QWORD PTR [r10+rcx+8] + + movq xmm0, rsp + movq xmm1, rbx + movq xmm2, rsi + movq xmm11, rdi + movq xmm12, rbp + movq xmm13, r15 + mov [rsp+104], rcx + + mov ebx, DWORD PTR [rsp] + mov esi, DWORD PTR [rsp+4] + mov edi, DWORD PTR [rsp+8] + mov ebp, DWORD PTR [rsp+12] + + lea eax, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or rax, rdx + + xor r8, rax + movq xmm3, r8 + + movd esp, xmm4 + pextrd r15d, xmm4, 2 + movd eax, xmm8 + movd edx, xmm10 + +FN_PREFIX(CryptonightR_template_double_part3): /* Same stash-and-reload pattern as around double_part2, here with rbx, rsi, rdi, rbp and r15 kept in xmm1, xmm2, xmm11, xmm12 and xmm13, rcx at rsp+104, and the four dwords at rsp..rsp+12. */ + + movq rsp, xmm0 + mov DWORD PTR [rsp], ebx + mov DWORD PTR [rsp+4], esi + mov DWORD PTR [rsp+8], edi + mov DWORD PTR [rsp+12], ebp + + movq rbx, xmm1 + movq rsi, xmm2 + movq rdi, xmm11 + movq rbp, xmm12 + movq r15, xmm13 + mov rcx, [rsp+104] + + mov rax, r8 + mul rdi + movq xmm1, rdx + movq xmm0, rax + punpcklqdq xmm1, xmm0 + mov rdi, rcx + mov r8, rax + pxor xmm1, XMMWORD PTR [rbp+rcx] + xor ebp, 48 + paddq xmm1, xmm8 + xor r8, QWORD PTR [rbp+rcx+8] + xor rdx, QWORD PTR [rbp+rcx] + add r13, r8 + movdqu xmm2, XMMWORD PTR [rbp+rcx] + add r15, rdx + movdqu XMMWORD PTR [rbp+rcx], xmm1 + paddq xmm2, xmm4 + xor ebp, 16 + mov eax, ebp + xor rax, 32 + movdqu xmm0, XMMWORD PTR [rbp+rcx] + movdqu XMMWORD PTR [rbp+rcx], xmm2 + paddq xmm0, xmm10 + movdqu XMMWORD PTR [rax+rcx], xmm0 + movq rax, xmm3 + movdqa xmm10, xmm8 + mov QWORD PTR [r10+rcx], r15 + movdqa xmm8, xmm5 + xor r15, rax + mov QWORD PTR 
[r10+rcx+8], r13 + mov r8d, r15d + xor r13, r9 + and r8d, 2097136 + dec r11d + jnz FN_PREFIX(CryptonightR_template_double_mainloop) + +FN_PREFIX(CryptonightR_template_double_part4): /* Epilogue of the two-hash template: 400 is the rsp+24 entry slot of rbx shifted by 56 bytes of pushes and the 320-byte reservation; xmm6-xmm15 come back from the offsets used in double_part1. */ + + mov rbx, QWORD PTR [rsp+400] + movaps xmm6, XMMWORD PTR [rsp+160] + movaps xmm7, XMMWORD PTR [rsp+176] + movaps xmm8, XMMWORD PTR [rsp+192] + movaps xmm9, XMMWORD PTR [rsp+208] + movaps xmm10, XMMWORD PTR [rsp+224] + movaps xmm11, XMMWORD PTR [rsp+240] + movaps xmm12, XMMWORD PTR [rsp+256] + movaps xmm13, XMMWORD PTR [rsp+272] + movaps xmm14, XMMWORD PTR [rsp+288] + movaps xmm15, XMMWORD PTR [rsp+304] + add rsp, 320 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + ret 0 +FN_PREFIX(CryptonightR_template_double_end): diff --git a/src/crypto/asm/CryptonightR_template_win.inc b/src/crypto/asm/CryptonightR_template_win.inc new file mode 100644 index 00000000..efa1f3f5 --- /dev/null +++ b/src/crypto/asm/CryptonightR_template_win.inc @@ -0,0 +1,478 @@ +PUBLIC CryptonightR_template_part1 +PUBLIC CryptonightR_template_mainloop +PUBLIC CryptonightR_template_part2 +PUBLIC CryptonightR_template_part3 +PUBLIC CryptonightR_template_end +PUBLIC CryptonightR_template_double_part1 +PUBLIC CryptonightR_template_double_mainloop +PUBLIC CryptonightR_template_double_part2 +PUBLIC CryptonightR_template_double_part3 +PUBLIC CryptonightR_template_double_part4 +PUBLIC CryptonightR_template_double_end + +CryptonightR_template_part1: + mov QWORD PTR [rsp+16], rbx + mov QWORD PTR [rsp+24], rbp + mov QWORD PTR [rsp+32], rsi + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push rdi + sub rsp, 64 + mov r12, rcx + mov r8, QWORD PTR [r12+32] + mov rdx, r12 + xor r8, QWORD PTR [r12] + mov r15, QWORD PTR [r12+40] + mov r9, r8 + xor r15, QWORD PTR [r12+8] + mov r11, QWORD PTR [r12+224] + mov r12, QWORD PTR [r12+56] + xor r12, QWORD PTR [rdx+24] + mov rax, QWORD PTR [rdx+48] + xor rax, QWORD PTR [rdx+16] + movaps XMMWORD PTR [rsp+48], xmm6 + movq xmm0, r12 + movaps XMMWORD PTR 
[rsp+32], xmm7 + movaps XMMWORD PTR [rsp+16], xmm8 + movaps XMMWORD PTR [rsp], xmm9 + mov r12, QWORD PTR [rdx+88] + xor r12, QWORD PTR [rdx+72] + movq xmm6, rax + mov rax, QWORD PTR [rdx+80] + xor rax, QWORD PTR [rdx+64] + punpcklqdq xmm6, xmm0 + and r9d, 2097136 + movq xmm0, r12 + movq xmm7, rax + punpcklqdq xmm7, xmm0 + mov r10d, r9d + movq xmm9, rsp + mov rsp, r8 + mov r8d, 524288 + + mov ebx, [rdx+96] + mov esi, [rdx+100] + mov edi, [rdx+104] + mov ebp, [rdx+108] + + ALIGN(64) +CryptonightR_template_mainloop: + movdqa xmm5, XMMWORD PTR [r9+r11] + movq xmm0, r15 + movq xmm4, rsp + punpcklqdq xmm4, xmm0 + lea rdx, QWORD PTR [r9+r11] + + aesenc xmm5, xmm4 + movd r10d, xmm5 + and r10d, 2097136 + + mov r12d, r9d + mov eax, r9d + xor r9d, 48 + xor r12d, 16 + xor eax, 32 + movdqu xmm0, XMMWORD PTR [r9+r11] + movdqu xmm2, XMMWORD PTR [r12+r11] + movdqu xmm1, XMMWORD PTR [rax+r11] + paddq xmm0, xmm7 + paddq xmm2, xmm6 + paddq xmm1, xmm4 + movdqu XMMWORD PTR [r12+r11], xmm0 + movq r12, xmm5 + movdqu XMMWORD PTR [rax+r11], xmm2 + movdqu XMMWORD PTR [r9+r11], xmm1 + + movdqa xmm0, xmm5 + pxor xmm0, xmm6 + movdqu XMMWORD PTR [rdx], xmm0 + + lea r13d, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or r13, rdx + + xor r13, QWORD PTR [r10+r11] + mov r14, QWORD PTR [r10+r11+8] + + movd eax, xmm6 + movd edx, xmm7 + +CryptonightR_template_part2: + mov rax, r13 + mul r12 + movq xmm0, rax + movq xmm3, rdx + punpcklqdq xmm3, xmm0 + + mov r9d, r10d + mov r12d, r10d + xor r9d, 16 + xor r12d, 32 + xor r10d, 48 + movdqa xmm1, XMMWORD PTR [r12+r11] + xor rdx, QWORD PTR [r12+r11] + xor rax, QWORD PTR [r11+r12+8] + movdqa xmm2, XMMWORD PTR [r9+r11] + pxor xmm3, xmm2 + paddq xmm7, XMMWORD PTR [r10+r11] + paddq xmm1, xmm4 + paddq xmm3, xmm6 + movdqu XMMWORD PTR [r9+r11], xmm7 + movdqu XMMWORD PTR [r12+r11], xmm3 + movdqu XMMWORD PTR [r10+r11], xmm1 + + movdqa xmm7, xmm6 + add r15, rax + add rsp, rdx + xor r10, 48 + mov QWORD PTR [r10+r11], rsp + xor rsp, r13 + mov r9d, esp + mov QWORD PTR 
[r10+r11+8], r15 + and r9d, 2097136 + xor r15, r14 + movdqa xmm6, xmm5 + dec r8d + jnz CryptonightR_template_mainloop + +CryptonightR_template_part3: + movq rsp, xmm9 + + mov rbx, QWORD PTR [rsp+136] + mov rbp, QWORD PTR [rsp+144] + mov rsi, QWORD PTR [rsp+152] + movaps xmm6, XMMWORD PTR [rsp+48] + movaps xmm7, XMMWORD PTR [rsp+32] + movaps xmm8, XMMWORD PTR [rsp+16] + movaps xmm9, XMMWORD PTR [rsp] + add rsp, 64 + pop rdi + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + ret 0 +CryptonightR_template_end: + +ALIGN(64) +CryptonightR_template_double_part1: + mov QWORD PTR [rsp+24], rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 320 + mov r14, QWORD PTR [rcx+32] + mov r8, rcx + xor r14, QWORD PTR [rcx] + mov r12, QWORD PTR [rcx+40] + mov ebx, r14d + mov rsi, QWORD PTR [rcx+224] + and ebx, 2097136 + xor r12, QWORD PTR [rcx+8] + mov rcx, QWORD PTR [rcx+56] + xor rcx, QWORD PTR [r8+24] + mov rax, QWORD PTR [r8+48] + xor rax, QWORD PTR [r8+16] + mov r15, QWORD PTR [rdx+32] + xor r15, QWORD PTR [rdx] + movq xmm0, rcx + mov rcx, QWORD PTR [r8+88] + xor rcx, QWORD PTR [r8+72] + mov r13, QWORD PTR [rdx+40] + mov rdi, QWORD PTR [rdx+224] + xor r13, QWORD PTR [rdx+8] + movaps XMMWORD PTR [rsp+160], xmm6 + movaps XMMWORD PTR [rsp+176], xmm7 + movaps XMMWORD PTR [rsp+192], xmm8 + movaps XMMWORD PTR [rsp+208], xmm9 + movaps XMMWORD PTR [rsp+224], xmm10 + movaps XMMWORD PTR [rsp+240], xmm11 + movaps XMMWORD PTR [rsp+256], xmm12 + movaps XMMWORD PTR [rsp+272], xmm13 + movaps XMMWORD PTR [rsp+288], xmm14 + movaps XMMWORD PTR [rsp+304], xmm15 + movq xmm7, rax + mov rax, QWORD PTR [r8+80] + xor rax, QWORD PTR [r8+64] + + movaps xmm1, XMMWORD PTR [rdx+96] + movaps xmm2, XMMWORD PTR [r8+96] + movaps XMMWORD PTR [rsp], xmm1 + movaps XMMWORD PTR [rsp+16], xmm2 + + mov r8d, r15d + punpcklqdq xmm7, xmm0 + movq xmm0, rcx + mov rcx, QWORD PTR [rdx+56] + xor rcx, QWORD PTR [rdx+24] + movq xmm9, rax + mov QWORD PTR [rsp+128], rsi + mov 
rax, QWORD PTR [rdx+48] + xor rax, QWORD PTR [rdx+16] + punpcklqdq xmm9, xmm0 + movq xmm0, rcx + mov rcx, QWORD PTR [rdx+88] + xor rcx, QWORD PTR [rdx+72] + movq xmm8, rax + mov QWORD PTR [rsp+136], rdi + mov rax, QWORD PTR [rdx+80] + xor rax, QWORD PTR [rdx+64] + punpcklqdq xmm8, xmm0 + and r8d, 2097136 + movq xmm0, rcx + mov r11d, 524288 + movq xmm10, rax + punpcklqdq xmm10, xmm0 + + movq xmm14, QWORD PTR [rsp+128] + movq xmm15, QWORD PTR [rsp+136] + + ALIGN(64) +CryptonightR_template_double_mainloop: + movdqu xmm6, XMMWORD PTR [rbx+rsi] + movq xmm0, r12 + mov ecx, ebx + movq xmm3, r14 + punpcklqdq xmm3, xmm0 + xor ebx, 16 + aesenc xmm6, xmm3 + movq rdx, xmm6 + movq xmm4, r15 + movdqu xmm0, XMMWORD PTR [rbx+rsi] + xor ebx, 48 + paddq xmm0, xmm7 + movdqu xmm1, XMMWORD PTR [rbx+rsi] + movdqu XMMWORD PTR [rbx+rsi], xmm0 + paddq xmm1, xmm3 + xor ebx, 16 + mov eax, ebx + xor rax, 32 + movdqu xmm0, XMMWORD PTR [rbx+rsi] + movdqu XMMWORD PTR [rbx+rsi], xmm1 + paddq xmm0, xmm9 + movdqu XMMWORD PTR [rax+rsi], xmm0 + movdqa xmm0, xmm6 + pxor xmm0, xmm7 + movdqu XMMWORD PTR [rcx+rsi], xmm0 + mov esi, edx + movdqu xmm5, XMMWORD PTR [r8+rdi] + and esi, 2097136 + mov ecx, r8d + movq xmm0, r13 + punpcklqdq xmm4, xmm0 + xor r8d, 16 + aesenc xmm5, xmm4 + movdqu xmm0, XMMWORD PTR [r8+rdi] + xor r8d, 48 + paddq xmm0, xmm8 + movdqu xmm1, XMMWORD PTR [r8+rdi] + movdqu XMMWORD PTR [r8+rdi], xmm0 + paddq xmm1, xmm4 + xor r8d, 16 + mov eax, r8d + xor rax, 32 + movdqu xmm0, XMMWORD PTR [r8+rdi] + movdqu XMMWORD PTR [r8+rdi], xmm1 + paddq xmm0, xmm10 + movdqu XMMWORD PTR [rax+rdi], xmm0 + movdqa xmm0, xmm5 + pxor xmm0, xmm8 + movdqu XMMWORD PTR [rcx+rdi], xmm0 + movq rdi, xmm5 + movq rcx, xmm14 + mov ebp, edi + mov r8, QWORD PTR [rcx+rsi] + mov r10, QWORD PTR [rcx+rsi+8] + lea r9, QWORD PTR [rcx+rsi] + xor esi, 16 + + movq xmm0, rsp + movq xmm1, rsi + movq xmm2, rdi + movq xmm11, rbp + movq xmm12, r15 + movq xmm13, rdx + mov [rsp+112], rcx + + mov ebx, DWORD PTR [rsp+16] + mov esi, DWORD 
PTR [rsp+20] + mov edi, DWORD PTR [rsp+24] + mov ebp, DWORD PTR [rsp+28] + + lea eax, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or rax, rdx + xor r8, rax + + movd esp, xmm3 + pextrd r15d, xmm3, 2 + movd eax, xmm7 + movd edx, xmm9 + +CryptonightR_template_double_part2: + + movq rsp, xmm0 + mov DWORD PTR [rsp+16], ebx + mov DWORD PTR [rsp+20], esi + mov DWORD PTR [rsp+24], edi + mov DWORD PTR [rsp+28], ebp + + movq rsi, xmm1 + movq rdi, xmm2 + movq rbp, xmm11 + movq r15, xmm12 + movq rdx, xmm13 + mov rcx, [rsp+112] + + mov rbx, r8 + mov rax, r8 + mul rdx + and ebp, 2097136 + mov r8, rax + movq xmm1, rdx + movq xmm0, r8 + punpcklqdq xmm1, xmm0 + pxor xmm1, XMMWORD PTR [rcx+rsi] + xor esi, 48 + paddq xmm1, xmm7 + movdqu xmm2, XMMWORD PTR [rsi+rcx] + xor rdx, QWORD PTR [rsi+rcx] + paddq xmm2, xmm3 + xor r8, QWORD PTR [rsi+rcx+8] + movdqu XMMWORD PTR [rsi+rcx], xmm1 + xor esi, 16 + mov eax, esi + mov rsi, rcx + movdqu xmm0, XMMWORD PTR [rax+rcx] + movdqu XMMWORD PTR [rax+rcx], xmm2 + paddq xmm0, xmm9 + add r12, r8 + xor rax, 32 + add r14, rdx + movdqa xmm9, xmm7 + movdqa xmm7, xmm6 + movdqu XMMWORD PTR [rax+rcx], xmm0 + mov QWORD PTR [r9+8], r12 + xor r12, r10 + mov QWORD PTR [r9], r14 + movq rcx, xmm15 + xor r14, rbx + mov r10d, ebp + mov ebx, r14d + xor ebp, 16 + and ebx, 2097136 + mov r8, QWORD PTR [r10+rcx] + mov r9, QWORD PTR [r10+rcx+8] + + movq xmm0, rsp + movq xmm1, rbx + movq xmm2, rsi + movq xmm11, rdi + movq xmm12, rbp + movq xmm13, r15 + mov [rsp+104], rcx + + mov ebx, DWORD PTR [rsp] + mov esi, DWORD PTR [rsp+4] + mov edi, DWORD PTR [rsp+8] + mov ebp, DWORD PTR [rsp+12] + + lea eax, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or rax, rdx + + xor r8, rax + movq xmm3, r8 + + movd esp, xmm4 + pextrd r15d, xmm4, 2 + movd eax, xmm8 + movd edx, xmm10 + +CryptonightR_template_double_part3: + + movq rsp, xmm0 + mov DWORD PTR [rsp], ebx + mov DWORD PTR [rsp+4], esi + mov DWORD PTR [rsp+8], edi + mov DWORD PTR [rsp+12], ebp + + movq rbx, xmm1 + movq rsi, xmm2 
+ movq rdi, xmm11 + movq rbp, xmm12 + movq r15, xmm13 + mov rcx, [rsp+104] + + mov rax, r8 + mul rdi + movq xmm1, rdx + movq xmm0, rax + punpcklqdq xmm1, xmm0 + mov rdi, rcx + mov r8, rax + pxor xmm1, XMMWORD PTR [rbp+rcx] + xor ebp, 48 + paddq xmm1, xmm8 + xor r8, QWORD PTR [rbp+rcx+8] + xor rdx, QWORD PTR [rbp+rcx] + add r13, r8 + movdqu xmm2, XMMWORD PTR [rbp+rcx] + add r15, rdx + movdqu XMMWORD PTR [rbp+rcx], xmm1 + paddq xmm2, xmm4 + xor ebp, 16 + mov eax, ebp + xor rax, 32 + movdqu xmm0, XMMWORD PTR [rbp+rcx] + movdqu XMMWORD PTR [rbp+rcx], xmm2 + paddq xmm0, xmm10 + movdqu XMMWORD PTR [rax+rcx], xmm0 + movq rax, xmm3 + movdqa xmm10, xmm8 + mov QWORD PTR [r10+rcx], r15 + movdqa xmm8, xmm5 + xor r15, rax + mov QWORD PTR [r10+rcx+8], r13 + mov r8d, r15d + xor r13, r9 + and r8d, 2097136 + dec r11d + jnz CryptonightR_template_double_mainloop + +CryptonightR_template_double_part4: + + mov rbx, QWORD PTR [rsp+400] + movaps xmm6, XMMWORD PTR [rsp+160] + movaps xmm7, XMMWORD PTR [rsp+176] + movaps xmm8, XMMWORD PTR [rsp+192] + movaps xmm9, XMMWORD PTR [rsp+208] + movaps xmm10, XMMWORD PTR [rsp+224] + movaps xmm11, XMMWORD PTR [rsp+240] + movaps xmm12, XMMWORD PTR [rsp+256] + movaps xmm13, XMMWORD PTR [rsp+272] + movaps xmm14, XMMWORD PTR [rsp+288] + movaps xmm15, XMMWORD PTR [rsp+304] + add rsp, 320 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + ret 0 +CryptonightR_template_double_end: diff --git a/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc new file mode 100644 index 00000000..aa5101a8 --- /dev/null +++ b/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc @@ -0,0 +1,410 @@ + mov rax, rsp + push rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 184 + + stmxcsr DWORD PTR [rsp+272] + mov DWORD PTR [rsp+276], 24448 + ldmxcsr DWORD PTR [rsp+276] + + mov r13, QWORD PTR [rcx+224] + mov r9, rdx + mov r10, QWORD PTR [rcx+32] + mov 
r8, rcx + xor r10, QWORD PTR [rcx] + mov r14d, 524288 + mov r11, QWORD PTR [rcx+40] + xor r11, QWORD PTR [rcx+8] + mov rsi, QWORD PTR [rdx+224] + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov rdi, QWORD PTR [r9+32] + xor rdi, QWORD PTR [r9] + mov rbp, QWORD PTR [r9+40] + xor rbp, QWORD PTR [r9+8] + movq xmm0, rdx + movaps XMMWORD PTR [rax-88], xmm6 + movaps XMMWORD PTR [rax-104], xmm7 + movaps XMMWORD PTR [rax-120], xmm8 + movaps XMMWORD PTR [rsp+112], xmm9 + movaps XMMWORD PTR [rsp+96], xmm10 + movaps XMMWORD PTR [rsp+80], xmm11 + movaps XMMWORD PTR [rsp+64], xmm12 + movaps XMMWORD PTR [rsp+48], xmm13 + movaps XMMWORD PTR [rsp+32], xmm14 + movaps XMMWORD PTR [rsp+16], xmm15 + mov rdx, r10 + movq xmm4, QWORD PTR [r8+96] + and edx, 2097136 + mov rax, QWORD PTR [rcx+48] + xorps xmm13, xmm13 + xor rax, QWORD PTR [rcx+16] + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r8+72] + movq xmm5, QWORD PTR [r8+104] + movq xmm7, rax + + mov eax, 1 + shl rax, 52 + movq xmm14, rax + punpcklqdq xmm14, xmm14 + + mov eax, 1023 + shl rax, 52 + movq xmm12, rax + punpcklqdq xmm12, xmm12 + + mov rax, QWORD PTR [r8+80] + xor rax, QWORD PTR [r8+64] + punpcklqdq xmm7, xmm0 + movq xmm0, rcx + mov rcx, QWORD PTR [r9+56] + xor rcx, QWORD PTR [r9+24] + movq xmm3, rax + mov rax, QWORD PTR [r9+48] + xor rax, QWORD PTR [r9+16] + punpcklqdq xmm3, xmm0 + movq xmm0, rcx + mov QWORD PTR [rsp], r13 + mov rcx, QWORD PTR [r9+88] + xor rcx, QWORD PTR [r9+72] + movq xmm6, rax + mov rax, QWORD PTR [r9+80] + xor rax, QWORD PTR [r9+64] + punpcklqdq xmm6, xmm0 + movq xmm0, rcx + mov QWORD PTR [rsp+256], r10 + mov rcx, rdi + mov QWORD PTR [rsp+264], r11 + movq xmm8, rax + and ecx, 2097136 + punpcklqdq xmm8, xmm0 + movq xmm0, QWORD PTR [r9+96] + punpcklqdq xmm4, xmm0 + movq xmm0, QWORD PTR [r9+104] + lea r8, QWORD PTR [rcx+rsi] + movdqu xmm11, XMMWORD PTR [r8] + punpcklqdq xmm5, xmm0 + lea r9, QWORD PTR [rdx+r13] + movdqu xmm15, XMMWORD PTR [r9] + + ALIGN(64) +main_loop_double_sandybridge: 
+ movdqu xmm9, xmm15 + mov eax, edx + mov ebx, edx + xor eax, 16 + xor ebx, 32 + xor edx, 48 + + movq xmm0, r11 + movq xmm2, r10 + punpcklqdq xmm2, xmm0 + aesenc xmm9, xmm2 + + movdqu xmm0, XMMWORD PTR [rax+r13] + movdqu xmm1, XMMWORD PTR [rbx+r13] + paddq xmm0, xmm7 + paddq xmm1, xmm2 + movdqu XMMWORD PTR [rbx+r13], xmm0 + movdqu xmm0, XMMWORD PTR [rdx+r13] + movdqu XMMWORD PTR [rdx+r13], xmm1 + paddq xmm0, xmm3 + movdqu XMMWORD PTR [rax+r13], xmm0 + + movq r11, xmm9 + mov edx, r11d + and edx, 2097136 + movdqa xmm0, xmm9 + pxor xmm0, xmm7 + movdqu XMMWORD PTR [r9], xmm0 + + lea rbx, QWORD PTR [rdx+r13] + mov r10, QWORD PTR [rdx+r13] + + movdqu xmm10, xmm11 + movq xmm0, rbp + movq xmm11, rdi + punpcklqdq xmm11, xmm0 + aesenc xmm10, xmm11 + + mov eax, ecx + mov r12d, ecx + xor eax, 16 + xor r12d, 32 + xor ecx, 48 + + movdqu xmm0, XMMWORD PTR [rax+rsi] + paddq xmm0, xmm6 + movdqu xmm1, XMMWORD PTR [r12+rsi] + movdqu XMMWORD PTR [r12+rsi], xmm0 + paddq xmm1, xmm11 + movdqu xmm0, XMMWORD PTR [rcx+rsi] + movdqu XMMWORD PTR [rcx+rsi], xmm1 + paddq xmm0, xmm8 + movdqu XMMWORD PTR [rax+rsi], xmm0 + + movq rcx, xmm10 + and ecx, 2097136 + + movdqa xmm0, xmm10 + pxor xmm0, xmm6 + movdqu XMMWORD PTR [r8], xmm0 + mov r12, QWORD PTR [rcx+rsi] + + mov r9, QWORD PTR [rbx+8] + + xor edx, 16 + mov r8d, edx + mov r15d, edx + + movq rdx, xmm5 + shl rdx, 32 + movq rax, xmm4 + xor rdx, rax + xor r10, rdx + mov rax, r10 + mul r11 + mov r11d, r8d + xor r11d, 48 + movq xmm0, rdx + xor rdx, [r11+r13] + movq xmm1, rax + xor rax, [r11+r13+8] + punpcklqdq xmm0, xmm1 + + pxor xmm0, XMMWORD PTR [r8+r13] + xor r8d, 32 + movdqu xmm1, XMMWORD PTR [r11+r13] + paddq xmm0, xmm7 + paddq xmm1, xmm2 + movdqu XMMWORD PTR [r11+r13], xmm0 + movdqu xmm0, XMMWORD PTR [r8+r13] + movdqu XMMWORD PTR [r8+r13], xmm1 + paddq xmm0, xmm3 + movdqu XMMWORD PTR [r15+r13], xmm0 + + mov r11, QWORD PTR [rsp+256] + add r11, rdx + mov rdx, QWORD PTR [rsp+264] + add rdx, rax + mov QWORD PTR [rbx], r11 + xor r11, r10 + mov 
QWORD PTR [rbx+8], rdx + xor rdx, r9 + mov QWORD PTR [rsp+256], r11 + and r11d, 2097136 + mov QWORD PTR [rsp+264], rdx + mov QWORD PTR [rsp+8], r11 + lea r15, QWORD PTR [r11+r13] + movdqu xmm15, XMMWORD PTR [r11+r13] + lea r13, QWORD PTR [rsi+rcx] + movdqa xmm0, xmm5 + psrldq xmm0, 8 + movaps xmm2, xmm13 + movq r10, xmm0 + psllq xmm5, 1 + shl r10, 32 + movdqa xmm0, xmm9 + psrldq xmm0, 8 + movdqa xmm1, xmm10 + movq r11, xmm0 + psrldq xmm1, 8 + movq r8, xmm1 + psrldq xmm4, 8 + movaps xmm0, xmm13 + movq rax, xmm4 + xor r10, rax + movaps xmm1, xmm13 + xor r10, r12 + lea rax, QWORD PTR [r11+1] + shr rax, 1 + movdqa xmm3, xmm9 + punpcklqdq xmm3, xmm10 + paddq xmm5, xmm3 + movq rdx, xmm5 + psrldq xmm5, 8 + cvtsi2sd xmm2, rax + or edx, -2147483647 + lea rax, QWORD PTR [r8+1] + shr rax, 1 + movq r9, xmm5 + cvtsi2sd xmm0, rax + or r9d, -2147483647 + cvtsi2sd xmm1, rdx + unpcklpd xmm2, xmm0 + movaps xmm0, xmm13 + cvtsi2sd xmm0, r9 + unpcklpd xmm1, xmm0 + divpd xmm2, xmm1 + paddq xmm2, xmm14 + cvttsd2si rax, xmm2 + psrldq xmm2, 8 + mov rbx, rax + imul rax, rdx + sub r11, rax + js div_fix_1_sandybridge +div_fix_1_ret_sandybridge: + + cvttsd2si rdx, xmm2 + mov rax, rdx + imul rax, r9 + movd xmm2, r11d + movd xmm4, ebx + sub r8, rax + js div_fix_2_sandybridge +div_fix_2_ret_sandybridge: + + movd xmm1, r8d + movd xmm0, edx + punpckldq xmm2, xmm1 + punpckldq xmm4, xmm0 + punpckldq xmm4, xmm2 + paddq xmm3, xmm4 + movdqa xmm0, xmm3 + psrlq xmm0, 12 + paddq xmm0, xmm12 + sqrtpd xmm1, xmm0 + movq r9, xmm1 + movdqa xmm5, xmm1 + psrlq xmm5, 19 + test r9, 524287 + je sqrt_fix_1_sandybridge +sqrt_fix_1_ret_sandybridge: + + movq r9, xmm10 + psrldq xmm1, 8 + movq r8, xmm1 + test r8, 524287 + je sqrt_fix_2_sandybridge +sqrt_fix_2_ret_sandybridge: + + mov r12d, ecx + mov r8d, ecx + xor r12d, 16 + xor r8d, 32 + xor ecx, 48 + mov rax, r10 + mul r9 + movq xmm0, rax + movq xmm3, rdx + punpcklqdq xmm3, xmm0 + + movdqu xmm0, XMMWORD PTR [r12+rsi] + pxor xmm0, xmm3 + movdqu xmm1, XMMWORD PTR [r8+rsi] 
+ xor rdx, [r8+rsi] + xor rax, [r8+rsi+8] + movdqu xmm3, XMMWORD PTR [rcx+rsi] + paddq xmm0, xmm6 + paddq xmm1, xmm11 + paddq xmm3, xmm8 + movdqu XMMWORD PTR [r8+rsi], xmm0 + movdqu XMMWORD PTR [rcx+rsi], xmm1 + movdqu XMMWORD PTR [r12+rsi], xmm3 + + add rdi, rdx + mov QWORD PTR [r13], rdi + xor rdi, r10 + mov ecx, edi + and ecx, 2097136 + lea r8, QWORD PTR [rcx+rsi] + + mov rdx, QWORD PTR [r13+8] + add rbp, rax + mov QWORD PTR [r13+8], rbp + movdqu xmm11, XMMWORD PTR [rcx+rsi] + xor rbp, rdx + mov r13, QWORD PTR [rsp] + movdqa xmm3, xmm7 + mov rdx, QWORD PTR [rsp+8] + movdqa xmm8, xmm6 + mov r10, QWORD PTR [rsp+256] + movdqa xmm7, xmm9 + mov r11, QWORD PTR [rsp+264] + movdqa xmm6, xmm10 + mov r9, r15 + dec r14d + jne main_loop_double_sandybridge + + ldmxcsr DWORD PTR [rsp+272] + movaps xmm13, XMMWORD PTR [rsp+48] + lea r11, QWORD PTR [rsp+184] + movaps xmm6, XMMWORD PTR [r11-24] + movaps xmm7, XMMWORD PTR [r11-40] + movaps xmm8, XMMWORD PTR [r11-56] + movaps xmm9, XMMWORD PTR [r11-72] + movaps xmm10, XMMWORD PTR [r11-88] + movaps xmm11, XMMWORD PTR [r11-104] + movaps xmm12, XMMWORD PTR [r11-120] + movaps xmm14, XMMWORD PTR [rsp+32] + movaps xmm15, XMMWORD PTR [rsp+16] + mov rsp, r11 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + pop rbx + jmp cnv2_double_mainloop_asm_sandybridge_endp + +div_fix_1_sandybridge: + dec rbx + add r11, rdx + jmp div_fix_1_ret_sandybridge + +div_fix_2_sandybridge: + dec rdx + add r8, r9 + jmp div_fix_2_ret_sandybridge + +sqrt_fix_1_sandybridge: + movq r8, xmm3 + movdqa xmm0, xmm5 + psrldq xmm0, 8 + dec r9 + mov r11d, -1022 + shl r11, 32 + mov rax, r9 + shr r9, 19 + shr rax, 20 + mov rdx, r9 + sub rdx, rax + lea rdx, [rdx+r11+1] + add rax, r11 + imul rdx, rax + sub rdx, r8 + adc r9, 0 + movq xmm5, r9 + punpcklqdq xmm5, xmm0 + jmp sqrt_fix_1_ret_sandybridge + +sqrt_fix_2_sandybridge: + psrldq xmm3, 8 + movq r11, xmm3 + dec r8 + mov ebx, -1022 + shl rbx, 32 + mov rax, r8 + shr r8, 19 + shr rax, 20 + mov rdx, r8 + 
sub rdx, rax + lea rdx, [rdx+rbx+1] + add rax, rbx + imul rdx, rax + sub rdx, r11 + adc r8, 0 + movq xmm0, r8 + punpcklqdq xmm5, xmm0 + jmp sqrt_fix_2_ret_sandybridge + +cnv2_double_mainloop_asm_sandybridge_endp: diff --git a/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc b/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc new file mode 100644 index 00000000..c764501d --- /dev/null +++ b/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc @@ -0,0 +1,180 @@ + mov QWORD PTR [rsp+16], rbx + mov QWORD PTR [rsp+24], rbp + mov QWORD PTR [rsp+32], rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 64 + + stmxcsr DWORD PTR [rsp] + mov DWORD PTR [rsp+4], 24448 + ldmxcsr DWORD PTR [rsp+4] + + mov rax, QWORD PTR [rcx+48] + mov r9, rcx + xor rax, QWORD PTR [rcx+16] + mov ebp, 524288 + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + mov r11, QWORD PTR [rcx+40] + mov r10, r8 + mov rdx, QWORD PTR [rcx+56] + movq xmm3, rax + xor rdx, QWORD PTR [rcx+24] + xor r11, QWORD PTR [rcx+8] + mov rbx, QWORD PTR [rcx+224] + mov rax, QWORD PTR [r9+80] + xor rax, QWORD PTR [r9+64] + movq xmm0, rdx + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r9+72] + mov rdi, QWORD PTR [r9+104] + and r10d, 2097136 + movaps XMMWORD PTR [rsp+48], xmm6 + movq xmm4, rax + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+16], xmm8 + xorps xmm8, xmm8 + mov ax, 1023 + shl rax, 52 + movq xmm7, rax + mov r15, QWORD PTR [r9+96] + punpcklqdq xmm3, xmm0 + movq xmm0, rcx + punpcklqdq xmm4, xmm0 + + ALIGN(64) +cnv2_main_loop_bulldozer: + movdqa xmm5, XMMWORD PTR [r10+rbx] + movq xmm6, r8 + pinsrq xmm6, r11, 1 + lea rdx, QWORD PTR [r10+rbx] + lea r9, QWORD PTR [rdi+rdi] + shl rdi, 32 + + mov ecx, r10d + mov eax, r10d + xor ecx, 16 + xor eax, 32 + xor r10d, 48 + aesenc xmm5, xmm6 + movdqa xmm2, XMMWORD PTR [rcx+rbx] + movdqa xmm1, XMMWORD PTR [rax+rbx] + movdqa xmm0, XMMWORD PTR [r10+rbx] + paddq xmm2, xmm3 + paddq xmm1, xmm6 + paddq xmm0, xmm4 + movdqa XMMWORD PTR [rcx+rbx], xmm0 + 
movdqa XMMWORD PTR [rax+rbx], xmm2 + movdqa XMMWORD PTR [r10+rbx], xmm1 + + movaps xmm1, xmm8 + mov rsi, r15 + xor rsi, rdi + + mov edi, 1023 + shl rdi, 52 + + movq r14, xmm5 + pextrq rax, xmm5, 1 + + movdqa xmm0, xmm5 + pxor xmm0, xmm3 + mov r10, r14 + and r10d, 2097136 + movdqa XMMWORD PTR [rdx], xmm0 + xor rsi, QWORD PTR [r10+rbx] + lea r12, QWORD PTR [r10+rbx] + mov r13, QWORD PTR [r10+rbx+8] + + add r9d, r14d + or r9d, -2147483647 + xor edx, edx + div r9 + mov eax, eax + shl rdx, 32 + lea r15, [rax+rdx] + lea rax, [r14+r15] + shr rax, 12 + add rax, rdi + movq xmm0, rax + sqrtsd xmm1, xmm0 + movq rdi, xmm1 + test rdi, 524287 + je sqrt_fixup_bulldozer + shr rdi, 19 + +sqrt_fixup_bulldozer_ret: + mov rax, rsi + mul r14 + movq xmm1, rax + movq xmm0, rdx + punpcklqdq xmm0, xmm1 + + mov r9d, r10d + mov ecx, r10d + xor r9d, 16 + xor ecx, 32 + xor r10d, 48 + movdqa xmm1, XMMWORD PTR [rcx+rbx] + xor rdx, [rcx+rbx] + xor rax, [rcx+rbx+8] + movdqa xmm2, XMMWORD PTR [r9+rbx] + pxor xmm2, xmm0 + paddq xmm4, XMMWORD PTR [r10+rbx] + paddq xmm2, xmm3 + paddq xmm1, xmm6 + movdqa XMMWORD PTR [r9+rbx], xmm4 + movdqa XMMWORD PTR [rcx+rbx], xmm2 + movdqa XMMWORD PTR [r10+rbx], xmm1 + + movdqa xmm4, xmm3 + add r8, rdx + add r11, rax + mov QWORD PTR [r12], r8 + xor r8, rsi + mov QWORD PTR [r12+8], r11 + mov r10, r8 + xor r11, r13 + and r10d, 2097136 + movdqa xmm3, xmm5 + dec ebp + jne cnv2_main_loop_bulldozer + + ldmxcsr DWORD PTR [rsp] + movaps xmm6, XMMWORD PTR [rsp+48] + lea r11, QWORD PTR [rsp+64] + mov rbx, QWORD PTR [r11+56] + mov rbp, QWORD PTR [r11+64] + mov rsi, QWORD PTR [r11+72] + movaps xmm8, XMMWORD PTR [r11-48] + movaps xmm7, XMMWORD PTR [rsp+32] + mov rsp, r11 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + jmp cnv2_main_loop_bulldozer_endp + +sqrt_fixup_bulldozer: + movq r9, xmm5 + add r9, r15 + dec rdi + mov edx, -1022 + shl rdx, 32 + mov rax, rdi + shr rdi, 19 + shr rax, 20 + mov rcx, rdi + sub rcx, rax + lea rcx, [rcx+rdx+1] + add rax, rdx + imul rcx, rax + 
sub rcx, r9 + adc rdi, 0 + jmp sqrt_fixup_bulldozer_ret + +cnv2_main_loop_bulldozer_endp: diff --git a/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc new file mode 100644 index 00000000..06f1d28b --- /dev/null +++ b/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc @@ -0,0 +1,186 @@ + mov QWORD PTR [rsp+24], rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 80 + + stmxcsr DWORD PTR [rsp] + mov DWORD PTR [rsp+4], 24448 + ldmxcsr DWORD PTR [rsp+4] + + mov rax, QWORD PTR [rcx+48] + mov r9, rcx + xor rax, QWORD PTR [rcx+16] + mov esi, 524288 + mov r8, QWORD PTR [rcx+32] + mov r13d, -2147483647 + xor r8, QWORD PTR [rcx] + mov r11, QWORD PTR [rcx+40] + mov r10, r8 + mov rdx, QWORD PTR [rcx+56] + movq xmm4, rax + xor rdx, QWORD PTR [rcx+24] + xor r11, QWORD PTR [rcx+8] + mov rbx, QWORD PTR [rcx+224] + mov rax, QWORD PTR [r9+80] + xor rax, QWORD PTR [r9+64] + movq xmm0, rdx + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r9+72] + movq xmm3, QWORD PTR [r9+104] + movaps XMMWORD PTR [rsp+64], xmm6 + movaps XMMWORD PTR [rsp+48], xmm7 + movaps XMMWORD PTR [rsp+32], xmm8 + and r10d, 2097136 + movq xmm5, rax + + xor eax, eax + mov QWORD PTR [rsp+16], rax + + mov ax, 1023 + shl rax, 52 + movq xmm8, rax + mov r15, QWORD PTR [r9+96] + punpcklqdq xmm4, xmm0 + movq xmm0, rcx + punpcklqdq xmm5, xmm0 + movdqu xmm6, XMMWORD PTR [r10+rbx] + + ALIGN(64) +main_loop_ivybridge: + lea rdx, QWORD PTR [r10+rbx] + mov ecx, r10d + mov eax, r10d + mov rdi, r15 + xor ecx, 16 + xor eax, 32 + xor r10d, 48 + movq xmm0, r11 + movq xmm7, r8 + punpcklqdq xmm7, xmm0 + aesenc xmm6, xmm7 + movq rbp, xmm6 + mov r9, rbp + and r9d, 2097136 + movdqu xmm2, XMMWORD PTR [rcx+rbx] + movdqu xmm1, XMMWORD PTR [rax+rbx] + movdqu xmm0, XMMWORD PTR [r10+rbx] + paddq xmm1, xmm7 + paddq xmm0, xmm5 + paddq xmm2, xmm4 + movdqu XMMWORD PTR [rcx+rbx], xmm0 + movdqu XMMWORD PTR [rax+rbx], xmm2 + movdqu XMMWORD PTR [r10+rbx], xmm1 + 
mov r10, r9 + xor r10d, 32 + movq rcx, xmm3 + mov rax, rcx + shl rax, 32 + xor rdi, rax + movdqa xmm0, xmm6 + pxor xmm0, xmm4 + movdqu XMMWORD PTR [rdx], xmm0 + xor rdi, QWORD PTR [r9+rbx] + lea r14, QWORD PTR [r9+rbx] + mov r12, QWORD PTR [r14+8] + xor edx, edx + lea r9d, DWORD PTR [ecx+ecx] + add r9d, ebp + movdqa xmm0, xmm6 + psrldq xmm0, 8 + or r9d, r13d + movq rax, xmm0 + div r9 + xorps xmm3, xmm3 + mov eax, eax + shl rdx, 32 + add rdx, rax + lea r9, QWORD PTR [rdx+rbp] + mov r15, rdx + mov rax, r9 + shr rax, 12 + movq xmm0, rax + paddq xmm0, xmm8 + sqrtsd xmm3, xmm0 + psubq xmm3, XMMWORD PTR [rsp+16] + movq rdx, xmm3 + test edx, 524287 + je sqrt_fixup_ivybridge + psrlq xmm3, 19 +sqrt_fixup_ivybridge_ret: + + mov ecx, r10d + mov rax, rdi + mul rbp + movq xmm2, rdx + xor rdx, [rcx+rbx] + add r8, rdx + mov QWORD PTR [r14], r8 + xor r8, rdi + mov edi, r8d + and edi, 2097136 + movq xmm0, rax + xor rax, [rcx+rbx+8] + add r11, rax + mov QWORD PTR [r14+8], r11 + punpcklqdq xmm2, xmm0 + + mov r9d, r10d + xor r9d, 48 + xor r10d, 16 + pxor xmm2, XMMWORD PTR [r9+rbx] + movdqu xmm0, XMMWORD PTR [r10+rbx] + paddq xmm0, xmm5 + movdqu xmm1, XMMWORD PTR [rcx+rbx] + paddq xmm2, xmm4 + paddq xmm1, xmm7 + movdqa xmm5, xmm4 + movdqu XMMWORD PTR [r9+rbx], xmm0 + movdqa xmm4, xmm6 + movdqu XMMWORD PTR [rcx+rbx], xmm2 + movdqu XMMWORD PTR [r10+rbx], xmm1 + movdqu xmm6, [rdi+rbx] + mov r10d, edi + xor r11, r12 + dec rsi + jne main_loop_ivybridge + + ldmxcsr DWORD PTR [rsp] + mov rbx, QWORD PTR [rsp+160] + movaps xmm6, XMMWORD PTR [rsp+64] + movaps xmm7, XMMWORD PTR [rsp+48] + movaps xmm8, XMMWORD PTR [rsp+32] + add rsp, 80 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + jmp cnv2_main_loop_ivybridge_endp + +sqrt_fixup_ivybridge: + dec rdx + mov r13d, -1022 + shl r13, 32 + mov rax, rdx + shr rdx, 19 + shr rax, 20 + mov rcx, rdx + sub rcx, rax + add rax, r13 + not r13 + sub rcx, r13 + mov r13d, -2147483647 + imul rcx, rax + sub rcx, r9 + adc rdx, 0 + movq xmm3, 
rdx + jmp sqrt_fixup_ivybridge_ret + +cnv2_main_loop_ivybridge_endp: diff --git a/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc b/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc new file mode 100644 index 00000000..5dbf5917 --- /dev/null +++ b/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc @@ -0,0 +1,179 @@ + mov QWORD PTR [rsp+16], rbx + mov QWORD PTR [rsp+24], rbp + mov QWORD PTR [rsp+32], rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 64 + + stmxcsr DWORD PTR [rsp] + mov DWORD PTR [rsp+4], 24448 + ldmxcsr DWORD PTR [rsp+4] + + mov rax, QWORD PTR [rcx+48] + mov r9, rcx + xor rax, QWORD PTR [rcx+16] + mov ebp, 524288 + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + mov r11, QWORD PTR [rcx+40] + mov r10, r8 + mov rdx, QWORD PTR [rcx+56] + movq xmm3, rax + xor rdx, QWORD PTR [rcx+24] + xor r11, QWORD PTR [rcx+8] + mov rbx, QWORD PTR [rcx+224] + mov rax, QWORD PTR [r9+80] + xor rax, QWORD PTR [r9+64] + movq xmm0, rdx + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r9+72] + mov rdi, QWORD PTR [r9+104] + and r10d, 2097136 + movaps XMMWORD PTR [rsp+48], xmm6 + movq xmm4, rax + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+16], xmm8 + xorps xmm8, xmm8 + mov ax, 1023 + shl rax, 52 + movq xmm7, rax + mov r15, QWORD PTR [r9+96] + punpcklqdq xmm3, xmm0 + movq xmm0, rcx + punpcklqdq xmm4, xmm0 + + ALIGN(64) +main_loop_ryzen: + movdqa xmm5, XMMWORD PTR [r10+rbx] + movq xmm0, r11 + movq xmm6, r8 + punpcklqdq xmm6, xmm0 + lea rdx, QWORD PTR [r10+rbx] + lea r9, QWORD PTR [rdi+rdi] + shl rdi, 32 + + mov ecx, r10d + mov eax, r10d + xor ecx, 16 + xor eax, 32 + xor r10d, 48 + aesenc xmm5, xmm6 + movdqa xmm2, XMMWORD PTR [rcx+rbx] + movdqa xmm1, XMMWORD PTR [rax+rbx] + movdqa xmm0, XMMWORD PTR [r10+rbx] + paddq xmm2, xmm3 + paddq xmm1, xmm6 + paddq xmm0, xmm4 + movdqa XMMWORD PTR [rcx+rbx], xmm0 + movdqa XMMWORD PTR [rax+rbx], xmm2 + movdqa XMMWORD PTR [r10+rbx], xmm1 + + movaps xmm1, xmm8 + mov rsi, r15 + xor rsi, rdi + movq r14, xmm5 + movdqa 
xmm0, xmm5 + pxor xmm0, xmm3 + mov r10, r14 + and r10d, 2097136 + movdqa XMMWORD PTR [rdx], xmm0 + xor rsi, QWORD PTR [r10+rbx] + lea r12, QWORD PTR [r10+rbx] + mov r13, QWORD PTR [r10+rbx+8] + + add r9d, r14d + or r9d, -2147483647 + xor edx, edx + movdqa xmm0, xmm5 + psrldq xmm0, 8 + movq rax, xmm0 + + div r9 + movq xmm0, rax + movq xmm1, rdx + punpckldq xmm0, xmm1 + movq r15, xmm0 + paddq xmm0, xmm5 + movdqa xmm2, xmm0 + psrlq xmm0, 12 + paddq xmm0, xmm7 + sqrtsd xmm1, xmm0 + movq rdi, xmm1 + test rdi, 524287 + je sqrt_fixup_ryzen + shr rdi, 19 + +sqrt_fixup_ryzen_ret: + mov rax, rsi + mul r14 + movq xmm1, rax + movq xmm0, rdx + punpcklqdq xmm0, xmm1 + + mov r9d, r10d + mov ecx, r10d + xor r9d, 16 + xor ecx, 32 + xor r10d, 48 + movdqa xmm1, XMMWORD PTR [rcx+rbx] + xor rdx, [rcx+rbx] + xor rax, [rcx+rbx+8] + movdqa xmm2, XMMWORD PTR [r9+rbx] + pxor xmm2, xmm0 + paddq xmm4, XMMWORD PTR [r10+rbx] + paddq xmm2, xmm3 + paddq xmm1, xmm6 + movdqa XMMWORD PTR [r9+rbx], xmm4 + movdqa XMMWORD PTR [rcx+rbx], xmm2 + movdqa XMMWORD PTR [r10+rbx], xmm1 + + movdqa xmm4, xmm3 + add r8, rdx + add r11, rax + mov QWORD PTR [r12], r8 + xor r8, rsi + mov QWORD PTR [r12+8], r11 + mov r10, r8 + xor r11, r13 + and r10d, 2097136 + movdqa xmm3, xmm5 + dec ebp + jne main_loop_ryzen + + ldmxcsr DWORD PTR [rsp] + movaps xmm6, XMMWORD PTR [rsp+48] + lea r11, QWORD PTR [rsp+64] + mov rbx, QWORD PTR [r11+56] + mov rbp, QWORD PTR [r11+64] + mov rsi, QWORD PTR [r11+72] + movaps xmm8, XMMWORD PTR [r11-48] + movaps xmm7, XMMWORD PTR [rsp+32] + mov rsp, r11 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + jmp cnv2_main_loop_ryzen_endp + +sqrt_fixup_ryzen: + movq r9, xmm2 + dec rdi + mov edx, -1022 + shl rdx, 32 + mov rax, rdi + shr rdi, 19 + shr rax, 20 + mov rcx, rdi + sub rcx, rax + lea rcx, [rcx+rdx+1] + add rax, rdx + imul rcx, rax + sub rcx, r9 + adc rdi, 0 + jmp sqrt_fixup_ryzen_ret + +cnv2_main_loop_ryzen_endp: diff --git a/src/crypto/asm/cn_main_loop.S b/src/crypto/asm/cn_main_loop.S new 
file mode 100644 index 00000000..a792337f --- /dev/null +++ b/src/crypto/asm/cn_main_loop.S @@ -0,0 +1,54 @@ +#ifdef __APPLE__ +# define ALIGN(x) .align 6 +#else +# define ALIGN(x) .align 64 +#endif +.intel_syntax noprefix +#ifdef __APPLE__ +# define FN_PREFIX(fn) _ ## fn +.text +#else +# define FN_PREFIX(fn) fn +.section .text +#endif +.global FN_PREFIX(cnv2_mainloop_ivybridge_asm) +.global FN_PREFIX(cnv2_mainloop_ryzen_asm) +.global FN_PREFIX(cnv2_mainloop_bulldozer_asm) +.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm) + +ALIGN(64) +FN_PREFIX(cnv2_mainloop_ivybridge_asm): + sub rsp, 48 + mov rcx, rdi + #include "cn2/cnv2_main_loop_ivybridge.inc" + add rsp, 48 + ret 0 + mov eax, 3735929054 + +ALIGN(64) +FN_PREFIX(cnv2_mainloop_ryzen_asm): + sub rsp, 48 + mov rcx, rdi + #include "cn2/cnv2_main_loop_ryzen.inc" + add rsp, 48 + ret 0 + mov eax, 3735929054 + +ALIGN(64) +FN_PREFIX(cnv2_mainloop_bulldozer_asm): + sub rsp, 48 + mov rcx, rdi + #include "cn2/cnv2_main_loop_bulldozer.inc" + add rsp, 48 + ret 0 + mov eax, 3735929054 + +ALIGN(64) +FN_PREFIX(cnv2_double_mainloop_sandybridge_asm): + sub rsp, 48 + mov rcx, rdi + mov rdx, rsi + #include "cn2/cnv2_double_main_loop_sandybridge.inc" + add rsp, 48 + ret 0 + mov eax, 3735929054 diff --git a/src/crypto/asm/cn_main_loop.asm b/src/crypto/asm/cn_main_loop.asm new file mode 100644 index 00000000..f1384be8 --- /dev/null +++ b/src/crypto/asm/cn_main_loop.asm @@ -0,0 +1,36 @@ +_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE +PUBLIC cnv2_mainloop_ivybridge_asm +PUBLIC cnv2_mainloop_ryzen_asm +PUBLIC cnv2_mainloop_bulldozer_asm +PUBLIC cnv2_double_mainloop_sandybridge_asm + +ALIGN(64) +cnv2_mainloop_ivybridge_asm PROC + INCLUDE cn2/cnv2_main_loop_ivybridge.inc + ret 0 + mov eax, 3735929054 +cnv2_mainloop_ivybridge_asm ENDP + +ALIGN(64) +cnv2_mainloop_ryzen_asm PROC + INCLUDE cn2/cnv2_main_loop_ryzen.inc + ret 0 + mov eax, 3735929054 +cnv2_mainloop_ryzen_asm ENDP + +ALIGN(64) +cnv2_mainloop_bulldozer_asm PROC + INCLUDE 
cn2/cnv2_main_loop_bulldozer.inc + ret 0 + mov eax, 3735929054 +cnv2_mainloop_bulldozer_asm ENDP + +ALIGN(64) +cnv2_double_mainloop_sandybridge_asm PROC + INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc + ret 0 + mov eax, 3735929054 +cnv2_double_mainloop_sandybridge_asm ENDP + +_TEXT_CNV2_MAINLOOP ENDS +END diff --git a/src/crypto/asm/win64/CryptonightR_template.S b/src/crypto/asm/win64/CryptonightR_template.S new file mode 100644 index 00000000..e8478beb --- /dev/null +++ b/src/crypto/asm/win64/CryptonightR_template.S @@ -0,0 +1,1592 @@ +#ifdef __APPLE__ +# define ALIGN(x) .align 6 /* x is ignored; NOTE(review): presumably .align takes a power-of-two exponent on Apple targets, so 6 would mean 64 bytes - confirm */ +#else +# define ALIGN(x) .align 64 /* x is ignored; the value is fixed at 64 */ +#endif +.intel_syntax noprefix +#ifdef __APPLE__ +# define FN_PREFIX(fn) _ ## fn /* Apple builds: paste a leading underscore onto the symbol name */ +.text +#else +# define FN_PREFIX(fn) fn /* other builds: symbol name is used unchanged */ +.section .text +#endif + +#define PUBLIC .global /* each PUBLIC line below therefore expands to a .global directive for the prefixed symbol */ + +PUBLIC FN_PREFIX(CryptonightR_instruction0) +PUBLIC FN_PREFIX(CryptonightR_instruction1) +PUBLIC FN_PREFIX(CryptonightR_instruction2) +PUBLIC FN_PREFIX(CryptonightR_instruction3) +PUBLIC FN_PREFIX(CryptonightR_instruction4) +PUBLIC FN_PREFIX(CryptonightR_instruction5) +PUBLIC FN_PREFIX(CryptonightR_instruction6) +PUBLIC FN_PREFIX(CryptonightR_instruction7) +PUBLIC FN_PREFIX(CryptonightR_instruction8) +PUBLIC FN_PREFIX(CryptonightR_instruction9) +PUBLIC FN_PREFIX(CryptonightR_instruction10) +PUBLIC FN_PREFIX(CryptonightR_instruction11) +PUBLIC FN_PREFIX(CryptonightR_instruction12) +PUBLIC FN_PREFIX(CryptonightR_instruction13) +PUBLIC FN_PREFIX(CryptonightR_instruction14) +PUBLIC FN_PREFIX(CryptonightR_instruction15) +PUBLIC FN_PREFIX(CryptonightR_instruction16) +PUBLIC FN_PREFIX(CryptonightR_instruction17) +PUBLIC FN_PREFIX(CryptonightR_instruction18) +PUBLIC FN_PREFIX(CryptonightR_instruction19) +PUBLIC FN_PREFIX(CryptonightR_instruction20) +PUBLIC FN_PREFIX(CryptonightR_instruction21) +PUBLIC FN_PREFIX(CryptonightR_instruction22) +PUBLIC FN_PREFIX(CryptonightR_instruction23) +PUBLIC FN_PREFIX(CryptonightR_instruction24) +PUBLIC FN_PREFIX(CryptonightR_instruction25) +PUBLIC
FN_PREFIX(CryptonightR_instruction26) +PUBLIC FN_PREFIX(CryptonightR_instruction27) +PUBLIC FN_PREFIX(CryptonightR_instruction28) +PUBLIC FN_PREFIX(CryptonightR_instruction29) +PUBLIC FN_PREFIX(CryptonightR_instruction30) +PUBLIC FN_PREFIX(CryptonightR_instruction31) +PUBLIC FN_PREFIX(CryptonightR_instruction32) +PUBLIC FN_PREFIX(CryptonightR_instruction33) +PUBLIC FN_PREFIX(CryptonightR_instruction34) +PUBLIC FN_PREFIX(CryptonightR_instruction35) +PUBLIC FN_PREFIX(CryptonightR_instruction36) +PUBLIC FN_PREFIX(CryptonightR_instruction37) +PUBLIC FN_PREFIX(CryptonightR_instruction38) +PUBLIC FN_PREFIX(CryptonightR_instruction39) +PUBLIC FN_PREFIX(CryptonightR_instruction40) +PUBLIC FN_PREFIX(CryptonightR_instruction41) +PUBLIC FN_PREFIX(CryptonightR_instruction42) +PUBLIC FN_PREFIX(CryptonightR_instruction43) +PUBLIC FN_PREFIX(CryptonightR_instruction44) +PUBLIC FN_PREFIX(CryptonightR_instruction45) +PUBLIC FN_PREFIX(CryptonightR_instruction46) +PUBLIC FN_PREFIX(CryptonightR_instruction47) +PUBLIC FN_PREFIX(CryptonightR_instruction48) +PUBLIC FN_PREFIX(CryptonightR_instruction49) +PUBLIC FN_PREFIX(CryptonightR_instruction50) +PUBLIC FN_PREFIX(CryptonightR_instruction51) +PUBLIC FN_PREFIX(CryptonightR_instruction52) +PUBLIC FN_PREFIX(CryptonightR_instruction53) +PUBLIC FN_PREFIX(CryptonightR_instruction54) +PUBLIC FN_PREFIX(CryptonightR_instruction55) +PUBLIC FN_PREFIX(CryptonightR_instruction56) +PUBLIC FN_PREFIX(CryptonightR_instruction57) +PUBLIC FN_PREFIX(CryptonightR_instruction58) +PUBLIC FN_PREFIX(CryptonightR_instruction59) +PUBLIC FN_PREFIX(CryptonightR_instruction60) +PUBLIC FN_PREFIX(CryptonightR_instruction61) +PUBLIC FN_PREFIX(CryptonightR_instruction62) +PUBLIC FN_PREFIX(CryptonightR_instruction63) +PUBLIC FN_PREFIX(CryptonightR_instruction64) +PUBLIC FN_PREFIX(CryptonightR_instruction65) +PUBLIC FN_PREFIX(CryptonightR_instruction66) +PUBLIC FN_PREFIX(CryptonightR_instruction67) +PUBLIC FN_PREFIX(CryptonightR_instruction68) +PUBLIC 
FN_PREFIX(CryptonightR_instruction69) +PUBLIC FN_PREFIX(CryptonightR_instruction70) +PUBLIC FN_PREFIX(CryptonightR_instruction71) +PUBLIC FN_PREFIX(CryptonightR_instruction72) +PUBLIC FN_PREFIX(CryptonightR_instruction73) +PUBLIC FN_PREFIX(CryptonightR_instruction74) +PUBLIC FN_PREFIX(CryptonightR_instruction75) +PUBLIC FN_PREFIX(CryptonightR_instruction76) +PUBLIC FN_PREFIX(CryptonightR_instruction77) +PUBLIC FN_PREFIX(CryptonightR_instruction78) +PUBLIC FN_PREFIX(CryptonightR_instruction79) +PUBLIC FN_PREFIX(CryptonightR_instruction80) +PUBLIC FN_PREFIX(CryptonightR_instruction81) +PUBLIC FN_PREFIX(CryptonightR_instruction82) +PUBLIC FN_PREFIX(CryptonightR_instruction83) +PUBLIC FN_PREFIX(CryptonightR_instruction84) +PUBLIC FN_PREFIX(CryptonightR_instruction85) +PUBLIC FN_PREFIX(CryptonightR_instruction86) +PUBLIC FN_PREFIX(CryptonightR_instruction87) +PUBLIC FN_PREFIX(CryptonightR_instruction88) +PUBLIC FN_PREFIX(CryptonightR_instruction89) +PUBLIC FN_PREFIX(CryptonightR_instruction90) +PUBLIC FN_PREFIX(CryptonightR_instruction91) +PUBLIC FN_PREFIX(CryptonightR_instruction92) +PUBLIC FN_PREFIX(CryptonightR_instruction93) +PUBLIC FN_PREFIX(CryptonightR_instruction94) +PUBLIC FN_PREFIX(CryptonightR_instruction95) +PUBLIC FN_PREFIX(CryptonightR_instruction96) +PUBLIC FN_PREFIX(CryptonightR_instruction97) +PUBLIC FN_PREFIX(CryptonightR_instruction98) +PUBLIC FN_PREFIX(CryptonightR_instruction99) +PUBLIC FN_PREFIX(CryptonightR_instruction100) +PUBLIC FN_PREFIX(CryptonightR_instruction101) +PUBLIC FN_PREFIX(CryptonightR_instruction102) +PUBLIC FN_PREFIX(CryptonightR_instruction103) +PUBLIC FN_PREFIX(CryptonightR_instruction104) +PUBLIC FN_PREFIX(CryptonightR_instruction105) +PUBLIC FN_PREFIX(CryptonightR_instruction106) +PUBLIC FN_PREFIX(CryptonightR_instruction107) +PUBLIC FN_PREFIX(CryptonightR_instruction108) +PUBLIC FN_PREFIX(CryptonightR_instruction109) +PUBLIC FN_PREFIX(CryptonightR_instruction110) +PUBLIC FN_PREFIX(CryptonightR_instruction111) +PUBLIC 
FN_PREFIX(CryptonightR_instruction112) +PUBLIC FN_PREFIX(CryptonightR_instruction113) +PUBLIC FN_PREFIX(CryptonightR_instruction114) +PUBLIC FN_PREFIX(CryptonightR_instruction115) +PUBLIC FN_PREFIX(CryptonightR_instruction116) +PUBLIC FN_PREFIX(CryptonightR_instruction117) +PUBLIC FN_PREFIX(CryptonightR_instruction118) +PUBLIC FN_PREFIX(CryptonightR_instruction119) +PUBLIC FN_PREFIX(CryptonightR_instruction120) +PUBLIC FN_PREFIX(CryptonightR_instruction121) +PUBLIC FN_PREFIX(CryptonightR_instruction122) +PUBLIC FN_PREFIX(CryptonightR_instruction123) +PUBLIC FN_PREFIX(CryptonightR_instruction124) +PUBLIC FN_PREFIX(CryptonightR_instruction125) +PUBLIC FN_PREFIX(CryptonightR_instruction126) +PUBLIC FN_PREFIX(CryptonightR_instruction127) +PUBLIC FN_PREFIX(CryptonightR_instruction128) +PUBLIC FN_PREFIX(CryptonightR_instruction129) +PUBLIC FN_PREFIX(CryptonightR_instruction130) +PUBLIC FN_PREFIX(CryptonightR_instruction131) +PUBLIC FN_PREFIX(CryptonightR_instruction132) +PUBLIC FN_PREFIX(CryptonightR_instruction133) +PUBLIC FN_PREFIX(CryptonightR_instruction134) +PUBLIC FN_PREFIX(CryptonightR_instruction135) +PUBLIC FN_PREFIX(CryptonightR_instruction136) +PUBLIC FN_PREFIX(CryptonightR_instruction137) +PUBLIC FN_PREFIX(CryptonightR_instruction138) +PUBLIC FN_PREFIX(CryptonightR_instruction139) +PUBLIC FN_PREFIX(CryptonightR_instruction140) +PUBLIC FN_PREFIX(CryptonightR_instruction141) +PUBLIC FN_PREFIX(CryptonightR_instruction142) +PUBLIC FN_PREFIX(CryptonightR_instruction143) +PUBLIC FN_PREFIX(CryptonightR_instruction144) +PUBLIC FN_PREFIX(CryptonightR_instruction145) +PUBLIC FN_PREFIX(CryptonightR_instruction146) +PUBLIC FN_PREFIX(CryptonightR_instruction147) +PUBLIC FN_PREFIX(CryptonightR_instruction148) +PUBLIC FN_PREFIX(CryptonightR_instruction149) +PUBLIC FN_PREFIX(CryptonightR_instruction150) +PUBLIC FN_PREFIX(CryptonightR_instruction151) +PUBLIC FN_PREFIX(CryptonightR_instruction152) +PUBLIC FN_PREFIX(CryptonightR_instruction153) +PUBLIC 
FN_PREFIX(CryptonightR_instruction154) +PUBLIC FN_PREFIX(CryptonightR_instruction155) +PUBLIC FN_PREFIX(CryptonightR_instruction156) +PUBLIC FN_PREFIX(CryptonightR_instruction157) +PUBLIC FN_PREFIX(CryptonightR_instruction158) +PUBLIC FN_PREFIX(CryptonightR_instruction159) +PUBLIC FN_PREFIX(CryptonightR_instruction160) +PUBLIC FN_PREFIX(CryptonightR_instruction161) +PUBLIC FN_PREFIX(CryptonightR_instruction162) +PUBLIC FN_PREFIX(CryptonightR_instruction163) +PUBLIC FN_PREFIX(CryptonightR_instruction164) +PUBLIC FN_PREFIX(CryptonightR_instruction165) +PUBLIC FN_PREFIX(CryptonightR_instruction166) +PUBLIC FN_PREFIX(CryptonightR_instruction167) +PUBLIC FN_PREFIX(CryptonightR_instruction168) +PUBLIC FN_PREFIX(CryptonightR_instruction169) +PUBLIC FN_PREFIX(CryptonightR_instruction170) +PUBLIC FN_PREFIX(CryptonightR_instruction171) +PUBLIC FN_PREFIX(CryptonightR_instruction172) +PUBLIC FN_PREFIX(CryptonightR_instruction173) +PUBLIC FN_PREFIX(CryptonightR_instruction174) +PUBLIC FN_PREFIX(CryptonightR_instruction175) +PUBLIC FN_PREFIX(CryptonightR_instruction176) +PUBLIC FN_PREFIX(CryptonightR_instruction177) +PUBLIC FN_PREFIX(CryptonightR_instruction178) +PUBLIC FN_PREFIX(CryptonightR_instruction179) +PUBLIC FN_PREFIX(CryptonightR_instruction180) +PUBLIC FN_PREFIX(CryptonightR_instruction181) +PUBLIC FN_PREFIX(CryptonightR_instruction182) +PUBLIC FN_PREFIX(CryptonightR_instruction183) +PUBLIC FN_PREFIX(CryptonightR_instruction184) +PUBLIC FN_PREFIX(CryptonightR_instruction185) +PUBLIC FN_PREFIX(CryptonightR_instruction186) +PUBLIC FN_PREFIX(CryptonightR_instruction187) +PUBLIC FN_PREFIX(CryptonightR_instruction188) +PUBLIC FN_PREFIX(CryptonightR_instruction189) +PUBLIC FN_PREFIX(CryptonightR_instruction190) +PUBLIC FN_PREFIX(CryptonightR_instruction191) +PUBLIC FN_PREFIX(CryptonightR_instruction192) +PUBLIC FN_PREFIX(CryptonightR_instruction193) +PUBLIC FN_PREFIX(CryptonightR_instruction194) +PUBLIC FN_PREFIX(CryptonightR_instruction195) +PUBLIC 
FN_PREFIX(CryptonightR_instruction196) +PUBLIC FN_PREFIX(CryptonightR_instruction197) +PUBLIC FN_PREFIX(CryptonightR_instruction198) +PUBLIC FN_PREFIX(CryptonightR_instruction199) +PUBLIC FN_PREFIX(CryptonightR_instruction200) +PUBLIC FN_PREFIX(CryptonightR_instruction201) +PUBLIC FN_PREFIX(CryptonightR_instruction202) +PUBLIC FN_PREFIX(CryptonightR_instruction203) +PUBLIC FN_PREFIX(CryptonightR_instruction204) +PUBLIC FN_PREFIX(CryptonightR_instruction205) +PUBLIC FN_PREFIX(CryptonightR_instruction206) +PUBLIC FN_PREFIX(CryptonightR_instruction207) +PUBLIC FN_PREFIX(CryptonightR_instruction208) +PUBLIC FN_PREFIX(CryptonightR_instruction209) +PUBLIC FN_PREFIX(CryptonightR_instruction210) +PUBLIC FN_PREFIX(CryptonightR_instruction211) +PUBLIC FN_PREFIX(CryptonightR_instruction212) +PUBLIC FN_PREFIX(CryptonightR_instruction213) +PUBLIC FN_PREFIX(CryptonightR_instruction214) +PUBLIC FN_PREFIX(CryptonightR_instruction215) +PUBLIC FN_PREFIX(CryptonightR_instruction216) +PUBLIC FN_PREFIX(CryptonightR_instruction217) +PUBLIC FN_PREFIX(CryptonightR_instruction218) +PUBLIC FN_PREFIX(CryptonightR_instruction219) +PUBLIC FN_PREFIX(CryptonightR_instruction220) +PUBLIC FN_PREFIX(CryptonightR_instruction221) +PUBLIC FN_PREFIX(CryptonightR_instruction222) +PUBLIC FN_PREFIX(CryptonightR_instruction223) +PUBLIC FN_PREFIX(CryptonightR_instruction224) +PUBLIC FN_PREFIX(CryptonightR_instruction225) +PUBLIC FN_PREFIX(CryptonightR_instruction226) +PUBLIC FN_PREFIX(CryptonightR_instruction227) +PUBLIC FN_PREFIX(CryptonightR_instruction228) +PUBLIC FN_PREFIX(CryptonightR_instruction229) +PUBLIC FN_PREFIX(CryptonightR_instruction230) +PUBLIC FN_PREFIX(CryptonightR_instruction231) +PUBLIC FN_PREFIX(CryptonightR_instruction232) +PUBLIC FN_PREFIX(CryptonightR_instruction233) +PUBLIC FN_PREFIX(CryptonightR_instruction234) +PUBLIC FN_PREFIX(CryptonightR_instruction235) +PUBLIC FN_PREFIX(CryptonightR_instruction236) +PUBLIC FN_PREFIX(CryptonightR_instruction237) +PUBLIC 
FN_PREFIX(CryptonightR_instruction238) +PUBLIC FN_PREFIX(CryptonightR_instruction239) +PUBLIC FN_PREFIX(CryptonightR_instruction240) +PUBLIC FN_PREFIX(CryptonightR_instruction241) +PUBLIC FN_PREFIX(CryptonightR_instruction242) +PUBLIC FN_PREFIX(CryptonightR_instruction243) +PUBLIC FN_PREFIX(CryptonightR_instruction244) +PUBLIC FN_PREFIX(CryptonightR_instruction245) +PUBLIC FN_PREFIX(CryptonightR_instruction246) +PUBLIC FN_PREFIX(CryptonightR_instruction247) +PUBLIC FN_PREFIX(CryptonightR_instruction248) +PUBLIC FN_PREFIX(CryptonightR_instruction249) +PUBLIC FN_PREFIX(CryptonightR_instruction250) +PUBLIC FN_PREFIX(CryptonightR_instruction251) +PUBLIC FN_PREFIX(CryptonightR_instruction252) +PUBLIC FN_PREFIX(CryptonightR_instruction253) +PUBLIC FN_PREFIX(CryptonightR_instruction254) +PUBLIC FN_PREFIX(CryptonightR_instruction255) +PUBLIC FN_PREFIX(CryptonightR_instruction256) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov0) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov1) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov2) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov3) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov4) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov5) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov6) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov7) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov8) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov9) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov10) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov11) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov12) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov13) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov14) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov15) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov16) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov17) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov18) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov19) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov20) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov21) +PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov22) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov23) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov24) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov25) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov26) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov27) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov28) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov29) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov30) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov31) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov32) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov33) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov34) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov35) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov36) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov37) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov38) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov39) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov40) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov41) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov42) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov43) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov44) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov45) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov46) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov47) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov48) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov49) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov50) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov51) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov52) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov53) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov54) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov55) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov56) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov57) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov58) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov59) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov60) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov61) +PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov62) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov63) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov64) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov65) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov66) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov67) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov68) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov69) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov70) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov71) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov72) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov73) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov74) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov75) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov76) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov77) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov78) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov79) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov80) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov81) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov82) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov83) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov84) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov85) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov86) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov87) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov88) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov89) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov90) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov91) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov92) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov93) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov94) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov95) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov96) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov97) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov98) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov99) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov100) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov101) 
+PUBLIC FN_PREFIX(CryptonightR_instruction_mov102) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov103) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov104) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov105) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov106) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov107) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov108) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov109) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov110) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov111) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov112) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov113) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov114) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov115) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov116) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov117) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov118) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov119) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov120) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov121) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov122) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov123) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov124) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov125) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov126) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov127) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov128) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov129) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov130) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov131) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov132) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov133) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov134) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov135) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov136) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov137) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov138) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov139) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov140) +PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov141) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov142) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov143) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov144) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov145) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov146) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov147) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov148) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov149) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov150) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov151) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov152) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov153) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov154) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov155) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov156) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov157) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov158) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov159) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov160) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov161) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov162) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov163) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov164) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov165) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov166) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov167) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov168) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov169) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov170) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov171) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov172) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov173) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov174) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov175) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov176) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov177) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov178) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov179) +PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov180) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov181) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov182) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov183) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov184) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov185) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov186) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov187) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov188) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov189) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov190) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov191) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov192) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov193) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov194) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov195) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov196) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov197) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov198) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov199) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov200) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov201) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov202) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov203) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov204) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov205) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov206) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov207) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov208) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov209) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov210) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov211) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov212) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov213) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov214) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov215) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov216) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov217) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov218) +PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov219) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov220) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov221) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov222) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov223) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov224) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov225) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov226) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov227) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov228) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov229) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov230) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov231) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov232) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov233) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov234) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov235) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov236) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov237) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov238) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov239) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov240) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov241) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov242) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov243) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov244) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov245) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov246) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov247) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov248) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov249) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov250) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov251) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov252) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov253) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov254) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov255) +PUBLIC FN_PREFIX(CryptonightR_instruction_mov256) + +#include "CryptonightR_template.inc" + 
+FN_PREFIX(CryptonightR_instruction0): /* Stub table. Each label marks one short instruction sequence and no stub ends in ret, so each is laid out directly before the next label. Labels come in groups of eight per (destination, source) pair: three identical imul, add followed by add-immediate, sub, ror, rol, xor. The destination cycles rbx, rsi, rdi, rbp every eight labels; the source changes every 32 labels (rbx, rsi, rdi, rbp, rsp, r15, rax in the stubs shown here). NOTE(review): presumably these stubs are copied out individually by a code generator rather than called - confirm against CryptonightR_gen.cpp */ + imul rbx, rbx +FN_PREFIX(CryptonightR_instruction1): + imul rbx, rbx +FN_PREFIX(CryptonightR_instruction2): + imul rbx, rbx +FN_PREFIX(CryptonightR_instruction3): + add rbx, rbx + add rbx, 2147483647 /* 2147483647 == 0x7FFFFFFF; NOTE(review): presumably a placeholder immediate that is replaced when the stub is used - confirm */ +FN_PREFIX(CryptonightR_instruction4): + sub rbx, rbx +FN_PREFIX(CryptonightR_instruction5): + ror ebx, cl /* rotate stubs use the 32-bit form of the destination and take the count from cl, whatever the group's source register is */ +FN_PREFIX(CryptonightR_instruction6): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction7): + xor rbx, rbx +FN_PREFIX(CryptonightR_instruction8): + imul rsi, rbx +FN_PREFIX(CryptonightR_instruction9): + imul rsi, rbx +FN_PREFIX(CryptonightR_instruction10): + imul rsi, rbx +FN_PREFIX(CryptonightR_instruction11): + add rsi, rbx + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction12): + sub rsi, rbx +FN_PREFIX(CryptonightR_instruction13): + ror esi, cl +FN_PREFIX(CryptonightR_instruction14): + rol esi, cl +FN_PREFIX(CryptonightR_instruction15): + xor rsi, rbx +FN_PREFIX(CryptonightR_instruction16): + imul rdi, rbx +FN_PREFIX(CryptonightR_instruction17): + imul rdi, rbx +FN_PREFIX(CryptonightR_instruction18): + imul rdi, rbx +FN_PREFIX(CryptonightR_instruction19): + add rdi, rbx + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction20): + sub rdi, rbx +FN_PREFIX(CryptonightR_instruction21): + ror edi, cl +FN_PREFIX(CryptonightR_instruction22): + rol edi, cl +FN_PREFIX(CryptonightR_instruction23): + xor rdi, rbx +FN_PREFIX(CryptonightR_instruction24): + imul rbp, rbx +FN_PREFIX(CryptonightR_instruction25): + imul rbp, rbx +FN_PREFIX(CryptonightR_instruction26): + imul rbp, rbx +FN_PREFIX(CryptonightR_instruction27): + add rbp, rbx + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction28): + sub rbp, rbx +FN_PREFIX(CryptonightR_instruction29): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction30): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction31): + xor rbp, rbx +FN_PREFIX(CryptonightR_instruction32): /* labels 32 to 63: source register is rsi */ + imul rbx, rsi +FN_PREFIX(CryptonightR_instruction33): + imul rbx, rsi +FN_PREFIX(CryptonightR_instruction34): +
 imul rbx, rsi +FN_PREFIX(CryptonightR_instruction35): + add rbx, rsi + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction36): + sub rbx, rsi +FN_PREFIX(CryptonightR_instruction37): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction38): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction39): + xor rbx, rsi +FN_PREFIX(CryptonightR_instruction40): + imul rsi, rsi +FN_PREFIX(CryptonightR_instruction41): + imul rsi, rsi +FN_PREFIX(CryptonightR_instruction42): + imul rsi, rsi +FN_PREFIX(CryptonightR_instruction43): + add rsi, rsi + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction44): + sub rsi, rsi +FN_PREFIX(CryptonightR_instruction45): + ror esi, cl +FN_PREFIX(CryptonightR_instruction46): + rol esi, cl +FN_PREFIX(CryptonightR_instruction47): + xor rsi, rsi +FN_PREFIX(CryptonightR_instruction48): + imul rdi, rsi +FN_PREFIX(CryptonightR_instruction49): + imul rdi, rsi +FN_PREFIX(CryptonightR_instruction50): + imul rdi, rsi +FN_PREFIX(CryptonightR_instruction51): + add rdi, rsi + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction52): + sub rdi, rsi +FN_PREFIX(CryptonightR_instruction53): + ror edi, cl +FN_PREFIX(CryptonightR_instruction54): + rol edi, cl +FN_PREFIX(CryptonightR_instruction55): + xor rdi, rsi +FN_PREFIX(CryptonightR_instruction56): + imul rbp, rsi +FN_PREFIX(CryptonightR_instruction57): + imul rbp, rsi +FN_PREFIX(CryptonightR_instruction58): + imul rbp, rsi +FN_PREFIX(CryptonightR_instruction59): + add rbp, rsi + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction60): + sub rbp, rsi +FN_PREFIX(CryptonightR_instruction61): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction62): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction63): + xor rbp, rsi +FN_PREFIX(CryptonightR_instruction64): /* labels 64 to 95: source register is rdi */ + imul rbx, rdi +FN_PREFIX(CryptonightR_instruction65): + imul rbx, rdi +FN_PREFIX(CryptonightR_instruction66): + imul rbx, rdi +FN_PREFIX(CryptonightR_instruction67): + add rbx, rdi + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction68): + sub rbx, rdi
+FN_PREFIX(CryptonightR_instruction69): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction70): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction71): + xor rbx, rdi +FN_PREFIX(CryptonightR_instruction72): + imul rsi, rdi +FN_PREFIX(CryptonightR_instruction73): + imul rsi, rdi +FN_PREFIX(CryptonightR_instruction74): + imul rsi, rdi +FN_PREFIX(CryptonightR_instruction75): + add rsi, rdi + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction76): + sub rsi, rdi +FN_PREFIX(CryptonightR_instruction77): + ror esi, cl +FN_PREFIX(CryptonightR_instruction78): + rol esi, cl +FN_PREFIX(CryptonightR_instruction79): + xor rsi, rdi +FN_PREFIX(CryptonightR_instruction80): + imul rdi, rdi +FN_PREFIX(CryptonightR_instruction81): + imul rdi, rdi +FN_PREFIX(CryptonightR_instruction82): + imul rdi, rdi +FN_PREFIX(CryptonightR_instruction83): + add rdi, rdi + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction84): + sub rdi, rdi +FN_PREFIX(CryptonightR_instruction85): + ror edi, cl +FN_PREFIX(CryptonightR_instruction86): + rol edi, cl +FN_PREFIX(CryptonightR_instruction87): + xor rdi, rdi +FN_PREFIX(CryptonightR_instruction88): + imul rbp, rdi +FN_PREFIX(CryptonightR_instruction89): + imul rbp, rdi +FN_PREFIX(CryptonightR_instruction90): + imul rbp, rdi +FN_PREFIX(CryptonightR_instruction91): + add rbp, rdi + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction92): + sub rbp, rdi +FN_PREFIX(CryptonightR_instruction93): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction94): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction95): + xor rbp, rdi +FN_PREFIX(CryptonightR_instruction96): /* labels 96 to 127: source register is rbp */ + imul rbx, rbp +FN_PREFIX(CryptonightR_instruction97): + imul rbx, rbp +FN_PREFIX(CryptonightR_instruction98): + imul rbx, rbp +FN_PREFIX(CryptonightR_instruction99): + add rbx, rbp + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction100): + sub rbx, rbp +FN_PREFIX(CryptonightR_instruction101): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction102): + rol ebx, cl
+FN_PREFIX(CryptonightR_instruction103): + xor rbx, rbp +FN_PREFIX(CryptonightR_instruction104): + imul rsi, rbp +FN_PREFIX(CryptonightR_instruction105): + imul rsi, rbp +FN_PREFIX(CryptonightR_instruction106): + imul rsi, rbp +FN_PREFIX(CryptonightR_instruction107): + add rsi, rbp + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction108): + sub rsi, rbp +FN_PREFIX(CryptonightR_instruction109): + ror esi, cl +FN_PREFIX(CryptonightR_instruction110): + rol esi, cl +FN_PREFIX(CryptonightR_instruction111): + xor rsi, rbp +FN_PREFIX(CryptonightR_instruction112): + imul rdi, rbp +FN_PREFIX(CryptonightR_instruction113): + imul rdi, rbp +FN_PREFIX(CryptonightR_instruction114): + imul rdi, rbp +FN_PREFIX(CryptonightR_instruction115): + add rdi, rbp + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction116): + sub rdi, rbp +FN_PREFIX(CryptonightR_instruction117): + ror edi, cl +FN_PREFIX(CryptonightR_instruction118): + rol edi, cl +FN_PREFIX(CryptonightR_instruction119): + xor rdi, rbp +FN_PREFIX(CryptonightR_instruction120): + imul rbp, rbp +FN_PREFIX(CryptonightR_instruction121): + imul rbp, rbp +FN_PREFIX(CryptonightR_instruction122): + imul rbp, rbp +FN_PREFIX(CryptonightR_instruction123): + add rbp, rbp + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction124): + sub rbp, rbp +FN_PREFIX(CryptonightR_instruction125): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction126): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction127): + xor rbp, rbp +FN_PREFIX(CryptonightR_instruction128): /* labels 128 to 159: source register is rsp; NOTE(review): rsp appears here as an ordinary data operand - confirm how the surrounding generated code uses rsp */ + imul rbx, rsp +FN_PREFIX(CryptonightR_instruction129): + imul rbx, rsp +FN_PREFIX(CryptonightR_instruction130): + imul rbx, rsp +FN_PREFIX(CryptonightR_instruction131): + add rbx, rsp + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction132): + sub rbx, rsp +FN_PREFIX(CryptonightR_instruction133): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction134): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction135): + xor rbx, rsp +FN_PREFIX(CryptonightR_instruction136): + imul rsi, rsp
+FN_PREFIX(CryptonightR_instruction137): + imul rsi, rsp +FN_PREFIX(CryptonightR_instruction138): + imul rsi, rsp +FN_PREFIX(CryptonightR_instruction139): + add rsi, rsp + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction140): + sub rsi, rsp +FN_PREFIX(CryptonightR_instruction141): + ror esi, cl +FN_PREFIX(CryptonightR_instruction142): + rol esi, cl +FN_PREFIX(CryptonightR_instruction143): + xor rsi, rsp +FN_PREFIX(CryptonightR_instruction144): + imul rdi, rsp +FN_PREFIX(CryptonightR_instruction145): + imul rdi, rsp +FN_PREFIX(CryptonightR_instruction146): + imul rdi, rsp +FN_PREFIX(CryptonightR_instruction147): + add rdi, rsp + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction148): + sub rdi, rsp +FN_PREFIX(CryptonightR_instruction149): + ror edi, cl +FN_PREFIX(CryptonightR_instruction150): + rol edi, cl +FN_PREFIX(CryptonightR_instruction151): + xor rdi, rsp +FN_PREFIX(CryptonightR_instruction152): + imul rbp, rsp +FN_PREFIX(CryptonightR_instruction153): + imul rbp, rsp +FN_PREFIX(CryptonightR_instruction154): + imul rbp, rsp +FN_PREFIX(CryptonightR_instruction155): + add rbp, rsp + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction156): + sub rbp, rsp +FN_PREFIX(CryptonightR_instruction157): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction158): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction159): + xor rbp, rsp +FN_PREFIX(CryptonightR_instruction160): /* labels 160 to 191: source register is r15 */ + imul rbx, r15 +FN_PREFIX(CryptonightR_instruction161): + imul rbx, r15 +FN_PREFIX(CryptonightR_instruction162): + imul rbx, r15 +FN_PREFIX(CryptonightR_instruction163): + add rbx, r15 + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction164): + sub rbx, r15 +FN_PREFIX(CryptonightR_instruction165): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction166): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction167): + xor rbx, r15 +FN_PREFIX(CryptonightR_instruction168): + imul rsi, r15 +FN_PREFIX(CryptonightR_instruction169): + imul rsi, r15 +FN_PREFIX(CryptonightR_instruction170): + imul rsi, r15
+FN_PREFIX(CryptonightR_instruction171): + add rsi, r15 + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction172): + sub rsi, r15 +FN_PREFIX(CryptonightR_instruction173): + ror esi, cl +FN_PREFIX(CryptonightR_instruction174): + rol esi, cl +FN_PREFIX(CryptonightR_instruction175): + xor rsi, r15 +FN_PREFIX(CryptonightR_instruction176): + imul rdi, r15 +FN_PREFIX(CryptonightR_instruction177): + imul rdi, r15 +FN_PREFIX(CryptonightR_instruction178): + imul rdi, r15 +FN_PREFIX(CryptonightR_instruction179): + add rdi, r15 + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction180): + sub rdi, r15 +FN_PREFIX(CryptonightR_instruction181): + ror edi, cl +FN_PREFIX(CryptonightR_instruction182): + rol edi, cl +FN_PREFIX(CryptonightR_instruction183): + xor rdi, r15 +FN_PREFIX(CryptonightR_instruction184): + imul rbp, r15 +FN_PREFIX(CryptonightR_instruction185): + imul rbp, r15 +FN_PREFIX(CryptonightR_instruction186): + imul rbp, r15 +FN_PREFIX(CryptonightR_instruction187): + add rbp, r15 + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction188): + sub rbp, r15 +FN_PREFIX(CryptonightR_instruction189): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction190): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction191): + xor rbp, r15 +FN_PREFIX(CryptonightR_instruction192): /* labels from 192 on: source register is rax */ + imul rbx, rax +FN_PREFIX(CryptonightR_instruction193): + imul rbx, rax +FN_PREFIX(CryptonightR_instruction194): + imul rbx, rax +FN_PREFIX(CryptonightR_instruction195): + add rbx, rax + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction196): + sub rbx, rax +FN_PREFIX(CryptonightR_instruction197): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction198): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction199): + xor rbx, rax +FN_PREFIX(CryptonightR_instruction200): + imul rsi, rax +FN_PREFIX(CryptonightR_instruction201): + imul rsi, rax +FN_PREFIX(CryptonightR_instruction202): + imul rsi, rax +FN_PREFIX(CryptonightR_instruction203): + add rsi, rax + add rsi, 2147483647
+FN_PREFIX(CryptonightR_instruction204): + sub rsi, rax +FN_PREFIX(CryptonightR_instruction205): + ror esi, cl +FN_PREFIX(CryptonightR_instruction206): + rol esi, cl +FN_PREFIX(CryptonightR_instruction207): + xor rsi, rax +FN_PREFIX(CryptonightR_instruction208): + imul rdi, rax +FN_PREFIX(CryptonightR_instruction209): + imul rdi, rax +FN_PREFIX(CryptonightR_instruction210): + imul rdi, rax +FN_PREFIX(CryptonightR_instruction211): + add rdi, rax + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction212): + sub rdi, rax +FN_PREFIX(CryptonightR_instruction213): + ror edi, cl +FN_PREFIX(CryptonightR_instruction214): + rol edi, cl +FN_PREFIX(CryptonightR_instruction215): + xor rdi, rax +FN_PREFIX(CryptonightR_instruction216): + imul rbp, rax +FN_PREFIX(CryptonightR_instruction217): + imul rbp, rax +FN_PREFIX(CryptonightR_instruction218): + imul rbp, rax +FN_PREFIX(CryptonightR_instruction219): + add rbp, rax + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction220): + sub rbp, rax +FN_PREFIX(CryptonightR_instruction221): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction222): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction223): + xor rbp, rax +FN_PREFIX(CryptonightR_instruction224): + imul rbx, rdx +FN_PREFIX(CryptonightR_instruction225): + imul rbx, rdx +FN_PREFIX(CryptonightR_instruction226): + imul rbx, rdx +FN_PREFIX(CryptonightR_instruction227): + add rbx, rdx + add rbx, 2147483647 +FN_PREFIX(CryptonightR_instruction228): + sub rbx, rdx +FN_PREFIX(CryptonightR_instruction229): + ror ebx, cl +FN_PREFIX(CryptonightR_instruction230): + rol ebx, cl +FN_PREFIX(CryptonightR_instruction231): + xor rbx, rdx +FN_PREFIX(CryptonightR_instruction232): + imul rsi, rdx +FN_PREFIX(CryptonightR_instruction233): + imul rsi, rdx +FN_PREFIX(CryptonightR_instruction234): + imul rsi, rdx +FN_PREFIX(CryptonightR_instruction235): + add rsi, rdx + add rsi, 2147483647 +FN_PREFIX(CryptonightR_instruction236): + sub rsi, rdx +FN_PREFIX(CryptonightR_instruction237): + ror esi, cl 
+FN_PREFIX(CryptonightR_instruction238): + rol esi, cl +FN_PREFIX(CryptonightR_instruction239): + xor rsi, rdx +FN_PREFIX(CryptonightR_instruction240): + imul rdi, rdx +FN_PREFIX(CryptonightR_instruction241): + imul rdi, rdx +FN_PREFIX(CryptonightR_instruction242): + imul rdi, rdx +FN_PREFIX(CryptonightR_instruction243): + add rdi, rdx + add rdi, 2147483647 +FN_PREFIX(CryptonightR_instruction244): + sub rdi, rdx +FN_PREFIX(CryptonightR_instruction245): + ror edi, cl +FN_PREFIX(CryptonightR_instruction246): + rol edi, cl +FN_PREFIX(CryptonightR_instruction247): + xor rdi, rdx +FN_PREFIX(CryptonightR_instruction248): + imul rbp, rdx +FN_PREFIX(CryptonightR_instruction249): + imul rbp, rdx +FN_PREFIX(CryptonightR_instruction250): + imul rbp, rdx +FN_PREFIX(CryptonightR_instruction251): + add rbp, rdx + add rbp, 2147483647 +FN_PREFIX(CryptonightR_instruction252): + sub rbp, rdx +FN_PREFIX(CryptonightR_instruction253): + ror ebp, cl +FN_PREFIX(CryptonightR_instruction254): + rol ebp, cl +FN_PREFIX(CryptonightR_instruction255): + xor rbp, rdx +FN_PREFIX(CryptonightR_instruction256): + imul rbx, rbx +FN_PREFIX(CryptonightR_instruction_mov0): + +FN_PREFIX(CryptonightR_instruction_mov1): + +FN_PREFIX(CryptonightR_instruction_mov2): + +FN_PREFIX(CryptonightR_instruction_mov3): + +FN_PREFIX(CryptonightR_instruction_mov4): + +FN_PREFIX(CryptonightR_instruction_mov5): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov6): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov7): + +FN_PREFIX(CryptonightR_instruction_mov8): + +FN_PREFIX(CryptonightR_instruction_mov9): + +FN_PREFIX(CryptonightR_instruction_mov10): + +FN_PREFIX(CryptonightR_instruction_mov11): + +FN_PREFIX(CryptonightR_instruction_mov12): + +FN_PREFIX(CryptonightR_instruction_mov13): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov14): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov15): + +FN_PREFIX(CryptonightR_instruction_mov16): + +FN_PREFIX(CryptonightR_instruction_mov17): + 
+FN_PREFIX(CryptonightR_instruction_mov18): + +FN_PREFIX(CryptonightR_instruction_mov19): + +FN_PREFIX(CryptonightR_instruction_mov20): + +FN_PREFIX(CryptonightR_instruction_mov21): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov22): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov23): + +FN_PREFIX(CryptonightR_instruction_mov24): + +FN_PREFIX(CryptonightR_instruction_mov25): + +FN_PREFIX(CryptonightR_instruction_mov26): + +FN_PREFIX(CryptonightR_instruction_mov27): + +FN_PREFIX(CryptonightR_instruction_mov28): + +FN_PREFIX(CryptonightR_instruction_mov29): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov30): + mov rcx, rbx +FN_PREFIX(CryptonightR_instruction_mov31): + +FN_PREFIX(CryptonightR_instruction_mov32): + +FN_PREFIX(CryptonightR_instruction_mov33): + +FN_PREFIX(CryptonightR_instruction_mov34): + +FN_PREFIX(CryptonightR_instruction_mov35): + +FN_PREFIX(CryptonightR_instruction_mov36): + +FN_PREFIX(CryptonightR_instruction_mov37): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov38): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov39): + +FN_PREFIX(CryptonightR_instruction_mov40): + +FN_PREFIX(CryptonightR_instruction_mov41): + +FN_PREFIX(CryptonightR_instruction_mov42): + +FN_PREFIX(CryptonightR_instruction_mov43): + +FN_PREFIX(CryptonightR_instruction_mov44): + +FN_PREFIX(CryptonightR_instruction_mov45): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov46): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov47): + +FN_PREFIX(CryptonightR_instruction_mov48): + +FN_PREFIX(CryptonightR_instruction_mov49): + +FN_PREFIX(CryptonightR_instruction_mov50): + +FN_PREFIX(CryptonightR_instruction_mov51): + +FN_PREFIX(CryptonightR_instruction_mov52): + +FN_PREFIX(CryptonightR_instruction_mov53): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov54): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov55): + +FN_PREFIX(CryptonightR_instruction_mov56): + +FN_PREFIX(CryptonightR_instruction_mov57): + 
+FN_PREFIX(CryptonightR_instruction_mov58): + +FN_PREFIX(CryptonightR_instruction_mov59): + +FN_PREFIX(CryptonightR_instruction_mov60): + +FN_PREFIX(CryptonightR_instruction_mov61): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov62): + mov rcx, rsi +FN_PREFIX(CryptonightR_instruction_mov63): + +FN_PREFIX(CryptonightR_instruction_mov64): + +FN_PREFIX(CryptonightR_instruction_mov65): + +FN_PREFIX(CryptonightR_instruction_mov66): + +FN_PREFIX(CryptonightR_instruction_mov67): + +FN_PREFIX(CryptonightR_instruction_mov68): + +FN_PREFIX(CryptonightR_instruction_mov69): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov70): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov71): + +FN_PREFIX(CryptonightR_instruction_mov72): + +FN_PREFIX(CryptonightR_instruction_mov73): + +FN_PREFIX(CryptonightR_instruction_mov74): + +FN_PREFIX(CryptonightR_instruction_mov75): + +FN_PREFIX(CryptonightR_instruction_mov76): + +FN_PREFIX(CryptonightR_instruction_mov77): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov78): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov79): + +FN_PREFIX(CryptonightR_instruction_mov80): + +FN_PREFIX(CryptonightR_instruction_mov81): + +FN_PREFIX(CryptonightR_instruction_mov82): + +FN_PREFIX(CryptonightR_instruction_mov83): + +FN_PREFIX(CryptonightR_instruction_mov84): + +FN_PREFIX(CryptonightR_instruction_mov85): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov86): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov87): + +FN_PREFIX(CryptonightR_instruction_mov88): + +FN_PREFIX(CryptonightR_instruction_mov89): + +FN_PREFIX(CryptonightR_instruction_mov90): + +FN_PREFIX(CryptonightR_instruction_mov91): + +FN_PREFIX(CryptonightR_instruction_mov92): + +FN_PREFIX(CryptonightR_instruction_mov93): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov94): + mov rcx, rdi +FN_PREFIX(CryptonightR_instruction_mov95): + +FN_PREFIX(CryptonightR_instruction_mov96): + +FN_PREFIX(CryptonightR_instruction_mov97): + 
+FN_PREFIX(CryptonightR_instruction_mov98): + +FN_PREFIX(CryptonightR_instruction_mov99): + +FN_PREFIX(CryptonightR_instruction_mov100): + +FN_PREFIX(CryptonightR_instruction_mov101): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov102): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov103): + +FN_PREFIX(CryptonightR_instruction_mov104): + +FN_PREFIX(CryptonightR_instruction_mov105): + +FN_PREFIX(CryptonightR_instruction_mov106): + +FN_PREFIX(CryptonightR_instruction_mov107): + +FN_PREFIX(CryptonightR_instruction_mov108): + +FN_PREFIX(CryptonightR_instruction_mov109): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov110): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov111): + +FN_PREFIX(CryptonightR_instruction_mov112): + +FN_PREFIX(CryptonightR_instruction_mov113): + +FN_PREFIX(CryptonightR_instruction_mov114): + +FN_PREFIX(CryptonightR_instruction_mov115): + +FN_PREFIX(CryptonightR_instruction_mov116): + +FN_PREFIX(CryptonightR_instruction_mov117): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov118): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov119): + +FN_PREFIX(CryptonightR_instruction_mov120): + +FN_PREFIX(CryptonightR_instruction_mov121): + +FN_PREFIX(CryptonightR_instruction_mov122): + +FN_PREFIX(CryptonightR_instruction_mov123): + +FN_PREFIX(CryptonightR_instruction_mov124): + +FN_PREFIX(CryptonightR_instruction_mov125): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov126): + mov rcx, rbp +FN_PREFIX(CryptonightR_instruction_mov127): + +FN_PREFIX(CryptonightR_instruction_mov128): + +FN_PREFIX(CryptonightR_instruction_mov129): + +FN_PREFIX(CryptonightR_instruction_mov130): + +FN_PREFIX(CryptonightR_instruction_mov131): + +FN_PREFIX(CryptonightR_instruction_mov132): + +FN_PREFIX(CryptonightR_instruction_mov133): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov134): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov135): + +FN_PREFIX(CryptonightR_instruction_mov136): + 
+FN_PREFIX(CryptonightR_instruction_mov137): + +FN_PREFIX(CryptonightR_instruction_mov138): + +FN_PREFIX(CryptonightR_instruction_mov139): + +FN_PREFIX(CryptonightR_instruction_mov140): + +FN_PREFIX(CryptonightR_instruction_mov141): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov142): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov143): + +FN_PREFIX(CryptonightR_instruction_mov144): + +FN_PREFIX(CryptonightR_instruction_mov145): + +FN_PREFIX(CryptonightR_instruction_mov146): + +FN_PREFIX(CryptonightR_instruction_mov147): + +FN_PREFIX(CryptonightR_instruction_mov148): + +FN_PREFIX(CryptonightR_instruction_mov149): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov150): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov151): + +FN_PREFIX(CryptonightR_instruction_mov152): + +FN_PREFIX(CryptonightR_instruction_mov153): + +FN_PREFIX(CryptonightR_instruction_mov154): + +FN_PREFIX(CryptonightR_instruction_mov155): + +FN_PREFIX(CryptonightR_instruction_mov156): + +FN_PREFIX(CryptonightR_instruction_mov157): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov158): + mov rcx, rsp +FN_PREFIX(CryptonightR_instruction_mov159): + +FN_PREFIX(CryptonightR_instruction_mov160): + +FN_PREFIX(CryptonightR_instruction_mov161): + +FN_PREFIX(CryptonightR_instruction_mov162): + +FN_PREFIX(CryptonightR_instruction_mov163): + +FN_PREFIX(CryptonightR_instruction_mov164): + +FN_PREFIX(CryptonightR_instruction_mov165): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov166): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov167): + +FN_PREFIX(CryptonightR_instruction_mov168): + +FN_PREFIX(CryptonightR_instruction_mov169): + +FN_PREFIX(CryptonightR_instruction_mov170): + +FN_PREFIX(CryptonightR_instruction_mov171): + +FN_PREFIX(CryptonightR_instruction_mov172): + +FN_PREFIX(CryptonightR_instruction_mov173): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov174): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov175): + 
+FN_PREFIX(CryptonightR_instruction_mov176): + +FN_PREFIX(CryptonightR_instruction_mov177): + +FN_PREFIX(CryptonightR_instruction_mov178): + +FN_PREFIX(CryptonightR_instruction_mov179): + +FN_PREFIX(CryptonightR_instruction_mov180): + +FN_PREFIX(CryptonightR_instruction_mov181): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov182): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov183): + +FN_PREFIX(CryptonightR_instruction_mov184): + +FN_PREFIX(CryptonightR_instruction_mov185): + +FN_PREFIX(CryptonightR_instruction_mov186): + +FN_PREFIX(CryptonightR_instruction_mov187): + +FN_PREFIX(CryptonightR_instruction_mov188): + +FN_PREFIX(CryptonightR_instruction_mov189): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov190): + mov rcx, r15 +FN_PREFIX(CryptonightR_instruction_mov191): + +FN_PREFIX(CryptonightR_instruction_mov192): + +FN_PREFIX(CryptonightR_instruction_mov193): + +FN_PREFIX(CryptonightR_instruction_mov194): + +FN_PREFIX(CryptonightR_instruction_mov195): + +FN_PREFIX(CryptonightR_instruction_mov196): + +FN_PREFIX(CryptonightR_instruction_mov197): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov198): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov199): + +FN_PREFIX(CryptonightR_instruction_mov200): + +FN_PREFIX(CryptonightR_instruction_mov201): + +FN_PREFIX(CryptonightR_instruction_mov202): + +FN_PREFIX(CryptonightR_instruction_mov203): + +FN_PREFIX(CryptonightR_instruction_mov204): + +FN_PREFIX(CryptonightR_instruction_mov205): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov206): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov207): + +FN_PREFIX(CryptonightR_instruction_mov208): + +FN_PREFIX(CryptonightR_instruction_mov209): + +FN_PREFIX(CryptonightR_instruction_mov210): + +FN_PREFIX(CryptonightR_instruction_mov211): + +FN_PREFIX(CryptonightR_instruction_mov212): + +FN_PREFIX(CryptonightR_instruction_mov213): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov214): + mov rcx, rax 
+FN_PREFIX(CryptonightR_instruction_mov215): + +FN_PREFIX(CryptonightR_instruction_mov216): + +FN_PREFIX(CryptonightR_instruction_mov217): + +FN_PREFIX(CryptonightR_instruction_mov218): + +FN_PREFIX(CryptonightR_instruction_mov219): + +FN_PREFIX(CryptonightR_instruction_mov220): + +FN_PREFIX(CryptonightR_instruction_mov221): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov222): + mov rcx, rax +FN_PREFIX(CryptonightR_instruction_mov223): + +FN_PREFIX(CryptonightR_instruction_mov224): + +FN_PREFIX(CryptonightR_instruction_mov225): + +FN_PREFIX(CryptonightR_instruction_mov226): + +FN_PREFIX(CryptonightR_instruction_mov227): + +FN_PREFIX(CryptonightR_instruction_mov228): + +FN_PREFIX(CryptonightR_instruction_mov229): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov230): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov231): + +FN_PREFIX(CryptonightR_instruction_mov232): + +FN_PREFIX(CryptonightR_instruction_mov233): + +FN_PREFIX(CryptonightR_instruction_mov234): + +FN_PREFIX(CryptonightR_instruction_mov235): + +FN_PREFIX(CryptonightR_instruction_mov236): + +FN_PREFIX(CryptonightR_instruction_mov237): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov238): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov239): + +FN_PREFIX(CryptonightR_instruction_mov240): + +FN_PREFIX(CryptonightR_instruction_mov241): + +FN_PREFIX(CryptonightR_instruction_mov242): + +FN_PREFIX(CryptonightR_instruction_mov243): + +FN_PREFIX(CryptonightR_instruction_mov244): + +FN_PREFIX(CryptonightR_instruction_mov245): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov246): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov247): + +FN_PREFIX(CryptonightR_instruction_mov248): + +FN_PREFIX(CryptonightR_instruction_mov249): + +FN_PREFIX(CryptonightR_instruction_mov250): + +FN_PREFIX(CryptonightR_instruction_mov251): + +FN_PREFIX(CryptonightR_instruction_mov252): + +FN_PREFIX(CryptonightR_instruction_mov253): + mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov254): + 
mov rcx, rdx +FN_PREFIX(CryptonightR_instruction_mov255): + +FN_PREFIX(CryptonightR_instruction_mov256): diff --git a/src/crypto/asm/win64/CryptonightR_template.asm b/src/crypto/asm/win64/CryptonightR_template.asm new file mode 100644 index 00000000..ec8ad5af --- /dev/null +++ b/src/crypto/asm/win64/CryptonightR_template.asm @@ -0,0 +1,1582 @@ +; Auto-generated file, do not edit + +_TEXT_CN_TEMPLATE SEGMENT PAGE READ EXECUTE +PUBLIC CryptonightR_instruction0 +PUBLIC CryptonightR_instruction1 +PUBLIC CryptonightR_instruction2 +PUBLIC CryptonightR_instruction3 +PUBLIC CryptonightR_instruction4 +PUBLIC CryptonightR_instruction5 +PUBLIC CryptonightR_instruction6 +PUBLIC CryptonightR_instruction7 +PUBLIC CryptonightR_instruction8 +PUBLIC CryptonightR_instruction9 +PUBLIC CryptonightR_instruction10 +PUBLIC CryptonightR_instruction11 +PUBLIC CryptonightR_instruction12 +PUBLIC CryptonightR_instruction13 +PUBLIC CryptonightR_instruction14 +PUBLIC CryptonightR_instruction15 +PUBLIC CryptonightR_instruction16 +PUBLIC CryptonightR_instruction17 +PUBLIC CryptonightR_instruction18 +PUBLIC CryptonightR_instruction19 +PUBLIC CryptonightR_instruction20 +PUBLIC CryptonightR_instruction21 +PUBLIC CryptonightR_instruction22 +PUBLIC CryptonightR_instruction23 +PUBLIC CryptonightR_instruction24 +PUBLIC CryptonightR_instruction25 +PUBLIC CryptonightR_instruction26 +PUBLIC CryptonightR_instruction27 +PUBLIC CryptonightR_instruction28 +PUBLIC CryptonightR_instruction29 +PUBLIC CryptonightR_instruction30 +PUBLIC CryptonightR_instruction31 +PUBLIC CryptonightR_instruction32 +PUBLIC CryptonightR_instruction33 +PUBLIC CryptonightR_instruction34 +PUBLIC CryptonightR_instruction35 +PUBLIC CryptonightR_instruction36 +PUBLIC CryptonightR_instruction37 +PUBLIC CryptonightR_instruction38 +PUBLIC CryptonightR_instruction39 +PUBLIC CryptonightR_instruction40 +PUBLIC CryptonightR_instruction41 +PUBLIC CryptonightR_instruction42 +PUBLIC CryptonightR_instruction43 +PUBLIC CryptonightR_instruction44 
+PUBLIC CryptonightR_instruction45 +PUBLIC CryptonightR_instruction46 +PUBLIC CryptonightR_instruction47 +PUBLIC CryptonightR_instruction48 +PUBLIC CryptonightR_instruction49 +PUBLIC CryptonightR_instruction50 +PUBLIC CryptonightR_instruction51 +PUBLIC CryptonightR_instruction52 +PUBLIC CryptonightR_instruction53 +PUBLIC CryptonightR_instruction54 +PUBLIC CryptonightR_instruction55 +PUBLIC CryptonightR_instruction56 +PUBLIC CryptonightR_instruction57 +PUBLIC CryptonightR_instruction58 +PUBLIC CryptonightR_instruction59 +PUBLIC CryptonightR_instruction60 +PUBLIC CryptonightR_instruction61 +PUBLIC CryptonightR_instruction62 +PUBLIC CryptonightR_instruction63 +PUBLIC CryptonightR_instruction64 +PUBLIC CryptonightR_instruction65 +PUBLIC CryptonightR_instruction66 +PUBLIC CryptonightR_instruction67 +PUBLIC CryptonightR_instruction68 +PUBLIC CryptonightR_instruction69 +PUBLIC CryptonightR_instruction70 +PUBLIC CryptonightR_instruction71 +PUBLIC CryptonightR_instruction72 +PUBLIC CryptonightR_instruction73 +PUBLIC CryptonightR_instruction74 +PUBLIC CryptonightR_instruction75 +PUBLIC CryptonightR_instruction76 +PUBLIC CryptonightR_instruction77 +PUBLIC CryptonightR_instruction78 +PUBLIC CryptonightR_instruction79 +PUBLIC CryptonightR_instruction80 +PUBLIC CryptonightR_instruction81 +PUBLIC CryptonightR_instruction82 +PUBLIC CryptonightR_instruction83 +PUBLIC CryptonightR_instruction84 +PUBLIC CryptonightR_instruction85 +PUBLIC CryptonightR_instruction86 +PUBLIC CryptonightR_instruction87 +PUBLIC CryptonightR_instruction88 +PUBLIC CryptonightR_instruction89 +PUBLIC CryptonightR_instruction90 +PUBLIC CryptonightR_instruction91 +PUBLIC CryptonightR_instruction92 +PUBLIC CryptonightR_instruction93 +PUBLIC CryptonightR_instruction94 +PUBLIC CryptonightR_instruction95 +PUBLIC CryptonightR_instruction96 +PUBLIC CryptonightR_instruction97 +PUBLIC CryptonightR_instruction98 +PUBLIC CryptonightR_instruction99 +PUBLIC CryptonightR_instruction100 +PUBLIC CryptonightR_instruction101 
+PUBLIC CryptonightR_instruction102 +PUBLIC CryptonightR_instruction103 +PUBLIC CryptonightR_instruction104 +PUBLIC CryptonightR_instruction105 +PUBLIC CryptonightR_instruction106 +PUBLIC CryptonightR_instruction107 +PUBLIC CryptonightR_instruction108 +PUBLIC CryptonightR_instruction109 +PUBLIC CryptonightR_instruction110 +PUBLIC CryptonightR_instruction111 +PUBLIC CryptonightR_instruction112 +PUBLIC CryptonightR_instruction113 +PUBLIC CryptonightR_instruction114 +PUBLIC CryptonightR_instruction115 +PUBLIC CryptonightR_instruction116 +PUBLIC CryptonightR_instruction117 +PUBLIC CryptonightR_instruction118 +PUBLIC CryptonightR_instruction119 +PUBLIC CryptonightR_instruction120 +PUBLIC CryptonightR_instruction121 +PUBLIC CryptonightR_instruction122 +PUBLIC CryptonightR_instruction123 +PUBLIC CryptonightR_instruction124 +PUBLIC CryptonightR_instruction125 +PUBLIC CryptonightR_instruction126 +PUBLIC CryptonightR_instruction127 +PUBLIC CryptonightR_instruction128 +PUBLIC CryptonightR_instruction129 +PUBLIC CryptonightR_instruction130 +PUBLIC CryptonightR_instruction131 +PUBLIC CryptonightR_instruction132 +PUBLIC CryptonightR_instruction133 +PUBLIC CryptonightR_instruction134 +PUBLIC CryptonightR_instruction135 +PUBLIC CryptonightR_instruction136 +PUBLIC CryptonightR_instruction137 +PUBLIC CryptonightR_instruction138 +PUBLIC CryptonightR_instruction139 +PUBLIC CryptonightR_instruction140 +PUBLIC CryptonightR_instruction141 +PUBLIC CryptonightR_instruction142 +PUBLIC CryptonightR_instruction143 +PUBLIC CryptonightR_instruction144 +PUBLIC CryptonightR_instruction145 +PUBLIC CryptonightR_instruction146 +PUBLIC CryptonightR_instruction147 +PUBLIC CryptonightR_instruction148 +PUBLIC CryptonightR_instruction149 +PUBLIC CryptonightR_instruction150 +PUBLIC CryptonightR_instruction151 +PUBLIC CryptonightR_instruction152 +PUBLIC CryptonightR_instruction153 +PUBLIC CryptonightR_instruction154 +PUBLIC CryptonightR_instruction155 +PUBLIC CryptonightR_instruction156 +PUBLIC 
CryptonightR_instruction157 +PUBLIC CryptonightR_instruction158 +PUBLIC CryptonightR_instruction159 +PUBLIC CryptonightR_instruction160 +PUBLIC CryptonightR_instruction161 +PUBLIC CryptonightR_instruction162 +PUBLIC CryptonightR_instruction163 +PUBLIC CryptonightR_instruction164 +PUBLIC CryptonightR_instruction165 +PUBLIC CryptonightR_instruction166 +PUBLIC CryptonightR_instruction167 +PUBLIC CryptonightR_instruction168 +PUBLIC CryptonightR_instruction169 +PUBLIC CryptonightR_instruction170 +PUBLIC CryptonightR_instruction171 +PUBLIC CryptonightR_instruction172 +PUBLIC CryptonightR_instruction173 +PUBLIC CryptonightR_instruction174 +PUBLIC CryptonightR_instruction175 +PUBLIC CryptonightR_instruction176 +PUBLIC CryptonightR_instruction177 +PUBLIC CryptonightR_instruction178 +PUBLIC CryptonightR_instruction179 +PUBLIC CryptonightR_instruction180 +PUBLIC CryptonightR_instruction181 +PUBLIC CryptonightR_instruction182 +PUBLIC CryptonightR_instruction183 +PUBLIC CryptonightR_instruction184 +PUBLIC CryptonightR_instruction185 +PUBLIC CryptonightR_instruction186 +PUBLIC CryptonightR_instruction187 +PUBLIC CryptonightR_instruction188 +PUBLIC CryptonightR_instruction189 +PUBLIC CryptonightR_instruction190 +PUBLIC CryptonightR_instruction191 +PUBLIC CryptonightR_instruction192 +PUBLIC CryptonightR_instruction193 +PUBLIC CryptonightR_instruction194 +PUBLIC CryptonightR_instruction195 +PUBLIC CryptonightR_instruction196 +PUBLIC CryptonightR_instruction197 +PUBLIC CryptonightR_instruction198 +PUBLIC CryptonightR_instruction199 +PUBLIC CryptonightR_instruction200 +PUBLIC CryptonightR_instruction201 +PUBLIC CryptonightR_instruction202 +PUBLIC CryptonightR_instruction203 +PUBLIC CryptonightR_instruction204 +PUBLIC CryptonightR_instruction205 +PUBLIC CryptonightR_instruction206 +PUBLIC CryptonightR_instruction207 +PUBLIC CryptonightR_instruction208 +PUBLIC CryptonightR_instruction209 +PUBLIC CryptonightR_instruction210 +PUBLIC CryptonightR_instruction211 +PUBLIC 
CryptonightR_instruction212 +PUBLIC CryptonightR_instruction213 +PUBLIC CryptonightR_instruction214 +PUBLIC CryptonightR_instruction215 +PUBLIC CryptonightR_instruction216 +PUBLIC CryptonightR_instruction217 +PUBLIC CryptonightR_instruction218 +PUBLIC CryptonightR_instruction219 +PUBLIC CryptonightR_instruction220 +PUBLIC CryptonightR_instruction221 +PUBLIC CryptonightR_instruction222 +PUBLIC CryptonightR_instruction223 +PUBLIC CryptonightR_instruction224 +PUBLIC CryptonightR_instruction225 +PUBLIC CryptonightR_instruction226 +PUBLIC CryptonightR_instruction227 +PUBLIC CryptonightR_instruction228 +PUBLIC CryptonightR_instruction229 +PUBLIC CryptonightR_instruction230 +PUBLIC CryptonightR_instruction231 +PUBLIC CryptonightR_instruction232 +PUBLIC CryptonightR_instruction233 +PUBLIC CryptonightR_instruction234 +PUBLIC CryptonightR_instruction235 +PUBLIC CryptonightR_instruction236 +PUBLIC CryptonightR_instruction237 +PUBLIC CryptonightR_instruction238 +PUBLIC CryptonightR_instruction239 +PUBLIC CryptonightR_instruction240 +PUBLIC CryptonightR_instruction241 +PUBLIC CryptonightR_instruction242 +PUBLIC CryptonightR_instruction243 +PUBLIC CryptonightR_instruction244 +PUBLIC CryptonightR_instruction245 +PUBLIC CryptonightR_instruction246 +PUBLIC CryptonightR_instruction247 +PUBLIC CryptonightR_instruction248 +PUBLIC CryptonightR_instruction249 +PUBLIC CryptonightR_instruction250 +PUBLIC CryptonightR_instruction251 +PUBLIC CryptonightR_instruction252 +PUBLIC CryptonightR_instruction253 +PUBLIC CryptonightR_instruction254 +PUBLIC CryptonightR_instruction255 +PUBLIC CryptonightR_instruction256 +PUBLIC CryptonightR_instruction_mov0 +PUBLIC CryptonightR_instruction_mov1 +PUBLIC CryptonightR_instruction_mov2 +PUBLIC CryptonightR_instruction_mov3 +PUBLIC CryptonightR_instruction_mov4 +PUBLIC CryptonightR_instruction_mov5 +PUBLIC CryptonightR_instruction_mov6 +PUBLIC CryptonightR_instruction_mov7 +PUBLIC CryptonightR_instruction_mov8 +PUBLIC CryptonightR_instruction_mov9 +PUBLIC 
CryptonightR_instruction_mov10 +PUBLIC CryptonightR_instruction_mov11 +PUBLIC CryptonightR_instruction_mov12 +PUBLIC CryptonightR_instruction_mov13 +PUBLIC CryptonightR_instruction_mov14 +PUBLIC CryptonightR_instruction_mov15 +PUBLIC CryptonightR_instruction_mov16 +PUBLIC CryptonightR_instruction_mov17 +PUBLIC CryptonightR_instruction_mov18 +PUBLIC CryptonightR_instruction_mov19 +PUBLIC CryptonightR_instruction_mov20 +PUBLIC CryptonightR_instruction_mov21 +PUBLIC CryptonightR_instruction_mov22 +PUBLIC CryptonightR_instruction_mov23 +PUBLIC CryptonightR_instruction_mov24 +PUBLIC CryptonightR_instruction_mov25 +PUBLIC CryptonightR_instruction_mov26 +PUBLIC CryptonightR_instruction_mov27 +PUBLIC CryptonightR_instruction_mov28 +PUBLIC CryptonightR_instruction_mov29 +PUBLIC CryptonightR_instruction_mov30 +PUBLIC CryptonightR_instruction_mov31 +PUBLIC CryptonightR_instruction_mov32 +PUBLIC CryptonightR_instruction_mov33 +PUBLIC CryptonightR_instruction_mov34 +PUBLIC CryptonightR_instruction_mov35 +PUBLIC CryptonightR_instruction_mov36 +PUBLIC CryptonightR_instruction_mov37 +PUBLIC CryptonightR_instruction_mov38 +PUBLIC CryptonightR_instruction_mov39 +PUBLIC CryptonightR_instruction_mov40 +PUBLIC CryptonightR_instruction_mov41 +PUBLIC CryptonightR_instruction_mov42 +PUBLIC CryptonightR_instruction_mov43 +PUBLIC CryptonightR_instruction_mov44 +PUBLIC CryptonightR_instruction_mov45 +PUBLIC CryptonightR_instruction_mov46 +PUBLIC CryptonightR_instruction_mov47 +PUBLIC CryptonightR_instruction_mov48 +PUBLIC CryptonightR_instruction_mov49 +PUBLIC CryptonightR_instruction_mov50 +PUBLIC CryptonightR_instruction_mov51 +PUBLIC CryptonightR_instruction_mov52 +PUBLIC CryptonightR_instruction_mov53 +PUBLIC CryptonightR_instruction_mov54 +PUBLIC CryptonightR_instruction_mov55 +PUBLIC CryptonightR_instruction_mov56 +PUBLIC CryptonightR_instruction_mov57 +PUBLIC CryptonightR_instruction_mov58 +PUBLIC CryptonightR_instruction_mov59 +PUBLIC CryptonightR_instruction_mov60 +PUBLIC 
CryptonightR_instruction_mov61 +PUBLIC CryptonightR_instruction_mov62 +PUBLIC CryptonightR_instruction_mov63 +PUBLIC CryptonightR_instruction_mov64 +PUBLIC CryptonightR_instruction_mov65 +PUBLIC CryptonightR_instruction_mov66 +PUBLIC CryptonightR_instruction_mov67 +PUBLIC CryptonightR_instruction_mov68 +PUBLIC CryptonightR_instruction_mov69 +PUBLIC CryptonightR_instruction_mov70 +PUBLIC CryptonightR_instruction_mov71 +PUBLIC CryptonightR_instruction_mov72 +PUBLIC CryptonightR_instruction_mov73 +PUBLIC CryptonightR_instruction_mov74 +PUBLIC CryptonightR_instruction_mov75 +PUBLIC CryptonightR_instruction_mov76 +PUBLIC CryptonightR_instruction_mov77 +PUBLIC CryptonightR_instruction_mov78 +PUBLIC CryptonightR_instruction_mov79 +PUBLIC CryptonightR_instruction_mov80 +PUBLIC CryptonightR_instruction_mov81 +PUBLIC CryptonightR_instruction_mov82 +PUBLIC CryptonightR_instruction_mov83 +PUBLIC CryptonightR_instruction_mov84 +PUBLIC CryptonightR_instruction_mov85 +PUBLIC CryptonightR_instruction_mov86 +PUBLIC CryptonightR_instruction_mov87 +PUBLIC CryptonightR_instruction_mov88 +PUBLIC CryptonightR_instruction_mov89 +PUBLIC CryptonightR_instruction_mov90 +PUBLIC CryptonightR_instruction_mov91 +PUBLIC CryptonightR_instruction_mov92 +PUBLIC CryptonightR_instruction_mov93 +PUBLIC CryptonightR_instruction_mov94 +PUBLIC CryptonightR_instruction_mov95 +PUBLIC CryptonightR_instruction_mov96 +PUBLIC CryptonightR_instruction_mov97 +PUBLIC CryptonightR_instruction_mov98 +PUBLIC CryptonightR_instruction_mov99 +PUBLIC CryptonightR_instruction_mov100 +PUBLIC CryptonightR_instruction_mov101 +PUBLIC CryptonightR_instruction_mov102 +PUBLIC CryptonightR_instruction_mov103 +PUBLIC CryptonightR_instruction_mov104 +PUBLIC CryptonightR_instruction_mov105 +PUBLIC CryptonightR_instruction_mov106 +PUBLIC CryptonightR_instruction_mov107 +PUBLIC CryptonightR_instruction_mov108 +PUBLIC CryptonightR_instruction_mov109 +PUBLIC CryptonightR_instruction_mov110 +PUBLIC CryptonightR_instruction_mov111 
+PUBLIC CryptonightR_instruction_mov112 +PUBLIC CryptonightR_instruction_mov113 +PUBLIC CryptonightR_instruction_mov114 +PUBLIC CryptonightR_instruction_mov115 +PUBLIC CryptonightR_instruction_mov116 +PUBLIC CryptonightR_instruction_mov117 +PUBLIC CryptonightR_instruction_mov118 +PUBLIC CryptonightR_instruction_mov119 +PUBLIC CryptonightR_instruction_mov120 +PUBLIC CryptonightR_instruction_mov121 +PUBLIC CryptonightR_instruction_mov122 +PUBLIC CryptonightR_instruction_mov123 +PUBLIC CryptonightR_instruction_mov124 +PUBLIC CryptonightR_instruction_mov125 +PUBLIC CryptonightR_instruction_mov126 +PUBLIC CryptonightR_instruction_mov127 +PUBLIC CryptonightR_instruction_mov128 +PUBLIC CryptonightR_instruction_mov129 +PUBLIC CryptonightR_instruction_mov130 +PUBLIC CryptonightR_instruction_mov131 +PUBLIC CryptonightR_instruction_mov132 +PUBLIC CryptonightR_instruction_mov133 +PUBLIC CryptonightR_instruction_mov134 +PUBLIC CryptonightR_instruction_mov135 +PUBLIC CryptonightR_instruction_mov136 +PUBLIC CryptonightR_instruction_mov137 +PUBLIC CryptonightR_instruction_mov138 +PUBLIC CryptonightR_instruction_mov139 +PUBLIC CryptonightR_instruction_mov140 +PUBLIC CryptonightR_instruction_mov141 +PUBLIC CryptonightR_instruction_mov142 +PUBLIC CryptonightR_instruction_mov143 +PUBLIC CryptonightR_instruction_mov144 +PUBLIC CryptonightR_instruction_mov145 +PUBLIC CryptonightR_instruction_mov146 +PUBLIC CryptonightR_instruction_mov147 +PUBLIC CryptonightR_instruction_mov148 +PUBLIC CryptonightR_instruction_mov149 +PUBLIC CryptonightR_instruction_mov150 +PUBLIC CryptonightR_instruction_mov151 +PUBLIC CryptonightR_instruction_mov152 +PUBLIC CryptonightR_instruction_mov153 +PUBLIC CryptonightR_instruction_mov154 +PUBLIC CryptonightR_instruction_mov155 +PUBLIC CryptonightR_instruction_mov156 +PUBLIC CryptonightR_instruction_mov157 +PUBLIC CryptonightR_instruction_mov158 +PUBLIC CryptonightR_instruction_mov159 +PUBLIC CryptonightR_instruction_mov160 +PUBLIC CryptonightR_instruction_mov161 
+PUBLIC CryptonightR_instruction_mov162 +PUBLIC CryptonightR_instruction_mov163 +PUBLIC CryptonightR_instruction_mov164 +PUBLIC CryptonightR_instruction_mov165 +PUBLIC CryptonightR_instruction_mov166 +PUBLIC CryptonightR_instruction_mov167 +PUBLIC CryptonightR_instruction_mov168 +PUBLIC CryptonightR_instruction_mov169 +PUBLIC CryptonightR_instruction_mov170 +PUBLIC CryptonightR_instruction_mov171 +PUBLIC CryptonightR_instruction_mov172 +PUBLIC CryptonightR_instruction_mov173 +PUBLIC CryptonightR_instruction_mov174 +PUBLIC CryptonightR_instruction_mov175 +PUBLIC CryptonightR_instruction_mov176 +PUBLIC CryptonightR_instruction_mov177 +PUBLIC CryptonightR_instruction_mov178 +PUBLIC CryptonightR_instruction_mov179 +PUBLIC CryptonightR_instruction_mov180 +PUBLIC CryptonightR_instruction_mov181 +PUBLIC CryptonightR_instruction_mov182 +PUBLIC CryptonightR_instruction_mov183 +PUBLIC CryptonightR_instruction_mov184 +PUBLIC CryptonightR_instruction_mov185 +PUBLIC CryptonightR_instruction_mov186 +PUBLIC CryptonightR_instruction_mov187 +PUBLIC CryptonightR_instruction_mov188 +PUBLIC CryptonightR_instruction_mov189 +PUBLIC CryptonightR_instruction_mov190 +PUBLIC CryptonightR_instruction_mov191 +PUBLIC CryptonightR_instruction_mov192 +PUBLIC CryptonightR_instruction_mov193 +PUBLIC CryptonightR_instruction_mov194 +PUBLIC CryptonightR_instruction_mov195 +PUBLIC CryptonightR_instruction_mov196 +PUBLIC CryptonightR_instruction_mov197 +PUBLIC CryptonightR_instruction_mov198 +PUBLIC CryptonightR_instruction_mov199 +PUBLIC CryptonightR_instruction_mov200 +PUBLIC CryptonightR_instruction_mov201 +PUBLIC CryptonightR_instruction_mov202 +PUBLIC CryptonightR_instruction_mov203 +PUBLIC CryptonightR_instruction_mov204 +PUBLIC CryptonightR_instruction_mov205 +PUBLIC CryptonightR_instruction_mov206 +PUBLIC CryptonightR_instruction_mov207 +PUBLIC CryptonightR_instruction_mov208 +PUBLIC CryptonightR_instruction_mov209 +PUBLIC CryptonightR_instruction_mov210 +PUBLIC CryptonightR_instruction_mov211 
+PUBLIC CryptonightR_instruction_mov212 +PUBLIC CryptonightR_instruction_mov213 +PUBLIC CryptonightR_instruction_mov214 +PUBLIC CryptonightR_instruction_mov215 +PUBLIC CryptonightR_instruction_mov216 +PUBLIC CryptonightR_instruction_mov217 +PUBLIC CryptonightR_instruction_mov218 +PUBLIC CryptonightR_instruction_mov219 +PUBLIC CryptonightR_instruction_mov220 +PUBLIC CryptonightR_instruction_mov221 +PUBLIC CryptonightR_instruction_mov222 +PUBLIC CryptonightR_instruction_mov223 +PUBLIC CryptonightR_instruction_mov224 +PUBLIC CryptonightR_instruction_mov225 +PUBLIC CryptonightR_instruction_mov226 +PUBLIC CryptonightR_instruction_mov227 +PUBLIC CryptonightR_instruction_mov228 +PUBLIC CryptonightR_instruction_mov229 +PUBLIC CryptonightR_instruction_mov230 +PUBLIC CryptonightR_instruction_mov231 +PUBLIC CryptonightR_instruction_mov232 +PUBLIC CryptonightR_instruction_mov233 +PUBLIC CryptonightR_instruction_mov234 +PUBLIC CryptonightR_instruction_mov235 +PUBLIC CryptonightR_instruction_mov236 +PUBLIC CryptonightR_instruction_mov237 +PUBLIC CryptonightR_instruction_mov238 +PUBLIC CryptonightR_instruction_mov239 +PUBLIC CryptonightR_instruction_mov240 +PUBLIC CryptonightR_instruction_mov241 +PUBLIC CryptonightR_instruction_mov242 +PUBLIC CryptonightR_instruction_mov243 +PUBLIC CryptonightR_instruction_mov244 +PUBLIC CryptonightR_instruction_mov245 +PUBLIC CryptonightR_instruction_mov246 +PUBLIC CryptonightR_instruction_mov247 +PUBLIC CryptonightR_instruction_mov248 +PUBLIC CryptonightR_instruction_mov249 +PUBLIC CryptonightR_instruction_mov250 +PUBLIC CryptonightR_instruction_mov251 +PUBLIC CryptonightR_instruction_mov252 +PUBLIC CryptonightR_instruction_mov253 +PUBLIC CryptonightR_instruction_mov254 +PUBLIC CryptonightR_instruction_mov255 +PUBLIC CryptonightR_instruction_mov256 + +INCLUDE CryptonightR_template_win.inc + +CryptonightR_instruction0: + imul rbx, rbx +CryptonightR_instruction1: + imul rbx, rbx +CryptonightR_instruction2: + imul rbx, rbx 
+CryptonightR_instruction3: + add rbx, rbx + add rbx, 2147483647 +CryptonightR_instruction4: + sub rbx, rbx +CryptonightR_instruction5: + ror ebx, cl +CryptonightR_instruction6: + rol ebx, cl +CryptonightR_instruction7: + xor rbx, rbx +CryptonightR_instruction8: + imul rsi, rbx +CryptonightR_instruction9: + imul rsi, rbx +CryptonightR_instruction10: + imul rsi, rbx +CryptonightR_instruction11: + add rsi, rbx + add rsi, 2147483647 +CryptonightR_instruction12: + sub rsi, rbx +CryptonightR_instruction13: + ror esi, cl +CryptonightR_instruction14: + rol esi, cl +CryptonightR_instruction15: + xor rsi, rbx +CryptonightR_instruction16: + imul rdi, rbx +CryptonightR_instruction17: + imul rdi, rbx +CryptonightR_instruction18: + imul rdi, rbx +CryptonightR_instruction19: + add rdi, rbx + add rdi, 2147483647 +CryptonightR_instruction20: + sub rdi, rbx +CryptonightR_instruction21: + ror edi, cl +CryptonightR_instruction22: + rol edi, cl +CryptonightR_instruction23: + xor rdi, rbx +CryptonightR_instruction24: + imul rbp, rbx +CryptonightR_instruction25: + imul rbp, rbx +CryptonightR_instruction26: + imul rbp, rbx +CryptonightR_instruction27: + add rbp, rbx + add rbp, 2147483647 +CryptonightR_instruction28: + sub rbp, rbx +CryptonightR_instruction29: + ror ebp, cl +CryptonightR_instruction30: + rol ebp, cl +CryptonightR_instruction31: + xor rbp, rbx +CryptonightR_instruction32: + imul rbx, rsi +CryptonightR_instruction33: + imul rbx, rsi +CryptonightR_instruction34: + imul rbx, rsi +CryptonightR_instruction35: + add rbx, rsi + add rbx, 2147483647 +CryptonightR_instruction36: + sub rbx, rsi +CryptonightR_instruction37: + ror ebx, cl +CryptonightR_instruction38: + rol ebx, cl +CryptonightR_instruction39: + xor rbx, rsi +CryptonightR_instruction40: + imul rsi, rsi +CryptonightR_instruction41: + imul rsi, rsi +CryptonightR_instruction42: + imul rsi, rsi +CryptonightR_instruction43: + add rsi, rsi + add rsi, 2147483647 +CryptonightR_instruction44: + sub rsi, rsi 
+CryptonightR_instruction45: + ror esi, cl +CryptonightR_instruction46: + rol esi, cl +CryptonightR_instruction47: + xor rsi, rsi +CryptonightR_instruction48: + imul rdi, rsi +CryptonightR_instruction49: + imul rdi, rsi +CryptonightR_instruction50: + imul rdi, rsi +CryptonightR_instruction51: + add rdi, rsi + add rdi, 2147483647 +CryptonightR_instruction52: + sub rdi, rsi +CryptonightR_instruction53: + ror edi, cl +CryptonightR_instruction54: + rol edi, cl +CryptonightR_instruction55: + xor rdi, rsi +CryptonightR_instruction56: + imul rbp, rsi +CryptonightR_instruction57: + imul rbp, rsi +CryptonightR_instruction58: + imul rbp, rsi +CryptonightR_instruction59: + add rbp, rsi + add rbp, 2147483647 +CryptonightR_instruction60: + sub rbp, rsi +CryptonightR_instruction61: + ror ebp, cl +CryptonightR_instruction62: + rol ebp, cl +CryptonightR_instruction63: + xor rbp, rsi +CryptonightR_instruction64: + imul rbx, rdi +CryptonightR_instruction65: + imul rbx, rdi +CryptonightR_instruction66: + imul rbx, rdi +CryptonightR_instruction67: + add rbx, rdi + add rbx, 2147483647 +CryptonightR_instruction68: + sub rbx, rdi +CryptonightR_instruction69: + ror ebx, cl +CryptonightR_instruction70: + rol ebx, cl +CryptonightR_instruction71: + xor rbx, rdi +CryptonightR_instruction72: + imul rsi, rdi +CryptonightR_instruction73: + imul rsi, rdi +CryptonightR_instruction74: + imul rsi, rdi +CryptonightR_instruction75: + add rsi, rdi + add rsi, 2147483647 +CryptonightR_instruction76: + sub rsi, rdi +CryptonightR_instruction77: + ror esi, cl +CryptonightR_instruction78: + rol esi, cl +CryptonightR_instruction79: + xor rsi, rdi +CryptonightR_instruction80: + imul rdi, rdi +CryptonightR_instruction81: + imul rdi, rdi +CryptonightR_instruction82: + imul rdi, rdi +CryptonightR_instruction83: + add rdi, rdi + add rdi, 2147483647 +CryptonightR_instruction84: + sub rdi, rdi +CryptonightR_instruction85: + ror edi, cl +CryptonightR_instruction86: + rol edi, cl +CryptonightR_instruction87: + xor 
rdi, rdi +CryptonightR_instruction88: + imul rbp, rdi +CryptonightR_instruction89: + imul rbp, rdi +CryptonightR_instruction90: + imul rbp, rdi +CryptonightR_instruction91: + add rbp, rdi + add rbp, 2147483647 +CryptonightR_instruction92: + sub rbp, rdi +CryptonightR_instruction93: + ror ebp, cl +CryptonightR_instruction94: + rol ebp, cl +CryptonightR_instruction95: + xor rbp, rdi +CryptonightR_instruction96: + imul rbx, rbp +CryptonightR_instruction97: + imul rbx, rbp +CryptonightR_instruction98: + imul rbx, rbp +CryptonightR_instruction99: + add rbx, rbp + add rbx, 2147483647 +CryptonightR_instruction100: + sub rbx, rbp +CryptonightR_instruction101: + ror ebx, cl +CryptonightR_instruction102: + rol ebx, cl +CryptonightR_instruction103: + xor rbx, rbp +CryptonightR_instruction104: + imul rsi, rbp +CryptonightR_instruction105: + imul rsi, rbp +CryptonightR_instruction106: + imul rsi, rbp +CryptonightR_instruction107: + add rsi, rbp + add rsi, 2147483647 +CryptonightR_instruction108: + sub rsi, rbp +CryptonightR_instruction109: + ror esi, cl +CryptonightR_instruction110: + rol esi, cl +CryptonightR_instruction111: + xor rsi, rbp +CryptonightR_instruction112: + imul rdi, rbp +CryptonightR_instruction113: + imul rdi, rbp +CryptonightR_instruction114: + imul rdi, rbp +CryptonightR_instruction115: + add rdi, rbp + add rdi, 2147483647 +CryptonightR_instruction116: + sub rdi, rbp +CryptonightR_instruction117: + ror edi, cl +CryptonightR_instruction118: + rol edi, cl +CryptonightR_instruction119: + xor rdi, rbp +CryptonightR_instruction120: + imul rbp, rbp +CryptonightR_instruction121: + imul rbp, rbp +CryptonightR_instruction122: + imul rbp, rbp +CryptonightR_instruction123: + add rbp, rbp + add rbp, 2147483647 +CryptonightR_instruction124: + sub rbp, rbp +CryptonightR_instruction125: + ror ebp, cl +CryptonightR_instruction126: + rol ebp, cl +CryptonightR_instruction127: + xor rbp, rbp +CryptonightR_instruction128: + imul rbx, rsp +CryptonightR_instruction129: + imul rbx, 
rsp +CryptonightR_instruction130: + imul rbx, rsp +CryptonightR_instruction131: + add rbx, rsp + add rbx, 2147483647 +CryptonightR_instruction132: + sub rbx, rsp +CryptonightR_instruction133: + ror ebx, cl +CryptonightR_instruction134: + rol ebx, cl +CryptonightR_instruction135: + xor rbx, rsp +CryptonightR_instruction136: + imul rsi, rsp +CryptonightR_instruction137: + imul rsi, rsp +CryptonightR_instruction138: + imul rsi, rsp +CryptonightR_instruction139: + add rsi, rsp + add rsi, 2147483647 +CryptonightR_instruction140: + sub rsi, rsp +CryptonightR_instruction141: + ror esi, cl +CryptonightR_instruction142: + rol esi, cl +CryptonightR_instruction143: + xor rsi, rsp +CryptonightR_instruction144: + imul rdi, rsp +CryptonightR_instruction145: + imul rdi, rsp +CryptonightR_instruction146: + imul rdi, rsp +CryptonightR_instruction147: + add rdi, rsp + add rdi, 2147483647 +CryptonightR_instruction148: + sub rdi, rsp +CryptonightR_instruction149: + ror edi, cl +CryptonightR_instruction150: + rol edi, cl +CryptonightR_instruction151: + xor rdi, rsp +CryptonightR_instruction152: + imul rbp, rsp +CryptonightR_instruction153: + imul rbp, rsp +CryptonightR_instruction154: + imul rbp, rsp +CryptonightR_instruction155: + add rbp, rsp + add rbp, 2147483647 +CryptonightR_instruction156: + sub rbp, rsp +CryptonightR_instruction157: + ror ebp, cl +CryptonightR_instruction158: + rol ebp, cl +CryptonightR_instruction159: + xor rbp, rsp +CryptonightR_instruction160: + imul rbx, r15 +CryptonightR_instruction161: + imul rbx, r15 +CryptonightR_instruction162: + imul rbx, r15 +CryptonightR_instruction163: + add rbx, r15 + add rbx, 2147483647 +CryptonightR_instruction164: + sub rbx, r15 +CryptonightR_instruction165: + ror ebx, cl +CryptonightR_instruction166: + rol ebx, cl +CryptonightR_instruction167: + xor rbx, r15 +CryptonightR_instruction168: + imul rsi, r15 +CryptonightR_instruction169: + imul rsi, r15 +CryptonightR_instruction170: + imul rsi, r15 +CryptonightR_instruction171: + 
add rsi, r15 + add rsi, 2147483647 +CryptonightR_instruction172: + sub rsi, r15 +CryptonightR_instruction173: + ror esi, cl +CryptonightR_instruction174: + rol esi, cl +CryptonightR_instruction175: + xor rsi, r15 +CryptonightR_instruction176: + imul rdi, r15 +CryptonightR_instruction177: + imul rdi, r15 +CryptonightR_instruction178: + imul rdi, r15 +CryptonightR_instruction179: + add rdi, r15 + add rdi, 2147483647 +CryptonightR_instruction180: + sub rdi, r15 +CryptonightR_instruction181: + ror edi, cl +CryptonightR_instruction182: + rol edi, cl +CryptonightR_instruction183: + xor rdi, r15 +CryptonightR_instruction184: + imul rbp, r15 +CryptonightR_instruction185: + imul rbp, r15 +CryptonightR_instruction186: + imul rbp, r15 +CryptonightR_instruction187: + add rbp, r15 + add rbp, 2147483647 +CryptonightR_instruction188: + sub rbp, r15 +CryptonightR_instruction189: + ror ebp, cl +CryptonightR_instruction190: + rol ebp, cl +CryptonightR_instruction191: + xor rbp, r15 +CryptonightR_instruction192: + imul rbx, rax +CryptonightR_instruction193: + imul rbx, rax +CryptonightR_instruction194: + imul rbx, rax +CryptonightR_instruction195: + add rbx, rax + add rbx, 2147483647 +CryptonightR_instruction196: + sub rbx, rax +CryptonightR_instruction197: + ror ebx, cl +CryptonightR_instruction198: + rol ebx, cl +CryptonightR_instruction199: + xor rbx, rax +CryptonightR_instruction200: + imul rsi, rax +CryptonightR_instruction201: + imul rsi, rax +CryptonightR_instruction202: + imul rsi, rax +CryptonightR_instruction203: + add rsi, rax + add rsi, 2147483647 +CryptonightR_instruction204: + sub rsi, rax +CryptonightR_instruction205: + ror esi, cl +CryptonightR_instruction206: + rol esi, cl +CryptonightR_instruction207: + xor rsi, rax +CryptonightR_instruction208: + imul rdi, rax +CryptonightR_instruction209: + imul rdi, rax +CryptonightR_instruction210: + imul rdi, rax +CryptonightR_instruction211: + add rdi, rax + add rdi, 2147483647 +CryptonightR_instruction212: + sub rdi, rax 
+CryptonightR_instruction213: + ror edi, cl +CryptonightR_instruction214: + rol edi, cl +CryptonightR_instruction215: + xor rdi, rax +CryptonightR_instruction216: + imul rbp, rax +CryptonightR_instruction217: + imul rbp, rax +CryptonightR_instruction218: + imul rbp, rax +CryptonightR_instruction219: + add rbp, rax + add rbp, 2147483647 +CryptonightR_instruction220: + sub rbp, rax +CryptonightR_instruction221: + ror ebp, cl +CryptonightR_instruction222: + rol ebp, cl +CryptonightR_instruction223: + xor rbp, rax +CryptonightR_instruction224: + imul rbx, rdx +CryptonightR_instruction225: + imul rbx, rdx +CryptonightR_instruction226: + imul rbx, rdx +CryptonightR_instruction227: + add rbx, rdx + add rbx, 2147483647 +CryptonightR_instruction228: + sub rbx, rdx +CryptonightR_instruction229: + ror ebx, cl +CryptonightR_instruction230: + rol ebx, cl +CryptonightR_instruction231: + xor rbx, rdx +CryptonightR_instruction232: + imul rsi, rdx +CryptonightR_instruction233: + imul rsi, rdx +CryptonightR_instruction234: + imul rsi, rdx +CryptonightR_instruction235: + add rsi, rdx + add rsi, 2147483647 +CryptonightR_instruction236: + sub rsi, rdx +CryptonightR_instruction237: + ror esi, cl +CryptonightR_instruction238: + rol esi, cl +CryptonightR_instruction239: + xor rsi, rdx +CryptonightR_instruction240: + imul rdi, rdx +CryptonightR_instruction241: + imul rdi, rdx +CryptonightR_instruction242: + imul rdi, rdx +CryptonightR_instruction243: + add rdi, rdx + add rdi, 2147483647 +CryptonightR_instruction244: + sub rdi, rdx +CryptonightR_instruction245: + ror edi, cl +CryptonightR_instruction246: + rol edi, cl +CryptonightR_instruction247: + xor rdi, rdx +CryptonightR_instruction248: + imul rbp, rdx +CryptonightR_instruction249: + imul rbp, rdx +CryptonightR_instruction250: + imul rbp, rdx +CryptonightR_instruction251: + add rbp, rdx + add rbp, 2147483647 +CryptonightR_instruction252: + sub rbp, rdx +CryptonightR_instruction253: + ror ebp, cl +CryptonightR_instruction254: + rol ebp, 
cl +CryptonightR_instruction255: + xor rbp, rdx +CryptonightR_instruction256: + imul rbx, rbx +CryptonightR_instruction_mov0: + +CryptonightR_instruction_mov1: + +CryptonightR_instruction_mov2: + +CryptonightR_instruction_mov3: + +CryptonightR_instruction_mov4: + +CryptonightR_instruction_mov5: + mov rcx, rbx +CryptonightR_instruction_mov6: + mov rcx, rbx +CryptonightR_instruction_mov7: + +CryptonightR_instruction_mov8: + +CryptonightR_instruction_mov9: + +CryptonightR_instruction_mov10: + +CryptonightR_instruction_mov11: + +CryptonightR_instruction_mov12: + +CryptonightR_instruction_mov13: + mov rcx, rbx +CryptonightR_instruction_mov14: + mov rcx, rbx +CryptonightR_instruction_mov15: + +CryptonightR_instruction_mov16: + +CryptonightR_instruction_mov17: + +CryptonightR_instruction_mov18: + +CryptonightR_instruction_mov19: + +CryptonightR_instruction_mov20: + +CryptonightR_instruction_mov21: + mov rcx, rbx +CryptonightR_instruction_mov22: + mov rcx, rbx +CryptonightR_instruction_mov23: + +CryptonightR_instruction_mov24: + +CryptonightR_instruction_mov25: + +CryptonightR_instruction_mov26: + +CryptonightR_instruction_mov27: + +CryptonightR_instruction_mov28: + +CryptonightR_instruction_mov29: + mov rcx, rbx +CryptonightR_instruction_mov30: + mov rcx, rbx +CryptonightR_instruction_mov31: + +CryptonightR_instruction_mov32: + +CryptonightR_instruction_mov33: + +CryptonightR_instruction_mov34: + +CryptonightR_instruction_mov35: + +CryptonightR_instruction_mov36: + +CryptonightR_instruction_mov37: + mov rcx, rsi +CryptonightR_instruction_mov38: + mov rcx, rsi +CryptonightR_instruction_mov39: + +CryptonightR_instruction_mov40: + +CryptonightR_instruction_mov41: + +CryptonightR_instruction_mov42: + +CryptonightR_instruction_mov43: + +CryptonightR_instruction_mov44: + +CryptonightR_instruction_mov45: + mov rcx, rsi +CryptonightR_instruction_mov46: + mov rcx, rsi +CryptonightR_instruction_mov47: + +CryptonightR_instruction_mov48: + +CryptonightR_instruction_mov49: + 
+CryptonightR_instruction_mov50: + +CryptonightR_instruction_mov51: + +CryptonightR_instruction_mov52: + +CryptonightR_instruction_mov53: + mov rcx, rsi +CryptonightR_instruction_mov54: + mov rcx, rsi +CryptonightR_instruction_mov55: + +CryptonightR_instruction_mov56: + +CryptonightR_instruction_mov57: + +CryptonightR_instruction_mov58: + +CryptonightR_instruction_mov59: + +CryptonightR_instruction_mov60: + +CryptonightR_instruction_mov61: + mov rcx, rsi +CryptonightR_instruction_mov62: + mov rcx, rsi +CryptonightR_instruction_mov63: + +CryptonightR_instruction_mov64: + +CryptonightR_instruction_mov65: + +CryptonightR_instruction_mov66: + +CryptonightR_instruction_mov67: + +CryptonightR_instruction_mov68: + +CryptonightR_instruction_mov69: + mov rcx, rdi +CryptonightR_instruction_mov70: + mov rcx, rdi +CryptonightR_instruction_mov71: + +CryptonightR_instruction_mov72: + +CryptonightR_instruction_mov73: + +CryptonightR_instruction_mov74: + +CryptonightR_instruction_mov75: + +CryptonightR_instruction_mov76: + +CryptonightR_instruction_mov77: + mov rcx, rdi +CryptonightR_instruction_mov78: + mov rcx, rdi +CryptonightR_instruction_mov79: + +CryptonightR_instruction_mov80: + +CryptonightR_instruction_mov81: + +CryptonightR_instruction_mov82: + +CryptonightR_instruction_mov83: + +CryptonightR_instruction_mov84: + +CryptonightR_instruction_mov85: + mov rcx, rdi +CryptonightR_instruction_mov86: + mov rcx, rdi +CryptonightR_instruction_mov87: + +CryptonightR_instruction_mov88: + +CryptonightR_instruction_mov89: + +CryptonightR_instruction_mov90: + +CryptonightR_instruction_mov91: + +CryptonightR_instruction_mov92: + +CryptonightR_instruction_mov93: + mov rcx, rdi +CryptonightR_instruction_mov94: + mov rcx, rdi +CryptonightR_instruction_mov95: + +CryptonightR_instruction_mov96: + +CryptonightR_instruction_mov97: + +CryptonightR_instruction_mov98: + +CryptonightR_instruction_mov99: + +CryptonightR_instruction_mov100: + +CryptonightR_instruction_mov101: + mov rcx, rbp 
+CryptonightR_instruction_mov102: + mov rcx, rbp +CryptonightR_instruction_mov103: + +CryptonightR_instruction_mov104: + +CryptonightR_instruction_mov105: + +CryptonightR_instruction_mov106: + +CryptonightR_instruction_mov107: + +CryptonightR_instruction_mov108: + +CryptonightR_instruction_mov109: + mov rcx, rbp +CryptonightR_instruction_mov110: + mov rcx, rbp +CryptonightR_instruction_mov111: + +CryptonightR_instruction_mov112: + +CryptonightR_instruction_mov113: + +CryptonightR_instruction_mov114: + +CryptonightR_instruction_mov115: + +CryptonightR_instruction_mov116: + +CryptonightR_instruction_mov117: + mov rcx, rbp +CryptonightR_instruction_mov118: + mov rcx, rbp +CryptonightR_instruction_mov119: + +CryptonightR_instruction_mov120: + +CryptonightR_instruction_mov121: + +CryptonightR_instruction_mov122: + +CryptonightR_instruction_mov123: + +CryptonightR_instruction_mov124: + +CryptonightR_instruction_mov125: + mov rcx, rbp +CryptonightR_instruction_mov126: + mov rcx, rbp +CryptonightR_instruction_mov127: + +CryptonightR_instruction_mov128: + +CryptonightR_instruction_mov129: + +CryptonightR_instruction_mov130: + +CryptonightR_instruction_mov131: + +CryptonightR_instruction_mov132: + +CryptonightR_instruction_mov133: + mov rcx, rsp +CryptonightR_instruction_mov134: + mov rcx, rsp +CryptonightR_instruction_mov135: + +CryptonightR_instruction_mov136: + +CryptonightR_instruction_mov137: + +CryptonightR_instruction_mov138: + +CryptonightR_instruction_mov139: + +CryptonightR_instruction_mov140: + +CryptonightR_instruction_mov141: + mov rcx, rsp +CryptonightR_instruction_mov142: + mov rcx, rsp +CryptonightR_instruction_mov143: + +CryptonightR_instruction_mov144: + +CryptonightR_instruction_mov145: + +CryptonightR_instruction_mov146: + +CryptonightR_instruction_mov147: + +CryptonightR_instruction_mov148: + +CryptonightR_instruction_mov149: + mov rcx, rsp +CryptonightR_instruction_mov150: + mov rcx, rsp +CryptonightR_instruction_mov151: + 
+CryptonightR_instruction_mov152: + +CryptonightR_instruction_mov153: + +CryptonightR_instruction_mov154: + +CryptonightR_instruction_mov155: + +CryptonightR_instruction_mov156: + +CryptonightR_instruction_mov157: + mov rcx, rsp +CryptonightR_instruction_mov158: + mov rcx, rsp +CryptonightR_instruction_mov159: + +CryptonightR_instruction_mov160: + +CryptonightR_instruction_mov161: + +CryptonightR_instruction_mov162: + +CryptonightR_instruction_mov163: + +CryptonightR_instruction_mov164: + +CryptonightR_instruction_mov165: + mov rcx, r15 +CryptonightR_instruction_mov166: + mov rcx, r15 +CryptonightR_instruction_mov167: + +CryptonightR_instruction_mov168: + +CryptonightR_instruction_mov169: + +CryptonightR_instruction_mov170: + +CryptonightR_instruction_mov171: + +CryptonightR_instruction_mov172: + +CryptonightR_instruction_mov173: + mov rcx, r15 +CryptonightR_instruction_mov174: + mov rcx, r15 +CryptonightR_instruction_mov175: + +CryptonightR_instruction_mov176: + +CryptonightR_instruction_mov177: + +CryptonightR_instruction_mov178: + +CryptonightR_instruction_mov179: + +CryptonightR_instruction_mov180: + +CryptonightR_instruction_mov181: + mov rcx, r15 +CryptonightR_instruction_mov182: + mov rcx, r15 +CryptonightR_instruction_mov183: + +CryptonightR_instruction_mov184: + +CryptonightR_instruction_mov185: + +CryptonightR_instruction_mov186: + +CryptonightR_instruction_mov187: + +CryptonightR_instruction_mov188: + +CryptonightR_instruction_mov189: + mov rcx, r15 +CryptonightR_instruction_mov190: + mov rcx, r15 +CryptonightR_instruction_mov191: + +CryptonightR_instruction_mov192: + +CryptonightR_instruction_mov193: + +CryptonightR_instruction_mov194: + +CryptonightR_instruction_mov195: + +CryptonightR_instruction_mov196: + +CryptonightR_instruction_mov197: + mov rcx, rax +CryptonightR_instruction_mov198: + mov rcx, rax +CryptonightR_instruction_mov199: + +CryptonightR_instruction_mov200: + +CryptonightR_instruction_mov201: + +CryptonightR_instruction_mov202: + 
+CryptonightR_instruction_mov203: + +CryptonightR_instruction_mov204: + +CryptonightR_instruction_mov205: + mov rcx, rax +CryptonightR_instruction_mov206: + mov rcx, rax +CryptonightR_instruction_mov207: + +CryptonightR_instruction_mov208: + +CryptonightR_instruction_mov209: + +CryptonightR_instruction_mov210: + +CryptonightR_instruction_mov211: + +CryptonightR_instruction_mov212: + +CryptonightR_instruction_mov213: + mov rcx, rax +CryptonightR_instruction_mov214: + mov rcx, rax +CryptonightR_instruction_mov215: + +CryptonightR_instruction_mov216: + +CryptonightR_instruction_mov217: + +CryptonightR_instruction_mov218: + +CryptonightR_instruction_mov219: + +CryptonightR_instruction_mov220: + +CryptonightR_instruction_mov221: + mov rcx, rax +CryptonightR_instruction_mov222: + mov rcx, rax +CryptonightR_instruction_mov223: + +CryptonightR_instruction_mov224: + +CryptonightR_instruction_mov225: + +CryptonightR_instruction_mov226: + +CryptonightR_instruction_mov227: + +CryptonightR_instruction_mov228: + +CryptonightR_instruction_mov229: + mov rcx, rdx +CryptonightR_instruction_mov230: + mov rcx, rdx +CryptonightR_instruction_mov231: + +CryptonightR_instruction_mov232: + +CryptonightR_instruction_mov233: + +CryptonightR_instruction_mov234: + +CryptonightR_instruction_mov235: + +CryptonightR_instruction_mov236: + +CryptonightR_instruction_mov237: + mov rcx, rdx +CryptonightR_instruction_mov238: + mov rcx, rdx +CryptonightR_instruction_mov239: + +CryptonightR_instruction_mov240: + +CryptonightR_instruction_mov241: + +CryptonightR_instruction_mov242: + +CryptonightR_instruction_mov243: + +CryptonightR_instruction_mov244: + +CryptonightR_instruction_mov245: + mov rcx, rdx +CryptonightR_instruction_mov246: + mov rcx, rdx +CryptonightR_instruction_mov247: + +CryptonightR_instruction_mov248: + +CryptonightR_instruction_mov249: + +CryptonightR_instruction_mov250: + +CryptonightR_instruction_mov251: + +CryptonightR_instruction_mov252: + +CryptonightR_instruction_mov253: + mov 
rcx, rdx +CryptonightR_instruction_mov254: + mov rcx, rdx +CryptonightR_instruction_mov255: + +CryptonightR_instruction_mov256: + +_TEXT_CN_TEMPLATE ENDS +END diff --git a/src/crypto/asm/win64/CryptonightR_template.h b/src/crypto/asm/win64/CryptonightR_template.h new file mode 100644 index 00000000..182c6870 --- /dev/null +++ b/src/crypto/asm/win64/CryptonightR_template.h @@ -0,0 +1,1050 @@ +// Auto-generated file, do not edit + +extern "C" +{ + void CryptonightR_template_part1(); + void CryptonightR_template_mainloop(); + void CryptonightR_template_part2(); + void CryptonightR_template_part3(); + void CryptonightR_template_end(); + void CryptonightR_template_double_part1(); + void CryptonightR_template_double_mainloop(); + void CryptonightR_template_double_part2(); + void CryptonightR_template_double_part3(); + void CryptonightR_template_double_part4(); + void CryptonightR_template_double_end(); + void CryptonightR_instruction0(); + void CryptonightR_instruction1(); + void CryptonightR_instruction2(); + void CryptonightR_instruction3(); + void CryptonightR_instruction4(); + void CryptonightR_instruction5(); + void CryptonightR_instruction6(); + void CryptonightR_instruction7(); + void CryptonightR_instruction8(); + void CryptonightR_instruction9(); + void CryptonightR_instruction10(); + void CryptonightR_instruction11(); + void CryptonightR_instruction12(); + void CryptonightR_instruction13(); + void CryptonightR_instruction14(); + void CryptonightR_instruction15(); + void CryptonightR_instruction16(); + void CryptonightR_instruction17(); + void CryptonightR_instruction18(); + void CryptonightR_instruction19(); + void CryptonightR_instruction20(); + void CryptonightR_instruction21(); + void CryptonightR_instruction22(); + void CryptonightR_instruction23(); + void CryptonightR_instruction24(); + void CryptonightR_instruction25(); + void CryptonightR_instruction26(); + void CryptonightR_instruction27(); + void CryptonightR_instruction28(); + void 
CryptonightR_instruction29(); + void CryptonightR_instruction30(); + void CryptonightR_instruction31(); + void CryptonightR_instruction32(); + void CryptonightR_instruction33(); + void CryptonightR_instruction34(); + void CryptonightR_instruction35(); + void CryptonightR_instruction36(); + void CryptonightR_instruction37(); + void CryptonightR_instruction38(); + void CryptonightR_instruction39(); + void CryptonightR_instruction40(); + void CryptonightR_instruction41(); + void CryptonightR_instruction42(); + void CryptonightR_instruction43(); + void CryptonightR_instruction44(); + void CryptonightR_instruction45(); + void CryptonightR_instruction46(); + void CryptonightR_instruction47(); + void CryptonightR_instruction48(); + void CryptonightR_instruction49(); + void CryptonightR_instruction50(); + void CryptonightR_instruction51(); + void CryptonightR_instruction52(); + void CryptonightR_instruction53(); + void CryptonightR_instruction54(); + void CryptonightR_instruction55(); + void CryptonightR_instruction56(); + void CryptonightR_instruction57(); + void CryptonightR_instruction58(); + void CryptonightR_instruction59(); + void CryptonightR_instruction60(); + void CryptonightR_instruction61(); + void CryptonightR_instruction62(); + void CryptonightR_instruction63(); + void CryptonightR_instruction64(); + void CryptonightR_instruction65(); + void CryptonightR_instruction66(); + void CryptonightR_instruction67(); + void CryptonightR_instruction68(); + void CryptonightR_instruction69(); + void CryptonightR_instruction70(); + void CryptonightR_instruction71(); + void CryptonightR_instruction72(); + void CryptonightR_instruction73(); + void CryptonightR_instruction74(); + void CryptonightR_instruction75(); + void CryptonightR_instruction76(); + void CryptonightR_instruction77(); + void CryptonightR_instruction78(); + void CryptonightR_instruction79(); + void CryptonightR_instruction80(); + void CryptonightR_instruction81(); + void CryptonightR_instruction82(); + void 
CryptonightR_instruction83(); + void CryptonightR_instruction84(); + void CryptonightR_instruction85(); + void CryptonightR_instruction86(); + void CryptonightR_instruction87(); + void CryptonightR_instruction88(); + void CryptonightR_instruction89(); + void CryptonightR_instruction90(); + void CryptonightR_instruction91(); + void CryptonightR_instruction92(); + void CryptonightR_instruction93(); + void CryptonightR_instruction94(); + void CryptonightR_instruction95(); + void CryptonightR_instruction96(); + void CryptonightR_instruction97(); + void CryptonightR_instruction98(); + void CryptonightR_instruction99(); + void CryptonightR_instruction100(); + void CryptonightR_instruction101(); + void CryptonightR_instruction102(); + void CryptonightR_instruction103(); + void CryptonightR_instruction104(); + void CryptonightR_instruction105(); + void CryptonightR_instruction106(); + void CryptonightR_instruction107(); + void CryptonightR_instruction108(); + void CryptonightR_instruction109(); + void CryptonightR_instruction110(); + void CryptonightR_instruction111(); + void CryptonightR_instruction112(); + void CryptonightR_instruction113(); + void CryptonightR_instruction114(); + void CryptonightR_instruction115(); + void CryptonightR_instruction116(); + void CryptonightR_instruction117(); + void CryptonightR_instruction118(); + void CryptonightR_instruction119(); + void CryptonightR_instruction120(); + void CryptonightR_instruction121(); + void CryptonightR_instruction122(); + void CryptonightR_instruction123(); + void CryptonightR_instruction124(); + void CryptonightR_instruction125(); + void CryptonightR_instruction126(); + void CryptonightR_instruction127(); + void CryptonightR_instruction128(); + void CryptonightR_instruction129(); + void CryptonightR_instruction130(); + void CryptonightR_instruction131(); + void CryptonightR_instruction132(); + void CryptonightR_instruction133(); + void CryptonightR_instruction134(); + void CryptonightR_instruction135(); + void 
CryptonightR_instruction136(); + void CryptonightR_instruction137(); + void CryptonightR_instruction138(); + void CryptonightR_instruction139(); + void CryptonightR_instruction140(); + void CryptonightR_instruction141(); + void CryptonightR_instruction142(); + void CryptonightR_instruction143(); + void CryptonightR_instruction144(); + void CryptonightR_instruction145(); + void CryptonightR_instruction146(); + void CryptonightR_instruction147(); + void CryptonightR_instruction148(); + void CryptonightR_instruction149(); + void CryptonightR_instruction150(); + void CryptonightR_instruction151(); + void CryptonightR_instruction152(); + void CryptonightR_instruction153(); + void CryptonightR_instruction154(); + void CryptonightR_instruction155(); + void CryptonightR_instruction156(); + void CryptonightR_instruction157(); + void CryptonightR_instruction158(); + void CryptonightR_instruction159(); + void CryptonightR_instruction160(); + void CryptonightR_instruction161(); + void CryptonightR_instruction162(); + void CryptonightR_instruction163(); + void CryptonightR_instruction164(); + void CryptonightR_instruction165(); + void CryptonightR_instruction166(); + void CryptonightR_instruction167(); + void CryptonightR_instruction168(); + void CryptonightR_instruction169(); + void CryptonightR_instruction170(); + void CryptonightR_instruction171(); + void CryptonightR_instruction172(); + void CryptonightR_instruction173(); + void CryptonightR_instruction174(); + void CryptonightR_instruction175(); + void CryptonightR_instruction176(); + void CryptonightR_instruction177(); + void CryptonightR_instruction178(); + void CryptonightR_instruction179(); + void CryptonightR_instruction180(); + void CryptonightR_instruction181(); + void CryptonightR_instruction182(); + void CryptonightR_instruction183(); + void CryptonightR_instruction184(); + void CryptonightR_instruction185(); + void CryptonightR_instruction186(); + void CryptonightR_instruction187(); + void 
CryptonightR_instruction188(); + void CryptonightR_instruction189(); + void CryptonightR_instruction190(); + void CryptonightR_instruction191(); + void CryptonightR_instruction192(); + void CryptonightR_instruction193(); + void CryptonightR_instruction194(); + void CryptonightR_instruction195(); + void CryptonightR_instruction196(); + void CryptonightR_instruction197(); + void CryptonightR_instruction198(); + void CryptonightR_instruction199(); + void CryptonightR_instruction200(); + void CryptonightR_instruction201(); + void CryptonightR_instruction202(); + void CryptonightR_instruction203(); + void CryptonightR_instruction204(); + void CryptonightR_instruction205(); + void CryptonightR_instruction206(); + void CryptonightR_instruction207(); + void CryptonightR_instruction208(); + void CryptonightR_instruction209(); + void CryptonightR_instruction210(); + void CryptonightR_instruction211(); + void CryptonightR_instruction212(); + void CryptonightR_instruction213(); + void CryptonightR_instruction214(); + void CryptonightR_instruction215(); + void CryptonightR_instruction216(); + void CryptonightR_instruction217(); + void CryptonightR_instruction218(); + void CryptonightR_instruction219(); + void CryptonightR_instruction220(); + void CryptonightR_instruction221(); + void CryptonightR_instruction222(); + void CryptonightR_instruction223(); + void CryptonightR_instruction224(); + void CryptonightR_instruction225(); + void CryptonightR_instruction226(); + void CryptonightR_instruction227(); + void CryptonightR_instruction228(); + void CryptonightR_instruction229(); + void CryptonightR_instruction230(); + void CryptonightR_instruction231(); + void CryptonightR_instruction232(); + void CryptonightR_instruction233(); + void CryptonightR_instruction234(); + void CryptonightR_instruction235(); + void CryptonightR_instruction236(); + void CryptonightR_instruction237(); + void CryptonightR_instruction238(); + void CryptonightR_instruction239(); + void 
CryptonightR_instruction240(); + void CryptonightR_instruction241(); + void CryptonightR_instruction242(); + void CryptonightR_instruction243(); + void CryptonightR_instruction244(); + void CryptonightR_instruction245(); + void CryptonightR_instruction246(); + void CryptonightR_instruction247(); + void CryptonightR_instruction248(); + void CryptonightR_instruction249(); + void CryptonightR_instruction250(); + void CryptonightR_instruction251(); + void CryptonightR_instruction252(); + void CryptonightR_instruction253(); + void CryptonightR_instruction254(); + void CryptonightR_instruction255(); + void CryptonightR_instruction256(); + void CryptonightR_instruction_mov0(); + void CryptonightR_instruction_mov1(); + void CryptonightR_instruction_mov2(); + void CryptonightR_instruction_mov3(); + void CryptonightR_instruction_mov4(); + void CryptonightR_instruction_mov5(); + void CryptonightR_instruction_mov6(); + void CryptonightR_instruction_mov7(); + void CryptonightR_instruction_mov8(); + void CryptonightR_instruction_mov9(); + void CryptonightR_instruction_mov10(); + void CryptonightR_instruction_mov11(); + void CryptonightR_instruction_mov12(); + void CryptonightR_instruction_mov13(); + void CryptonightR_instruction_mov14(); + void CryptonightR_instruction_mov15(); + void CryptonightR_instruction_mov16(); + void CryptonightR_instruction_mov17(); + void CryptonightR_instruction_mov18(); + void CryptonightR_instruction_mov19(); + void CryptonightR_instruction_mov20(); + void CryptonightR_instruction_mov21(); + void CryptonightR_instruction_mov22(); + void CryptonightR_instruction_mov23(); + void CryptonightR_instruction_mov24(); + void CryptonightR_instruction_mov25(); + void CryptonightR_instruction_mov26(); + void CryptonightR_instruction_mov27(); + void CryptonightR_instruction_mov28(); + void CryptonightR_instruction_mov29(); + void CryptonightR_instruction_mov30(); + void CryptonightR_instruction_mov31(); + void CryptonightR_instruction_mov32(); + void 
CryptonightR_instruction_mov33(); + void CryptonightR_instruction_mov34(); + void CryptonightR_instruction_mov35(); + void CryptonightR_instruction_mov36(); + void CryptonightR_instruction_mov37(); + void CryptonightR_instruction_mov38(); + void CryptonightR_instruction_mov39(); + void CryptonightR_instruction_mov40(); + void CryptonightR_instruction_mov41(); + void CryptonightR_instruction_mov42(); + void CryptonightR_instruction_mov43(); + void CryptonightR_instruction_mov44(); + void CryptonightR_instruction_mov45(); + void CryptonightR_instruction_mov46(); + void CryptonightR_instruction_mov47(); + void CryptonightR_instruction_mov48(); + void CryptonightR_instruction_mov49(); + void CryptonightR_instruction_mov50(); + void CryptonightR_instruction_mov51(); + void CryptonightR_instruction_mov52(); + void CryptonightR_instruction_mov53(); + void CryptonightR_instruction_mov54(); + void CryptonightR_instruction_mov55(); + void CryptonightR_instruction_mov56(); + void CryptonightR_instruction_mov57(); + void CryptonightR_instruction_mov58(); + void CryptonightR_instruction_mov59(); + void CryptonightR_instruction_mov60(); + void CryptonightR_instruction_mov61(); + void CryptonightR_instruction_mov62(); + void CryptonightR_instruction_mov63(); + void CryptonightR_instruction_mov64(); + void CryptonightR_instruction_mov65(); + void CryptonightR_instruction_mov66(); + void CryptonightR_instruction_mov67(); + void CryptonightR_instruction_mov68(); + void CryptonightR_instruction_mov69(); + void CryptonightR_instruction_mov70(); + void CryptonightR_instruction_mov71(); + void CryptonightR_instruction_mov72(); + void CryptonightR_instruction_mov73(); + void CryptonightR_instruction_mov74(); + void CryptonightR_instruction_mov75(); + void CryptonightR_instruction_mov76(); + void CryptonightR_instruction_mov77(); + void CryptonightR_instruction_mov78(); + void CryptonightR_instruction_mov79(); + void CryptonightR_instruction_mov80(); + void 
CryptonightR_instruction_mov81(); + void CryptonightR_instruction_mov82(); + void CryptonightR_instruction_mov83(); + void CryptonightR_instruction_mov84(); + void CryptonightR_instruction_mov85(); + void CryptonightR_instruction_mov86(); + void CryptonightR_instruction_mov87(); + void CryptonightR_instruction_mov88(); + void CryptonightR_instruction_mov89(); + void CryptonightR_instruction_mov90(); + void CryptonightR_instruction_mov91(); + void CryptonightR_instruction_mov92(); + void CryptonightR_instruction_mov93(); + void CryptonightR_instruction_mov94(); + void CryptonightR_instruction_mov95(); + void CryptonightR_instruction_mov96(); + void CryptonightR_instruction_mov97(); + void CryptonightR_instruction_mov98(); + void CryptonightR_instruction_mov99(); + void CryptonightR_instruction_mov100(); + void CryptonightR_instruction_mov101(); + void CryptonightR_instruction_mov102(); + void CryptonightR_instruction_mov103(); + void CryptonightR_instruction_mov104(); + void CryptonightR_instruction_mov105(); + void CryptonightR_instruction_mov106(); + void CryptonightR_instruction_mov107(); + void CryptonightR_instruction_mov108(); + void CryptonightR_instruction_mov109(); + void CryptonightR_instruction_mov110(); + void CryptonightR_instruction_mov111(); + void CryptonightR_instruction_mov112(); + void CryptonightR_instruction_mov113(); + void CryptonightR_instruction_mov114(); + void CryptonightR_instruction_mov115(); + void CryptonightR_instruction_mov116(); + void CryptonightR_instruction_mov117(); + void CryptonightR_instruction_mov118(); + void CryptonightR_instruction_mov119(); + void CryptonightR_instruction_mov120(); + void CryptonightR_instruction_mov121(); + void CryptonightR_instruction_mov122(); + void CryptonightR_instruction_mov123(); + void CryptonightR_instruction_mov124(); + void CryptonightR_instruction_mov125(); + void CryptonightR_instruction_mov126(); + void CryptonightR_instruction_mov127(); + void CryptonightR_instruction_mov128(); + void 
CryptonightR_instruction_mov129(); + void CryptonightR_instruction_mov130(); + void CryptonightR_instruction_mov131(); + void CryptonightR_instruction_mov132(); + void CryptonightR_instruction_mov133(); + void CryptonightR_instruction_mov134(); + void CryptonightR_instruction_mov135(); + void CryptonightR_instruction_mov136(); + void CryptonightR_instruction_mov137(); + void CryptonightR_instruction_mov138(); + void CryptonightR_instruction_mov139(); + void CryptonightR_instruction_mov140(); + void CryptonightR_instruction_mov141(); + void CryptonightR_instruction_mov142(); + void CryptonightR_instruction_mov143(); + void CryptonightR_instruction_mov144(); + void CryptonightR_instruction_mov145(); + void CryptonightR_instruction_mov146(); + void CryptonightR_instruction_mov147(); + void CryptonightR_instruction_mov148(); + void CryptonightR_instruction_mov149(); + void CryptonightR_instruction_mov150(); + void CryptonightR_instruction_mov151(); + void CryptonightR_instruction_mov152(); + void CryptonightR_instruction_mov153(); + void CryptonightR_instruction_mov154(); + void CryptonightR_instruction_mov155(); + void CryptonightR_instruction_mov156(); + void CryptonightR_instruction_mov157(); + void CryptonightR_instruction_mov158(); + void CryptonightR_instruction_mov159(); + void CryptonightR_instruction_mov160(); + void CryptonightR_instruction_mov161(); + void CryptonightR_instruction_mov162(); + void CryptonightR_instruction_mov163(); + void CryptonightR_instruction_mov164(); + void CryptonightR_instruction_mov165(); + void CryptonightR_instruction_mov166(); + void CryptonightR_instruction_mov167(); + void CryptonightR_instruction_mov168(); + void CryptonightR_instruction_mov169(); + void CryptonightR_instruction_mov170(); + void CryptonightR_instruction_mov171(); + void CryptonightR_instruction_mov172(); + void CryptonightR_instruction_mov173(); + void CryptonightR_instruction_mov174(); + void CryptonightR_instruction_mov175(); + void 
CryptonightR_instruction_mov176(); + void CryptonightR_instruction_mov177(); + void CryptonightR_instruction_mov178(); + void CryptonightR_instruction_mov179(); + void CryptonightR_instruction_mov180(); + void CryptonightR_instruction_mov181(); + void CryptonightR_instruction_mov182(); + void CryptonightR_instruction_mov183(); + void CryptonightR_instruction_mov184(); + void CryptonightR_instruction_mov185(); + void CryptonightR_instruction_mov186(); + void CryptonightR_instruction_mov187(); + void CryptonightR_instruction_mov188(); + void CryptonightR_instruction_mov189(); + void CryptonightR_instruction_mov190(); + void CryptonightR_instruction_mov191(); + void CryptonightR_instruction_mov192(); + void CryptonightR_instruction_mov193(); + void CryptonightR_instruction_mov194(); + void CryptonightR_instruction_mov195(); + void CryptonightR_instruction_mov196(); + void CryptonightR_instruction_mov197(); + void CryptonightR_instruction_mov198(); + void CryptonightR_instruction_mov199(); + void CryptonightR_instruction_mov200(); + void CryptonightR_instruction_mov201(); + void CryptonightR_instruction_mov202(); + void CryptonightR_instruction_mov203(); + void CryptonightR_instruction_mov204(); + void CryptonightR_instruction_mov205(); + void CryptonightR_instruction_mov206(); + void CryptonightR_instruction_mov207(); + void CryptonightR_instruction_mov208(); + void CryptonightR_instruction_mov209(); + void CryptonightR_instruction_mov210(); + void CryptonightR_instruction_mov211(); + void CryptonightR_instruction_mov212(); + void CryptonightR_instruction_mov213(); + void CryptonightR_instruction_mov214(); + void CryptonightR_instruction_mov215(); + void CryptonightR_instruction_mov216(); + void CryptonightR_instruction_mov217(); + void CryptonightR_instruction_mov218(); + void CryptonightR_instruction_mov219(); + void CryptonightR_instruction_mov220(); + void CryptonightR_instruction_mov221(); + void CryptonightR_instruction_mov222(); + void 
CryptonightR_instruction_mov223(); + void CryptonightR_instruction_mov224(); + void CryptonightR_instruction_mov225(); + void CryptonightR_instruction_mov226(); + void CryptonightR_instruction_mov227(); + void CryptonightR_instruction_mov228(); + void CryptonightR_instruction_mov229(); + void CryptonightR_instruction_mov230(); + void CryptonightR_instruction_mov231(); + void CryptonightR_instruction_mov232(); + void CryptonightR_instruction_mov233(); + void CryptonightR_instruction_mov234(); + void CryptonightR_instruction_mov235(); + void CryptonightR_instruction_mov236(); + void CryptonightR_instruction_mov237(); + void CryptonightR_instruction_mov238(); + void CryptonightR_instruction_mov239(); + void CryptonightR_instruction_mov240(); + void CryptonightR_instruction_mov241(); + void CryptonightR_instruction_mov242(); + void CryptonightR_instruction_mov243(); + void CryptonightR_instruction_mov244(); + void CryptonightR_instruction_mov245(); + void CryptonightR_instruction_mov246(); + void CryptonightR_instruction_mov247(); + void CryptonightR_instruction_mov248(); + void CryptonightR_instruction_mov249(); + void CryptonightR_instruction_mov250(); + void CryptonightR_instruction_mov251(); + void CryptonightR_instruction_mov252(); + void CryptonightR_instruction_mov253(); + void CryptonightR_instruction_mov254(); + void CryptonightR_instruction_mov255(); + void CryptonightR_instruction_mov256(); +} + +/* Lookup table indexed by instruction number: entry i is CryptonightR_instruction<i> for i = 0..256, i.e. the functions declared above in the same order. NOTE(review): presumably these symbols mark positions in assembly code consumed by the CryptonightR code generator, and the extra 257th entry looks like an end marker -- confirm against the generator. */ const void_func instructions[257] = { + CryptonightR_instruction0, + CryptonightR_instruction1, + CryptonightR_instruction2, + CryptonightR_instruction3, + CryptonightR_instruction4, + CryptonightR_instruction5, + CryptonightR_instruction6, + CryptonightR_instruction7, + CryptonightR_instruction8, + CryptonightR_instruction9, + CryptonightR_instruction10, + CryptonightR_instruction11, + CryptonightR_instruction12, + CryptonightR_instruction13, + CryptonightR_instruction14, + CryptonightR_instruction15, + CryptonightR_instruction16, + CryptonightR_instruction17, +
CryptonightR_instruction18, + CryptonightR_instruction19, + CryptonightR_instruction20, + CryptonightR_instruction21, + CryptonightR_instruction22, + CryptonightR_instruction23, + CryptonightR_instruction24, + CryptonightR_instruction25, + CryptonightR_instruction26, + CryptonightR_instruction27, + CryptonightR_instruction28, + CryptonightR_instruction29, + CryptonightR_instruction30, + CryptonightR_instruction31, + CryptonightR_instruction32, + CryptonightR_instruction33, + CryptonightR_instruction34, + CryptonightR_instruction35, + CryptonightR_instruction36, + CryptonightR_instruction37, + CryptonightR_instruction38, + CryptonightR_instruction39, + CryptonightR_instruction40, + CryptonightR_instruction41, + CryptonightR_instruction42, + CryptonightR_instruction43, + CryptonightR_instruction44, + CryptonightR_instruction45, + CryptonightR_instruction46, + CryptonightR_instruction47, + CryptonightR_instruction48, + CryptonightR_instruction49, + CryptonightR_instruction50, + CryptonightR_instruction51, + CryptonightR_instruction52, + CryptonightR_instruction53, + CryptonightR_instruction54, + CryptonightR_instruction55, + CryptonightR_instruction56, + CryptonightR_instruction57, + CryptonightR_instruction58, + CryptonightR_instruction59, + CryptonightR_instruction60, + CryptonightR_instruction61, + CryptonightR_instruction62, + CryptonightR_instruction63, + CryptonightR_instruction64, + CryptonightR_instruction65, + CryptonightR_instruction66, + CryptonightR_instruction67, + CryptonightR_instruction68, + CryptonightR_instruction69, + CryptonightR_instruction70, + CryptonightR_instruction71, + CryptonightR_instruction72, + CryptonightR_instruction73, + CryptonightR_instruction74, + CryptonightR_instruction75, + CryptonightR_instruction76, + CryptonightR_instruction77, + CryptonightR_instruction78, + CryptonightR_instruction79, + CryptonightR_instruction80, + CryptonightR_instruction81, + CryptonightR_instruction82, + CryptonightR_instruction83, +
CryptonightR_instruction84, + CryptonightR_instruction85, + CryptonightR_instruction86, + CryptonightR_instruction87, + CryptonightR_instruction88, + CryptonightR_instruction89, + CryptonightR_instruction90, + CryptonightR_instruction91, + CryptonightR_instruction92, + CryptonightR_instruction93, + CryptonightR_instruction94, + CryptonightR_instruction95, + CryptonightR_instruction96, + CryptonightR_instruction97, + CryptonightR_instruction98, + CryptonightR_instruction99, + CryptonightR_instruction100, + CryptonightR_instruction101, + CryptonightR_instruction102, + CryptonightR_instruction103, + CryptonightR_instruction104, + CryptonightR_instruction105, + CryptonightR_instruction106, + CryptonightR_instruction107, + CryptonightR_instruction108, + CryptonightR_instruction109, + CryptonightR_instruction110, + CryptonightR_instruction111, + CryptonightR_instruction112, + CryptonightR_instruction113, + CryptonightR_instruction114, + CryptonightR_instruction115, + CryptonightR_instruction116, + CryptonightR_instruction117, + CryptonightR_instruction118, + CryptonightR_instruction119, + CryptonightR_instruction120, + CryptonightR_instruction121, + CryptonightR_instruction122, + CryptonightR_instruction123, + CryptonightR_instruction124, + CryptonightR_instruction125, + CryptonightR_instruction126, + CryptonightR_instruction127, + CryptonightR_instruction128, + CryptonightR_instruction129, + CryptonightR_instruction130, + CryptonightR_instruction131, + CryptonightR_instruction132, + CryptonightR_instruction133, + CryptonightR_instruction134, + CryptonightR_instruction135, + CryptonightR_instruction136, + CryptonightR_instruction137, + CryptonightR_instruction138, + CryptonightR_instruction139, + CryptonightR_instruction140, + CryptonightR_instruction141, + CryptonightR_instruction142, + CryptonightR_instruction143, + CryptonightR_instruction144, + CryptonightR_instruction145, + CryptonightR_instruction146, + CryptonightR_instruction147, + CryptonightR_instruction148, +
CryptonightR_instruction149, + CryptonightR_instruction150, + CryptonightR_instruction151, + CryptonightR_instruction152, + CryptonightR_instruction153, + CryptonightR_instruction154, + CryptonightR_instruction155, + CryptonightR_instruction156, + CryptonightR_instruction157, + CryptonightR_instruction158, + CryptonightR_instruction159, + CryptonightR_instruction160, + CryptonightR_instruction161, + CryptonightR_instruction162, + CryptonightR_instruction163, + CryptonightR_instruction164, + CryptonightR_instruction165, + CryptonightR_instruction166, + CryptonightR_instruction167, + CryptonightR_instruction168, + CryptonightR_instruction169, + CryptonightR_instruction170, + CryptonightR_instruction171, + CryptonightR_instruction172, + CryptonightR_instruction173, + CryptonightR_instruction174, + CryptonightR_instruction175, + CryptonightR_instruction176, + CryptonightR_instruction177, + CryptonightR_instruction178, + CryptonightR_instruction179, + CryptonightR_instruction180, + CryptonightR_instruction181, + CryptonightR_instruction182, + CryptonightR_instruction183, + CryptonightR_instruction184, + CryptonightR_instruction185, + CryptonightR_instruction186, + CryptonightR_instruction187, + CryptonightR_instruction188, + CryptonightR_instruction189, + CryptonightR_instruction190, + CryptonightR_instruction191, + CryptonightR_instruction192, + CryptonightR_instruction193, + CryptonightR_instruction194, + CryptonightR_instruction195, + CryptonightR_instruction196, + CryptonightR_instruction197, + CryptonightR_instruction198, + CryptonightR_instruction199, + CryptonightR_instruction200, + CryptonightR_instruction201, + CryptonightR_instruction202, + CryptonightR_instruction203, + CryptonightR_instruction204, + CryptonightR_instruction205, + CryptonightR_instruction206, + CryptonightR_instruction207, + CryptonightR_instruction208, + CryptonightR_instruction209, + CryptonightR_instruction210, + CryptonightR_instruction211, + CryptonightR_instruction212, +
CryptonightR_instruction213, + CryptonightR_instruction214, + CryptonightR_instruction215, + CryptonightR_instruction216, + CryptonightR_instruction217, + CryptonightR_instruction218, + CryptonightR_instruction219, + CryptonightR_instruction220, + CryptonightR_instruction221, + CryptonightR_instruction222, + CryptonightR_instruction223, + CryptonightR_instruction224, + CryptonightR_instruction225, + CryptonightR_instruction226, + CryptonightR_instruction227, + CryptonightR_instruction228, + CryptonightR_instruction229, + CryptonightR_instruction230, + CryptonightR_instruction231, + CryptonightR_instruction232, + CryptonightR_instruction233, + CryptonightR_instruction234, + CryptonightR_instruction235, + CryptonightR_instruction236, + CryptonightR_instruction237, + CryptonightR_instruction238, + CryptonightR_instruction239, + CryptonightR_instruction240, + CryptonightR_instruction241, + CryptonightR_instruction242, + CryptonightR_instruction243, + CryptonightR_instruction244, + CryptonightR_instruction245, + CryptonightR_instruction246, + CryptonightR_instruction247, + CryptonightR_instruction248, + CryptonightR_instruction249, + CryptonightR_instruction250, + CryptonightR_instruction251, + CryptonightR_instruction252, + CryptonightR_instruction253, + CryptonightR_instruction254, + CryptonightR_instruction255, + CryptonightR_instruction256, +}; + +/* Second lookup table with the same layout as instructions[]: entry i is CryptonightR_instruction_mov<i> for i = 0..256, in index order. NOTE(review): presumably each entry is the mov variant of the fragment with the same index in instructions[] -- confirm against the code generator that reads both tables. */ const void_func instructions_mov[257] = { + CryptonightR_instruction_mov0, + CryptonightR_instruction_mov1, + CryptonightR_instruction_mov2, + CryptonightR_instruction_mov3, + CryptonightR_instruction_mov4, + CryptonightR_instruction_mov5, + CryptonightR_instruction_mov6, + CryptonightR_instruction_mov7, + CryptonightR_instruction_mov8, + CryptonightR_instruction_mov9, + CryptonightR_instruction_mov10, + CryptonightR_instruction_mov11, + CryptonightR_instruction_mov12, + CryptonightR_instruction_mov13, + CryptonightR_instruction_mov14, + CryptonightR_instruction_mov15, + CryptonightR_instruction_mov16, +
CryptonightR_instruction_mov17, + CryptonightR_instruction_mov18, + CryptonightR_instruction_mov19, + CryptonightR_instruction_mov20, + CryptonightR_instruction_mov21, + CryptonightR_instruction_mov22, + CryptonightR_instruction_mov23, + CryptonightR_instruction_mov24, + CryptonightR_instruction_mov25, + CryptonightR_instruction_mov26, + CryptonightR_instruction_mov27, + CryptonightR_instruction_mov28, + CryptonightR_instruction_mov29, + CryptonightR_instruction_mov30, + CryptonightR_instruction_mov31, + CryptonightR_instruction_mov32, + CryptonightR_instruction_mov33, + CryptonightR_instruction_mov34, + CryptonightR_instruction_mov35, + CryptonightR_instruction_mov36, + CryptonightR_instruction_mov37, + CryptonightR_instruction_mov38, + CryptonightR_instruction_mov39, + CryptonightR_instruction_mov40, + CryptonightR_instruction_mov41, + CryptonightR_instruction_mov42, + CryptonightR_instruction_mov43, + CryptonightR_instruction_mov44, + CryptonightR_instruction_mov45, + CryptonightR_instruction_mov46, + CryptonightR_instruction_mov47, + CryptonightR_instruction_mov48, + CryptonightR_instruction_mov49, + CryptonightR_instruction_mov50, + CryptonightR_instruction_mov51, + CryptonightR_instruction_mov52, + CryptonightR_instruction_mov53, + CryptonightR_instruction_mov54, + CryptonightR_instruction_mov55, + CryptonightR_instruction_mov56, + CryptonightR_instruction_mov57, + CryptonightR_instruction_mov58, + CryptonightR_instruction_mov59, + CryptonightR_instruction_mov60, + CryptonightR_instruction_mov61, + CryptonightR_instruction_mov62, + CryptonightR_instruction_mov63, + CryptonightR_instruction_mov64, + CryptonightR_instruction_mov65, + CryptonightR_instruction_mov66, + CryptonightR_instruction_mov67, + CryptonightR_instruction_mov68, + CryptonightR_instruction_mov69, + CryptonightR_instruction_mov70, + CryptonightR_instruction_mov71, + CryptonightR_instruction_mov72, + CryptonightR_instruction_mov73, + CryptonightR_instruction_mov74, +
CryptonightR_instruction_mov75, + CryptonightR_instruction_mov76, + CryptonightR_instruction_mov77, + CryptonightR_instruction_mov78, + CryptonightR_instruction_mov79, + CryptonightR_instruction_mov80, + CryptonightR_instruction_mov81, + CryptonightR_instruction_mov82, + CryptonightR_instruction_mov83, + CryptonightR_instruction_mov84, + CryptonightR_instruction_mov85, + CryptonightR_instruction_mov86, + CryptonightR_instruction_mov87, + CryptonightR_instruction_mov88, + CryptonightR_instruction_mov89, + CryptonightR_instruction_mov90, + CryptonightR_instruction_mov91, + CryptonightR_instruction_mov92, + CryptonightR_instruction_mov93, + CryptonightR_instruction_mov94, + CryptonightR_instruction_mov95, + CryptonightR_instruction_mov96, + CryptonightR_instruction_mov97, + CryptonightR_instruction_mov98, + CryptonightR_instruction_mov99, + CryptonightR_instruction_mov100, + CryptonightR_instruction_mov101, + CryptonightR_instruction_mov102, + CryptonightR_instruction_mov103, + CryptonightR_instruction_mov104, + CryptonightR_instruction_mov105, + CryptonightR_instruction_mov106, + CryptonightR_instruction_mov107, + CryptonightR_instruction_mov108, + CryptonightR_instruction_mov109, + CryptonightR_instruction_mov110, + CryptonightR_instruction_mov111, + CryptonightR_instruction_mov112, + CryptonightR_instruction_mov113, + CryptonightR_instruction_mov114, + CryptonightR_instruction_mov115, + CryptonightR_instruction_mov116, + CryptonightR_instruction_mov117, + CryptonightR_instruction_mov118, + CryptonightR_instruction_mov119, + CryptonightR_instruction_mov120, + CryptonightR_instruction_mov121, + CryptonightR_instruction_mov122, + CryptonightR_instruction_mov123, + CryptonightR_instruction_mov124, + CryptonightR_instruction_mov125, + CryptonightR_instruction_mov126, + CryptonightR_instruction_mov127, + CryptonightR_instruction_mov128, + CryptonightR_instruction_mov129, + CryptonightR_instruction_mov130, + CryptonightR_instruction_mov131, +
CryptonightR_instruction_mov132, + CryptonightR_instruction_mov133, + CryptonightR_instruction_mov134, + CryptonightR_instruction_mov135, + CryptonightR_instruction_mov136, + CryptonightR_instruction_mov137, + CryptonightR_instruction_mov138, + CryptonightR_instruction_mov139, + CryptonightR_instruction_mov140, + CryptonightR_instruction_mov141, + CryptonightR_instruction_mov142, + CryptonightR_instruction_mov143, + CryptonightR_instruction_mov144, + CryptonightR_instruction_mov145, + CryptonightR_instruction_mov146, + CryptonightR_instruction_mov147, + CryptonightR_instruction_mov148, + CryptonightR_instruction_mov149, + CryptonightR_instruction_mov150, + CryptonightR_instruction_mov151, + CryptonightR_instruction_mov152, + CryptonightR_instruction_mov153, + CryptonightR_instruction_mov154, + CryptonightR_instruction_mov155, + CryptonightR_instruction_mov156, + CryptonightR_instruction_mov157, + CryptonightR_instruction_mov158, + CryptonightR_instruction_mov159, + CryptonightR_instruction_mov160, + CryptonightR_instruction_mov161, + CryptonightR_instruction_mov162, + CryptonightR_instruction_mov163, + CryptonightR_instruction_mov164, + CryptonightR_instruction_mov165, + CryptonightR_instruction_mov166, + CryptonightR_instruction_mov167, + CryptonightR_instruction_mov168, + CryptonightR_instruction_mov169, + CryptonightR_instruction_mov170, + CryptonightR_instruction_mov171, + CryptonightR_instruction_mov172, + CryptonightR_instruction_mov173, + CryptonightR_instruction_mov174, + CryptonightR_instruction_mov175, + CryptonightR_instruction_mov176, + CryptonightR_instruction_mov177, + CryptonightR_instruction_mov178, + CryptonightR_instruction_mov179, + CryptonightR_instruction_mov180, + CryptonightR_instruction_mov181, + CryptonightR_instruction_mov182, + CryptonightR_instruction_mov183, + CryptonightR_instruction_mov184, + CryptonightR_instruction_mov185, + CryptonightR_instruction_mov186, + CryptonightR_instruction_mov187, + CryptonightR_instruction_mov188, +
CryptonightR_instruction_mov189, + CryptonightR_instruction_mov190, + CryptonightR_instruction_mov191, + CryptonightR_instruction_mov192, + CryptonightR_instruction_mov193, + CryptonightR_instruction_mov194, + CryptonightR_instruction_mov195, + CryptonightR_instruction_mov196, + CryptonightR_instruction_mov197, + CryptonightR_instruction_mov198, + CryptonightR_instruction_mov199, + CryptonightR_instruction_mov200, + CryptonightR_instruction_mov201, + CryptonightR_instruction_mov202, + CryptonightR_instruction_mov203, + CryptonightR_instruction_mov204, + CryptonightR_instruction_mov205, + CryptonightR_instruction_mov206, + CryptonightR_instruction_mov207, + CryptonightR_instruction_mov208, + CryptonightR_instruction_mov209, + CryptonightR_instruction_mov210, + CryptonightR_instruction_mov211, + CryptonightR_instruction_mov212, + CryptonightR_instruction_mov213, + CryptonightR_instruction_mov214, + CryptonightR_instruction_mov215, + CryptonightR_instruction_mov216, + CryptonightR_instruction_mov217, + CryptonightR_instruction_mov218, + CryptonightR_instruction_mov219, + CryptonightR_instruction_mov220, + CryptonightR_instruction_mov221, + CryptonightR_instruction_mov222, + CryptonightR_instruction_mov223, + CryptonightR_instruction_mov224, + CryptonightR_instruction_mov225, + CryptonightR_instruction_mov226, + CryptonightR_instruction_mov227, + CryptonightR_instruction_mov228, + CryptonightR_instruction_mov229, + CryptonightR_instruction_mov230, + CryptonightR_instruction_mov231, + CryptonightR_instruction_mov232, + CryptonightR_instruction_mov233, + CryptonightR_instruction_mov234, + CryptonightR_instruction_mov235, + CryptonightR_instruction_mov236, + CryptonightR_instruction_mov237, + CryptonightR_instruction_mov238, + CryptonightR_instruction_mov239, + CryptonightR_instruction_mov240, + CryptonightR_instruction_mov241, + CryptonightR_instruction_mov242, + CryptonightR_instruction_mov243, + CryptonightR_instruction_mov244, + CryptonightR_instruction_mov245, +
CryptonightR_instruction_mov246, + CryptonightR_instruction_mov247, + CryptonightR_instruction_mov248, + CryptonightR_instruction_mov249, + CryptonightR_instruction_mov250, + CryptonightR_instruction_mov251, + CryptonightR_instruction_mov252, + CryptonightR_instruction_mov253, + CryptonightR_instruction_mov254, + CryptonightR_instruction_mov255, + CryptonightR_instruction_mov256, +}; diff --git a/src/crypto/asm/win64/CryptonightR_template.inc b/src/crypto/asm/win64/CryptonightR_template.inc new file mode 100644 index 00000000..f6e6ef45 --- /dev/null +++ b/src/crypto/asm/win64/CryptonightR_template.inc @@ -0,0 +1,478 @@ +/* Exported labels that cut the single-hash and double-hash CryptonightR routines into named parts. NOTE(review): presumably a code generator copies these parts and splices generated instructions between them -- confirm. */ PUBLIC FN_PREFIX(CryptonightR_template_part1) +PUBLIC FN_PREFIX(CryptonightR_template_mainloop) +PUBLIC FN_PREFIX(CryptonightR_template_part2) +PUBLIC FN_PREFIX(CryptonightR_template_part3) +PUBLIC FN_PREFIX(CryptonightR_template_end) +PUBLIC FN_PREFIX(CryptonightR_template_double_part1) +PUBLIC FN_PREFIX(CryptonightR_template_double_mainloop) +PUBLIC FN_PREFIX(CryptonightR_template_double_part2) +PUBLIC FN_PREFIX(CryptonightR_template_double_part3) +PUBLIC FN_PREFIX(CryptonightR_template_double_part4) +PUBLIC FN_PREFIX(CryptonightR_template_double_end) + +/* part1: prologue and setup. Stores rbx/rbp/rsi to [rsp+16], [rsp+24], [rsp+32], pushes r10-r15 and rdi, reserves 64 bytes and saves xmm6-xmm9 there. Reads the block pointed to by rcx: qwords at offsets 0..88 are XORed pairwise into r8, r15, xmm6 and xmm7, the qword at +224 goes to r11 (used below as the base address of every memory access), and the dwords at +96..+108 go to ebx/esi/edi/ebp. The real stack pointer is parked in xmm9 so that rsp can hold data (the r8 value); r8d then becomes the loop counter, 524288 = 0x80000. NOTE(review): register usage is consistent with the Microsoft x64 calling convention, as the win64 directory name suggests -- confirm. */ FN_PREFIX(CryptonightR_template_part1): + mov QWORD PTR [rsp+16], rbx + mov QWORD PTR [rsp+24], rbp + mov QWORD PTR [rsp+32], rsi + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push rdi + sub rsp, 64 + mov r12, rcx + mov r8, QWORD PTR [r12+32] + mov rdx, r12 + xor r8, QWORD PTR [r12] + mov r15, QWORD PTR [r12+40] + mov r9, r8 + xor r15, QWORD PTR [r12+8] + mov r11, QWORD PTR [r12+224] + mov r12, QWORD PTR [r12+56] + xor r12, QWORD PTR [rdx+24] + mov rax, QWORD PTR [rdx+48] + xor rax, QWORD PTR [rdx+16] + movaps XMMWORD PTR [rsp+48], xmm6 + movd xmm0, r12 + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+16], xmm8 + movaps XMMWORD PTR [rsp], xmm9 + mov r12, QWORD PTR [rdx+88] + xor r12, QWORD PTR [rdx+72] + movd xmm6, rax + mov rax, QWORD PTR [rdx+80] + xor rax,
QWORD PTR [rdx+64] + punpcklqdq xmm6, xmm0 + and r9d, 2097136 + movd xmm0, r12 + movd xmm7, rax + punpcklqdq xmm7, xmm0 + mov r10d, r9d + movd xmm9, rsp + mov rsp, r8 + mov r8d, 524288 + + mov ebx, [rdx+96] + mov esi, [rdx+100] + mov edi, [rdx+104] + mov ebp, [rdx+108] + + ALIGN(64) +/* mainloop: first half of one iteration. Loads 16 bytes at [r11+r9], applies one aesenc round keyed with xmm4 = (rsp, r15), and masks the low dword of the result with 2097136 (0x1FFFF0) to get the second offset r10. The 16-byte slots at r9^16, r9^32 and r9^48 are read, incremented by xmm6, xmm4 and xmm7 respectively, and stored to slots r9^32, r9^48 and r9^16. The aesenc result XOR xmm6 is written back to the original slot. r13 is then built as (ebx+esi) | ((edi+ebp) << 32) XOR the qword at [r11+r10], and r14 is the qword at [r11+r10+8]. */ FN_PREFIX(CryptonightR_template_mainloop): + movdqa xmm5, XMMWORD PTR [r9+r11] + movd xmm0, r15 + movd xmm4, rsp + punpcklqdq xmm4, xmm0 + lea rdx, QWORD PTR [r9+r11] + + aesenc xmm5, xmm4 + movd r10d, xmm5 + and r10d, 2097136 + + mov r12d, r9d + mov eax, r9d + xor r9d, 48 + xor r12d, 16 + xor eax, 32 + movdqu xmm0, XMMWORD PTR [r9+r11] + movdqu xmm2, XMMWORD PTR [r12+r11] + movdqu xmm1, XMMWORD PTR [rax+r11] + paddq xmm0, xmm7 + paddq xmm2, xmm6 + paddq xmm1, xmm4 + movdqu XMMWORD PTR [r12+r11], xmm0 + movd r12, xmm5 + movdqu XMMWORD PTR [rax+r11], xmm2 + movdqu XMMWORD PTR [r9+r11], xmm1 + + movdqa xmm0, xmm5 + pxor xmm0, xmm6 + movdqu XMMWORD PTR [rdx], xmm0 + + lea r13d, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or r13, rdx + + xor r13, QWORD PTR [r10+r11] + mov r14, QWORD PTR [r10+r11+8] + + movd eax, xmm6 + movd edx, xmm7 + +/* part2: second half of the iteration. Multiplies r13 by r12 (the low qword of the aesenc result) with a full 128-bit product in rdx:rax, shuffles the three slots at r10^16, r10^32 and r10^48, adds the product halves into rsp/r15, stores them at [r11+r10] and [r11+r10+8], XORs them with r13/r14, derives the next offset r9 from rsp with the same 0x1FFFF0 mask, and loops while the decremented r8d is non-zero. NOTE(review): eax and edx are loaded from xmm6/xmm7 just before this label and overwritten by the first two instructions after it, so they are presumably inputs for code inserted between mainloop and part2 -- confirm. */ FN_PREFIX(CryptonightR_template_part2): + mov rax, r13 + mul r12 + movd xmm0, rax + movd xmm3, rdx + punpcklqdq xmm3, xmm0 + + mov r9d, r10d + mov r12d, r10d + xor r9d, 16 + xor r12d, 32 + xor r10d, 48 + movdqa xmm1, XMMWORD PTR [r12+r11] + xor rdx, QWORD PTR [r12+r11] + xor rax, QWORD PTR [r11+r12+8] + movdqa xmm2, XMMWORD PTR [r9+r11] + pxor xmm3, xmm2 + paddq xmm7, XMMWORD PTR [r10+r11] + paddq xmm1, xmm4 + paddq xmm3, xmm6 + movdqu XMMWORD PTR [r9+r11], xmm7 + movdqu XMMWORD PTR [r12+r11], xmm3 + movdqu XMMWORD PTR [r10+r11], xmm1 + + movdqa xmm7, xmm6 + add r15, rax + add rsp, rdx + xor r10, 48 + mov QWORD PTR [r10+r11], rsp + xor rsp, r13 + mov r9d, esp + mov QWORD PTR [r10+r11+8], r15 + and r9d, 2097136 + xor r15, r14 + movdqa xmm6, xmm5 + dec r8d + jnz FN_PREFIX(CryptonightR_template_mainloop) + +/* part3: epilogue. Restores the real rsp from xmm9, reloads rbx/rbp/rsi from [rsp+136], [rsp+144], [rsp+152] (the slots written at [rsp+16..32] on entry, now 120 bytes higher after 7 pushes plus the 64-byte reservation), restores xmm6-xmm9, releases the 64 bytes, pops the pushed registers in reverse order and returns. */ FN_PREFIX(CryptonightR_template_part3): + movd
rsp, xmm9 + + mov rbx, QWORD PTR [rsp+136] + mov rbp, QWORD PTR [rsp+144] + mov rsi, QWORD PTR [rsp+152] + movaps xmm6, XMMWORD PTR [rsp+48] + movaps xmm7, XMMWORD PTR [rsp+32] + movaps xmm8, XMMWORD PTR [rsp+16] + movaps xmm9, XMMWORD PTR [rsp] + add rsp, 64 + pop rdi + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + ret 0 +FN_PREFIX(CryptonightR_template_end): + +ALIGN(64) +FN_PREFIX(CryptonightR_template_double_part1): + mov QWORD PTR [rsp+24], rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 320 + mov r14, QWORD PTR [rcx+32] + mov r8, rcx + xor r14, QWORD PTR [rcx] + mov r12, QWORD PTR [rcx+40] + mov ebx, r14d + mov rsi, QWORD PTR [rcx+224] + and ebx, 2097136 + xor r12, QWORD PTR [rcx+8] + mov rcx, QWORD PTR [rcx+56] + xor rcx, QWORD PTR [r8+24] + mov rax, QWORD PTR [r8+48] + xor rax, QWORD PTR [r8+16] + mov r15, QWORD PTR [rdx+32] + xor r15, QWORD PTR [rdx] + movd xmm0, rcx + mov rcx, QWORD PTR [r8+88] + xor rcx, QWORD PTR [r8+72] + mov r13, QWORD PTR [rdx+40] + mov rdi, QWORD PTR [rdx+224] + xor r13, QWORD PTR [rdx+8] + movaps XMMWORD PTR [rsp+160], xmm6 + movaps XMMWORD PTR [rsp+176], xmm7 + movaps XMMWORD PTR [rsp+192], xmm8 + movaps XMMWORD PTR [rsp+208], xmm9 + movaps XMMWORD PTR [rsp+224], xmm10 + movaps XMMWORD PTR [rsp+240], xmm11 + movaps XMMWORD PTR [rsp+256], xmm12 + movaps XMMWORD PTR [rsp+272], xmm13 + movaps XMMWORD PTR [rsp+288], xmm14 + movaps XMMWORD PTR [rsp+304], xmm15 + movd xmm7, rax + mov rax, QWORD PTR [r8+80] + xor rax, QWORD PTR [r8+64] + + movaps xmm1, XMMWORD PTR [rdx+96] + movaps xmm2, XMMWORD PTR [r8+96] + movaps XMMWORD PTR [rsp], xmm1 + movaps XMMWORD PTR [rsp+16], xmm2 + + mov r8d, r15d + punpcklqdq xmm7, xmm0 + movd xmm0, rcx + mov rcx, QWORD PTR [rdx+56] + xor rcx, QWORD PTR [rdx+24] + movd xmm9, rax + mov QWORD PTR [rsp+128], rsi + mov rax, QWORD PTR [rdx+48] + xor rax, QWORD PTR [rdx+16] + punpcklqdq xmm9, xmm0 + movd xmm0, rcx + mov rcx, QWORD PTR [rdx+88] + xor rcx,
QWORD PTR [rdx+72] + movd xmm8, rax + mov QWORD PTR [rsp+136], rdi + mov rax, QWORD PTR [rdx+80] + xor rax, QWORD PTR [rdx+64] + punpcklqdq xmm8, xmm0 + and r8d, 2097136 + movd xmm0, rcx + mov r11d, 524288 + movd xmm10, rax + punpcklqdq xmm10, xmm0 + + movd xmm14, QWORD PTR [rsp+128] + movd xmm15, QWORD PTR [rsp+136] + + ALIGN(64) +FN_PREFIX(CryptonightR_template_double_mainloop): + movdqu xmm6, XMMWORD PTR [rbx+rsi] + movd xmm0, r12 + mov ecx, ebx + movd xmm3, r14 + punpcklqdq xmm3, xmm0 + xor ebx, 16 + aesenc xmm6, xmm3 + movd rdx, xmm6 + movd xmm4, r15 + movdqu xmm0, XMMWORD PTR [rbx+rsi] + xor ebx, 48 + paddq xmm0, xmm7 + movdqu xmm1, XMMWORD PTR [rbx+rsi] + movdqu XMMWORD PTR [rbx+rsi], xmm0 + paddq xmm1, xmm3 + xor ebx, 16 + mov eax, ebx + xor rax, 32 + movdqu xmm0, XMMWORD PTR [rbx+rsi] + movdqu XMMWORD PTR [rbx+rsi], xmm1 + paddq xmm0, xmm9 + movdqu XMMWORD PTR [rax+rsi], xmm0 + movdqa xmm0, xmm6 + pxor xmm0, xmm7 + movdqu XMMWORD PTR [rcx+rsi], xmm0 + mov esi, edx + movdqu xmm5, XMMWORD PTR [r8+rdi] + and esi, 2097136 + mov ecx, r8d + movd xmm0, r13 + punpcklqdq xmm4, xmm0 + xor r8d, 16 + aesenc xmm5, xmm4 + movdqu xmm0, XMMWORD PTR [r8+rdi] + xor r8d, 48 + paddq xmm0, xmm8 + movdqu xmm1, XMMWORD PTR [r8+rdi] + movdqu XMMWORD PTR [r8+rdi], xmm0 + paddq xmm1, xmm4 + xor r8d, 16 + mov eax, r8d + xor rax, 32 + movdqu xmm0, XMMWORD PTR [r8+rdi] + movdqu XMMWORD PTR [r8+rdi], xmm1 + paddq xmm0, xmm10 + movdqu XMMWORD PTR [rax+rdi], xmm0 + movdqa xmm0, xmm5 + pxor xmm0, xmm8 + movdqu XMMWORD PTR [rcx+rdi], xmm0 + movd rdi, xmm5 + movd rcx, xmm14 + mov ebp, edi + mov r8, QWORD PTR [rcx+rsi] + mov r10, QWORD PTR [rcx+rsi+8] + lea r9, QWORD PTR [rcx+rsi] + xor esi, 16 + + movd xmm0, rsp + movd xmm1, rsi + movd xmm2, rdi + movd xmm11, rbp + movd xmm12, r15 + movd xmm13, rdx + mov [rsp+112], rcx + + mov ebx, DWORD PTR [rsp+16] + mov esi, DWORD PTR [rsp+20] + mov edi, DWORD PTR [rsp+24] + mov ebp, DWORD PTR [rsp+28] + + lea eax, [ebx+esi] + lea edx, [edi+ebp] + shl 
rdx, 32 + or rax, rdx + xor r8, rax + + movd esp, xmm3 + pextrd r15d, xmm3, 2 + movd eax, xmm7 + movd edx, xmm9 + +FN_PREFIX(CryptonightR_template_double_part2): + + movd rsp, xmm0 + mov DWORD PTR [rsp+16], ebx + mov DWORD PTR [rsp+20], esi + mov DWORD PTR [rsp+24], edi + mov DWORD PTR [rsp+28], ebp + + movd rsi, xmm1 + movd rdi, xmm2 + movd rbp, xmm11 + movd r15, xmm12 + movd rdx, xmm13 + mov rcx, [rsp+112] + + mov rbx, r8 + mov rax, r8 + mul rdx + and ebp, 2097136 + mov r8, rax + movd xmm1, rdx + movd xmm0, r8 + punpcklqdq xmm1, xmm0 + pxor xmm1, XMMWORD PTR [rcx+rsi] + xor esi, 48 + paddq xmm1, xmm7 + movdqu xmm2, XMMWORD PTR [rsi+rcx] + xor rdx, QWORD PTR [rsi+rcx] + paddq xmm2, xmm3 + xor r8, QWORD PTR [rsi+rcx+8] + movdqu XMMWORD PTR [rsi+rcx], xmm1 + xor esi, 16 + mov eax, esi + mov rsi, rcx + movdqu xmm0, XMMWORD PTR [rax+rcx] + movdqu XMMWORD PTR [rax+rcx], xmm2 + paddq xmm0, xmm9 + add r12, r8 + xor rax, 32 + add r14, rdx + movdqa xmm9, xmm7 + movdqa xmm7, xmm6 + movdqu XMMWORD PTR [rax+rcx], xmm0 + mov QWORD PTR [r9+8], r12 + xor r12, r10 + mov QWORD PTR [r9], r14 + movd rcx, xmm15 + xor r14, rbx + mov r10d, ebp + mov ebx, r14d + xor ebp, 16 + and ebx, 2097136 + mov r8, QWORD PTR [r10+rcx] + mov r9, QWORD PTR [r10+rcx+8] + + movd xmm0, rsp + movd xmm1, rbx + movd xmm2, rsi + movd xmm11, rdi + movd xmm12, rbp + movd xmm13, r15 + mov [rsp+104], rcx + + mov ebx, DWORD PTR [rsp] + mov esi, DWORD PTR [rsp+4] + mov edi, DWORD PTR [rsp+8] + mov ebp, DWORD PTR [rsp+12] + + lea eax, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or rax, rdx + + xor r8, rax + movd xmm3, r8 + + movd esp, xmm4 + pextrd r15d, xmm4, 2 + movd eax, xmm8 + movd edx, xmm10 + +FN_PREFIX(CryptonightR_template_double_part3): + + movd rsp, xmm0 + mov DWORD PTR [rsp], ebx + mov DWORD PTR [rsp+4], esi + mov DWORD PTR [rsp+8], edi + mov DWORD PTR [rsp+12], ebp + + movd rbx, xmm1 + movd rsi, xmm2 + movd rdi, xmm11 + movd rbp, xmm12 + movd r15, xmm13 + mov rcx, [rsp+104] + + mov rax, r8 + mul rdi 
+ movd xmm1, rdx + movd xmm0, rax + punpcklqdq xmm1, xmm0 + mov rdi, rcx + mov r8, rax + pxor xmm1, XMMWORD PTR [rbp+rcx] + xor ebp, 48 + paddq xmm1, xmm8 + xor r8, QWORD PTR [rbp+rcx+8] + xor rdx, QWORD PTR [rbp+rcx] + add r13, r8 + movdqu xmm2, XMMWORD PTR [rbp+rcx] + add r15, rdx + movdqu XMMWORD PTR [rbp+rcx], xmm1 + paddq xmm2, xmm4 + xor ebp, 16 + mov eax, ebp + xor rax, 32 + movdqu xmm0, XMMWORD PTR [rbp+rcx] + movdqu XMMWORD PTR [rbp+rcx], xmm2 + paddq xmm0, xmm10 + movdqu XMMWORD PTR [rax+rcx], xmm0 + movd rax, xmm3 + movdqa xmm10, xmm8 + mov QWORD PTR [r10+rcx], r15 + movdqa xmm8, xmm5 + xor r15, rax + mov QWORD PTR [r10+rcx+8], r13 + mov r8d, r15d + xor r13, r9 + and r8d, 2097136 + dec r11d + jnz FN_PREFIX(CryptonightR_template_double_mainloop) + +FN_PREFIX(CryptonightR_template_double_part4): + + mov rbx, QWORD PTR [rsp+400] + movaps xmm6, XMMWORD PTR [rsp+160] + movaps xmm7, XMMWORD PTR [rsp+176] + movaps xmm8, XMMWORD PTR [rsp+192] + movaps xmm9, XMMWORD PTR [rsp+208] + movaps xmm10, XMMWORD PTR [rsp+224] + movaps xmm11, XMMWORD PTR [rsp+240] + movaps xmm12, XMMWORD PTR [rsp+256] + movaps xmm13, XMMWORD PTR [rsp+272] + movaps xmm14, XMMWORD PTR [rsp+288] + movaps xmm15, XMMWORD PTR [rsp+304] + add rsp, 320 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + ret 0 +FN_PREFIX(CryptonightR_template_double_end): diff --git a/src/crypto/asm/win64/CryptonightR_template_win.inc b/src/crypto/asm/win64/CryptonightR_template_win.inc new file mode 100644 index 00000000..b0217e04 --- /dev/null +++ b/src/crypto/asm/win64/CryptonightR_template_win.inc @@ -0,0 +1,478 @@ +PUBLIC CryptonightR_template_part1 +PUBLIC CryptonightR_template_mainloop +PUBLIC CryptonightR_template_part2 +PUBLIC CryptonightR_template_part3 +PUBLIC CryptonightR_template_end +PUBLIC CryptonightR_template_double_part1 +PUBLIC CryptonightR_template_double_mainloop +PUBLIC CryptonightR_template_double_part2 +PUBLIC CryptonightR_template_double_part3 +PUBLIC 
CryptonightR_template_double_part4 +PUBLIC CryptonightR_template_double_end + +CryptonightR_template_part1: + mov QWORD PTR [rsp+16], rbx + mov QWORD PTR [rsp+24], rbp + mov QWORD PTR [rsp+32], rsi + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push rdi + sub rsp, 64 + mov r12, rcx + mov r8, QWORD PTR [r12+32] + mov rdx, r12 + xor r8, QWORD PTR [r12] + mov r15, QWORD PTR [r12+40] + mov r9, r8 + xor r15, QWORD PTR [r12+8] + mov r11, QWORD PTR [r12+224] + mov r12, QWORD PTR [r12+56] + xor r12, QWORD PTR [rdx+24] + mov rax, QWORD PTR [rdx+48] + xor rax, QWORD PTR [rdx+16] + movaps XMMWORD PTR [rsp+48], xmm6 + movd xmm0, r12 + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+16], xmm8 + movaps XMMWORD PTR [rsp], xmm9 + mov r12, QWORD PTR [rdx+88] + xor r12, QWORD PTR [rdx+72] + movd xmm6, rax + mov rax, QWORD PTR [rdx+80] + xor rax, QWORD PTR [rdx+64] + punpcklqdq xmm6, xmm0 + and r9d, 2097136 + movd xmm0, r12 + movd xmm7, rax + punpcklqdq xmm7, xmm0 + mov r10d, r9d + movd xmm9, rsp + mov rsp, r8 + mov r8d, 524288 + + mov ebx, [rdx+96] + mov esi, [rdx+100] + mov edi, [rdx+104] + mov ebp, [rdx+108] + + ALIGN(64) +CryptonightR_template_mainloop: + movdqa xmm5, XMMWORD PTR [r9+r11] + movd xmm0, r15 + movd xmm4, rsp + punpcklqdq xmm4, xmm0 + lea rdx, QWORD PTR [r9+r11] + + aesenc xmm5, xmm4 + movd r10d, xmm5 + and r10d, 2097136 + + mov r12d, r9d + mov eax, r9d + xor r9d, 48 + xor r12d, 16 + xor eax, 32 + movdqu xmm0, XMMWORD PTR [r9+r11] + movdqu xmm2, XMMWORD PTR [r12+r11] + movdqu xmm1, XMMWORD PTR [rax+r11] + paddq xmm0, xmm7 + paddq xmm2, xmm6 + paddq xmm1, xmm4 + movdqu XMMWORD PTR [r12+r11], xmm0 + movd r12, xmm5 + movdqu XMMWORD PTR [rax+r11], xmm2 + movdqu XMMWORD PTR [r9+r11], xmm1 + + movdqa xmm0, xmm5 + pxor xmm0, xmm6 + movdqu XMMWORD PTR [rdx], xmm0 + + lea r13d, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or r13, rdx + + xor r13, QWORD PTR [r10+r11] + mov r14, QWORD PTR [r10+r11+8] + + movd eax, xmm6 + movd edx, xmm7 + 
+CryptonightR_template_part2: + mov rax, r13 + mul r12 + movd xmm0, rax + movd xmm3, rdx + punpcklqdq xmm3, xmm0 + + mov r9d, r10d + mov r12d, r10d + xor r9d, 16 + xor r12d, 32 + xor r10d, 48 + movdqa xmm1, XMMWORD PTR [r12+r11] + xor rdx, QWORD PTR [r12+r11] + xor rax, QWORD PTR [r11+r12+8] + movdqa xmm2, XMMWORD PTR [r9+r11] + pxor xmm3, xmm2 + paddq xmm7, XMMWORD PTR [r10+r11] + paddq xmm1, xmm4 + paddq xmm3, xmm6 + movdqu XMMWORD PTR [r9+r11], xmm7 + movdqu XMMWORD PTR [r12+r11], xmm3 + movdqu XMMWORD PTR [r10+r11], xmm1 + + movdqa xmm7, xmm6 + add r15, rax + add rsp, rdx + xor r10, 48 + mov QWORD PTR [r10+r11], rsp + xor rsp, r13 + mov r9d, esp + mov QWORD PTR [r10+r11+8], r15 + and r9d, 2097136 + xor r15, r14 + movdqa xmm6, xmm5 + dec r8d + jnz CryptonightR_template_mainloop + +CryptonightR_template_part3: + movd rsp, xmm9 + + mov rbx, QWORD PTR [rsp+136] + mov rbp, QWORD PTR [rsp+144] + mov rsi, QWORD PTR [rsp+152] + movaps xmm6, XMMWORD PTR [rsp+48] + movaps xmm7, XMMWORD PTR [rsp+32] + movaps xmm8, XMMWORD PTR [rsp+16] + movaps xmm9, XMMWORD PTR [rsp] + add rsp, 64 + pop rdi + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + ret 0 +CryptonightR_template_end: + +ALIGN(64) +CryptonightR_template_double_part1: + mov QWORD PTR [rsp+24], rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 320 + mov r14, QWORD PTR [rcx+32] + mov r8, rcx + xor r14, QWORD PTR [rcx] + mov r12, QWORD PTR [rcx+40] + mov ebx, r14d + mov rsi, QWORD PTR [rcx+224] + and ebx, 2097136 + xor r12, QWORD PTR [rcx+8] + mov rcx, QWORD PTR [rcx+56] + xor rcx, QWORD PTR [r8+24] + mov rax, QWORD PTR [r8+48] + xor rax, QWORD PTR [r8+16] + mov r15, QWORD PTR [rdx+32] + xor r15, QWORD PTR [rdx] + movd xmm0, rcx + mov rcx, QWORD PTR [r8+88] + xor rcx, QWORD PTR [r8+72] + mov r13, QWORD PTR [rdx+40] + mov rdi, QWORD PTR [rdx+224] + xor r13, QWORD PTR [rdx+8] + movaps XMMWORD PTR [rsp+160], xmm6 + movaps XMMWORD PTR [rsp+176], xmm7 + movaps XMMWORD PTR 
[rsp+192], xmm8 + movaps XMMWORD PTR [rsp+208], xmm9 + movaps XMMWORD PTR [rsp+224], xmm10 + movaps XMMWORD PTR [rsp+240], xmm11 + movaps XMMWORD PTR [rsp+256], xmm12 + movaps XMMWORD PTR [rsp+272], xmm13 + movaps XMMWORD PTR [rsp+288], xmm14 + movaps XMMWORD PTR [rsp+304], xmm15 + movd xmm7, rax + mov rax, QWORD PTR [r8+80] + xor rax, QWORD PTR [r8+64] + + movaps xmm1, XMMWORD PTR [rdx+96] + movaps xmm2, XMMWORD PTR [r8+96] + movaps XMMWORD PTR [rsp], xmm1 + movaps XMMWORD PTR [rsp+16], xmm2 + + mov r8d, r15d + punpcklqdq xmm7, xmm0 + movd xmm0, rcx + mov rcx, QWORD PTR [rdx+56] + xor rcx, QWORD PTR [rdx+24] + movd xmm9, rax + mov QWORD PTR [rsp+128], rsi + mov rax, QWORD PTR [rdx+48] + xor rax, QWORD PTR [rdx+16] + punpcklqdq xmm9, xmm0 + movd xmm0, rcx + mov rcx, QWORD PTR [rdx+88] + xor rcx, QWORD PTR [rdx+72] + movd xmm8, rax + mov QWORD PTR [rsp+136], rdi + mov rax, QWORD PTR [rdx+80] + xor rax, QWORD PTR [rdx+64] + punpcklqdq xmm8, xmm0 + and r8d, 2097136 + movd xmm0, rcx + mov r11d, 524288 + movd xmm10, rax + punpcklqdq xmm10, xmm0 + + movd xmm14, QWORD PTR [rsp+128] + movd xmm15, QWORD PTR [rsp+136] + + ALIGN(64) +CryptonightR_template_double_mainloop: + movdqu xmm6, XMMWORD PTR [rbx+rsi] + movd xmm0, r12 + mov ecx, ebx + movd xmm3, r14 + punpcklqdq xmm3, xmm0 + xor ebx, 16 + aesenc xmm6, xmm3 + movd rdx, xmm6 + movd xmm4, r15 + movdqu xmm0, XMMWORD PTR [rbx+rsi] + xor ebx, 48 + paddq xmm0, xmm7 + movdqu xmm1, XMMWORD PTR [rbx+rsi] + movdqu XMMWORD PTR [rbx+rsi], xmm0 + paddq xmm1, xmm3 + xor ebx, 16 + mov eax, ebx + xor rax, 32 + movdqu xmm0, XMMWORD PTR [rbx+rsi] + movdqu XMMWORD PTR [rbx+rsi], xmm1 + paddq xmm0, xmm9 + movdqu XMMWORD PTR [rax+rsi], xmm0 + movdqa xmm0, xmm6 + pxor xmm0, xmm7 + movdqu XMMWORD PTR [rcx+rsi], xmm0 + mov esi, edx + movdqu xmm5, XMMWORD PTR [r8+rdi] + and esi, 2097136 + mov ecx, r8d + movd xmm0, r13 + punpcklqdq xmm4, xmm0 + xor r8d, 16 + aesenc xmm5, xmm4 + movdqu xmm0, XMMWORD PTR [r8+rdi] + xor r8d, 48 + paddq xmm0, xmm8 + 
movdqu xmm1, XMMWORD PTR [r8+rdi] + movdqu XMMWORD PTR [r8+rdi], xmm0 + paddq xmm1, xmm4 + xor r8d, 16 + mov eax, r8d + xor rax, 32 + movdqu xmm0, XMMWORD PTR [r8+rdi] + movdqu XMMWORD PTR [r8+rdi], xmm1 + paddq xmm0, xmm10 + movdqu XMMWORD PTR [rax+rdi], xmm0 + movdqa xmm0, xmm5 + pxor xmm0, xmm8 + movdqu XMMWORD PTR [rcx+rdi], xmm0 + movd rdi, xmm5 + movd rcx, xmm14 + mov ebp, edi + mov r8, QWORD PTR [rcx+rsi] + mov r10, QWORD PTR [rcx+rsi+8] + lea r9, QWORD PTR [rcx+rsi] + xor esi, 16 + + movd xmm0, rsp + movd xmm1, rsi + movd xmm2, rdi + movd xmm11, rbp + movd xmm12, r15 + movd xmm13, rdx + mov [rsp+112], rcx + + mov ebx, DWORD PTR [rsp+16] + mov esi, DWORD PTR [rsp+20] + mov edi, DWORD PTR [rsp+24] + mov ebp, DWORD PTR [rsp+28] + + lea eax, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or rax, rdx + xor r8, rax + + movd esp, xmm3 + pextrd r15d, xmm3, 2 + movd eax, xmm7 + movd edx, xmm9 + +CryptonightR_template_double_part2: + + movd rsp, xmm0 + mov DWORD PTR [rsp+16], ebx + mov DWORD PTR [rsp+20], esi + mov DWORD PTR [rsp+24], edi + mov DWORD PTR [rsp+28], ebp + + movd rsi, xmm1 + movd rdi, xmm2 + movd rbp, xmm11 + movd r15, xmm12 + movd rdx, xmm13 + mov rcx, [rsp+112] + + mov rbx, r8 + mov rax, r8 + mul rdx + and ebp, 2097136 + mov r8, rax + movd xmm1, rdx + movd xmm0, r8 + punpcklqdq xmm1, xmm0 + pxor xmm1, XMMWORD PTR [rcx+rsi] + xor esi, 48 + paddq xmm1, xmm7 + movdqu xmm2, XMMWORD PTR [rsi+rcx] + xor rdx, QWORD PTR [rsi+rcx] + paddq xmm2, xmm3 + xor r8, QWORD PTR [rsi+rcx+8] + movdqu XMMWORD PTR [rsi+rcx], xmm1 + xor esi, 16 + mov eax, esi + mov rsi, rcx + movdqu xmm0, XMMWORD PTR [rax+rcx] + movdqu XMMWORD PTR [rax+rcx], xmm2 + paddq xmm0, xmm9 + add r12, r8 + xor rax, 32 + add r14, rdx + movdqa xmm9, xmm7 + movdqa xmm7, xmm6 + movdqu XMMWORD PTR [rax+rcx], xmm0 + mov QWORD PTR [r9+8], r12 + xor r12, r10 + mov QWORD PTR [r9], r14 + movd rcx, xmm15 + xor r14, rbx + mov r10d, ebp + mov ebx, r14d + xor ebp, 16 + and ebx, 2097136 + mov r8, QWORD PTR 
[r10+rcx] + mov r9, QWORD PTR [r10+rcx+8] + + movd xmm0, rsp + movd xmm1, rbx + movd xmm2, rsi + movd xmm11, rdi + movd xmm12, rbp + movd xmm13, r15 + mov [rsp+104], rcx + + mov ebx, DWORD PTR [rsp] + mov esi, DWORD PTR [rsp+4] + mov edi, DWORD PTR [rsp+8] + mov ebp, DWORD PTR [rsp+12] + + lea eax, [ebx+esi] + lea edx, [edi+ebp] + shl rdx, 32 + or rax, rdx + + xor r8, rax + movd xmm3, r8 + + movd esp, xmm4 + pextrd r15d, xmm4, 2 + movd eax, xmm8 + movd edx, xmm10 + +CryptonightR_template_double_part3: + + movd rsp, xmm0 + mov DWORD PTR [rsp], ebx + mov DWORD PTR [rsp+4], esi + mov DWORD PTR [rsp+8], edi + mov DWORD PTR [rsp+12], ebp + + movd rbx, xmm1 + movd rsi, xmm2 + movd rdi, xmm11 + movd rbp, xmm12 + movd r15, xmm13 + mov rcx, [rsp+104] + + mov rax, r8 + mul rdi + movd xmm1, rdx + movd xmm0, rax + punpcklqdq xmm1, xmm0 + mov rdi, rcx + mov r8, rax + pxor xmm1, XMMWORD PTR [rbp+rcx] + xor ebp, 48 + paddq xmm1, xmm8 + xor r8, QWORD PTR [rbp+rcx+8] + xor rdx, QWORD PTR [rbp+rcx] + add r13, r8 + movdqu xmm2, XMMWORD PTR [rbp+rcx] + add r15, rdx + movdqu XMMWORD PTR [rbp+rcx], xmm1 + paddq xmm2, xmm4 + xor ebp, 16 + mov eax, ebp + xor rax, 32 + movdqu xmm0, XMMWORD PTR [rbp+rcx] + movdqu XMMWORD PTR [rbp+rcx], xmm2 + paddq xmm0, xmm10 + movdqu XMMWORD PTR [rax+rcx], xmm0 + movd rax, xmm3 + movdqa xmm10, xmm8 + mov QWORD PTR [r10+rcx], r15 + movdqa xmm8, xmm5 + xor r15, rax + mov QWORD PTR [r10+rcx+8], r13 + mov r8d, r15d + xor r13, r9 + and r8d, 2097136 + dec r11d + jnz CryptonightR_template_double_mainloop + +CryptonightR_template_double_part4: + + mov rbx, QWORD PTR [rsp+400] + movaps xmm6, XMMWORD PTR [rsp+160] + movaps xmm7, XMMWORD PTR [rsp+176] + movaps xmm8, XMMWORD PTR [rsp+192] + movaps xmm9, XMMWORD PTR [rsp+208] + movaps xmm10, XMMWORD PTR [rsp+224] + movaps xmm11, XMMWORD PTR [rsp+240] + movaps xmm12, XMMWORD PTR [rsp+256] + movaps xmm13, XMMWORD PTR [rsp+272] + movaps xmm14, XMMWORD PTR [rsp+288] + movaps xmm15, XMMWORD PTR [rsp+304] + add rsp, 320 + 
pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + ret 0 +CryptonightR_template_double_end: diff --git a/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc new file mode 100644 index 00000000..05af9393 --- /dev/null +++ b/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc @@ -0,0 +1,410 @@ + mov rax, rsp + push rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 184 + + stmxcsr DWORD PTR [rsp+272] + mov DWORD PTR [rsp+276], 24448 + ldmxcsr DWORD PTR [rsp+276] + + mov r13, QWORD PTR [rcx+224] + mov r9, rdx + mov r10, QWORD PTR [rcx+32] + mov r8, rcx + xor r10, QWORD PTR [rcx] + mov r14d, 524288 + mov r11, QWORD PTR [rcx+40] + xor r11, QWORD PTR [rcx+8] + mov rsi, QWORD PTR [rdx+224] + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov rdi, QWORD PTR [r9+32] + xor rdi, QWORD PTR [r9] + mov rbp, QWORD PTR [r9+40] + xor rbp, QWORD PTR [r9+8] + movd xmm0, rdx + movaps XMMWORD PTR [rax-88], xmm6 + movaps XMMWORD PTR [rax-104], xmm7 + movaps XMMWORD PTR [rax-120], xmm8 + movaps XMMWORD PTR [rsp+112], xmm9 + movaps XMMWORD PTR [rsp+96], xmm10 + movaps XMMWORD PTR [rsp+80], xmm11 + movaps XMMWORD PTR [rsp+64], xmm12 + movaps XMMWORD PTR [rsp+48], xmm13 + movaps XMMWORD PTR [rsp+32], xmm14 + movaps XMMWORD PTR [rsp+16], xmm15 + mov rdx, r10 + movd xmm4, QWORD PTR [r8+96] + and edx, 2097136 + mov rax, QWORD PTR [rcx+48] + xorps xmm13, xmm13 + xor rax, QWORD PTR [rcx+16] + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r8+72] + movd xmm5, QWORD PTR [r8+104] + movd xmm7, rax + + mov eax, 1 + shl rax, 52 + movd xmm14, rax + punpcklqdq xmm14, xmm14 + + mov eax, 1023 + shl rax, 52 + movd xmm12, rax + punpcklqdq xmm12, xmm12 + + mov rax, QWORD PTR [r8+80] + xor rax, QWORD PTR [r8+64] + punpcklqdq xmm7, xmm0 + movd xmm0, rcx + mov rcx, QWORD PTR [r9+56] + xor rcx, QWORD PTR [r9+24] + movd xmm3, rax + mov rax, QWORD PTR 
[r9+48] + xor rax, QWORD PTR [r9+16] + punpcklqdq xmm3, xmm0 + movd xmm0, rcx + mov QWORD PTR [rsp], r13 + mov rcx, QWORD PTR [r9+88] + xor rcx, QWORD PTR [r9+72] + movd xmm6, rax + mov rax, QWORD PTR [r9+80] + xor rax, QWORD PTR [r9+64] + punpcklqdq xmm6, xmm0 + movd xmm0, rcx + mov QWORD PTR [rsp+256], r10 + mov rcx, rdi + mov QWORD PTR [rsp+264], r11 + movd xmm8, rax + and ecx, 2097136 + punpcklqdq xmm8, xmm0 + movd xmm0, QWORD PTR [r9+96] + punpcklqdq xmm4, xmm0 + movd xmm0, QWORD PTR [r9+104] + lea r8, QWORD PTR [rcx+rsi] + movdqu xmm11, XMMWORD PTR [r8] + punpcklqdq xmm5, xmm0 + lea r9, QWORD PTR [rdx+r13] + movdqu xmm15, XMMWORD PTR [r9] + + ALIGN(64) +main_loop_double_sandybridge: + movdqu xmm9, xmm15 + mov eax, edx + mov ebx, edx + xor eax, 16 + xor ebx, 32 + xor edx, 48 + + movd xmm0, r11 + movd xmm2, r10 + punpcklqdq xmm2, xmm0 + aesenc xmm9, xmm2 + + movdqu xmm0, XMMWORD PTR [rax+r13] + movdqu xmm1, XMMWORD PTR [rbx+r13] + paddq xmm0, xmm7 + paddq xmm1, xmm2 + movdqu XMMWORD PTR [rbx+r13], xmm0 + movdqu xmm0, XMMWORD PTR [rdx+r13] + movdqu XMMWORD PTR [rdx+r13], xmm1 + paddq xmm0, xmm3 + movdqu XMMWORD PTR [rax+r13], xmm0 + + movd r11, xmm9 + mov edx, r11d + and edx, 2097136 + movdqa xmm0, xmm9 + pxor xmm0, xmm7 + movdqu XMMWORD PTR [r9], xmm0 + + lea rbx, QWORD PTR [rdx+r13] + mov r10, QWORD PTR [rdx+r13] + + movdqu xmm10, xmm11 + movd xmm0, rbp + movd xmm11, rdi + punpcklqdq xmm11, xmm0 + aesenc xmm10, xmm11 + + mov eax, ecx + mov r12d, ecx + xor eax, 16 + xor r12d, 32 + xor ecx, 48 + + movdqu xmm0, XMMWORD PTR [rax+rsi] + paddq xmm0, xmm6 + movdqu xmm1, XMMWORD PTR [r12+rsi] + movdqu XMMWORD PTR [r12+rsi], xmm0 + paddq xmm1, xmm11 + movdqu xmm0, XMMWORD PTR [rcx+rsi] + movdqu XMMWORD PTR [rcx+rsi], xmm1 + paddq xmm0, xmm8 + movdqu XMMWORD PTR [rax+rsi], xmm0 + + movd rcx, xmm10 + and ecx, 2097136 + + movdqa xmm0, xmm10 + pxor xmm0, xmm6 + movdqu XMMWORD PTR [r8], xmm0 + mov r12, QWORD PTR [rcx+rsi] + + mov r9, QWORD PTR [rbx+8] + + xor edx, 16 + mov 
r8d, edx + mov r15d, edx + + movd rdx, xmm5 + shl rdx, 32 + movd rax, xmm4 + xor rdx, rax + xor r10, rdx + mov rax, r10 + mul r11 + mov r11d, r8d + xor r11d, 48 + movd xmm0, rdx + xor rdx, [r11+r13] + movd xmm1, rax + xor rax, [r11+r13+8] + punpcklqdq xmm0, xmm1 + + pxor xmm0, XMMWORD PTR [r8+r13] + xor r8d, 32 + movdqu xmm1, XMMWORD PTR [r11+r13] + paddq xmm0, xmm7 + paddq xmm1, xmm2 + movdqu XMMWORD PTR [r11+r13], xmm0 + movdqu xmm0, XMMWORD PTR [r8+r13] + movdqu XMMWORD PTR [r8+r13], xmm1 + paddq xmm0, xmm3 + movdqu XMMWORD PTR [r15+r13], xmm0 + + mov r11, QWORD PTR [rsp+256] + add r11, rdx + mov rdx, QWORD PTR [rsp+264] + add rdx, rax + mov QWORD PTR [rbx], r11 + xor r11, r10 + mov QWORD PTR [rbx+8], rdx + xor rdx, r9 + mov QWORD PTR [rsp+256], r11 + and r11d, 2097136 + mov QWORD PTR [rsp+264], rdx + mov QWORD PTR [rsp+8], r11 + lea r15, QWORD PTR [r11+r13] + movdqu xmm15, XMMWORD PTR [r11+r13] + lea r13, QWORD PTR [rsi+rcx] + movdqa xmm0, xmm5 + psrldq xmm0, 8 + movaps xmm2, xmm13 + movd r10, xmm0 + psllq xmm5, 1 + shl r10, 32 + movdqa xmm0, xmm9 + psrldq xmm0, 8 + movdqa xmm1, xmm10 + movd r11, xmm0 + psrldq xmm1, 8 + movd r8, xmm1 + psrldq xmm4, 8 + movaps xmm0, xmm13 + movd rax, xmm4 + xor r10, rax + movaps xmm1, xmm13 + xor r10, r12 + lea rax, QWORD PTR [r11+1] + shr rax, 1 + movdqa xmm3, xmm9 + punpcklqdq xmm3, xmm10 + paddq xmm5, xmm3 + movd rdx, xmm5 + psrldq xmm5, 8 + cvtsi2sd xmm2, rax + or edx, -2147483647 + lea rax, QWORD PTR [r8+1] + shr rax, 1 + movd r9, xmm5 + cvtsi2sd xmm0, rax + or r9d, -2147483647 + cvtsi2sd xmm1, rdx + unpcklpd xmm2, xmm0 + movaps xmm0, xmm13 + cvtsi2sd xmm0, r9 + unpcklpd xmm1, xmm0 + divpd xmm2, xmm1 + paddq xmm2, xmm14 + cvttsd2si rax, xmm2 + psrldq xmm2, 8 + mov rbx, rax + imul rax, rdx + sub r11, rax + js div_fix_1_sandybridge +div_fix_1_ret_sandybridge: + + cvttsd2si rdx, xmm2 + mov rax, rdx + imul rax, r9 + movd xmm2, r11d + movd xmm4, ebx + sub r8, rax + js div_fix_2_sandybridge +div_fix_2_ret_sandybridge: + + movd 
xmm1, r8d + movd xmm0, edx + punpckldq xmm2, xmm1 + punpckldq xmm4, xmm0 + punpckldq xmm4, xmm2 + paddq xmm3, xmm4 + movdqa xmm0, xmm3 + psrlq xmm0, 12 + paddq xmm0, xmm12 + sqrtpd xmm1, xmm0 + movd r9, xmm1 + movdqa xmm5, xmm1 + psrlq xmm5, 19 + test r9, 524287 + je sqrt_fix_1_sandybridge +sqrt_fix_1_ret_sandybridge: + + movd r9, xmm10 + psrldq xmm1, 8 + movd r8, xmm1 + test r8, 524287 + je sqrt_fix_2_sandybridge +sqrt_fix_2_ret_sandybridge: + + mov r12d, ecx + mov r8d, ecx + xor r12d, 16 + xor r8d, 32 + xor ecx, 48 + mov rax, r10 + mul r9 + movd xmm0, rax + movd xmm3, rdx + punpcklqdq xmm3, xmm0 + + movdqu xmm0, XMMWORD PTR [r12+rsi] + pxor xmm0, xmm3 + movdqu xmm1, XMMWORD PTR [r8+rsi] + xor rdx, [r8+rsi] + xor rax, [r8+rsi+8] + movdqu xmm3, XMMWORD PTR [rcx+rsi] + paddq xmm0, xmm6 + paddq xmm1, xmm11 + paddq xmm3, xmm8 + movdqu XMMWORD PTR [r8+rsi], xmm0 + movdqu XMMWORD PTR [rcx+rsi], xmm1 + movdqu XMMWORD PTR [r12+rsi], xmm3 + + add rdi, rdx + mov QWORD PTR [r13], rdi + xor rdi, r10 + mov ecx, edi + and ecx, 2097136 + lea r8, QWORD PTR [rcx+rsi] + + mov rdx, QWORD PTR [r13+8] + add rbp, rax + mov QWORD PTR [r13+8], rbp + movdqu xmm11, XMMWORD PTR [rcx+rsi] + xor rbp, rdx + mov r13, QWORD PTR [rsp] + movdqa xmm3, xmm7 + mov rdx, QWORD PTR [rsp+8] + movdqa xmm8, xmm6 + mov r10, QWORD PTR [rsp+256] + movdqa xmm7, xmm9 + mov r11, QWORD PTR [rsp+264] + movdqa xmm6, xmm10 + mov r9, r15 + dec r14d + jne main_loop_double_sandybridge + + ldmxcsr DWORD PTR [rsp+272] + movaps xmm13, XMMWORD PTR [rsp+48] + lea r11, QWORD PTR [rsp+184] + movaps xmm6, XMMWORD PTR [r11-24] + movaps xmm7, XMMWORD PTR [r11-40] + movaps xmm8, XMMWORD PTR [r11-56] + movaps xmm9, XMMWORD PTR [r11-72] + movaps xmm10, XMMWORD PTR [r11-88] + movaps xmm11, XMMWORD PTR [r11-104] + movaps xmm12, XMMWORD PTR [r11-120] + movaps xmm14, XMMWORD PTR [rsp+32] + movaps xmm15, XMMWORD PTR [rsp+16] + mov rsp, r11 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + pop rbx + jmp 
cnv2_double_mainloop_asm_sandybridge_endp + +div_fix_1_sandybridge: + dec rbx + add r11, rdx + jmp div_fix_1_ret_sandybridge + +div_fix_2_sandybridge: + dec rdx + add r8, r9 + jmp div_fix_2_ret_sandybridge + +sqrt_fix_1_sandybridge: + movd r8, xmm3 + movdqa xmm0, xmm5 + psrldq xmm0, 8 + dec r9 + mov r11d, -1022 + shl r11, 32 + mov rax, r9 + shr r9, 19 + shr rax, 20 + mov rdx, r9 + sub rdx, rax + lea rdx, [rdx+r11+1] + add rax, r11 + imul rdx, rax + sub rdx, r8 + adc r9, 0 + movd xmm5, r9 + punpcklqdq xmm5, xmm0 + jmp sqrt_fix_1_ret_sandybridge + +sqrt_fix_2_sandybridge: + psrldq xmm3, 8 + movd r11, xmm3 + dec r8 + mov ebx, -1022 + shl rbx, 32 + mov rax, r8 + shr r8, 19 + shr rax, 20 + mov rdx, r8 + sub rdx, rax + lea rdx, [rdx+rbx+1] + add rax, rbx + imul rdx, rax + sub rdx, r11 + adc r8, 0 + movd xmm0, r8 + punpcklqdq xmm5, xmm0 + jmp sqrt_fix_2_ret_sandybridge + +cnv2_double_mainloop_asm_sandybridge_endp: diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc new file mode 100644 index 00000000..03a36f48 --- /dev/null +++ b/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc @@ -0,0 +1,180 @@ + mov QWORD PTR [rsp+16], rbx + mov QWORD PTR [rsp+24], rbp + mov QWORD PTR [rsp+32], rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 64 + + stmxcsr DWORD PTR [rsp] + mov DWORD PTR [rsp+4], 24448 + ldmxcsr DWORD PTR [rsp+4] + + mov rax, QWORD PTR [rcx+48] + mov r9, rcx + xor rax, QWORD PTR [rcx+16] + mov ebp, 524288 + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + mov r11, QWORD PTR [rcx+40] + mov r10, r8 + mov rdx, QWORD PTR [rcx+56] + movd xmm3, rax + xor rdx, QWORD PTR [rcx+24] + xor r11, QWORD PTR [rcx+8] + mov rbx, QWORD PTR [rcx+224] + mov rax, QWORD PTR [r9+80] + xor rax, QWORD PTR [r9+64] + movd xmm0, rdx + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r9+72] + mov rdi, QWORD PTR [r9+104] + and r10d, 2097136 + movaps XMMWORD PTR [rsp+48], xmm6 + movd xmm4, rax + 
movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+16], xmm8 + xorps xmm8, xmm8 + mov ax, 1023 + shl rax, 52 + movd xmm7, rax + mov r15, QWORD PTR [r9+96] + punpcklqdq xmm3, xmm0 + movd xmm0, rcx + punpcklqdq xmm4, xmm0 + + ALIGN(64) +cnv2_main_loop_bulldozer: + movdqa xmm5, XMMWORD PTR [r10+rbx] + movd xmm6, r8 + pinsrq xmm6, r11, 1 + lea rdx, QWORD PTR [r10+rbx] + lea r9, QWORD PTR [rdi+rdi] + shl rdi, 32 + + mov ecx, r10d + mov eax, r10d + xor ecx, 16 + xor eax, 32 + xor r10d, 48 + aesenc xmm5, xmm6 + movdqa xmm2, XMMWORD PTR [rcx+rbx] + movdqa xmm1, XMMWORD PTR [rax+rbx] + movdqa xmm0, XMMWORD PTR [r10+rbx] + paddq xmm2, xmm3 + paddq xmm1, xmm6 + paddq xmm0, xmm4 + movdqa XMMWORD PTR [rcx+rbx], xmm0 + movdqa XMMWORD PTR [rax+rbx], xmm2 + movdqa XMMWORD PTR [r10+rbx], xmm1 + + movaps xmm1, xmm8 + mov rsi, r15 + xor rsi, rdi + + mov edi, 1023 + shl rdi, 52 + + movd r14, xmm5 + pextrq rax, xmm5, 1 + + movdqa xmm0, xmm5 + pxor xmm0, xmm3 + mov r10, r14 + and r10d, 2097136 + movdqa XMMWORD PTR [rdx], xmm0 + xor rsi, QWORD PTR [r10+rbx] + lea r12, QWORD PTR [r10+rbx] + mov r13, QWORD PTR [r10+rbx+8] + + add r9d, r14d + or r9d, -2147483647 + xor edx, edx + div r9 + mov eax, eax + shl rdx, 32 + lea r15, [rax+rdx] + lea rax, [r14+r15] + shr rax, 12 + add rax, rdi + movd xmm0, rax + sqrtsd xmm1, xmm0 + movd rdi, xmm1 + test rdi, 524287 + je sqrt_fixup_bulldozer + shr rdi, 19 + +sqrt_fixup_bulldozer_ret: + mov rax, rsi + mul r14 + movd xmm1, rax + movd xmm0, rdx + punpcklqdq xmm0, xmm1 + + mov r9d, r10d + mov ecx, r10d + xor r9d, 16 + xor ecx, 32 + xor r10d, 48 + movdqa xmm1, XMMWORD PTR [rcx+rbx] + xor rdx, [rcx+rbx] + xor rax, [rcx+rbx+8] + movdqa xmm2, XMMWORD PTR [r9+rbx] + pxor xmm2, xmm0 + paddq xmm4, XMMWORD PTR [r10+rbx] + paddq xmm2, xmm3 + paddq xmm1, xmm6 + movdqa XMMWORD PTR [r9+rbx], xmm4 + movdqa XMMWORD PTR [rcx+rbx], xmm2 + movdqa XMMWORD PTR [r10+rbx], xmm1 + + movdqa xmm4, xmm3 + add r8, rdx + add r11, rax + mov QWORD PTR [r12], r8 + xor r8, rsi 
+ mov QWORD PTR [r12+8], r11 + mov r10, r8 + xor r11, r13 + and r10d, 2097136 + movdqa xmm3, xmm5 + dec ebp + jne cnv2_main_loop_bulldozer + + ldmxcsr DWORD PTR [rsp] + movaps xmm6, XMMWORD PTR [rsp+48] + lea r11, QWORD PTR [rsp+64] + mov rbx, QWORD PTR [r11+56] + mov rbp, QWORD PTR [r11+64] + mov rsi, QWORD PTR [r11+72] + movaps xmm8, XMMWORD PTR [r11-48] + movaps xmm7, XMMWORD PTR [rsp+32] + mov rsp, r11 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + jmp cnv2_main_loop_bulldozer_endp + +sqrt_fixup_bulldozer: + movd r9, xmm5 + add r9, r15 + dec rdi + mov edx, -1022 + shl rdx, 32 + mov rax, rdi + shr rdi, 19 + shr rax, 20 + mov rcx, rdi + sub rcx, rax + lea rcx, [rcx+rdx+1] + add rax, rdx + imul rcx, rax + sub rcx, r9 + adc rdi, 0 + jmp sqrt_fixup_bulldozer_ret + +cnv2_main_loop_bulldozer_endp: diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc new file mode 100644 index 00000000..77e28f80 --- /dev/null +++ b/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc @@ -0,0 +1,186 @@ + mov QWORD PTR [rsp+24], rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 80 + + stmxcsr DWORD PTR [rsp] + mov DWORD PTR [rsp+4], 24448 + ldmxcsr DWORD PTR [rsp+4] + + mov rax, QWORD PTR [rcx+48] + mov r9, rcx + xor rax, QWORD PTR [rcx+16] + mov esi, 524288 + mov r8, QWORD PTR [rcx+32] + mov r13d, -2147483647 + xor r8, QWORD PTR [rcx] + mov r11, QWORD PTR [rcx+40] + mov r10, r8 + mov rdx, QWORD PTR [rcx+56] + movd xmm4, rax + xor rdx, QWORD PTR [rcx+24] + xor r11, QWORD PTR [rcx+8] + mov rbx, QWORD PTR [rcx+224] + mov rax, QWORD PTR [r9+80] + xor rax, QWORD PTR [r9+64] + movd xmm0, rdx + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r9+72] + movd xmm3, QWORD PTR [r9+104] + movaps XMMWORD PTR [rsp+64], xmm6 + movaps XMMWORD PTR [rsp+48], xmm7 + movaps XMMWORD PTR [rsp+32], xmm8 + and r10d, 2097136 + movd xmm5, rax + + xor eax, eax + mov QWORD PTR [rsp+16], rax + + mov 
ax, 1023 + shl rax, 52 + movd xmm8, rax + mov r15, QWORD PTR [r9+96] + punpcklqdq xmm4, xmm0 + movd xmm0, rcx + punpcklqdq xmm5, xmm0 + movdqu xmm6, XMMWORD PTR [r10+rbx] + + ALIGN(64) +main_loop_ivybridge: + lea rdx, QWORD PTR [r10+rbx] + mov ecx, r10d + mov eax, r10d + mov rdi, r15 + xor ecx, 16 + xor eax, 32 + xor r10d, 48 + movd xmm0, r11 + movd xmm7, r8 + punpcklqdq xmm7, xmm0 + aesenc xmm6, xmm7 + movd rbp, xmm6 + mov r9, rbp + and r9d, 2097136 + movdqu xmm2, XMMWORD PTR [rcx+rbx] + movdqu xmm1, XMMWORD PTR [rax+rbx] + movdqu xmm0, XMMWORD PTR [r10+rbx] + paddq xmm1, xmm7 + paddq xmm0, xmm5 + paddq xmm2, xmm4 + movdqu XMMWORD PTR [rcx+rbx], xmm0 + movdqu XMMWORD PTR [rax+rbx], xmm2 + movdqu XMMWORD PTR [r10+rbx], xmm1 + mov r10, r9 + xor r10d, 32 + movd rcx, xmm3 + mov rax, rcx + shl rax, 32 + xor rdi, rax + movdqa xmm0, xmm6 + pxor xmm0, xmm4 + movdqu XMMWORD PTR [rdx], xmm0 + xor rdi, QWORD PTR [r9+rbx] + lea r14, QWORD PTR [r9+rbx] + mov r12, QWORD PTR [r14+8] + xor edx, edx + lea r9d, DWORD PTR [ecx+ecx] + add r9d, ebp + movdqa xmm0, xmm6 + psrldq xmm0, 8 + or r9d, r13d + movd rax, xmm0 + div r9 + xorps xmm3, xmm3 + mov eax, eax + shl rdx, 32 + add rdx, rax + lea r9, QWORD PTR [rdx+rbp] + mov r15, rdx + mov rax, r9 + shr rax, 12 + movd xmm0, rax + paddq xmm0, xmm8 + sqrtsd xmm3, xmm0 + psubq xmm3, XMMWORD PTR [rsp+16] + movd rdx, xmm3 + test edx, 524287 + je sqrt_fixup_ivybridge + psrlq xmm3, 19 +sqrt_fixup_ivybridge_ret: + + mov ecx, r10d + mov rax, rdi + mul rbp + movd xmm2, rdx + xor rdx, [rcx+rbx] + add r8, rdx + mov QWORD PTR [r14], r8 + xor r8, rdi + mov edi, r8d + and edi, 2097136 + movd xmm0, rax + xor rax, [rcx+rbx+8] + add r11, rax + mov QWORD PTR [r14+8], r11 + punpcklqdq xmm2, xmm0 + + mov r9d, r10d + xor r9d, 48 + xor r10d, 16 + pxor xmm2, XMMWORD PTR [r9+rbx] + movdqu xmm0, XMMWORD PTR [r10+rbx] + paddq xmm0, xmm5 + movdqu xmm1, XMMWORD PTR [rcx+rbx] + paddq xmm2, xmm4 + paddq xmm1, xmm7 + movdqa xmm5, xmm4 + movdqu XMMWORD PTR [r9+rbx], 
xmm0 + movdqa xmm4, xmm6 + movdqu XMMWORD PTR [rcx+rbx], xmm2 + movdqu XMMWORD PTR [r10+rbx], xmm1 + movdqu xmm6, [rdi+rbx] + mov r10d, edi + xor r11, r12 + dec rsi + jne main_loop_ivybridge + + ldmxcsr DWORD PTR [rsp] + mov rbx, QWORD PTR [rsp+160] + movaps xmm6, XMMWORD PTR [rsp+64] + movaps xmm7, XMMWORD PTR [rsp+48] + movaps xmm8, XMMWORD PTR [rsp+32] + add rsp, 80 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + jmp cnv2_main_loop_ivybridge_endp + +sqrt_fixup_ivybridge: + dec rdx + mov r13d, -1022 + shl r13, 32 + mov rax, rdx + shr rdx, 19 + shr rax, 20 + mov rcx, rdx + sub rcx, rax + add rax, r13 + not r13 + sub rcx, r13 + mov r13d, -2147483647 + imul rcx, rax + sub rcx, r9 + adc rdx, 0 + movd xmm3, rdx + jmp sqrt_fixup_ivybridge_ret + +cnv2_main_loop_ivybridge_endp: diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc new file mode 100644 index 00000000..7e5c127f --- /dev/null +++ b/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc @@ -0,0 +1,179 @@ + mov QWORD PTR [rsp+16], rbx + mov QWORD PTR [rsp+24], rbp + mov QWORD PTR [rsp+32], rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 64 + + stmxcsr DWORD PTR [rsp] + mov DWORD PTR [rsp+4], 24448 + ldmxcsr DWORD PTR [rsp+4] + + mov rax, QWORD PTR [rcx+48] + mov r9, rcx + xor rax, QWORD PTR [rcx+16] + mov ebp, 524288 + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + mov r11, QWORD PTR [rcx+40] + mov r10, r8 + mov rdx, QWORD PTR [rcx+56] + movd xmm3, rax + xor rdx, QWORD PTR [rcx+24] + xor r11, QWORD PTR [rcx+8] + mov rbx, QWORD PTR [rcx+224] + mov rax, QWORD PTR [r9+80] + xor rax, QWORD PTR [r9+64] + movd xmm0, rdx + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r9+72] + mov rdi, QWORD PTR [r9+104] + and r10d, 2097136 + movaps XMMWORD PTR [rsp+48], xmm6 + movd xmm4, rax + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+16], xmm8 + xorps xmm8, xmm8 + mov ax, 1023 + shl rax, 52 + movd 
xmm7, rax + mov r15, QWORD PTR [r9+96] + punpcklqdq xmm3, xmm0 + movd xmm0, rcx + punpcklqdq xmm4, xmm0 + + ALIGN(64) +main_loop_ryzen: + movdqa xmm5, XMMWORD PTR [r10+rbx] + movd xmm0, r11 + movd xmm6, r8 + punpcklqdq xmm6, xmm0 + lea rdx, QWORD PTR [r10+rbx] + lea r9, QWORD PTR [rdi+rdi] + shl rdi, 32 + + mov ecx, r10d + mov eax, r10d + xor ecx, 16 + xor eax, 32 + xor r10d, 48 + aesenc xmm5, xmm6 + movdqa xmm2, XMMWORD PTR [rcx+rbx] + movdqa xmm1, XMMWORD PTR [rax+rbx] + movdqa xmm0, XMMWORD PTR [r10+rbx] + paddq xmm2, xmm3 + paddq xmm1, xmm6 + paddq xmm0, xmm4 + movdqa XMMWORD PTR [rcx+rbx], xmm0 + movdqa XMMWORD PTR [rax+rbx], xmm2 + movdqa XMMWORD PTR [r10+rbx], xmm1 + + movaps xmm1, xmm8 + mov rsi, r15 + xor rsi, rdi + movd r14, xmm5 + movdqa xmm0, xmm5 + pxor xmm0, xmm3 + mov r10, r14 + and r10d, 2097136 + movdqa XMMWORD PTR [rdx], xmm0 + xor rsi, QWORD PTR [r10+rbx] + lea r12, QWORD PTR [r10+rbx] + mov r13, QWORD PTR [r10+rbx+8] + + add r9d, r14d + or r9d, -2147483647 + xor edx, edx + movdqa xmm0, xmm5 + psrldq xmm0, 8 + movd rax, xmm0 + + div r9 + movd xmm0, rax + movd xmm1, rdx + punpckldq xmm0, xmm1 + movd r15, xmm0 + paddq xmm0, xmm5 + movdqa xmm2, xmm0 + psrlq xmm0, 12 + paddq xmm0, xmm7 + sqrtsd xmm1, xmm0 + movd rdi, xmm1 + test rdi, 524287 + je sqrt_fixup_ryzen + shr rdi, 19 + +sqrt_fixup_ryzen_ret: + mov rax, rsi + mul r14 + movd xmm1, rax + movd xmm0, rdx + punpcklqdq xmm0, xmm1 + + mov r9d, r10d + mov ecx, r10d + xor r9d, 16 + xor ecx, 32 + xor r10d, 48 + movdqa xmm1, XMMWORD PTR [rcx+rbx] + xor rdx, [rcx+rbx] + xor rax, [rcx+rbx+8] + movdqa xmm2, XMMWORD PTR [r9+rbx] + pxor xmm2, xmm0 + paddq xmm4, XMMWORD PTR [r10+rbx] + paddq xmm2, xmm3 + paddq xmm1, xmm6 + movdqa XMMWORD PTR [r9+rbx], xmm4 + movdqa XMMWORD PTR [rcx+rbx], xmm2 + movdqa XMMWORD PTR [r10+rbx], xmm1 + + movdqa xmm4, xmm3 + add r8, rdx + add r11, rax + mov QWORD PTR [r12], r8 + xor r8, rsi + mov QWORD PTR [r12+8], r11 + mov r10, r8 + xor r11, r13 + and r10d, 2097136 + movdqa xmm3, 
xmm5 + dec ebp + jne main_loop_ryzen + + ldmxcsr DWORD PTR [rsp] + movaps xmm6, XMMWORD PTR [rsp+48] + lea r11, QWORD PTR [rsp+64] + mov rbx, QWORD PTR [r11+56] + mov rbp, QWORD PTR [r11+64] + mov rsi, QWORD PTR [r11+72] + movaps xmm8, XMMWORD PTR [r11-48] + movaps xmm7, XMMWORD PTR [rsp+32] + mov rsp, r11 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + jmp cnv2_main_loop_ryzen_endp + +sqrt_fixup_ryzen: + movd r9, xmm2 + dec rdi + mov edx, -1022 + shl rdx, 32 + mov rax, rdi + shr rdi, 19 + shr rax, 20 + mov rcx, rdi + sub rcx, rax + lea rcx, [rcx+rdx+1] + add rax, rdx + imul rcx, rax + sub rcx, r9 + adc rdi, 0 + jmp sqrt_fixup_ryzen_ret + +cnv2_main_loop_ryzen_endp: diff --git a/src/crypto/asm/win64/cn_main_loop.S b/src/crypto/asm/win64/cn_main_loop.S new file mode 100644 index 00000000..1200c4df --- /dev/null +++ b/src/crypto/asm/win64/cn_main_loop.S @@ -0,0 +1,31 @@ +#define ALIGN(x) .align 64 +.intel_syntax noprefix +.section .text +.global cnv2_mainloop_ivybridge_asm +.global cnv2_mainloop_ryzen_asm +.global cnv2_mainloop_bulldozer_asm +.global cnv2_double_mainloop_sandybridge_asm + +ALIGN(64) +cnv2_mainloop_ivybridge_asm: + #include "../cn2/cnv2_main_loop_ivybridge.inc" + ret 0 + mov eax, 3735929054 + +ALIGN(64) +cnv2_mainloop_ryzen_asm: + #include "../cn2/cnv2_main_loop_ryzen.inc" + ret 0 + mov eax, 3735929054 + +ALIGN(64) +cnv2_mainloop_bulldozer_asm: + #include "../cn2/cnv2_main_loop_bulldozer.inc" + ret 0 + mov eax, 3735929054 + +ALIGN(64) +cnv2_double_mainloop_sandybridge_asm: + #include "../cn2/cnv2_double_main_loop_sandybridge.inc" + ret 0 + mov eax, 3735929054 diff --git a/src/crypto/asm/win64/cn_main_loop.asm b/src/crypto/asm/win64/cn_main_loop.asm new file mode 100644 index 00000000..846b860c --- /dev/null +++ b/src/crypto/asm/win64/cn_main_loop.asm @@ -0,0 +1,36 @@ +_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE +PUBLIC cnv2_mainloop_ivybridge_asm +PUBLIC cnv2_mainloop_ryzen_asm +PUBLIC cnv2_mainloop_bulldozer_asm +PUBLIC 
cnv2_double_mainloop_sandybridge_asm + +ALIGN 64 +cnv2_mainloop_ivybridge_asm PROC + INCLUDE cn2/cnv2_main_loop_ivybridge.inc + ret 0 + mov eax, 3735929054 +cnv2_mainloop_ivybridge_asm ENDP + +ALIGN 64 +cnv2_mainloop_ryzen_asm PROC + INCLUDE cn2/cnv2_main_loop_ryzen.inc + ret 0 + mov eax, 3735929054 +cnv2_mainloop_ryzen_asm ENDP + +ALIGN 64 +cnv2_mainloop_bulldozer_asm PROC + INCLUDE cn2/cnv2_main_loop_bulldozer.inc + ret 0 + mov eax, 3735929054 +cnv2_mainloop_bulldozer_asm ENDP + +ALIGN 64 +cnv2_double_mainloop_sandybridge_asm PROC + INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc + ret 0 + mov eax, 3735929054 +cnv2_double_mainloop_sandybridge_asm ENDP + +_TEXT_CNV2_MAINLOOP ENDS +END diff --git a/src/crypto/variant4_random_math.h b/src/crypto/variant4_random_math.h new file mode 100644 index 00000000..793e9e03 --- /dev/null +++ b/src/crypto/variant4_random_math.h @@ -0,0 +1,428 @@ +#ifndef VARIANT4_RANDOM_MATH_H +#define VARIANT4_RANDOM_MATH_H + +extern "C" +{ + #include "c_blake256.h" +} + +enum V4_Settings +{ + // Generate code with minimal theoretical latency = 45 cycles, which is equivalent to 15 multiplications + TOTAL_LATENCY = 15 * 3, + + // Always generate at least 60 instructions + NUM_INSTRUCTIONS = 60, + + // Available ALUs for MUL + // Modern CPUs typically have only 1 ALU which can do multiplications + ALU_COUNT_MUL = 1, + + // Total available ALUs + // Modern CPUs have 4 ALUs, but we use only 3 because random math executes together with other main loop code + ALU_COUNT = 3, +}; + +enum V4_InstructionList +{ + MUL, // a*b + ADD, // a+b + C, C is an unsigned 32-bit constant + SUB, // a-b + ROR, // rotate right "a" by "b & 31" bits + ROL, // rotate left "a" by "b & 31" bits + XOR, // a^b + RET, // finish execution + V4_INSTRUCTION_COUNT = RET, +}; + +// V4_InstructionDefinition is used to generate code from random data +// Every random sequence of bytes is a valid code +// +// There are 8 registers in total: +// - 4 variable registers +// - 4 
constant registers initialized from loop variables +// +// This is why dst_index is 2 bits +enum V4_InstructionDefinition +{ + V4_OPCODE_BITS = 3, + V4_DST_INDEX_BITS = 2, + V4_SRC_INDEX_BITS = 3, +}; + +struct V4_Instruction +{ + uint8_t opcode; + uint8_t dst_index; + uint8_t src_index; + uint32_t C; +}; + +#ifndef FORCEINLINE +#ifdef __GNUC__ +#define FORCEINLINE __attribute__((always_inline)) inline +#elif _MSC_VER +#define FORCEINLINE __forceinline +#else +#define FORCEINLINE inline +#endif +#endif + +#ifndef UNREACHABLE_CODE +#ifdef __GNUC__ +#define UNREACHABLE_CODE __builtin_unreachable() +#elif _MSC_VER +#define UNREACHABLE_CODE __assume(false) +#else +#define UNREACHABLE_CODE +#endif +#endif + +// Random math interpreter's loop is fully unrolled and inlined to achieve 100% branch prediction on CPU: +// every switch-case will point to the same destination on every iteration of Cryptonight main loop +// +// This is about as fast as it can get without using low-level machine code generation +template<typename v4_reg> +static void v4_random_math(const struct V4_Instruction* code, v4_reg* r) +{ + enum + { + REG_BITS = sizeof(v4_reg) * 8, + }; + +#define V4_EXEC(i) \ + { \ + const struct V4_Instruction* op = code + i; \ + const v4_reg src = r[op->src_index]; \ + v4_reg* dst = r + op->dst_index; \ + switch (op->opcode) \ + { \ + case MUL: \ + *dst *= src; \ + break; \ + case ADD: \ + *dst += src + op->C; \ + break; \ + case SUB: \ + *dst -= src; \ + break; \ + case ROR: \ + { \ + const uint32_t shift = src % REG_BITS; \ + *dst = (*dst >> shift) | (*dst << ((REG_BITS - shift) % REG_BITS)); \ + } \ + break; \ + case ROL: \ + { \ + const uint32_t shift = src % REG_BITS; \ + *dst = (*dst << shift) | (*dst >> ((REG_BITS - shift) % REG_BITS)); \ + } \ + break; \ + case XOR: \ + *dst ^= src; \ + break; \ + case RET: \ + return; \ + default: \ + UNREACHABLE_CODE; \ + break; \ + } \ + } + +#define V4_EXEC_10(j) \ + V4_EXEC(j + 0) \ + V4_EXEC(j + 1) \ + V4_EXEC(j + 2) \ + V4_EXEC(j + 3) \ 
+ V4_EXEC(j + 4) \ + V4_EXEC(j + 5) \ + V4_EXEC(j + 6) \ + V4_EXEC(j + 7) \ + V4_EXEC(j + 8) \ + V4_EXEC(j + 9) + + // Generated program can have 60 + a few more (usually 2-3) instructions to achieve required latency + // I've checked all block heights < 10,000,000 and here is the distribution of program sizes: + // + // 60 28495 + // 61 106077 + // 62 2455855 + // 63 5114930 + // 64 1020868 + // 65 1109026 + // 66 151756 + // 67 8429 + // 68 4477 + // 69 87 + + // Unroll 70 instructions here + V4_EXEC_10(0); // instructions 0-9 + V4_EXEC_10(10); // instructions 10-19 + V4_EXEC_10(20); // instructions 20-29 + V4_EXEC_10(30); // instructions 30-39 + V4_EXEC_10(40); // instructions 40-49 + V4_EXEC_10(50); // instructions 50-59 + V4_EXEC_10(60); // instructions 60-69 + +#undef V4_EXEC_10 +#undef V4_EXEC +} + +// If we don't have enough data available, generate more +static FORCEINLINE void check_data(size_t* data_index, const size_t bytes_needed, int8_t* data, const size_t data_size) +{ + if (*data_index + bytes_needed > data_size) + { + hash_extra_blake(data, data_size, (char*) data); + *data_index = 0; + } +} + +// Generates as many random math operations as possible with given latency and ALU restrictions +static int v4_random_math_init(struct V4_Instruction* code, const uint64_t height) +{ + // MUL is 3 cycles, 3-way addition and rotations are 2 cycles, SUB/XOR are 1 cycle + // These latencies match real-life instruction latencies for Intel CPUs starting from Sandy Bridge and up to Skylake/Coffee lake + // + // AMD Ryzen has the same latencies except 1-cycle ROR/ROL, so it'll be a bit faster than Intel Sandy Bridge and newer processors + // Surprisingly, Intel Nehalem also has 1-cycle ROR/ROL, so it'll also be faster than Intel Sandy Bridge and newer processors + // AMD Bulldozer has 4 cycles latency for MUL (slower than Intel) and 1 cycle for ROR/ROL (faster than Intel), so average performance will be the same + // Source: 
https://www.agner.org/optimize/instruction_tables.pdf + const int op_latency[V4_INSTRUCTION_COUNT] = { 3, 2, 1, 2, 2, 1 }; + + // Instruction latencies for theoretical ASIC implementation + const int asic_op_latency[V4_INSTRUCTION_COUNT] = { 3, 1, 1, 1, 1, 1 }; + + // Available ALUs for each instruction + const int op_ALUs[V4_INSTRUCTION_COUNT] = { ALU_COUNT_MUL, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT }; + + int8_t data[32]; + memset(data, 0, sizeof(data)); + uint64_t tmp = SWAP64LE(height); + memcpy(data, &tmp, sizeof(uint64_t)); + + // Set data_index past the last byte in data + // to trigger full data update with blake hash + // before we start using it + size_t data_index = sizeof(data); + + int code_size; + do { + int latency[8]; + int asic_latency[8]; + + // Tracks previous instruction and value of the source operand for registers R0-R3 throughout code execution + // byte 0: current value of the destination register + // byte 1: instruction opcode + // byte 2: current value of the source register + // + // Registers R4-R7 are constant and are treated as having the same value because when we do + // the same operation twice with two constant source registers, it can be optimized into a single operation + uint32_t inst_data[8] = { 0, 1, 2, 3, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF }; + + bool alu_busy[TOTAL_LATENCY + 1][ALU_COUNT]; + bool is_rotation[V4_INSTRUCTION_COUNT]; + bool rotated[4]; + int rotate_count = 0; + + memset(latency, 0, sizeof(latency)); + memset(asic_latency, 0, sizeof(asic_latency)); + memset(alu_busy, 0, sizeof(alu_busy)); + memset(is_rotation, 0, sizeof(is_rotation)); + memset(rotated, 0, sizeof(rotated)); + is_rotation[ROR] = true; + is_rotation[ROL] = true; + + int num_retries = 0; + code_size = 0; + + int total_iterations = 0; + + // Generate random code to achieve minimal required latency for our abstract CPU + // Try to get this latency for all 4 registers + while (((latency[0] < TOTAL_LATENCY) || (latency[1] < 
TOTAL_LATENCY) || (latency[2] < TOTAL_LATENCY) || (latency[3] < TOTAL_LATENCY)) && (num_retries < 64)) + { + // Fail-safe to guarantee loop termination + ++total_iterations; + if (total_iterations > 256) + break; + + check_data(&data_index, 1, data, sizeof(data)); + + const uint8_t c = ((uint8_t*)data)[data_index++]; + + // MUL = opcodes 0-2 + // ADD = opcode 3 + // SUB = opcode 4 + // ROR/ROL = opcode 5, shift direction is selected randomly + // XOR = opcodes 6-7 + uint8_t opcode = c & ((1 << V4_OPCODE_BITS) - 1); + if (opcode == 5) + { + check_data(&data_index, 1, data, sizeof(data)); + opcode = (data[data_index++] >= 0) ? ROR : ROL; + } + else if (opcode >= 6) + { + opcode = XOR; + } + else + { + opcode = (opcode <= 2) ? MUL : (opcode - 2); + } + + uint8_t dst_index = (c >> V4_OPCODE_BITS) & ((1 << V4_DST_INDEX_BITS) - 1); + uint8_t src_index = (c >> (V4_OPCODE_BITS + V4_DST_INDEX_BITS)) & ((1 << V4_SRC_INDEX_BITS) - 1); + + const int a = dst_index; + int b = src_index; + + // Don't do ADD/SUB/XOR with the same register + if (((opcode == ADD) || (opcode == SUB) || (opcode == XOR)) && (a == b)) + { + // a is always < 4, so we don't need to check bounds here + b = a + 4; + src_index = b; + } + + // Don't do rotation with the same destination twice because it's equal to a single rotation + if (is_rotation[opcode] && rotated[a]) + { + continue; + } + + // Don't do the same instruction (except MUL) with the same source value twice because all other cases can be optimized: + // 2xADD(a, b, C) = ADD(a, b*2, C1+C2), same for SUB and rotations + // 2xXOR(a, b) = NOP + if ((opcode != MUL) && ((inst_data[a] & 0xFFFF00) == (opcode << 8) + ((inst_data[b] & 255) << 16))) + { + continue; + } + + // Find which ALU is available (and when) for this instruction + int next_latency = (latency[a] > latency[b]) ? 
latency[a] : latency[b]; + int alu_index = -1; + while (next_latency < TOTAL_LATENCY) + { + for (int i = op_ALUs[opcode] - 1; i >= 0; --i) + { + if (!alu_busy[next_latency][i]) + { + // ADD is implemented as two 1-cycle instructions on a real CPU, so do an additional availability check + if ((opcode == ADD) && alu_busy[next_latency + 1][i]) + { + continue; + } + + // Rotation can only start when previous rotation is finished, so do an additional availability check + if (is_rotation[opcode] && (next_latency < rotate_count * op_latency[opcode])) + { + continue; + } + + alu_index = i; + break; + } + } + if (alu_index >= 0) + { + break; + } + ++next_latency; + } + + // Don't generate instructions that leave some register unchanged for more than 7 cycles + if (next_latency > latency[a] + 7) + { + continue; + } + + next_latency += op_latency[opcode]; + + if (next_latency <= TOTAL_LATENCY) + { + if (is_rotation[opcode]) + { + ++rotate_count; + } + + // Mark ALU as busy only for the first cycle when it starts executing the instruction because ALUs are fully pipelined + alu_busy[next_latency - op_latency[opcode]][alu_index] = true; + latency[a] = next_latency; + + // ASIC is supposed to have enough ALUs to run as many independent instructions per cycle as possible, so latency calculation for ASIC is simple + asic_latency[a] = ((asic_latency[a] > asic_latency[b]) ? 
asic_latency[a] : asic_latency[b]) + asic_op_latency[opcode]; + + rotated[a] = is_rotation[opcode]; + + inst_data[a] = code_size + (opcode << 8) + ((inst_data[b] & 255) << 16); + + code[code_size].opcode = opcode; + code[code_size].dst_index = dst_index; + code[code_size].src_index = src_index; + code[code_size].C = 0; + + if (opcode == ADD) + { + // ADD instruction is implemented as two 1-cycle instructions on a real CPU, so mark ALU as busy for the next cycle too + alu_busy[next_latency - op_latency[opcode] + 1][alu_index] = true; + + // ADD instruction requires 4 more random bytes for 32-bit constant "C" in "a = a + b + C" + check_data(&data_index, sizeof(uint32_t), data, sizeof(data)); + uint32_t t; + memcpy(&t, data + data_index, sizeof(uint32_t)); + code[code_size].C = SWAP32LE(t); + data_index += sizeof(uint32_t); + } + + ++code_size; + if (code_size >= NUM_INSTRUCTIONS) + { + break; + } + } + else + { + ++num_retries; + } + } + + // ASIC has more execution resources and can extract as much parallelism from the code as possible + // We need to add a few more MUL and ROR instructions to achieve minimal required latency for ASIC + // Get this latency for at least 1 of the 4 registers + const int prev_code_size = code_size; + while ((asic_latency[0] < TOTAL_LATENCY) && (asic_latency[1] < TOTAL_LATENCY) && (asic_latency[2] < TOTAL_LATENCY) && (asic_latency[3] < TOTAL_LATENCY)) + { + int min_idx = 0; + int max_idx = 0; + for (int i = 1; i < 4; ++i) + { + if (asic_latency[i] < asic_latency[min_idx]) min_idx = i; + if (asic_latency[i] > asic_latency[max_idx]) max_idx = i; + } + + const uint8_t pattern[3] = { ROR, MUL, MUL }; + const uint8_t opcode = pattern[(code_size - prev_code_size) % 3]; + latency[min_idx] = latency[max_idx] + op_latency[opcode]; + asic_latency[min_idx] = asic_latency[max_idx] + asic_op_latency[opcode]; + + code[code_size].opcode = opcode; + code[code_size].dst_index = min_idx; + code[code_size].src_index = max_idx; + code[code_size].C = 0; + 
++code_size; + } + + // There is ~99.8% chance that code_size >= NUM_INSTRUCTIONS here, so second iteration is required rarely + } while (code_size < NUM_INSTRUCTIONS); + + // Add final instruction to stop the interpreter + code[code_size].opcode = RET; + code[code_size].dst_index = 0; + code[code_size].src_index = 0; + code[code_size].C = 0; + + return code_size; +} + +#endif diff --git a/src/net/Network.cpp b/src/net/Network.cpp index b17242d6..08e96d1d 100644 --- a/src/net/Network.cpp +++ b/src/net/Network.cpp @@ -175,9 +175,16 @@ bool Network::isColors() const void Network::setJob(Client *client, const Job &job, bool donate) { - LOG_INFO(isColors() ? MAGENTA_BOLD("new job") " from " WHITE_BOLD("%s:%d") " diff " WHITE_BOLD("%d") " algo " WHITE_BOLD("%s") - : "new job from %s:%d diff %d algo %s", - client->host(), client->port(), job.diff(), job.algorithm().shortName()); + if (job.height()) { + LOG_INFO(isColors() ? MAGENTA_BOLD("new job") " from " WHITE_BOLD("%s:%d") " diff " WHITE_BOLD("%d") " algo " WHITE_BOLD("%s") " height " WHITE_BOLD("%" PRIu64) + : "new job from %s:%d diff %d algo %s height %" PRIu64, + client->host(), client->port(), job.diff(), job.algorithm().shortName(), job.height()); + } + else { + LOG_INFO(isColors() ? 
MAGENTA_BOLD("new job") " from " WHITE_BOLD("%s:%d") " diff " WHITE_BOLD("%d") " algo " WHITE_BOLD("%s") + : "new job from %s:%d diff %d algo %s", + client->host(), client->port(), job.diff(), job.algorithm().shortName()); + } if (!donate && m_donate) { m_donate->setAlgo(job.algorithm()); diff --git a/src/nvidia/CryptonightR.cu b/src/nvidia/CryptonightR.cu new file mode 100644 index 00000000..d141eb5a --- /dev/null +++ b/src/nvidia/CryptonightR.cu @@ -0,0 +1,573 @@ +R"===( + +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned long long int uint64_t; + +static __constant__ uint32_t d_t_fn[1024] = +{ + 0xa56363c6U, 0x847c7cf8U, 0x997777eeU, 0x8d7b7bf6U, + 0x0df2f2ffU, 0xbd6b6bd6U, 0xb16f6fdeU, 0x54c5c591U, + 0x50303060U, 0x03010102U, 0xa96767ceU, 0x7d2b2b56U, + 0x19fefee7U, 0x62d7d7b5U, 0xe6abab4dU, 0x9a7676ecU, + 0x45caca8fU, 0x9d82821fU, 0x40c9c989U, 0x877d7dfaU, + 0x15fafaefU, 0xeb5959b2U, 0xc947478eU, 0x0bf0f0fbU, + 0xecadad41U, 0x67d4d4b3U, 0xfda2a25fU, 0xeaafaf45U, + 0xbf9c9c23U, 0xf7a4a453U, 0x967272e4U, 0x5bc0c09bU, + 0xc2b7b775U, 0x1cfdfde1U, 0xae93933dU, 0x6a26264cU, + 0x5a36366cU, 0x413f3f7eU, 0x02f7f7f5U, 0x4fcccc83U, + 0x5c343468U, 0xf4a5a551U, 0x34e5e5d1U, 0x08f1f1f9U, + 0x937171e2U, 0x73d8d8abU, 0x53313162U, 0x3f15152aU, + 0x0c040408U, 0x52c7c795U, 0x65232346U, 0x5ec3c39dU, + 0x28181830U, 0xa1969637U, 0x0f05050aU, 0xb59a9a2fU, + 0x0907070eU, 0x36121224U, 0x9b80801bU, 0x3de2e2dfU, + 0x26ebebcdU, 0x6927274eU, 0xcdb2b27fU, 0x9f7575eaU, + 0x1b090912U, 0x9e83831dU, 0x742c2c58U, 0x2e1a1a34U, + 0x2d1b1b36U, 0xb26e6edcU, 0xee5a5ab4U, 0xfba0a05bU, + 0xf65252a4U, 0x4d3b3b76U, 0x61d6d6b7U, 0xceb3b37dU, + 0x7b292952U, 0x3ee3e3ddU, 0x712f2f5eU, 0x97848413U, + 0xf55353a6U, 0x68d1d1b9U, 0x00000000U, 0x2cededc1U, + 0x60202040U, 0x1ffcfce3U, 0xc8b1b179U, 0xed5b5bb6U, + 0xbe6a6ad4U, 0x46cbcb8dU, 0xd9bebe67U, 0x4b393972U, + 0xde4a4a94U, 0xd44c4c98U, 0xe85858b0U, 0x4acfcf85U, + 0x6bd0d0bbU, 0x2aefefc5U, 0xe5aaaa4fU, 0x16fbfbedU, + 
0xc5434386U, 0xd74d4d9aU, 0x55333366U, 0x94858511U, + 0xcf45458aU, 0x10f9f9e9U, 0x06020204U, 0x817f7ffeU, + 0xf05050a0U, 0x443c3c78U, 0xba9f9f25U, 0xe3a8a84bU, + 0xf35151a2U, 0xfea3a35dU, 0xc0404080U, 0x8a8f8f05U, + 0xad92923fU, 0xbc9d9d21U, 0x48383870U, 0x04f5f5f1U, + 0xdfbcbc63U, 0xc1b6b677U, 0x75dadaafU, 0x63212142U, + 0x30101020U, 0x1affffe5U, 0x0ef3f3fdU, 0x6dd2d2bfU, + 0x4ccdcd81U, 0x140c0c18U, 0x35131326U, 0x2fececc3U, + 0xe15f5fbeU, 0xa2979735U, 0xcc444488U, 0x3917172eU, + 0x57c4c493U, 0xf2a7a755U, 0x827e7efcU, 0x473d3d7aU, + 0xac6464c8U, 0xe75d5dbaU, 0x2b191932U, 0x957373e6U, + 0xa06060c0U, 0x98818119U, 0xd14f4f9eU, 0x7fdcdca3U, + 0x66222244U, 0x7e2a2a54U, 0xab90903bU, 0x8388880bU, + 0xca46468cU, 0x29eeeec7U, 0xd3b8b86bU, 0x3c141428U, + 0x79dedea7U, 0xe25e5ebcU, 0x1d0b0b16U, 0x76dbdbadU, + 0x3be0e0dbU, 0x56323264U, 0x4e3a3a74U, 0x1e0a0a14U, + 0xdb494992U, 0x0a06060cU, 0x6c242448U, 0xe45c5cb8U, + 0x5dc2c29fU, 0x6ed3d3bdU, 0xefacac43U, 0xa66262c4U, + 0xa8919139U, 0xa4959531U, 0x37e4e4d3U, 0x8b7979f2U, + 0x32e7e7d5U, 0x43c8c88bU, 0x5937376eU, 0xb76d6ddaU, + 0x8c8d8d01U, 0x64d5d5b1U, 0xd24e4e9cU, 0xe0a9a949U, + 0xb46c6cd8U, 0xfa5656acU, 0x07f4f4f3U, 0x25eaeacfU, + 0xaf6565caU, 0x8e7a7af4U, 0xe9aeae47U, 0x18080810U, + 0xd5baba6fU, 0x887878f0U, 0x6f25254aU, 0x722e2e5cU, + 0x241c1c38U, 0xf1a6a657U, 0xc7b4b473U, 0x51c6c697U, + 0x23e8e8cbU, 0x7cdddda1U, 0x9c7474e8U, 0x211f1f3eU, + 0xdd4b4b96U, 0xdcbdbd61U, 0x868b8b0dU, 0x858a8a0fU, + 0x907070e0U, 0x423e3e7cU, 0xc4b5b571U, 0xaa6666ccU, + 0xd8484890U, 0x05030306U, 0x01f6f6f7U, 0x120e0e1cU, + 0xa36161c2U, 0x5f35356aU, 0xf95757aeU, 0xd0b9b969U, + 0x91868617U, 0x58c1c199U, 0x271d1d3aU, 0xb99e9e27U, + 0x38e1e1d9U, 0x13f8f8ebU, 0xb398982bU, 0x33111122U, + 0xbb6969d2U, 0x70d9d9a9U, 0x898e8e07U, 0xa7949433U, + 0xb69b9b2dU, 0x221e1e3cU, 0x92878715U, 0x20e9e9c9U, + 0x49cece87U, 0xff5555aaU, 0x78282850U, 0x7adfdfa5U, + 0x8f8c8c03U, 0xf8a1a159U, 0x80898909U, 0x170d0d1aU, + 0xdabfbf65U, 0x31e6e6d7U, 0xc6424284U, 0xb86868d0U, + 
0xc3414182U, 0xb0999929U, 0x772d2d5aU, 0x110f0f1eU, + 0xcbb0b07bU, 0xfc5454a8U, 0xd6bbbb6dU, 0x3a16162cU, + 0x6363c6a5U, 0x7c7cf884U, 0x7777ee99U, 0x7b7bf68dU, + 0xf2f2ff0dU, 0x6b6bd6bdU, 0x6f6fdeb1U, 0xc5c59154U, + 0x30306050U, 0x01010203U, 0x6767cea9U, 0x2b2b567dU, + 0xfefee719U, 0xd7d7b562U, 0xabab4de6U, 0x7676ec9aU, + 0xcaca8f45U, 0x82821f9dU, 0xc9c98940U, 0x7d7dfa87U, + 0xfafaef15U, 0x5959b2ebU, 0x47478ec9U, 0xf0f0fb0bU, + 0xadad41ecU, 0xd4d4b367U, 0xa2a25ffdU, 0xafaf45eaU, + 0x9c9c23bfU, 0xa4a453f7U, 0x7272e496U, 0xc0c09b5bU, + 0xb7b775c2U, 0xfdfde11cU, 0x93933daeU, 0x26264c6aU, + 0x36366c5aU, 0x3f3f7e41U, 0xf7f7f502U, 0xcccc834fU, + 0x3434685cU, 0xa5a551f4U, 0xe5e5d134U, 0xf1f1f908U, + 0x7171e293U, 0xd8d8ab73U, 0x31316253U, 0x15152a3fU, + 0x0404080cU, 0xc7c79552U, 0x23234665U, 0xc3c39d5eU, + 0x18183028U, 0x969637a1U, 0x05050a0fU, 0x9a9a2fb5U, + 0x07070e09U, 0x12122436U, 0x80801b9bU, 0xe2e2df3dU, + 0xebebcd26U, 0x27274e69U, 0xb2b27fcdU, 0x7575ea9fU, + 0x0909121bU, 0x83831d9eU, 0x2c2c5874U, 0x1a1a342eU, + 0x1b1b362dU, 0x6e6edcb2U, 0x5a5ab4eeU, 0xa0a05bfbU, + 0x5252a4f6U, 0x3b3b764dU, 0xd6d6b761U, 0xb3b37dceU, + 0x2929527bU, 0xe3e3dd3eU, 0x2f2f5e71U, 0x84841397U, + 0x5353a6f5U, 0xd1d1b968U, 0x00000000U, 0xededc12cU, + 0x20204060U, 0xfcfce31fU, 0xb1b179c8U, 0x5b5bb6edU, + 0x6a6ad4beU, 0xcbcb8d46U, 0xbebe67d9U, 0x3939724bU, + 0x4a4a94deU, 0x4c4c98d4U, 0x5858b0e8U, 0xcfcf854aU, + 0xd0d0bb6bU, 0xefefc52aU, 0xaaaa4fe5U, 0xfbfbed16U, + 0x434386c5U, 0x4d4d9ad7U, 0x33336655U, 0x85851194U, + 0x45458acfU, 0xf9f9e910U, 0x02020406U, 0x7f7ffe81U, + 0x5050a0f0U, 0x3c3c7844U, 0x9f9f25baU, 0xa8a84be3U, + 0x5151a2f3U, 0xa3a35dfeU, 0x404080c0U, 0x8f8f058aU, + 0x92923fadU, 0x9d9d21bcU, 0x38387048U, 0xf5f5f104U, + 0xbcbc63dfU, 0xb6b677c1U, 0xdadaaf75U, 0x21214263U, + 0x10102030U, 0xffffe51aU, 0xf3f3fd0eU, 0xd2d2bf6dU, + 0xcdcd814cU, 0x0c0c1814U, 0x13132635U, 0xececc32fU, + 0x5f5fbee1U, 0x979735a2U, 0x444488ccU, 0x17172e39U, + 0xc4c49357U, 0xa7a755f2U, 0x7e7efc82U, 0x3d3d7a47U, + 
0x6464c8acU, 0x5d5dbae7U, 0x1919322bU, 0x7373e695U, + 0x6060c0a0U, 0x81811998U, 0x4f4f9ed1U, 0xdcdca37fU, + 0x22224466U, 0x2a2a547eU, 0x90903babU, 0x88880b83U, + 0x46468ccaU, 0xeeeec729U, 0xb8b86bd3U, 0x1414283cU, + 0xdedea779U, 0x5e5ebce2U, 0x0b0b161dU, 0xdbdbad76U, + 0xe0e0db3bU, 0x32326456U, 0x3a3a744eU, 0x0a0a141eU, + 0x494992dbU, 0x06060c0aU, 0x2424486cU, 0x5c5cb8e4U, + 0xc2c29f5dU, 0xd3d3bd6eU, 0xacac43efU, 0x6262c4a6U, + 0x919139a8U, 0x959531a4U, 0xe4e4d337U, 0x7979f28bU, + 0xe7e7d532U, 0xc8c88b43U, 0x37376e59U, 0x6d6ddab7U, + 0x8d8d018cU, 0xd5d5b164U, 0x4e4e9cd2U, 0xa9a949e0U, + 0x6c6cd8b4U, 0x5656acfaU, 0xf4f4f307U, 0xeaeacf25U, + 0x6565caafU, 0x7a7af48eU, 0xaeae47e9U, 0x08081018U, + 0xbaba6fd5U, 0x7878f088U, 0x25254a6fU, 0x2e2e5c72U, + 0x1c1c3824U, 0xa6a657f1U, 0xb4b473c7U, 0xc6c69751U, + 0xe8e8cb23U, 0xdddda17cU, 0x7474e89cU, 0x1f1f3e21U, + 0x4b4b96ddU, 0xbdbd61dcU, 0x8b8b0d86U, 0x8a8a0f85U, + 0x7070e090U, 0x3e3e7c42U, 0xb5b571c4U, 0x6666ccaaU, + 0x484890d8U, 0x03030605U, 0xf6f6f701U, 0x0e0e1c12U, + 0x6161c2a3U, 0x35356a5fU, 0x5757aef9U, 0xb9b969d0U, + 0x86861791U, 0xc1c19958U, 0x1d1d3a27U, 0x9e9e27b9U, + 0xe1e1d938U, 0xf8f8eb13U, 0x98982bb3U, 0x11112233U, + 0x6969d2bbU, 0xd9d9a970U, 0x8e8e0789U, 0x949433a7U, + 0x9b9b2db6U, 0x1e1e3c22U, 0x87871592U, 0xe9e9c920U, + 0xcece8749U, 0x5555aaffU, 0x28285078U, 0xdfdfa57aU, + 0x8c8c038fU, 0xa1a159f8U, 0x89890980U, 0x0d0d1a17U, + 0xbfbf65daU, 0xe6e6d731U, 0x424284c6U, 0x6868d0b8U, + 0x414182c3U, 0x999929b0U, 0x2d2d5a77U, 0x0f0f1e11U, + 0xb0b07bcbU, 0x5454a8fcU, 0xbbbb6dd6U, 0x16162c3aU, + 0x63c6a563U, 0x7cf8847cU, 0x77ee9977U, 0x7bf68d7bU, + 0xf2ff0df2U, 0x6bd6bd6bU, 0x6fdeb16fU, 0xc59154c5U, + 0x30605030U, 0x01020301U, 0x67cea967U, 0x2b567d2bU, + 0xfee719feU, 0xd7b562d7U, 0xab4de6abU, 0x76ec9a76U, + 0xca8f45caU, 0x821f9d82U, 0xc98940c9U, 0x7dfa877dU, + 0xfaef15faU, 0x59b2eb59U, 0x478ec947U, 0xf0fb0bf0U, + 0xad41ecadU, 0xd4b367d4U, 0xa25ffda2U, 0xaf45eaafU, + 0x9c23bf9cU, 0xa453f7a4U, 0x72e49672U, 0xc09b5bc0U, + 
0xb775c2b7U, 0xfde11cfdU, 0x933dae93U, 0x264c6a26U, + 0x366c5a36U, 0x3f7e413fU, 0xf7f502f7U, 0xcc834fccU, + 0x34685c34U, 0xa551f4a5U, 0xe5d134e5U, 0xf1f908f1U, + 0x71e29371U, 0xd8ab73d8U, 0x31625331U, 0x152a3f15U, + 0x04080c04U, 0xc79552c7U, 0x23466523U, 0xc39d5ec3U, + 0x18302818U, 0x9637a196U, 0x050a0f05U, 0x9a2fb59aU, + 0x070e0907U, 0x12243612U, 0x801b9b80U, 0xe2df3de2U, + 0xebcd26ebU, 0x274e6927U, 0xb27fcdb2U, 0x75ea9f75U, + 0x09121b09U, 0x831d9e83U, 0x2c58742cU, 0x1a342e1aU, + 0x1b362d1bU, 0x6edcb26eU, 0x5ab4ee5aU, 0xa05bfba0U, + 0x52a4f652U, 0x3b764d3bU, 0xd6b761d6U, 0xb37dceb3U, + 0x29527b29U, 0xe3dd3ee3U, 0x2f5e712fU, 0x84139784U, + 0x53a6f553U, 0xd1b968d1U, 0x00000000U, 0xedc12cedU, + 0x20406020U, 0xfce31ffcU, 0xb179c8b1U, 0x5bb6ed5bU, + 0x6ad4be6aU, 0xcb8d46cbU, 0xbe67d9beU, 0x39724b39U, + 0x4a94de4aU, 0x4c98d44cU, 0x58b0e858U, 0xcf854acfU, + 0xd0bb6bd0U, 0xefc52aefU, 0xaa4fe5aaU, 0xfbed16fbU, + 0x4386c543U, 0x4d9ad74dU, 0x33665533U, 0x85119485U, + 0x458acf45U, 0xf9e910f9U, 0x02040602U, 0x7ffe817fU, + 0x50a0f050U, 0x3c78443cU, 0x9f25ba9fU, 0xa84be3a8U, + 0x51a2f351U, 0xa35dfea3U, 0x4080c040U, 0x8f058a8fU, + 0x923fad92U, 0x9d21bc9dU, 0x38704838U, 0xf5f104f5U, + 0xbc63dfbcU, 0xb677c1b6U, 0xdaaf75daU, 0x21426321U, + 0x10203010U, 0xffe51affU, 0xf3fd0ef3U, 0xd2bf6dd2U, + 0xcd814ccdU, 0x0c18140cU, 0x13263513U, 0xecc32fecU, + 0x5fbee15fU, 0x9735a297U, 0x4488cc44U, 0x172e3917U, + 0xc49357c4U, 0xa755f2a7U, 0x7efc827eU, 0x3d7a473dU, + 0x64c8ac64U, 0x5dbae75dU, 0x19322b19U, 0x73e69573U, + 0x60c0a060U, 0x81199881U, 0x4f9ed14fU, 0xdca37fdcU, + 0x22446622U, 0x2a547e2aU, 0x903bab90U, 0x880b8388U, + 0x468cca46U, 0xeec729eeU, 0xb86bd3b8U, 0x14283c14U, + 0xdea779deU, 0x5ebce25eU, 0x0b161d0bU, 0xdbad76dbU, + 0xe0db3be0U, 0x32645632U, 0x3a744e3aU, 0x0a141e0aU, + 0x4992db49U, 0x060c0a06U, 0x24486c24U, 0x5cb8e45cU, + 0xc29f5dc2U, 0xd3bd6ed3U, 0xac43efacU, 0x62c4a662U, + 0x9139a891U, 0x9531a495U, 0xe4d337e4U, 0x79f28b79U, + 0xe7d532e7U, 0xc88b43c8U, 0x376e5937U, 0x6ddab76dU, + 
0x8d018c8dU, 0xd5b164d5U, 0x4e9cd24eU, 0xa949e0a9U, + 0x6cd8b46cU, 0x56acfa56U, 0xf4f307f4U, 0xeacf25eaU, + 0x65caaf65U, 0x7af48e7aU, 0xae47e9aeU, 0x08101808U, + 0xba6fd5baU, 0x78f08878U, 0x254a6f25U, 0x2e5c722eU, + 0x1c38241cU, 0xa657f1a6U, 0xb473c7b4U, 0xc69751c6U, + 0xe8cb23e8U, 0xdda17cddU, 0x74e89c74U, 0x1f3e211fU, + 0x4b96dd4bU, 0xbd61dcbdU, 0x8b0d868bU, 0x8a0f858aU, + 0x70e09070U, 0x3e7c423eU, 0xb571c4b5U, 0x66ccaa66U, + 0x4890d848U, 0x03060503U, 0xf6f701f6U, 0x0e1c120eU, + 0x61c2a361U, 0x356a5f35U, 0x57aef957U, 0xb969d0b9U, + 0x86179186U, 0xc19958c1U, 0x1d3a271dU, 0x9e27b99eU, + 0xe1d938e1U, 0xf8eb13f8U, 0x982bb398U, 0x11223311U, + 0x69d2bb69U, 0xd9a970d9U, 0x8e07898eU, 0x9433a794U, + 0x9b2db69bU, 0x1e3c221eU, 0x87159287U, 0xe9c920e9U, + 0xce8749ceU, 0x55aaff55U, 0x28507828U, 0xdfa57adfU, + 0x8c038f8cU, 0xa159f8a1U, 0x89098089U, 0x0d1a170dU, + 0xbf65dabfU, 0xe6d731e6U, 0x4284c642U, 0x68d0b868U, + 0x4182c341U, 0x9929b099U, 0x2d5a772dU, 0x0f1e110fU, + 0xb07bcbb0U, 0x54a8fc54U, 0xbb6dd6bbU, 0x162c3a16U, + 0xc6a56363U, 0xf8847c7cU, 0xee997777U, 0xf68d7b7bU, + 0xff0df2f2U, 0xd6bd6b6bU, 0xdeb16f6fU, 0x9154c5c5U, + 0x60503030U, 0x02030101U, 0xcea96767U, 0x567d2b2bU, + 0xe719fefeU, 0xb562d7d7U, 0x4de6ababU, 0xec9a7676U, + 0x8f45cacaU, 0x1f9d8282U, 0x8940c9c9U, 0xfa877d7dU, + 0xef15fafaU, 0xb2eb5959U, 0x8ec94747U, 0xfb0bf0f0U, + 0x41ecadadU, 0xb367d4d4U, 0x5ffda2a2U, 0x45eaafafU, + 0x23bf9c9cU, 0x53f7a4a4U, 0xe4967272U, 0x9b5bc0c0U, + 0x75c2b7b7U, 0xe11cfdfdU, 0x3dae9393U, 0x4c6a2626U, + 0x6c5a3636U, 0x7e413f3fU, 0xf502f7f7U, 0x834fccccU, + 0x685c3434U, 0x51f4a5a5U, 0xd134e5e5U, 0xf908f1f1U, + 0xe2937171U, 0xab73d8d8U, 0x62533131U, 0x2a3f1515U, + 0x080c0404U, 0x9552c7c7U, 0x46652323U, 0x9d5ec3c3U, + 0x30281818U, 0x37a19696U, 0x0a0f0505U, 0x2fb59a9aU, + 0x0e090707U, 0x24361212U, 0x1b9b8080U, 0xdf3de2e2U, + 0xcd26ebebU, 0x4e692727U, 0x7fcdb2b2U, 0xea9f7575U, + 0x121b0909U, 0x1d9e8383U, 0x58742c2cU, 0x342e1a1aU, + 0x362d1b1bU, 0xdcb26e6eU, 0xb4ee5a5aU, 0x5bfba0a0U, + 
0xa4f65252U, 0x764d3b3bU, 0xb761d6d6U, 0x7dceb3b3U, + 0x527b2929U, 0xdd3ee3e3U, 0x5e712f2fU, 0x13978484U, + 0xa6f55353U, 0xb968d1d1U, 0x00000000U, 0xc12cededU, + 0x40602020U, 0xe31ffcfcU, 0x79c8b1b1U, 0xb6ed5b5bU, + 0xd4be6a6aU, 0x8d46cbcbU, 0x67d9bebeU, 0x724b3939U, + 0x94de4a4aU, 0x98d44c4cU, 0xb0e85858U, 0x854acfcfU, + 0xbb6bd0d0U, 0xc52aefefU, 0x4fe5aaaaU, 0xed16fbfbU, + 0x86c54343U, 0x9ad74d4dU, 0x66553333U, 0x11948585U, + 0x8acf4545U, 0xe910f9f9U, 0x04060202U, 0xfe817f7fU, + 0xa0f05050U, 0x78443c3cU, 0x25ba9f9fU, 0x4be3a8a8U, + 0xa2f35151U, 0x5dfea3a3U, 0x80c04040U, 0x058a8f8fU, + 0x3fad9292U, 0x21bc9d9dU, 0x70483838U, 0xf104f5f5U, + 0x63dfbcbcU, 0x77c1b6b6U, 0xaf75dadaU, 0x42632121U, + 0x20301010U, 0xe51affffU, 0xfd0ef3f3U, 0xbf6dd2d2U, + 0x814ccdcdU, 0x18140c0cU, 0x26351313U, 0xc32fececU, + 0xbee15f5fU, 0x35a29797U, 0x88cc4444U, 0x2e391717U, + 0x9357c4c4U, 0x55f2a7a7U, 0xfc827e7eU, 0x7a473d3dU, + 0xc8ac6464U, 0xbae75d5dU, 0x322b1919U, 0xe6957373U, + 0xc0a06060U, 0x19988181U, 0x9ed14f4fU, 0xa37fdcdcU, + 0x44662222U, 0x547e2a2aU, 0x3bab9090U, 0x0b838888U, + 0x8cca4646U, 0xc729eeeeU, 0x6bd3b8b8U, 0x283c1414U, + 0xa779dedeU, 0xbce25e5eU, 0x161d0b0bU, 0xad76dbdbU, + 0xdb3be0e0U, 0x64563232U, 0x744e3a3aU, 0x141e0a0aU, + 0x92db4949U, 0x0c0a0606U, 0x486c2424U, 0xb8e45c5cU, + 0x9f5dc2c2U, 0xbd6ed3d3U, 0x43efacacU, 0xc4a66262U, + 0x39a89191U, 0x31a49595U, 0xd337e4e4U, 0xf28b7979U, + 0xd532e7e7U, 0x8b43c8c8U, 0x6e593737U, 0xdab76d6dU, + 0x018c8d8dU, 0xb164d5d5U, 0x9cd24e4eU, 0x49e0a9a9U, + 0xd8b46c6cU, 0xacfa5656U, 0xf307f4f4U, 0xcf25eaeaU, + 0xcaaf6565U, 0xf48e7a7aU, 0x47e9aeaeU, 0x10180808U, + 0x6fd5babaU, 0xf0887878U, 0x4a6f2525U, 0x5c722e2eU, + 0x38241c1cU, 0x57f1a6a6U, 0x73c7b4b4U, 0x9751c6c6U, + 0xcb23e8e8U, 0xa17cddddU, 0xe89c7474U, 0x3e211f1fU, + 0x96dd4b4bU, 0x61dcbdbdU, 0x0d868b8bU, 0x0f858a8aU, + 0xe0907070U, 0x7c423e3eU, 0x71c4b5b5U, 0xccaa6666U, + 0x90d84848U, 0x06050303U, 0xf701f6f6U, 0x1c120e0eU, + 0xc2a36161U, 0x6a5f3535U, 0xaef95757U, 0x69d0b9b9U, + 
0x17918686U, 0x9958c1c1U, 0x3a271d1dU, 0x27b99e9eU, + 0xd938e1e1U, 0xeb13f8f8U, 0x2bb39898U, 0x22331111U, + 0xd2bb6969U, 0xa970d9d9U, 0x07898e8eU, 0x33a79494U, + 0x2db69b9bU, 0x3c221e1eU, 0x15928787U, 0xc920e9e9U, + 0x8749ceceU, 0xaaff5555U, 0x50782828U, 0xa57adfdfU, + 0x038f8c8cU, 0x59f8a1a1U, 0x09808989U, 0x1a170d0dU, + 0x65dabfbfU, 0xd731e6e6U, 0x84c64242U, 0xd0b86868U, + 0x82c34141U, 0x29b09999U, 0x5a772d2dU, 0x1e110f0fU, + 0x7bcbb0b0U, 0xa8fc5454U, 0x6dd6bbbbU, 0x2c3a1616U +}; + +#define t_fn0(x) (sharedMemory[ (x)]) +#define t_fn1(x) (sharedMemory[256 + (x)]) +#define t_fn2(x) (sharedMemory[512 + (x)]) +#define t_fn3(x) (sharedMemory[768 + (x)]) + +__device__ __forceinline__ static void cn_aes_gpu_init(uint32_t *sharedMemory) +{ + for(int i = threadIdx.x; i < 1024; i += blockDim.x) + sharedMemory[i] = d_t_fn[i]; +} + +)===" +R"===( + +template< typename T > +__forceinline__ __device__ void unusedVar( const T& ) +{ +} + +template +__forceinline__ __device__ uint32_t shuffle(volatile uint32_t* ptr,const uint32_t sub,const int val,const uint32_t src) +{ +# if ( __CUDA_ARCH__ < 300 ) + ptr[sub] = val; + return ptr[src & (group_n-1)]; +# else + unusedVar( ptr ); + unusedVar( sub ); +# if (__CUDACC_VER_MAJOR__ >= 9) + return __shfl_sync(__activemask(), val, src, group_n); +# else + return __shfl( val, src, group_n ); +# endif +# endif +} + + +template +__forceinline__ __device__ uint64_t shuffle64(volatile uint32_t* ptr,const uint32_t sub,const uint64_t val,const uint32_t src, const uint32_t src2) +{ + uint64_t tmp; + ((uint32_t*)&tmp)[0] = shuffle(ptr, sub, static_cast(val), src); + ((uint32_t*)&tmp)[1] = shuffle(ptr, sub, static_cast(val >> 32), src2); + return tmp; +} + +struct u64 : public uint2 +{ + + __forceinline__ __device__ u64(){} + + __forceinline__ __device__ u64( const uint32_t x0, const uint32_t x1) + { + uint2::x = x0; + uint2::y = x1; + } + + __forceinline__ __device__ operator uint64_t() const + { + return *((uint64_t*)this); + } + + 
__forceinline__ __device__ u64( const uint64_t x0) + { + ((uint64_t*)&this->x)[0] = x0; + } + + __forceinline__ __device__ u64 operator^=(const u64& other) + { + uint2::x ^= other.x; + uint2::y ^= other.y; + + return *this; + } + + __forceinline__ __device__ u64 operator+(const u64& other) const + { + u64 tmp; + ((uint64_t*)&tmp.x)[0] = ((uint64_t*)&(this->x))[0] + ((uint64_t*)&(other.x))[0]; + + return tmp; + } + + __forceinline__ __device__ u64 operator+=(const uint64_t& other) + { + return ((uint64_t*)&this->x)[0] += other; + } +}; + +#ifdef RANDOM_MATH_64_BIT + +__device__ __forceinline__ static uint64_t rotate_left(uint64_t a, uint64_t b) +{ + const int shift = b & 63; + return (a << shift) | (a >> (64 - shift)); +} + +__device__ __forceinline__ static uint64_t rotate_right(uint64_t a, uint64_t b) +{ + const int shift = b & 63; + return (a >> shift) | (a << (64 - shift)); +} + +#else + +__device__ __forceinline__ static uint32_t rotate_left(uint32_t a, uint32_t b) { return __funnelshift_l(a, a, b); } +__device__ __forceinline__ static uint32_t rotate_right(uint32_t a, uint32_t b) { return __funnelshift_r(a, a, b); } + +#endif + +#define ITERATIONS 524288 +#define MEM 2097152 + +__global__ void CryptonightR_phase2( + int threads, + int bfactor, + int partidx, + uint32_t *d_long_state, + uint32_t *d_ctx_a, + uint32_t *d_ctx_b, + uint32_t * d_ctx_state, + uint32_t startNonce, + uint32_t * __restrict__ d_input + ) +{ + __shared__ uint32_t sharedMemory[1024]; + + cn_aes_gpu_init( sharedMemory ); + +# if( __CUDA_ARCH__ < 300 ) + extern __shared__ uint64_t externShared[]; + // 8 x 64bit values + volatile uint64_t* myChunks = (volatile uint64_t*)(externShared + (threadIdx.x >> 1) * 8); + volatile uint32_t* sPtr = (volatile uint32_t*)(externShared + (blockDim.x >> 1) * 8) + (threadIdx.x & 0xFFFFFFFE); +# else + extern __shared__ uint64_t chunkMem[]; + volatile uint32_t* sPtr = NULL; + // 8 x 64bit values + volatile uint64_t* myChunks = (volatile uint64_t*)(chunkMem + 
(threadIdx.x >> 1) * 8); +# endif + + __syncthreads( ); + + const uint64_t tid = (blockDim.x * blockIdx.x + threadIdx.x); + const uint32_t thread = tid >> 1; + const uint32_t sub = tid & 1; + + if (thread >= threads) { + return; + } + + uint8_t *l0 = (uint8_t*)&d_long_state[((uint64_t)thread) * (MEM / sizeof(uint32_t))]; + uint64_t ax0 = ((uint64_t*)(d_ctx_a + thread * 4))[sub]; + uint32_t idx0 = shuffle<2>(sPtr, sub, static_cast(ax0), 0); + uint64_t bx0 = ((uint64_t*)(d_ctx_b + thread * 16))[sub]; + uint64_t bx1 = ((uint64_t*)(d_ctx_b + thread * 16 + 4))[sub]; + +#ifdef RANDOM_MATH_64_BIT + uint64_t r0 = ((uint64_t*)(d_ctx_b + thread * 16 + 4 * 2))[0]; + uint64_t r1 = ((uint64_t*)(d_ctx_b + thread * 16 + 4 * 2))[1]; + uint64_t r2 = ((uint64_t*)(d_ctx_b + thread * 16 + 4 * 2))[2]; + uint64_t r3 = ((uint64_t*)(d_ctx_b + thread * 16 + 4 * 2))[3]; +#else + uint32_t r0 = d_ctx_b[thread * 16 + 4 * 2]; + uint32_t r1 = d_ctx_b[thread * 16 + 4 * 2 + 1]; + uint32_t r2 = d_ctx_b[thread * 16 + 4 * 2 + 2]; + uint32_t r3 = d_ctx_b[thread * 16 + 4 * 2 + 3]; +#endif + + const int batchsize = (ITERATIONS * 2) >> ( 1 + bfactor ); + const int start = partidx * batchsize; + const int end = start + batchsize; + + uint64_t* ptr0; + for (int i = start; i < end; ++i) { + ptr0 = (uint64_t *)&l0[idx0 & 0x1FFFC0]; + + ((ulonglong4*)myChunks)[sub] = ((ulonglong4*)ptr0)[sub]; + + uint32_t idx1 = (idx0 & 0x30) >> 3; + const u64 cx = myChunks[ idx1 + sub ]; + const u64 cx2 = myChunks[ idx1 + ((sub + 1) & 1) ]; + + u64 cx_aes = ax0 ^ u64( + t_fn0( cx.x & 0xff ) ^ t_fn1( (cx.y >> 8) & 0xff ) ^ t_fn2( (cx2.x >> 16) & 0xff ) ^ t_fn3( (cx2.y >> 24 ) ), + t_fn0( cx.y & 0xff ) ^ t_fn1( (cx2.x >> 8) & 0xff ) ^ t_fn2( (cx2.y >> 16) & 0xff ) ^ t_fn3( (cx.x >> 24 ) ) + ); + + { + const uint64_t chunk1 = myChunks[idx1 ^ 2 + sub]; + const uint64_t chunk2 = myChunks[idx1 ^ 4 + sub]; + const uint64_t chunk3 = myChunks[idx1 ^ 6 + sub]; + +# if (__CUDACC_VER_MAJOR__ >= 9) + __syncwarp(); +# else + 
__syncthreads(); +# endif + + myChunks[idx1 ^ 2 + sub] = chunk3 + bx1; + myChunks[idx1 ^ 4 + sub] = chunk1 + bx0; + myChunks[idx1 ^ 6 + sub] = chunk2 + ax0; + } + + myChunks[idx1 + sub] = cx_aes ^ bx0; + + ((ulonglong4*)ptr0)[sub] = ((ulonglong4*)myChunks)[sub]; + + idx0 = shuffle<2>(sPtr, sub, cx_aes.x, 0); + idx1 = (idx0 & 0x30) >> 3; + ptr0 = (uint64_t *)&l0[idx0 & 0x1FFFC0]; + + ((ulonglong4*)myChunks)[sub] = ((ulonglong4*)ptr0)[sub]; + + uint64_t cx_mul; + ((uint32_t*)&cx_mul)[0] = shuffle<2>(sPtr, sub, cx_aes.x , 0); + ((uint32_t*)&cx_mul)[1] = shuffle<2>(sPtr, sub, cx_aes.y , 0); + +#ifdef RANDOM_MATH_64_BIT + const uint64_t r4 = shuffle64<2>(sPtr, sub, ax0, 0, 0); + const uint64_t r6 = shuffle64<2>(sPtr, sub, bx0, 0, 0); + const uint64_t r7 = shuffle64<2>(sPtr, sub, bx1, 0, 0); +#else + const uint32_t r4 = shuffle<2>(sPtr, sub, static_cast(ax0), 0); + const uint32_t r6 = shuffle<2>(sPtr, sub, static_cast(bx0), 0); + const uint32_t r7 = shuffle<2>(sPtr, sub, static_cast(bx1), 0); +#endif + + if (sub == 1) { +#ifdef RANDOM_MATH_64_BIT + myChunks[idx1] ^= (r0 + r1) ^ (r2 + r3); + + const uint64_t r5 = ax0; +#else + ((uint32_t*)&myChunks[idx1])[0] ^= r0 + r1; + ((uint32_t*)&myChunks[idx1])[1] ^= r2 + r3; + + const uint32_t r5 = static_cast(ax0); +#endif + + XMRIG_INCLUDE_RANDOM_MATH + } + +# if (__CUDACC_VER_MAJOR__ >= 9) + __syncwarp(); +# else + __syncthreads( ); +# endif + + uint64_t c = ((uint64_t*)myChunks)[idx1 + sub]; + + { + uint64_t cl = ((uint64_t*)myChunks)[idx1]; + // sub 0 -> hi, sub 1 -> lo + uint64_t res = sub == 0 ? 
__umul64hi( cx_mul, cl ) : cx_mul * cl; + + const uint64_t chunk1 = myChunks[ idx1 ^ 2 + sub ] ^ res; + uint64_t chunk2 = myChunks[ idx1 ^ 4 + sub ]; + res ^= ((uint64_t*)&chunk2)[0]; + const uint64_t chunk3 = myChunks[ idx1 ^ 6 + sub ]; + +# if (__CUDACC_VER_MAJOR__ >= 9) + __syncwarp(); +# else + __syncthreads( ); +# endif + + myChunks[idx1 ^ 2 + sub] = chunk3 + bx1; + myChunks[idx1 ^ 4 + sub] = chunk1 + bx0; + myChunks[idx1 ^ 6 + sub] = chunk2 + ax0; + + ax0 += res; + } + + bx1 = bx0; + bx0 = cx_aes; + + myChunks[idx1 + sub] = ax0; + + ((ulonglong4*)ptr0)[sub] = ((ulonglong4*)myChunks)[sub]; + + ax0 ^= c; + idx0 = shuffle<2>(sPtr, sub, static_cast(ax0), 0); + } + + if (bfactor > 0) { + ((uint64_t*)(d_ctx_a + thread * 4))[sub] = ax0; + ((uint64_t*)(d_ctx_b + thread * 16))[sub] = bx0; + ((uint64_t*)(d_ctx_b + thread * 16 + 4))[sub] = bx1; + + if (sub == 1) { + // must be valid only for `sub == 1` +#ifdef RANDOM_MATH_64_BIT + ((uint64_t*)(d_ctx_b + thread * 16 + 4 * 2))[0] = r0; + ((uint64_t*)(d_ctx_b + thread * 16 + 4 * 2))[1] = r1; + ((uint64_t*)(d_ctx_b + thread * 16 + 4 * 2))[2] = r2; + ((uint64_t*)(d_ctx_b + thread * 16 + 4 * 2))[3] = r3; +#else + d_ctx_b[thread * 16 + 4 * 2] = r0; + d_ctx_b[thread * 16 + 4 * 2 + 1] = r1; + d_ctx_b[thread * 16 + 4 * 2 + 2] = r2; + d_ctx_b[thread * 16 + 4 * 2 + 3] = r3; +#endif + } + } +} +)===" diff --git a/src/nvidia/CudaCryptonightR_gen.cpp b/src/nvidia/CudaCryptonightR_gen.cpp new file mode 100644 index 00000000..b2805ca1 --- /dev/null +++ b/src/nvidia/CudaCryptonightR_gen.cpp @@ -0,0 +1,303 @@ +#include +#include +#include +#include +#include +#include + +#include "crypto/CryptoNight_monero.h" +#include "nvidia/CudaCryptonightR_gen.h" +#include "common/log/Log.h" + +static std::string get_code(const V4_Instruction* code, int code_size) +{ + std::stringstream s; + + for (int i = 0; i < code_size; ++i) + { + const V4_Instruction inst = code[i]; + + const uint32_t a = inst.dst_index; + const uint32_t b = inst.src_index; + + 
switch (inst.opcode) + { + case MUL: + s << 'r' << a << "*=r" << b << ';'; + break; + + case ADD: + s << 'r' << a << "+=r" << b << '+' << inst.C << "U;"; + break; + + case SUB: + s << 'r' << a << "-=r" << b << ';'; + break; + + case ROR: + s << 'r' << a << "=rotate_right(r" << a << ",r" << b << ");"; + break; + + case ROL: + s << 'r' << a << "=rotate_left(r" << a << ",r" << b << ");"; + break; + + case XOR: + s << 'r' << a << "^=r" << b << ';'; + break; + } + + s << '\n'; + } + + return s.str(); +} + +struct CacheEntry +{ + CacheEntry(xmrig::Variant variant, uint64_t height, int arch_major, int arch_minor, const std::vector& ptx, const std::string& lowered_name) : + variant(variant), + height(height), + arch_major(arch_major), + arch_minor(arch_minor), + ptx(ptx), + lowered_name(lowered_name) + {} + + xmrig::Variant variant; + uint64_t height; + int arch_major; + int arch_minor; + std::vector ptx; + std::string lowered_name; +}; + +struct BackgroundTaskBase +{ + virtual ~BackgroundTaskBase() {} + virtual void exec() = 0; +}; + +template +struct BackgroundTask : public BackgroundTaskBase +{ + BackgroundTask(T&& func) : m_func(std::move(func)) {} + void exec() override { m_func(); } + + T m_func; +}; + +static std::mutex CryptonightR_cache_mutex; +static std::mutex CryptonightR_build_mutex; +static std::vector CryptonightR_cache; + +static std::mutex background_tasks_mutex; +static std::vector background_tasks; +static std::thread* background_thread = nullptr; + +static void background_thread_proc() +{ + std::vector tasks; + for (;;) { + tasks.clear(); + { + std::lock_guard g(background_tasks_mutex); + background_tasks.swap(tasks); + } + + for (BackgroundTaskBase* task : tasks) { + task->exec(); + delete task; + } + + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + } +} + +template +static void background_exec(T&& func) +{ + BackgroundTaskBase* task = new BackgroundTask(std::move(func)); + + std::lock_guard g(background_tasks_mutex); + 
background_tasks.push_back(task); + if (!background_thread) { + background_thread = new std::thread(background_thread_proc); + } +} + +static bool is_64bit(xmrig::Variant variant) +{ + return false; +} + +static void CryptonightR_build_program( + std::vector& ptx, + std::string& lowered_name, + xmrig::Variant variant, + uint64_t height, + int arch_major, + int arch_minor, + std::string source) +{ + { + std::lock_guard g(CryptonightR_cache_mutex); + + // Remove old programs from cache + for (size_t i = 0; i < CryptonightR_cache.size();) + { + const CacheEntry& entry = CryptonightR_cache[i]; + if ((entry.variant == variant) && (entry.height + 2 < height)) + { + //LOG_INFO("CryptonightR: program for height %llu released (old program)", entry.height); + CryptonightR_cache[i] = std::move(CryptonightR_cache.back()); + CryptonightR_cache.pop_back(); + } + else + { + ++i; + } + } + } + + ptx.clear(); + ptx.reserve(65536); + + std::lock_guard g1(CryptonightR_build_mutex); + { + std::lock_guard g(CryptonightR_cache_mutex); + + // Check if the cache already has this program (some other thread might have added it first) + for (const CacheEntry& entry : CryptonightR_cache) + { + if ((entry.variant == variant) && (entry.height == height) && (entry.arch_major == arch_major) && (entry.arch_minor == arch_minor)) + { + ptx = entry.ptx; + lowered_name = entry.lowered_name; + return; + } + } + } + + nvrtcProgram prog; + nvrtcResult result = nvrtcCreateProgram(&prog, source.c_str(), "CryptonightR.cu", 0, NULL, NULL); + if (result != NVRTC_SUCCESS) { + LOG_ERR("nvrtcCreateProgram failed: %s", nvrtcGetErrorString(result)); + return; + } + + result = nvrtcAddNameExpression(prog, "CryptonightR_phase2"); + if (result != NVRTC_SUCCESS) { + LOG_ERR("nvrtcAddNameExpression failed: %s", nvrtcGetErrorString(result)); + nvrtcDestroyProgram(&prog); + return; + } + + char buf[64]; + sprintf(buf, "--gpu-architecture=compute_%d%d", arch_major, arch_minor); + + const char* opts[2] = { buf, 
is_64bit(variant) ? "--define-macro=RANDOM_MATH_64_BIT" : nullptr }; + result = nvrtcCompileProgram(prog, is_64bit(variant) ? 2 : 1, opts); + if (result != NVRTC_SUCCESS) { + LOG_ERR("nvrtcCompileProgram failed: %s", nvrtcGetErrorString(result)); + + size_t logSize; + if (nvrtcGetProgramLogSize(prog, &logSize) == NVRTC_SUCCESS) { + char *log = new char[logSize]; + if (nvrtcGetProgramLog(prog, log) == NVRTC_SUCCESS) { + LOG_INFO("Program compile log: %s", log); + } + delete[]log; + } + nvrtcDestroyProgram(&prog); + return; + } + + + const char* name; + result = nvrtcGetLoweredName(prog, "CryptonightR_phase2", &name); + if (result != NVRTC_SUCCESS) { + LOG_ERR("nvrtcGetLoweredName failed: %s", nvrtcGetErrorString(result)); + nvrtcDestroyProgram(&prog); + return; + } + + size_t ptxSize; + result = nvrtcGetPTXSize(prog, &ptxSize); + if (result != NVRTC_SUCCESS) { + LOG_ERR("nvrtcGetPTXSize failed: %s", nvrtcGetErrorString(result)); + nvrtcDestroyProgram(&prog); + return; + } + + ptx.resize(ptxSize); + result = nvrtcGetPTX(prog, ptx.data()); + if (result != NVRTC_SUCCESS) { + LOG_ERR("nvrtcGetPTX failed: %s", nvrtcGetErrorString(result)); + nvrtcDestroyProgram(&prog); + return; + } + + lowered_name = name; + + nvrtcDestroyProgram(&prog); + + //LOG_INFO("CryptonightR: program for height %llu compiled", height); + + { + std::lock_guard g(CryptonightR_cache_mutex); + CryptonightR_cache.emplace_back(variant, height, arch_major, arch_minor, ptx, lowered_name); + } +} + +void CryptonightR_get_program(std::vector& ptx, std::string& lowered_name, xmrig::Variant variant, uint64_t height, int arch_major, int arch_minor, bool background) +{ + if (background) { + background_exec([=]() { std::vector tmp; std::string s; CryptonightR_get_program(tmp, s, variant, height, arch_major, arch_minor, false); }); + return; + } + + ptx.clear(); + + if (variant != xmrig::VARIANT_WOW) + { + LOG_ERR("CryptonightR_get_program: invalid variant %d", variant); + return; + } + + const char* 
source_code_template = + #include "CryptonightR.cu" + ; + const char include_name[] = "XMRIG_INCLUDE_RANDOM_MATH"; + const char* offset = strstr(source_code_template, include_name); + if (!offset) + { + LOG_ERR("CryptonightR_get_program: XMRIG_INCLUDE_RANDOM_MATH not found in CryptonightR.cu"); + return; + } + + V4_Instruction code[256]; + const int code_size = v4_random_math_init(code, height); + + std::string source_code(source_code_template, offset); + source_code.append(get_code(code, code_size)); + source_code.append(offset + sizeof(include_name) - 1); + + { + std::lock_guard g(CryptonightR_cache_mutex); + + // Check if the cache has this program + for (const CacheEntry& entry : CryptonightR_cache) + { + if ((entry.variant == variant) && (entry.height == height) && (entry.arch_major == arch_major) && (entry.arch_minor == arch_minor)) + { + //LOG_INFO("CryptonightR: program for height %llu found in cache", height); + ptx = entry.ptx; + lowered_name = entry.lowered_name; + return; + } + } + } + + CryptonightR_build_program(ptx, lowered_name, variant, height, arch_major, arch_minor, source_code); +} diff --git a/src/nvidia/CudaCryptonightR_gen.h b/src/nvidia/CudaCryptonightR_gen.h new file mode 100644 index 00000000..126dab82 --- /dev/null +++ b/src/nvidia/CudaCryptonightR_gen.h @@ -0,0 +1,11 @@ +#ifndef __CUDACRYPTONIGHTR_GEN_H__ +#define __CUDACRYPTONIGHTR_GEN_H__ + +#include +#include +#include +#include "common/xmrig.h" + +void CryptonightR_get_program(std::vector& ptx, std::string& lowered_name, xmrig::Variant variant, uint64_t height, int arch_major, int arch_minor, bool background = false); + +#endif diff --git a/src/nvidia/cryptonight.h b/src/nvidia/cryptonight.h index c1e0bd05..673c7ae8 100644 --- a/src/nvidia/cryptonight.h +++ b/src/nvidia/cryptonight.h @@ -29,9 +29,17 @@ #include "../common/xmrig.h" +#include typedef struct { + CUdevice cuDevice; + CUcontext cuContext; + CUmodule module; + CUfunction kernel; + xmrig::Variant kernel_variant; + uint64_t 
kernel_height; + int device_id; const char *device_name; int device_arch[2]; @@ -69,6 +77,6 @@ int cuda_get_deviceinfo(nvid_ctx *ctx, xmrig::Algo algo, bool isCNv2); int cryptonight_gpu_init(nvid_ctx *ctx, xmrig::Algo algo); void cryptonight_extra_cpu_set_data(nvid_ctx *ctx, const void *data, size_t len); void cryptonight_extra_cpu_prepare(nvid_ctx *ctx, uint32_t startNonce, xmrig::Algo algo, xmrig::Variant variant); -void cryptonight_gpu_hash(nvid_ctx *ctx, xmrig::Algo algo, xmrig::Variant variant, uint32_t startNonce); +void cryptonight_gpu_hash(nvid_ctx *ctx, xmrig::Algo algo, xmrig::Variant variant, uint64_t height, uint32_t startNonce); void cryptonight_extra_cpu_final(nvid_ctx *ctx, uint32_t startNonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce, xmrig::Algo algo, xmrig::Variant variant); void cryptonight_extra_cpu_free(nvid_ctx *ctx, xmrig::Algo algo); diff --git a/src/nvidia/cuda_core.cu b/src/nvidia/cuda_core.cu index f1970561..c910ce7a 100644 --- a/src/nvidia/cuda_core.cu +++ b/src/nvidia/cuda_core.cu @@ -28,6 +28,9 @@ #include #include #include +#include "CudaCryptonightR_gen.h" +#include "common/log/Log.h" +#include "common/utils/timestamp.h" #ifdef _WIN32 #include @@ -319,10 +322,10 @@ __global__ void cryptonight_core_gpu_phase2_double( uint8_t *l0 = (uint8_t*)&d_long_state[(IndexType) thread * MEM]; uint64_t ax0 = ((uint64_t*)(d_ctx_a + thread * 4))[sub]; uint32_t idx0 = shuffle<2>(sPtr, sub, static_cast(ax0), 0); - uint64_t bx0 = ((uint64_t*)(d_ctx_b + thread * 12))[sub]; - uint64_t bx1 = ((uint64_t*)(d_ctx_b + thread * 12 + 4))[sub]; - uint64_t division_result = ((uint64_t*)(d_ctx_b + thread * 12 + 4 * 2))[0]; - uint32_t sqrt_result = (d_ctx_b + thread * 12 + 4 * 2 + 2)[0]; + uint64_t bx0 = ((uint64_t*)(d_ctx_b + thread * 16))[sub]; + uint64_t bx1 = ((uint64_t*)(d_ctx_b + thread * 16 + 4))[sub]; + uint64_t division_result = ((uint64_t*)(d_ctx_b + thread * 16 + 4 * 2))[0]; + uint32_t sqrt_result = (d_ctx_b + thread * 16 + 4 * 2 + 2)[0]; 
const int batchsize = (ITERATIONS * 2) >> ( 1 + bfactor ); const int start = partidx * batchsize; @@ -429,13 +432,13 @@ __global__ void cryptonight_core_gpu_phase2_double( if (bfactor > 0) { ((uint64_t*)(d_ctx_a + thread * 4))[sub] = ax0; - ((uint64_t*)(d_ctx_b + thread * 12))[sub] = bx0; - ((uint64_t*)(d_ctx_b + thread * 12 + 4))[sub] = bx1; + ((uint64_t*)(d_ctx_b + thread * 16))[sub] = bx0; + ((uint64_t*)(d_ctx_b + thread * 16 + 4))[sub] = bx1; if (sub == 1) { // must be valid only for `sub == 1` - ((uint64_t*)(d_ctx_b + thread * 12 + 4 * 2))[0] = division_result; - (d_ctx_b + thread * 12 + 4 * 2 + 2)[0] = sqrt_result; + ((uint64_t*)(d_ctx_b + thread * 16 + 4 * 2))[0] = division_result; + (d_ctx_b + thread * 16 + 4 * 2 + 2)[0] = sqrt_result; } } } @@ -724,11 +727,22 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce) } for (int i = 0; i < partcount; i++) { - if (BASE == xmrig::VARIANT_2) { + if (VARIANT == xmrig::VARIANT_WOW) { + int threads = ctx->device_blocks * ctx->device_threads; + void* args[] = { &threads, &ctx->device_bfactor, &i, &ctx->d_long_state, &ctx->d_ctx_a, &ctx->d_ctx_b, &ctx->d_ctx_state, &nonce, &ctx->d_input }; + CU_CHECK(ctx->device_id, cuLaunchKernel( + ctx->kernel, + grid.x, grid.y, grid.z, + block2.x, block2.y, block2.z, + sizeof(uint64_t) * block.x * 8 + block.x * sizeof(uint32_t) * static_cast(ctx->device_arch[0] < 3), nullptr, + args, 0 + )); + CU_CHECK(ctx->device_id, cuCtxSynchronize()); + } else if (BASE == xmrig::VARIANT_2) { CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase2_double<<< grid, block2, - sizeof(uint64_t) * block2.x * 8 + block2.x * sizeof(uint32_t) * static_cast(ctx->device_arch[0] < 3) + sizeof(uint64_t) * block.x * 8 + block.x * sizeof(uint32_t) * static_cast(ctx->device_arch[0] < 3) >>>( ctx->device_blocks * ctx->device_threads, ctx->device_bfactor, @@ -842,11 +856,40 @@ void cryptonight_core_gpu_hash_gpu(nvid_ctx* ctx, uint32_t nonce) } } -void cryptonight_gpu_hash(nvid_ctx *ctx, xmrig::Algo 
algo, xmrig::Variant variant, uint32_t startNonce) +void cryptonight_gpu_hash(nvid_ctx *ctx, xmrig::Algo algo, xmrig::Variant variant, uint64_t height, uint32_t startNonce) { using namespace xmrig; if (algo == CRYPTONIGHT) { + if (variant == VARIANT_WOW) { + if ((ctx->kernel_variant != variant) || (ctx->kernel_height != height)) { +# ifdef APP_DEBUG + const int64_t timeStart = xmrig::steadyTimestamp(); +# endif + + if (ctx->module) { + cuModuleUnload(ctx->module); + } + + std::vector ptx; + std::string lowered_name; + CryptonightR_get_program(ptx, lowered_name, variant, height, ctx->device_arch[0], ctx->device_arch[1]); + + CU_CHECK(ctx->device_id, cuModuleLoadDataEx(&ctx->module, ptx.data(), 0, 0, 0)); + CU_CHECK(ctx->device_id, cuModuleGetFunction(&ctx->kernel, ctx->module, lowered_name.c_str())); + + ctx->kernel_variant = variant; + ctx->kernel_height = height; + + CryptonightR_get_program(ptx, lowered_name, variant, height + 1, ctx->device_arch[0], ctx->device_arch[1], true); + +# ifdef APP_DEBUG + const int64_t timeFinish = xmrig::steadyTimestamp(); + LOG_INFO("GPU #%d updated CryptonightR in %.3fs", ctx->device_id, (timeFinish - timeStart) / 1000.0); +# endif + } + } + switch (variant) { case VARIANT_0: cryptonight_core_gpu_hash(ctx, startNonce); @@ -884,6 +927,10 @@ void cryptonight_gpu_hash(nvid_ctx *ctx, xmrig::Algo algo, xmrig::Variant varian cryptonight_core_gpu_hash_gpu(ctx, startNonce); break; + case VARIANT_WOW: + cryptonight_core_gpu_hash(ctx, startNonce); + break; + default: break; } diff --git a/src/nvidia/cuda_device.hpp b/src/nvidia/cuda_device.hpp index 84f8afec..19904a77 100644 --- a/src/nvidia/cuda_device.hpp +++ b/src/nvidia/cuda_device.hpp @@ -27,3 +27,15 @@ #define CUDA_CHECK_KERNEL(id, ...) \ __VA_ARGS__; \ CUDA_CHECK(id, cudaGetLastError()) + +#define CU_CHECK(id, ...) 
{ \ + CUresult result = __VA_ARGS__; \ + if(result != CUDA_SUCCESS){ \ + const char* s; \ + cuGetErrorString(result, &s); \ + std::cerr << "[CUDA] Error gpu " << ctx->device_id << ": <" << __FUNCTION__ << ">:" << __LINE__ << " \"" << (s ? s : "unknown error") << "\"" << std::endl; \ + throw std::runtime_error(std::string("[CUDA] Error: ") + std::string(s ? s : "unknown error")); \ + } \ +} \ +( (void) 0 ) + diff --git a/src/nvidia/cuda_extra.cu b/src/nvidia/cuda_extra.cu index a1f92d5c..fa12e455 100644 --- a/src/nvidia/cuda_extra.cu +++ b/src/nvidia/cuda_extra.cu @@ -162,15 +162,22 @@ __global__ void cryptonight_extra_gpu_prepare( XOR_BLOCKS_DST(ctx_state + 4, ctx_state + 12, ctx_b); memcpy(d_ctx_a + thread * 4, ctx_a, 4 * 4); - if (VARIANT == xmrig::VARIANT_2) { - memcpy(d_ctx_b + thread * 12, ctx_b, 4 * 4); + if (VARIANT == xmrig::VARIANT_WOW) { + memcpy(d_ctx_b + thread * 16, ctx_b, 4 * 4); // bx1 XOR_BLOCKS_DST(ctx_state + 16, ctx_state + 20, ctx_b); - memcpy(d_ctx_b + thread * 12 + 4, ctx_b, 4 * 4); + memcpy(d_ctx_b + thread * 16 + 4, ctx_b, 4 * 4); + // r0, r1, r2, r3 + memcpy(d_ctx_b + thread * 16 + 2 * 4, ctx_state + 24, 4 * 8); + } else if (VARIANT == xmrig::VARIANT_2 || VARIANT == xmrig::VARIANT_HALF) { + memcpy(d_ctx_b + thread * 16, ctx_b, 4 * 4); + // bx1 + XOR_BLOCKS_DST(ctx_state + 16, ctx_state + 20, ctx_b); + memcpy(d_ctx_b + thread * 16 + 4, ctx_b, 4 * 4); // division_result - memcpy(d_ctx_b + thread * 12 + 2 * 4, ctx_state + 24, 4 * 2); + memcpy(d_ctx_b + thread * 16 + 2 * 4, ctx_state + 24, 4 * 2); // sqrt_result - memcpy(d_ctx_b + thread * 12 + 2 * 4 + 2, ctx_state + 26, 4 * 2); + memcpy(d_ctx_b + thread * 16 + 2 * 4 + 2, ctx_state + 26, 4 * 2); } else { memcpy(d_ctx_b + thread * 4, ctx_b, 4 * 4); } @@ -322,6 +329,9 @@ void cryptonight_extra_cpu_set_data(nvid_ctx *ctx, const void *data, size_t len) int cryptonight_extra_cpu_init(nvid_ctx *ctx, xmrig::Algo algo, size_t hashMemSize) { + CU_CHECK(ctx->device_id, cuDeviceGet(&ctx->cuDevice, 
ctx->device_id)); + CU_CHECK(ctx->device_id, cuCtxCreate(&ctx->cuContext, 0, ctx->cuDevice)); + cudaError_t err; err = cudaSetDevice(ctx->device_id); if (err != cudaSuccess) { @@ -359,7 +369,7 @@ int cryptonight_extra_cpu_init(nvid_ctx *ctx, xmrig::Algo algo, size_t hashMemSi CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_state2, 50 * sizeof(uint32_t) * wsize)); } else { - ctx_b_size *= 3; + ctx_b_size *= 4; ctx->d_ctx_state2 = ctx->d_ctx_state; } @@ -392,6 +402,9 @@ void cryptonight_extra_cpu_prepare(nvid_ctx *ctx, uint32_t startNonce, xmrig::Al if (algo == xmrig::CRYPTONIGHT_HEAVY) { CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<<>>(wsize, ctx->d_input, ctx->inputlen, startNonce, ctx->d_ctx_state, ctx->d_ctx_state2, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2)); + } else if (variant == xmrig::VARIANT_WOW) { + CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare << > > (wsize, ctx->d_input, ctx->inputlen, startNonce, + ctx->d_ctx_state, ctx->d_ctx_state2, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2)); } else if (variant == xmrig::VARIANT_2 || variant == xmrig::VARIANT_HALF || variant == xmrig::VARIANT_TRTL) { CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<<>>(wsize, ctx->d_input, ctx->inputlen, startNonce, ctx->d_ctx_state, ctx->d_ctx_state2, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2)); diff --git a/src/version.h b/src/version.h index 5609ff2f..33c847d2 100644 --- a/src/version.h +++ b/src/version.h @@ -28,7 +28,7 @@ #define APP_ID "xmrig-nvidia" #define APP_NAME "XMRig-NVIDIA" #define APP_DESC "XMRig CUDA miner" -#define APP_VERSION "2.11.0" +#define APP_VERSION "2.11.1-dev" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2018 xmrig.com" @@ -36,7 +36,7 @@ #define APP_VER_MAJOR 2 #define APP_VER_MINOR 11 -#define APP_VER_PATCH 0 +#define APP_VER_PATCH 1 #ifdef _MSC_VER # if (_MSC_VER >= 1910) diff --git 
a/src/workers/CudaWorker.cpp b/src/workers/CudaWorker.cpp index 012392ee..8b44ed97 100644 --- a/src/workers/CudaWorker.cpp +++ b/src/workers/CudaWorker.cpp @@ -47,6 +47,11 @@ CudaWorker::CudaWorker(Handle *handle) : { const CudaThread *thread = static_cast(handle->config()); + m_ctx.module = nullptr; + m_ctx.kernel = nullptr; + m_ctx.kernel_variant = xmrig::VARIANT_AUTO; + m_ctx.kernel_height = 0; + m_ctx.device_id = static_cast(thread->index()); m_ctx.device_blocks = thread->blocks(); m_ctx.device_threads = thread->threads(); @@ -86,9 +91,9 @@ void CudaWorker::start() while (!Workers::isOutdated(m_sequence)) { uint32_t foundNonce[10]; uint32_t foundCount; - + cryptonight_extra_cpu_prepare(&m_ctx, m_nonce, m_algorithm, m_job.algorithm().variant()); - cryptonight_gpu_hash(&m_ctx, m_algorithm, m_job.algorithm().variant(), m_nonce); + cryptonight_gpu_hash(&m_ctx, m_algorithm, m_job.algorithm().variant(), m_job.height(), m_nonce); cryptonight_extra_cpu_final(&m_ctx, m_nonce, m_job.target(), &foundCount, foundNonce, m_algorithm, m_job.algorithm().variant()); for (size_t i = 0; i < foundCount; i++) { diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index bd5bb3b6..7053a679 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -39,6 +39,7 @@ #include "workers/Handle.h" #include "workers/Hashrate.h" #include "workers/Workers.h" +#include "Mem.h" bool Workers::m_active = false; @@ -327,7 +328,8 @@ void Workers::onResult(uv_async_t *) return; } - cryptonight_ctx *ctx = CryptoNight::createCtx(baton->jobs[0].algorithm().algo()); + cryptonight_ctx *ctx; + MemInfo info = Mem::create(&ctx, baton->jobs[0].algorithm().algo(), 1); for (const Job &job : baton->jobs) { JobResult result(job); @@ -340,7 +342,7 @@ void Workers::onResult(uv_async_t *) } } - CryptoNight::freeCtx(ctx); + Mem::release(&ctx, 1, info); }, [](uv_work_t* req, int) { JobBaton *baton = static_cast(req->data);