diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..9bce637 --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,33 @@ +name: Build & Test Package +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + linux-node10: + runs-on: ubuntu-16.04 + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-node@v1 + with: + node-version: '10.x' + registry-url: 'https://npm.pkg.github.com' + - run: sudo apt install libboost-dev + - run: npm install + - run: npm test + + linux-node12: + runs-on: ubuntu-18.04 + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-node@v1 + with: + node-version: '12.x' + registry-url: 'https://npm.pkg.github.com' + - run: sudo apt install libboost-dev + - run: npm install + - run: npm test \ No newline at end of file diff --git a/.github/workflows/publish-release.yaml b/.github/workflows/publish-release.yaml new file mode 100644 index 0000000..722513a --- /dev/null +++ b/.github/workflows/publish-release.yaml @@ -0,0 +1,18 @@ +name: Publish Package +on: + release: + types: [created] +jobs: + publish: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-node@v1 + with: + node-version: '10.x' + registry-url: 'https://npm.pkg.github.com' + - run: sudo apt install libboost-all-dev + - run: npm install + - run: npm publish + env: + NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..23d0199 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.vscode/ +.idea/ +*.iml +node_modules/ +build/ +cmake-build-debug/ \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..1ad037c --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,40 @@ +cmake_minimum_required(VERSION 3.5.1) +project(hasherbeamhash) + +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread") 
+set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +file(GLOB hasher_beamhash_SRC + "src/*/*.c" + "src/*.c" + "src/*.cpp" + "src/*/*.cpp" + ) + +#ff building Node JS Addon +if (CMAKE_JS_VERSION) + + set(CMAKE_EXE_LINKER_FLAGS "-static-libgcc -static-libstdc++ -static") + + # add node include directories, fix potential backslashes + foreach(CMAKE_JS_INC_ITEM ${CMAKE_JS_INC}) + string(REPLACE "\\" "/" CMAKE_JS_INC_ITEM ${CMAKE_JS_INC_ITEM}) + message(STATUS "include_directories ${CMAKE_JS_INC_ITEM}") + include_directories(${CMAKE_JS_INC_ITEM}) + endforeach(CMAKE_JS_INC_ITEM) + + string(REPLACE "\\" "/" PROJECT_SOURCE_DIR ${PROJECT_SOURCE_DIR}) + message(STATUS "include_directories ${PROJECT_SOURCE_DIR}") + include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}) + include_directories(${PROJECT_NAME} PUBLIC "./src") + + # include static libraries + + add_library(${PROJECT_NAME} SHARED ${hasher_beamhash_SRC} "hasherbeamhash.cc") + set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node") + +endif() + + + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5777b3d --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 JCThePants, contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..83a91fa --- /dev/null +++ b/README.md @@ -0,0 +1,91 @@ +hasher-beamhash +=============== + +This is a Node module for simple hashing and verifying [Beam coin](https://beam.mw) proof-of-work solutions. +Most of the native code comes from or is adapted from [Beam source code](https://github.com/BeamMW/beam/tree/master/3rdparty/crypto). + +This module has been developed and tested on [Node v10.17](https://nodejs.org/) and +[Ubuntu 16.04](http://releases.ubuntu.com/16.04/) for the [Beam mining pool](https://mintpond.com/#!/beam) at [MintPond](https://mintpond.com). + +## Install ## +__Install as Dependency in NodeJS Project__ +```bash +# Install from Github NPM repository + +sudo apt-get install build-essential +sudo apt-get install libboost-dev +npm config set @mintpond:registry https://npm.pkg.github.com/mintpond +npm config set //npm.pkg.github.com/:_authToken + +npm install @mintpond/hasher-beamhash@0.1.0 --save +``` +[Creating a personal access token](https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line) + +__Install & Test__ +```bash +# Install nodejs v10 +curl -sL https://deb.nodesource.com/setup_10.x | sudo -E bash - +sudo apt-get install nodejs -y + +# Download hasher-beamhash +git clone https://github.com/MintPond/hasher-beamhash + +# build +cd hasher-beamhash +npm install + +# test +npm test +``` + +## Usage ## +__Verify BeamHashII__ +```javascript +const beamhash = require('@mintpond/hasher-beamhash'); + +/** + * Verify a BeamHashII solution. 
+ * + * @param inputBuf {Buffer} + * @param nonceBuf {Buffer} + * @param solutionBuf {Buffer} + * @returns {boolean} True if valid, otherwise false. + */ +const isValid = beamhash.verify2(inputBuf, nonceBuf, solutionBuf); + +if (isValid) { + console.log('Valid solution'); +} +else { + console.log('Invalid solution'); +} +``` + +__Verify BeamHashIII__ +```javascript +const beamhash = require('@mintpond/hasher-beamhash'); + +/** + * Verify a BeamHashIII solution. + * + * @param inputBuf {Buffer} + * @param nonceBuf {Buffer} + * @param solutionBuf {Buffer} + * @returns {boolean} True if valid, otherwise false. + */ +const isValid = beamhash.verify3(inputBuf, nonceBuf, solutionBuf); + +if (isValid) { + console.log('Valid solution'); +} +else { + console.log('Invalid solution'); +} +``` + +## Dependencies ## +In Ubuntu: +``` + sudo apt-get install build-essential + sudo apt-get install libboost-dev +``` \ No newline at end of file diff --git a/binding.gyp b/binding.gyp new file mode 100644 index 0000000..6acd32e --- /dev/null +++ b/binding.gyp @@ -0,0 +1,26 @@ +{ + "targets": [ + { + "target_name": "hasherbeamhash", + "sources": [ + "src/blake/blake2b.cpp", + "src/beamHashIII_imp.cpp", + "src/equihashR_imp.cpp", + "hasherbeamhash.cc" + ], + "include_dirs": [ + ".", + "src", + " +#include +#include +#include +#include +#include "nan.h" +#include "src/equihashR.h" +#include "src/beamHashIII.h" + +using namespace node; +using namespace v8; + +#define THROW_ERROR_EXCEPTION(x) Nan::ThrowError(x) + +static BeamHash_III BeamHashIII; + + +bool verifyPoWScheme(PoWScheme &scheme, const char *input_ptr, const char *nonce64_ptr, Local solution) { + + const char *solution_ptr = (char *) Buffer::Data(solution); + + blake2b_state state; + scheme.InitialiseState(state); + blake2b_update(&state, (const unsigned char *) input_ptr, 32); + blake2b_update(&state, (const unsigned char *) nonce64_ptr, 8); + + std::vector solution_vec(solution_ptr, solution_ptr + 
node::Buffer::Length(solution)); + + return scheme.IsValidSolution(state, solution_vec); +} + + +NAN_METHOD(verify1) { + + if (info.Length() < 3) { + return THROW_ERROR_EXCEPTION("hasher-beamhash.verify1 - 3 arguments expected."); + } + + const char* input_ptr = (char*)Buffer::Data(Nan::To(info[0]).ToLocalChecked()); + const char* nonce64_ptr = (char*)Buffer::Data(Nan::To(info[1]).ToLocalChecked()); + Local solution = Nan::To(info[2]).ToLocalChecked(); + + bool isValid = verifyPoWScheme(BeamHashI, input_ptr, nonce64_ptr, solution); + + if (isValid) { + info.GetReturnValue().Set(Nan::True()); + } + else { + info.GetReturnValue().Set(Nan::False()); + } +} + + +NAN_METHOD(verify2) { + + if (info.Length() < 3) { + return THROW_ERROR_EXCEPTION("hasher-beamhash.verify2 - 3 arguments expected."); + } + + const char* input_ptr = (char*)Buffer::Data(Nan::To(info[0]).ToLocalChecked()); + const char* nonce64_ptr = (char*)Buffer::Data(Nan::To(info[1]).ToLocalChecked()); + Local solution = Nan::To(info[2]).ToLocalChecked(); + + bool isValid = verifyPoWScheme(BeamHashII, input_ptr, nonce64_ptr, solution); + + if (isValid) { + info.GetReturnValue().Set(Nan::True()); + } + else { + info.GetReturnValue().Set(Nan::False()); + } +} + + +NAN_METHOD(verify3) { + + if (info.Length() < 3) { + return THROW_ERROR_EXCEPTION("hasher-beamhash.verify3 - 3 arguments expected."); + } + + const char* input_ptr = (char*)Buffer::Data(Nan::To(info[0]).ToLocalChecked()); + const char* nonce64_ptr = (char*)Buffer::Data(Nan::To(info[1]).ToLocalChecked()); + Local solution = Nan::To(info[2]).ToLocalChecked(); + + bool isValid = verifyPoWScheme(BeamHashIII, input_ptr, nonce64_ptr, solution); + + if (isValid) { + info.GetReturnValue().Set(Nan::True()); + } + else { + info.GetReturnValue().Set(Nan::False()); + } +} + + +NAN_MODULE_INIT(init) { + + Nan::Set(target, Nan::New("verify1").ToLocalChecked(), + Nan::GetFunction(Nan::New(verify1)).ToLocalChecked()); + + Nan::Set(target, 
Nan::New("verify2").ToLocalChecked(), + Nan::GetFunction(Nan::New(verify2)).ToLocalChecked()); + + Nan::Set(target, Nan::New("verify3").ToLocalChecked(), + Nan::GetFunction(Nan::New(verify3)).ToLocalChecked()); +} + + +NODE_MODULE(hasherbeamhash, init) \ No newline at end of file diff --git a/index.js b/index.js new file mode 100644 index 0000000..4a1ab1b --- /dev/null +++ b/index.js @@ -0,0 +1,55 @@ +'use strict' + +const beamhash = require('bindings')('hasherbeamhash.node'); + +module.exports = { + + /** + * Verify BeamHashII solution. + * + * @param inputBuf {Buffer} + * @param nonceBuf {Buffer} + * @param solutionBuf {Buffer} + * @returns {boolean} True if valid, otherwise false. + */ + verify2: verify2, + + /** + * Verify BeamHashIII solution. + * + * @param inputBuf {Buffer} + * @param nonceBuf {Buffer} + * @param solutionBuf {Buffer} + * @returns {boolean} True if valid, otherwise false. + */ + verify3: verify3 +}; + + +function verify2(inputBuf, nonceBuf, solutionBuf) { + + _expectBuffer(inputBuf, 'inputBuf', 32); + _expectBuffer(nonceBuf, 'nonceBuf', 8); + _expectBuffer(solutionBuf, 'solutionBuf', 104); + + return beamhash.verify2(inputBuf, nonceBuf, solutionBuf); +} + + +function verify3(inputBuf, nonceBuf, solutionBuf) { + + _expectBuffer(inputBuf, 'inputBuf', 32); + _expectBuffer(nonceBuf, 'nonceBuf', 8); + _expectBuffer(solutionBuf, 'solutionBuf'); + + return beamhash.verify3(inputBuf, nonceBuf, solutionBuf); +} + + +function _expectBuffer(buffer, name, size) { + if (!Buffer.isBuffer(buffer)) + throw new Error(`"${name}" is expected to be a Buffer. Got ${(typeof buffer)} instead.`); + + if (size && buffer.length !== size) + throw new Error(`"${name}" is expected to be exactly ${size} bytes. 
Got ${buffer.length} instead.`); +} \ No newline at end of file diff --git a/package.json b/package.json new file mode 100644 index 0000000..9b9a905 --- /dev/null +++ b/package.json @@ -0,0 +1,29 @@ +{ + "name": "@mintpond/hasher-beamhash", + "version": "0.1.0", + "description": "BeamHash algorithm hashing and verification functions for NodeJS.", + "main": "index.js", + "author": "JCThePants", + "license": "MIT", + "dependencies": { + "bindings": "^1.3.0", + "nan": "^2.6.2" + }, + "scripts": { + "test": "node test" + }, + "homepage": "https://github.com/MintPond/hasher-beamhash", + "bugs": { + "url": "https://github.com/MintPond/hasher-beamhash/issues" + }, + "repository": { + "type": "git", + "url": "https://github.com/MintPond/hasher-beamhash.git" + }, + "publishConfig": { + "registry": "https://npm.pkg.github.com/" + }, + "engines": { + "node": ">=10.17.0" + } +} \ No newline at end of file diff --git a/src/beamHashIII.h b/src/beamHashIII.h new file mode 100644 index 0000000..f8fbb92 --- /dev/null +++ b/src/beamHashIII.h @@ -0,0 +1,54 @@ +// Copyright (c) 2020 The Beam Team + +#ifndef BEAMHASH_H +#define BEAMHASH_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "powScheme.h" + +const uint32_t workBitSize=448; +const uint32_t collisionBitSize=24; +const uint32_t numRounds=5; + +class stepElem { + friend class BeamHash_III; + + private: + std::bitset workBits; + std::vector indexTree; + + public: + stepElem(const uint64_t * prePow, uint32_t index); + stepElem(const stepElem &a, const stepElem &b, uint32_t remLen); + + void applyMix(uint32_t remLen); + uint32_t getCollisionBits() const; + bool isZero(); + + friend bool hasCollision(stepElem &a, stepElem &b); + friend bool distinctIndices(stepElem &a, stepElem &b); + friend bool indexAfter(stepElem &a, stepElem &b); + friend uint64_t getLowBits(stepElem test); +}; + +class BeamHash_III : public PoWScheme { + public: + int InitialiseState(blake2b_state& 
base_state); + bool IsValidSolution(const blake2b_state& base_state, std::vector soln); + + + bool OptimisedSolve(const blake2b_state& base_state, + const std::function&)> validBlock, + const std::function cancelled); +}; + +#endif \ No newline at end of file diff --git a/src/beamHashIII_imp.cpp b/src/beamHashIII_imp.cpp new file mode 100644 index 0000000..6582e20 --- /dev/null +++ b/src/beamHashIII_imp.cpp @@ -0,0 +1,377 @@ + +#include "beamHashIII.h" + + +namespace sipHash { + +static uint64_t rotl(uint64_t x, uint64_t b) { + return (x << b) | (x >> (64 - b)); +} + +#define sipRound() { \ + v0 += v1; v2 += v3; \ + v1 = rotl(v1,13); \ + v3 = rotl(v3,16); \ + v1 ^= v0; v3 ^= v2; \ + v0 = rotl(v0,32); \ + v2 += v1; v0 += v3; \ + v1 = rotl(v1,17); \ + v3 = rotl(v3,21); \ + v1 ^= v2; v3 ^= v0; \ + v2 = rotl(v2,32); \ +} + +uint64_t siphash24(uint64_t state0, uint64_t state1, uint64_t state2, uint64_t state3, uint64_t nonce) { + uint64_t v0, v1, v2, v3; + + v0 = state0; v1=state1; v2=state2; v3=state3; + v3 ^= nonce; + sipRound(); + sipRound(); + v0 ^= nonce; + v2 ^= 0xff; + sipRound(); + sipRound(); + sipRound(); + sipRound(); + + return (v0 ^ v1 ^ v2 ^ v3); +} + +} //end namespace sipHash + + +stepElem::stepElem(const uint64_t * prePow, uint32_t index) { + workBits.reset(); + + for (int32_t i=6; i>=0; i--) { + workBits = (workBits << 64); + uint64_t hash=sipHash::siphash24(prePow[0],prePow[1],prePow[2],prePow[3],(index << 3)+i); + workBits |= hash; + } + + indexTree.assign(1, index); +} + +stepElem::stepElem(const stepElem &a, const stepElem &b, uint32_t remLen) { + // Create a new rounds step element from matching two ancestors + workBits.reset(); + + workBits = a.workBits ^ b.workBits; + workBits = (workBits >> collisionBitSize); + + std::bitset mask; + mask.set(); + mask = (mask >> (workBitSize-remLen)); + workBits &= mask; + + if (a.indexTree[0] < b.indexTree[0]) { + indexTree.insert(indexTree.end(), a.indexTree.begin(), a.indexTree.end()); + 
indexTree.insert(indexTree.end(), b.indexTree.begin(), b.indexTree.end()); + } else { + indexTree.insert(indexTree.end(), b.indexTree.begin(), b.indexTree.end()); + indexTree.insert(indexTree.end(), a.indexTree.begin(), a.indexTree.end()); + } +} + +void stepElem::applyMix(uint32_t remLen) { + std::bitset<512> tempBits(workBits.to_string()); + + // Add in the bits of the index tree to the end of work bits + uint32_t padNum = ((512-remLen) + collisionBitSize) / (collisionBitSize + 1); + padNum = std::min(padNum, static_cast(indexTree.size())); + + for (uint32_t i=0; i tmp(indexTree[i]); + tmp = tmp << (remLen+i*(collisionBitSize + 1)); + tempBits |= tmp; + } + + + // Applyin the mix from the lined up bits + std::bitset<512> mask(0xFFFFFFFFFFFFFFFFUL); + uint64_t result = 0; + for (uint32_t i=0; i<8; i++) { + uint64_t tmp = (tempBits & mask).to_ullong(); + tempBits = tempBits >> 64; + + result += sipHash::rotl(tmp, (29*(i+1)) & 0x3F); + } + result = sipHash::rotl(result, 24); + + + // Wipe out lowest 64 bits in favor of the mixed bits + workBits = (workBits >> 64); + workBits = (workBits << 64); + workBits |= std::bitset(result); +} + +uint32_t stepElem::getCollisionBits() const { + std::bitset mask((1 << collisionBitSize) - 1); + return (uint32_t) (workBits & mask).to_ullong(); +} + +bool stepElem::isZero() { + return workBits.none(); +} + +uint64_t getLowBits(stepElem test) { + std::bitset mask(~0ULL); + return (uint64_t) (test.workBits & mask).to_ullong(); +} +/******** + + Friend Functions to compare step elements + +********/ + + +bool hasCollision(stepElem &a, stepElem &b) { + return (a.getCollisionBits() == b.getCollisionBits()); +} + +bool distinctIndices(stepElem &a, stepElem &b) { + for (uint32_t indexA : a.indexTree) { + for (uint32_t indexB : b.indexTree) { + if (indexA == indexB) return false; + } + } + return true; +} + +bool indexAfter(stepElem &a, stepElem &b) { + return (a.indexTree[0] < b.indexTree[0]); +} + +bool sortStepElement(const stepElem &a, 
const stepElem &b) { + return (a.getCollisionBits() < b.getCollisionBits()); +} + + +/******** + + Beam Hash III Verify Functions & CPU Miner + +********/ + +std::vector GetIndicesFromMinimal(std::vector soln) { + std::bitset<800> inStream; + std::bitset<800> mask((1 << (collisionBitSize+1))-1); + + inStream.reset(); + for (int32_t i = 99; i>=0; i--) { + inStream = (inStream << 8); + inStream |= (uint64_t) soln[i]; + } + + std::vector res; + for (uint32_t i=0; i<32; i++) { + res.push_back((uint32_t) (inStream & mask).to_ullong() ); + inStream = (inStream >> (collisionBitSize+1)); + } + + return res; +} + +std::vector GetMinimalFromIndices(std::vector sol) { + std::bitset<800> inStream; + std::bitset<800> mask(0xFF); + + inStream.reset(); + for (int32_t i = static_cast(sol.size()); i>=0; i--) { + inStream = (inStream << (collisionBitSize+1)); + inStream |= (uint64_t) sol[i]; + } + + std::vector res; + for (uint32_t i=0; i<100; i++) { + res.push_back((uint8_t) (inStream & mask).to_ullong() ); + inStream = (inStream >> 8); + } + + return res; +} + +int BeamHash_III::InitialiseState(blake2b_state& base_state) { + unsigned char personalization[BLAKE2B_PERSONALBYTES] = {}; + memcpy(personalization, "Beam-PoW", 8); + memcpy(personalization+8, &workBitSize, 4); + memcpy(personalization+12, &numRounds, 4); + + const uint8_t outlen = 32; + + blake2b_param param = {0}; + param.digest_length = outlen; + param.fanout = 1; + param.depth = 1; + + memcpy(¶m.personal, personalization, BLAKE2B_PERSONALBYTES); + return blake2b_init_param(&base_state, ¶m); +} + + +bool BeamHash_III::IsValidSolution(const blake2b_state& base_state, std::vector soln) { + + if (soln.size() != 104) { + return false; + } + + uint64_t prePow[4]; + blake2b_state state = base_state; + // Last 4 bytes of solution are our extra nonce + blake2b_update(&state, (uint8_t*) &soln[100], 4); + blake2b_final(&state, (uint8_t*) &prePow[0], static_cast(32)); + + // This will only evaluate bytes 0..99 + std::vector 
indices = GetIndicesFromMinimal(soln); + + std::vector X; + for (uint32_t i=0; i 1) { + std::vector Xtmp; + + for (size_t i = 0; i < X.size(); i += 2) { + uint32_t remLen = workBitSize-(round-1)*collisionBitSize; + if (round == 5) remLen -= 64; + + X[i].applyMix(remLen); + X[i+1].applyMix(remLen); + + if (!hasCollision(X[i], X[i+1])) { + //std::cout << "Collision Error" << i << " " << X.size() << " " << X[i].getCollisionBits() << " " << X[i+1].getCollisionBits() << std::endl; + return false; + } + + if (!distinctIndices(X[i], X[i+1])) { + //std::cout << "Non-Distinct" << i << " " << X.size() << std::endl; + return false; + } + + if (!indexAfter(X[i], X[i+1])) { + //std::cout << "Index Order" << i << " " << X.size() << std::endl; + return false; + } + + remLen = workBitSize-round*collisionBitSize; + if (round == 4) remLen -= 64; + if (round == 5) remLen = collisionBitSize; + + Xtmp.emplace_back(X[i], X[i+1], remLen); + } + + X = Xtmp; + round++; + } + + return X[0].isZero(); +} + + +SolverCancelledException beamSolverCancelled; + +bool BeamHash_III::OptimisedSolve(const blake2b_state& base_state, + const std::function&)> validBlock, + const std::function cancelled) { + + uint64_t prePow[4]; + blake2b_state state = base_state; + + uint8_t extraNonce[4] = {0}; + + blake2b_update(&state, (uint8_t*) &extraNonce, 4); + blake2b_final(&state, (uint8_t*) &prePow[0], static_cast(32)); + + std::vector elements; + elements.reserve(1 << (collisionBitSize+1)); + + // Seeding + for (uint32_t i=0; i<(1 << (collisionBitSize+1)); i++) { + elements.emplace_back(&prePow[0], i); + if (cancelled(ListGeneration)) throw beamSolverCancelled; + } + + // Round 1 to 5 + uint32_t round; + for (round=1; round<5; round++) { + + uint32_t remLen = workBitSize-(round-1)*collisionBitSize; + + // Mixing of elements + for (uint32_t i=0; i outElements; + outElements.reserve(1 << (collisionBitSize+1)); + + for (uint32_t i=0; i sol = GetMinimalFromIndices(temp.indexTree); + + // Adding the extra nonce + 
for (uint32_t k=0; k<4; k++) sol.push_back(extraNonce[k]); + + if (validBlock(sol)) return true; + } + } else { + break; + } + j++; + } + if (cancelled(ListColliding)) throw beamSolverCancelled; + } + + return false; +} diff --git a/src/blake/blake2-config.h b/src/blake/blake2-config.h new file mode 100644 index 0000000..327a9b7 --- /dev/null +++ b/src/blake/blake2-config.h @@ -0,0 +1,82 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Written in 2012 by Samuel Neves + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see . +*/ +#pragma once +#ifndef __BLAKE2_CONFIG_H__ +#define __BLAKE2_CONFIG_H__ + +#if defined(_M_IX86_FP) + #if _M_IX86_FP == 2 + #define HAVE_SSE2 + #ifndef HAVE_AVX + #define HAVE_AVX + #endif + #endif +#elif defined(_M_AMD64) || defined(_M_X64) + #define HAVE_SSSE3 +#endif + +// These don't work everywhere +#if defined(__SSE2__) +#define HAVE_SSE2 +#endif + +#if defined(__SSSE3__) +#define HAVE_SSSE3 +#endif + +#if defined(__SSE4_1__) +#define HAVE_SSE41 +#endif + +#if defined(__AVX__) || defined(__AVX2__) +#define HAVE_AVX +#endif + +#if defined(__XOP__) +#define HAVE_XOP +#endif + + +#ifdef HAVE_AVX2 +#ifndef HAVE_AVX +#define HAVE_AVX +#endif +#endif + +#ifdef HAVE_XOP +#ifndef HAVE_AVX +#define HAVE_AVX +#endif +#endif + +#ifdef HAVE_AVX +#ifndef HAVE_SSE41 +#define HAVE_SSE41 +#endif +#endif + +#ifdef HAVE_SSE41 +#ifndef HAVE_SSSE3 +#define HAVE_SSSE3 +#endif +#endif + +#ifdef HAVE_SSSE3 +#define HAVE_SSE2 +#endif + +#if !defined(HAVE_SSE2) +#error "This code requires at least SSE2." 
+#endif + +#endif diff --git a/src/blake/blake2-impl.h b/src/blake/blake2-impl.h new file mode 100644 index 0000000..971c3b9 --- /dev/null +++ b/src/blake/blake2-impl.h @@ -0,0 +1,132 @@ +/* + BLAKE2 reference source code package - optimized C implementations + Written in 2012 by Samuel Neves + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see . +*/ +#pragma once +#ifndef __BLAKE2_IMPL_H__ +#define __BLAKE2_IMPL_H__ + +#include + +static inline uint32_t load32( const void *src ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + uint32_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = ( const uint8_t * )src; + uint32_t w = *p++; + w |= ( uint32_t )( *p++ ) << 8; + w |= ( uint32_t )( *p++ ) << 16; + w |= ( uint32_t )( *p++ ) << 24; + return w; +#endif +} + +static inline uint64_t load64( const void *src ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + uint64_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = ( const uint8_t * )src; + uint64_t w = *p++; + w |= ( uint64_t )( *p++ ) << 8; + w |= ( uint64_t )( *p++ ) << 16; + w |= ( uint64_t )( *p++ ) << 24; + w |= ( uint64_t )( *p++ ) << 32; + w |= ( uint64_t )( *p++ ) << 40; + w |= ( uint64_t )( *p++ ) << 48; + w |= ( uint64_t )( *p++ ) << 56; + return w; +#endif +} + +static inline void store32( void *dst, uint32_t w ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; +#endif +} + +static inline void store64( void *dst, uint64_t w ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + *p++ 
= ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; +#endif +} + +static inline uint64_t load48( const void *src ) +{ + const uint8_t *p = ( const uint8_t * )src; + uint64_t w = *p++; + w |= ( uint64_t )( *p++ ) << 8; + w |= ( uint64_t )( *p++ ) << 16; + w |= ( uint64_t )( *p++ ) << 24; + w |= ( uint64_t )( *p++ ) << 32; + w |= ( uint64_t )( *p++ ) << 40; + return w; +} + +static inline void store48( void *dst, uint64_t w ) +{ + uint8_t *p = ( uint8_t * )dst; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; +} + +static inline uint32_t rotl32( const uint32_t w, const unsigned c ) +{ + return ( w << c ) | ( w >> ( 32 - c ) ); +} + +static inline uint64_t rotl64( const uint64_t w, const unsigned c ) +{ + return ( w << c ) | ( w >> ( 64 - c ) ); +} + +static inline uint32_t rotr32( const uint32_t w, const unsigned c ) +{ + return ( w >> c ) | ( w << ( 32 - c ) ); +} + +static inline uint64_t rotr64( const uint64_t w, const unsigned c ) +{ + return ( w >> c ) | ( w << ( 64 - c ) ); +} + +/* prevents compiler optimizing out memset() */ +static inline void secure_zero_memory( void *v, size_t n ) +{ + volatile uint8_t *p = ( volatile uint8_t * )v; + while( n-- ) *p++ = 0; +} + +#endif \ No newline at end of file diff --git a/src/blake/blake2-round.h b/src/blake/blake2-round.h new file mode 100644 index 0000000..2972043 --- /dev/null +++ b/src/blake/blake2-round.h @@ -0,0 +1,85 @@ +#define _mm_roti_epi64(x, c) \ + (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \ + : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \ + : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \ + : (-(c) == 63) ? 
_mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \ + : _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c)))) + +#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + row1l = _mm_add_epi64(row1l, row2l); \ + row1h = _mm_add_epi64(row1h, row2h); \ + \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ + \ + row4l = _mm_roti_epi64(row4l, -32); \ + row4h = _mm_roti_epi64(row4h, -32); \ + \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ + \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ + \ + row2l = _mm_roti_epi64(row2l, -24); \ + row2h = _mm_roti_epi64(row2h, -24); \ + +#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + row1l = _mm_add_epi64(row1l, row2l); \ + row1h = _mm_add_epi64(row1h, row2h); \ + \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ + \ + row4l = _mm_roti_epi64(row4l, -16); \ + row4h = _mm_roti_epi64(row4h, -16); \ + \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ + \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ + \ + row2l = _mm_roti_epi64(row2l, -63); \ + row2h = _mm_roti_epi64(row2h, -63); \ + +#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = _mm_alignr_epi8(row2h, row2l, 8); \ + t1 = _mm_alignr_epi8(row2l, row2h, 8); \ + row2l = t0; \ + row2h = t1; \ + \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + \ + t0 = _mm_alignr_epi8(row4h, row4l, 8); \ + t1 = _mm_alignr_epi8(row4l, row4h, 8); \ + row4l = t1; \ + row4h = t0; + +#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = _mm_alignr_epi8(row2l, row2h, 8); \ + t1 = _mm_alignr_epi8(row2h, row2l, 8); \ + row2l = t0; \ + row2h = t1; \ + \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + \ + t0 = _mm_alignr_epi8(row4l, row4h, 8); \ + t1 = _mm_alignr_epi8(row4h, row4l, 8); \ + 
row4l = t1; \ + row4h = t0; + +#define BLAKE2_ROUND(row1l,row1h,row2l,row2h,row3l,row3h,row4l,row4h) \ + G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ + G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ + \ + DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ + \ + G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ + G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ + \ + UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ No newline at end of file diff --git a/src/blake/blake2.h b/src/blake/blake2.h new file mode 100644 index 0000000..8e529c7 --- /dev/null +++ b/src/blake/blake2.h @@ -0,0 +1,161 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Written in 2012 by Samuel Neves + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see . 
+*/ +#pragma once +#ifndef __BLAKE2_H__ +#define __BLAKE2_H__ + +#include +#include + +#if defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) +#else +#define ALIGN(x) __attribute__ ((__aligned__(x))) +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + + enum blake2s_constant + { + BLAKE2S_BLOCKBYTES = 64, + BLAKE2S_OUTBYTES = 32, + BLAKE2S_KEYBYTES = 32, + BLAKE2S_SALTBYTES = 8, + BLAKE2S_PERSONALBYTES = 8 + }; + + enum blake2b_constant + { + BLAKE2B_BLOCKBYTES = 128, + BLAKE2B_OUTBYTES = 64, + BLAKE2B_KEYBYTES = 64, + BLAKE2B_SALTBYTES = 16, + BLAKE2B_PERSONALBYTES = 16 + }; + +#ifdef _MSC_VER +# pragma warning (disable: 4324) // structure was padded due to alignment specifier +#endif // _MSC_VER +#pragma pack(push, 1) + typedef struct __blake2s_param + { + uint8_t digest_length; // 1 + uint8_t key_length; // 2 + uint8_t fanout; // 3 + uint8_t depth; // 4 + uint32_t leaf_length; // 8 + uint8_t node_offset[6];// 14 + uint8_t node_depth; // 15 + uint8_t inner_length; // 16 + // uint8_t reserved[0]; + uint8_t salt[BLAKE2S_SALTBYTES]; // 24 + uint8_t personal[BLAKE2S_PERSONALBYTES]; // 32 + } blake2s_param; + + ALIGN( 64 ) typedef struct __blake2s_state + { + uint32_t h[8]; + uint32_t t[2]; + uint32_t f[2]; + uint8_t buf[2 * BLAKE2S_BLOCKBYTES]; + size_t buflen; + uint8_t last_node; + } blake2s_state; + + typedef struct __blake2b_param + { + uint8_t digest_length; // 1 + uint8_t key_length; // 2 + uint8_t fanout; // 3 + uint8_t depth; // 4 + uint32_t leaf_length; // 8 + uint64_t node_offset; // 16 + uint8_t node_depth; // 17 + uint8_t inner_length; // 18 + uint8_t reserved[14]; // 32 + uint8_t salt[BLAKE2B_SALTBYTES]; // 48 + uint8_t personal[BLAKE2B_PERSONALBYTES]; // 64 + } blake2b_param; + + ALIGN( 64 ) typedef struct __blake2b_state + { + uint64_t h[8]; + uint8_t buf[BLAKE2B_BLOCKBYTES]; + uint16_t counter; + uint8_t buflen; + uint8_t lastblock; + } blake2b_state; + + ALIGN( 64 ) typedef struct __blake2sp_state + { + blake2s_state S[8][1]; + 
blake2s_state R[1]; + uint8_t buf[8 * BLAKE2S_BLOCKBYTES]; + size_t buflen; + } blake2sp_state; + + ALIGN( 64 ) typedef struct __blake2bp_state + { + blake2b_state S[4][1]; + blake2b_state R[1]; + uint8_t buf[4 * BLAKE2B_BLOCKBYTES]; + size_t buflen; + } blake2bp_state; +#pragma pack(pop) +#ifdef _MSC_VER +# pragma warning (default: 4324) +#endif // _MSC_VER + + // Streaming API + int blake2s_init( blake2s_state *S, const uint8_t outlen ); + int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); + int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen ); + int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen ); + + int blake2b_init( blake2b_state *S, const uint8_t outlen ); + int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); + int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen ); + int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen ); + + int blake2sp_init( blake2sp_state *S, const uint8_t outlen ); + int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen ); + int blake2sp_final( blake2sp_state *S, uint8_t *out, uint8_t outlen ); + + int blake2bp_init( blake2bp_state *S, const uint8_t outlen ); + int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen ); + int blake2bp_final( blake2bp_state *S, uint8_t *out, uint8_t outlen ); + + // Simple API + int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + int blake2b( uint8_t *out, const void *in, const 
void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + int blake2b_long(uint8_t *out, const void *in, const uint32_t outlen, const uint64_t inlen); + + int blake2sp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + int blake2bp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + + static inline int blake2( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) + { + return blake2b( out, in, key, outlen, inlen, keylen ); + } + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/src/blake/blake2b-load-sse2.h b/src/blake/blake2b-load-sse2.h new file mode 100644 index 0000000..143cea0 --- /dev/null +++ b/src/blake/blake2b-load-sse2.h @@ -0,0 +1,67 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Written in 2012 by Samuel Neves + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. 
+*/ +#pragma once +#ifndef __BLAKE2B_LOAD_SSE2_H__ +#define __BLAKE2B_LOAD_SSE2_H__ + +#define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) +#define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) +#define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13) +#define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9) +#define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15) +#define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11) +#define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7) +#define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5) +#define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2) +#define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7) +#define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1) +#define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13) +#define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4) +#define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0) +#define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2) +#define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4) +#define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6) +#define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8) +#define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0) +#define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, 
m12); b1 = _mm_set_epi64x(m3, m11) +#define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15) +#define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14) +#define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14) +#define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13) +#define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9) +#define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2) +#define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12) +#define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1) +#define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8) +#define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6) +#define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11) +#define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3) +#define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1) +#define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4) +#define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7) +#define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6) +#define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3) +#define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12) +#define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) +#define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) +#define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13) +#define 
LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9) +#define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15) +#define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11) +#define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7) + + +#endif diff --git a/src/blake/blake2b-load-sse41.h b/src/blake/blake2b-load-sse41.h new file mode 100644 index 0000000..03b63d7 --- /dev/null +++ b/src/blake/blake2b-load-sse41.h @@ -0,0 +1,401 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Written in 2012 by Samuel Neves + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see . +*/ +#pragma once +#ifndef __BLAKE2B_LOAD_SSE41_H__ +#define __BLAKE2B_LOAD_SSE41_H__ + +#define LOAD_MSG_0_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m1); \ +b1 = _mm_unpacklo_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_0_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m0, m1); \ +b1 = _mm_unpackhi_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_0_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m4, m5); \ +b1 = _mm_unpacklo_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_0_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m5); \ +b1 = _mm_unpackhi_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_1_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m7, m2); \ +b1 = _mm_unpackhi_epi64(m4, m6); \ +} while(0) + + +#define LOAD_MSG_1_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m5, m4); \ +b1 = _mm_alignr_epi8(m3, m7, 8); \ +} while(0) + + +#define LOAD_MSG_1_3(b0, b1) \ +do \ +{ \ +b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \ +b1 = _mm_unpackhi_epi64(m5, m2); \ 
+} while(0) + + +#define LOAD_MSG_1_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m6, m1); \ +b1 = _mm_unpackhi_epi64(m3, m1); \ +} while(0) + + +#define LOAD_MSG_2_1(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m6, m5, 8); \ +b1 = _mm_unpackhi_epi64(m2, m7); \ +} while(0) + + +#define LOAD_MSG_2_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m4, m0); \ +b1 = _mm_blend_epi16(m1, m6, 0xF0); \ +} while(0) + + +#define LOAD_MSG_2_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m5, m1, 0xF0); \ +b1 = _mm_unpackhi_epi64(m3, m4); \ +} while(0) + + +#define LOAD_MSG_2_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m7, m3); \ +b1 = _mm_alignr_epi8(m2, m0, 8); \ +} while(0) + + +#define LOAD_MSG_3_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m3, m1); \ +b1 = _mm_unpackhi_epi64(m6, m5); \ +} while(0) + + +#define LOAD_MSG_3_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m0); \ +b1 = _mm_unpacklo_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_3_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m1, m2, 0xF0); \ +b1 = _mm_blend_epi16(m2, m7, 0xF0); \ +} while(0) + + +#define LOAD_MSG_3_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m3, m5); \ +b1 = _mm_unpacklo_epi64(m0, m4); \ +} while(0) + + +#define LOAD_MSG_4_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m2); \ +b1 = _mm_unpacklo_epi64(m1, m5); \ +} while(0) + + +#define LOAD_MSG_4_2(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m0, m3, 0xF0); \ +b1 = _mm_blend_epi16(m2, m7, 0xF0); \ +} while(0) + + +#define LOAD_MSG_4_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m7, m5, 0xF0); \ +b1 = _mm_blend_epi16(m3, m1, 0xF0); \ +} while(0) + + +#define LOAD_MSG_4_4(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m6, m0, 8); \ +b1 = _mm_blend_epi16(m4, m6, 0xF0); \ +} while(0) + + +#define LOAD_MSG_5_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m1, m3); \ +b1 = _mm_unpacklo_epi64(m0, m4); \ +} while(0) + + +#define LOAD_MSG_5_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m6, m5); \ +b1 = _mm_unpackhi_epi64(m5, m1); \ +} 
while(0) + + +#define LOAD_MSG_5_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m2, m3, 0xF0); \ +b1 = _mm_unpackhi_epi64(m7, m0); \ +} while(0) + + +#define LOAD_MSG_5_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m6, m2); \ +b1 = _mm_blend_epi16(m7, m4, 0xF0); \ +} while(0) + + +#define LOAD_MSG_6_1(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m6, m0, 0xF0); \ +b1 = _mm_unpacklo_epi64(m7, m2); \ +} while(0) + + +#define LOAD_MSG_6_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m2, m7); \ +b1 = _mm_alignr_epi8(m5, m6, 8); \ +} while(0) + + +#define LOAD_MSG_6_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m3); \ +b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \ +} while(0) + + +#define LOAD_MSG_6_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m3, m1); \ +b1 = _mm_blend_epi16(m1, m5, 0xF0); \ +} while(0) + + +#define LOAD_MSG_7_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m6, m3); \ +b1 = _mm_blend_epi16(m6, m1, 0xF0); \ +} while(0) + + +#define LOAD_MSG_7_2(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m7, m5, 8); \ +b1 = _mm_unpackhi_epi64(m0, m4); \ +} while(0) + + +#define LOAD_MSG_7_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m2, m7); \ +b1 = _mm_unpacklo_epi64(m4, m1); \ +} while(0) + + +#define LOAD_MSG_7_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m2); \ +b1 = _mm_unpacklo_epi64(m3, m5); \ +} while(0) + + +#define LOAD_MSG_8_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m3, m7); \ +b1 = _mm_alignr_epi8(m0, m5, 8); \ +} while(0) + + +#define LOAD_MSG_8_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m7, m4); \ +b1 = _mm_alignr_epi8(m4, m1, 8); \ +} while(0) + + +#define LOAD_MSG_8_3(b0, b1) \ +do \ +{ \ +b0 = m6; \ +b1 = _mm_alignr_epi8(m5, m0, 8); \ +} while(0) + + +#define LOAD_MSG_8_4(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m1, m3, 0xF0); \ +b1 = m2; \ +} while(0) + + +#define LOAD_MSG_9_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m5, m4); \ +b1 = _mm_unpackhi_epi64(m3, m0); \ +} while(0) + + +#define LOAD_MSG_9_2(b0, b1) \ 
+do \ +{ \ +b0 = _mm_unpacklo_epi64(m1, m2); \ +b1 = _mm_blend_epi16(m3, m2, 0xF0); \ +} while(0) + + +#define LOAD_MSG_9_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m7, m4); \ +b1 = _mm_unpackhi_epi64(m1, m6); \ +} while(0) + + +#define LOAD_MSG_9_4(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m7, m5, 8); \ +b1 = _mm_unpacklo_epi64(m6, m0); \ +} while(0) + + +#define LOAD_MSG_10_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m1); \ +b1 = _mm_unpacklo_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_10_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m0, m1); \ +b1 = _mm_unpackhi_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_10_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m4, m5); \ +b1 = _mm_unpacklo_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_10_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m5); \ +b1 = _mm_unpackhi_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_11_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m7, m2); \ +b1 = _mm_unpackhi_epi64(m4, m6); \ +} while(0) + + +#define LOAD_MSG_11_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m5, m4); \ +b1 = _mm_alignr_epi8(m3, m7, 8); \ +} while(0) + + +#define LOAD_MSG_11_3(b0, b1) \ +do \ +{ \ +b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \ +b1 = _mm_unpackhi_epi64(m5, m2); \ +} while(0) + + +#define LOAD_MSG_11_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m6, m1); \ +b1 = _mm_unpackhi_epi64(m3, m1); \ +} while(0) + + +#endif diff --git a/src/blake/blake2b-round.h b/src/blake/blake2b-round.h new file mode 100644 index 0000000..5cafe79 --- /dev/null +++ b/src/blake/blake2b-round.h @@ -0,0 +1,170 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Written in 2012 by Samuel Neves + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. 
+ + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. +*/ +#pragma once +#ifndef __BLAKE2B_ROUND_H__ +#define __BLAKE2B_ROUND_H__ + +#define LOAD(p) _mm_load_si128( (const __m128i *)(p) ) +#define STORE(p,r) _mm_store_si128((__m128i *)(p), r) + +#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) ) +#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) + +#define TOF(reg) _mm_castsi128_ps((reg)) +#define TOI(reg) _mm_castps_si128((reg)) + +#define LIKELY(x) __builtin_expect((x),1) + + +/* Microarchitecture-specific macros */ +#ifndef HAVE_XOP +#ifdef HAVE_SSSE3 +#define _mm_roti_epi64(x, c) \ + (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \ + : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \ + : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \ + : (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \ + : _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c)))) +#else +#define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-c) )) +#endif +#else +/* ... 
*/ +#endif + + + +#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ + \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ + \ + row4l = _mm_roti_epi64(row4l, -32); \ + row4h = _mm_roti_epi64(row4h, -32); \ + \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ + \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ + \ + row2l = _mm_roti_epi64(row2l, -24); \ + row2h = _mm_roti_epi64(row2h, -24); \ + +#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ + \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ + \ + row4l = _mm_roti_epi64(row4l, -16); \ + row4h = _mm_roti_epi64(row4h, -16); \ + \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ + \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ + \ + row2l = _mm_roti_epi64(row2l, -63); \ + row2h = _mm_roti_epi64(row2h, -63); \ + +#if defined(HAVE_SSSE3) +#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = _mm_alignr_epi8(row2h, row2l, 8); \ + t1 = _mm_alignr_epi8(row2l, row2h, 8); \ + row2l = t0; \ + row2h = t1; \ + \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + \ + t0 = _mm_alignr_epi8(row4h, row4l, 8); \ + t1 = _mm_alignr_epi8(row4l, row4h, 8); \ + row4l = t1; \ + row4h = t0; + +#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = _mm_alignr_epi8(row2l, row2h, 8); \ + t1 = _mm_alignr_epi8(row2h, row2l, 8); \ + row2l = t0; \ + row2h = t1; \ + \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + \ + t0 = _mm_alignr_epi8(row4l, row4h, 8); \ + t1 = _mm_alignr_epi8(row4h, row4l, 8); \ + row4l = t1; \ + 
row4h = t0; +#else + +#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = row4l;\ + t1 = row2l;\ + row4l = row3l;\ + row3l = row3h;\ + row3h = row4l;\ + row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \ + row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \ + row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); \ + row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1)) + +#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = row3l;\ + row3l = row3h;\ + row3h = t0;\ + t0 = row2l;\ + t1 = row4l;\ + row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); \ + row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \ + row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); \ + row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1)) + +#endif + +#if defined(HAVE_SSE41) +#include "blake2b-load-sse41.h" +#else +#include "blake2b-load-sse2.h" +#endif + +#define ROUND(r) \ + LOAD_MSG_ ##r ##_1(b0, b1); \ + G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + LOAD_MSG_ ##r ##_2(b0, b1); \ + G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ + LOAD_MSG_ ##r ##_3(b0, b1); \ + G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + LOAD_MSG_ ##r ##_4(b0, b1); \ + G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); + +#endif + +#define BLAKE2_ROUND(row1l,row1h,row2l,row2h,row3l,row3h,row4l,row4h) \ + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \ + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \ + \ + DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \ + \ + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \ + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \ + \ + UNDIAGONALIZE(row1l, row2l, row3l, 
row4l, row1h, row2h, row3h, row4h); \ No newline at end of file diff --git a/src/blake/blake2b.cpp b/src/blake/blake2b.cpp new file mode 100644 index 0000000..5e27442 --- /dev/null +++ b/src/blake/blake2b.cpp @@ -0,0 +1,339 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Written in 2012 by Samuel Neves + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. +*/ + +#include <stdint.h> +#include <string.h> +#include <stdio.h> + +#include "blake2.h" +#include "blake2-impl.h" + +#include "blake2-config.h" + +#include <emmintrin.h> +#if defined(HAVE_SSSE3) +#include <tmmintrin.h> +#endif +#if defined(HAVE_SSE41) +#include <smmintrin.h> +#endif +#if defined(HAVE_AVX) +#include <immintrin.h> +#endif +#if defined(HAVE_XOP) +#include <x86intrin.h> +#endif + +#include "blake2b-round.h" + +ALIGN( 64 ) static const uint64_t blake2b_IV[8] = +{ + 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL +}; + +/* init xors IV with input parameter block */ +int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) +{ + //blake2b_init0( S ); + const uint8_t * v = ( const uint8_t * )( blake2b_IV ); + const uint8_t * p = ( const uint8_t * )( P ); + uint8_t * h = ( uint8_t * )( S->h ); + /* IV XOR ParamBlock */ + memset( S, 0, sizeof( blake2b_state ) ); + + for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; + + return 0; +} + +/* Some sort of default parameter block initialization, for sequential blake2b */ +int blake2b_init( blake2b_state *S, const uint8_t outlen ) +{ + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; + + const blake2b_param P = + { + outlen, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + {0}, + {0}, + {0} + 
}; + return blake2b_init_param( S, &P ); +} + +int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) +{ + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; + + if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1; + + const blake2b_param P = + { + outlen, + keylen, + 1, + 1, + 0, + 0, + 0, + 0, + {0}, + {0}, + {0} + }; + + if( blake2b_init_param( S, &P ) < 0 ) + return 0; + + { + uint8_t block[BLAKE2B_BLOCKBYTES]; + memset( block, 0, BLAKE2B_BLOCKBYTES ); + memcpy( block, key, keylen ); + blake2b_update( S, block, BLAKE2B_BLOCKBYTES ); + secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ + } + return 0; +} + +static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) +{ + __m128i row1l, row1h; + __m128i row2l, row2h; + __m128i row3l, row3h; + __m128i row4l, row4h; + __m128i b0, b1; + __m128i t0, t1; +#if defined(HAVE_SSSE3) && !defined(HAVE_XOP) + const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 ); + const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 ); +#endif +#if defined(HAVE_SSE41) + const __m128i m0 = LOADU( block + 00 ); + const __m128i m1 = LOADU( block + 16 ); + const __m128i m2 = LOADU( block + 32 ); + const __m128i m3 = LOADU( block + 48 ); + const __m128i m4 = LOADU( block + 64 ); + const __m128i m5 = LOADU( block + 80 ); + const __m128i m6 = LOADU( block + 96 ); + const __m128i m7 = LOADU( block + 112 ); +#else + const uint64_t m0 = ( ( uint64_t * )block )[ 0]; + const uint64_t m1 = ( ( uint64_t * )block )[ 1]; + const uint64_t m2 = ( ( uint64_t * )block )[ 2]; + const uint64_t m3 = ( ( uint64_t * )block )[ 3]; + const uint64_t m4 = ( ( uint64_t * )block )[ 4]; + const uint64_t m5 = ( ( uint64_t * )block )[ 5]; + const uint64_t m6 = ( ( uint64_t * )block )[ 6]; + const uint64_t m7 = ( ( uint64_t * )block )[ 7]; + const uint64_t m8 = ( ( uint64_t * 
)block )[ 8]; + const uint64_t m9 = ( ( uint64_t * )block )[ 9]; + const uint64_t m10 = ( ( uint64_t * )block )[10]; + const uint64_t m11 = ( ( uint64_t * )block )[11]; + const uint64_t m12 = ( ( uint64_t * )block )[12]; + const uint64_t m13 = ( ( uint64_t * )block )[13]; + const uint64_t m14 = ( ( uint64_t * )block )[14]; + const uint64_t m15 = ( ( uint64_t * )block )[15]; +#endif + row1l = LOADU( &S->h[0] ); + row1h = LOADU( &S->h[2] ); + row2l = LOADU( &S->h[4] ); + row2h = LOADU( &S->h[6] ); + row3l = LOADU( &blake2b_IV[0] ); + row3h = LOADU( &blake2b_IV[2] ); + row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), _mm_set_epi32(0,0,0,S->counter) ); + row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), _mm_set_epi32(0,0,0L-S->lastblock,0L-S->lastblock) ); + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + ROUND( 10 ); + ROUND( 11 ); + row1l = _mm_xor_si128( row3l, row1l ); + row1h = _mm_xor_si128( row3h, row1h ); + STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) ); + STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) ); + row2l = _mm_xor_si128( row4l, row2l ); + row2h = _mm_xor_si128( row4h, row2h ); + STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) ); + STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) ); + return 0; +} + + +int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen ) +{ + while( inlen > 0 ) + { + size_t left = S->buflen; + size_t fill = BLAKE2B_BLOCKBYTES - left; + + if( inlen > fill ) + { + memcpy( S->buf + left, in, fill ); // Fill buffer + in += fill; + inlen -= fill; + S->counter += BLAKE2B_BLOCKBYTES; + blake2b_compress( S, S->buf ); // Compress + S->buflen = 0; + } + else // inlen <= fill + { + memcpy( S->buf + left, in, inlen ); + S->buflen += (uint8_t)inlen; // not enough to compress + in += inlen; + inlen = 0; + } + } + + return 0; +} + + +int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t 
outlen ) +{ + if( outlen > BLAKE2B_OUTBYTES ) + return -1; + + if( S->buflen > BLAKE2B_BLOCKBYTES ) + { + S->counter += BLAKE2B_BLOCKBYTES; + blake2b_compress( S, S->buf ); + S->buflen -= BLAKE2B_BLOCKBYTES; + memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen ); + } + + S->counter += S->buflen; + S->lastblock = 1; + memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ + blake2b_compress( S, S->buf ); + memcpy( out, &S->h[0], outlen ); + S->lastblock = 0; + return 0; +} + + +int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) +{ + blake2b_state S[1]; + + /* Verify parameters */ + if ( NULL == in ) return -1; + + if ( NULL == out ) return -1; + + if( NULL == key ) keylen = 0; + + if( keylen ) + { + if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1; + } + else + { + if( blake2b_init( S, outlen ) < 0 ) return -1; + } + + blake2b_update( S, ( const uint8_t * )in, inlen ); + blake2b_final( S, out, outlen ); + return 0; +} + +#if defined(SUPERCOP) +int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) +{ + return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 ); +} +#endif + +#if defined(BLAKE2B_SELFTEST) +#include +#include "blake2-kat.h" +int main( int argc, char **argv ) +{ + uint8_t key[BLAKE2B_KEYBYTES]; + uint8_t buf[KAT_LENGTH]; + + for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i ) + key[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + buf[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + { + uint8_t hash[BLAKE2B_OUTBYTES]; + blake2b( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ); + + if( 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) ) + { + puts( "error" ); + return -1; + } + } + + puts( "ok" ); + return 0; +} +#endif + +int blake2b_long(uint8_t *out, const void *in, const uint32_t outlen, const uint64_t inlen) +{ + blake2b_state blake_state; + if (outlen <= 
BLAKE2B_OUTBYTES) + { + blake2b_init(&blake_state, (uint8_t)outlen); + blake2b_update(&blake_state, (const uint8_t*)&outlen, sizeof(uint32_t)); + blake2b_update(&blake_state, (const uint8_t *)in, inlen); + blake2b_final(&blake_state, out, (uint8_t)outlen); + } + else + { + uint8_t out_buffer[BLAKE2B_OUTBYTES]; + uint8_t in_buffer[BLAKE2B_OUTBYTES]; + blake2b_init(&blake_state, BLAKE2B_OUTBYTES); + blake2b_update(&blake_state, (const uint8_t*)&outlen, sizeof(uint32_t)); + blake2b_update(&blake_state, (const uint8_t *)in, inlen); + blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES); + memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); + out += BLAKE2B_OUTBYTES / 2; + uint32_t toproduce = outlen - BLAKE2B_OUTBYTES / 2; + while (toproduce > BLAKE2B_OUTBYTES) + { + memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); + blake2b(out_buffer, in_buffer, NULL, BLAKE2B_OUTBYTES, BLAKE2B_OUTBYTES, 0); + memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); + out += BLAKE2B_OUTBYTES / 2; + toproduce -= BLAKE2B_OUTBYTES / 2; + } + memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); + blake2b(out_buffer, in_buffer, NULL, (uint8_t)toproduce, BLAKE2B_OUTBYTES, 0); + memcpy(out, out_buffer, toproduce); + + } + return 0; +} \ No newline at end of file diff --git a/src/common.h b/src/common.h new file mode 100644 index 0000000..2a6341c --- /dev/null +++ b/src/common.h @@ -0,0 +1,69 @@ +// Copyright (c) 2014 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+ +#ifndef BITCOIN_CRYPTO_COMMON_H +#define BITCOIN_CRYPTO_COMMON_H + +#if defined(HAVE_CONFIG_H) +#include "bitcoin-config.h" +#endif + +#include <stdint.h> +#include <string.h> + +#if defined(_MSC_VER) || defined(__APPLE__) || defined(__ANDROID__) +#include "compat/endian.h" +#endif + +uint16_t static inline ReadLE16(const unsigned char* ptr) +{ + return le16toh(*((uint16_t*)ptr)); +} + +uint32_t static inline ReadLE32(const unsigned char* ptr) +{ + return le32toh(*((uint32_t*)ptr)); +} + +uint64_t static inline ReadLE64(const unsigned char* ptr) +{ + return le64toh(*((uint64_t*)ptr)); +} + +void static inline WriteLE16(unsigned char* ptr, uint16_t x) +{ + *((uint16_t*)ptr) = htole16(x); +} + +void static inline WriteLE32(unsigned char* ptr, uint32_t x) +{ + *((uint32_t*)ptr) = htole32(x); +} + +void static inline WriteLE64(unsigned char* ptr, uint64_t x) +{ + *((uint64_t*)ptr) = htole64(x); +} + +uint32_t static inline ReadBE32(const unsigned char* ptr) +{ + return be32toh(*((uint32_t*)ptr)); +} + +uint64_t static inline ReadBE64(const unsigned char* ptr) +{ + return be64toh(*((uint64_t*)ptr)); +} + +void static inline WriteBE32(unsigned char* ptr, uint32_t x) +{ + *((uint32_t*)ptr) = htobe32(x); +} + +void static inline WriteBE64(unsigned char* ptr, uint64_t x) +{ + *((uint64_t*)ptr) = htobe64(x); +} + +#endif // BITCOIN_CRYPTO_COMMON_H \ No newline at end of file diff --git a/src/compat/byteswap.h b/src/compat/byteswap.h new file mode 100644 index 0000000..9f97f90 --- /dev/null +++ b/src/compat/byteswap.h @@ -0,0 +1,47 @@ +// Copyright (c) 2014 The Bitcoin developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+ +#ifndef BITCOIN_COMPAT_BYTESWAP_H +#define BITCOIN_COMPAT_BYTESWAP_H + +#if defined(HAVE_CONFIG_H) +#include "config/bitcoin-config.h" +#endif + +#include <stdint.h> + +#if defined(HAVE_BYTESWAP_H) +#include <byteswap.h> +#endif + +#if HAVE_DECL_BSWAP_16 == 0 +inline uint16_t bswap_16(uint16_t x) +{ + return (x >> 8) | ((x & 0x00ff) << 8); +} +#endif // HAVE_DECL_BSWAP16 + +#if HAVE_DECL_BSWAP_32 == 0 +inline uint32_t bswap_32(uint32_t x) +{ + return (((x & 0xff000000U) >> 24) | ((x & 0x00ff0000U) >> 8) | + ((x & 0x0000ff00U) << 8) | ((x & 0x000000ffU) << 24)); +} +#endif // HAVE_DECL_BSWAP32 + +#if HAVE_DECL_BSWAP_64 == 0 +inline uint64_t bswap_64(uint64_t x) +{ + return (((x & 0xff00000000000000ull) >> 56) + | ((x & 0x00ff000000000000ull) >> 40) + | ((x & 0x0000ff0000000000ull) >> 24) + | ((x & 0x000000ff00000000ull) >> 8) + | ((x & 0x00000000ff000000ull) << 8) + | ((x & 0x0000000000ff0000ull) << 24) + | ((x & 0x000000000000ff00ull) << 40) + | ((x & 0x00000000000000ffull) << 56)); +} +#endif // HAVE_DECL_BSWAP64 + +#endif // BITCOIN_COMPAT_BYTESWAP_H \ No newline at end of file diff --git a/src/compat/endian.h b/src/compat/endian.h new file mode 100644 index 0000000..2422c17 --- /dev/null +++ b/src/compat/endian.h @@ -0,0 +1,196 @@ +// Copyright (c) 2014 The Bitcoin developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+ +#ifndef BITCOIN_COMPAT_ENDIAN_H +#define BITCOIN_COMPAT_ENDIAN_H + +#if defined(HAVE_CONFIG_H) +#include "config/bitcoin-config.h" +#endif + +#include <stdint.h> + +#include "compat/byteswap.h" + +#if defined(HAVE_ENDIAN_H) +#include <endian.h> +#elif defined(HAVE_SYS_ENDIAN_H) +#include <sys/endian.h> +#endif + +#if defined(WORDS_BIGENDIAN) + +#if HAVE_DECL_HTOBE16 == 0 +inline uint16_t htobe16(uint16_t host_16bits) +{ + return host_16bits; +} +#endif // HAVE_DECL_HTOBE16 + +#if HAVE_DECL_HTOLE16 == 0 +inline uint16_t htole16(uint16_t host_16bits) +{ + return bswap_16(host_16bits); +} +#endif // HAVE_DECL_HTOLE16 + +#if HAVE_DECL_BE16TOH == 0 +inline uint16_t be16toh(uint16_t big_endian_16bits) +{ + return big_endian_16bits; +} +#endif // HAVE_DECL_BE16TOH + +#if HAVE_DECL_LE16TOH == 0 +inline uint16_t le16toh(uint16_t little_endian_16bits) +{ + return bswap_16(little_endian_16bits); +} +#endif // HAVE_DECL_LE16TOH + +#if HAVE_DECL_HTOBE32 == 0 +inline uint32_t htobe32(uint32_t host_32bits) +{ + return host_32bits; +} +#endif // HAVE_DECL_HTOBE32 + +#if HAVE_DECL_HTOLE32 == 0 +inline uint32_t htole32(uint32_t host_32bits) +{ + return bswap_32(host_32bits); +} +#endif // HAVE_DECL_HTOLE32 + +#if HAVE_DECL_BE32TOH == 0 +inline uint32_t be32toh(uint32_t big_endian_32bits) +{ + return big_endian_32bits; +} +#endif // HAVE_DECL_BE32TOH + +#if HAVE_DECL_LE32TOH == 0 +inline uint32_t le32toh(uint32_t little_endian_32bits) +{ + return bswap_32(little_endian_32bits); +} +#endif // HAVE_DECL_LE32TOH + +#if HAVE_DECL_HTOBE64 == 0 +inline uint64_t htobe64(uint64_t host_64bits) +{ + return host_64bits; +} +#endif // HAVE_DECL_HTOBE64 + +#if HAVE_DECL_HTOLE64 == 0 +inline uint64_t htole64(uint64_t host_64bits) +{ + return bswap_64(host_64bits); +} +#endif // HAVE_DECL_HTOLE64 + +#if HAVE_DECL_BE64TOH == 0 +inline uint64_t be64toh(uint64_t big_endian_64bits) +{ + return big_endian_64bits; +} +#endif // HAVE_DECL_BE64TOH + +#if HAVE_DECL_LE64TOH == 0 +inline uint64_t le64toh(uint64_t little_endian_64bits) +{ + 
return bswap_64(little_endian_64bits); +} +#endif // HAVE_DECL_LE64TOH + +#else // WORDS_BIGENDIAN + +#if HAVE_DECL_HTOBE16 == 0 +inline uint16_t htobe16(uint16_t host_16bits) +{ + return bswap_16(host_16bits); +} +#endif // HAVE_DECL_HTOBE16 + +#if HAVE_DECL_HTOLE16 == 0 +inline uint16_t htole16(uint16_t host_16bits) +{ + return host_16bits; +} +#endif // HAVE_DECL_HTOLE16 + +#if HAVE_DECL_BE16TOH == 0 +inline uint16_t be16toh(uint16_t big_endian_16bits) +{ + return bswap_16(big_endian_16bits); +} +#endif // HAVE_DECL_BE16TOH + +#if HAVE_DECL_LE16TOH == 0 +inline uint16_t le16toh(uint16_t little_endian_16bits) +{ + return little_endian_16bits; +} +#endif // HAVE_DECL_LE16TOH + +#if HAVE_DECL_HTOBE32 == 0 +inline uint32_t htobe32(uint32_t host_32bits) +{ + return bswap_32(host_32bits); +} +#endif // HAVE_DECL_HTOBE32 + +#if HAVE_DECL_HTOLE32 == 0 +inline uint32_t htole32(uint32_t host_32bits) +{ + return host_32bits; +} +#endif // HAVE_DECL_HTOLE32 + +#if HAVE_DECL_BE32TOH == 0 +inline uint32_t be32toh(uint32_t big_endian_32bits) +{ + return bswap_32(big_endian_32bits); +} +#endif // HAVE_DECL_BE32TOH + +#if HAVE_DECL_LE32TOH == 0 +inline uint32_t le32toh(uint32_t little_endian_32bits) +{ + return little_endian_32bits; +} +#endif // HAVE_DECL_LE32TOH + +#if HAVE_DECL_HTOBE64 == 0 +inline uint64_t htobe64(uint64_t host_64bits) +{ + return bswap_64(host_64bits); +} +#endif // HAVE_DECL_HTOBE64 + +#if HAVE_DECL_HTOLE64 == 0 +inline uint64_t htole64(uint64_t host_64bits) +{ + return host_64bits; +} +#endif // HAVE_DECL_HTOLE64 + +#if HAVE_DECL_BE64TOH == 0 +inline uint64_t be64toh(uint64_t big_endian_64bits) +{ + return bswap_64(big_endian_64bits); +} +#endif // HAVE_DECL_BE64TOH + +#if HAVE_DECL_LE64TOH == 0 +inline uint64_t le64toh(uint64_t little_endian_64bits) +{ + return little_endian_64bits; +} +#endif // HAVE_DECL_LE64TOH + +#endif // WORDS_BIGENDIAN + +#endif // BITCOIN_COMPAT_ENDIAN_H \ No newline at end of file diff --git a/src/equihashR.h b/src/equihashR.h 
new file mode 100644 index 0000000..3e68344 --- /dev/null +++ b/src/equihashR.h @@ -0,0 +1,246 @@ +// Copyright (c) 2019 The Beam Team + +// Based on Reference Implementation of the Equihash Proof-of-Work algorithm. +// Copyright (c) 2016 Jack Grigg +// Copyright (c) 2016 The Zcash developers + +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +// Resources: +// Alex Biryukov and Dmitry Khovratovich +// Equihash: Asymmetric Proof-of-Work Based on the Generalized Birthday Problem +// NDSS ’16, 21-24 February 2016, San Diego, CA, USA +// https://www.internetsociety.org/sites/default/files/blogs-media/equihash-asymmetric-proof-of-work-based-generalized-birthday-problem.pdf + +#ifndef EQUIHASHR_H +#define EQUIHASHR_H + +#include "blake/blake2.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "powScheme.h" + +typedef blake2b_state eh_HashState; +typedef uint32_t eh_index; +typedef uint8_t eh_trunc; + +void ExpandArray(const unsigned char* in, size_t in_len, + unsigned char* out, size_t out_len, + size_t bit_len, size_t byte_pad=0); +void CompressArray(const unsigned char* in, size_t in_len, + unsigned char* out, size_t out_len, + size_t bit_len, size_t byte_pad=0); + +eh_index ArrayToEhIndex(const unsigned char* array); +eh_trunc TruncateIndex(const eh_index i, const unsigned int ilen); + +std::vector GetIndicesFromMinimal(std::vector minimal, + size_t cBitLen); +std::vector GetMinimalFromIndices(std::vector indices, + size_t cBitLen); + +template +class StepRow +{ + template + friend class StepRow; + friend class CompareSR; + +protected: + unsigned char hash[WIDTH]; + +public: + StepRow(const unsigned char* hashIn, size_t hInLen, + size_t hLen, size_t cBitLen); + ~StepRow() { } + + template + StepRow(const StepRow& a); + + bool IsZero(size_t len); + + template + friend bool HasCollision(StepRow& a, StepRow& b, size_t l); +}; + +class 
CompareSR +{ +private: + size_t len; + +public: + CompareSR(size_t l) : len {l} { } + + template + inline bool operator()(const StepRow& a, const StepRow& b) { return memcmp(a.hash, b.hash, len) < 0; } +}; + +template +bool HasCollision(StepRow& a, StepRow& b, size_t l); + +template +class FullStepRow : public StepRow +{ + template + friend class FullStepRow; + + using StepRow::hash; + +public: + FullStepRow(const unsigned char* hashIn, size_t hInLen, + size_t hLen, size_t cBitLen, eh_index i); + ~FullStepRow() { } + + FullStepRow(const FullStepRow& a) : StepRow {a} { } + template + FullStepRow(const FullStepRow& a, const FullStepRow& b, size_t len, size_t lenIndices, size_t trim); + FullStepRow& operator=(const FullStepRow& a); + + inline bool IndicesBefore(const FullStepRow& a, size_t len, size_t lenIndices) const { return memcmp(hash+len, a.hash+len, lenIndices) < 0; } + std::vector GetIndices(size_t len, size_t lenIndices, + size_t cBitLen) const; + + template + friend bool DistinctIndices(const FullStepRow& a, const FullStepRow& b, + size_t len, size_t lenIndices); + template + friend bool IsValidBranch(const FullStepRow& a, const size_t len, const unsigned int ilen, const eh_trunc t); +}; + +template +class TruncatedStepRow : public StepRow +{ + template + friend class TruncatedStepRow; + + using StepRow::hash; + +public: + TruncatedStepRow(const unsigned char* hashIn, size_t hInLen, + size_t hLen, size_t cBitLen, + eh_index i, unsigned int ilen); + ~TruncatedStepRow() { } + + TruncatedStepRow(const TruncatedStepRow& a) : StepRow {a} { } + template + TruncatedStepRow(const TruncatedStepRow& a, const TruncatedStepRow& b, size_t len, size_t lenIndices, int trim); + TruncatedStepRow& operator=(const TruncatedStepRow& a); + + inline bool IndicesBefore(const TruncatedStepRow& a, size_t len, size_t lenIndices) const { return memcmp(hash+len, a.hash+len, lenIndices) < 0; } + std::shared_ptr GetTruncatedIndices(size_t len, size_t lenIndices) const; +}; + + + +inline 
constexpr const size_t max(const size_t A, const size_t B) { return A > B ? A : B; } + +inline constexpr size_t beamhash_solution_size(unsigned int N, unsigned int K) { + return (1 << K)*(N/(K+1)+1)/8; +} + +constexpr uint8_t GetSizeInBytes(size_t N) +{ + return static_cast((N + 7) / 8); +} + + + +template +bool DistinctIndices(const FullStepRow& a, const FullStepRow& b, size_t len, size_t lenIndices) +{ + for(size_t i = 0; i < lenIndices; i += sizeof(eh_index)) { + for(size_t j = 0; j < lenIndices; j += sizeof(eh_index)) { + if (memcmp(a.hash+len+i, b.hash+len+j, sizeof(eh_index)) == 0) { + return false; + } + } + } + return true; +} + +template +bool IsProbablyDuplicate(std::shared_ptr indices, size_t lenIndices) +{ + bool checked_index[MAX_INDICES] = {false}; + size_t count_checked = 0; + for (size_t z = 0; z < lenIndices; z++) { + // Skip over indices we have already paired + if (!checked_index[z]) { + for (size_t y = z+1; y < lenIndices; y++) { + if (!checked_index[y] && indices.get()[z] == indices.get()[y]) { + // Pair found + checked_index[y] = true; + count_checked += 2; + break; + } + } + } + } + return count_checked == lenIndices; +} + +template +bool IsValidBranch(const FullStepRow& a, const size_t len, const unsigned int ilen, const eh_trunc t) +{ + return TruncateIndex(ArrayToEhIndex(a.hash+len), ilen) == t; +} + + + +template +class EquihashR : public PoWScheme +{ + +public: + enum : size_t { IndicesPerHashOutput=512/N }; + enum : size_t { HashOutput = IndicesPerHashOutput * GetSizeInBytes(N) }; + enum : size_t { CollisionBitLength=N/(K+1) }; + enum : size_t { CollisionByteLength=(CollisionBitLength+7)/8 }; + enum : size_t { HashLength=(K+1)*CollisionByteLength }; + enum : size_t { FullWidth=2*CollisionByteLength+sizeof(eh_index)*(1 << (K-1)) }; + enum : size_t { FinalFullWidth=2*CollisionByteLength+sizeof(eh_index)*(1 << (K)) }; + enum : size_t { TruncatedWidth=max(HashLength+sizeof(eh_trunc), 2*CollisionByteLength+sizeof(eh_trunc)*(1 << (K-1))) }; + 
enum : size_t { FinalTruncatedWidth=max(HashLength+sizeof(eh_trunc), 2*CollisionByteLength+sizeof(eh_trunc)*(1 << (K))) }; + enum : size_t { SolutionWidth=(1 << K)*(CollisionBitLength+1)/8 }; + + EquihashR() { } + + int InitialiseState(eh_HashState& base_state); + bool IsValidSolution(const eh_HashState& base_state, std::vector soln); + bool OptimisedSolve(const eh_HashState& base_state, + const std::function&)> validBlock, + const std::function cancelled); +}; + +static EquihashR<150,5,0> BeamHashI; +static EquihashR<150,5,3> BeamHashII; + + +#define EhRInitialiseState(n, k, r, base_state) \ + if (n == 150 && k == 5 && r == 0) { \ + BeamHashI.InitialiseState(base_state); \ + } else if (n == 150 && k == 5 && r == 3) { \ + BeamHashII.InitialiseState(base_state); \ + } else { \ + throw std::invalid_argument("Unsupported Equihash parameters"); \ + } + +#define EhRIsValidSolution(n, k, r, base_state, soln, ret) \ + if (n == 150 && k == 5 && r == 0) { \ + ret = BeamHashI.IsValidSolution(base_state, soln); \ + } else if (n == 150 && k == 5 && r == 3) { \ + ret = BeamHashII.IsValidSolution(base_state, soln); \ + } else { \ + throw std::invalid_argument("Unsupported Equihash parameters"); \ + } + + +#endif \ No newline at end of file diff --git a/src/equihashR_imp.cpp b/src/equihashR_imp.cpp new file mode 100644 index 0000000..981c013 --- /dev/null +++ b/src/equihashR_imp.cpp @@ -0,0 +1,698 @@ +// Copyright (c) 2019 The Beam Team + +// Based on Reference Implementation of the Equihash Proof-of-Work algorithm. +// Copyright (c) 2016 Jack Grigg +// Copyright (c) 2016 The Zcash developers + +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+ +// Resources: +// Alex Biryukov and Dmitry Khovratovich +// Equihash: Asymmetric Proof-of-Work Based on the Generalized Birthday Problem +// NDSS ’16, 21-24 February 2016, San Diego, CA, USA +// https://www.internetsociety.org/sites/default/files/blogs-media/equihash-asymmetric-proof-of-work-based-generalized-birthday-problem.pdf + +#include "compat/endian.h" +#include "equihashR.h" +//#include "util.h" + +#include +#include +#include +#include + +SolverCancelledException solver_cancelled; + +namespace +{ + constexpr void ZeroizeUnusedBits(size_t N, size_t R, unsigned char* hash, size_t hLen) + { + uint8_t rem = N % 8; + const size_t step = GetSizeInBytes(N); + + if (rem) + { + // clear lowest 8-rem bits + for (size_t i = step - 1; i < hLen; i += step) { + uint8_t b = 0xff << (8-rem); + hash[i] &= b; + } + } + + if (R) { + for (size_t i = 0; i < hLen; i += step) { + uint8_t b = 0xff >> (2*R); + hash[i] &= b; + } + } + } +} + +template +int EquihashR::InitialiseState(eh_HashState& base_state) +{ + uint32_t le_N = htole32(N); + uint32_t le_K = htole32(K); + + unsigned char personalization[BLAKE2B_PERSONALBYTES] = {}; + memcpy(personalization, "Beam-PoW", 8); + memcpy(personalization+8, &le_N, 4); + memcpy(personalization+12, &le_K, 4); + + const uint8_t outlen = (512 / N) * GetSizeInBytes(N); + + //static_assert(!((!outlen) || (outlen > BLAKE2B_OUTBYTES))); + + blake2b_param param = {0}; + param.digest_length = outlen; + param.fanout = 1; + param.depth = 1; + + memcpy(¶m.personal, personalization, BLAKE2B_PERSONALBYTES); + + return blake2b_init_param(&base_state, ¶m); +} + +void GenerateHash(const eh_HashState& base_state, eh_index g, + unsigned char* hash, size_t hLen, size_t N, size_t R ) +{ + + + uint32_t myHash[16] = {0}; + uint32_t startIndex = g & 0xFFFFFFF0; + + for (uint32_t g2 = startIndex; g2 <= g; g2++) { + uint32_t tmpHash[16] = {0}; + + eh_HashState state; + state = base_state; + eh_index lei = htole32(g2); + blake2b_update(&state, (const unsigned 
char*) &lei, + sizeof(eh_index)); + + blake2b_final(&state, (unsigned char*)&tmpHash[0], static_cast(hLen)); + + for (uint32_t idx = 0; idx < 16; idx++) myHash[idx] += tmpHash[idx]; + } + + memcpy(hash, &myHash[0], hLen); + ZeroizeUnusedBits(N, R, hash, hLen); +} + +void ExpandArray(const unsigned char* in, size_t in_len, + unsigned char* out, size_t out_len, + size_t bit_len, size_t byte_pad) +{ + assert(bit_len >= 8); + assert(8*sizeof(uint32_t) >= bit_len); + + size_t out_width { (bit_len+7)/8 + byte_pad }; + assert(out_len == 8*out_width*in_len/bit_len); + + uint32_t bit_len_mask { ((uint32_t)1 << bit_len) - 1 }; + + // The acc_bits least-significant bits of acc_value represent a bit sequence + // in big-endian order. + size_t acc_bits = 0; + uint32_t acc_value = 0; + + size_t j = 0; + for (size_t i = 0; i < in_len; i++) { + acc_value = (acc_value << 8) | in[i]; + acc_bits += 8; + + // When we have bit_len or more bits in the accumulator, write the next + // output element. + if (acc_bits >= bit_len) { + acc_bits -= bit_len; + for (size_t x = 0; x < byte_pad; x++) { + out[j+x] = 0; + } + for (size_t x = byte_pad; x < out_width; x++) { + out[j+x] = ( + // Big-endian + acc_value >> (acc_bits+(8*(out_width-x-1))) + ) & ( + // Apply bit_len_mask across byte boundaries + (bit_len_mask >> (8*(out_width-x-1))) & 0xFF + ); + } + j += out_width; + } + } +} + +void CompressArray(const unsigned char* in, size_t in_len, + unsigned char* out, size_t out_len, + size_t bit_len, size_t byte_pad) +{ + assert(bit_len >= 8); + assert(8*sizeof(uint32_t) >= bit_len); + + size_t in_width { (bit_len+7)/8 + byte_pad }; + assert(out_len == (bit_len*in_len/in_width + 7)/8); + + uint32_t bit_len_mask { ((uint32_t)1 << bit_len) - 1 }; + + // The acc_bits least-significant bits of acc_value represent a bit sequence + // in big-endian order. 
+ size_t acc_bits = 0; + uint32_t acc_value = 0; + + size_t j = 0; + for (size_t i = 0; i < out_len; i++) { + // When we have fewer than 8 bits left in the accumulator, read the next + // input element. + if (acc_bits < 8) { + if (j < in_len) { + acc_value = acc_value << bit_len; + for (size_t x = byte_pad; x < in_width; x++) { + acc_value = acc_value | ( + ( + // Apply bit_len_mask across byte boundaries + in[j + x] & ((bit_len_mask >> (8 * (in_width - x - 1))) & 0xFF) + ) << (8 * (in_width - x - 1))); // Big-endian + } + j += in_width; + acc_bits += bit_len; + } + else { + acc_value <<= 8 - acc_bits; + acc_bits += 8 - acc_bits;; + } + } + + acc_bits -= 8; + out[i] = (acc_value >> acc_bits) & 0xFF; + } +} + +// Big-endian so that lexicographic array comparison is equivalent to integer +// comparison +void EhIndexToArray(const eh_index i, unsigned char* array) +{ + //static_assert(sizeof(eh_index) == 4); + eh_index bei = htobe32(i); + memcpy(array, &bei, sizeof(eh_index)); +} + +// Big-endian so that lexicographic array comparison is equivalent to integer +// comparison +eh_index ArrayToEhIndex(const unsigned char* array) +{ + //static_assert(sizeof(eh_index) == 4); + eh_index bei; + memcpy(&bei, array, sizeof(eh_index)); + return be32toh(bei); +} + +eh_trunc TruncateIndex(const eh_index i, const unsigned int ilen) +{ + // Truncate to 8 bits + //static_assert(sizeof(eh_trunc) == 1); + return (i >> (ilen - 8)) & 0xff; +} + +eh_index UntruncateIndex(const eh_trunc t, const eh_index r, const unsigned int ilen) +{ + eh_index i{t}; + return (i << (ilen - 8)) | r; +} + +std::vector GetIndicesFromMinimal(std::vector minimal, + size_t cBitLen) +{ + assert(((cBitLen+1)+7)/8 <= sizeof(eh_index)); + size_t lenIndices { 8*sizeof(eh_index)*minimal.size()/(cBitLen+1) }; + size_t bytePad { sizeof(eh_index) - ((cBitLen+1)+7)/8 }; + std::vector array(lenIndices); + ExpandArray(minimal.data(), minimal.size(), + array.data(), lenIndices, cBitLen+1, bytePad); + std::vector ret; + for 
(size_t i = 0; i < lenIndices; i += sizeof(eh_index)) { + ret.push_back(ArrayToEhIndex(array.data()+i)); + } + return ret; +} + +std::vector GetMinimalFromIndices(std::vector indices, + size_t cBitLen) +{ + assert(((cBitLen+1)+7)/8 <= sizeof(eh_index)); + size_t lenIndices { indices.size()*sizeof(eh_index) }; + size_t minLen { (cBitLen+1)*lenIndices/(8*sizeof(eh_index)) }; + size_t bytePad { sizeof(eh_index) - ((cBitLen+1)+7)/8 }; + std::vector array(lenIndices); + for (size_t i = 0; i < indices.size(); i++) { + EhIndexToArray(indices[i], array.data()+(i*sizeof(eh_index))); + } + std::vector ret(minLen); + CompressArray(array.data(), lenIndices, + ret.data(), minLen, cBitLen+1, bytePad); + return ret; +} + +template +StepRow::StepRow(const unsigned char* hashIn, size_t hInLen, + size_t hLen, size_t cBitLen) +{ + assert(hLen <= WIDTH); + ExpandArray(hashIn, hInLen, hash, hLen, cBitLen); +} + +template template +StepRow::StepRow(const StepRow& a) +{ + //static_assert(W <= WIDTH); + std::copy(a.hash, a.hash+W, hash); +} + +template +FullStepRow::FullStepRow(const unsigned char* hashIn, size_t hInLen, + size_t hLen, size_t cBitLen, eh_index i) : + StepRow {hashIn, hInLen, hLen, cBitLen} +{ + EhIndexToArray(i, hash+hLen); +} + +template template +FullStepRow::FullStepRow(const FullStepRow& a, const FullStepRow& b, size_t len, size_t lenIndices, size_t trim) : + StepRow {a} +{ + assert(len+lenIndices <= W); + assert(len-trim+(2*lenIndices) <= WIDTH); + for (size_t i = trim; i < len; i++) + hash[i-trim] = a.hash[i] ^ b.hash[i]; + if (a.IndicesBefore(b, len, lenIndices)) { + std::copy(a.hash+len, a.hash+len+lenIndices, hash+len-trim); + std::copy(b.hash+len, b.hash+len+lenIndices, hash+len-trim+lenIndices); + } else { + std::copy(b.hash+len, b.hash+len+lenIndices, hash+len-trim); + std::copy(a.hash+len, a.hash+len+lenIndices, hash+len-trim+lenIndices); + } +} + +template +FullStepRow& FullStepRow::operator=(const FullStepRow& a) +{ + std::copy(a.hash, a.hash+WIDTH, hash); 
+ return *this; +} + +template +bool StepRow::IsZero(size_t len) +{ + // This doesn't need to be constant time. + for (size_t i = 0; i < len; i++) { + if (hash[i] != 0) + return false; + } + return true; +} + +template +std::vector FullStepRow::GetIndices(size_t len, size_t lenIndices, + size_t cBitLen) const +{ + assert(((cBitLen+1)+7)/8 <= sizeof(eh_index)); + size_t minLen { (cBitLen+1)*lenIndices/(8*sizeof(eh_index)) }; + size_t bytePad { sizeof(eh_index) - ((cBitLen+1)+7)/8 }; + std::vector ret(minLen); + CompressArray(hash+len, lenIndices, ret.data(), minLen, cBitLen+1, bytePad); + return ret; +} + +template +bool HasCollision(StepRow& a, StepRow& b, size_t l) +{ + // This doesn't need to be constant time. + for (size_t j = 0; j < l; j++) { + if (a.hash[j] != b.hash[j]) + return false; + } + return true; +} + +template +TruncatedStepRow::TruncatedStepRow(const unsigned char* hashIn, size_t hInLen, + size_t hLen, size_t cBitLen, + eh_index i, unsigned int ilen) : + StepRow {hashIn, hInLen, hLen, cBitLen} +{ + hash[hLen] = TruncateIndex(i, ilen); +} + +template template +TruncatedStepRow::TruncatedStepRow(const TruncatedStepRow& a, const TruncatedStepRow& b, size_t len, size_t lenIndices, int trim) : + StepRow {a} +{ + assert(len+lenIndices <= W); + assert(len-trim+(2*lenIndices) <= WIDTH); + for (size_t i = static_cast(trim); i < len; i++) + hash[i-trim] = a.hash[i] ^ b.hash[i]; + if (a.IndicesBefore(b, len, lenIndices)) { + std::copy(a.hash+len, a.hash+len+lenIndices, hash+len-trim); + std::copy(b.hash+len, b.hash+len+lenIndices, hash+len-trim+lenIndices); + } else { + std::copy(b.hash+len, b.hash+len+lenIndices, hash+len-trim); + std::copy(a.hash+len, a.hash+len+lenIndices, hash+len-trim+lenIndices); + } +} + +template +TruncatedStepRow& TruncatedStepRow::operator=(const TruncatedStepRow& a) +{ + std::copy(a.hash, a.hash+WIDTH, hash); + return *this; +} + +template +std::shared_ptr TruncatedStepRow::GetTruncatedIndices(size_t len, size_t lenIndices) const +{ 
+ std::shared_ptr p (new eh_trunc[lenIndices], std::default_delete()); + std::copy(hash+len, hash+len+lenIndices, p.get()); + return p; +} + +template +void CollideBranches(std::vector>& X, const size_t hlen, const size_t lenIndices, const unsigned int clen, const unsigned int ilen, const eh_trunc lt, const eh_trunc rt) +{ + size_t i = 0; + size_t posFree = 0; + assert(X.size() > 0); + std::vector> Xc; + while (i < X.size() - 1) { + // 2b) Find next set of unordered pairs with collisions on the next n/(k+1) bits + size_t j = 1; + while (i+j < X.size() && + HasCollision(X[i], X[i+j], clen)) { + j++; + } + + // 2c) Calculate tuples (X_i ^ X_j, (i, j)) + for (size_t l = 0; l < j - 1; l++) { + for (size_t m = l + 1; m < j; m++) { + if (DistinctIndices(X[i+l], X[i+m], hlen, lenIndices)) { + if (IsValidBranch(X[i+l], hlen, ilen, lt) && IsValidBranch(X[i+m], hlen, ilen, rt)) { + Xc.emplace_back(X[i+l], X[i+m], hlen, lenIndices, clen); + } else if (IsValidBranch(X[i+m], hlen, ilen, lt) && IsValidBranch(X[i+l], hlen, ilen, rt)) { + Xc.emplace_back(X[i+m], X[i+l], hlen, lenIndices, clen); + } + } + } + } + + // 2d) Store tuples on the table in-place if possible + while (posFree < i+j && Xc.size() > 0) { + X[posFree++] = Xc.back(); + Xc.pop_back(); + } + + i += j; + } + + // 2e) Handle edge case where final table entry has no collision + while (posFree < X.size() && Xc.size() > 0) { + X[posFree++] = Xc.back(); + Xc.pop_back(); + } + + if (Xc.size() > 0) { + // 2f) Add overflow to end of table + X.insert(X.end(), Xc.begin(), Xc.end()); + } else if (posFree < X.size()) { + // 2g) Remove empty space at the end + X.erase(X.begin()+posFree, X.end()); + X.shrink_to_fit(); + } +} + +template +bool EquihashR::OptimisedSolve(const eh_HashState& base_state, + const std::function&)> validBlock, + const std::function cancelled) +{ + eh_index init_size { 1U << (CollisionBitLength + 1 - R) }; + eh_index recreate_size { UntruncateIndex(1, 0, CollisionBitLength + 1) }; + + // First run the 
algorithm with truncated indices + + const eh_index soln_size { 1 << K }; + std::vector> partialSolns; + int invalidCount = 0; + { + + // 1) Generate first list + size_t hashLen = HashLength; + size_t lenIndices = sizeof(eh_trunc); + std::vector> Xt; + Xt.reserve(init_size); + unsigned char tmpHash[HashOutput]; + for (eh_index g = 0; Xt.size() < init_size; g++) { + GenerateHash(base_state, g, tmpHash, HashOutput, N, R); + for (eh_index i = 0; i < IndicesPerHashOutput && Xt.size() < init_size; i++) { + Xt.emplace_back(tmpHash+(i*GetSizeInBytes(N)), GetSizeInBytes(N), HashLength, CollisionBitLength, + static_cast(g*IndicesPerHashOutput)+i, static_cast(CollisionBitLength + 1)); + } + if (cancelled(ListGeneration)) throw solver_cancelled; + } + + // 3) Repeat step 2 until 2n/(k+1) bits remain + for (unsigned int r = 1; r < K && Xt.size() > 0; r++) { + // 2a) Sort the list + std::sort(Xt.begin(), Xt.end(), CompareSR(CollisionByteLength)); + if (cancelled(ListSorting)) throw solver_cancelled; + + size_t i = 0; + size_t posFree = 0; + std::vector> Xc; + while (i < Xt.size() - 1) { + // 2b) Find next set of unordered pairs with collisions on the next n/(k+1) bits + size_t j = 1; + while (i+j < Xt.size() && + HasCollision(Xt[i], Xt[i+j], CollisionByteLength)) { + j++; + } + + // 2c) Calculate tuples (X_i ^ X_j, (i, j)) + //bool checking_for_zero = (i == 0 && Xt[0].IsZero(hashLen)); + for (size_t l = 0; l < j - 1; l++) { + for (size_t m = l + 1; m < j; m++) { + // We truncated, so don't check for distinct indices here + TruncatedStepRow Xi {Xt[i+l], Xt[i+m], + hashLen, lenIndices, + CollisionByteLength}; + if (!(Xi.IsZero(hashLen-CollisionByteLength) && + IsProbablyDuplicate(Xi.GetTruncatedIndices(hashLen-CollisionByteLength, 2*lenIndices), + 2*lenIndices))) { + Xc.emplace_back(Xi); + } + } + } + + // 2d) Store tuples on the table in-place if possible + while (posFree < i+j && Xc.size() > 0) { + Xt[posFree++] = Xc.back(); + Xc.pop_back(); + } + + i += j; + if 
(cancelled(ListColliding)) throw solver_cancelled; + } + + // 2e) Handle edge case where final table entry has no collision + while (posFree < Xt.size() && Xc.size() > 0) { + Xt[posFree++] = Xc.back(); + Xc.pop_back(); + } + + if (Xc.size() > 0) { + // 2f) Add overflow to end of table + Xt.insert(Xt.end(), Xc.begin(), Xc.end()); + } else if (posFree < Xt.size()) { + // 2g) Remove empty space at the end + Xt.erase(Xt.begin()+posFree, Xt.end()); + Xt.shrink_to_fit(); + } + + hashLen -= CollisionByteLength; + lenIndices *= 2; + if (cancelled(RoundEnd)) throw solver_cancelled; + } + + // k+1) Find a collision on last 2n(k+1) bits + if (Xt.size() > 1) { + std::sort(Xt.begin(), Xt.end(), CompareSR(hashLen)); + if (cancelled(FinalSorting)) throw solver_cancelled; + size_t i = 0; + while (i < Xt.size() - 1) { + size_t j = 1; + while (i+j < Xt.size() && + HasCollision(Xt[i], Xt[i+j], hashLen)) { + j++; + } + + for (size_t l = 0; l < j - 1; l++) { + for (size_t m = l + 1; m < j; m++) { + TruncatedStepRow res(Xt[i+l], Xt[i+m], + hashLen, lenIndices, 0); + auto soln = res.GetTruncatedIndices(hashLen, 2*lenIndices); + if (!IsProbablyDuplicate(soln, 2*lenIndices)) { + partialSolns.push_back(soln); + } + } + } + + i += j; + if (cancelled(FinalColliding)) throw solver_cancelled; + } + } + + } // Ensure Xt goes out of scope and is destroyed + + + // Now for each solution run the algorithm again to recreate the indices + for (std::shared_ptr partialSoln : partialSolns) { + std::set> solns; + size_t hashLen; + size_t lenIndices; + unsigned char tmpHash[HashOutput]; + std::vector>>> X; + X.reserve(K+1); + + // 3) Repeat steps 1 and 2 for each partial index + for (eh_index i = 0; i < soln_size; i++) { + // 1) Generate first list of possibilities + std::vector> icv; + icv.reserve(recreate_size); + for (eh_index j = 0; j < recreate_size; j++) { + eh_index newIndex { UntruncateIndex(partialSoln.get()[i], j, CollisionBitLength + 1) }; + if (j == 0 || newIndex % IndicesPerHashOutput == 0) { 
+ GenerateHash(base_state, newIndex/IndicesPerHashOutput, + tmpHash, HashOutput, N, R); + } + icv.emplace_back(tmpHash+((newIndex % IndicesPerHashOutput) * GetSizeInBytes(N)), + GetSizeInBytes(N), HashLength, CollisionBitLength, newIndex); + if (cancelled(PartialGeneration)) throw solver_cancelled; + } + boost::optional>> ic = icv; + + // 2a) For each pair of lists: + hashLen = HashLength; + lenIndices = sizeof(eh_index); + size_t rti = i; + for (size_t r = 0; r <= K; r++) { + // 2b) Until we are at the top of a subtree: + if (r < X.size()) { + if (X[r]) { + // 2c) Merge the lists + ic->reserve(ic->size() + X[r]->size()); + ic->insert(ic->end(), X[r]->begin(), X[r]->end()); + std::sort(ic->begin(), ic->end(), CompareSR(hashLen)); + if (cancelled(PartialSorting)) throw solver_cancelled; + size_t lti = rti-(static_cast(1)<size() == 0) + goto invalidsolution; + + X[r] = boost::none; + hashLen -= CollisionByteLength; + lenIndices *= 2; + rti = lti; + } else { + X[r] = *ic; + break; + } + } else { + X.push_back(ic); + break; + } + if (cancelled(PartialSubtreeEnd)) throw solver_cancelled; + } + if (cancelled(PartialIndexEnd)) throw solver_cancelled; + } + + // We are at the top of the tree + assert(X.size() == K+1); + for (FullStepRow row : *X[K]) { + auto soln = row.GetIndices(hashLen, lenIndices, CollisionBitLength); + assert(soln.size() == beamhash_solution_size(N, K)); + solns.insert(soln); + } + for (auto soln : solns) { + if (validBlock(soln)) + return true; + } + if (cancelled(PartialEnd)) throw solver_cancelled; + continue; + +invalidsolution: + invalidCount++; + } + + return false; +} + +template +bool EquihashR::IsValidSolution(const eh_HashState& base_state, std::vector soln) +{ + if (soln.size() != SolutionWidth) { + return false; + } + + std::vector> X; + X.reserve(1 << K); + unsigned char tmpHash[HashOutput]; + for (eh_index i : GetIndicesFromMinimal(soln, CollisionBitLength)) { + if (i >= (1U << (CollisionBitLength + 1 - R))) { + return false; + } + 
GenerateHash(base_state, i/IndicesPerHashOutput, tmpHash, HashOutput, N, R); + X.emplace_back(tmpHash+((i % IndicesPerHashOutput) * GetSizeInBytes(N)), + GetSizeInBytes(N), HashLength, CollisionBitLength, i); + } + + size_t hashLen = HashLength; + size_t lenIndices = sizeof(eh_index); + while (X.size() > 1) { + std::vector> Xc; + for (size_t i = 0; i < X.size(); i += 2) { + if (!HasCollision(X[i], X[i+1], CollisionByteLength)) { + return false; + } + if (X[i+1].IndicesBefore(X[i], hashLen, lenIndices)) { + return false; + } + if (!DistinctIndices(X[i], X[i+1], hashLen, lenIndices)) { + return false; + } + Xc.emplace_back(X[i], X[i+1], hashLen, lenIndices, CollisionByteLength); + } + X = Xc; + hashLen -= CollisionByteLength; + lenIndices *= 2; + } + + assert(X.size() == 1); + return X[0].IsZero(hashLen); +} + +// Explicit instantiations for BeamHashI +template int EquihashR<150,5,0>::InitialiseState(eh_HashState& base_state); +template bool EquihashR<150,5,0>::IsValidSolution(const eh_HashState& base_state, std::vector soln); +template bool EquihashR<150,5,0>::OptimisedSolve(const eh_HashState& base_state, + const std::function&)> validBlock, + const std::function cancelled); + +// Explicit instantiations for BeamHashII +template int EquihashR<150,5,3>::InitialiseState(eh_HashState& base_state); +template bool EquihashR<150,5,3>::IsValidSolution(const eh_HashState& base_state, std::vector soln); +template bool EquihashR<150,5,3>::OptimisedSolve(const eh_HashState& base_state, + const std::function&)> validBlock, + const std::function cancelled); diff --git a/src/powScheme.h b/src/powScheme.h new file mode 100644 index 0000000..0164e38 --- /dev/null +++ b/src/powScheme.h @@ -0,0 +1,42 @@ +#include + +#ifndef POWSCHEME_H +#define POWSCHEME_H + +#include "blake/blake2.h" + + +enum SolverCancelCheck +{ + ListGeneration, + ListSorting, + ListColliding, + RoundEnd, + FinalSorting, + FinalColliding, + PartialGeneration, + PartialSorting, + PartialSubtreeEnd, + 
PartialIndexEnd, + PartialEnd, + MixElements +}; + +class SolverCancelledException : public std::exception +{ + virtual const char* what() const throw() { + return "BeamHash solver was cancelled"; + } +}; + + +class PoWScheme { +public: + virtual int InitialiseState(blake2b_state& base_state) = 0; + virtual bool IsValidSolution(const blake2b_state& base_state, std::vector soln) = 0; + virtual bool OptimisedSolve(const blake2b_state& base_state, + const std::function&)> validBlock, + const std::function cancelled) = 0; +}; + +#endif \ No newline at end of file diff --git a/src/sha256.h b/src/sha256.h new file mode 100644 index 0000000..606cd71 --- /dev/null +++ b/src/sha256.h @@ -0,0 +1,32 @@ +// Copyright (c) 2014 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_CRYPTO_SHA256_H +#define BITCOIN_CRYPTO_SHA256_H + +#include +#include + +/** A hasher class for SHA-256. 
*/ +class CSHA256 +{ +public: + static const size_t OUTPUT_SIZE = 32; + + CSHA256(); + CSHA256& Write(const unsigned char* data, size_t len); + void Finalize(unsigned char hash[OUTPUT_SIZE]); + void FinalizeNoPadding(unsigned char hash[OUTPUT_SIZE]) { + FinalizeNoPadding(hash, true); + }; + CSHA256& Reset(); + +private: + uint32_t s[8]; + unsigned char buf[64]; + size_t bytes; + void FinalizeNoPadding(unsigned char hash[OUTPUT_SIZE], bool enforce_compression); +}; + +#endif // BITCOIN_CRYPTO_SHA256_H \ No newline at end of file diff --git a/test.js b/test.js new file mode 100644 index 0000000..2124718 --- /dev/null +++ b/test.js @@ -0,0 +1,43 @@ +const beamhash = require('./index'); + +verify2Valid(); +verify2Invalid(); + + +function verify2Valid() { + + console.log('Verify valid BeamHashII'); + + const inputBuf = Buffer.from('f08e259aa23e1f517393a9e8d4634f9893b4058bf474233a9102e94ffee5fbb6', 'hex'); + const nonceBuf = Buffer.from('17dfb4348b000000', 'hex'); + const outputBuf = Buffer.from('01eb634242b17097be184f8225097425449eaad1167b585a1ee2053f5ec67529c07abec863f2c10c979dc7b41b01c258f8733fdd01f78bc60a7dd08de06465cd86025c6946c6ded03300c419323f08158c872f6a9168bc1467317509929d4500807102a69c48f7f4', 'hex'); + + const isValid = beamhash.verify2(inputBuf, nonceBuf, outputBuf); + + if (isValid) { + console.log('PASS'); + } + else { + console.log('FAIL'); + process.exit(-1); + } +} + +function verify2Invalid() { + + console.log('Verify invalid BeamHashII'); + + const inputBuf = Buffer.from('f08e259aa23e1f517393a9e8d4634f9893b4058bf474233a9102e94ffee5fbb6', 'hex'); + const nonceBuf = Buffer.from('17dfb4348b000000', 'hex'); + const outputBuf = Buffer.alloc(104, 0); + + const isValid = beamhash.verify2(inputBuf, nonceBuf, outputBuf); + + if (isValid) { + console.log('FAIL'); + process.exit(-1); + } + else { + console.log('PASS'); + } +} \ No newline at end of file