diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..9bce637 --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,33 @@ +name: Build & Test Package +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + linux-node10: + runs-on: ubuntu-16.04 + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-node@v1 + with: + node-version: '10.x' + registry-url: 'https://npm.pkg.github.com' + - run: sudo apt install libboost-dev + - run: npm install + - run: npm test + + linux-node12: + runs-on: ubuntu-18.04 + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-node@v1 + with: + node-version: '12.x' + registry-url: 'https://npm.pkg.github.com' + - run: sudo apt install libboost-dev + - run: npm install + - run: npm test \ No newline at end of file diff --git a/.github/workflows/publish-release.yaml b/.github/workflows/publish-release.yaml new file mode 100644 index 0000000..722513a --- /dev/null +++ b/.github/workflows/publish-release.yaml @@ -0,0 +1,18 @@ +name: Publish Package +on: + release: + types: [created] +jobs: + publish: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-node@v1 + with: + node-version: '10.x' + registry-url: 'https://npm.pkg.github.com' + - run: sudo apt install libboost-all-dev + - run: npm install + - run: npm publish + env: + NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..23d0199 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.vscode/ +.idea/ +*.iml +node_modules/ +build/ +cmake-build-debug/ \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..1ad037c --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,40 @@ +cmake_minimum_required(VERSION 3.5.1) +project(hasherbeamhash) + +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread") 
+set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +file(GLOB hasher_beamhash_SRC + "src/*/*.c" + "src/*.c" + "src/*.cpp" + "src/*/*.cpp" + ) + +#ff building Node JS Addon +if (CMAKE_JS_VERSION) + + set(CMAKE_EXE_LINKER_FLAGS "-static-libgcc -static-libstdc++ -static") + + # add node include directories, fix potential backslashes + foreach(CMAKE_JS_INC_ITEM ${CMAKE_JS_INC}) + string(REPLACE "\\" "/" CMAKE_JS_INC_ITEM ${CMAKE_JS_INC_ITEM}) + message(STATUS "include_directories ${CMAKE_JS_INC_ITEM}") + include_directories(${CMAKE_JS_INC_ITEM}) + endforeach(CMAKE_JS_INC_ITEM) + + string(REPLACE "\\" "/" PROJECT_SOURCE_DIR ${PROJECT_SOURCE_DIR}) + message(STATUS "include_directories ${PROJECT_SOURCE_DIR}") + include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}) + include_directories(${PROJECT_NAME} PUBLIC "./src") + + # include static libraries + + add_library(${PROJECT_NAME} SHARED ${hasher_beamhash_SRC} "hasherbeamhash.cc") + set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node") + +endif() + + + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5777b3d --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 JCThePants, contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..83a91fa --- /dev/null +++ b/README.md @@ -0,0 +1,91 @@ +hasher-beamhash +=============== + +This is a Node module for simple hashing and verifying [Beam coin](https://beam.mw) proof-of-work solutions. +Most of the native code comes from or is adapted from [Beam source code](https://github.com/BeamMW/beam/tree/master/3rdparty/crypto). + +This module has been developed and tested on [Node v10.17](https://nodejs.org/) and +[Ubuntu 16.04](http://releases.ubuntu.com/16.04/) for the [Beam mining pool](https://mintpond.com/#!/beam) at [MintPond](https://mintpond.com). + +## Install ## +__Install as Dependency in NodeJS Project__ +```bash +# Install from Github NPM repository + +sudo apt-get install build-essential +sudo apt-get install libboost-dev +npm config set @mintpond:registry https://npm.pkg.github.com/mintpond +npm config set //npm.pkg.github.com/:_authToken + +npm install @mintpond/hasher-beamhash@0.1.0 --save +``` +[Creating a personal access token](https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line) + +__Install & Test__ +```bash +# Install nodejs v10 +curl -sL https://deb.nodesource.com/setup_10.x | sudo -E bash - +sudo apt-get install nodejs -y + +# Download hasher-beamhash +git clone https://github.com/MintPond/hasher-beamhash + +# build +cd hasher-beamhash +npm install + +# test +npm test +``` + +## Usage ## +__Verify BeamHashII__ +```javascript +const beamhash = require('@mintpond/hasher-beamhash'); + +/** + * Verify a BeamHashII solution. 
+ * + * @param inputBuf {Buffer} + * @param nonceBuf {Buffer} + * @param solutionBuf {Buffer} + * @returns {boolean} True if valid, otherwise false. + */ +const isValid = beamhash.verify2(inputBuf, nonceBuf, solutionBuf); + +if (isValid) { + console.log('Valid solution'); +} +else { + console.log('Invalid solution'); +} +``` + +__Verify BeamHashIII__ +```javascript +const beamhash = require('@mintpond/hasher-beamhash'); + +/** + * Verify a BeamHashIII solution. + * + * @param inputBuf {Buffer} + * @param nonceBuf {Buffer} + * @param solutionBuf {Buffer} + * @returns {boolean} True if valid, otherwise false. + */ +const isValid = beamhash.verify3(inputBuf, nonceBuf, solutionBuf); + +if (isValid) { + console.log('Valid solution'); +} +else { + console.log('Invalid solution'); +} +``` + +## Dependencies ## +In Ubuntu: +``` + sudo apt-get install build-essential + sudo apt-get install libboost-dev +``` \ No newline at end of file diff --git a/binding.gyp b/binding.gyp new file mode 100644 index 0000000..6acd32e --- /dev/null +++ b/binding.gyp @@ -0,0 +1,26 @@ +{ + "targets": [ + { + "target_name": "hasherbeamhash", + "sources": [ + "src/blake/blake2b.cpp", + "src/beamHashIII_imp.cpp", + "src/equihashR_imp.cpp", + "hasherbeamhash.cc" + ], + "include_dirs": [ + ".", + "src", + " +#include +#include +#include +#include +#include "nan.h" +#include "src/equihashR.h" +#include "src/beamHashIII.h" + +using namespace node; +using namespace v8; + +#define THROW_ERROR_EXCEPTION(x) Nan::ThrowError(x) + +static BeamHash_III BeamHashIII; + + +bool verifyPoWScheme(PoWScheme &scheme, const char *input_ptr, const char *nonce64_ptr, Local solution) { + + const char *solution_ptr = (char *) Buffer::Data(solution); + + blake2b_state state; + scheme.InitialiseState(state); + blake2b_update(&state, (const unsigned char *) input_ptr, 32); + blake2b_update(&state, (const unsigned char *) nonce64_ptr, 8); + + std::vector solution_vec(solution_ptr, solution_ptr + 
node::Buffer::Length(solution)); + + return scheme.IsValidSolution(state, solution_vec); +} + + +NAN_METHOD(verify1) { + + if (info.Length() < 3) { + return THROW_ERROR_EXCEPTION("hasher-beamhash.verify1 - 3 arguments expected."); + } + + const char* input_ptr = (char*)Buffer::Data(Nan::To(info[0]).ToLocalChecked()); + const char* nonce64_ptr = (char*)Buffer::Data(Nan::To(info[1]).ToLocalChecked()); + Local solution = Nan::To(info[2]).ToLocalChecked(); + + bool isValid = verifyPoWScheme(BeamHashI, input_ptr, nonce64_ptr, solution); + + if (isValid) { + info.GetReturnValue().Set(Nan::True()); + } + else { + info.GetReturnValue().Set(Nan::False()); + } +} + + +NAN_METHOD(verify2) { + + if (info.Length() < 3) { + return THROW_ERROR_EXCEPTION("hasher-beamhash.verify2 - 3 arguments expected."); + } + + const char* input_ptr = (char*)Buffer::Data(Nan::To(info[0]).ToLocalChecked()); + const char* nonce64_ptr = (char*)Buffer::Data(Nan::To(info[1]).ToLocalChecked()); + Local solution = Nan::To(info[2]).ToLocalChecked(); + + bool isValid = verifyPoWScheme(BeamHashII, input_ptr, nonce64_ptr, solution); + + if (isValid) { + info.GetReturnValue().Set(Nan::True()); + } + else { + info.GetReturnValue().Set(Nan::False()); + } +} + + +NAN_METHOD(verify3) { + + if (info.Length() < 3) { + return THROW_ERROR_EXCEPTION("hasher-beamhash.verify3 - 3 arguments expected."); + } + + const char* input_ptr = (char*)Buffer::Data(Nan::To(info[0]).ToLocalChecked()); + const char* nonce64_ptr = (char*)Buffer::Data(Nan::To(info[1]).ToLocalChecked()); + Local solution = Nan::To(info[2]).ToLocalChecked(); + + bool isValid = verifyPoWScheme(BeamHashIII, input_ptr, nonce64_ptr, solution); + + if (isValid) { + info.GetReturnValue().Set(Nan::True()); + } + else { + info.GetReturnValue().Set(Nan::False()); + } +} + + +NAN_MODULE_INIT(init) { + + Nan::Set(target, Nan::New("verify1").ToLocalChecked(), + Nan::GetFunction(Nan::New(verify1)).ToLocalChecked()); + + Nan::Set(target, 
Nan::New("verify2").ToLocalChecked(), + Nan::GetFunction(Nan::New(verify2)).ToLocalChecked()); + + Nan::Set(target, Nan::New("verify3").ToLocalChecked(), + Nan::GetFunction(Nan::New(verify3)).ToLocalChecked()); +} + + +NODE_MODULE(hasherbeamhash, init) \ No newline at end of file diff --git a/index.js b/index.js new file mode 100644 index 0000000..4a1ab1b --- /dev/null +++ b/index.js @@ -0,0 +1,55 @@ +'use strict' + +const beamhash = require('bindings')('hasherbeamhash.node'); + +module.exports = { + + /** + * Verify BeamHashII solution. + * + * @param inputBuf {Buffer} + * @param nonceBuf {Buffer} + * @param solutionBuf {Buffer} + * @returns {boolean} True if valid, otherwise false. + */ + verify2: verify2, + + /** + * Verify BeamHashIII solution. + * + * @param inputBuf {Buffer} + * @param nonceBuf {Buffer} + * @param solutionBuf {Buffer} + * @returns {boolean} True if valid, otherwise false. + */ + verify3: verify3 +}; + + +function verify2(inputBuf, nonceBuf, solutionBuf) { + + _expectBuffer(inputBuf, 'inputBuf', 32); + _expectBuffer(nonceBuf, 'nonceBuf', 8); + _expectBuffer(solutionBuf, 'solutionBuf', 104); + + return beamhash.verify2(inputBuf, nonceBuf, solutionBuf); +} + + +function verify3(inputBuf, nonceBuf, solutionBuf) { + + _expectBuffer(inputBuf, 'inputBuf', 32); + _expectBuffer(nonceBuf, 'nonceBuf', 8); + _expectBuffer(solutionBuf, 'solutionBuf'); + + return beamhash.verify3(inputBuf, nonceBuf, solutionBuf); +} + + +function _expectBuffer(buffer, name, size) { + if (!Buffer.isBuffer(buffer)) + throw new Error(`"${name}" is expected to be a Buffer. Got ${(typeof buffer)} instead.`); + + if (size && buffer.length !== size) + throw new Error(`"${name}" is expected to be exactly ${size} bytes. 
Got ${buffer.length} instead.`); +} \ No newline at end of file diff --git a/package.json b/package.json new file mode 100644 index 0000000..9b9a905 --- /dev/null +++ b/package.json @@ -0,0 +1,29 @@ +{ + "name": "@mintpond/hasher-beamhash", + "version": "0.1.0", + "description": "BeamHash algorithm hashing and verification functions for NodeJS.", + "main": "index.js", + "author": "JCThePants", + "license": "MIT", + "dependencies": { + "bindings": "^1.3.0", + "nan": "^2.6.2" + }, + "scripts": { + "test": "node test" + }, + "homepage": "https://github.com/MintPond/hasher-beamhash", + "bugs": { + "url": "https://github.com/MintPond/hasher-beamhash/issues" + }, + "repository": { + "type": "git", + "url": "https://github.com/MintPond/hasher-beamhash.git" + }, + "publishConfig": { + "registry": "https://npm.pkg.github.com/" + }, + "engines": { + "node": ">=10.17.0" + } +} \ No newline at end of file diff --git a/src/beamHashIII.h b/src/beamHashIII.h new file mode 100644 index 0000000..f8fbb92 --- /dev/null +++ b/src/beamHashIII.h @@ -0,0 +1,54 @@ +// Copyright (c) 2020 The Beam Team + +#ifndef BEAMHASH_H +#define BEAMHASH_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "powScheme.h" + +const uint32_t workBitSize=448; +const uint32_t collisionBitSize=24; +const uint32_t numRounds=5; + +class stepElem { + friend class BeamHash_III; + + private: + std::bitset workBits; + std::vector indexTree; + + public: + stepElem(const uint64_t * prePow, uint32_t index); + stepElem(const stepElem &a, const stepElem &b, uint32_t remLen); + + void applyMix(uint32_t remLen); + uint32_t getCollisionBits() const; + bool isZero(); + + friend bool hasCollision(stepElem &a, stepElem &b); + friend bool distinctIndices(stepElem &a, stepElem &b); + friend bool indexAfter(stepElem &a, stepElem &b); + friend uint64_t getLowBits(stepElem test); +}; + +class BeamHash_III : public PoWScheme { + public: + int InitialiseState(blake2b_state& 
base_state); + bool IsValidSolution(const blake2b_state& base_state, std::vector soln); + + + bool OptimisedSolve(const blake2b_state& base_state, + const std::function&)> validBlock, + const std::function cancelled); +}; + +#endif \ No newline at end of file diff --git a/src/beamHashIII_imp.cpp b/src/beamHashIII_imp.cpp new file mode 100644 index 0000000..6582e20 --- /dev/null +++ b/src/beamHashIII_imp.cpp @@ -0,0 +1,377 @@ + +#include "beamHashIII.h" + + +namespace sipHash { + +static uint64_t rotl(uint64_t x, uint64_t b) { + return (x << b) | (x >> (64 - b)); +} + +#define sipRound() { \ + v0 += v1; v2 += v3; \ + v1 = rotl(v1,13); \ + v3 = rotl(v3,16); \ + v1 ^= v0; v3 ^= v2; \ + v0 = rotl(v0,32); \ + v2 += v1; v0 += v3; \ + v1 = rotl(v1,17); \ + v3 = rotl(v3,21); \ + v1 ^= v2; v3 ^= v0; \ + v2 = rotl(v2,32); \ +} + +uint64_t siphash24(uint64_t state0, uint64_t state1, uint64_t state2, uint64_t state3, uint64_t nonce) { + uint64_t v0, v1, v2, v3; + + v0 = state0; v1=state1; v2=state2; v3=state3; + v3 ^= nonce; + sipRound(); + sipRound(); + v0 ^= nonce; + v2 ^= 0xff; + sipRound(); + sipRound(); + sipRound(); + sipRound(); + + return (v0 ^ v1 ^ v2 ^ v3); +} + +} //end namespace sipHash + + +stepElem::stepElem(const uint64_t * prePow, uint32_t index) { + workBits.reset(); + + for (int32_t i=6; i>=0; i--) { + workBits = (workBits << 64); + uint64_t hash=sipHash::siphash24(prePow[0],prePow[1],prePow[2],prePow[3],(index << 3)+i); + workBits |= hash; + } + + indexTree.assign(1, index); +} + +stepElem::stepElem(const stepElem &a, const stepElem &b, uint32_t remLen) { + // Create a new rounds step element from matching two ancestors + workBits.reset(); + + workBits = a.workBits ^ b.workBits; + workBits = (workBits >> collisionBitSize); + + std::bitset mask; + mask.set(); + mask = (mask >> (workBitSize-remLen)); + workBits &= mask; + + if (a.indexTree[0] < b.indexTree[0]) { + indexTree.insert(indexTree.end(), a.indexTree.begin(), a.indexTree.end()); + 
indexTree.insert(indexTree.end(), b.indexTree.begin(), b.indexTree.end()); + } else { + indexTree.insert(indexTree.end(), b.indexTree.begin(), b.indexTree.end()); + indexTree.insert(indexTree.end(), a.indexTree.begin(), a.indexTree.end()); + } +} + +void stepElem::applyMix(uint32_t remLen) { + std::bitset<512> tempBits(workBits.to_string()); + + // Add in the bits of the index tree to the end of work bits + uint32_t padNum = ((512-remLen) + collisionBitSize) / (collisionBitSize + 1); + padNum = std::min(padNum, static_cast(indexTree.size())); + + for (uint32_t i=0; i tmp(indexTree[i]); + tmp = tmp << (remLen+i*(collisionBitSize + 1)); + tempBits |= tmp; + } + + + // Applyin the mix from the lined up bits + std::bitset<512> mask(0xFFFFFFFFFFFFFFFFUL); + uint64_t result = 0; + for (uint32_t i=0; i<8; i++) { + uint64_t tmp = (tempBits & mask).to_ullong(); + tempBits = tempBits >> 64; + + result += sipHash::rotl(tmp, (29*(i+1)) & 0x3F); + } + result = sipHash::rotl(result, 24); + + + // Wipe out lowest 64 bits in favor of the mixed bits + workBits = (workBits >> 64); + workBits = (workBits << 64); + workBits |= std::bitset(result); +} + +uint32_t stepElem::getCollisionBits() const { + std::bitset mask((1 << collisionBitSize) - 1); + return (uint32_t) (workBits & mask).to_ullong(); +} + +bool stepElem::isZero() { + return workBits.none(); +} + +uint64_t getLowBits(stepElem test) { + std::bitset mask(~0ULL); + return (uint64_t) (test.workBits & mask).to_ullong(); +} +/******** + + Friend Functions to compare step elements + +********/ + + +bool hasCollision(stepElem &a, stepElem &b) { + return (a.getCollisionBits() == b.getCollisionBits()); +} + +bool distinctIndices(stepElem &a, stepElem &b) { + for (uint32_t indexA : a.indexTree) { + for (uint32_t indexB : b.indexTree) { + if (indexA == indexB) return false; + } + } + return true; +} + +bool indexAfter(stepElem &a, stepElem &b) { + return (a.indexTree[0] < b.indexTree[0]); +} + +bool sortStepElement(const stepElem &a, 
const stepElem &b) { + return (a.getCollisionBits() < b.getCollisionBits()); +} + + +/******** + + Beam Hash III Verify Functions & CPU Miner + +********/ + +std::vector GetIndicesFromMinimal(std::vector soln) { + std::bitset<800> inStream; + std::bitset<800> mask((1 << (collisionBitSize+1))-1); + + inStream.reset(); + for (int32_t i = 99; i>=0; i--) { + inStream = (inStream << 8); + inStream |= (uint64_t) soln[i]; + } + + std::vector res; + for (uint32_t i=0; i<32; i++) { + res.push_back((uint32_t) (inStream & mask).to_ullong() ); + inStream = (inStream >> (collisionBitSize+1)); + } + + return res; +} + +std::vector GetMinimalFromIndices(std::vector sol) { + std::bitset<800> inStream; + std::bitset<800> mask(0xFF); + + inStream.reset(); + for (int32_t i = static_cast(sol.size()); i>=0; i--) { + inStream = (inStream << (collisionBitSize+1)); + inStream |= (uint64_t) sol[i]; + } + + std::vector res; + for (uint32_t i=0; i<100; i++) { + res.push_back((uint8_t) (inStream & mask).to_ullong() ); + inStream = (inStream >> 8); + } + + return res; +} + +int BeamHash_III::InitialiseState(blake2b_state& base_state) { + unsigned char personalization[BLAKE2B_PERSONALBYTES] = {}; + memcpy(personalization, "Beam-PoW", 8); + memcpy(personalization+8, &workBitSize, 4); + memcpy(personalization+12, &numRounds, 4); + + const uint8_t outlen = 32; + + blake2b_param param = {0}; + param.digest_length = outlen; + param.fanout = 1; + param.depth = 1; + + memcpy(¶m.personal, personalization, BLAKE2B_PERSONALBYTES); + return blake2b_init_param(&base_state, ¶m); +} + + +bool BeamHash_III::IsValidSolution(const blake2b_state& base_state, std::vector soln) { + + if (soln.size() != 104) { + return false; + } + + uint64_t prePow[4]; + blake2b_state state = base_state; + // Last 4 bytes of solution are our extra nonce + blake2b_update(&state, (uint8_t*) &soln[100], 4); + blake2b_final(&state, (uint8_t*) &prePow[0], static_cast(32)); + + // This will only evaluate bytes 0..99 + std::vector 
indices = GetIndicesFromMinimal(soln); + + std::vector X; + for (uint32_t i=0; i 1) { + std::vector Xtmp; + + for (size_t i = 0; i < X.size(); i += 2) { + uint32_t remLen = workBitSize-(round-1)*collisionBitSize; + if (round == 5) remLen -= 64; + + X[i].applyMix(remLen); + X[i+1].applyMix(remLen); + + if (!hasCollision(X[i], X[i+1])) { + //std::cout << "Collision Error" << i << " " << X.size() << " " << X[i].getCollisionBits() << " " << X[i+1].getCollisionBits() << std::endl; + return false; + } + + if (!distinctIndices(X[i], X[i+1])) { + //std::cout << "Non-Distinct" << i << " " << X.size() << std::endl; + return false; + } + + if (!indexAfter(X[i], X[i+1])) { + //std::cout << "Index Order" << i << " " << X.size() << std::endl; + return false; + } + + remLen = workBitSize-round*collisionBitSize; + if (round == 4) remLen -= 64; + if (round == 5) remLen = collisionBitSize; + + Xtmp.emplace_back(X[i], X[i+1], remLen); + } + + X = Xtmp; + round++; + } + + return X[0].isZero(); +} + + +SolverCancelledException beamSolverCancelled; + +bool BeamHash_III::OptimisedSolve(const blake2b_state& base_state, + const std::function&)> validBlock, + const std::function cancelled) { + + uint64_t prePow[4]; + blake2b_state state = base_state; + + uint8_t extraNonce[4] = {0}; + + blake2b_update(&state, (uint8_t*) &extraNonce, 4); + blake2b_final(&state, (uint8_t*) &prePow[0], static_cast(32)); + + std::vector elements; + elements.reserve(1 << (collisionBitSize+1)); + + // Seeding + for (uint32_t i=0; i<(1 << (collisionBitSize+1)); i++) { + elements.emplace_back(&prePow[0], i); + if (cancelled(ListGeneration)) throw beamSolverCancelled; + } + + // Round 1 to 5 + uint32_t round; + for (round=1; round<5; round++) { + + uint32_t remLen = workBitSize-(round-1)*collisionBitSize; + + // Mixing of elements + for (uint32_t i=0; i outElements; + outElements.reserve(1 << (collisionBitSize+1)); + + for (uint32_t i=0; i sol = GetMinimalFromIndices(temp.indexTree); + + // Adding the extra nonce + 
for (uint32_t k=0; k<4; k++) sol.push_back(extraNonce[k]); + + if (validBlock(sol)) return true; + } + } else { + break; + } + j++; + } + if (cancelled(ListColliding)) throw beamSolverCancelled; + } + + return false; +} diff --git a/src/blake/blake2-config.h b/src/blake/blake2-config.h new file mode 100644 index 0000000..327a9b7 --- /dev/null +++ b/src/blake/blake2-config.h @@ -0,0 +1,82 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Written in 2012 by Samuel Neves + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see . +*/ +#pragma once +#ifndef __BLAKE2_CONFIG_H__ +#define __BLAKE2_CONFIG_H__ + +#if defined(_M_IX86_FP) + #if _M_IX86_FP == 2 + #define HAVE_SSE2 + #ifndef HAVE_AVX + #define HAVE_AVX + #endif + #endif +#elif defined(_M_AMD64) || defined(_M_X64) + #define HAVE_SSSE3 +#endif + +// These don't work everywhere +#if defined(__SSE2__) +#define HAVE_SSE2 +#endif + +#if defined(__SSSE3__) +#define HAVE_SSSE3 +#endif + +#if defined(__SSE4_1__) +#define HAVE_SSE41 +#endif + +#if defined(__AVX__) || defined(__AVX2__) +#define HAVE_AVX +#endif + +#if defined(__XOP__) +#define HAVE_XOP +#endif + + +#ifdef HAVE_AVX2 +#ifndef HAVE_AVX +#define HAVE_AVX +#endif +#endif + +#ifdef HAVE_XOP +#ifndef HAVE_AVX +#define HAVE_AVX +#endif +#endif + +#ifdef HAVE_AVX +#ifndef HAVE_SSE41 +#define HAVE_SSE41 +#endif +#endif + +#ifdef HAVE_SSE41 +#ifndef HAVE_SSSE3 +#define HAVE_SSSE3 +#endif +#endif + +#ifdef HAVE_SSSE3 +#define HAVE_SSE2 +#endif + +#if !defined(HAVE_SSE2) +#error "This code requires at least SSE2." 
+#endif + +#endif diff --git a/src/blake/blake2-impl.h b/src/blake/blake2-impl.h new file mode 100644 index 0000000..971c3b9 --- /dev/null +++ b/src/blake/blake2-impl.h @@ -0,0 +1,132 @@ +/* + BLAKE2 reference source code package - optimized C implementations + Written in 2012 by Samuel Neves + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see . +*/ +#pragma once +#ifndef __BLAKE2_IMPL_H__ +#define __BLAKE2_IMPL_H__ + +#include + +static inline uint32_t load32( const void *src ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + uint32_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = ( const uint8_t * )src; + uint32_t w = *p++; + w |= ( uint32_t )( *p++ ) << 8; + w |= ( uint32_t )( *p++ ) << 16; + w |= ( uint32_t )( *p++ ) << 24; + return w; +#endif +} + +static inline uint64_t load64( const void *src ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + uint64_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = ( const uint8_t * )src; + uint64_t w = *p++; + w |= ( uint64_t )( *p++ ) << 8; + w |= ( uint64_t )( *p++ ) << 16; + w |= ( uint64_t )( *p++ ) << 24; + w |= ( uint64_t )( *p++ ) << 32; + w |= ( uint64_t )( *p++ ) << 40; + w |= ( uint64_t )( *p++ ) << 48; + w |= ( uint64_t )( *p++ ) << 56; + return w; +#endif +} + +static inline void store32( void *dst, uint32_t w ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; +#endif +} + +static inline void store64( void *dst, uint64_t w ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + *p++ 
= ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; +#endif +} + +static inline uint64_t load48( const void *src ) +{ + const uint8_t *p = ( const uint8_t * )src; + uint64_t w = *p++; + w |= ( uint64_t )( *p++ ) << 8; + w |= ( uint64_t )( *p++ ) << 16; + w |= ( uint64_t )( *p++ ) << 24; + w |= ( uint64_t )( *p++ ) << 32; + w |= ( uint64_t )( *p++ ) << 40; + return w; +} + +static inline void store48( void *dst, uint64_t w ) +{ + uint8_t *p = ( uint8_t * )dst; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; +} + +static inline uint32_t rotl32( const uint32_t w, const unsigned c ) +{ + return ( w << c ) | ( w >> ( 32 - c ) ); +} + +static inline uint64_t rotl64( const uint64_t w, const unsigned c ) +{ + return ( w << c ) | ( w >> ( 64 - c ) ); +} + +static inline uint32_t rotr32( const uint32_t w, const unsigned c ) +{ + return ( w >> c ) | ( w << ( 32 - c ) ); +} + +static inline uint64_t rotr64( const uint64_t w, const unsigned c ) +{ + return ( w >> c ) | ( w << ( 64 - c ) ); +} + +/* prevents compiler optimizing out memset() */ +static inline void secure_zero_memory( void *v, size_t n ) +{ + volatile uint8_t *p = ( volatile uint8_t * )v; + while( n-- ) *p++ = 0; +} + +#endif \ No newline at end of file diff --git a/src/blake/blake2-round.h b/src/blake/blake2-round.h new file mode 100644 index 0000000..2972043 --- /dev/null +++ b/src/blake/blake2-round.h @@ -0,0 +1,85 @@ +#define _mm_roti_epi64(x, c) \ + (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \ + : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \ + : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \ + : (-(c) == 63) ? 
_mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \ + : _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c)))) + +#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + row1l = _mm_add_epi64(row1l, row2l); \ + row1h = _mm_add_epi64(row1h, row2h); \ + \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ + \ + row4l = _mm_roti_epi64(row4l, -32); \ + row4h = _mm_roti_epi64(row4h, -32); \ + \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ + \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ + \ + row2l = _mm_roti_epi64(row2l, -24); \ + row2h = _mm_roti_epi64(row2h, -24); \ + +#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + row1l = _mm_add_epi64(row1l, row2l); \ + row1h = _mm_add_epi64(row1h, row2h); \ + \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ + \ + row4l = _mm_roti_epi64(row4l, -16); \ + row4h = _mm_roti_epi64(row4h, -16); \ + \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ + \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ + \ + row2l = _mm_roti_epi64(row2l, -63); \ + row2h = _mm_roti_epi64(row2h, -63); \ + +#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = _mm_alignr_epi8(row2h, row2l, 8); \ + t1 = _mm_alignr_epi8(row2l, row2h, 8); \ + row2l = t0; \ + row2h = t1; \ + \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + \ + t0 = _mm_alignr_epi8(row4h, row4l, 8); \ + t1 = _mm_alignr_epi8(row4l, row4h, 8); \ + row4l = t1; \ + row4h = t0; + +#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = _mm_alignr_epi8(row2l, row2h, 8); \ + t1 = _mm_alignr_epi8(row2h, row2l, 8); \ + row2l = t0; \ + row2h = t1; \ + \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + \ + t0 = _mm_alignr_epi8(row4l, row4h, 8); \ + t1 = _mm_alignr_epi8(row4h, row4l, 8); \ + 
row4l = t1; \ + row4h = t0; + +#define BLAKE2_ROUND(row1l,row1h,row2l,row2h,row3l,row3h,row4l,row4h) \ + G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ + G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ + \ + DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ + \ + G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ + G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ + \ + UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ No newline at end of file diff --git a/src/blake/blake2.h b/src/blake/blake2.h new file mode 100644 index 0000000..8e529c7 --- /dev/null +++ b/src/blake/blake2.h @@ -0,0 +1,161 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Written in 2012 by Samuel Neves + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see . 
+*/ +#pragma once +#ifndef __BLAKE2_H__ +#define __BLAKE2_H__ + +#include +#include + +#if defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) +#else +#define ALIGN(x) __attribute__ ((__aligned__(x))) +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + + enum blake2s_constant + { + BLAKE2S_BLOCKBYTES = 64, + BLAKE2S_OUTBYTES = 32, + BLAKE2S_KEYBYTES = 32, + BLAKE2S_SALTBYTES = 8, + BLAKE2S_PERSONALBYTES = 8 + }; + + enum blake2b_constant + { + BLAKE2B_BLOCKBYTES = 128, + BLAKE2B_OUTBYTES = 64, + BLAKE2B_KEYBYTES = 64, + BLAKE2B_SALTBYTES = 16, + BLAKE2B_PERSONALBYTES = 16 + }; + +#ifdef _MSC_VER +# pragma warning (disable: 4324) // structure was padded due to alignment specifier +#endif // _MSC_VER +#pragma pack(push, 1) + typedef struct __blake2s_param + { + uint8_t digest_length; // 1 + uint8_t key_length; // 2 + uint8_t fanout; // 3 + uint8_t depth; // 4 + uint32_t leaf_length; // 8 + uint8_t node_offset[6];// 14 + uint8_t node_depth; // 15 + uint8_t inner_length; // 16 + // uint8_t reserved[0]; + uint8_t salt[BLAKE2S_SALTBYTES]; // 24 + uint8_t personal[BLAKE2S_PERSONALBYTES]; // 32 + } blake2s_param; + + ALIGN( 64 ) typedef struct __blake2s_state + { + uint32_t h[8]; + uint32_t t[2]; + uint32_t f[2]; + uint8_t buf[2 * BLAKE2S_BLOCKBYTES]; + size_t buflen; + uint8_t last_node; + } blake2s_state; + + typedef struct __blake2b_param + { + uint8_t digest_length; // 1 + uint8_t key_length; // 2 + uint8_t fanout; // 3 + uint8_t depth; // 4 + uint32_t leaf_length; // 8 + uint64_t node_offset; // 16 + uint8_t node_depth; // 17 + uint8_t inner_length; // 18 + uint8_t reserved[14]; // 32 + uint8_t salt[BLAKE2B_SALTBYTES]; // 48 + uint8_t personal[BLAKE2B_PERSONALBYTES]; // 64 + } blake2b_param; + + ALIGN( 64 ) typedef struct __blake2b_state + { + uint64_t h[8]; + uint8_t buf[BLAKE2B_BLOCKBYTES]; + uint16_t counter; + uint8_t buflen; + uint8_t lastblock; + } blake2b_state; + + ALIGN( 64 ) typedef struct __blake2sp_state + { + blake2s_state S[8][1]; + 
blake2s_state R[1]; + uint8_t buf[8 * BLAKE2S_BLOCKBYTES]; + size_t buflen; + } blake2sp_state; + + ALIGN( 64 ) typedef struct __blake2bp_state + { + blake2b_state S[4][1]; + blake2b_state R[1]; + uint8_t buf[4 * BLAKE2B_BLOCKBYTES]; + size_t buflen; + } blake2bp_state; +#pragma pack(pop) +#ifdef _MSC_VER +# pragma warning (default: 4324) +#endif // _MSC_VER + + // Streaming API + int blake2s_init( blake2s_state *S, const uint8_t outlen ); + int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); + int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen ); + int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen ); + + int blake2b_init( blake2b_state *S, const uint8_t outlen ); + int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); + int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen ); + int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen ); + + int blake2sp_init( blake2sp_state *S, const uint8_t outlen ); + int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen ); + int blake2sp_final( blake2sp_state *S, uint8_t *out, uint8_t outlen ); + + int blake2bp_init( blake2bp_state *S, const uint8_t outlen ); + int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen ); + int blake2bp_final( blake2bp_state *S, uint8_t *out, uint8_t outlen ); + + // Simple API + int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + int blake2b( uint8_t *out, const void *in, const 
void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + int blake2b_long(uint8_t *out, const void *in, const uint32_t outlen, const uint64_t inlen); + + int blake2sp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + int blake2bp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + + static inline int blake2( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) + { + return blake2b( out, in, key, outlen, inlen, keylen ); + } + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/src/blake/blake2b-load-sse2.h b/src/blake/blake2b-load-sse2.h new file mode 100644 index 0000000..143cea0 --- /dev/null +++ b/src/blake/blake2b-load-sse2.h @@ -0,0 +1,67 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Written in 2012 by Samuel Neves + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. 
+*/ +#pragma once +#ifndef __BLAKE2B_LOAD_SSE2_H__ +#define __BLAKE2B_LOAD_SSE2_H__ + +#define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) +#define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) +#define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13) +#define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9) +#define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15) +#define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11) +#define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7) +#define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5) +#define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2) +#define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7) +#define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1) +#define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13) +#define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4) +#define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0) +#define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2) +#define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4) +#define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6) +#define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8) +#define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0) +#define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, 
m12); b1 = _mm_set_epi64x(m3, m11) +#define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15) +#define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14) +#define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14) +#define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13) +#define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9) +#define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2) +#define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12) +#define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1) +#define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8) +#define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6) +#define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11) +#define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3) +#define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1) +#define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4) +#define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7) +#define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6) +#define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3) +#define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12) +#define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) +#define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) +#define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13) +#define 
LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9) +#define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15) +#define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11) +#define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7) + + +#endif diff --git a/src/blake/blake2b-load-sse41.h b/src/blake/blake2b-load-sse41.h new file mode 100644 index 0000000..03b63d7 --- /dev/null +++ b/src/blake/blake2b-load-sse41.h @@ -0,0 +1,401 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Written in 2012 by Samuel Neves + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see . +*/ +#pragma once +#ifndef __BLAKE2B_LOAD_SSE41_H__ +#define __BLAKE2B_LOAD_SSE41_H__ + +#define LOAD_MSG_0_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m1); \ +b1 = _mm_unpacklo_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_0_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m0, m1); \ +b1 = _mm_unpackhi_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_0_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m4, m5); \ +b1 = _mm_unpacklo_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_0_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m5); \ +b1 = _mm_unpackhi_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_1_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m7, m2); \ +b1 = _mm_unpackhi_epi64(m4, m6); \ +} while(0) + + +#define LOAD_MSG_1_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m5, m4); \ +b1 = _mm_alignr_epi8(m3, m7, 8); \ +} while(0) + + +#define LOAD_MSG_1_3(b0, b1) \ +do \ +{ \ +b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \ +b1 = _mm_unpackhi_epi64(m5, m2); \ 
+} while(0) + + +#define LOAD_MSG_1_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m6, m1); \ +b1 = _mm_unpackhi_epi64(m3, m1); \ +} while(0) + + +#define LOAD_MSG_2_1(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m6, m5, 8); \ +b1 = _mm_unpackhi_epi64(m2, m7); \ +} while(0) + + +#define LOAD_MSG_2_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m4, m0); \ +b1 = _mm_blend_epi16(m1, m6, 0xF0); \ +} while(0) + + +#define LOAD_MSG_2_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m5, m1, 0xF0); \ +b1 = _mm_unpackhi_epi64(m3, m4); \ +} while(0) + + +#define LOAD_MSG_2_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m7, m3); \ +b1 = _mm_alignr_epi8(m2, m0, 8); \ +} while(0) + + +#define LOAD_MSG_3_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m3, m1); \ +b1 = _mm_unpackhi_epi64(m6, m5); \ +} while(0) + + +#define LOAD_MSG_3_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m0); \ +b1 = _mm_unpacklo_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_3_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m1, m2, 0xF0); \ +b1 = _mm_blend_epi16(m2, m7, 0xF0); \ +} while(0) + + +#define LOAD_MSG_3_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m3, m5); \ +b1 = _mm_unpacklo_epi64(m0, m4); \ +} while(0) + + +#define LOAD_MSG_4_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m2); \ +b1 = _mm_unpacklo_epi64(m1, m5); \ +} while(0) + + +#define LOAD_MSG_4_2(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m0, m3, 0xF0); \ +b1 = _mm_blend_epi16(m2, m7, 0xF0); \ +} while(0) + + +#define LOAD_MSG_4_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m7, m5, 0xF0); \ +b1 = _mm_blend_epi16(m3, m1, 0xF0); \ +} while(0) + + +#define LOAD_MSG_4_4(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m6, m0, 8); \ +b1 = _mm_blend_epi16(m4, m6, 0xF0); \ +} while(0) + + +#define LOAD_MSG_5_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m1, m3); \ +b1 = _mm_unpacklo_epi64(m0, m4); \ +} while(0) + + +#define LOAD_MSG_5_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m6, m5); \ +b1 = _mm_unpackhi_epi64(m5, m1); \ +} 
while(0) + + +#define LOAD_MSG_5_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m2, m3, 0xF0); \ +b1 = _mm_unpackhi_epi64(m7, m0); \ +} while(0) + + +#define LOAD_MSG_5_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m6, m2); \ +b1 = _mm_blend_epi16(m7, m4, 0xF0); \ +} while(0) + + +#define LOAD_MSG_6_1(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m6, m0, 0xF0); \ +b1 = _mm_unpacklo_epi64(m7, m2); \ +} while(0) + + +#define LOAD_MSG_6_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m2, m7); \ +b1 = _mm_alignr_epi8(m5, m6, 8); \ +} while(0) + + +#define LOAD_MSG_6_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m3); \ +b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \ +} while(0) + + +#define LOAD_MSG_6_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m3, m1); \ +b1 = _mm_blend_epi16(m1, m5, 0xF0); \ +} while(0) + + +#define LOAD_MSG_7_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m6, m3); \ +b1 = _mm_blend_epi16(m6, m1, 0xF0); \ +} while(0) + + +#define LOAD_MSG_7_2(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m7, m5, 8); \ +b1 = _mm_unpackhi_epi64(m0, m4); \ +} while(0) + + +#define LOAD_MSG_7_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m2, m7); \ +b1 = _mm_unpacklo_epi64(m4, m1); \ +} while(0) + + +#define LOAD_MSG_7_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m2); \ +b1 = _mm_unpacklo_epi64(m3, m5); \ +} while(0) + + +#define LOAD_MSG_8_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m3, m7); \ +b1 = _mm_alignr_epi8(m0, m5, 8); \ +} while(0) + + +#define LOAD_MSG_8_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m7, m4); \ +b1 = _mm_alignr_epi8(m4, m1, 8); \ +} while(0) + + +#define LOAD_MSG_8_3(b0, b1) \ +do \ +{ \ +b0 = m6; \ +b1 = _mm_alignr_epi8(m5, m0, 8); \ +} while(0) + + +#define LOAD_MSG_8_4(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m1, m3, 0xF0); \ +b1 = m2; \ +} while(0) + + +#define LOAD_MSG_9_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m5, m4); \ +b1 = _mm_unpackhi_epi64(m3, m0); \ +} while(0) + + +#define LOAD_MSG_9_2(b0, b1) \ 
+do \ +{ \ +b0 = _mm_unpacklo_epi64(m1, m2); \ +b1 = _mm_blend_epi16(m3, m2, 0xF0); \ +} while(0) + + +#define LOAD_MSG_9_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m7, m4); \ +b1 = _mm_unpackhi_epi64(m1, m6); \ +} while(0) + + +#define LOAD_MSG_9_4(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m7, m5, 8); \ +b1 = _mm_unpacklo_epi64(m6, m0); \ +} while(0) + + +#define LOAD_MSG_10_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m1); \ +b1 = _mm_unpacklo_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_10_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m0, m1); \ +b1 = _mm_unpackhi_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_10_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m4, m5); \ +b1 = _mm_unpacklo_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_10_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m5); \ +b1 = _mm_unpackhi_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_11_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m7, m2); \ +b1 = _mm_unpackhi_epi64(m4, m6); \ +} while(0) + + +#define LOAD_MSG_11_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m5, m4); \ +b1 = _mm_alignr_epi8(m3, m7, 8); \ +} while(0) + + +#define LOAD_MSG_11_3(b0, b1) \ +do \ +{ \ +b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \ +b1 = _mm_unpackhi_epi64(m5, m2); \ +} while(0) + + +#define LOAD_MSG_11_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m6, m1); \ +b1 = _mm_unpackhi_epi64(m3, m1); \ +} while(0) + + +#endif diff --git a/src/blake/blake2b-round.h b/src/blake/blake2b-round.h new file mode 100644 index 0000000..5cafe79 --- /dev/null +++ b/src/blake/blake2b-round.h @@ -0,0 +1,170 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Written in 2012 by Samuel Neves + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. 
+ + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. +*/ +#pragma once +#ifndef __BLAKE2B_ROUND_H__ +#define __BLAKE2B_ROUND_H__ + +#define LOAD(p) _mm_load_si128( (const __m128i *)(p) ) +#define STORE(p,r) _mm_store_si128((__m128i *)(p), r) + +#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) ) +#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) + +#define TOF(reg) _mm_castsi128_ps((reg)) +#define TOI(reg) _mm_castps_si128((reg)) + +#define LIKELY(x) __builtin_expect((x),1) + + +/* Microarchitecture-specific macros */ +#ifndef HAVE_XOP +#ifdef HAVE_SSSE3 +#define _mm_roti_epi64(x, c) \ + (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \ + : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \ + : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \ + : (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \ + : _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c)))) +#else +#define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-c) )) +#endif +#else +/* ... 
*/ +#endif + + + +#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ + \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ + \ + row4l = _mm_roti_epi64(row4l, -32); \ + row4h = _mm_roti_epi64(row4h, -32); \ + \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ + \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ + \ + row2l = _mm_roti_epi64(row2l, -24); \ + row2h = _mm_roti_epi64(row2h, -24); \ + +#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ + \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ + \ + row4l = _mm_roti_epi64(row4l, -16); \ + row4h = _mm_roti_epi64(row4h, -16); \ + \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ + \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ + \ + row2l = _mm_roti_epi64(row2l, -63); \ + row2h = _mm_roti_epi64(row2h, -63); \ + +#if defined(HAVE_SSSE3) +#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = _mm_alignr_epi8(row2h, row2l, 8); \ + t1 = _mm_alignr_epi8(row2l, row2h, 8); \ + row2l = t0; \ + row2h = t1; \ + \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + \ + t0 = _mm_alignr_epi8(row4h, row4l, 8); \ + t1 = _mm_alignr_epi8(row4l, row4h, 8); \ + row4l = t1; \ + row4h = t0; + +#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = _mm_alignr_epi8(row2l, row2h, 8); \ + t1 = _mm_alignr_epi8(row2h, row2l, 8); \ + row2l = t0; \ + row2h = t1; \ + \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + \ + t0 = _mm_alignr_epi8(row4l, row4h, 8); \ + t1 = _mm_alignr_epi8(row4h, row4l, 8); \ + row4l = t1; \ + 
row4h = t0; +#else + +#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = row4l;\ + t1 = row2l;\ + row4l = row3l;\ + row3l = row3h;\ + row3h = row4l;\ + row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \ + row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \ + row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); \ + row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1)) + +#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = row3l;\ + row3l = row3h;\ + row3h = t0;\ + t0 = row2l;\ + t1 = row4l;\ + row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); \ + row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \ + row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); \ + row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1)) + +#endif + +#if defined(HAVE_SSE41) +#include "blake2b-load-sse41.h" +#else +#include "blake2b-load-sse2.h" +#endif + +#define ROUND(r) \ + LOAD_MSG_ ##r ##_1(b0, b1); \ + G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + LOAD_MSG_ ##r ##_2(b0, b1); \ + G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ + LOAD_MSG_ ##r ##_3(b0, b1); \ + G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + LOAD_MSG_ ##r ##_4(b0, b1); \ + G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); + +#endif + +#define BLAKE2_ROUND(row1l,row1h,row2l,row2h,row3l,row3h,row4l,row4h) \ + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \ + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \ + \ + DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \ + \ + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \ + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \ + \ + UNDIAGONALIZE(row1l, row2l, row3l, 
row4l, row1h, row2h, row3h, row4h); \ No newline at end of file diff --git a/src/blake/blake2b.cpp b/src/blake/blake2b.cpp new file mode 100644 index 0000000..5e27442 --- /dev/null +++ b/src/blake/blake2b.cpp @@ -0,0 +1,339 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Written in 2012 by Samuel Neves + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. +*/ + +#include <stdint.h> +#include <string.h> +#include <stdio.h> + +#include "blake2.h" +#include "blake2-impl.h" + +#include "blake2-config.h" + +#include <emmintrin.h> +#if defined(HAVE_SSSE3) +#include <tmmintrin.h> +#endif +#if defined(HAVE_SSE41) +#include <smmintrin.h> +#endif +#if defined(HAVE_AVX) +#include <immintrin.h> +#endif +#if defined(HAVE_XOP) +#include <x86intrin.h> +#endif + +#include "blake2b-round.h" + +ALIGN( 64 ) static const uint64_t blake2b_IV[8] = +{ + 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL +}; + +/* init xors IV with input parameter block */ +int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) +{ + //blake2b_init0( S ); + const uint8_t * v = ( const uint8_t * )( blake2b_IV ); + const uint8_t * p = ( const uint8_t * )( P ); + uint8_t * h = ( uint8_t * )( S->h ); + /* IV XOR ParamBlock */ + memset( S, 0, sizeof( blake2b_state ) ); + + for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; + + return 0; +} + +/* Some sort of default parameter block initialization, for sequential blake2b */ +int blake2b_init( blake2b_state *S, const uint8_t outlen ) +{ + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; + + const blake2b_param P = + { + outlen, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + {0}, + {0}, + {0} + 
}; + return blake2b_init_param( S, &P ); +} + +int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) +{ + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; + + if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1; + + const blake2b_param P = + { + outlen, + keylen, + 1, + 1, + 0, + 0, + 0, + 0, + {0}, + {0}, + {0} + }; + + if( blake2b_init_param( S, &P ) < 0 ) + return 0; + + { + uint8_t block[BLAKE2B_BLOCKBYTES]; + memset( block, 0, BLAKE2B_BLOCKBYTES ); + memcpy( block, key, keylen ); + blake2b_update( S, block, BLAKE2B_BLOCKBYTES ); + secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ + } + return 0; +} + +static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) +{ + __m128i row1l, row1h; + __m128i row2l, row2h; + __m128i row3l, row3h; + __m128i row4l, row4h; + __m128i b0, b1; + __m128i t0, t1; +#if defined(HAVE_SSSE3) && !defined(HAVE_XOP) + const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 ); + const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 ); +#endif +#if defined(HAVE_SSE41) + const __m128i m0 = LOADU( block + 00 ); + const __m128i m1 = LOADU( block + 16 ); + const __m128i m2 = LOADU( block + 32 ); + const __m128i m3 = LOADU( block + 48 ); + const __m128i m4 = LOADU( block + 64 ); + const __m128i m5 = LOADU( block + 80 ); + const __m128i m6 = LOADU( block + 96 ); + const __m128i m7 = LOADU( block + 112 ); +#else + const uint64_t m0 = ( ( uint64_t * )block )[ 0]; + const uint64_t m1 = ( ( uint64_t * )block )[ 1]; + const uint64_t m2 = ( ( uint64_t * )block )[ 2]; + const uint64_t m3 = ( ( uint64_t * )block )[ 3]; + const uint64_t m4 = ( ( uint64_t * )block )[ 4]; + const uint64_t m5 = ( ( uint64_t * )block )[ 5]; + const uint64_t m6 = ( ( uint64_t * )block )[ 6]; + const uint64_t m7 = ( ( uint64_t * )block )[ 7]; + const uint64_t m8 = ( ( uint64_t * 
)block )[ 8]; + const uint64_t m9 = ( ( uint64_t * )block )[ 9]; + const uint64_t m10 = ( ( uint64_t * )block )[10]; + const uint64_t m11 = ( ( uint64_t * )block )[11]; + const uint64_t m12 = ( ( uint64_t * )block )[12]; + const uint64_t m13 = ( ( uint64_t * )block )[13]; + const uint64_t m14 = ( ( uint64_t * )block )[14]; + const uint64_t m15 = ( ( uint64_t * )block )[15]; +#endif + row1l = LOADU( &S->h[0] ); + row1h = LOADU( &S->h[2] ); + row2l = LOADU( &S->h[4] ); + row2h = LOADU( &S->h[6] ); + row3l = LOADU( &blake2b_IV[0] ); + row3h = LOADU( &blake2b_IV[2] ); + row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), _mm_set_epi32(0,0,0,S->counter) ); + row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), _mm_set_epi32(0,0,0L-S->lastblock,0L-S->lastblock) ); + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + ROUND( 10 ); + ROUND( 11 ); + row1l = _mm_xor_si128( row3l, row1l ); + row1h = _mm_xor_si128( row3h, row1h ); + STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) ); + STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) ); + row2l = _mm_xor_si128( row4l, row2l ); + row2h = _mm_xor_si128( row4h, row2h ); + STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) ); + STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) ); + return 0; +} + + +int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen ) +{ + while( inlen > 0 ) + { + size_t left = S->buflen; + size_t fill = BLAKE2B_BLOCKBYTES - left; + + if( inlen > fill ) + { + memcpy( S->buf + left, in, fill ); // Fill buffer + in += fill; + inlen -= fill; + S->counter += BLAKE2B_BLOCKBYTES; + blake2b_compress( S, S->buf ); // Compress + S->buflen = 0; + } + else // inlen <= fill + { + memcpy( S->buf + left, in, inlen ); + S->buflen += (uint8_t)inlen; // not enough to compress + in += inlen; + inlen = 0; + } + } + + return 0; +} + + +int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t 
outlen ) +{ + if( outlen > BLAKE2B_OUTBYTES ) + return -1; + + if( S->buflen > BLAKE2B_BLOCKBYTES ) + { + S->counter += BLAKE2B_BLOCKBYTES; + blake2b_compress( S, S->buf ); + S->buflen -= BLAKE2B_BLOCKBYTES; + memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen ); + } + + S->counter += S->buflen; + S->lastblock = 1; + memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ + blake2b_compress( S, S->buf ); + memcpy( out, &S->h[0], outlen ); + S->lastblock = 0; + return 0; +} + + +int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) +{ + blake2b_state S[1]; + + /* Verify parameters */ + if ( NULL == in ) return -1; + + if ( NULL == out ) return -1; + + if( NULL == key ) keylen = 0; + + if( keylen ) + { + if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1; + } + else + { + if( blake2b_init( S, outlen ) < 0 ) return -1; + } + + blake2b_update( S, ( const uint8_t * )in, inlen ); + blake2b_final( S, out, outlen ); + return 0; +} + +#if defined(SUPERCOP) +int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) +{ + return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 ); +} +#endif + +#if defined(BLAKE2B_SELFTEST) +#include +#include "blake2-kat.h" +int main( int argc, char **argv ) +{ + uint8_t key[BLAKE2B_KEYBYTES]; + uint8_t buf[KAT_LENGTH]; + + for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i ) + key[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + buf[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + { + uint8_t hash[BLAKE2B_OUTBYTES]; + blake2b( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ); + + if( 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) ) + { + puts( "error" ); + return -1; + } + } + + puts( "ok" ); + return 0; +} +#endif + +int blake2b_long(uint8_t *out, const void *in, const uint32_t outlen, const uint64_t inlen) +{ + blake2b_state blake_state; + if (outlen <= 
BLAKE2B_OUTBYTES) + { + blake2b_init(&blake_state, (uint8_t)outlen); + blake2b_update(&blake_state, (const uint8_t*)&outlen, sizeof(uint32_t)); + blake2b_update(&blake_state, (const uint8_t *)in, inlen); + blake2b_final(&blake_state, out, (uint8_t)outlen); + } + else + { + uint8_t out_buffer[BLAKE2B_OUTBYTES]; + uint8_t in_buffer[BLAKE2B_OUTBYTES]; + blake2b_init(&blake_state, BLAKE2B_OUTBYTES); + blake2b_update(&blake_state, (const uint8_t*)&outlen, sizeof(uint32_t)); + blake2b_update(&blake_state, (const uint8_t *)in, inlen); + blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES); + memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); + out += BLAKE2B_OUTBYTES / 2; + uint32_t toproduce = outlen - BLAKE2B_OUTBYTES / 2; + while (toproduce > BLAKE2B_OUTBYTES) + { + memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); + blake2b(out_buffer, in_buffer, NULL, BLAKE2B_OUTBYTES, BLAKE2B_OUTBYTES, 0); + memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); + out += BLAKE2B_OUTBYTES / 2; + toproduce -= BLAKE2B_OUTBYTES / 2; + } + memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); + blake2b(out_buffer, in_buffer, NULL, (uint8_t)toproduce, BLAKE2B_OUTBYTES, 0); + memcpy(out, out_buffer, toproduce); + + } + return 0; +} \ No newline at end of file diff --git a/src/common.h b/src/common.h new file mode 100644 index 0000000..2a6341c --- /dev/null +++ b/src/common.h @@ -0,0 +1,69 @@ +// Copyright (c) 2014 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+ +#ifndef BITCOIN_CRYPTO_COMMON_H +#define BITCOIN_CRYPTO_COMMON_H + +#if defined(HAVE_CONFIG_H) +#include "bitcoin-config.h" +#endif + +#include <stdint.h> +#include <string.h> + +#if defined(_MSC_VER) || defined(__APPLE__) || defined(__ANDROID__) +#include "compat/endian.h" +#endif + +uint16_t static inline ReadLE16(const unsigned char* ptr) +{ + return le16toh(*((uint16_t*)ptr)); +} + +uint32_t static inline ReadLE32(const unsigned char* ptr) +{ + return le32toh(*((uint32_t*)ptr)); +} + +uint64_t static inline ReadLE64(const unsigned char* ptr) +{ + return le64toh(*((uint64_t*)ptr)); +} + +void static inline WriteLE16(unsigned char* ptr, uint16_t x) +{ + *((uint16_t*)ptr) = htole16(x); +} + +void static inline WriteLE32(unsigned char* ptr, uint32_t x) +{ + *((uint32_t*)ptr) = htole32(x); +} + +void static inline WriteLE64(unsigned char* ptr, uint64_t x) +{ + *((uint64_t*)ptr) = htole64(x); +} + +uint32_t static inline ReadBE32(const unsigned char* ptr) +{ + return be32toh(*((uint32_t*)ptr)); +} + +uint64_t static inline ReadBE64(const unsigned char* ptr) +{ + return be64toh(*((uint64_t*)ptr)); +} + +void static inline WriteBE32(unsigned char* ptr, uint32_t x) +{ + *((uint32_t*)ptr) = htobe32(x); +} + +void static inline WriteBE64(unsigned char* ptr, uint64_t x) +{ + *((uint64_t*)ptr) = htobe64(x); +} + +#endif // BITCOIN_CRYPTO_COMMON_H \ No newline at end of file diff --git a/src/compat/byteswap.h b/src/compat/byteswap.h new file mode 100644 index 0000000..9f97f90 --- /dev/null +++ b/src/compat/byteswap.h @@ -0,0 +1,47 @@ +// Copyright (c) 2014 The Bitcoin developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+ +#ifndef BITCOIN_COMPAT_BYTESWAP_H +#define BITCOIN_COMPAT_BYTESWAP_H + +#if defined(HAVE_CONFIG_H) +#include "config/bitcoin-config.h" +#endif + +#include <stdint.h> + +#if defined(HAVE_BYTESWAP_H) +#include <byteswap.h> +#endif + +#if HAVE_DECL_BSWAP_16 == 0 +inline uint16_t bswap_16(uint16_t x) +{ + return (x >> 8) | ((x & 0x00ff) << 8); +} +#endif // HAVE_DECL_BSWAP16 + +#if HAVE_DECL_BSWAP_32 == 0 +inline uint32_t bswap_32(uint32_t x) +{ + return (((x & 0xff000000U) >> 24) | ((x & 0x00ff0000U) >> 8) | + ((x & 0x0000ff00U) << 8) | ((x & 0x000000ffU) << 24)); +} +#endif // HAVE_DECL_BSWAP32 + +#if HAVE_DECL_BSWAP_64 == 0 +inline uint64_t bswap_64(uint64_t x) +{ + return (((x & 0xff00000000000000ull) >> 56) + | ((x & 0x00ff000000000000ull) >> 40) + | ((x & 0x0000ff0000000000ull) >> 24) + | ((x & 0x000000ff00000000ull) >> 8) + | ((x & 0x00000000ff000000ull) << 8) + | ((x & 0x0000000000ff0000ull) << 24) + | ((x & 0x000000000000ff00ull) << 40) + | ((x & 0x00000000000000ffull) << 56)); +} +#endif // HAVE_DECL_BSWAP64 + +#endif // BITCOIN_COMPAT_BYTESWAP_H \ No newline at end of file diff --git a/src/compat/endian.h b/src/compat/endian.h new file mode 100644 index 0000000..2422c17 --- /dev/null +++ b/src/compat/endian.h @@ -0,0 +1,196 @@ +// Copyright (c) 2014 The Bitcoin developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+ +#ifndef BITCOIN_COMPAT_ENDIAN_H +#define BITCOIN_COMPAT_ENDIAN_H + +#if defined(HAVE_CONFIG_H) +#include "config/bitcoin-config.h" +#endif + +#include <stdint.h> + +#include "compat/byteswap.h" + +#if defined(HAVE_ENDIAN_H) +#include <endian.h> +#elif defined(HAVE_SYS_ENDIAN_H) +#include <sys/endian.h> +#endif + +#if defined(WORDS_BIGENDIAN) + +#if HAVE_DECL_HTOBE16 == 0 +inline uint16_t htobe16(uint16_t host_16bits) +{ + return host_16bits; +} +#endif // HAVE_DECL_HTOBE16 + +#if HAVE_DECL_HTOLE16 == 0 +inline uint16_t htole16(uint16_t host_16bits) +{ + return bswap_16(host_16bits); +} +#endif // HAVE_DECL_HTOLE16 + +#if HAVE_DECL_BE16TOH == 0 +inline uint16_t be16toh(uint16_t big_endian_16bits) +{ + return big_endian_16bits; +} +#endif // HAVE_DECL_BE16TOH + +#if HAVE_DECL_LE16TOH == 0 +inline uint16_t le16toh(uint16_t little_endian_16bits) +{ + return bswap_16(little_endian_16bits); +} +#endif // HAVE_DECL_LE16TOH + +#if HAVE_DECL_HTOBE32 == 0 +inline uint32_t htobe32(uint32_t host_32bits) +{ + return host_32bits; +} +#endif // HAVE_DECL_HTOBE32 + +#if HAVE_DECL_HTOLE32 == 0 +inline uint32_t htole32(uint32_t host_32bits) +{ + return bswap_32(host_32bits); +} +#endif // HAVE_DECL_HTOLE32 + +#if HAVE_DECL_BE32TOH == 0 +inline uint32_t be32toh(uint32_t big_endian_32bits) +{ + return big_endian_32bits; +} +#endif // HAVE_DECL_BE32TOH + +#if HAVE_DECL_LE32TOH == 0 +inline uint32_t le32toh(uint32_t little_endian_32bits) +{ + return bswap_32(little_endian_32bits); +} +#endif // HAVE_DECL_LE32TOH + +#if HAVE_DECL_HTOBE64 == 0 +inline uint64_t htobe64(uint64_t host_64bits) +{ + return host_64bits; +} +#endif // HAVE_DECL_HTOBE64 + +#if HAVE_DECL_HTOLE64 == 0 +inline uint64_t htole64(uint64_t host_64bits) +{ + return bswap_64(host_64bits); +} +#endif // HAVE_DECL_HTOLE64 + +#if HAVE_DECL_BE64TOH == 0 +inline uint64_t be64toh(uint64_t big_endian_64bits) +{ + return big_endian_64bits; +} +#endif // HAVE_DECL_BE64TOH + +#if HAVE_DECL_LE64TOH == 0 +inline uint64_t le64toh(uint64_t little_endian_64bits) +{ + 
return bswap_64(little_endian_64bits); +} +#endif // HAVE_DECL_LE64TOH + +#else // WORDS_BIGENDIAN + +#if HAVE_DECL_HTOBE16 == 0 +inline uint16_t htobe16(uint16_t host_16bits) +{ + return bswap_16(host_16bits); +} +#endif // HAVE_DECL_HTOBE16 + +#if HAVE_DECL_HTOLE16 == 0 +inline uint16_t htole16(uint16_t host_16bits) +{ + return host_16bits; +} +#endif // HAVE_DECL_HTOLE16 + +#if HAVE_DECL_BE16TOH == 0 +inline uint16_t be16toh(uint16_t big_endian_16bits) +{ + return bswap_16(big_endian_16bits); +} +#endif // HAVE_DECL_BE16TOH + +#if HAVE_DECL_LE16TOH == 0 +inline uint16_t le16toh(uint16_t little_endian_16bits) +{ + return little_endian_16bits; +} +#endif // HAVE_DECL_LE16TOH + +#if HAVE_DECL_HTOBE32 == 0 +inline uint32_t htobe32(uint32_t host_32bits) +{ + return bswap_32(host_32bits); +} +#endif // HAVE_DECL_HTOBE32 + +#if HAVE_DECL_HTOLE32 == 0 +inline uint32_t htole32(uint32_t host_32bits) +{ + return host_32bits; +} +#endif // HAVE_DECL_HTOLE32 + +#if HAVE_DECL_BE32TOH == 0 +inline uint32_t be32toh(uint32_t big_endian_32bits) +{ + return bswap_32(big_endian_32bits); +} +#endif // HAVE_DECL_BE32TOH + +#if HAVE_DECL_LE32TOH == 0 +inline uint32_t le32toh(uint32_t little_endian_32bits) +{ + return little_endian_32bits; +} +#endif // HAVE_DECL_LE32TOH + +#if HAVE_DECL_HTOBE64 == 0 +inline uint64_t htobe64(uint64_t host_64bits) +{ + return bswap_64(host_64bits); +} +#endif // HAVE_DECL_HTOBE64 + +#if HAVE_DECL_HTOLE64 == 0 +inline uint64_t htole64(uint64_t host_64bits) +{ + return host_64bits; +} +#endif // HAVE_DECL_HTOLE64 + +#if HAVE_DECL_BE64TOH == 0 +inline uint64_t be64toh(uint64_t big_endian_64bits) +{ + return bswap_64(big_endian_64bits); +} +#endif // HAVE_DECL_BE64TOH + +#if HAVE_DECL_LE64TOH == 0 +inline uint64_t le64toh(uint64_t little_endian_64bits) +{ + return little_endian_64bits; +} +#endif // HAVE_DECL_LE64TOH + +#endif // WORDS_BIGENDIAN + +#endif // BITCOIN_COMPAT_ENDIAN_H \ No newline at end of file diff --git a/src/equihashR.h b/src/equihashR.h 
new file mode 100644 index 0000000..3e68344 --- /dev/null +++ b/src/equihashR.h @@ -0,0 +1,246 @@ +// Copyright (c) 2019 The Beam Team + +// Based on Reference Implementation of the Equihash Proof-of-Work algorithm. +// Copyright (c) 2016 Jack Grigg +// Copyright (c) 2016 The Zcash developers + +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +// Resources: +// Alex Biryukov and Dmitry Khovratovich +// Equihash: Asymmetric Proof-of-Work Based on the Generalized Birthday Problem +// NDSS ’16, 21-24 February 2016, San Diego, CA, USA +// https://www.internetsociety.org/sites/default/files/blogs-media/equihash-asymmetric-proof-of-work-based-generalized-birthday-problem.pdf + +#ifndef EQUIHASHR_H +#define EQUIHASHR_H + +#include "blake/blake2.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "powScheme.h" + +typedef blake2b_state eh_HashState; +typedef uint32_t eh_index; +typedef uint8_t eh_trunc; + +void ExpandArray(const unsigned char* in, size_t in_len, + unsigned char* out, size_t out_len, + size_t bit_len, size_t byte_pad=0); +void CompressArray(const unsigned char* in, size_t in_len, + unsigned char* out, size_t out_len, + size_t bit_len, size_t byte_pad=0); + +eh_index ArrayToEhIndex(const unsigned char* array); +eh_trunc TruncateIndex(const eh_index i, const unsigned int ilen); + +std::vector GetIndicesFromMinimal(std::vector minimal, + size_t cBitLen); +std::vector GetMinimalFromIndices(std::vector indices, + size_t cBitLen); + +template +class StepRow +{ + template + friend class StepRow; + friend class CompareSR; + +protected: + unsigned char hash[WIDTH]; + +public: + StepRow(const unsigned char* hashIn, size_t hInLen, + size_t hLen, size_t cBitLen); + ~StepRow() { } + + template + StepRow(const StepRow& a); + + bool IsZero(size_t len); + + template + friend bool HasCollision(StepRow& a, StepRow& b, size_t l); +}; + +class 
CompareSR +{ +private: + size_t len; + +public: + CompareSR(size_t l) : len {l} { } + + template + inline bool operator()(const StepRow& a, const StepRow& b) { return memcmp(a.hash, b.hash, len) < 0; } +}; + +template +bool HasCollision(StepRow& a, StepRow& b, size_t l); + +template +class FullStepRow : public StepRow +{ + template + friend class FullStepRow; + + using StepRow::hash; + +public: + FullStepRow(const unsigned char* hashIn, size_t hInLen, + size_t hLen, size_t cBitLen, eh_index i); + ~FullStepRow() { } + + FullStepRow(const FullStepRow& a) : StepRow {a} { } + template + FullStepRow(const FullStepRow& a, const FullStepRow& b, size_t len, size_t lenIndices, size_t trim); + FullStepRow& operator=(const FullStepRow& a); + + inline bool IndicesBefore(const FullStepRow& a, size_t len, size_t lenIndices) const { return memcmp(hash+len, a.hash+len, lenIndices) < 0; } + std::vector GetIndices(size_t len, size_t lenIndices, + size_t cBitLen) const; + + template + friend bool DistinctIndices(const FullStepRow& a, const FullStepRow& b, + size_t len, size_t lenIndices); + template + friend bool IsValidBranch(const FullStepRow& a, const size_t len, const unsigned int ilen, const eh_trunc t); +}; + +template +class TruncatedStepRow : public StepRow +{ + template + friend class TruncatedStepRow; + + using StepRow::hash; + +public: + TruncatedStepRow(const unsigned char* hashIn, size_t hInLen, + size_t hLen, size_t cBitLen, + eh_index i, unsigned int ilen); + ~TruncatedStepRow() { } + + TruncatedStepRow(const TruncatedStepRow& a) : StepRow {a} { } + template + TruncatedStepRow(const TruncatedStepRow& a, const TruncatedStepRow& b, size_t len, size_t lenIndices, int trim); + TruncatedStepRow& operator=(const TruncatedStepRow& a); + + inline bool IndicesBefore(const TruncatedStepRow& a, size_t len, size_t lenIndices) const { return memcmp(hash+len, a.hash+len, lenIndices) < 0; } + std::shared_ptr GetTruncatedIndices(size_t len, size_t lenIndices) const; +}; + + + +inline 
constexpr const size_t max(const size_t A, const size_t B) { return A > B ? A : B; } + +inline constexpr size_t beamhash_solution_size(unsigned int N, unsigned int K) { + return (1 << K)*(N/(K+1)+1)/8; +} + +constexpr uint8_t GetSizeInBytes(size_t N) +{ + return static_cast((N + 7) / 8); +} + + + +template +bool DistinctIndices(const FullStepRow& a, const FullStepRow& b, size_t len, size_t lenIndices) +{ + for(size_t i = 0; i < lenIndices; i += sizeof(eh_index)) { + for(size_t j = 0; j < lenIndices; j += sizeof(eh_index)) { + if (memcmp(a.hash+len+i, b.hash+len+j, sizeof(eh_index)) == 0) { + return false; + } + } + } + return true; +} + +template +bool IsProbablyDuplicate(std::shared_ptr indices, size_t lenIndices) +{ + bool checked_index[MAX_INDICES] = {false}; + size_t count_checked = 0; + for (size_t z = 0; z < lenIndices; z++) { + // Skip over indices we have already paired + if (!checked_index[z]) { + for (size_t y = z+1; y < lenIndices; y++) { + if (!checked_index[y] && indices.get()[z] == indices.get()[y]) { + // Pair found + checked_index[y] = true; + count_checked += 2; + break; + } + } + } + } + return count_checked == lenIndices; +} + +template +bool IsValidBranch(const FullStepRow& a, const size_t len, const unsigned int ilen, const eh_trunc t) +{ + return TruncateIndex(ArrayToEhIndex(a.hash+len), ilen) == t; +} + + + +template +class EquihashR : public PoWScheme +{ + +public: + enum : size_t { IndicesPerHashOutput=512/N }; + enum : size_t { HashOutput = IndicesPerHashOutput * GetSizeInBytes(N) }; + enum : size_t { CollisionBitLength=N/(K+1) }; + enum : size_t { CollisionByteLength=(CollisionBitLength+7)/8 }; + enum : size_t { HashLength=(K+1)*CollisionByteLength }; + enum : size_t { FullWidth=2*CollisionByteLength+sizeof(eh_index)*(1 << (K-1)) }; + enum : size_t { FinalFullWidth=2*CollisionByteLength+sizeof(eh_index)*(1 << (K)) }; + enum : size_t { TruncatedWidth=max(HashLength+sizeof(eh_trunc), 2*CollisionByteLength+sizeof(eh_trunc)*(1 << (K-1))) }; + 
enum : size_t { FinalTruncatedWidth=max(HashLength+sizeof(eh_trunc), 2*CollisionByteLength+sizeof(eh_trunc)*(1 << (K))) }; + enum : size_t { SolutionWidth=(1 << K)*(CollisionBitLength+1)/8 }; + + EquihashR() { } + + int InitialiseState(eh_HashState& base_state); + bool IsValidSolution(const eh_HashState& base_state, std::vector soln); + bool OptimisedSolve(const eh_HashState& base_state, + const std::function&)> validBlock, + const std::function cancelled); +}; + +static EquihashR<150,5,0> BeamHashI; +static EquihashR<150,5,3> BeamHashII; + + +#define EhRInitialiseState(n, k, r, base_state) \ + if (n == 150 && k == 5 && r == 0) { \ + BeamHashI.InitialiseState(base_state); \ + } else if (n == 150 && k == 5 && r == 3) { \ + BeamHashII.InitialiseState(base_state); \ + } else { \ + throw std::invalid_argument("Unsupported Equihash parameters"); \ + } + +#define EhRIsValidSolution(n, k, r, base_state, soln, ret) \ + if (n == 150 && k == 5 && r == 0) { \ + ret = BeamHashI.IsValidSolution(base_state, soln); \ + } else if (n == 150 && k == 5 && r == 3) { \ + ret = BeamHashII.IsValidSolution(base_state, soln); \ + } else { \ + throw std::invalid_argument("Unsupported Equihash parameters"); \ + } + + +#endif \ No newline at end of file diff --git a/src/equihashR_imp.cpp b/src/equihashR_imp.cpp new file mode 100644 index 0000000..981c013 --- /dev/null +++ b/src/equihashR_imp.cpp @@ -0,0 +1,698 @@ +// Copyright (c) 2019 The Beam Team + +// Based on Reference Implementation of the Equihash Proof-of-Work algorithm. +// Copyright (c) 2016 Jack Grigg +// Copyright (c) 2016 The Zcash developers + +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+ +// Resources: +// Alex Biryukov and Dmitry Khovratovich +// Equihash: Asymmetric Proof-of-Work Based on the Generalized Birthday Problem +// NDSS ’16, 21-24 February 2016, San Diego, CA, USA +// https://www.internetsociety.org/sites/default/files/blogs-media/equihash-asymmetric-proof-of-work-based-generalized-birthday-problem.pdf + +#include "compat/endian.h" +#include "equihashR.h" +//#include "util.h" + +#include +#include +#include +#include + +SolverCancelledException solver_cancelled; + +namespace +{ + constexpr void ZeroizeUnusedBits(size_t N, size_t R, unsigned char* hash, size_t hLen) + { + uint8_t rem = N % 8; + const size_t step = GetSizeInBytes(N); + + if (rem) + { + // clear lowest 8-rem bits + for (size_t i = step - 1; i < hLen; i += step) { + uint8_t b = 0xff << (8-rem); + hash[i] &= b; + } + } + + if (R) { + for (size_t i = 0; i < hLen; i += step) { + uint8_t b = 0xff >> (2*R); + hash[i] &= b; + } + } + } +} + +template +int EquihashR::InitialiseState(eh_HashState& base_state) +{ + uint32_t le_N = htole32(N); + uint32_t le_K = htole32(K); + + unsigned char personalization[BLAKE2B_PERSONALBYTES] = {}; + memcpy(personalization, "Beam-PoW", 8); + memcpy(personalization+8, &le_N, 4); + memcpy(personalization+12, &le_K, 4); + + const uint8_t outlen = (512 / N) * GetSizeInBytes(N); + + //static_assert(!((!outlen) || (outlen > BLAKE2B_OUTBYTES))); + + blake2b_param param = {0}; + param.digest_length = outlen; + param.fanout = 1; + param.depth = 1; + + memcpy(¶m.personal, personalization, BLAKE2B_PERSONALBYTES); + + return blake2b_init_param(&base_state, ¶m); +} + +void GenerateHash(const eh_HashState& base_state, eh_index g, + unsigned char* hash, size_t hLen, size_t N, size_t R ) +{ + + + uint32_t myHash[16] = {0}; + uint32_t startIndex = g & 0xFFFFFFF0; + + for (uint32_t g2 = startIndex; g2 <= g; g2++) { + uint32_t tmpHash[16] = {0}; + + eh_HashState state; + state = base_state; + eh_index lei = htole32(g2); + blake2b_update(&state, (const unsigned 
char*) &lei, + sizeof(eh_index)); + + blake2b_final(&state, (unsigned char*)&tmpHash[0], static_cast(hLen)); + + for (uint32_t idx = 0; idx < 16; idx++) myHash[idx] += tmpHash[idx]; + } + + memcpy(hash, &myHash[0], hLen); + ZeroizeUnusedBits(N, R, hash, hLen); +} + +void ExpandArray(const unsigned char* in, size_t in_len, + unsigned char* out, size_t out_len, + size_t bit_len, size_t byte_pad) +{ + assert(bit_len >= 8); + assert(8*sizeof(uint32_t) >= bit_len); + + size_t out_width { (bit_len+7)/8 + byte_pad }; + assert(out_len == 8*out_width*in_len/bit_len); + + uint32_t bit_len_mask { ((uint32_t)1 << bit_len) - 1 }; + + // The acc_bits least-significant bits of acc_value represent a bit sequence + // in big-endian order. + size_t acc_bits = 0; + uint32_t acc_value = 0; + + size_t j = 0; + for (size_t i = 0; i < in_len; i++) { + acc_value = (acc_value << 8) | in[i]; + acc_bits += 8; + + // When we have bit_len or more bits in the accumulator, write the next + // output element. + if (acc_bits >= bit_len) { + acc_bits -= bit_len; + for (size_t x = 0; x < byte_pad; x++) { + out[j+x] = 0; + } + for (size_t x = byte_pad; x < out_width; x++) { + out[j+x] = ( + // Big-endian + acc_value >> (acc_bits+(8*(out_width-x-1))) + ) & ( + // Apply bit_len_mask across byte boundaries + (bit_len_mask >> (8*(out_width-x-1))) & 0xFF + ); + } + j += out_width; + } + } +} + +void CompressArray(const unsigned char* in, size_t in_len, + unsigned char* out, size_t out_len, + size_t bit_len, size_t byte_pad) +{ + assert(bit_len >= 8); + assert(8*sizeof(uint32_t) >= bit_len); + + size_t in_width { (bit_len+7)/8 + byte_pad }; + assert(out_len == (bit_len*in_len/in_width + 7)/8); + + uint32_t bit_len_mask { ((uint32_t)1 << bit_len) - 1 }; + + // The acc_bits least-significant bits of acc_value represent a bit sequence + // in big-endian order. 
+ size_t acc_bits = 0; + uint32_t acc_value = 0; + + size_t j = 0; + for (size_t i = 0; i < out_len; i++) { + // When we have fewer than 8 bits left in the accumulator, read the next + // input element. + if (acc_bits < 8) { + if (j < in_len) { + acc_value = acc_value << bit_len; + for (size_t x = byte_pad; x < in_width; x++) { + acc_value = acc_value | ( + ( + // Apply bit_len_mask across byte boundaries + in[j + x] & ((bit_len_mask >> (8 * (in_width - x - 1))) & 0xFF) + ) << (8 * (in_width - x - 1))); // Big-endian + } + j += in_width; + acc_bits += bit_len; + } + else { + acc_value <<= 8 - acc_bits; + acc_bits += 8 - acc_bits;; + } + } + + acc_bits -= 8; + out[i] = (acc_value >> acc_bits) & 0xFF; + } +} + +// Big-endian so that lexicographic array comparison is equivalent to integer +// comparison +void EhIndexToArray(const eh_index i, unsigned char* array) +{ + //static_assert(sizeof(eh_index) == 4); + eh_index bei = htobe32(i); + memcpy(array, &bei, sizeof(eh_index)); +} + +// Big-endian so that lexicographic array comparison is equivalent to integer +// comparison +eh_index ArrayToEhIndex(const unsigned char* array) +{ + //static_assert(sizeof(eh_index) == 4); + eh_index bei; + memcpy(&bei, array, sizeof(eh_index)); + return be32toh(bei); +} + +eh_trunc TruncateIndex(const eh_index i, const unsigned int ilen) +{ + // Truncate to 8 bits + //static_assert(sizeof(eh_trunc) == 1); + return (i >> (ilen - 8)) & 0xff; +} + +eh_index UntruncateIndex(const eh_trunc t, const eh_index r, const unsigned int ilen) +{ + eh_index i{t}; + return (i << (ilen - 8)) | r; +} + +std::vector GetIndicesFromMinimal(std::vector minimal, + size_t cBitLen) +{ + assert(((cBitLen+1)+7)/8 <= sizeof(eh_index)); + size_t lenIndices { 8*sizeof(eh_index)*minimal.size()/(cBitLen+1) }; + size_t bytePad { sizeof(eh_index) - ((cBitLen+1)+7)/8 }; + std::vector array(lenIndices); + ExpandArray(minimal.data(), minimal.size(), + array.data(), lenIndices, cBitLen+1, bytePad); + std::vector ret; + for 
(size_t i = 0; i < lenIndices; i += sizeof(eh_index)) { + ret.push_back(ArrayToEhIndex(array.data()+i)); + } + return ret; +} + +std::vector GetMinimalFromIndices(std::vector indices, + size_t cBitLen) +{ + assert(((cBitLen+1)+7)/8 <= sizeof(eh_index)); + size_t lenIndices { indices.size()*sizeof(eh_index) }; + size_t minLen { (cBitLen+1)*lenIndices/(8*sizeof(eh_index)) }; + size_t bytePad { sizeof(eh_index) - ((cBitLen+1)+7)/8 }; + std::vector array(lenIndices); + for (size_t i = 0; i < indices.size(); i++) { + EhIndexToArray(indices[i], array.data()+(i*sizeof(eh_index))); + } + std::vector ret(minLen); + CompressArray(array.data(), lenIndices, + ret.data(), minLen, cBitLen+1, bytePad); + return ret; +} + +template +StepRow::StepRow(const unsigned char* hashIn, size_t hInLen, + size_t hLen, size_t cBitLen) +{ + assert(hLen <= WIDTH); + ExpandArray(hashIn, hInLen, hash, hLen, cBitLen); +} + +template template +StepRow::StepRow(const StepRow& a) +{ + //static_assert(W <= WIDTH); + std::copy(a.hash, a.hash+W, hash); +} + +template +FullStepRow::FullStepRow(const unsigned char* hashIn, size_t hInLen, + size_t hLen, size_t cBitLen, eh_index i) : + StepRow {hashIn, hInLen, hLen, cBitLen} +{ + EhIndexToArray(i, hash+hLen); +} + +template template +FullStepRow::FullStepRow(const FullStepRow& a, const FullStepRow& b, size_t len, size_t lenIndices, size_t trim) : + StepRow {a} +{ + assert(len+lenIndices <= W); + assert(len-trim+(2*lenIndices) <= WIDTH); + for (size_t i = trim; i < len; i++) + hash[i-trim] = a.hash[i] ^ b.hash[i]; + if (a.IndicesBefore(b, len, lenIndices)) { + std::copy(a.hash+len, a.hash+len+lenIndices, hash+len-trim); + std::copy(b.hash+len, b.hash+len+lenIndices, hash+len-trim+lenIndices); + } else { + std::copy(b.hash+len, b.hash+len+lenIndices, hash+len-trim); + std::copy(a.hash+len, a.hash+len+lenIndices, hash+len-trim+lenIndices); + } +} + +template +FullStepRow& FullStepRow::operator=(const FullStepRow& a) +{ + std::copy(a.hash, a.hash+WIDTH, hash); 
+ return *this; +} + +template +bool StepRow::IsZero(size_t len) +{ + // This doesn't need to be constant time. + for (size_t i = 0; i < len; i++) { + if (hash[i] != 0) + return false; + } + return true; +} + +template +std::vector FullStepRow::GetIndices(size_t len, size_t lenIndices, + size_t cBitLen) const +{ + assert(((cBitLen+1)+7)/8 <= sizeof(eh_index)); + size_t minLen { (cBitLen+1)*lenIndices/(8*sizeof(eh_index)) }; + size_t bytePad { sizeof(eh_index) - ((cBitLen+1)+7)/8 }; + std::vector ret(minLen); + CompressArray(hash+len, lenIndices, ret.data(), minLen, cBitLen+1, bytePad); + return ret; +} + +template +bool HasCollision(StepRow& a, StepRow& b, size_t l) +{ + // This doesn't need to be constant time. + for (size_t j = 0; j < l; j++) { + if (a.hash[j] != b.hash[j]) + return false; + } + return true; +} + +template +TruncatedStepRow::TruncatedStepRow(const unsigned char* hashIn, size_t hInLen, + size_t hLen, size_t cBitLen, + eh_index i, unsigned int ilen) : + StepRow {hashIn, hInLen, hLen, cBitLen} +{ + hash[hLen] = TruncateIndex(i, ilen); +} + +template template +TruncatedStepRow::TruncatedStepRow(const TruncatedStepRow& a, const TruncatedStepRow& b, size_t len, size_t lenIndices, int trim) : + StepRow {a} +{ + assert(len+lenIndices <= W); + assert(len-trim+(2*lenIndices) <= WIDTH); + for (size_t i = static_cast(trim); i < len; i++) + hash[i-trim] = a.hash[i] ^ b.hash[i]; + if (a.IndicesBefore(b, len, lenIndices)) { + std::copy(a.hash+len, a.hash+len+lenIndices, hash+len-trim); + std::copy(b.hash+len, b.hash+len+lenIndices, hash+len-trim+lenIndices); + } else { + std::copy(b.hash+len, b.hash+len+lenIndices, hash+len-trim); + std::copy(a.hash+len, a.hash+len+lenIndices, hash+len-trim+lenIndices); + } +} + +template +TruncatedStepRow& TruncatedStepRow::operator=(const TruncatedStepRow& a) +{ + std::copy(a.hash, a.hash+WIDTH, hash); + return *this; +} + +template +std::shared_ptr TruncatedStepRow::GetTruncatedIndices(size_t len, size_t lenIndices) const +{ 
+ std::shared_ptr p (new eh_trunc[lenIndices], std::default_delete()); + std::copy(hash+len, hash+len+lenIndices, p.get()); + return p; +} + +template +void CollideBranches(std::vector>& X, const size_t hlen, const size_t lenIndices, const unsigned int clen, const unsigned int ilen, const eh_trunc lt, const eh_trunc rt) +{ + size_t i = 0; + size_t posFree = 0; + assert(X.size() > 0); + std::vector> Xc; + while (i < X.size() - 1) { + // 2b) Find next set of unordered pairs with collisions on the next n/(k+1) bits + size_t j = 1; + while (i+j < X.size() && + HasCollision(X[i], X[i+j], clen)) { + j++; + } + + // 2c) Calculate tuples (X_i ^ X_j, (i, j)) + for (size_t l = 0; l < j - 1; l++) { + for (size_t m = l + 1; m < j; m++) { + if (DistinctIndices(X[i+l], X[i+m], hlen, lenIndices)) { + if (IsValidBranch(X[i+l], hlen, ilen, lt) && IsValidBranch(X[i+m], hlen, ilen, rt)) { + Xc.emplace_back(X[i+l], X[i+m], hlen, lenIndices, clen); + } else if (IsValidBranch(X[i+m], hlen, ilen, lt) && IsValidBranch(X[i+l], hlen, ilen, rt)) { + Xc.emplace_back(X[i+m], X[i+l], hlen, lenIndices, clen); + } + } + } + } + + // 2d) Store tuples on the table in-place if possible + while (posFree < i+j && Xc.size() > 0) { + X[posFree++] = Xc.back(); + Xc.pop_back(); + } + + i += j; + } + + // 2e) Handle edge case where final table entry has no collision + while (posFree < X.size() && Xc.size() > 0) { + X[posFree++] = Xc.back(); + Xc.pop_back(); + } + + if (Xc.size() > 0) { + // 2f) Add overflow to end of table + X.insert(X.end(), Xc.begin(), Xc.end()); + } else if (posFree < X.size()) { + // 2g) Remove empty space at the end + X.erase(X.begin()+posFree, X.end()); + X.shrink_to_fit(); + } +} + +template +bool EquihashR::OptimisedSolve(const eh_HashState& base_state, + const std::function&)> validBlock, + const std::function cancelled) +{ + eh_index init_size { 1U << (CollisionBitLength + 1 - R) }; + eh_index recreate_size { UntruncateIndex(1, 0, CollisionBitLength + 1) }; + + // First run the 
algorithm with truncated indices + + const eh_index soln_size { 1 << K }; + std::vector> partialSolns; + int invalidCount = 0; + { + + // 1) Generate first list + size_t hashLen = HashLength; + size_t lenIndices = sizeof(eh_trunc); + std::vector> Xt; + Xt.reserve(init_size); + unsigned char tmpHash[HashOutput]; + for (eh_index g = 0; Xt.size() < init_size; g++) { + GenerateHash(base_state, g, tmpHash, HashOutput, N, R); + for (eh_index i = 0; i < IndicesPerHashOutput && Xt.size() < init_size; i++) { + Xt.emplace_back(tmpHash+(i*GetSizeInBytes(N)), GetSizeInBytes(N), HashLength, CollisionBitLength, + static_cast(g*IndicesPerHashOutput)+i, static_cast(CollisionBitLength + 1)); + } + if (cancelled(ListGeneration)) throw solver_cancelled; + } + + // 3) Repeat step 2 until 2n/(k+1) bits remain + for (unsigned int r = 1; r < K && Xt.size() > 0; r++) { + // 2a) Sort the list + std::sort(Xt.begin(), Xt.end(), CompareSR(CollisionByteLength)); + if (cancelled(ListSorting)) throw solver_cancelled; + + size_t i = 0; + size_t posFree = 0; + std::vector> Xc; + while (i < Xt.size() - 1) { + // 2b) Find next set of unordered pairs with collisions on the next n/(k+1) bits + size_t j = 1; + while (i+j < Xt.size() && + HasCollision(Xt[i], Xt[i+j], CollisionByteLength)) { + j++; + } + + // 2c) Calculate tuples (X_i ^ X_j, (i, j)) + //bool checking_for_zero = (i == 0 && Xt[0].IsZero(hashLen)); + for (size_t l = 0; l < j - 1; l++) { + for (size_t m = l + 1; m < j; m++) { + // We truncated, so don't check for distinct indices here + TruncatedStepRow Xi {Xt[i+l], Xt[i+m], + hashLen, lenIndices, + CollisionByteLength}; + if (!(Xi.IsZero(hashLen-CollisionByteLength) && + IsProbablyDuplicate(Xi.GetTruncatedIndices(hashLen-CollisionByteLength, 2*lenIndices), + 2*lenIndices))) { + Xc.emplace_back(Xi); + } + } + } + + // 2d) Store tuples on the table in-place if possible + while (posFree < i+j && Xc.size() > 0) { + Xt[posFree++] = Xc.back(); + Xc.pop_back(); + } + + i += j; + if 
(cancelled(ListColliding)) throw solver_cancelled; + } + + // 2e) Handle edge case where final table entry has no collision + while (posFree < Xt.size() && Xc.size() > 0) { + Xt[posFree++] = Xc.back(); + Xc.pop_back(); + } + + if (Xc.size() > 0) { + // 2f) Add overflow to end of table + Xt.insert(Xt.end(), Xc.begin(), Xc.end()); + } else if (posFree < Xt.size()) { + // 2g) Remove empty space at the end + Xt.erase(Xt.begin()+posFree, Xt.end()); + Xt.shrink_to_fit(); + } + + hashLen -= CollisionByteLength; + lenIndices *= 2; + if (cancelled(RoundEnd)) throw solver_cancelled; + } + + // k+1) Find a collision on last 2n(k+1) bits + if (Xt.size() > 1) { + std::sort(Xt.begin(), Xt.end(), CompareSR(hashLen)); + if (cancelled(FinalSorting)) throw solver_cancelled; + size_t i = 0; + while (i < Xt.size() - 1) { + size_t j = 1; + while (i+j < Xt.size() && + HasCollision(Xt[i], Xt[i+j], hashLen)) { + j++; + } + + for (size_t l = 0; l < j - 1; l++) { + for (size_t m = l + 1; m < j; m++) { + TruncatedStepRow res(Xt[i+l], Xt[i+m], + hashLen, lenIndices, 0); + auto soln = res.GetTruncatedIndices(hashLen, 2*lenIndices); + if (!IsProbablyDuplicate(soln, 2*lenIndices)) { + partialSolns.push_back(soln); + } + } + } + + i += j; + if (cancelled(FinalColliding)) throw solver_cancelled; + } + } + + } // Ensure Xt goes out of scope and is destroyed + + + // Now for each solution run the algorithm again to recreate the indices + for (std::shared_ptr partialSoln : partialSolns) { + std::set> solns; + size_t hashLen; + size_t lenIndices; + unsigned char tmpHash[HashOutput]; + std::vector>>> X; + X.reserve(K+1); + + // 3) Repeat steps 1 and 2 for each partial index + for (eh_index i = 0; i < soln_size; i++) { + // 1) Generate first list of possibilities + std::vector> icv; + icv.reserve(recreate_size); + for (eh_index j = 0; j < recreate_size; j++) { + eh_index newIndex { UntruncateIndex(partialSoln.get()[i], j, CollisionBitLength + 1) }; + if (j == 0 || newIndex % IndicesPerHashOutput == 0) { 
+ GenerateHash(base_state, newIndex/IndicesPerHashOutput, + tmpHash, HashOutput, N, R); + } + icv.emplace_back(tmpHash+((newIndex % IndicesPerHashOutput) * GetSizeInBytes(N)), + GetSizeInBytes(N), HashLength, CollisionBitLength, newIndex); + if (cancelled(PartialGeneration)) throw solver_cancelled; + } + boost::optional>> ic = icv; + + // 2a) For each pair of lists: + hashLen = HashLength; + lenIndices = sizeof(eh_index); + size_t rti = i; + for (size_t r = 0; r <= K; r++) { + // 2b) Until we are at the top of a subtree: + if (r < X.size()) { + if (X[r]) { + // 2c) Merge the lists + ic->reserve(ic->size() + X[r]->size()); + ic->insert(ic->end(), X[r]->begin(), X[r]->end()); + std::sort(ic->begin(), ic->end(), CompareSR(hashLen)); + if (cancelled(PartialSorting)) throw solver_cancelled; + size_t lti = rti-(static_cast(1)<size() == 0) + goto invalidsolution; + + X[r] = boost::none; + hashLen -= CollisionByteLength; + lenIndices *= 2; + rti = lti; + } else { + X[r] = *ic; + break; + } + } else { + X.push_back(ic); + break; + } + if (cancelled(PartialSubtreeEnd)) throw solver_cancelled; + } + if (cancelled(PartialIndexEnd)) throw solver_cancelled; + } + + // We are at the top of the tree + assert(X.size() == K+1); + for (FullStepRow row : *X[K]) { + auto soln = row.GetIndices(hashLen, lenIndices, CollisionBitLength); + assert(soln.size() == beamhash_solution_size(N, K)); + solns.insert(soln); + } + for (auto soln : solns) { + if (validBlock(soln)) + return true; + } + if (cancelled(PartialEnd)) throw solver_cancelled; + continue; + +invalidsolution: + invalidCount++; + } + + return false; +} + +template +bool EquihashR::IsValidSolution(const eh_HashState& base_state, std::vector soln) +{ + if (soln.size() != SolutionWidth) { + return false; + } + + std::vector> X; + X.reserve(1 << K); + unsigned char tmpHash[HashOutput]; + for (eh_index i : GetIndicesFromMinimal(soln, CollisionBitLength)) { + if (i >= (1U << (CollisionBitLength + 1 - R))) { + return false; + } + 
GenerateHash(base_state, i/IndicesPerHashOutput, tmpHash, HashOutput, N, R); + X.emplace_back(tmpHash+((i % IndicesPerHashOutput) * GetSizeInBytes(N)), + GetSizeInBytes(N), HashLength, CollisionBitLength, i); + } + + size_t hashLen = HashLength; + size_t lenIndices = sizeof(eh_index); + while (X.size() > 1) { + std::vector> Xc; + for (size_t i = 0; i < X.size(); i += 2) { + if (!HasCollision(X[i], X[i+1], CollisionByteLength)) { + return false; + } + if (X[i+1].IndicesBefore(X[i], hashLen, lenIndices)) { + return false; + } + if (!DistinctIndices(X[i], X[i+1], hashLen, lenIndices)) { + return false; + } + Xc.emplace_back(X[i], X[i+1], hashLen, lenIndices, CollisionByteLength); + } + X = Xc; + hashLen -= CollisionByteLength; + lenIndices *= 2; + } + + assert(X.size() == 1); + return X[0].IsZero(hashLen); +} + +// Explicit instantiations for BeamHashI +template int EquihashR<150,5,0>::InitialiseState(eh_HashState& base_state); +template bool EquihashR<150,5,0>::IsValidSolution(const eh_HashState& base_state, std::vector soln); +template bool EquihashR<150,5,0>::OptimisedSolve(const eh_HashState& base_state, + const std::function&)> validBlock, + const std::function cancelled); + +// Explicit instantiations for BeamHashII +template int EquihashR<150,5,3>::InitialiseState(eh_HashState& base_state); +template bool EquihashR<150,5,3>::IsValidSolution(const eh_HashState& base_state, std::vector soln); +template bool EquihashR<150,5,3>::OptimisedSolve(const eh_HashState& base_state, + const std::function&)> validBlock, + const std::function cancelled); diff --git a/src/powScheme.h b/src/powScheme.h new file mode 100644 index 0000000..0164e38 --- /dev/null +++ b/src/powScheme.h @@ -0,0 +1,42 @@ +#include + +#ifndef POWSCHEME_H +#define POWSCHEME_H + +#include "blake/blake2.h" + + +enum SolverCancelCheck +{ + ListGeneration, + ListSorting, + ListColliding, + RoundEnd, + FinalSorting, + FinalColliding, + PartialGeneration, + PartialSorting, + PartialSubtreeEnd, + 
PartialIndexEnd, + PartialEnd, + MixElements +}; + +class SolverCancelledException : public std::exception +{ + virtual const char* what() const throw() { + return "BeamHash solver was cancelled"; + } +}; + + +class PoWScheme { +public: + virtual int InitialiseState(blake2b_state& base_state) = 0; + virtual bool IsValidSolution(const blake2b_state& base_state, std::vector soln) = 0; + virtual bool OptimisedSolve(const blake2b_state& base_state, + const std::function&)> validBlock, + const std::function cancelled) = 0; +}; + +#endif \ No newline at end of file diff --git a/src/sha256.h b/src/sha256.h new file mode 100644 index 0000000..606cd71 --- /dev/null +++ b/src/sha256.h @@ -0,0 +1,32 @@ +// Copyright (c) 2014 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_CRYPTO_SHA256_H +#define BITCOIN_CRYPTO_SHA256_H + +#include +#include + +/** A hasher class for SHA-256. 
*/ +class CSHA256 +{ +public: + static const size_t OUTPUT_SIZE = 32; + + CSHA256(); + CSHA256& Write(const unsigned char* data, size_t len); + void Finalize(unsigned char hash[OUTPUT_SIZE]); + void FinalizeNoPadding(unsigned char hash[OUTPUT_SIZE]) { + FinalizeNoPadding(hash, true); + }; + CSHA256& Reset(); + +private: + uint32_t s[8]; + unsigned char buf[64]; + size_t bytes; + void FinalizeNoPadding(unsigned char hash[OUTPUT_SIZE], bool enforce_compression); +}; + +#endif // BITCOIN_CRYPTO_SHA256_H \ No newline at end of file diff --git a/test.js b/test.js new file mode 100644 index 0000000..2124718 --- /dev/null +++ b/test.js @@ -0,0 +1,43 @@ +const beamhash = require('./index'); + +verify2Valid(); +verify2Invalid(); + + +function verify2Valid() { + + console.log('Verify valid BeamHashII'); + + const inputBuf = Buffer.from('f08e259aa23e1f517393a9e8d4634f9893b4058bf474233a9102e94ffee5fbb6', 'hex'); + const nonceBuf = Buffer.from('17dfb4348b000000', 'hex'); + const outputBuf = Buffer.from('01eb634242b17097be184f8225097425449eaad1167b585a1ee2053f5ec67529c07abec863f2c10c979dc7b41b01c258f8733fdd01f78bc60a7dd08de06465cd86025c6946c6ded03300c419323f08158c872f6a9168bc1467317509929d4500807102a69c48f7f4', 'hex'); + + const isValid = beamhash.verify2(inputBuf, nonceBuf, outputBuf); + + if (isValid) { + console.log('PASS'); + } + else { + console.log('FAIL'); + process.exit(-1); + } +} + +function verify2Invalid() { + + console.log('Verify invalid BeamHashII'); + + const inputBuf = Buffer.from('f08e259aa23e1f517393a9e8d4634f9893b4058bf474233a9102e94ffee5fbb6', 'hex'); + const nonceBuf = Buffer.from('17dfb4348b000000', 'hex'); + const outputBuf = Buffer.alloc(104, 0); + + const isValid = beamhash.verify2(inputBuf, nonceBuf, outputBuf); + + if (isValid) { + console.log('FAIL'); + process.exit(-1); + } + else { + console.log('PASS'); + } +} \ No newline at end of file