diff --git a/src/build_log.cc b/src/build_log.cc
index 52c7c84f85..073d2fe81e 100644
--- a/src/build_log.cc
+++ b/src/build_log.cc
@@ -53,63 +53,14 @@ using namespace std;
 namespace {
 
 const char kFileSignature[] = "# ninja log v%d\n";
-const int kOldestSupportedVersion = 6;
-const int kCurrentVersion = 6;
-
-// 64bit MurmurHash2, by Austin Appleby
-#if defined(_MSC_VER)
-#define BIG_CONSTANT(x) (x)
-#else // defined(_MSC_VER)
-#define BIG_CONSTANT(x) (x##LLU)
-#endif // !defined(_MSC_VER)
-inline
-uint64_t MurmurHash64A(const void* key, size_t len) {
-  static const uint64_t seed = 0xDECAFBADDECAFBADull;
-  const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
-  const int r = 47;
-  uint64_t h = seed ^ (len * m);
-  const unsigned char* data = static_cast<const unsigned char*>(key);
-  while (len >= 8) {
-    uint64_t k;
-    memcpy(&k, data, sizeof k);
-    k *= m;
-    k ^= k >> r;
-    k *= m;
-    h ^= k;
-    h *= m;
-    data += 8;
-    len -= 8;
-  }
-  switch (len & 7)
-  {
-  case 7: h ^= uint64_t(data[6]) << 48;
-          NINJA_FALLTHROUGH;
-  case 6: h ^= uint64_t(data[5]) << 40;
-          NINJA_FALLTHROUGH;
-  case 5: h ^= uint64_t(data[4]) << 32;
-          NINJA_FALLTHROUGH;
-  case 4: h ^= uint64_t(data[3]) << 24;
-          NINJA_FALLTHROUGH;
-  case 3: h ^= uint64_t(data[2]) << 16;
-          NINJA_FALLTHROUGH;
-  case 2: h ^= uint64_t(data[1]) << 8;
-          NINJA_FALLTHROUGH;
-  case 1: h ^= uint64_t(data[0]);
-          h *= m;
-  };
-  h ^= h >> r;
-  h *= m;
-  h ^= h >> r;
-  return h;
-}
-#undef BIG_CONSTANT
-
+const int kOldestSupportedVersion = 7;
+const int kCurrentVersion = 7;
 
 }  // namespace
 
 // static
 uint64_t BuildLog::LogEntry::HashCommand(StringPiece command) {
-  return MurmurHash64A(command.str_, command.len_);
+  return rapidhash(command.str_, command.len_);
 }
 
 BuildLog::LogEntry::LogEntry(const string& output)
diff --git a/src/build_log_test.cc b/src/build_log_test.cc
index 12c2dc742c..630b1f1a92 100644
--- a/src/build_log_test.cc
+++ b/src/build_log_test.cc
@@ -104,7 +104,7 @@ TEST_F(BuildLogTest, FirstWriteAddsSignature) {
 
 TEST_F(BuildLogTest, DoubleEntry) {
   FILE* f = fopen(kTestFilename, "wb");
-  fprintf(f, "# ninja log v6\n");
+  fprintf(f, "# ninja log v7\n");
   fprintf(f, "0\t1\t2\tout\t%" PRIx64 "\n",
           BuildLog::LogEntry::HashCommand("command abc"));
   fprintf(f, "0\t1\t2\tout\t%" PRIx64 "\n",
@@ -177,7 +177,7 @@ TEST_F(BuildLogTest, ObsoleteOldVersion) {
 
 TEST_F(BuildLogTest, SpacesInOutput) {
   FILE* f = fopen(kTestFilename, "wb");
-  fprintf(f, "# ninja log v6\n");
+  fprintf(f, "# ninja log v7\n");
   fprintf(f, "123\t456\t456\tout with space\t%" PRIx64 "\n",
           BuildLog::LogEntry::HashCommand("command"));
   fclose(f);
@@ -200,10 +200,10 @@ TEST_F(BuildLogTest, DuplicateVersionHeader) {
   // build log on Windows. This shouldn't crash, and the second version header
   // should be ignored.
   FILE* f = fopen(kTestFilename, "wb");
-  fprintf(f, "# ninja log v6\n");
+  fprintf(f, "# ninja log v7\n");
   fprintf(f, "123\t456\t456\tout\t%" PRIx64 "\n",
           BuildLog::LogEntry::HashCommand("command"));
-  fprintf(f, "# ninja log v6\n");
+  fprintf(f, "# ninja log v7\n");
   fprintf(f, "456\t789\t789\tout2\t%" PRIx64 "\n",
           BuildLog::LogEntry::HashCommand("command2"));
   fclose(f);
@@ -252,7 +252,7 @@ struct TestDiskInterface : public DiskInterface {
 
 TEST_F(BuildLogTest, Restat) {
   FILE* f = fopen(kTestFilename, "wb");
-  fprintf(f, "# ninja log v6\n"
+  fprintf(f, "# ninja log v7\n"
              "1\t2\t3\tout\tcommand\n");
   fclose(f);
   std::string err;
@@ -280,7 +280,7 @@ TEST_F(BuildLogTest, VeryLongInputLine) {
   // Ninja's build log buffer is currently 256kB. Lines longer than that are
   // silently ignored, but don't affect parsing of other lines.
   FILE* f = fopen(kTestFilename, "wb");
-  fprintf(f, "# ninja log v6\n");
+  fprintf(f, "# ninja log v7\n");
   fprintf(f, "123\t456\t456\tout\tcommand start");
   for (size_t i = 0; i < (512 << 10) / strlen(" more_command"); ++i)
     fputs(" more_command", f);
diff --git a/src/deps_log.cc b/src/deps_log.cc
index ec0a4898a2..364d54b896 100644
--- a/src/deps_log.cc
+++ b/src/deps_log.cc
@@ -185,15 +185,13 @@ LoadStatus DepsLog::Load(const string& path, State* state, string* err) {
     return LOAD_SUCCESS;
   }
 
-  long offset;
+  long offset = ftell(f);
   bool read_failed = false;
   int unique_dep_record_count = 0;
   int total_dep_record_count = 0;
   for (;;) {
-    offset = ftell(f);
-
     unsigned size;
-    if (fread(&size, 4, 1, f) < 1) {
+    if (fread(&size, sizeof(size), 1, f) < 1) {
       if (!feof(f))
         read_failed = true;
       break;
@@ -205,6 +203,7 @@ LoadStatus DepsLog::Load(const string& path, State* state, string* err) {
       read_failed = true;
       break;
     }
+    offset += size + sizeof(size);
 
     if (is_deps) {
       if ((size % 4) != 0) {
diff --git a/src/eval_env.cc b/src/eval_env.cc
index 796a3264d1..cbc935acc2 100644
--- a/src/eval_env.cc
+++ b/src/eval_env.cc
@@ -99,6 +99,10 @@ string BindingEnv::LookupWithFallback(const string& var,
 }
 
 string EvalString::Evaluate(Env* env) const {
+  if (parsed_.empty()) {
+    return single_token_;
+  }
+
   string result;
   for (TokenList::const_iterator i = parsed_.begin(); i != parsed_.end(); ++i) {
     if (i->second == RAW)
@@ -110,40 +114,57 @@ string EvalString::Evaluate(Env* env) const {
 }
 
 void EvalString::AddText(StringPiece text) {
-  // Add it to the end of an existing RAW token if possible.
-  if (!parsed_.empty() && parsed_.back().second == RAW) {
-    parsed_.back().first.append(text.str_, text.len_);
+  if (parsed_.empty()) {
+    single_token_.append(text.begin(), text.end());
+  } else if (!parsed_.empty() && parsed_.back().second == RAW) {
+    parsed_.back().first.append(text.begin(), text.end());
   } else {
-    parsed_.push_back(make_pair(text.AsString(), RAW));
+    parsed_.push_back(std::make_pair(text.AsString(), RAW));
   }
 }
+
 void EvalString::AddSpecial(StringPiece text) {
-  parsed_.push_back(make_pair(text.AsString(), SPECIAL));
+  if (parsed_.empty() && !single_token_.empty()) {
+    // Going from one to two tokens, so we can no longer apply
+    // our single_token_ optimization and need to push everything
+    // onto the vector.
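+    // (Example: after AddText("foo"), a following AddSpecial("bar") must
+    // first move "foo" into parsed_ as a RAW token before the SPECIAL
+    // token is appended.)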
+    parsed_.push_back(std::make_pair(std::move(single_token_), RAW));
+  }
+  parsed_.push_back(std::make_pair(text.AsString(), SPECIAL));
 }
 
 string EvalString::Serialize() const {
   string result;
-  for (TokenList::const_iterator i = parsed_.begin();
-       i != parsed_.end(); ++i) {
+  if (parsed_.empty() && !single_token_.empty()) {
     result.append("[");
-    if (i->second == SPECIAL)
-      result.append("$");
-    result.append(i->first);
+    result.append(single_token_);
     result.append("]");
+  } else {
+    for (const auto& pair : parsed_) {
+      result.append("[");
+      if (pair.second == SPECIAL)
+        result.append("$");
+      result.append(pair.first.begin(), pair.first.end());
+      result.append("]");
+    }
   }
   return result;
 }
 
 string EvalString::Unparse() const {
   string result;
-  for (TokenList::const_iterator i = parsed_.begin();
-       i != parsed_.end(); ++i) {
-    bool special = (i->second == SPECIAL);
-    if (special)
-      result.append("${");
-    result.append(i->first);
-    if (special)
-      result.append("}");
+  if (parsed_.empty() && !single_token_.empty()) {
+    result.append(single_token_.begin(), single_token_.end());
+  } else {
+    for (TokenList::const_iterator i = parsed_.begin();
+         i != parsed_.end(); ++i) {
+      bool special = (i->second == SPECIAL);
+      if (special)
+        result.append("${");
+      result.append(i->first.begin(), i->first.end());
+      if (special)
+        result.append("}");
+    }
   }
   return result;
 }
diff --git a/src/eval_env.h b/src/eval_env.h
index 677dc217a2..ae6d8bc898 100644
--- a/src/eval_env.h
+++ b/src/eval_env.h
@@ -39,8 +39,8 @@ struct EvalString {
   /// @return The string with variables not expanded.
   std::string Unparse() const;
 
-  void Clear() { parsed_.clear(); }
-  bool empty() const { return parsed_.empty(); }
+  void Clear() { parsed_.clear(); single_token_.clear(); }
+  bool empty() const { return parsed_.empty() && single_token_.empty(); }
 
   void AddText(StringPiece text);
   void AddSpecial(StringPiece text);
@@ -53,6 +53,12 @@ struct EvalString {
   enum TokenType { RAW, SPECIAL };
   typedef std::vector<std::pair<std::string, TokenType> > TokenList;
   TokenList parsed_;
+
+  // If we hold only a single RAW token, then we keep it here instead of
+  // pushing it on TokenList. This saves a bunch of allocations for
+  // what is a common case. If parsed_ is nonempty, then this value
+  // must be ignored.
+  std::string single_token_;
 };
 
 /// An invocable build command and associated metadata (description, etc.).
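Note on the EvalString change above: single_token_ is purely an internal fast path, so every public operation must behave exactly as it did when all tokens lived in parsed_. A minimal sketch of that invariant, using only the EvalString API shown in these hunks (hypothetical values, not part of the patch):

    #include <cassert>
    #include "eval_env.h"

    int main() {
      EvalString es;
      es.AddText("gcc ");   // first RAW text: stored in single_token_,
      es.AddText("-c");     // appended in place, no vector allocation
      assert(es.Unparse() == "gcc -c");
      es.AddSpecial("in");  // second token: "gcc -c" migrates into parsed_
      assert(es.Unparse() == "gcc -c${in}");
      return 0;
    }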
diff --git a/src/graph.cc b/src/graph.cc
index f04ffb47c8..c1276daefb 100644
--- a/src/graph.cc
+++ b/src/graph.cc
@@ -740,12 +740,13 @@ bool ImplicitDepLoader::LoadDepsFromLog(Edge* edge, string* err) {
     return false;
   }
 
-  vector<Node*>::iterator implicit_dep =
-      PreallocateSpace(edge, deps->node_count);
-  for (int i = 0; i < deps->node_count; ++i, ++implicit_dep) {
-    Node* node = deps->nodes[i];
-    *implicit_dep = node;
-    node->AddOutEdge(edge);
+  Node** nodes = deps->nodes;
+  size_t node_count = deps->node_count;
+  edge->inputs_.insert(edge->inputs_.end() - edge->order_only_deps_,
+                       nodes, nodes + node_count);
+  edge->implicit_deps_ += node_count;
+  for (size_t i = 0; i < node_count; ++i) {
+    nodes[i]->AddOutEdge(edge);
   }
   return true;
 }
diff --git a/src/hash_map.h b/src/hash_map.h
index 3f465338ac..4361c80a35 100644
--- a/src/hash_map.h
+++ b/src/hash_map.h
@@ -20,40 +20,8 @@
 #include "string_piece.h"
 #include "util.h"
 
-// MurmurHash2, by Austin Appleby
-static inline
-unsigned int MurmurHash2(const void* key, size_t len) {
-  static const unsigned int seed = 0xDECAFBAD;
-  const unsigned int m = 0x5bd1e995;
-  const int r = 24;
-  unsigned int h = seed ^ len;
-  const unsigned char* data = static_cast<const unsigned char*>(key);
-  while (len >= 4) {
-    unsigned int k;
-    memcpy(&k, data, sizeof k);
-    k *= m;
-    k ^= k >> r;
-    k *= m;
-    h *= m;
-    h ^= k;
-    data += 4;
-    len -= 4;
-  }
-  switch (len) {
-  case 3: h ^= data[2] << 16;
-          NINJA_FALLTHROUGH;
-  case 2: h ^= data[1] << 8;
-          NINJA_FALLTHROUGH;
-  case 1: h ^= data[0];
-          h *= m;
-  };
-  h ^= h >> 13;
-  h *= m;
-  h ^= h >> 15;
-  return h;
-}
-
-#include <unordered_map>
+#include "third_party/emhash/hash_table8.hpp"
+#include "third_party/rapidhash/rapidhash.h"
 
 namespace std {
 template<>
@@ -62,7 +30,7 @@ struct hash<StringPiece> {
   typedef size_t result_type;
 
   size_t operator()(StringPiece key) const {
-    return MurmurHash2(key.str_, key.len_);
+    return rapidhash(key.str_, key.len_);
   }
 };
 }
@@ -73,7 +41,7 @@ struct hash<StringPiece> {
 /// mapping StringPiece => Foo*.
 template<typename V>
 struct ExternalStringHashMap {
-  typedef std::unordered_map<StringPiece, V> Type;
+  typedef emhash8::HashMap<StringPiece, V> Type;
 };
 
 #endif // NINJA_MAP_H_
diff --git a/src/manifest_parser.cc b/src/manifest_parser.cc
index c4b2980164..373dc65a83 100644
--- a/src/manifest_parser.cc
+++ b/src/manifest_parser.cc
@@ -209,14 +209,16 @@ bool ManifestParser::ParseDefault(string* err) {
 }
 
 bool ManifestParser::ParseEdge(string* err) {
-  vector<EvalString> ins, outs, validations;
+  ins_.clear();
+  outs_.clear();
+  validations_.clear();
 
   {
     EvalString out;
     if (!lexer_.ReadPath(&out, err))
       return false;
     while (!out.empty()) {
-      outs.push_back(out);
+      outs_.push_back(std::move(out));
       out.Clear();
 
       if (!lexer_.ReadPath(&out, err))
@@ -233,12 +235,12 @@ bool ManifestParser::ParseEdge(string* err) {
         return false;
       if (out.empty())
         break;
-      outs.push_back(out);
+      outs_.push_back(std::move(out));
       ++implicit_outs;
     }
   }
 
-  if (outs.empty())
+  if (outs_.empty())
     return lexer_.Error("expected path", err);
 
   if (!ExpectToken(Lexer::COLON, err))
@@ -259,7 +261,7 @@ bool ManifestParser::ParseEdge(string* err) {
       return false;
     if (in.empty())
       break;
-    ins.push_back(in);
+    ins_.push_back(std::move(in));
   }
 
   // Add all implicit deps, counting how many as we go.
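Note on the hash_map.h hunks above: ExternalStringHashMap keeps the same shape, so call sites need no updates; only the underlying container (emhash8::HashMap) and the StringPiece hash (rapidhash) change. A minimal usage sketch (hypothetical key and value type, not part of the patch):

    #include <cassert>
    #include "hash_map.h"

    int main() {
      ExternalStringHashMap<int>::Type map;
      map["out/foo.o"] = 1;  // StringPiece key, hashed with rapidhash
      assert(map.find("out/foo.o") != map.end());
      return 0;
    }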
@@ -271,7 +273,7 @@ bool ManifestParser::ParseEdge(string* err) {
         return false;
       if (in.empty())
         break;
-      ins.push_back(in);
+      ins_.push_back(std::move(in));
       ++implicit;
     }
   }
@@ -285,7 +287,7 @@ bool ManifestParser::ParseEdge(string* err) {
         return false;
       if (in.empty())
         break;
-      ins.push_back(in);
+      ins_.push_back(std::move(in));
       ++order_only;
     }
   }
@@ -298,7 +300,7 @@ bool ManifestParser::ParseEdge(string* err) {
         return false;
       if (validation.empty())
         break;
-      validations.push_back(validation);
+      validations_.push_back(std::move(validation));
     }
   }
 
@@ -329,9 +331,9 @@ bool ManifestParser::ParseEdge(string* err) {
     edge->pool_ = pool;
   }
 
-  edge->outputs_.reserve(outs.size());
-  for (size_t i = 0, e = outs.size(); i != e; ++i) {
-    string path = outs[i].Evaluate(env);
+  edge->outputs_.reserve(outs_.size());
+  for (size_t i = 0, e = outs_.size(); i != e; ++i) {
+    string path = outs_[i].Evaluate(env);
     if (path.empty())
       return lexer_.Error("empty path", err);
     uint64_t slash_bits;
@@ -351,8 +353,8 @@ bool ManifestParser::ParseEdge(string* err) {
   }
   edge->implicit_outs_ = implicit_outs;
 
-  edge->inputs_.reserve(ins.size());
-  for (vector<EvalString>::iterator i = ins.begin(); i != ins.end(); ++i) {
+  edge->inputs_.reserve(ins_.size());
+  for (vector<EvalString>::iterator i = ins_.begin(); i != ins_.end(); ++i) {
     string path = i->Evaluate(env);
     if (path.empty())
       return lexer_.Error("empty path", err);
@@ -363,9 +365,9 @@ bool ManifestParser::ParseEdge(string* err) {
   edge->implicit_deps_ = implicit;
   edge->order_only_deps_ = order_only;
 
-  edge->validations_.reserve(validations.size());
-  for (std::vector<EvalString>::iterator v = validations.begin();
-       v != validations.end(); ++v) {
+  edge->validations_.reserve(validations_.size());
+  for (std::vector<EvalString>::iterator v = validations_.begin();
+       v != validations_.end(); ++v) {
     string path = v->Evaluate(env);
     if (path.empty())
       return lexer_.Error("empty path", err);
@@ -419,14 +421,16 @@ bool ManifestParser::ParseFileInclude(bool new_scope, string* err) {
     return false;
   string path = eval.Evaluate(env_);
 
-  ManifestParser subparser(state_, file_reader_, options_);
+  if (subparser_ == nullptr) {
+    subparser_.reset(new ManifestParser(state_, file_reader_, options_));
+  }
   if (new_scope) {
-    subparser.env_ = new BindingEnv(env_);
+    subparser_->env_ = new BindingEnv(env_);
   } else {
-    subparser.env_ = env_;
+    subparser_->env_ = env_;
   }
 
-  if (!subparser.Load(path, err, &lexer_))
+  if (!subparser_->Load(path, err, &lexer_))
     return false;
 
   if (!ExpectToken(Lexer::NEWLINE, err))
diff --git a/src/manifest_parser.h b/src/manifest_parser.h
index db6812dce4..ce37759676 100644
--- a/src/manifest_parser.h
+++ b/src/manifest_parser.h
@@ -17,6 +17,9 @@
 
 #include "parser.h"
 
+#include <memory>
+#include <vector>
+
 struct BindingEnv;
 struct EvalString;
 
@@ -63,6 +66,12 @@ struct ManifestParser : public Parser {
   BindingEnv* env_;
   ManifestParserOptions options_;
   bool quiet_;
+
+  // ins_/outs_/validations_ are reused across invocations to ParseEdge(),
+  // to save on the otherwise constant memory reallocation.
+  // subparser_ is reused solely to get better reuse out of ins_/outs_/validations_.
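+  // (A fresh ManifestParser per include would also recreate those vectors,
+  // throwing away the capacity they grew while parsing earlier files.)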
+  std::unique_ptr<ManifestParser> subparser_;
+  std::vector<EvalString> ins_, outs_, validations_;
 };
 
 #endif // NINJA_MANIFEST_PARSER_H_
diff --git a/src/string_piece.h b/src/string_piece.h
index 1c0bee6e1c..7e7367c20a 100644
--- a/src/string_piece.h
+++ b/src/string_piece.h
@@ -63,6 +63,10 @@ struct StringPiece {
     return len_;
   }
 
+  size_t empty() const {
+    return len_ == 0;
+  }
+
   const char* str_;
   size_t len_;
 };
diff --git a/src/third_party/emhash/README.ninja b/src/third_party/emhash/README.ninja
new file mode 100644
index 0000000000..12ead4e545
--- /dev/null
+++ b/src/third_party/emhash/README.ninja
@@ -0,0 +1,8 @@
+Description: emhash8::HashMap for C++14/17
+Version: 1.6.5 (commit bdebddbdce1b473bbc189178fd523ef4a876ea01)
+URL: https://github.com/ktprime/emhash
+Copyright: Copyright (c) 2021-2024 Huang Yuanbing & bailuzhou AT 163.com
+SPDX-License-Identifier: MIT
+Local changes:
+ - Added includes for _mm_prefetch on MinGW.
+ - Fixed some spelling errors to appease the linter.
diff --git a/src/third_party/emhash/hash_table8.hpp b/src/third_party/emhash/hash_table8.hpp
new file mode 100644
index 0000000000..36b7218381
--- /dev/null
+++ b/src/third_party/emhash/hash_table8.hpp
@@ -0,0 +1,1834 @@
+// emhash8::HashMap for C++14/17
+// version 1.6.5
+// https://github.com/ktprime/emhash/blob/master/hash_table8.hpp
+//
+// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2021-2024 Huang Yuanbing & bailuzhou AT 163.com
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef EMH_NEW +#undef EMH_EMPTY + +// likely/unlikely +#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) +# define EMH_LIKELY(condition) __builtin_expect(condition, 1) +# define EMH_UNLIKELY(condition) __builtin_expect(condition, 0) +#else +# define EMH_LIKELY(condition) condition +# define EMH_UNLIKELY(condition) condition +#endif + +#define EMH_EMPTY(n) (0 > (int)(_index[n].next)) +#define EMH_EQHASH(n, key_hash) (((size_type)(key_hash) & ~_mask) == (_index[n].slot & ~_mask)) +//#define EMH_EQHASH(n, key_hash) ((size_type)(key_hash - _index[n].slot) & ~_mask) == 0 +#define EMH_NEW(key, val, bucket, key_hash) \ + new(_pairs + _num_filled) value_type(key, val); \ + _etail = bucket; \ + _index[bucket] = {bucket, _num_filled++ | ((size_type)(key_hash) & ~_mask)} + +#if _WIN32 && defined(_M_IX86) +#include +#endif + +namespace emhash8 { + +struct DefaultPolicy { + static constexpr float load_factor = 0.80f; + static constexpr float min_load_factor = 0.20f; + static constexpr size_t cacheline_size = 64U; +}; + +template, + typename EqT = std::equal_to, + typename Allocator = std::allocator>, //never used + typename Policy = DefaultPolicy> //never used +class HashMap +{ +#ifndef EMH_DEFAULT_LOAD_FACTOR + constexpr static float EMH_DEFAULT_LOAD_FACTOR = 0.80f; +#endif + constexpr static float EMH_MIN_LOAD_FACTOR = 0.25f; //< 0.5 + constexpr static uint32_t EMH_CACHE_LINE_SIZE = 64; //debug only + +public: + using htype = HashMap; + using value_type = std::pair; + using key_type = KeyT; + using mapped_type = ValueT; + //using dPolicy = Policy; + +#ifdef EMH_SMALL_TYPE + using size_type = uint16_t; +#elif EMH_SIZE_TYPE == 0 + using size_type = uint32_t; +#else + using size_type = size_t; +#endif + + using hasher = HashT; + using key_equal = EqT; + + constexpr static size_type INACTIVE = 0-1u; + //constexpr uint32_t END = 0-0x1u; + constexpr static size_type EAD = 2; + + struct Index + { + size_type next; + size_type slot; + }; + + class const_iterator; + class iterator + { + public: + using iterator_category = std::bidirectional_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = typename htype::value_type; + using pointer = value_type*; + using const_pointer = const value_type* ; + using reference = value_type&; + using const_reference = const value_type&; + + iterator() : kv_(nullptr) {} + iterator(const_iterator& cit) { + kv_ = cit.kv_; + } + + iterator(const htype* hash_map, size_type bucket) { + kv_ = hash_map->_pairs + (int)bucket; + } + + iterator& operator++() + { + kv_ ++; + return *this; + } + + iterator operator++(int) + { + auto cur = *this; kv_ ++; + return cur; + } + + iterator& operator--() + { + kv_ --; + return *this; + } + + iterator operator--(int) + { + auto cur = *this; kv_ --; + return cur; + } + + reference operator*() const { return *kv_; } + pointer operator->() const { return kv_; } + + bool operator == (const iterator& rhs) const { return kv_ == rhs.kv_; } + bool operator != (const iterator& rhs) const { return kv_ != rhs.kv_; } + bool operator == (const const_iterator& rhs) const { return kv_ == rhs.kv_; } + bool operator != 
(const const_iterator& rhs) const { return kv_ != rhs.kv_; } + + public: + value_type* kv_; + }; + + class const_iterator + { + public: + using iterator_category = std::bidirectional_iterator_tag; + using value_type = typename htype::value_type; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + + const_iterator(const iterator& it) { + kv_ = it.kv_; + } + + const_iterator (const htype* hash_map, size_type bucket) { + kv_ = hash_map->_pairs + (int)bucket; + } + + const_iterator& operator++() + { + kv_ ++; + return *this; + } + + const_iterator operator++(int) + { + auto cur = *this; kv_ ++; + return cur; + } + + const_iterator& operator--() + { + kv_ --; + return *this; + } + + const_iterator operator--(int) + { + auto cur = *this; kv_ --; + return cur; + } + + const_reference operator*() const { return *kv_; } + const_pointer operator->() const { return kv_; } + + bool operator == (const iterator& rhs) const { return kv_ == rhs.kv_; } + bool operator != (const iterator& rhs) const { return kv_ != rhs.kv_; } + bool operator == (const const_iterator& rhs) const { return kv_ == rhs.kv_; } + bool operator != (const const_iterator& rhs) const { return kv_ != rhs.kv_; } + public: + const value_type* kv_; + }; + + void init(size_type bucket, float mlf = EMH_DEFAULT_LOAD_FACTOR) + { + _pairs = nullptr; + _index = nullptr; + _mask = _num_buckets = 0; + _num_filled = 0; + _mlf = (uint32_t)((1 << 27) / EMH_DEFAULT_LOAD_FACTOR); + max_load_factor(mlf); + rehash(bucket); + } + + HashMap(size_type bucket = 2, float mlf = EMH_DEFAULT_LOAD_FACTOR) + { + init(bucket, mlf); + } + + HashMap(const HashMap& rhs) + { + if (rhs.load_factor() > EMH_MIN_LOAD_FACTOR) { + _pairs = alloc_bucket((size_type)(rhs._num_buckets * rhs.max_load_factor()) + 4); + _index = alloc_index(rhs._num_buckets); + clone(rhs); + } else { + init(rhs._num_filled + 2, rhs.max_load_factor()); + for (auto it = rhs.begin(); it != rhs.end(); ++it) + insert_unique(it->first, it->second); + } + } + + HashMap(HashMap&& rhs) noexcept + { + init(0); + *this = std::move(rhs); + } + + HashMap(std::initializer_list ilist) + { + init((size_type)ilist.size()); + for (auto it = ilist.begin(); it != ilist.end(); ++it) + do_insert(*it); + } + + template + HashMap(InputIt first, InputIt last, size_type bucket_count=4) + { + init(std::distance(first, last) + bucket_count); + for (; first != last; ++first) + emplace(*first); + } + + HashMap& operator=(const HashMap& rhs) + { + if (this == &rhs) + return *this; + + if (rhs.load_factor() < EMH_MIN_LOAD_FACTOR) { + clear(); free(_pairs); _pairs = nullptr; + rehash(rhs._num_filled + 2); + for (auto it = rhs.begin(); it != rhs.end(); ++it) + insert_unique(it->first, it->second); + return *this; + } + + clearkv(); + + if (_num_buckets != rhs._num_buckets) { + free(_pairs); free(_index); + _index = alloc_index(rhs._num_buckets); + _pairs = alloc_bucket((size_type)(rhs._num_buckets * rhs.max_load_factor()) + 4); + } + + clone(rhs); + return *this; + } + + HashMap& operator=(HashMap&& rhs) noexcept + { + if (this != &rhs) { + swap(rhs); + rhs.clear(); + } + return *this; + } + + template + bool operator == (const Con& rhs) const + { + if (size() != rhs.size()) + return false; + + for (auto it = begin(), last = end(); it != last; ++it) { + auto oi = rhs.find(it->first); + if (oi == rhs.end() || it->second != oi->second) + return false; + } + return true; + } + + template + 
bool operator != (const Con& rhs) const { return !(*this == rhs); } + + ~HashMap() noexcept + { + clearkv(); + free(_pairs); + free(_index); + _index = nullptr; + _pairs = nullptr; + } + + void clone(const HashMap& rhs) + { + _hasher = rhs._hasher; +// _eq = rhs._eq; + _num_buckets = rhs._num_buckets; + _num_filled = rhs._num_filled; + _mlf = rhs._mlf; + _last = rhs._last; + _mask = rhs._mask; +#if EMH_HIGH_LOAD + _ehead = rhs._ehead; +#endif + _etail = rhs._etail; + + auto opairs = rhs._pairs; + memcpy((char*)_index, (char*)rhs._index, (_num_buckets + EAD) * sizeof(Index)); + + if (is_copy_trivially()) { + memcpy((char*)_pairs, (char*)opairs, _num_filled * sizeof(value_type)); + } else { + for (size_type slot = 0; slot < _num_filled; slot++) + new(_pairs + slot) value_type(opairs[slot]); + } + } + + void swap(HashMap& rhs) + { + // std::swap(_eq, rhs._eq); + std::swap(_hasher, rhs._hasher); + std::swap(_pairs, rhs._pairs); + std::swap(_index, rhs._index); + std::swap(_num_buckets, rhs._num_buckets); + std::swap(_num_filled, rhs._num_filled); + std::swap(_mask, rhs._mask); + std::swap(_mlf, rhs._mlf); + std::swap(_last, rhs._last); +#if EMH_HIGH_LOAD + std::swap(_ehead, rhs._ehead); +#endif + std::swap(_etail, rhs._etail); + } + + // ------------------------------------------------------------- + iterator first() const { return {this, 0}; } + iterator last() const { return {this, _num_filled - 1}; } + + value_type& front() { return _pairs[0]; } + const value_type& front() const { return _pairs[0]; } + value_type& back() { return _pairs[_num_filled - 1]; } + const value_type& back() const { return _pairs[_num_filled - 1]; } + + void pop_front() { erase(begin()); } //TODO. only erase first without move last + void pop_back() { erase(last()); } + + iterator begin() { return first(); } + const_iterator cbegin() const { return first(); } + const_iterator begin() const { return first(); } + + iterator end() { return {this, _num_filled}; } + const_iterator cend() const { return {this, _num_filled}; } + const_iterator end() const { return cend(); } + + const value_type* values() const { return _pairs; } + const Index* index() const { return _index; } + + size_type size() const { return _num_filled; } + bool empty() const { return _num_filled == 0; } + size_type bucket_count() const { return _num_buckets; } + + /// Returns average number of elements per bucket. + float load_factor() const { return static_cast(_num_filled) / (_mask + 1); } + + HashT& hash_function() const { return _hasher; } + EqT& key_eq() const { return _eq; } + + void max_load_factor(float mlf) + { + if (mlf < 0.992 && mlf > EMH_MIN_LOAD_FACTOR) { + _mlf = (uint32_t)((1 << 27) / mlf); + if (_num_buckets > 0) rehash(_num_buckets); + } + } + + constexpr float max_load_factor() const { return (1 << 27) / (float)_mlf; } + constexpr size_type max_size() const { return (1ull << (sizeof(size_type) * 8 - 1)); } + constexpr size_type max_bucket_count() const { return max_size(); } + +#if EMH_STATIS + //Returns the bucket number where the element with key k is located. + size_type bucket(const KeyT& key) const + { + const auto bucket = hash_bucket(key); + const auto next_bucket = _index[bucket].next; + if ((int)next_bucket < 0) + return 0; + else if (bucket == next_bucket) + return bucket + 1; + + return hash_main(bucket) + 1; + } + + //Returns the number of elements in bucket n. 
+ size_type bucket_size(const size_type bucket) const + { + auto next_bucket = _index[bucket].next; + if ((int)next_bucket < 0) + return 0; + + next_bucket = hash_main(bucket); + size_type ibucket_size = 1; + + //iterator each item in current main bucket + while (true) { + const auto nbucket = _index[next_bucket].next; + if (nbucket == next_bucket) { + break; + } + ibucket_size ++; + next_bucket = nbucket; + } + return ibucket_size; + } + + size_type get_main_bucket(const size_type bucket) const + { + auto next_bucket = _index[bucket].next; + if ((int)next_bucket < 0) + return INACTIVE; + + return hash_main(bucket); + } + + size_type get_diss(size_type bucket, size_type next_bucket, const size_type slots) const + { + auto pbucket = reinterpret_cast(&_pairs[bucket]); + auto pnext = reinterpret_cast(&_pairs[next_bucket]); + if (pbucket / EMH_CACHE_LINE_SIZE == pnext / EMH_CACHE_LINE_SIZE) + return 0; + size_type diff = pbucket > pnext ? (pbucket - pnext) : (pnext - pbucket); + if (diff / EMH_CACHE_LINE_SIZE < slots - 1) + return diff / EMH_CACHE_LINE_SIZE + 1; + return slots - 1; + } + + int get_bucket_info(const size_type bucket, size_type steps[], const size_type slots) const + { + auto next_bucket = _index[bucket].next; + if ((int)next_bucket < 0) + return -1; + + const auto main_bucket = hash_main(bucket); + if (next_bucket == main_bucket) + return 1; + else if (main_bucket != bucket) + return 0; + + steps[get_diss(bucket, next_bucket, slots)] ++; + size_type ibucket_size = 2; + //find a empty and linked it to tail + while (true) { + const auto nbucket = _index[next_bucket].next; + if (nbucket == next_bucket) + break; + + steps[get_diss(nbucket, next_bucket, slots)] ++; + ibucket_size ++; + next_bucket = nbucket; + } + return (int)ibucket_size; + } + + void dump_statics() const + { + const size_type slots = 128; + size_type buckets[slots + 1] = {0}; + size_type steps[slots + 1] = {0}; + for (size_type bucket = 0; bucket < _num_buckets; ++bucket) { + auto bsize = get_bucket_info(bucket, steps, slots); + if (bsize > 0) + buckets[bsize] ++; + } + + size_type sumb = 0, collision = 0, sumc = 0, finds = 0, sumn = 0; + puts("============== buckets size ration ========="); + for (size_type i = 0; i < sizeof(buckets) / sizeof(buckets[0]); i++) { + const auto bucketsi = buckets[i]; + if (bucketsi == 0) + continue; + sumb += bucketsi; + sumn += bucketsi * i; + collision += bucketsi * (i - 1); + finds += bucketsi * i * (i + 1) / 2; + printf(" %2u %8u %2.2lf| %.2lf\n", i, bucketsi, bucketsi * 100.0 * i / _num_filled, sumn * 100.0 / _num_filled); + } + + puts("========== collision miss ration ==========="); + for (size_type i = 0; i < sizeof(steps) / sizeof(steps[0]); i++) { + sumc += steps[i]; + if (steps[i] <= 2) + continue; + printf(" %2u %8u %.2lf %.2lf\n", i, steps[i], steps[i] * 100.0 / collision, sumc * 100.0 / collision); + } + + if (sumb == 0) return; + printf(" _num_filled/bucket_size/packed collision/cache_miss/hit_find = %u/%.2lf/%zd/ %.2lf%%/%.2lf%%/%.2lf\n", + _num_filled, _num_filled * 1.0 / sumb, sizeof(value_type), (collision * 100.0 / _num_filled), (collision - steps[0]) * 100.0 / _num_filled, finds * 1.0 / _num_filled); + assert(sumn == _num_filled); + assert(sumc == collision); + puts("============== buckets size end ============="); + } +#endif + + void pack_zero(ValueT zero) + { + _pairs[_num_filled] = {KeyT(), zero}; + } + + // ------------------------------------------------------------ + template + iterator find(const K& key) noexcept + { + return {this, 
find_filled_slot(key)}; + } + + template + const_iterator find(const K& key) const noexcept + { + return {this, find_filled_slot(key)}; + } + + template + ValueT& at(const K& key) + { + const auto slot = find_filled_slot(key); + //throw + return _pairs[slot].second; + } + + template + const ValueT& at(const K& key) const + { + const auto slot = find_filled_slot(key); + //throw + return _pairs[slot].second; + } + + const ValueT& index(const uint32_t index) const + { + return _pairs[index].second; + } + + ValueT& index(const uint32_t index) + { + return _pairs[index].second; + } + + template + bool contains(const K& key) const noexcept + { + return find_filled_slot(key) != _num_filled; + } + + template + size_type count(const K& key) const noexcept + { + return find_filled_slot(key) == _num_filled ? 0 : 1; + //return find_sorted_bucket(key) == END ? 0 : 1; + //return find_hash_bucket(key) == END ? 0 : 1; + } + + template + std::pair equal_range(const K& key) + { + const auto found = find(key); + if (found.second == _num_filled) + return { found, found }; + else + return { found, std::next(found) }; + } + + void merge(HashMap& rhs) + { + if (empty()) { + *this = std::move(rhs); + return; + } + + for (auto rit = rhs.begin(); rit != rhs.end(); ) { + auto fit = find(rit->first); + if (fit == end()) { + insert_unique(rit->first, std::move(rit->second)); + rit = rhs.erase(rit); + } else { + ++rit; + } + } + } + + /// Returns the matching ValueT or nullptr if k isn't found. + bool try_get(const KeyT& key, ValueT& val) const noexcept + { + const auto slot = find_filled_slot(key); + const auto found = slot != _num_filled; + if (found) { + val = _pairs[slot].second; + } + return found; + } + + /// Returns the matching ValueT or nullptr if k isn't found. + ValueT* try_get(const KeyT& key) noexcept + { + const auto slot = find_filled_slot(key); + return slot != _num_filled ? &_pairs[slot].second : nullptr; + } + + /// Const version of the above + ValueT* try_get(const KeyT& key) const noexcept + { + const auto slot = find_filled_slot(key); + return slot != _num_filled ? &_pairs[slot].second : nullptr; + } + + /// set value if key exist + bool try_set(const KeyT& key, const ValueT& val) noexcept + { + const auto slot = find_filled_slot(key); + if (slot == _num_filled) + return false; + + _pairs[slot].second = val; + return true; + } + + /// set value if key exist + bool try_set(const KeyT& key, ValueT&& val) noexcept + { + const auto slot = find_filled_slot(key); + if (slot == _num_filled) + return false; + + _pairs[slot].second = std::move(val); + return true; + } + + /// Convenience function. + ValueT get_or_return_default(const KeyT& key) const noexcept + { + const auto slot = find_filled_slot(key); + return slot == _num_filled ? 
ValueT() : _pairs[slot].second; + } + + // ----------------------------------------------------- + std::pair do_insert(const value_type& value) noexcept + { + const auto key_hash = hash_key(value.first); + const auto bucket = find_or_allocate(value.first, key_hash); + const auto bempty = EMH_EMPTY(bucket); + if (bempty) { + EMH_NEW(value.first, value.second, bucket, key_hash); + } + + const auto slot = _index[bucket].slot & _mask; + return { {this, slot}, bempty }; + } + + std::pair do_insert(value_type&& value) noexcept + { + const auto key_hash = hash_key(value.first); + const auto bucket = find_or_allocate(value.first, key_hash); + const auto bempty = EMH_EMPTY(bucket); + if (bempty) { + EMH_NEW(std::move(value.first), std::move(value.second), bucket, key_hash); + } + + const auto slot = _index[bucket].slot & _mask; + return { {this, slot}, bempty }; + } + + template + std::pair do_insert(K&& key, V&& val) noexcept + { + const auto key_hash = hash_key(key); + const auto bucket = find_or_allocate(key, key_hash); + const auto bempty = EMH_EMPTY(bucket); + if (bempty) { + EMH_NEW(std::forward(key), std::forward(val), bucket, key_hash); + } + + const auto slot = _index[bucket].slot & _mask; + return { {this, slot}, bempty }; + } + + template + std::pair do_assign(K&& key, V&& val) noexcept + { + check_expand_need(); + const auto key_hash = hash_key(key); + const auto bucket = find_or_allocate(key, key_hash); + const auto bempty = EMH_EMPTY(bucket); + if (bempty) { + EMH_NEW(std::forward(key), std::forward(val), bucket, key_hash); + } else { + _pairs[_index[bucket].slot & _mask].second = std::move(val); + } + + const auto slot = _index[bucket].slot & _mask; + return { {this, slot}, bempty }; + } + + std::pair insert(const value_type& p) + { + check_expand_need(); + return do_insert(p); + } + + std::pair insert(value_type && p) + { + check_expand_need(); + return do_insert(std::move(p)); + } + + void insert(std::initializer_list ilist) + { + reserve(ilist.size() + _num_filled, false); + for (auto it = ilist.begin(); it != ilist.end(); ++it) + do_insert(*it); + } + + template + void insert(Iter first, Iter last) + { + reserve(std::distance(first, last) + _num_filled, false); + for (; first != last; ++first) + do_insert(first->first, first->second); + } + +#if 0 + template + void insert_unique(Iter begin, Iter end) + { + reserve(std::distance(begin, end) + _num_filled, false); + for (; begin != end; ++begin) { + insert_unique(*begin); + } + } +#endif + + template + size_type insert_unique(K&& key, V&& val) + { + check_expand_need(); + const auto key_hash = hash_key(key); + auto bucket = find_unique_bucket(key_hash); + EMH_NEW(std::forward(key), std::forward(val), bucket, key_hash); + return bucket; + } + + size_type insert_unique(value_type&& value) + { + return insert_unique(std::move(value.first), std::move(value.second)); + } + + size_type insert_unique(const value_type& value) + { + return insert_unique(value.first, value.second); + } + + template + std::pair emplace(Args&&... args) noexcept + { + check_expand_need(); + return do_insert(std::forward(args)...); + } + + //no any optimize for position + template + iterator emplace_hint(const_iterator hint, Args&&... args) + { + (void)hint; + check_expand_need(); + return do_insert(std::forward(args)...).first; + } + + template + std::pair try_emplace(const KeyT& k, Args&&... args) + { + check_expand_need(); + return do_insert(k, std::forward(args)...); + } + + template + std::pair try_emplace(KeyT&& k, Args&&... 
args) + { + check_expand_need(); + return do_insert(std::move(k), std::forward(args)...); + } + + template + size_type emplace_unique(Args&&... args) + { + return insert_unique(std::forward(args)...); + } + + std::pair insert_or_assign(const KeyT& key, ValueT&& val) { return do_assign(key, std::forward(val)); } + std::pair insert_or_assign(KeyT&& key, ValueT&& val) { return do_assign(std::move(key), std::forward(val)); } + + /// Return the old value or ValueT() if it didn't exist. + ValueT set_get(const KeyT& key, const ValueT& val) + { + check_expand_need(); + const auto key_hash = hash_key(key); + const auto bucket = find_or_allocate(key, key_hash); + if (EMH_EMPTY(bucket)) { + EMH_NEW(key, val, bucket, key_hash); + return ValueT(); + } else { + const auto slot = _index[bucket].slot & _mask; + ValueT old_value(val); + std::swap(_pairs[slot].second, old_value); + return old_value; + } + } + + /// Like std::map::operator[]. + ValueT& operator[](const KeyT& key) noexcept + { + check_expand_need(); + const auto key_hash = hash_key(key); + const auto bucket = find_or_allocate(key, key_hash); + if (EMH_EMPTY(bucket)) { + /* Check if inserting a value rather than overwriting an old entry */ + EMH_NEW(key, std::move(ValueT()), bucket, key_hash); + } + + const auto slot = _index[bucket].slot & _mask; + return _pairs[slot].second; + } + + ValueT& operator[](KeyT&& key) noexcept + { + check_expand_need(); + const auto key_hash = hash_key(key); + const auto bucket = find_or_allocate(key, key_hash); + if (EMH_EMPTY(bucket)) { + EMH_NEW(std::move(key), std::move(ValueT()), bucket, key_hash); + } + + const auto slot = _index[bucket].slot & _mask; + return _pairs[slot].second; + } + + /// Erase an element from the hash table. + /// return 0 if element was not found + size_type erase(const KeyT& key) noexcept + { + const auto key_hash = hash_key(key); + const auto sbucket = find_filled_bucket(key, key_hash); + if (sbucket == INACTIVE) + return 0; + + const auto main_bucket = key_hash & _mask; + erase_slot(sbucket, (size_type)main_bucket); + return 1; + } + + //iterator erase(const_iterator begin_it, const_iterator end_it) + iterator erase(const const_iterator& cit) noexcept + { + const auto slot = (size_type)(cit.kv_ - _pairs); + size_type main_bucket; + const auto sbucket = find_slot_bucket(slot, main_bucket); //TODO + erase_slot(sbucket, main_bucket); + return {this, slot}; + } + + //only last >= first + iterator erase(const_iterator first, const_iterator last) noexcept + { + auto esize = long(last.kv_ - first.kv_); + auto tsize = long((_pairs + _num_filled) - last.kv_); //last to tail size + auto next = first; + while (tsize -- > 0) { + if (esize-- <= 0) + break; + next = ++erase(next); + } + + //fast erase from last + next = this->last(); + while (esize -- > 0) + next = --erase(next); + + return {this, size_type(next.kv_ - _pairs)}; + } + + template + size_type erase_if(Pred pred) + { + auto old_size = size(); + for (auto it = begin(); it != end();) { + if (pred(*it)) + it = erase(it); + else + ++it; + } + return old_size - size(); + } + + static constexpr bool is_triviall_destructable() + { +#if __cplusplus >= 201402L || _MSC_VER > 1600 + return !(std::is_trivially_destructible::value && std::is_trivially_destructible::value); +#else + return !(std::is_pod::value && std::is_pod::value); +#endif + } + + static constexpr bool is_copy_trivially() + { +#if __cplusplus >= 201103L || _MSC_VER > 1600 + return (std::is_trivially_copyable::value && std::is_trivially_copyable::value); +#else + return 
(std::is_pod::value && std::is_pod::value); +#endif + } + + void clearkv() + { + if (is_triviall_destructable()) { + while (_num_filled --) + _pairs[_num_filled].~value_type(); + } + } + + /// Remove all elements, keeping full capacity. + void clear() noexcept + { + clearkv(); + + if (_num_filled > 0) + memset((char*)_index, INACTIVE, sizeof(_index[0]) * _num_buckets); + + _last = _num_filled = 0; + _etail = INACTIVE; + +#if EMH_HIGH_LOAD + _ehead = 0; +#endif + } + + void shrink_to_fit(const float min_factor = EMH_DEFAULT_LOAD_FACTOR / 4) + { + if (load_factor() < min_factor && bucket_count() > 10) //safe guard + rehash(_num_filled + 1); + } + +#if EMH_HIGH_LOAD + #define EMH_PREVET(i, n) i[n].slot + void set_empty() + { + auto prev = 0; + for (int32_t bucket = 1; bucket < _num_buckets; ++bucket) { + if (EMH_EMPTY(bucket)) { + if (prev != 0) { + EMH_PREVET(_index, bucket) = prev; + _index[_prev].next = -bucket; + } + else + _ehead = bucket; + prev = bucket; + } + } + + EMH_PREVET(_index, _ehead) = prev; + _index[_prev].next = 0-_ehead; + _ehead = 0-_index[_ehead].next; + } + + void clear_empty() + { + auto prev = EMH_PREVET(_index, _ehead); + while (prev != _ehead) { + _index[_prev].next = INACTIVE; + prev = EMH_PREVET(_index, prev); + } + _index[_ehead].next = INACTIVE; + _ehead = 0; + } + + //prev-ehead->next + size_type pop_empty(const size_type bucket) + { + const auto prev_bucket = EMH_PREVET(_index, bucket); + const int next_bucket = 0-_index[bucket].next; + + EMH_PREVET(_index, next_bucket) = prev_bucket; + _index[prev_bucket].next = -next_bucket; + + _ehead = next_bucket; + return bucket; + } + + //ehead->bucket->next + void push_empty(const int32_t bucket) + { + const int next_bucket = 0-_index[_ehead].next; + assert(next_bucket > 0); + + EMH_PREVET(_index, bucket) = _ehead; + _index[bucket].next = -next_bucket; + + EMH_PREVET(_index, next_bucket) = bucket; + _index[_ehead].next = -bucket; + // _ehead = bucket; + } +#endif + + /// Make room for this many elements + bool reserve(uint64_t num_elems, bool force) + { + (void)force; +#if EMH_HIGH_LOAD == 0 + const auto required_buckets = num_elems * _mlf >> 27; + if (EMH_LIKELY(required_buckets < _mask)) // && !force + return false; + +#elif EMH_HIGH_LOAD + const auto required_buckets = num_elems + num_elems * 1 / 9; + if (EMH_LIKELY(required_buckets < _mask)) + return false; + + else if (_num_buckets < 16 && _num_filled < _num_buckets) + return false; + + else if (_num_buckets > EMH_HIGH_LOAD) { + if (_ehead == 0) { + set_empty(); + return false; + } else if (/*_num_filled + 100 < _num_buckets && */_index[_ehead].next != 0-_ehead) { + return false; + } + } +#endif +#if EMH_STATIS + if (_num_filled > EMH_STATIS) dump_statics(); +#endif + + //assert(required_buckets < max_size()); + rehash(required_buckets + 2); + return true; + } + + static value_type* alloc_bucket(size_type num_buckets) + { +#ifdef EMH_ALLOC + auto new_pairs = aligned_alloc(32, (uint64_t)num_buckets * sizeof(value_type)); +#else + auto new_pairs = malloc((uint64_t)num_buckets * sizeof(value_type)); +#endif + return (value_type *)(new_pairs); + } + + static Index* alloc_index(size_type num_buckets) + { + auto new_index = (char*)malloc((uint64_t)(EAD + num_buckets) * sizeof(Index)); + return (Index *)(new_index); + } + + bool reserve(size_type required_buckets) noexcept + { + if (_num_filled != required_buckets) + return reserve(required_buckets, true); + + _last = 0; +#if EMH_HIGH_LOAD + _ehead = 0; +#endif + +#if EMH_SORT + std::sort(_pairs, _pairs + _num_filled, 
[this](const value_type & l, const value_type & r) { + const auto hashl = (size_type)hash_key(l.first) & _mask, hashr = (size_type)hash_key(r.first) & _mask; + return hashl < hashr; + //return l.first < r.first; + }); +#endif + + memset((char*)_index, INACTIVE, sizeof(_index[0]) * _num_buckets); + for (size_type slot = 0; slot < _num_filled; slot++) { + const auto& key = _pairs[slot].first; + const auto key_hash = hash_key(key); + const auto bucket = size_type(key_hash & _mask); + auto& next_bucket = _index[bucket].next; + if ((int)next_bucket < 0) + _index[bucket] = {1, slot | ((size_type)(key_hash) & ~_mask)}; + else { + _index[bucket].slot |= (size_type)(key_hash) & ~_mask; + next_bucket ++; + } + } + return true; + } + + void rebuild(size_type num_buckets) noexcept + { + free(_index); + auto new_pairs = (value_type*)alloc_bucket((size_type)(num_buckets * max_load_factor()) + 4); + if (is_copy_trivially()) { + if (_pairs) + memcpy((char*)new_pairs, (char*)_pairs, _num_filled * sizeof(value_type)); + } else { + for (size_type slot = 0; slot < _num_filled; slot++) { + new(new_pairs + slot) value_type(std::move(_pairs[slot])); + if (is_triviall_destructable()) + _pairs[slot].~value_type(); + } + } + free(_pairs); + _pairs = new_pairs; + _index = (Index*)alloc_index (num_buckets); + + memset((char*)_index, INACTIVE, sizeof(_index[0]) * num_buckets); + memset((char*)(_index + num_buckets), 0, sizeof(_index[0]) * EAD); + } + + void rehash(uint64_t required_buckets) + { + if (required_buckets < _num_filled) + return; + + assert(required_buckets < max_size()); + auto num_buckets = _num_filled > (1u << 16) ? (1u << 16) : 4u; + while (num_buckets < required_buckets) { num_buckets *= 2; } +#if EMH_SAVE_MEM + if (sizeof(KeyT) < sizeof(size_type) && num_buckets >= (1ul << (2 * 8))) + num_buckets = 2ul << (sizeof(KeyT) * 8); +#endif + +#if EMH_REHASH_LOG + auto last = _last; + size_type collision = 0; +#endif + +#if EMH_HIGH_LOAD + _ehead = 0; +#endif + _last = 0; + + _mask = num_buckets - 1; +#if EMH_PACK_TAIL > 1 + _last = _mask; + num_buckets += num_buckets * EMH_PACK_TAIL / 100; //add more 5-10% +#endif + _num_buckets = num_buckets; + + rebuild(num_buckets); + +#ifdef EMH_SORT + std::sort(_pairs, _pairs + _num_filled, [this](const value_type & l, const value_type & r) { + const auto hashl = hash_key(l.first), hashr = hash_key(r.first); + auto diff = int64_t((hashl & _mask) - (hashr & _mask)); + if (diff != 0) + return diff < 0; + return hashl < hashr; +// return l.first < r.first; + }); +#endif + + _etail = INACTIVE; + for (size_type slot = 0; slot < _num_filled; ++slot) { + const auto& key = _pairs[slot].first; + const auto key_hash = hash_key(key); + const auto bucket = find_unique_bucket(key_hash); + _index[bucket] = { bucket, slot | ((size_type)(key_hash) & ~_mask) }; + +#if EMH_REHASH_LOG + if (bucket != hash_main(bucket)) + collision ++; +#endif + } + +#if EMH_REHASH_LOG + if (_num_filled > EMH_REHASH_LOG) { + auto mbucket = _num_filled - collision; + char buff[255] = {0}; + sprintf(buff, " _num_filled/aver_size/K.V/pack/collision|last = %u/%.2lf/%s.%s/%zd|%.2lf%%,%.2lf%%", + _num_filled, double (_num_filled) / mbucket, typeid(KeyT).name(), typeid(ValueT).name(), sizeof(_pairs[0]), collision * 100.0 / _num_filled, last * 100.0 / _num_buckets); +#ifdef EMH_LOG + static uint32_t ihashs = 0; EMH_LOG() << "hash_nums = " << ihashs ++ << "|" <<__FUNCTION__ << "|" << buff << endl; +#else + puts(buff); +#endif + } +#endif + } + +private: + // Can we fit another element? 
+ bool check_expand_need() + { + return reserve(_num_filled, false); + } + + static void prefetch_heap_block(char* ctrl) + { + // Prefetch the heap-allocated memory region to resolve potential TLB + // misses. This is intended to overlap with execution of calculating the hash for a key. +#if __linux__ + __builtin_prefetch(static_cast(ctrl)); +#elif _WIN32 && defined(_M_IX86) + _mm_prefetch((const char*)ctrl, _MM_HINT_T0); +#endif + } + + size_type slot_to_bucket(const size_type slot) const noexcept + { + size_type main_bucket; + return find_slot_bucket(slot, main_bucket); //TODO + } + + //very slow + void erase_slot(const size_type sbucket, const size_type main_bucket) noexcept + { + const auto slot = _index[sbucket].slot & _mask; + const auto ebucket = erase_bucket(sbucket, main_bucket); + const auto last_slot = --_num_filled; + if (EMH_LIKELY(slot != last_slot)) { + const auto last_bucket = (_etail == INACTIVE || ebucket == _etail) + ? slot_to_bucket(last_slot) : _etail; + + _pairs[slot] = std::move(_pairs[last_slot]); + _index[last_bucket].slot = slot | (_index[last_bucket].slot & ~_mask); + } + + if (is_triviall_destructable()) + _pairs[last_slot].~value_type(); + + _etail = INACTIVE; + _index[ebucket] = {INACTIVE, 0}; +#if EMH_HIGH_LOAD + if (_ehead) { + if (10 * _num_filled < 8 * _num_buckets) + clear_empty(); + else if (ebucket) + push_empty(ebucket); + } +#endif + } + + size_type erase_bucket(const size_type bucket, const size_type main_bucket) noexcept + { + const auto next_bucket = _index[bucket].next; + if (bucket == main_bucket) { + if (main_bucket != next_bucket) { + const auto nbucket = _index[next_bucket].next; + _index[main_bucket] = { + (nbucket == next_bucket) ? main_bucket : nbucket, + _index[next_bucket].slot + }; + } + return next_bucket; + } + + const auto prev_bucket = find_prev_bucket(main_bucket, bucket); + _index[prev_bucket].next = (bucket == next_bucket) ? 
prev_bucket : next_bucket; + return bucket; + } + + // Find the slot with this key, or return bucket size + size_type find_slot_bucket(const size_type slot, size_type& main_bucket) const + { + const auto key_hash = hash_key(_pairs[slot].first); + const auto bucket = main_bucket = size_type(key_hash & _mask); + if (slot == (_index[bucket].slot & _mask)) + return bucket; + + auto next_bucket = _index[bucket].next; + while (true) { + if (EMH_LIKELY(slot == (_index[next_bucket].slot & _mask))) + return next_bucket; + next_bucket = _index[next_bucket].next; + } + + return INACTIVE; + } + + // Find the slot with this key, or return bucket size + size_type find_filled_bucket(const KeyT& key, uint64_t key_hash) const noexcept + { + const auto bucket = size_type(key_hash & _mask); + auto next_bucket = _index[bucket].next; + if (EMH_UNLIKELY((int)next_bucket < 0)) + return INACTIVE; + + const auto slot = _index[bucket].slot & _mask; + //prefetch_heap_block((char*)&_pairs[slot]); + if (EMH_EQHASH(bucket, key_hash)) { + if (EMH_LIKELY(_eq(key, _pairs[slot].first))) + return bucket; + } + if (next_bucket == bucket) + return INACTIVE; + + while (true) { + if (EMH_EQHASH(next_bucket, key_hash)) { + const auto slot = _index[next_bucket].slot & _mask; + if (EMH_LIKELY(_eq(key, _pairs[slot].first))) + return next_bucket; + } + + const auto nbucket = _index[next_bucket].next; + if (nbucket == next_bucket) + return INACTIVE; + next_bucket = nbucket; + } + + return INACTIVE; + } + + // Find the slot with this key, or return bucket size + template + size_type find_filled_slot(const K& key) const noexcept + { + const auto key_hash = hash_key(key); + const auto bucket = size_type(key_hash & _mask); + auto next_bucket = _index[bucket].next; + if ((int)next_bucket < 0) + return _num_filled; + + const auto slot = _index[bucket].slot & _mask; + //prefetch_heap_block((char*)&_pairs[slot]); + if (EMH_EQHASH(bucket, key_hash)) { + if (EMH_LIKELY(_eq(key, _pairs[slot].first))) + return slot; + } + if (next_bucket == bucket) + return _num_filled; + + while (true) { + if (EMH_EQHASH(next_bucket, key_hash)) { + const auto slot = _index[next_bucket].slot & _mask; + if (EMH_LIKELY(_eq(key, _pairs[slot].first))) + return slot; + } + + const auto nbucket = _index[next_bucket].next; + if (nbucket == next_bucket) + return _num_filled; + next_bucket = nbucket; + } + + return _num_filled; + } + +#if EMH_SORT + size_type find_hash_bucket(const KeyT& key) const noexcept + { + const auto key_hash = hash_key(key); + const auto bucket = size_type(key_hash & _mask); + const auto next_bucket = _index[bucket].next; + if ((int)next_bucket < 0) + return END; + + auto slot = _index[bucket].slot & _mask; + if (_eq(key, _pairs[slot++].first)) + return slot; + else if (next_bucket == bucket) + return END; + + while (true) { + const auto& okey = _pairs[slot++].first; + if (_eq(key, okey)) + return slot; + + const auto hasho = hash_key(okey); + if ((hasho & _mask) != bucket) + break; + else if (hasho > key_hash) + break; + else if (EMH_UNLIKELY(slot >= _num_filled)) + break; + } + + return END; + } + + //only for find/can not insert + size_type find_sorted_bucket(const KeyT& key) const noexcept + { + const auto key_hash = hash_key(key); + const auto bucket = size_type(key_hash & _mask); + const auto slots = (int)(_index[bucket].next); //TODO + if (slots < 0 /**|| key < _pairs[slot].first*/) + return END; + + const auto slot = _index[bucket].slot & _mask; + auto ormask = _index[bucket].slot & ~_mask; + auto hmask = (size_type)(key_hash) & ~_mask; + 
if ((hmask | ormask) != ormask) + return END; + + if (_eq(key, _pairs[slot].first)) + return slot; + else if (slots == 1 || key < _pairs[slot].first) + return END; + +#if EMH_SORT + if (key < _pairs[slot].first || key > _pairs[slots + slot - 1].first) + return END; +#endif + + for (size_type i = 1; i < slots; ++i) { + const auto& okey = _pairs[slot + i].first; + if (_eq(key, okey)) + return slot + i; + // else if (okey > key) + // return END; + } + + return END; + } +#endif + + //kick out bucket and find empty to occpuy + //it will break the origin link and relink again. + //before: main_bucket-->prev_bucket --> bucket --> next_bucket + //atfer : main_bucket-->prev_bucket --> (removed)--> new_bucket--> next_bucket + size_type kickout_bucket(const size_type kmain, const size_type bucket) noexcept + { + const auto next_bucket = _index[bucket].next; + const auto new_bucket = find_empty_bucket(next_bucket, 2); + const auto prev_bucket = find_prev_bucket(kmain, bucket); + + const auto last = next_bucket == bucket ? new_bucket : next_bucket; + _index[new_bucket] = {last, _index[bucket].slot}; + + _index[prev_bucket].next = new_bucket; + _index[bucket].next = INACTIVE; + + return bucket; + } + + /* + ** inserts a new key into a hash table; first, check whether key's main + ** bucket/position is free. If not, check whether colliding node/bucket is in its main + ** position or not: if it is not, move colliding bucket to an empty place and + ** put new key in its main position; otherwise (colliding bucket is in its main + ** position), new key goes to an empty position. + */ + template + size_type find_or_allocate(const K& key, uint64_t key_hash) noexcept + { + const auto bucket = size_type(key_hash & _mask); + auto next_bucket = _index[bucket].next; + prefetch_heap_block((char*)&_pairs[bucket]); + if ((int)next_bucket < 0) { +#if EMH_HIGH_LOAD + if (next_bucket != INACTIVE) + pop_empty(bucket); +#endif + return bucket; + } + + const auto slot = _index[bucket].slot & _mask; + if (EMH_EQHASH(bucket, key_hash)) + if (EMH_LIKELY(_eq(key, _pairs[slot].first))) + return bucket; + + //check current bucket_key is in main bucket or not + const auto kmain = hash_bucket(_pairs[slot].first); + if (kmain != bucket) + return kickout_bucket(kmain, bucket); + else if (next_bucket == bucket) + return _index[next_bucket].next = find_empty_bucket(next_bucket, 1); + + uint32_t csize = 1; + //find next linked bucket and check key + while (true) { + const auto eslot = _index[next_bucket].slot & _mask; + if (EMH_EQHASH(next_bucket, key_hash)) { + if (EMH_LIKELY(_eq(key, _pairs[eslot].first))) + return next_bucket; + } + + csize += 1; + const auto nbucket = _index[next_bucket].next; + if (nbucket == next_bucket) + break; + next_bucket = nbucket; + } + + //find a empty and link it to tail + const auto new_bucket = find_empty_bucket(next_bucket, csize); + prefetch_heap_block((char*)&_pairs[new_bucket]); + return _index[next_bucket].next = new_bucket; + } + + size_type find_unique_bucket(uint64_t key_hash) noexcept + { + const auto bucket = size_type(key_hash & _mask); + auto next_bucket = _index[bucket].next; + if ((int)next_bucket < 0) { +#if EMH_HIGH_LOAD + if (next_bucket != INACTIVE) + pop_empty(bucket); +#endif + return bucket; + } + + //check current bucket_key is in main bucket or not + const auto kmain = hash_main(bucket); + if (EMH_UNLIKELY(kmain != bucket)) + return kickout_bucket(kmain, bucket); + else if (EMH_UNLIKELY(next_bucket != bucket)) + next_bucket = find_last_bucket(next_bucket); + + return 
_index[next_bucket].next = find_empty_bucket(next_bucket, 2); + } + + /*** + Different probing techniques usually provide a trade-off between memory locality and avoidance of clustering. + Since Robin Hood hashing is relatively resilient to clustering (both primary and secondary), linear probing is the most cache friendly alternativeis typically used. + + It's the core algorithm of this hash map with highly optimization/benchmark. + normally linear probing is inefficient with high load factor, it use a new 3-way linear + probing strategy to search empty slot. from benchmark even the load factor > 0.9, it's more 2-3 timer fast than + one-way search strategy. + + 1. linear or quadratic probing a few cache line for less cache miss from input slot "bucket_from". + 2. the first search slot from member variant "_last", init with 0 + 3. the second search slot from calculated pos "(_num_filled + _last) & _mask", it's like a rand value + */ + // key is not in this mavalue. Find a place to put it. + size_type find_empty_bucket(const size_type bucket_from, uint32_t csize) noexcept + { + (void)csize; +#if EMH_HIGH_LOAD + if (_ehead) + return pop_empty(_ehead); +#endif + + auto bucket = bucket_from; + if (EMH_EMPTY(++bucket) || EMH_EMPTY(++bucket)) + return bucket; + +#ifdef EMH_QUADRATIC + constexpr size_type linear_probe_length = 2 * EMH_CACHE_LINE_SIZE / sizeof(Index);//16 + for (size_type offset = csize + 2, step = 4; offset <= linear_probe_length; ) { + bucket = (bucket_from + offset) & _mask; + if (EMH_EMPTY(bucket) || EMH_EMPTY(++bucket)) + return bucket; + offset += step; //7/8. 12. 16 + } +#else + constexpr size_type quadratic_probe_length = 6u; + for (size_type offset = 4u, step = 3u; step < quadratic_probe_length; ) { + bucket = (bucket_from + offset) & _mask; + if (EMH_EMPTY(bucket) || EMH_EMPTY(++bucket)) + return bucket; + offset += step++; + } +#endif + +#if EMH_PREFETCH + __builtin_prefetch(static_cast(_index + _last + 1), 0, EMH_PREFETCH); +#endif + + for (;;) { +#if EMH_PACK_TAIL + //find empty bucket and skip next + if (EMH_EMPTY(_last++))// || EMH_EMPTY(_last++)) + return _last++ - 1; + + if (EMH_UNLIKELY(_last >= _num_buckets)) + _last = 0; + + auto medium = (_mask / 4 + _last++) & _mask; + if (EMH_EMPTY(medium)) + return medium; +#else + _last &= _mask; + if (EMH_EMPTY(++_last))// || EMH_EMPTY(++_last)) + return _last; + + auto medium = (_num_buckets / 2 + _last) & _mask; + if (EMH_EMPTY(medium))// || EMH_EMPTY(++medium)) + return medium; +#endif + } + + return 0; + } + + size_type find_last_bucket(size_type main_bucket) const + { + auto next_bucket = _index[main_bucket].next; + if (next_bucket == main_bucket) + return main_bucket; + + while (true) { + const auto nbucket = _index[next_bucket].next; + if (nbucket == next_bucket) + return next_bucket; + next_bucket = nbucket; + } + } + + size_type find_prev_bucket(const size_type main_bucket, const size_type bucket) const + { + auto next_bucket = _index[main_bucket].next; + if (next_bucket == bucket) + return main_bucket; + + while (true) { + const auto nbucket = _index[next_bucket].next; + if (nbucket == bucket) + return next_bucket; + next_bucket = nbucket; + } + } + + size_type hash_bucket(const KeyT& key) const noexcept + { + return (size_type)hash_key(key) & _mask; + } + + size_type hash_main(const size_type bucket) const noexcept + { + const auto slot = _index[bucket].slot & _mask; + return (size_type)hash_key(_pairs[slot].first) & _mask; + } + +#if EMH_INT_HASH + static constexpr uint64_t KC = UINT64_C(11400714819323198485); 
+
+    size_type find_last_bucket(size_type main_bucket) const
+    {
+        auto next_bucket = _index[main_bucket].next;
+        if (next_bucket == main_bucket)
+            return main_bucket;
+
+        while (true) {
+            const auto nbucket = _index[next_bucket].next;
+            if (nbucket == next_bucket)
+                return next_bucket;
+            next_bucket = nbucket;
+        }
+    }
+
+    size_type find_prev_bucket(const size_type main_bucket, const size_type bucket) const
+    {
+        auto next_bucket = _index[main_bucket].next;
+        if (next_bucket == bucket)
+            return main_bucket;
+
+        while (true) {
+            const auto nbucket = _index[next_bucket].next;
+            if (nbucket == bucket)
+                return next_bucket;
+            next_bucket = nbucket;
+        }
+    }
+
+    size_type hash_bucket(const KeyT& key) const noexcept
+    {
+        return (size_type)hash_key(key) & _mask;
+    }
+
+    size_type hash_main(const size_type bucket) const noexcept
+    {
+        const auto slot = _index[bucket].slot & _mask;
+        return (size_type)hash_key(_pairs[slot].first) & _mask;
+    }
+
+#if EMH_INT_HASH
+    static constexpr uint64_t KC = UINT64_C(11400714819323198485);
+    static uint64_t hash64(uint64_t key)
+    {
+#if __SIZEOF_INT128__ && EMH_INT_HASH == 1
+        __uint128_t r = key; r *= KC;
+        return (uint64_t)(r >> 64) + (uint64_t)r;
+#elif EMH_INT_HASH == 2
+        //MurmurHash3Mixer
+        uint64_t h = key;
+        h ^= h >> 33;
+        h *= 0xff51afd7ed558ccd;
+        h ^= h >> 33;
+        h *= 0xc4ceb9fe1a85ec53;
+        h ^= h >> 33;
+        return h;
+#elif _WIN64 && EMH_INT_HASH == 1
+        uint64_t high;
+        return _umul128(key, KC, &high) + high;
+#elif EMH_INT_HASH == 3
+        auto ror  = (key >> 32) | (key << 32);
+        auto low  = key * 0xA24BAED4963EE407ull;
+        auto high = ror * 0x9FB21C651E98DF25ull;
+        auto mix  = low + high;
+        return mix;
+#elif EMH_INT_HASH == 1
+        uint64_t r = key * UINT64_C(0xca4bcaa75ec3f625);
+        return (r >> 32) + r;
+#elif EMH_WYHASH64
+        return wyhash64(key, KC);
+#else
+        uint64_t x = key;
+        x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9);
+        x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb);
+        x = x ^ (x >> 31);
+        return x;
+#endif
+    }
+#endif
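+
+    /*
+     * [Editorial note, not part of the imported emhash sources.]
+     * KC == 11400714819323198485 == 0x9E3779B97F4A7C15 == floor(2^64 / phi),
+     * the classic Fibonacci-hashing multiplier. The EMH_INT_HASH == 1
+     * variants compute the 128-bit product key * KC and fold the high half
+     * back into the low half, e.g. in portable C++:
+     *
+     *   __uint128_t r = (__uint128_t)key * KC;
+     *   uint64_t    h = (uint64_t)(r >> 64) + (uint64_t)r;
+     *
+     * The fold matters because hash_bucket() keeps only the low bits
+     * (hash & _mask); without it, the upper bits of the product would
+     * never influence the chosen bucket.
+     */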
+
+#if EMH_WYHASH_HASH
+    //#define WYHASH_CONDOM 1
+    static uint64_t wymix(uint64_t A, uint64_t B)
+    {
+#if defined(__SIZEOF_INT128__)
+        __uint128_t r = A; r *= B;
+#if WYHASH_CONDOM2
+        A ^= (uint64_t)r; B ^= (uint64_t)(r >> 64);
+#else
+        A = (uint64_t)r; B = (uint64_t)(r >> 64);
+#endif
+
+#elif defined(_MSC_VER) && defined(_M_X64)
+#if WYHASH_CONDOM2
+        uint64_t a, b;
+        a = _umul128(A, B, &b);
+        A ^= a; B ^= b;
+#else
+        A = _umul128(A, B, &B);
+#endif
+#else
+        uint64_t ha = A >> 32, hb = B >> 32, la = (uint32_t)A, lb = (uint32_t)B, hi, lo;
+        uint64_t rh = ha * hb, rm0 = ha * lb, rm1 = hb * la, rl = la * lb, t = rl + (rm0 << 32), c = t < rl;
+        lo = t + (rm1 << 32); c += lo < t; hi = rh + (rm0 >> 32) + (rm1 >> 32) + c;
+#if WYHASH_CONDOM2
+        A ^= lo; B ^= hi;
+#else
+        A = lo; B = hi;
+#endif
+#endif
+        return A ^ B;
+    }
+
+    //multiply and xor mix function, aka MUM
+    static inline uint64_t wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v; }
+    static inline uint64_t wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return v; }
+    static inline uint64_t wyr3(const uint8_t *p, size_t k) {
+        return (((uint64_t)p[0]) << 16) | (((uint64_t)p[k >> 1]) << 8) | p[k - 1];
+    }
+
+    inline static const uint64_t secret[4] = {
+        0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull,
+        0x4b33a62ed433d4a3ull, 0x4d5a2da51de1aa47ull};
+public:
+    //wyhash main function https://github.com/wangyi-fudan/wyhash
+    static uint64_t wyhashstr(const char *key, const size_t len)
+    {
+        uint64_t a = 0, b = 0, seed = secret[0];
+        const uint8_t *p = (const uint8_t*)key;
+        if (EMH_LIKELY(len <= 16)) {
+            if (EMH_LIKELY(len >= 4)) {
+                const auto half = (len >> 3) << 2;
+                a = (wyr4(p) << 32U) | wyr4(p + half); p += len - 4;
+                b = (wyr4(p) << 32U) | wyr4(p - half);
+            } else if (len) {
+                a = wyr3(p, len);
+            }
+        } else {
+            size_t i = len;
+            if (EMH_UNLIKELY(i > 48)) {
+                uint64_t see1 = seed, see2 = seed;
+                do {
+                    seed = wymix(wyr8(p + 0) ^ secret[1], wyr8(p + 8) ^ seed);
+                    see1 = wymix(wyr8(p + 16) ^ secret[2], wyr8(p + 24) ^ see1);
+                    see2 = wymix(wyr8(p + 32) ^ secret[3], wyr8(p + 40) ^ see2);
+                    p += 48; i -= 48;
+                } while (EMH_LIKELY(i > 48));
+                seed ^= see1 ^ see2;
+            }
+            while (i > 16) {
+                seed = wymix(wyr8(p) ^ secret[1], wyr8(p + 8) ^ seed);
+                i -= 16; p += 16;
+            }
+            a = wyr8(p + i - 16);
+            b = wyr8(p + i - 8);
+        }
+
+        return wymix(secret[1] ^ len, wymix(a ^ secret[1], b ^ seed));
+    }
+#endif
+
+private:
+    template<typename UType, typename std::enable_if<std::is_integral<UType>::value, uint32_t>::type = 0>
+    inline uint64_t hash_key(const UType key) const
+    {
+#if EMH_INT_HASH
+        return hash64(key);
+#elif EMH_IDENTITY_HASH
+        return key + (key >> 24);
+#else
+        return _hasher(key);
+#endif
+    }
+
+    template<typename UType, typename std::enable_if<std::is_same<UType, std::string>::value, uint32_t>::type = 0>
+    inline uint64_t hash_key(const UType& key) const
+    {
+#if EMH_WYHASH_HASH
+        return wyhashstr(key.data(), key.size());
+#else
+        return _hasher(key);
+#endif
+    }
+
+    template<typename UType, typename std::enable_if<!std::is_integral<UType>::value && !std::is_same<UType, std::string>::value, uint32_t>::type = 0>
+    inline uint64_t hash_key(const UType& key) const
+    {
+        return _hasher(key);
+    }
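+
+    /*
+     * [Editorial note, not part of the imported emhash sources.]
+     * The three hash_key() overloads above dispatch via std::enable_if
+     * (SFINAE) on the key type: integral keys use hash64() when
+     * EMH_INT_HASH is enabled, std::string keys use wyhashstr() when
+     * EMH_WYHASH_HASH is enabled, and any other key type falls back to the
+     * user-supplied HashT functor. So a map keyed on uint64_t never touches
+     * the string hash, and a map keyed on a custom struct always uses the
+     * caller's hasher.
+     */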
+
+private:
+    Index* _index;
+    value_type* _pairs;
+
+    HashT _hasher;
+    EqT _eq;
+    uint32_t _mlf;
+    size_type _mask;
+    size_type _num_buckets;
+    size_type _num_filled;
+    size_type _last;
+#if EMH_HIGH_LOAD
+    size_type _ehead;
+#endif
+    size_type _etail;
+};
+} // namespace emhash
+
diff --git a/src/third_party/rapidhash/README.ninja b/src/third_party/rapidhash/README.ninja
new file mode 100644
index 0000000000..1d74b67c1f
--- /dev/null
+++ b/src/third_party/rapidhash/README.ninja
@@ -0,0 +1,7 @@
+Description: Very fast, high quality, platform-independent hashing algorithm.
+Version: commit 4a6b2570e868536be84800353efd92c699f37d2c
+URL: https://github.com/Nicoshev/rapidhash
+Copyright: Copyright (C) 2024 Nicolas De Carli, Based on 'wyhash', by Wang Yi
+SPDX-License-Identifier: BSD-2-Clause
+Local changes:
+ - Changed to UNIX line endings
diff --git a/src/third_party/rapidhash/rapidhash.h b/src/third_party/rapidhash/rapidhash.h
new file mode 100755
index 0000000000..463f733d85
--- /dev/null
+++ b/src/third_party/rapidhash/rapidhash.h
@@ -0,0 +1,323 @@
+/*
+ * rapidhash - Very fast, high quality, platform-independent hashing algorithm.
+ * Copyright (C) 2024 Nicolas De Carli
+ *
+ * Based on 'wyhash', by Wang Yi
+ *
+ * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following disclaimer
+ *      in the documentation and/or other materials provided with the
+ *      distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at:
+ *   - rapidhash source repository: https://github.com/Nicoshev/rapidhash
+ */
+
+/*
+ * Includes.
+ */
+#include <stdint.h>
+#include <string.h>
+#if defined(_MSC_VER)
+  #include <intrin.h>
+  #if defined(_M_X64) && !defined(_M_ARM64EC)
+    #pragma intrinsic(_umul128)
+  #endif
+#endif
+
+/*
+ * C++ macros.
+ *
+ * RAPIDHASH_INLINE can be overridden to be stronger than a hint, i.e. by adding __attribute__((always_inline)).
+ */
+#ifdef __cplusplus
+  #define RAPIDHASH_NOEXCEPT noexcept
+  #define RAPIDHASH_CONSTEXPR constexpr
+  #ifndef RAPIDHASH_INLINE
+    #define RAPIDHASH_INLINE inline
+  #endif
+#else
+  #define RAPIDHASH_NOEXCEPT
+  #define RAPIDHASH_CONSTEXPR static const
+  #ifndef RAPIDHASH_INLINE
+    #define RAPIDHASH_INLINE static inline
+  #endif
+#endif
+
+/*
+ * Protection macro, alters behaviour of rapid_mum multiplication function.
+ *
+ * RAPIDHASH_FAST: Normal behavior, max speed.
+ * RAPIDHASH_PROTECTED: Extra protection against entropy loss.
+ */
+#ifndef RAPIDHASH_PROTECTED
+  #define RAPIDHASH_FAST
+#elif defined(RAPIDHASH_FAST)
+  #error "cannot define RAPIDHASH_PROTECTED and RAPIDHASH_FAST simultaneously."
+#endif
+
+/*
+ * Unrolling macros, changes code definition for main hash function.
+ *
+ * RAPIDHASH_COMPACT: Legacy variant, each loop processes 48 bytes.
+ * RAPIDHASH_UNROLLED: Unrolled variant, each loop processes 96 bytes.
+ *
+ * Most modern CPUs should benefit from having RAPIDHASH_UNROLLED.
+ *
+ * These macros do not alter the output hash.
+ */
+#ifndef RAPIDHASH_COMPACT
+  #define RAPIDHASH_UNROLLED
+#elif defined(RAPIDHASH_UNROLLED)
+  #error "cannot define RAPIDHASH_COMPACT and RAPIDHASH_UNROLLED simultaneously."
+#endif
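+
+/*
+ * [Editorial note, not part of upstream rapidhash.]
+ * Both switches above are compile-time options, e.g. building with
+ * -DRAPIDHASH_PROTECTED or -DRAPIDHASH_COMPACT. Note the asymmetry:
+ * RAPIDHASH_COMPACT only changes loop structure and leaves the resulting
+ * hashes identical, while RAPIDHASH_PROTECTED changes rapid_mum() itself
+ * and therefore produces different hash values than the default build.
+ */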
+
+/*
+ * Likely and unlikely macros.
+ */
+#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
+  #define _likely_(x)  __builtin_expect(x,1)
+  #define _unlikely_(x)  __builtin_expect(x,0)
+#else
+  #define _likely_(x) (x)
+  #define _unlikely_(x) (x)
+#endif
+
+/*
+ * Endianness macros.
+ */
+#ifndef RAPIDHASH_LITTLE_ENDIAN
+  #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+    #define RAPIDHASH_LITTLE_ENDIAN
+  #elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    #define RAPIDHASH_BIG_ENDIAN
+  #else
+    #warning "could not determine endianness! Falling back to little endian."
+    #define RAPIDHASH_LITTLE_ENDIAN
+  #endif
+#endif
+
+/*
+ * Default seed.
+ */
+#define RAPID_SEED (0xbdd89aa982704029ull)
+
+/*
+ * Default secret parameters.
+ */
+RAPIDHASH_CONSTEXPR uint64_t rapid_secret[3] = {0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull, 0x4b33a62ed433d4a3ull};
+
+/*
+ * 64*64 -> 128bit multiply function.
+ *
+ * @param A Address of 64-bit number.
+ * @param B Address of 64-bit number.
+ *
+ * Calculates 128-bit C = *A * *B.
+ *
+ * When RAPIDHASH_FAST is defined:
+ * Overwrites A contents with C's low 64 bits.
+ * Overwrites B contents with C's high 64 bits.
+ *
+ * When RAPIDHASH_PROTECTED is defined:
+ * Xors and overwrites A contents with C's low 64 bits.
+ * Xors and overwrites B contents with C's high 64 bits.
+ */
+RAPIDHASH_INLINE void rapid_mum(uint64_t *A, uint64_t *B) RAPIDHASH_NOEXCEPT {
+#if defined(__SIZEOF_INT128__)
+  __uint128_t r=*A; r*=*B;
+  #ifdef RAPIDHASH_PROTECTED
+  *A^=(uint64_t)r; *B^=(uint64_t)(r>>64);
+  #else
+  *A=(uint64_t)r; *B=(uint64_t)(r>>64);
+  #endif
+#elif defined(_MSC_VER) && (defined(_WIN64) || defined(_M_HYBRID_CHPE_ARM64))
+  #if defined(_M_X64)
+    #ifdef RAPIDHASH_PROTECTED
+    uint64_t a, b;
+    a=_umul128(*A,*B,&b);
+    *A^=a; *B^=b;
+    #else
+    *A=_umul128(*A,*B,B);
+    #endif
+  #else
+    #ifdef RAPIDHASH_PROTECTED
+    uint64_t a, b;
+    b = __umulh(*A, *B);
+    a = *A * *B;
+    *A^=a; *B^=b;
+    #else
+    uint64_t c = __umulh(*A, *B);
+    *A = *A * *B;
+    *B = c;
+    #endif
+  #endif
+#else
+  uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo;
+  uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t<rl;
+  lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c;
+  #ifdef RAPIDHASH_PROTECTED
+  *A^=lo; *B^=hi;
+  #else
+  *A=lo; *B=hi;
+  #endif
+#endif
+}
+
+/*
+ * Multiply and xor mix function.
+ *
+ * @param A 64-bit number.
+ * @param B 64-bit number.
+ *
+ * Calculates 128-bit C = A * B.
+ * Returns 64-bit xor between high and low 64 bits of C.
+ */
+RAPIDHASH_INLINE uint64_t rapid_mix(uint64_t A, uint64_t B) RAPIDHASH_NOEXCEPT { rapid_mum(&A,&B); return A^B; }
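+
+/*
+ * [Editorial note, not part of upstream rapidhash.]
+ * rapid_mix() is the same MUM (MUltiply and Mix) primitive as wymix() in
+ * the emhash header above: the two 64-bit inputs are multiplied into a
+ * 128-bit product and its halves are xored together. The
+ * RAPIDHASH_PROTECTED build first xors the halves back into the inputs,
+ * so a zero operand cannot erase previously accumulated state:
+ * rapid_mix(0, x) is 0 in the default (fast) build but x in the
+ * protected build.
+ */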
+
+/*
+ * Read functions.
+ */
+#ifdef RAPIDHASH_LITTLE_ENDIAN
+RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return v;}
+RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return v;}
+#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
+RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return __builtin_bswap64(v);}
+RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return __builtin_bswap32(v);}
+#elif defined(_MSC_VER)
+RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return _byteswap_uint64(v);}
+RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return _byteswap_ulong(v);}
+#else
+RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT {
+  uint64_t v; memcpy(&v, p, 8);
+  return (((v >> 56) & 0xff)| ((v >> 40) & 0xff00)| ((v >> 24) & 0xff0000)| ((v >> 8) & 0xff000000)| ((v << 8) & 0xff00000000)| ((v << 24) & 0xff0000000000)| ((v << 40) & 0xff000000000000)| ((v << 56) & 0xff00000000000000));
+}
+RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT {
+  uint32_t v; memcpy(&v, p, 4);
+  return (((v >> 24) & 0xff)| ((v >> 8) & 0xff00)| ((v << 8) & 0xff0000)| ((v << 24) & 0xff000000));
+}
+#endif
+
+/*
+ * Reads and combines 3 bytes of input.
+ *
+ * @param p Buffer to read from.
+ * @param k Length of @p, in bytes.
+ *
+ * Always reads and combines 3 bytes from memory.
+ * Guarantees to read each buffer position at least once.
+ *
+ * Returns a 64-bit value containing all three bytes read.
+ */
+RAPIDHASH_INLINE uint64_t rapid_readSmall(const uint8_t *p, size_t k) RAPIDHASH_NOEXCEPT { return (((uint64_t)p[0])<<56)|(((uint64_t)p[k>>1])<<32)|p[k-1];}
+
+/*
+ * rapidhash main function.
+ *
+ * @param key Buffer to be hashed.
+ * @param len @key length, in bytes.
+ * @param seed 64-bit seed used to alter the hash result predictably.
+ * @param secret Triplet of 64-bit secrets used to alter hash result predictably.
+ *
+ * Returns a 64-bit hash.
+ */
+RAPIDHASH_INLINE uint64_t rapidhash_internal(const void *key, size_t len, uint64_t seed, const uint64_t* secret) RAPIDHASH_NOEXCEPT {
+  const uint8_t *p=(const uint8_t *)key; seed^=rapid_mix(seed^secret[0],secret[1])^len;  uint64_t a, b;
+  if(_likely_(len<=16)){
+    if(_likely_(len>=4)){
+      const uint8_t * plast = p + len - 4;
+      a = (rapid_read32(p) << 32) | rapid_read32(plast);
+      const uint64_t delta = ((len&24)>>(len>>3));
+      b = ((rapid_read32(p + delta) << 32) | rapid_read32(plast - delta)); }
+    else if(_likely_(len>0)){ a=rapid_readSmall(p,len); b=0;}
+    else a=b=0;
+  }
+  else{
+    size_t i=len;
+    if(_unlikely_(i>48)){
+      uint64_t see1=seed, see2=seed;
+#ifdef RAPIDHASH_UNROLLED
+      while(_likely_(i>=96)){
+        seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed);
+        see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1);
+        see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2);
+        seed=rapid_mix(rapid_read64(p+48)^secret[0],rapid_read64(p+56)^seed);
+        see1=rapid_mix(rapid_read64(p+64)^secret[1],rapid_read64(p+72)^see1);
+        see2=rapid_mix(rapid_read64(p+80)^secret[2],rapid_read64(p+88)^see2);
+        p+=96; i-=96;
+      }
+      if(_unlikely_(i>=48)){
+        seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed);
+        see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1);
+        see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2);
+        p+=48; i-=48;
+      }
+#else
+      do {
+        seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed);
+        see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1);
+        see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2);
+        p+=48; i-=48;
+      } while (_likely_(i>=48));
+#endif
+      seed^=see1^see2;
+    }
+    if(i>16){
+      seed=rapid_mix(rapid_read64(p)^secret[2],rapid_read64(p+8)^seed^secret[1]);
+      if(i>32)
+        seed=rapid_mix(rapid_read64(p+16)^secret[2],rapid_read64(p+24)^seed);
+    }
+    a=rapid_read64(p+i-16); b=rapid_read64(p+i-8);
+  }
+  a^=secret[1]; b^=seed; rapid_mum(&a,&b);
+  return rapid_mix(a^secret[0]^len,b^secret[1]);
+}
+
+/*
+ * rapidhash default seeded hash function.
+ *
+ * @param key Buffer to be hashed.
+ * @param len @key length, in bytes.
+ * @param seed 64-bit seed used to alter the hash result predictably.
+ *
+ * Calls rapidhash_internal using provided parameters and default secrets.
+ *
+ * Returns a 64-bit hash.
+ */
+RAPIDHASH_INLINE uint64_t rapidhash_withSeed(const void *key, size_t len, uint64_t seed) RAPIDHASH_NOEXCEPT {
+  return rapidhash_internal(key, len, seed, rapid_secret);
+}
+
+/*
+ * rapidhash default hash function.
+ *
+ * @param key Buffer to be hashed.
+ * @param len @key length, in bytes.
+ *
+ * Calls rapidhash_withSeed using provided parameters and the default seed.
+ *
+ * Returns a 64-bit hash.
+ */
+RAPIDHASH_INLINE uint64_t rapidhash(const void *key, size_t len) RAPIDHASH_NOEXCEPT {
+  return rapidhash_withSeed(key, len, RAPID_SEED);
+}
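+
+/*
+ * [Editorial usage sketch, not part of upstream rapidhash.]
+ * Both entry points take a raw byte buffer and a length; equal input and
+ * seed give the same 64-bit value on every platform and endianness:
+ *
+ *   #include "rapidhash.h"
+ *
+ *   const char cmd[] = "cc -c foo.c -o foo.o";          // hypothetical input
+ *   uint64_t h1 = rapidhash(cmd, sizeof(cmd) - 1);
+ *   uint64_t h2 = rapidhash_withSeed(cmd, sizeof(cmd) - 1, RAPID_SEED);
+ *   // h1 == h2: rapidhash() simply forwards the default seed.
+ */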