Skip to content

Commit

Permalink
Merge pull request #2519 from sesse/master
Browse files Browse the repository at this point in the history
Speed up parsing
  • Loading branch information
jhasse authored Nov 23, 2024
2 parents a3fda2b + c97558a commit c7ad730
Show file tree
Hide file tree
Showing 14 changed files with 2,279 additions and 144 deletions.
55 changes: 3 additions & 52 deletions src/build_log.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,63 +53,14 @@ using namespace std;
namespace {

const char kFileSignature[] = "# ninja log v%d\n";
const int kOldestSupportedVersion = 6;
const int kCurrentVersion = 6;

// 64bit MurmurHash2, by Austin Appleby
#if defined(_MSC_VER)
#define BIG_CONSTANT(x) (x)
#else // defined(_MSC_VER)
#define BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
// Computes the 64-bit MurmurHash2 variant (MurmurHash64A) of the |len| bytes
// starting at |key|, using a fixed seed.  Words are loaded with memcpy, so
// |key| does not need to be aligned.
inline
uint64_t MurmurHash64A(const void* key, size_t len) {
  static const uint64_t seed = 0xDECAFBADDECAFBADull;
  const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
  const int r = 47;

  const unsigned char* cursor = static_cast<const unsigned char*>(key);
  uint64_t hash = seed ^ (len * m);

  // Mix in the input one 8-byte word at a time.
  for (; len >= 8; len -= 8, cursor += 8) {
    uint64_t word;
    memcpy(&word, cursor, sizeof word);
    word *= m;
    word ^= word >> r;
    word *= m;
    hash ^= word;
    hash *= m;
  }

  // Fold in the 1-7 trailing bytes, if any: byte i lands at bit offset 8 * i.
  // XOR is order-independent, so walking the bytes upwards gives the same
  // value as a fall-through switch that walks them downwards.
  const size_t tail = len & 7;
  if (tail != 0) {
    for (size_t i = 0; i < tail; ++i)
      hash ^= uint64_t(cursor[i]) << (8 * i);
    hash *= m;
  }

  // Final mixing steps.
  hash ^= hash >> r;
  hash *= m;
  hash ^= hash >> r;
  return hash;
}
#undef BIG_CONSTANT

const int kOldestSupportedVersion = 7;
const int kCurrentVersion = 7;

} // namespace

// static
// Returns a 64-bit hash of the |command.len_| bytes at |command.str_|.
// NOTE(review): two return statements appear back to back here, so only the
// MurmurHash64A call can execute and the rapidhash call is unreachable as
// written.  This looks like both sides of a diff shown without +/- markers --
// confirm which line the real file keeps.
uint64_t BuildLog::LogEntry::HashCommand(StringPiece command) {
return MurmurHash64A(command.str_, command.len_);
return rapidhash(command.str_, command.len_);
}

BuildLog::LogEntry::LogEntry(const string& output)
Expand Down
12 changes: 6 additions & 6 deletions src/build_log_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ TEST_F(BuildLogTest, FirstWriteAddsSignature) {

TEST_F(BuildLogTest, DoubleEntry) {
FILE* f = fopen(kTestFilename, "wb");
fprintf(f, "# ninja log v6\n");
fprintf(f, "# ninja log v7\n");
fprintf(f, "0\t1\t2\tout\t%" PRIx64 "\n",
BuildLog::LogEntry::HashCommand("command abc"));
fprintf(f, "0\t1\t2\tout\t%" PRIx64 "\n",
Expand Down Expand Up @@ -177,7 +177,7 @@ TEST_F(BuildLogTest, ObsoleteOldVersion) {

TEST_F(BuildLogTest, SpacesInOutput) {
FILE* f = fopen(kTestFilename, "wb");
fprintf(f, "# ninja log v6\n");
fprintf(f, "# ninja log v7\n");
fprintf(f, "123\t456\t456\tout with space\t%" PRIx64 "\n",
BuildLog::LogEntry::HashCommand("command"));
fclose(f);
Expand All @@ -200,10 +200,10 @@ TEST_F(BuildLogTest, DuplicateVersionHeader) {
// build log on Windows. This shouldn't crash, and the second version header
// should be ignored.
FILE* f = fopen(kTestFilename, "wb");
fprintf(f, "# ninja log v6\n");
fprintf(f, "# ninja log v7\n");
fprintf(f, "123\t456\t456\tout\t%" PRIx64 "\n",
BuildLog::LogEntry::HashCommand("command"));
fprintf(f, "# ninja log v6\n");
fprintf(f, "# ninja log v7\n");
fprintf(f, "456\t789\t789\tout2\t%" PRIx64 "\n",
BuildLog::LogEntry::HashCommand("command2"));
fclose(f);
Expand Down Expand Up @@ -252,7 +252,7 @@ struct TestDiskInterface : public DiskInterface {

TEST_F(BuildLogTest, Restat) {
FILE* f = fopen(kTestFilename, "wb");
fprintf(f, "# ninja log v6\n"
fprintf(f, "# ninja log v7\n"
"1\t2\t3\tout\tcommand\n");
fclose(f);
std::string err;
Expand Down Expand Up @@ -280,7 +280,7 @@ TEST_F(BuildLogTest, VeryLongInputLine) {
// Ninja's build log buffer is currently 256kB. Lines longer than that are
// silently ignored, but don't affect parsing of other lines.
FILE* f = fopen(kTestFilename, "wb");
fprintf(f, "# ninja log v6\n");
fprintf(f, "# ninja log v7\n");
fprintf(f, "123\t456\t456\tout\tcommand start");
for (size_t i = 0; i < (512 << 10) / strlen(" more_command"); ++i)
fputs(" more_command", f);
Expand Down
7 changes: 3 additions & 4 deletions src/deps_log.cc
Original file line number Diff line number Diff line change
Expand Up @@ -185,15 +185,13 @@ LoadStatus DepsLog::Load(const string& path, State* state, string* err) {
return LOAD_SUCCESS;
}

long offset;
long offset = ftell(f);
bool read_failed = false;
int unique_dep_record_count = 0;
int total_dep_record_count = 0;
for (;;) {
offset = ftell(f);

unsigned size;
if (fread(&size, 4, 1, f) < 1) {
if (fread(&size, sizeof(size), 1, f) < 1) {
if (!feof(f))
read_failed = true;
break;
Expand All @@ -205,6 +203,7 @@ LoadStatus DepsLog::Load(const string& path, State* state, string* err) {
read_failed = true;
break;
}
offset += size + sizeof(size);

if (is_deps) {
if ((size % 4) != 0) {
Expand Down
57 changes: 39 additions & 18 deletions src/eval_env.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ string BindingEnv::LookupWithFallback(const string& var,
}

string EvalString::Evaluate(Env* env) const {
if (parsed_.empty()) {
return single_token_;
}

string result;
for (TokenList::const_iterator i = parsed_.begin(); i != parsed_.end(); ++i) {
if (i->second == RAW)
Expand All @@ -110,40 +114,57 @@ string EvalString::Evaluate(Env* env) const {
}

void EvalString::AddText(StringPiece text) {
// Add it to the end of an existing RAW token if possible.
if (!parsed_.empty() && parsed_.back().second == RAW) {
parsed_.back().first.append(text.str_, text.len_);
if (parsed_.empty()) {
single_token_.append(text.begin(), text.end());
} else if (!parsed_.empty() && parsed_.back().second == RAW) {
parsed_.back().first.append(text.begin(), text.end());
} else {
parsed_.push_back(make_pair(text.AsString(), RAW));
parsed_.push_back(std::make_pair(text.AsString(), RAW));
}
}

// Appends |text| to the token list as a SPECIAL token.
// NOTE(review): as listed, |text| is pushed twice (first and last statements).
// This looks like both sides of a diff shown without +/- markers, with the
// leading push_back presumably being the removed line -- confirm against the
// real file.
void EvalString::AddSpecial(StringPiece text) {
parsed_.push_back(make_pair(text.AsString(), SPECIAL));
if (parsed_.empty() && !single_token_.empty()) {
// Going from one to two tokens, so we can no longer apply
// our single_token_ optimization and need to push everything
// onto the vector.
parsed_.push_back(std::make_pair(std::move(single_token_), RAW));
}
parsed_.push_back(std::make_pair(text.AsString(), SPECIAL));
}

string EvalString::Serialize() const {
string result;
for (TokenList::const_iterator i = parsed_.begin();
i != parsed_.end(); ++i) {
if (parsed_.empty() && !single_token_.empty()) {
result.append("[");
if (i->second == SPECIAL)
result.append("$");
result.append(i->first);
result.append(single_token_);
result.append("]");
} else {
for (const auto& pair : parsed_) {
result.append("[");
if (pair.second == SPECIAL)
result.append("$");
result.append(pair.first.begin(), pair.first.end());
result.append("]");
}
}
return result;
}

string EvalString::Unparse() const {
string result;
for (TokenList::const_iterator i = parsed_.begin();
i != parsed_.end(); ++i) {
bool special = (i->second == SPECIAL);
if (special)
result.append("${");
result.append(i->first);
if (special)
result.append("}");
if (parsed_.empty() && !single_token_.empty()) {
result.append(single_token_.begin(), single_token_.end());
} else {
for (TokenList::const_iterator i = parsed_.begin();
i != parsed_.end(); ++i) {
bool special = (i->second == SPECIAL);
if (special)
result.append("${");
result.append(i->first.begin(), i->first.end());
if (special)
result.append("}");
}
}
return result;
}
10 changes: 8 additions & 2 deletions src/eval_env.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ struct EvalString {
/// @return The string with variables not expanded.
std::string Unparse() const;

void Clear() { parsed_.clear(); }
bool empty() const { return parsed_.empty(); }
void Clear() { parsed_.clear(); single_token_.clear(); }
bool empty() const { return parsed_.empty() && single_token_.empty(); }

void AddText(StringPiece text);
void AddSpecial(StringPiece text);
Expand All @@ -53,6 +53,12 @@ struct EvalString {
enum TokenType { RAW, SPECIAL };
typedef std::vector<std::pair<std::string, TokenType> > TokenList;
TokenList parsed_;

// If we hold only a single RAW token, then we keep it here instead of
// pushing it on TokenList. This saves a bunch of allocations for
// what is a common case. If parsed_ is nonempty, then this value
// must be ignored.
std::string single_token_;
};

/// An invocable build command and associated metadata (description, etc.).
Expand Down
13 changes: 7 additions & 6 deletions src/graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -740,12 +740,13 @@ bool ImplicitDepLoader::LoadDepsFromLog(Edge* edge, string* err) {
return false;
}

vector<Node*>::iterator implicit_dep =
PreallocateSpace(edge, deps->node_count);
for (int i = 0; i < deps->node_count; ++i, ++implicit_dep) {
Node* node = deps->nodes[i];
*implicit_dep = node;
node->AddOutEdge(edge);
Node** nodes = deps->nodes;
size_t node_count = deps->node_count;
edge->inputs_.insert(edge->inputs_.end() - edge->order_only_deps_,
nodes, nodes + node_count);
edge->implicit_deps_ += node_count;
for (size_t i = 0; i < node_count; ++i) {
nodes[i]->AddOutEdge(edge);
}
return true;
}
Expand Down
40 changes: 4 additions & 36 deletions src/hash_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,40 +20,8 @@
#include "string_piece.h"
#include "util.h"

// MurmurHash2, by Austin Appleby
// Computes MurmurHash2 over the |len| bytes starting at |key|, using a fixed
// seed, and returns it as an unsigned int.  Words are loaded with memcpy, so
// |key| does not need to be aligned.
static inline
unsigned int MurmurHash2(const void* key, size_t len) {
  static const unsigned int seed = 0xDECAFBAD;
  const unsigned int m = 0x5bd1e995;
  const int r = 24;

  const unsigned char* cursor = static_cast<const unsigned char*>(key);
  unsigned int hash = seed ^ len;

  // Mix in the input one 4-byte word at a time.
  for (; len >= 4; len -= 4, cursor += 4) {
    unsigned int word;
    memcpy(&word, cursor, sizeof word);
    word *= m;
    word ^= word >> r;
    word *= m;
    hash *= m;
    hash ^= word;
  }

  // Fold in the 1-3 trailing bytes, if any: byte i lands at bit offset 8 * i.
  // XOR is order-independent, so walking the bytes upwards gives the same
  // value as a fall-through switch that walks them downwards.
  if (len != 0) {
    for (size_t i = 0; i < len; ++i)
      hash ^= cursor[i] << (8 * i);
    hash *= m;
  }

  // Final mixing steps.
  hash ^= hash >> 13;
  hash *= m;
  hash ^= hash >> 15;
  return hash;
}

#include <unordered_map>
#include "third_party/emhash/hash_table8.hpp"
#include "third_party/rapidhash/rapidhash.h"

namespace std {
template<>
Expand All @@ -62,7 +30,7 @@ struct hash<StringPiece> {
typedef size_t result_type;

size_t operator()(StringPiece key) const {
return MurmurHash2(key.str_, key.len_);
return rapidhash(key.str_, key.len_);
}
};
}
Expand All @@ -73,7 +41,7 @@ struct hash<StringPiece> {
/// mapping StringPiece => Foo*.
template<typename V>
struct ExternalStringHashMap {
typedef std::unordered_map<StringPiece, V> Type;
typedef emhash8::HashMap<StringPiece, V> Type;
};

#endif // NINJA_MAP_H_
Loading

0 comments on commit c7ad730

Please sign in to comment.