Skip to content

Commit

Permalink
Reliable ETA and progress percentage.
Browse files Browse the repository at this point in the history
This has been bugging me for *years*. :)
Count of finished edges isn't a great statistic; it isn't
really obvious whether LLVM will take 8 minutes to build, or 10 minutes.

But it's actually pretty straightforward to get some
more useful information. We already know how much time each edge
has taken, so we could just do the dumb thing, and assume that
every edge in the plan takes the same amount of time.

Or, we can do better. `.ninja_log` already contains
the historical data on how long each edge took to produce its outputs,
so we simply need to ensure that we populate edges with that info,
and then we can greatly improve our predictions.
The math is pretty simple, I think.

This is largely a port of a similar change I made to LLVM LIT:
https://reviews.llvm.org/D99073

With this, I get something quite lovely:
```
llvm-project/build-Clang12$ NINJA_STATUS="[%f/%t %p %P][%e + %E] " /repositories/ninja/build-Clang-debug/ninja opt
[288/2527  11%   4%][0:00:27 + 0:08:52] Building CXX object lib/DebugInfo/CodeView/CMakeFiles/LLVMDebugInfoCodeView.dir/AppendingTypeTableBuilder.cpp.o
```

I hope people will find this useful, and it could be merged.

Please let me know which kinds of test coverage this needs?
  • Loading branch information
LebedevRI committed Aug 24, 2022
1 parent b5f521a commit 5f04500
Show file tree
Hide file tree
Showing 8 changed files with 247 additions and 36 deletions.
4 changes: 3 additions & 1 deletion doc/manual.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,9 @@ Several placeholders are available:
`%o`:: Overall rate of finished edges per second
`%c`:: Current rate of finished edges per second (average over builds
specified by `-j` or its default)
`%e`:: Elapsed time in seconds. _(Available since Ninja 1.2.)_
`%e`:: Elapsed time in hh:mm:ss format. _(Available since Ninja 1.2.)_
`%E`:: Remaining time (ETA) in hh:mm:ss format. _(Available since Ninja 1.12.0.)_
`%P`:: The percentage of time elapsed out of predicted total runtime. _(Available since Ninja 1.12.0.)_
`%%`:: A plain `%` character.
The default progress status is `"[%f/%t] "` (note the trailing space
Expand Down
22 changes: 10 additions & 12 deletions src/build.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,11 @@ bool Plan::AddSubTarget(const Node* node, const Node* dependent, string* err,

void Plan::EdgeWanted(const Edge* edge) {
++wanted_edges_;
if (!edge->is_phony())
if (!edge->is_phony()) {
++command_edges_;
if (builder_)
builder_->status_->EdgeAddedToPlan(edge);
}
}

Edge* Plan::FindWork() {
Expand Down Expand Up @@ -294,8 +297,11 @@ bool Plan::CleanNode(DependencyScan* scan, Node* node, string* err) {

want_e->second = kWantNothing;
--wanted_edges_;
if (!(*oe)->is_phony())
if (!(*oe)->is_phony()) {
--command_edges_;
if (builder_)
builder_->status_->EdgeRemovedFromPlan(*oe);
}
}
}
}
Expand Down Expand Up @@ -607,7 +613,6 @@ bool Builder::AlreadyUpToDate() const {
bool Builder::Build(string* err) {
assert(!AlreadyUpToDate());

status_->PlanHasTotalEdges(plan_.command_edge_count());
int pending_commands = 0;
int failures_allowed = config_.failures_allowed;

Expand Down Expand Up @@ -780,8 +785,8 @@ bool Builder::FinishCommand(CommandRunner::Result* result, string* err) {
end_time_millis = GetTimeMillis() - start_time_millis_;
running_edges_.erase(it);

status_->BuildEdgeFinished(edge, end_time_millis, result->success(),
result->output);
status_->BuildEdgeFinished(edge, start_time_millis, end_time_millis,
result->success(), result->output);

// The rest of this function only applies to successful commands.
if (!result->success()) {
Expand Down Expand Up @@ -821,10 +826,6 @@ bool Builder::FinishCommand(CommandRunner::Result* result, string* err) {
}
if (node_cleaned) {
record_mtime = edge->command_start_time_;

// The total number of edges in the plan may have changed as a result
// of a restat.
status_->PlanHasTotalEdges(plan_.command_edge_count());
}
}

Expand Down Expand Up @@ -938,8 +939,5 @@ bool Builder::LoadDyndeps(Node* node, string* err) {
if (!plan_.DyndepsLoaded(&scan_, node, ddf, err))
return false;

// New command edges may have been added to the plan.
status_->PlanHasTotalEdges(plan_.command_edge_count());

return true;
}
15 changes: 13 additions & 2 deletions src/build_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2200,8 +2200,19 @@ TEST_F(BuildTest, DepsGccWithEmptyDepfileErrorsOut) {
TEST_F(BuildTest, StatusFormatElapsed) {
status_.BuildStarted();
// Before any task is done, the elapsed time must be zero.
EXPECT_EQ("[%/e0.000]",
status_.FormatProgressStatus("[%%/e%e]", 0));
EXPECT_EQ("[%/e0:00:00]", status_.FormatProgressStatus("[%%/e%e]", 0));
}

// Verifies how the `%E` (ETA) placeholder is rendered right after the build
// has been started.
TEST_F(BuildTest, StatusFormatETA) {
  status_.BuildStarted();
  // No edge has finished yet, so no prediction exists and the ETA is
  // expected to be printed as "?".
  const std::string rendered = status_.FormatProgressStatus("[%%/E%E]", 0);
  EXPECT_EQ("[%/E?]", rendered);
}

// Verifies the initial rendering of both percentage placeholders:
// `%p` (share of edges finished) and `%P` (share of predicted time spent).
TEST_F(BuildTest, StatusFormatTimeProgress) {
  status_.BuildStarted();
  // Both placeholders are printed with a "%3i%%" format, so a zero value is
  // right-aligned in a field of width three ("  0%").

  // Before any task is done, the percentage of finished edges must be zero
  // (and must not divide by a zero edge total).
  EXPECT_EQ("[%/p  0%]", status_.FormatProgressStatus("[%%/p%p]", 0));

  // Before any task is done, the percentage of elapsed time must be zero.
  EXPECT_EQ("[%/P  0%]", status_.FormatProgressStatus("[%%/P%P]", 0));
}

TEST_F(BuildTest, StatusFormatReplacePlaceholder) {
Expand Down
6 changes: 5 additions & 1 deletion src/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ struct Edge {
id_(0), outputs_ready_(false), deps_loaded_(false),
deps_missing_(false), generated_by_dep_loader_(false),
command_start_time_(0), implicit_deps_(0), order_only_deps_(0),
implicit_outs_(0) {}
implicit_outs_(0), prev_elapsed_time(-1) {}

/// Return true if all inputs' in-edges are ready.
bool AllInputsReady() const;
Expand Down Expand Up @@ -250,6 +250,10 @@ struct Edge {
bool is_phony() const;
bool use_console() const;
bool maybe_phonycycle_diagnostic() const;

// Historical info: how long did this edge take last time,
// as per .ninja_log, if known? Defaults to -1 if unknown.
int64_t prev_elapsed_time;
};

struct EdgeCmp {
Expand Down
22 changes: 22 additions & 0 deletions src/ninja.cc
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,10 @@ struct NinjaMain : public BuildLogUser {
/// @return true if the manifest was rebuilt.
bool RebuildManifest(const char* input_file, string* err, Status* status);

/// For each edge, look up in the build log how long it took last time,
/// and record that in the edge itself. It will be used for ETA prediction.
void ParsePreviousElapsedTimes();

/// Build the targets listed on the command line.
/// @return an exit code.
int RunBuild(int argc, char** argv, Status* status);
Expand Down Expand Up @@ -289,6 +293,22 @@ bool NinjaMain::RebuildManifest(const char* input_file, string* err,
return true;
}

void NinjaMain::ParsePreviousElapsedTimes() {
for (vector<Edge*>::iterator edge = state_.edges_.begin(),
edge_end = state_.edges_.end();
edge != edge_end; ++edge) {
for (vector<Node*>::iterator out = (*edge)->outputs_.begin(),
out_end = (*edge)->outputs_.end();
out != out_end; ++out) {
BuildLog::LogEntry* log_entry = build_log_.LookupByOutput((*out)->path());
if (!log_entry)
continue; // Maybe we'll have log entry for next output of this edge?
(*edge)->prev_elapsed_time = log_entry->end_time - log_entry->start_time;
break; // Onto next edge.
}
}
}

Node* NinjaMain::CollectTarget(const char* cpath, string* err) {
string path = cpath;
if (path.empty()) {
Expand Down Expand Up @@ -1588,6 +1608,8 @@ NORETURN void real_main(int argc, char** argv) {
exit(1);
}

ninja.ParsePreviousElapsedTimes();

int result = ninja.RunBuild(argc, argv, status);
if (g_metrics)
ninja.DumpMetrics();
Expand Down
166 changes: 155 additions & 11 deletions src/status.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "status.h"

#include <inttypes.h>
#include <stdarg.h>
#include <stdlib.h>

Expand All @@ -27,11 +28,13 @@
using namespace std;

StatusPrinter::StatusPrinter(const BuildConfig& config)
: config_(config),
started_edges_(0), finished_edges_(0), total_edges_(0), running_edges_(0),
time_millis_(0), progress_status_format_(NULL),
: config_(config), started_edges_(0), finished_edges_(0), total_edges_(0),
running_edges_(0), time_millis_(0), cpu_time_millis_(0),
time_predicted_percentage_(0.0), eta_predictable_edges_total_(0.0),
eta_predictable_cpu_time_total_(0.0), eta_predictable_edges_remaining_(0),
eta_predictable_cpu_time_remaining_(0),
eta_unpredictable_edges_remaining_(0), progress_status_format_(NULL),
current_rate_(config.parallelism) {

// Don't do anything fancy in verbose mode.
if (config_.verbosity != BuildConfig::NORMAL)
printer_.set_smart_terminal(false);
Expand All @@ -41,8 +44,30 @@ StatusPrinter::StatusPrinter(const BuildConfig& config)
progress_status_format_ = "[%f/%t] ";
}

void StatusPrinter::PlanHasTotalEdges(int total) {
total_edges_ = total;
// Accounts for a command edge that has just been added to the plan:
// bumps the edge total and files the edge under either the "predictable"
// or the "unpredictable" ETA counters.
void StatusPrinter::EdgeAddedToPlan(const Edge* edge) {
  ++total_edges_;

  // -1 marks an edge with no recorded duration from a previous run.
  const bool has_previous_time = edge->prev_elapsed_time != -1;
  if (!has_previous_time) {
    ++eta_unpredictable_edges_remaining_;
    return;
  }

  // The previous duration is known: count the edge and its time both in the
  // overall totals and in the still-remaining amounts.
  ++eta_predictable_edges_total_;
  ++eta_predictable_edges_remaining_;
  eta_predictable_cpu_time_total_ += edge->prev_elapsed_time;
  eta_predictable_cpu_time_remaining_ += edge->prev_elapsed_time;
}

// Undoes the bookkeeping of EdgeAddedToPlan for a command edge that has been
// dropped from the plan: lowers the edge total and the matching
// "predictable" or "unpredictable" ETA counters.
void StatusPrinter::EdgeRemovedFromPlan(const Edge* edge) {
  --total_edges_;

  // -1 marks an edge with no recorded duration from a previous run.
  const bool has_previous_time = edge->prev_elapsed_time != -1;
  if (!has_previous_time) {
    --eta_unpredictable_edges_remaining_;
    return;
  }

  // The previous duration is known: take the edge and its time back out of
  // both the overall totals and the still-remaining amounts.
  --eta_predictable_edges_total_;
  --eta_predictable_edges_remaining_;
  eta_predictable_cpu_time_total_ -= edge->prev_elapsed_time;
  eta_predictable_cpu_time_remaining_ -= edge->prev_elapsed_time;
}

void StatusPrinter::BuildEdgeStarted(const Edge* edge,
Expand All @@ -58,11 +83,96 @@ void StatusPrinter::BuildEdgeStarted(const Edge* edge,
printer_.SetConsoleLocked(true);
}

void StatusPrinter::BuildEdgeFinished(Edge* edge, int64_t end_time_millis,
bool success, const string& output) {
// Recomputes time_predicted_percentage_: the fraction (not a 0-100 value) of
// the predicted total CPU time that has already been spent, i.e.
// cpu_time_millis_ / (cpu_time_millis_ + predicted remaining CPU time).
//
// The remaining time is predicted from the previous durations of the edges
// that have one (when those are deemed trustworthy, see below), and from the
// average per-edge runtime for all other remaining edges.
//
// The result is left at 0.0 when no estimate can be made: there are no edges
// with a known runtime, or the predicted total is zero.
void StatusPrinter::RecalculateProgressPrediction() {
time_predicted_percentage_ = 0.0;

// Sometimes, the previous and actual times may be wildly different.
// For example, the previous build may have been fully recovered from ccache,
// so it was blazing fast, while the new build no longer gets hits from ccache
// for whatever reason, so it actually compiles code, which takes much longer.
// We should detect such cases, and avoid using "wrong" previous times.

// Note that we will only use the previous times if there are edges with
// previous time knowledge remaining.
bool use_previous_times =
eta_predictable_edges_remaining_ && eta_predictable_cpu_time_remaining_;

// Iff we have sufficient statistical information for the current run,
// that is, if we have taken at least 15 sec AND finished at least 5% of edges,
// we can check whether our performance so far matches the previous one.
if (use_previous_times && total_edges_ && finished_edges_ &&
(time_millis_ >= 15 * 1e3) &&
(((double)finished_edges_ / total_edges_) >= 0.05)) {
// Over the edges we've just run, how long did they take on average?
double actual_average_cpu_time_millis =
(double)cpu_time_millis_ / finished_edges_;
// What is the previous average, for the edges with such knowledge?
double previous_average_cpu_time_millis =
(double)eta_predictable_cpu_time_total_ / eta_predictable_edges_total_;

// Larger average divided by the smaller one, so the ratio is the same
// regardless of which of the two runs was the slower one.
double ratio = std::max(previous_average_cpu_time_millis,
actual_average_cpu_time_millis) /
std::min(previous_average_cpu_time_millis,
actual_average_cpu_time_millis);

// Let's say that the average times should differ by less than 10x
use_previous_times = ratio < 10;
}

// Edges whose runtime we know: the finished ones, plus (if trusted) the
// remaining ones that have a previous duration.
int edges_with_known_runtime = finished_edges_;
if (use_previous_times)
edges_with_known_runtime += eta_predictable_edges_remaining_;
if (edges_with_known_runtime == 0)
return;

// If the previous times are not used, every unfinished edge counts as
// having an unknown runtime.
int edges_with_unknown_runtime = use_previous_times
? eta_unpredictable_edges_remaining_
: (total_edges_ - finished_edges_);

// Given the time elapsed on the edges we've just run,
// and the runtime of the edges for which we know previous runtime,
// what's the edge's average runtime?
int64_t edges_known_runtime_total = cpu_time_millis_;
if (use_previous_times)
edges_known_runtime_total += eta_predictable_cpu_time_remaining_;

double average_cpu_time_millis =
(double)edges_known_runtime_total / edges_with_known_runtime;

// For the edges for which we do not have the previous runtime,
// let's assume that their average runtime is the same as for the other edges,
// and we therefore can predict their remaining runtime.
double unpredictable_cpu_time_remaining_millis =
average_cpu_time_millis * edges_with_unknown_runtime;

// And therefore we can predict the remaining and total runtimes.
double total_cpu_time_remaining = unpredictable_cpu_time_remaining_millis;
if (use_previous_times)
total_cpu_time_remaining += eta_predictable_cpu_time_remaining_;
double total_cpu_time = cpu_time_millis_ + total_cpu_time_remaining;
// Avoid dividing by zero below; the prediction stays at 0.0.
if (total_cpu_time == 0.0)
return;

// After that we can tell how much work we've completed, in time units.
time_predicted_percentage_ = cpu_time_millis_ / total_cpu_time;
}

void StatusPrinter::BuildEdgeFinished(Edge* edge, int64_t start_time_millis,
int64_t end_time_millis, bool success,
const string& output) {
time_millis_ = end_time_millis;
++finished_edges_;

int64_t elapsed = end_time_millis - start_time_millis;
cpu_time_millis_ += elapsed;

// Do we know how long did this edge take last time?
if (edge->prev_elapsed_time != -1) {
--eta_predictable_edges_remaining_;
eta_predictable_cpu_time_remaining_ -= edge->prev_elapsed_time;
} else
--eta_unpredictable_edges_remaining_;

if (edge->use_console())
printer_.SetConsoleLocked(false);

Expand Down Expand Up @@ -201,16 +311,48 @@ string StatusPrinter::FormatProgressStatus(const char* progress_status_format,
out += buf;
break;

// Percentage
// Percentage of edges completed
case 'p': {
int percent = (100 * finished_edges_) / total_edges_;
int percent = 0;
if (finished_edges_ != 0 && total_edges_ != 0)
percent = (100 * finished_edges_) / total_edges_;
snprintf(buf, sizeof(buf), "%3i%%", percent);
out += buf;
break;
}

#define FORMAT_TIME(t) \
"%" PRId64 ":%02" PRId64 ":%02" PRId64 "", (t) / 3600, ((t) % 3600) / 60, \
(t) % 60

// Wall time elapsed
case 'e': {
snprintf(buf, sizeof(buf), "%.3f", time_millis_ / 1e3);
const int64_t elapsed_sec = time_millis_ / 1e3;
snprintf(buf, sizeof(buf), FORMAT_TIME(elapsed_sec));
out += buf;
break;
}

// Wall time ETA
case 'E': {
if (time_predicted_percentage_ != 0.0) {
// So, we know that we've spent time_millis_ wall clock,
// and that is time_predicted_percentage_ percent.
// How much time will we need to complete 100%?
double total_wall_time = time_millis_ / time_predicted_percentage_;
// Naturally, that gives us the time remaining.
const int64_t eta_sec = (total_wall_time - time_millis_) / 1e3;
snprintf(buf, sizeof(buf), FORMAT_TIME(eta_sec));
} else
snprintf(buf, sizeof(buf), "?");
out += buf;
break;
}

// Percentage of time spent out of the predicted time total
case 'P': {
snprintf(buf, sizeof(buf), "%3i%%",
(int)(100. * time_predicted_percentage_));
out += buf;
break;
}
Expand All @@ -232,6 +374,8 @@ void StatusPrinter::PrintStatus(const Edge* edge, int64_t time_millis) {
|| config_.verbosity == BuildConfig::NO_STATUS_UPDATE)
return;

RecalculateProgressPrediction();

bool force_full_command = config_.verbosity == BuildConfig::VERBOSE;

string to_print = edge->GetBinding("description");
Expand Down
Loading

0 comments on commit 5f04500

Please sign in to comment.