Skip to content

Commit

Permalink
Drop MPI support for graphstats, restructure it, add functionality to…
Browse files Browse the repository at this point in the history
… count certain degrees
  • Loading branch information
DanielSeemaier committed Nov 30, 2023
1 parent 669e1ff commit 41e5fce
Showing 1 changed file with 132 additions and 87 deletions.
219 changes: 132 additions & 87 deletions app/tools/graphstats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,31 +14,39 @@

using namespace kagen;

// Settings for one graphstats run; the fields are bound to command-line
// options in parse_cli_arguments().
struct Configuration {
    // Paths of the graph files to analyze ("input filenames").
    std::vector<std::string> input_filenames{};
    // Format of the input file(s) ("-f,--format").
    FileFormat input_format{FileFormat::EXTENSION};

    // Number of chunks each graph is read in ("-C,--num-chunks").
    int num_chunks{1};
    // Set by "--header-only".
    bool header_only{false};
    // Set by "-H,--omit-header": do not print the CSV header line.
    bool omit_header{false};
    // Set by "--strip-extension": print the graph name without file extension.
    bool strip_extension{false};

    // Degrees for which the number of matching nodes is counted ("--count-degree").
    std::vector<SInt> count_num_deg_nodes{};
};

// Statistics of a single graph; PrintRow() writes one of these as a CSV row.
// NOTE(review): this span appears to interleave pre- and post-commit lines of
// a diff (fields with and without initializers, plus a LoadGraph function in
// the middle) -- confirm against the repository before relying on it.
struct Statistics {
// Value printed in the "Graph" column.
std::string name;
SInt n;
SInt m;
SInt min_deg;
LPFloat avg_deg;
SInt max_deg;
};

Graph LoadGraph(const PGeneratorConfig& config) {
const PEID rank = GetCommRank(MPI_COMM_WORLD);
const PEID size = GetCommSize(MPI_COMM_WORLD);
// Number of nodes ("N" column).
SInt n = 0;
// Number of edges ("M" column).
SInt m = 0;
// Minimum, average and maximum degree ("MinDeg", "AvgDeg", "MaxDeg" columns).
SInt min_deg = 0;
LPFloat avg_deg = 0.0;
SInt max_deg = 0;

FileGraphFactory factory;
const auto normalized_config = factory.NormalizeParameters(config, rank, size, false);
auto loader = factory.Create(normalized_config, rank, size);
loader->Generate(GraphRepresentation::EDGE_LIST);
loader->Finalize(MPI_COMM_WORLD);
return loader->Take();
}
// One counter per requested degree; PrintRow() prints them between "M" and "MinDeg".
std::vector<SInt> num_deg_nodes;
};

void PrintHeader(const Configuration& config) {
((void)config);

void PrintHeader() {
std::cout << "Graph,";
std::cout << "N,";
std::cout << "M,";
for (const SInt deg: config.count_num_deg_nodes) {
std::cout << "NumDeg" << deg << "Nodes,";
}
std::cout << "MinDeg,";
std::cout << "AvgDeg,";
std::cout << "MaxDeg";
Expand All @@ -49,119 +57,156 @@ void PrintRow(const Statistics& stats) {
std::cout << stats.name << ",";
std::cout << stats.n << ",";
std::cout << stats.m << ",";
for (const SInt num_deg_nodes: stats.num_deg_nodes) {
std::cout << num_deg_nodes << ",";
}
std::cout << stats.min_deg << ",";
std::cout << stats.avg_deg << ",";
std::cout << stats.max_deg;
std::cout << std::endl;
}

Statistics GenerateInternal(const PGeneratorConfig& config) {
Graph graph = LoadGraph(config);
// Accumulates graph statistics over one or more chunks: call operator() once
// per chunk, then one of the Finalize() overloads to obtain the result.
// NOTE(review): this span appears to interleave pre- and post-commit lines of
// a diff (statements using `stats`, `graph`, `degrees` and MPI helpers sit
// between the member functions) -- confirm against the repository.
struct StatisticsComputator {
// Keeps a reference to `config`; the referenced object must outlive this computator.
StatisticsComputator(const Configuration& config) : config_(config) {}

Statistics stats;
stats.n = FindNumberOfGlobalNodes(graph.vertex_range, MPI_COMM_WORLD);
stats.m = FindNumberOfGlobalEdges(graph.edges, MPI_COMM_WORLD);
// Folds one chunk into the running totals: adds the chunk's edge count to m
// and increments the degree counter of every edge's first endpoint.
void operator()(const Graph& chunk) {
stats_.m += chunk.edges.size();

const auto degree_stats = ReduceDegreeStatistics(graph.edges, stats.n, MPI_COMM_WORLD);
stats.min_deg = degree_stats.min;
stats.avg_deg = degree_stats.mean;
stats.max_deg = degree_stats.max;
for (const auto& [from, to]: chunk.edges) {
// Grow the degree array with zeros until `from` is a valid index.
while (degrees_.size() <= from) {
degrees_.push_back(0);
}
++degrees_[from];
}
}

return stats;
}
// Finishes the computation and returns the statistics (moved out of this object).
// NOTE(review): the `graph` parameter is not used by the body.
Statistics Finalize(const Graph& graph) {
FinalizeStreamingStatistics();
return std::move(stats_);
}

Statistics GenerateExternal(const PGeneratorConfig& config, const int num_chunks) {
if (GetCommSize(MPI_COMM_WORLD) > 1) {
std::cerr << "Error: external statistics generation is only supported for a single MPI process\n";
std::exit(1);
// Finishes the computation and returns the statistics (moved out of this object).
Statistics Finalize() {
FinalizeStreamingStatistics();
return std::move(stats_);
}

Statistics stats;
private:
// Derives n from the degree array, then computes the degree statistics.
// n is degrees_.size(), so nodes whose ID is larger than every `from` seen
// by operator() are not counted.
void FinalizeStreamingStatistics() {
stats_.n = degrees_.size();
FinalizeDegreeStatistics();
}

const auto reader = CreateGraphReader(config.input_graph.format, config.input_graph, 0, 1);
auto reported_size = reader->ReadSize();
// Computes min/max/average degree and the per-degree node counts.
void FinalizeDegreeStatistics() {
// Start from the extreme values so the first node tightens both bounds.
// NOTE(review): with n == 0 the loop below never runs and min_deg keeps
// numeric_limits<SInt>::max() -- confirm whether empty inputs can occur.
stats_.min_deg = std::numeric_limits<SInt>::max();
stats_.max_deg = std::numeric_limits<SInt>::min();

std::vector<SInt> degrees;
// One zeroed counter per entry of config_.count_num_deg_nodes.
stats_.num_deg_nodes.resize(config_.count_num_deg_nodes.size());
std::fill(stats_.num_deg_nodes.begin(), stats_.num_deg_nodes.end(), 0);

for (int chunk = 0; chunk < num_chunks; ++chunk) {
const auto [from, to] = ComputeRange(reported_size.first, num_chunks, chunk);
Graph graph = reader->Read(from, to, std::numeric_limits<SInt>::max(), GraphRepresentation::EDGE_LIST);
for (SInt node = 0; node < stats_.n; ++node) {
const SInt deg = degrees_[node];
stats_.min_deg = std::min(stats_.min_deg, deg);
stats_.max_deg = std::max(stats_.max_deg, deg);

for (const auto& [from, to]: graph.edges) {
while (degrees.size() <= from) {
degrees.push_back(0);
// Counter i goes up by one when this node's degree equals the i-th requested degree.
for (std::size_t i = 0; i < config_.count_num_deg_nodes.size(); ++i) {
stats_.num_deg_nodes[i] += (deg == config_.count_num_deg_nodes[i]);
}
++degrees[from];
}

stats.m += graph.edges.size();
// The 1.0 factor forces floating-point division.
// NOTE(review): divides by zero when n == 0 -- confirm whether that needs handling.
stats_.avg_deg = 1.0 * stats_.m / stats_.n;
}

const auto [min_it, max_it] = std::minmax_element(degrees.begin(), degrees.end());
// Reference to the configuration passed to the constructor (not a copy).
const Configuration& config_;

stats.n = degrees.size();
stats.min_deg = *min_it;
stats.avg_deg = 1.0 * stats.m / stats.n;
stats.max_deg = *max_it;
// degrees_[v] is the number of edges seen so far whose first endpoint is v.
std::vector<SInt> degrees_;

return stats;
}
// Result under construction; moved out by Finalize().
Statistics stats_;
};

int main(int argc, char* argv[]) {
MPI_Init(&argc, &argv);
// Reads the graph described by kagen_config.input_graph in
// stats_config.num_chunks pieces, feeds every piece to a StatisticsComputator
// and returns the finalized statistics.
// NOTE(review): this span appears to interleave pre- and post-commit lines of
// a diff (unused locals and a CLI::App declaration sit between the
// statements) -- confirm against the repository.
Statistics ComputeStatistics(const Configuration& stats_config, const PGeneratorConfig& kagen_config) {
StatisticsComputator computator(stats_config);

std::vector<std::string> input_filenames;
bool do_strip_extension = false;
bool do_no_header = false;
bool do_header_only = false;
int num_chunks = 1;
PGeneratorConfig config;
// The trailing 0, 1 are presumably "rank 0 of 1 process" -- TODO confirm
// against the CreateGraphReader declaration.
auto reader = CreateGraphReader(kagen_config.input_graph.format, kagen_config.input_graph, 0, 1);

// Chunk 0 is kept in a named, non-const variable because the single-chunk
// path below moves it into FinalizeReadGraph().
Graph graph =
ReadGraph(*reader, GraphRepresentation::EDGE_LIST, kagen_config.input_graph, 0, stats_config.num_chunks);
computator(graph);

CLI::App app("graphstats: compute some basic statistics on a graph");
// Remaining chunks are read one at a time and accumulated.
for (int chunk = 1; chunk < stats_config.num_chunks; ++chunk) {
const Graph next_graph = ReadGraph(
*reader, GraphRepresentation::EDGE_LIST, kagen_config.input_graph, chunk, stats_config.num_chunks);
computator(next_graph);
}

// NOTE(review): in the single-chunk path the graph was already accumulated
// above, before FinalizeReadGraph() runs, and Finalize(const Graph&) as
// visible here does not read its argument -- confirm that the result of
// FinalizeReadGraph() is meant to have no influence on the statistics.
if (stats_config.num_chunks == 1) {
graph = FinalizeReadGraph(reader->Deficits(), std::move(graph), false, MPI_COMM_WORLD);
return computator.Finalize(graph);
} else {
return computator.Finalize();
}
}

// Builds the CLI11 application, binds its options to a Configuration, parses
// argv and returns the filled-in Configuration. Terminates the process when
// parsing raises a CLI::ParseError.
// NOTE(review): this span appears to interleave pre- and post-commit lines of
// a diff (options bound to locals such as do_header_only / num_chunks next to
// the ones bound to `config`) -- confirm against the repository.
Configuration parse_cli_arguments(int argc, char* argv[]) {
Configuration config;

CLI::App app("graphstats: compute basic graph statistics");

// Input files and --header-only share an option group with require_option(1);
// presumably this makes them mutually exclusive -- confirm against the CLI11 docs.
CLI::Option_group* group = app.add_option_group("Options");
group->require_option(1);
group->add_option("input filenames", input_filenames)->check(CLI::ExistingFile);
group->add_flag("--header-only", do_header_only);
group->add_option("input filenames", config.input_filenames)->check(CLI::ExistingFile);
group->add_flag("--header-only", config.header_only);

app.add_option("-f,--format", config.input_graph.format, "File format of the input file(s).")
->transform(CLI::CheckedTransformer(GetInputFormatMap()));
app.add_flag(
"--strip-extension", do_strip_extension,
"If set, print the filename in the Graph column without file extension.");
app.add_flag("-H,--no-header", do_no_header, "If set, do not print the CSV header line.");
app.add_option(
"-C,--num-chunks", num_chunks,
"-C,--num-chunks", config.num_chunks,
"If set, compute the statistics externally by splitting the graph into this many chunks; some statistics might "
"not be available in this mode. Still requires O(n) memory.");
CLI11_PARSE(app, argc, argv);
// The format name given on the command line is mapped through GetInputFormatMap().
app.add_option("-f,--format", config.input_format, "File format of the input file(s).")
->transform(CLI::CheckedTransformer(GetInputFormatMap()));
app.add_flag(
"--strip-extension", config.strip_extension,
"If set, print the filename in the Graph column without file extension.");
app.add_flag("-H,--omit-header", config.omit_header, "If set, do not print the CSV header line.");

app.add_option("--count-degree", config.count_num_deg_nodes, "Count the number of nodes with this degree.");

// Catch special case: only print CSV header line
if ((do_header_only || !do_no_header) && GetCommRank(MPI_COMM_WORLD) == ROOT) {
PrintHeader();
// Parse explicitly (rather than via CLI11_PARSE) because this function
// returns a Configuration, not an exit code.
// NOTE(review): the value returned by app.exit(e) is discarded and the
// process always exits with status 1 -- confirm this is intended for
// non-error cases such as --help. main() calls MPI_Init before this function,
// so this exit path also skips MPI_Finalize.
try {
app.parse(argc, argv);
} catch (const CLI::ParseError& e) {
(app).exit(e);
std::exit(1);
}
if (do_header_only) {

return config;
}

int main(int argc, char* argv[]) {
MPI_Init(&argc, &argv);
if (GetCommSize(MPI_COMM_WORLD) != 1) {
std::cerr << "must be run with just one MPI process\n";
return MPI_Finalize();
}

for (const auto& filename: input_filenames) {
config.input_graph.filename = filename;
Configuration config = parse_cli_arguments(argc, argv);

Statistics stats;
if (num_chunks == 1) {
stats = GenerateInternal(config);
} else {
stats = GenerateExternal(config, num_chunks);
}
if (config.header_only || !config.omit_header) {
PrintHeader(config);
}
if (config.header_only) {
return MPI_Finalize();
}

for (const auto& filename: config.input_filenames) {
PGeneratorConfig kagen_config;
kagen_config.input_graph.filename = filename;
kagen_config.input_graph.format = config.input_format;

stats.name = ExtractFilename(config.input_graph.filename);
if (do_strip_extension) {
Statistics stats = ComputeStatistics(config, kagen_config);
stats.name = ExtractFilename(filename);
if (config.strip_extension) {
stats.name = StripExtension(stats.name);
}

if (GetCommRank(MPI_COMM_WORLD) == ROOT) {
PrintRow(stats);
}
PrintRow(stats);
}

return MPI_Finalize();
Expand Down

0 comments on commit 41e5fce

Please sign in to comment.