[tuner] Use JSON for benchmark output
mihaescuvlad committed Oct 9, 2024
1 parent 4e2f351 commit a49ef9c
Showing 2 changed files with 94 additions and 53 deletions.
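
The change replaces regex scraping of iree-benchmark-module's console table with parsing of its JSON output: stdout is decoded with json.loads, the top-level "benchmarks" list is extracted, and the mean of the per-entry "real_time" values becomes the benchmark time. Below is a minimal sketch (not part of the commit) of the payload shape the new code expects; the field names follow Google Benchmark's JSON schema, and the exact flag that makes iree-benchmark-module emit JSON (e.g. --benchmark_format=json) is not shown in this diff and is an assumption.

import json

# Illustrative stdout payload only; real entries carry more fields (name, iterations, ...).
sample_stdout = '{"benchmarks": [{"real_time": 271.0}, {"real_time": 274.0}, {"real_time": 273.0}]}'

benchmarks = json.loads(sample_stdout).get("benchmarks", None)
mean = sum(b["real_time"] for b in benchmarks) / len(benchmarks)
print(mean)  # 272.666...
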
70 changes: 50 additions & 20 deletions tuner/tuner/libtuner.py
@@ -36,6 +36,7 @@
from typing import Type, Optional, Callable, Iterable, Any
import pickle
import random
import json
from abc import ABC, abstractmethod
import iree.runtime as ireert
from . import candidate_gen
@@ -97,6 +98,7 @@ class PathConfig:
specs_dir: Path = field(init=False)

output_unilog: Path = field(init=False)
output_json: Path = field(init=False)
result_summary_log: Path = field(init=False)
candidate_trackers_pkl: Path = field(init=False)

@@ -225,7 +227,7 @@ class TaskResult:


@dataclass
class ParsedDisptachBenchmarkResult:
class ParsedDispatchBenchmarkResult:
candidate_id: int
benchmark_time_in_seconds: float
candidate_mlir: Path
@@ -236,20 +238,29 @@ class ParsedDisptachBenchmarkResult:
class IREEBenchmarkResult:
# Default format follows output of iree-benchmark-module
candidate_id: int
result_str: str
result_json: list

def get_mean_time(self) -> Optional[float]:
if not self.result_str:
return None
pattern = r"process_time/real_time_mean\s+([\d.]+)\s\w{2}"
match = re.search(pattern, self.result_str)
if not match:
return None
try:
return float(match.group(1))
except ValueError:
if not self.result_json:
return None

total_time = 0.0
count = 0

for benchmark in self.result_json:
real_time = benchmark.get("real_time")
if real_time is not None:
try:
total_time += float(real_time)
count += 1
except ValueError:
continue

if count > 0:
return total_time / count

return None


def generate_display_DBR(candidate_id: int, mean_time: float) -> str:
"""Generate dispatch_benchmark_result string for displaying"""
@@ -611,14 +622,32 @@ def multiprocess_progress_wrapper(
pbar.update(1) # Update progress bar
results.append(result)
except KeyboardInterrupt:
# If Ctrl+C is pressed, terminate all child processes
# If Ctrl+C is pressed, terminate all child process
worker_pool.terminate()
worker_pool.join()
sys.exit(1) # Exit the script

return results


def extract_benchmark_from_run_result(run_result: RunResult) -> Optional[list]:
"""Extract the benchmark from the result JSON"""
if run_result.process_res and run_result.process_res.stdout:
try:
result_json = json.loads(run_result.process_res.stdout)

return result_json.get("benchmarks", None)
except json.JSONDecodeError as e:
handle_error(
condition=True,
msg=f"Failed to parse JSON from stdout: {e}",
error_type=ValueError,
exit_program=True,
)

return None


def numerical_sort_key(path: Path) -> tuple[int | float, str]:
"""
Define a sort key function that splits the filename into a numeric and a string part.
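
A short sketch (also not in the commit) of how the new extract_benchmark_from_run_result helper and IREEBenchmarkResult compose for a single task result; the MagicMock stub mirrors what the updated tests below do, and the literal payload is illustrative only.

from unittest.mock import MagicMock

task_result = MagicMock()
task_result.candidate_id = 7
task_result.run_result.process_res.stdout = '{"benchmarks": [{"real_time": 1.5}]}'

benchmarks = extract_benchmark_from_run_result(task_result.run_result)
mean = IREEBenchmarkResult(task_result.candidate_id, benchmarks).get_mean_time()
assert mean == 1.5
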
@@ -882,7 +911,7 @@ def parse_dispatch_benchmark_results(
path_config: PathConfig,
benchmark_results: list[TaskResult],
candidate_trackers: list[CandidateTracker],
) -> tuple[list[ParsedDisptachBenchmarkResult], list[str]]:
) -> tuple[list[ParsedDispatchBenchmarkResult], list[str]]:
benchmark_result_configs = []
dump_list = []
incomplete_list = []
@@ -896,8 +925,8 @@ def parse_dispatch_benchmark_results(
incomplete_list.append(candidate_id)
continue

res_str = process_res.stdout
res = IREEBenchmarkResult(candidate_id, res_str)
res_json = extract_benchmark_from_run_result(benchmark_result.run_result)
res = IREEBenchmarkResult(candidate_id, res_json)
benchmark_time = res.get_mean_time()
assert benchmark_time is not None
candidate_trackers[candidate_id].first_benchmark_time = benchmark_time
@@ -913,7 +942,7 @@

benchmark_result_configs.append(
(
ParsedDisptachBenchmarkResult(
ParsedDispatchBenchmarkResult(
candidate_id,
benchmark_time,
mlir_path,
@@ -1169,6 +1198,7 @@ def parse_model_benchmark_results(
tuple[int, Optional[str]]
] = [] # format: [(candidate_id, device_id)]

parsed_model_results = []
baseline_time = None
for same_device_results in grouped_benchmark_results:
dump_unsort_list: list[tuple[float, str]] = []
@@ -1185,8 +1215,8 @@
baseline_time = None
continue

result_str = process_res.stdout
res = IREEBenchmarkResult(candidate_id, result_str)
result_json = extract_benchmark_from_run_result(task_result.run_result)
res = IREEBenchmarkResult(candidate_id, result_json)
benchmark_time = res.get_mean_time()
assert benchmark_time is not None

@@ -1320,15 +1350,15 @@ def benchmark_models(
)

dump_list = parse_model_benchmark_results(
candidate_trackers, candidate_results, baseline_results
candidate_trackers, candidate_results, baseline_results, path_config
)

append_to_file(
dump_list, filepath=path_config.output_unilog, title="Model Benchmark Results"
)


def summerize_top_candidates(
def summarize_top_candidates(
path_config: PathConfig, candidate_trackers: list[CandidateTracker]
):
dump_list = []
77 changes: 44 additions & 33 deletions tuner/tuner/libtuner_test.py
@@ -6,11 +6,12 @@

import argparse
import pytest
from unittest.mock import call, patch, MagicMock
import json
from unittest.mock import ANY, call, patch, MagicMock
from . import libtuner

"""
Usage: python -m pytest test_libtuner.py
Usage: python -m pytest libtuner_test.py
"""


@@ -58,32 +59,31 @@ def test_collision_handler():

def test_IREEBenchmarkResult_get():
# Time is int
normal_str = r"""
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations UserCounters...
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time 271 us 275 us 3000 items_per_second=3.65611k/s
BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time 274 us 275 us 3000 items_per_second=3.65481k/s
BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time 273 us 275 us 3000 items_per_second=3.65671k/s
BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time_mean 274 us 275 us 3 items_per_second=3.65587k/s
BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time_mean 275 us 275 us 3 items_per_second=3.65611k/s
BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time_stddev 0.073 us 0.179 us 3 items_per_second=0.971769/s
BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time_cv 0.03 % 0.07 % 3 items_per_second=0.03%
"""
res = libtuner.IREEBenchmarkResult(candidate_id=1, result_str=normal_str)
assert res.get_mean_time() == float(274)
int_json = [
{
"real_time": 1,
}
]

res = libtuner.IREEBenchmarkResult(candidate_id=1, result_json=int_json)
assert res.get_mean_time() == float(1)

# Time is float
res = libtuner.IREEBenchmarkResult(
candidate_id=2,
result_str="process_time/real_time_mean 123.45 us, process_time/real_time_mean 246.78 us",
)
float_json = [
{
"real_time": 123.45,
}
]

res = libtuner.IREEBenchmarkResult(candidate_id=2, result_json=float_json)
assert res.get_mean_time() == 123.45

# Invalid str
res = libtuner.IREEBenchmarkResult(candidate_id=3, result_str="hello world")
# Invalid json
invalid_json = [{"real_time": None}]

res = libtuner.IREEBenchmarkResult(candidate_id=3, result_json=invalid_json)
assert res.get_mean_time() == None
res = libtuner.IREEBenchmarkResult(candidate_id=4, result_str="")
res = libtuner.IREEBenchmarkResult(candidate_id=4, result_json={})
assert res.get_mean_time() == None


@@ -108,12 +108,15 @@ def test_parse_dispatch_benchmark_results():
spec_dir = base_path / "specs"
path_config = libtuner.PathConfig()
object.__setattr__(path_config, "specs_dir", spec_dir)
object.__setattr__(path_config, "output_json", "output.json")

mock_result_1 = MagicMock()
mock_result_1.run_result.process_res.stdout = "process_time/real_time_mean 100.0 us"
mock_json_1 = {"benchmarks": [{"real_time": 100.0}]}
mock_result_1.run_result.process_res.stdout = json.dumps(mock_json_1)
mock_result_1.candidate_id = 1
mock_result_2 = MagicMock()
mock_result_2.run_result.process_res.stdout = "process_time/real_time_mean 200.0 us"
mock_json_2 = {"benchmarks": [{"real_time": 200.0}]}
mock_result_2.run_result.process_res.stdout = json.dumps(mock_json_2)
mock_result_2.candidate_id = 2
mock_result_3 = MagicMock()
mock_result_3.run_result.process_res = None # Incomplete result
@@ -127,13 +130,13 @@ def test_parse_dispatch_benchmark_results():
candidate_trackers.append(tracker)

expected_parsed_results = [
libtuner.ParsedDisptachBenchmarkResult(
libtuner.ParsedDispatchBenchmarkResult(
candidate_id=1,
benchmark_time_in_seconds=100.0,
candidate_mlir=libtuner.Path("/mock/mlir/path/1.mlir"),
candidate_spec_mlir=libtuner.Path("/mock/base/dir/specs/1_spec.mlir"),
),
libtuner.ParsedDisptachBenchmarkResult(
libtuner.ParsedDispatchBenchmarkResult(
candidate_id=2,
benchmark_time_in_seconds=200.0,
candidate_mlir=libtuner.Path("/mock/mlir/path/2.mlir"),
@@ -163,6 +166,9 @@


def test_parse_model_benchmark_results():
path_config = libtuner.PathConfig()
object.__setattr__(path_config, "output_json", "output.json")

# Setup mock data for candidate_trackers
tracker0 = libtuner.CandidateTracker(0)
tracker0.compiled_model_path = libtuner.Path("/path/to/baseline.vmfb")
@@ -180,22 +186,26 @@ def test_parse_model_benchmark_results():

# Setup mock data for task results
result1 = MagicMock()
result1.run_result.process_res.stdout = "1.23"
result_json_1 = {"benchmarks": [{"real_time": 1.23}]}
result1.run_result.process_res.stdout = json.dumps(result_json_1)
result1.candidate_id = 1
result1.device_id = "device1"

result2 = MagicMock()
result2.run_result.process_res.stdout = "4.56"
result_json_2 = {"benchmarks": [{"real_time": 4.56}]}
result2.run_result.process_res.stdout = json.dumps(result_json_2)
result2.candidate_id = 2
result2.device_id = "device2"

result3 = MagicMock()
result3.run_result.process_res.stdout = "0.98"
result_json_3 = {"benchmarks": [{"real_time": 0.98}]}
result3.run_result.process_res.stdout = json.dumps(result_json_3)
result3.candidate_id = 0
result3.device_id = "device1"

result4 = MagicMock()
result4.run_result.process_res.stdout = "4.13"
result_json_4 = {"benchmarks": [{"real_time": 4.13}]}
result4.run_result.process_res.stdout = json.dumps(result_json_4)
result4.candidate_id = 0
result4.device_id = "device2"

@@ -206,7 +216,8 @@ def test_parse_model_benchmark_results():
result5.device_id = "device3"

result6 = MagicMock()
result6.run_result.process_res.stdout = "3.38"
result_json_6 = {"benchmarks": [{"real_time": 3.38}]}
result6.run_result.process_res.stdout = json.dumps(result_json_6)
result6.candidate_id = 3
result6.device_id = "device3"

@@ -215,7 +226,7 @@ def test_parse_model_benchmark_results():

# Skip real benchmark extraction, directly use given values from above
def mock_get_mean_time(self):
return float(self.result_str) if self.result_str else None
return float(self.result_json[0]["real_time"]) if self.result_json else None

# Mock IREEBenchmarkResult to return wanted benchmark times
with patch(
