[tuner] Use JSON for benchmark output
mihaescuvlad committed Oct 9, 2024
1 parent 4e2f351 commit a49ef9c
Showing 2 changed files with 94 additions and 53 deletions.
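
The change replaces regex scraping of iree-benchmark-module's console table with parsing of its JSON output: stdout is decoded with json.loads, the top-level "benchmarks" list is extracted, and the mean of the per-entry "real_time" values becomes the benchmark time. Below is a minimal sketch (not part of the commit) of the payload shape the new code expects; the field names follow Google Benchmark's JSON schema, and the exact flag that makes iree-benchmark-module emit JSON (e.g. --benchmark_format=json) is not shown in this diff and is an assumption.

import json

# Illustrative stdout payload only; real entries carry more fields (name, iterations, ...).
sample_stdout = '{"benchmarks": [{"real_time": 271.0}, {"real_time": 274.0}, {"real_time": 273.0}]}'

benchmarks = json.loads(sample_stdout).get("benchmarks", None)
mean = sum(b["real_time"] for b in benchmarks) / len(benchmarks)
print(mean)  # 272.666...
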
70 changes: 50 additions & 20 deletions tuner/tuner/libtuner.py
@@ -36,6 +36,7 @@
from typing import Type, Optional, Callable, Iterable, Any
import pickle
import random
import json
from abc import ABC, abstractmethod
import iree.runtime as ireert
from . import candidate_gen
@@ -97,6 +98,7 @@ class PathConfig:
specs_dir: Path = field(init=False)

output_unilog: Path = field(init=False)
output_json: Path = field(init=False)
result_summary_log: Path = field(init=False)
candidate_trackers_pkl: Path = field(init=False)

@@ -225,7 +227,7 @@ class TaskResult:


@dataclass
class ParsedDisptachBenchmarkResult:
class ParsedDispatchBenchmarkResult:
candidate_id: int
benchmark_time_in_seconds: float
candidate_mlir: Path
@@ -236,20 +238,29 @@ class ParsedDisptachBenchmarkResult:
class IREEBenchmarkResult:
# Default format follows output of iree-benchmark-module
candidate_id: int
result_str: str
result_json: list

def get_mean_time(self) -> Optional[float]:
if not self.result_str:
return None
pattern = r"process_time/real_time_mean\s+([\d.]+)\s\w{2}"
match = re.search(pattern, self.result_str)
if not match:
return None
try:
return float(match.group(1))
except ValueError:
if not self.result_json:
return None

total_time = 0.0
count = 0

for benchmark in self.result_json:
real_time = benchmark.get("real_time")
if real_time is not None:
try:
total_time += float(real_time)
count += 1
except ValueError:
continue

if count > 0:
return total_time / count

return None


def generate_display_DBR(candidate_id: int, mean_time: float) -> str:
"""Generate dispatch_benchmark_result string for displaying"""
@@ -611,14 +622,32 @@ def multiprocess_progress_wrapper(
pbar.update(1) # Update progress bar
results.append(result)
except KeyboardInterrupt:
# If Ctrl+C is pressed, terminate all child processes
# If Ctrl+C is pressed, terminate all child process
worker_pool.terminate()
worker_pool.join()
sys.exit(1) # Exit the script

return results


def extract_benchmark_from_run_result(run_result: RunResult) -> Optional[list]:
"""Extract the benchmark from the result JSON"""
if run_result.process_res and run_result.process_res.stdout:
try:
result_json = json.loads(run_result.process_res.stdout)

return result_json.get("benchmarks", None)
except json.JSONDecodeError as e:
handle_error(
condition=True,
msg=f"Failed to parse JSON from stdout: {e}",
error_type=ValueError,
exit_program=True,
)

return None


def numerical_sort_key(path: Path) -> tuple[int | float, str]:
"""
Define a sort key function that splits the filename into a numeric and a string part.
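
A short sketch (also not in the commit) of how the new extract_benchmark_from_run_result helper and IREEBenchmarkResult compose for a single task result; the MagicMock stub mirrors what the updated tests below do, and the literal payload is illustrative only.

from unittest.mock import MagicMock

task_result = MagicMock()
task_result.candidate_id = 7
task_result.run_result.process_res.stdout = '{"benchmarks": [{"real_time": 1.5}]}'

benchmarks = extract_benchmark_from_run_result(task_result.run_result)
mean = IREEBenchmarkResult(task_result.candidate_id, benchmarks).get_mean_time()
assert mean == 1.5
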
@@ -882,7 +911,7 @@ def parse_dispatch_benchmark_results(
path_config: PathConfig,
benchmark_results: list[TaskResult],
candidate_trackers: list[CandidateTracker],
) -> tuple[list[ParsedDisptachBenchmarkResult], list[str]]:
) -> tuple[list[ParsedDispatchBenchmarkResult], list[str]]:
benchmark_result_configs = []
dump_list = []
incomplete_list = []
@@ -896,8 +925,8 @@ def parse_dispatch_benchmark_results(
incomplete_list.append(candidate_id)
continue

res_str = process_res.stdout
res = IREEBenchmarkResult(candidate_id, res_str)
res_json = extract_benchmark_from_run_result(benchmark_result.run_result)
res = IREEBenchmarkResult(candidate_id, res_json)
benchmark_time = res.get_mean_time()
assert benchmark_time is not None
candidate_trackers[candidate_id].first_benchmark_time = benchmark_time
@@ -913,7 +942,7 @@

benchmark_result_configs.append(
(
ParsedDisptachBenchmarkResult(
ParsedDispatchBenchmarkResult(
candidate_id,
benchmark_time,
mlir_path,
@@ -1169,6 +1198,7 @@ def parse_model_benchmark_results(
tuple[int, Optional[str]]
] = [] # format: [(candidate_id, device_id)]

parsed_model_results = []
baseline_time = None
for same_device_results in grouped_benchmark_results:
dump_unsort_list: list[tuple[float, str]] = []
@@ -1185,8 +1215,8 @@
baseline_time = None
continue

result_str = process_res.stdout
res = IREEBenchmarkResult(candidate_id, result_str)
result_json = extract_benchmark_from_run_result(task_result.run_result)
res = IREEBenchmarkResult(candidate_id, result_json)
benchmark_time = res.get_mean_time()
assert benchmark_time is not None

@@ -1320,15 +1350,15 @@ def benchmark_models(
)

dump_list = parse_model_benchmark_results(
candidate_trackers, candidate_results, baseline_results
candidate_trackers, candidate_results, baseline_results, path_config
)

append_to_file(
dump_list, filepath=path_config.output_unilog, title="Model Benchmark Results"
)


def summerize_top_candidates(
def summarize_top_candidates(
path_config: PathConfig, candidate_trackers: list[CandidateTracker]
):
dump_list = []
77 changes: 44 additions & 33 deletions tuner/tuner/libtuner_test.py
@@ -6,11 +6,12 @@

import argparse
import pytest
from unittest.mock import call, patch, MagicMock
import json
from unittest.mock import ANY, call, patch, MagicMock
from . import libtuner

"""
Usage: python -m pytest test_libtuner.py
Usage: python -m pytest libtuner_test.py
"""


@@ -58,32 +59,31 @@ def test_collision_handler():

def test_IREEBenchmarkResult_get():
# Time is int
normal_str = r"""
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations UserCounters...
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time 271 us 275 us 3000 items_per_second=3.65611k/s
BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time 274 us 275 us 3000 items_per_second=3.65481k/s
BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time 273 us 275 us 3000 items_per_second=3.65671k/s
BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time_mean 274 us 275 us 3 items_per_second=3.65587k/s
BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time_mean 275 us 275 us 3 items_per_second=3.65611k/s
BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time_stddev 0.073 us 0.179 us 3 items_per_second=0.971769/s
BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time_cv 0.03 % 0.07 % 3 items_per_second=0.03%
"""
res = libtuner.IREEBenchmarkResult(candidate_id=1, result_str=normal_str)
assert res.get_mean_time() == float(274)
int_json = [
{
"real_time": 1,
}
]

res = libtuner.IREEBenchmarkResult(candidate_id=1, result_json=int_json)
assert res.get_mean_time() == float(1)

# Time is float
res = libtuner.IREEBenchmarkResult(
candidate_id=2,
result_str="process_time/real_time_mean 123.45 us, process_time/real_time_mean 246.78 us",
)
float_json = [
{
"real_time": 123.45,
}
]

res = libtuner.IREEBenchmarkResult(candidate_id=2, result_json=float_json)
assert res.get_mean_time() == 123.45

# Invalid str
res = libtuner.IREEBenchmarkResult(candidate_id=3, result_str="hello world")
# Invalid json
invalid_json = [{"real_time": None}]

res = libtuner.IREEBenchmarkResult(candidate_id=3, result_json=invalid_json)
assert res.get_mean_time() == None
res = libtuner.IREEBenchmarkResult(candidate_id=4, result_str="")
res = libtuner.IREEBenchmarkResult(candidate_id=4, result_json={})
assert res.get_mean_time() == None


@@ -108,12 +108,15 @@ def test_parse_dispatch_benchmark_results():
spec_dir = base_path / "specs"
path_config = libtuner.PathConfig()
object.__setattr__(path_config, "specs_dir", spec_dir)
object.__setattr__(path_config, "output_json", "output.json")

mock_result_1 = MagicMock()
mock_result_1.run_result.process_res.stdout = "process_time/real_time_mean 100.0 us"
mock_json_1 = {"benchmarks": [{"real_time": 100.0}]}
mock_result_1.run_result.process_res.stdout = json.dumps(mock_json_1)
mock_result_1.candidate_id = 1
mock_result_2 = MagicMock()
mock_result_2.run_result.process_res.stdout = "process_time/real_time_mean 200.0 us"
mock_json_2 = {"benchmarks": [{"real_time": 200.0}]}
mock_result_2.run_result.process_res.stdout = json.dumps(mock_json_2)
mock_result_2.candidate_id = 2
mock_result_3 = MagicMock()
mock_result_3.run_result.process_res = None # Incomplete result
@@ -127,13 +130,13 @@ def test_parse_dispatch_benchmark_results():
candidate_trackers.append(tracker)

expected_parsed_results = [
libtuner.ParsedDisptachBenchmarkResult(
libtuner.ParsedDispatchBenchmarkResult(
candidate_id=1,
benchmark_time_in_seconds=100.0,
candidate_mlir=libtuner.Path("/mock/mlir/path/1.mlir"),
candidate_spec_mlir=libtuner.Path("/mock/base/dir/specs/1_spec.mlir"),
),
libtuner.ParsedDisptachBenchmarkResult(
libtuner.ParsedDispatchBenchmarkResult(
candidate_id=2,
benchmark_time_in_seconds=200.0,
candidate_mlir=libtuner.Path("/mock/mlir/path/2.mlir"),
@@ -163,6 +166,9 @@


def test_parse_model_benchmark_results():
path_config = libtuner.PathConfig()
object.__setattr__(path_config, "output_json", "output.json")

# Setup mock data for candidate_trackers
tracker0 = libtuner.CandidateTracker(0)
tracker0.compiled_model_path = libtuner.Path("/path/to/baseline.vmfb")
@@ -180,22 +186,26 @@ def test_parse_model_benchmark_results():

# Setup mock data for task results
result1 = MagicMock()
result1.run_result.process_res.stdout = "1.23"
result_json_1 = {"benchmarks": [{"real_time": 1.23}]}
result1.run_result.process_res.stdout = json.dumps(result_json_1)
result1.candidate_id = 1
result1.device_id = "device1"

result2 = MagicMock()
result2.run_result.process_res.stdout = "4.56"
result_json_2 = {"benchmarks": [{"real_time": 4.56}]}
result2.run_result.process_res.stdout = json.dumps(result_json_2)
result2.candidate_id = 2
result2.device_id = "device2"

result3 = MagicMock()
result3.run_result.process_res.stdout = "0.98"
result_json_3 = {"benchmarks": [{"real_time": 0.98}]}
result3.run_result.process_res.stdout = json.dumps(result_json_3)
result3.candidate_id = 0
result3.device_id = "device1"

result4 = MagicMock()
result4.run_result.process_res.stdout = "4.13"
result_json_4 = {"benchmarks": [{"real_time": 4.13}]}
result4.run_result.process_res.stdout = json.dumps(result_json_4)
result4.candidate_id = 0
result4.device_id = "device2"

@@ -206,7 +216,8 @@ def test_parse_model_benchmark_results():
result5.device_id = "device3"

result6 = MagicMock()
result6.run_result.process_res.stdout = "3.38"
result_json_6 = {"benchmarks": [{"real_time": 3.38}]}
result6.run_result.process_res.stdout = json.dumps(result_json_6)
result6.candidate_id = 3
result6.device_id = "device3"

@@ -215,7 +226,7 @@ def test_parse_model_benchmark_results():

# Skip real benchmark extraction, directly use given values from above
def mock_get_mean_time(self):
return float(self.result_str) if self.result_str else None
return float(self.result_json[0]["real_time"]) if self.result_json else None

# Mock IREEBenchmarkResult to return wanted benchmark times
with patch(
