diff --git a/run.py b/run.py
index 927f1472a..c41d90d83 100644
--- a/run.py
+++ b/run.py
@@ -477,15 +477,15 @@ def main() -> None:
     )
     parser.add_argument(
         "--metrics-gpu-backend",
-        choices=["dcgm", "default", "nvml"],
-        default="default",
+        choices=["torch", "nvml", "dcgm"],
+        default="torch",
         help="""
-            Specify the backend [dcgm, default, nvml] to collect metrics. In all modes,
+            Specify the backend [torch, nvml, dcgm] to collect metrics. In all modes,
             the latency (execution time) is always collected using `time.time_ns()`. The CPU
             and GPU peak memory usage metrics are optional. The CPU peak memory usage is
             collected by `psutil.Process()` in all modes. In nvml mode, the GPU peak memory
             usage is collected by the `nvml` library. In dcgm mode, the GPU peak memory usage is
-            collected by the `dcgm` library. In default mode, the GPU peak memory usage is collected
+            collected by the `dcgm` library. In torch mode, the GPU peak memory usage is collected
             by `torch.cuda.max_memory_allocated()`.
         """,
     )
diff --git a/torchbenchmark/util/experiment/metrics.py b/torchbenchmark/util/experiment/metrics.py
index 7e7684caf..fd206b22a 100644
--- a/torchbenchmark/util/experiment/metrics.py
+++ b/torchbenchmark/util/experiment/metrics.py
@@ -71,7 +71,7 @@ def get_peak_memory(
     num_iter=MEMPROF_ITER,
     export_metrics_file="",
     metrics_needed=[],
-    metrics_gpu_backend="default",
+    metrics_gpu_backend="torch",
     cpu_monitored_pid=None,
 ) -> Tuple[Optional[float], Optional[str], Optional[float]]:
     "Run one step of the model, and return the peak memory in MB."
diff --git a/userbenchmark/triton/run.py b/userbenchmark/triton/run.py
index 033d160b3..f79ebb685 100644
--- a/userbenchmark/triton/run.py
+++ b/userbenchmark/triton/run.py
@@ -94,13 +94,13 @@ def get_parser(args=None):
     )
     parser.add_argument(
         "--metrics-gpu-backend",
-        choices=["default", "nvml"],
-        default="default",
+        choices=["torch", "nvml"],
+        default="torch",
         help=(
-            "Specify the backend [default, nvml] to collect metrics. In all modes, the latency "
+            "Specify the backend [torch, nvml] to collect metrics. In all modes, the latency "
             "(execution time) is always collected using `time.time_ns()`. The CPU peak memory "
             "usage is collected by `psutil.Process()`. In nvml mode, the GPU peak memory usage "
-            "is collected by the `nvml` library. In default mode, the GPU peak memory usage is "
+            "is collected by the `nvml` library. In torch mode, the GPU peak memory usage is "
             "collected by `torch.cuda.max_memory_allocated()`."
         ),
     )
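For context on what the renamed `torch` backend measures versus the `nvml` backend, here is a minimal, self-contained sketch; the `measure` helper and its signature are illustrative only and are not part of this PR or of `torchbenchmark`. The `torch` backend reads the peak tracked by PyTorch's caching allocator via `torch.cuda.max_memory_allocated()`, while a `pynvml` reading reports device-wide memory in use at the moment it is sampled.

```python
# Illustrative sketch (not the torchbenchmark implementation): latency via
# time.time_ns(), CPU RSS via psutil, and GPU memory via either the "torch"
# backend (torch.cuda.max_memory_allocated) or the "nvml" backend (pynvml).
import time

import psutil
import torch


def measure(fn, metrics_gpu_backend: str = "torch"):
    proc = psutil.Process()
    if metrics_gpu_backend == "torch":
        # Clear the allocator's peak counter so the peak reflects fn() only.
        torch.cuda.reset_peak_memory_stats()

    t0 = time.time_ns()
    fn()
    torch.cuda.synchronize()
    latency_ms = (time.time_ns() - t0) / 1e6

    # Point-in-time resident set size of this process, in MB.
    cpu_mem_mb = proc.memory_info().rss / 1024**2

    if metrics_gpu_backend == "torch":
        # Peak bytes allocated by the PyTorch caching allocator since the reset above.
        gpu_mem_mb = torch.cuda.max_memory_allocated() / 1024**2
    else:  # "nvml"
        # Device-wide memory currently in use; a real peak-memory monitor would
        # sample this periodically while fn() runs and keep the maximum.
        import pynvml

        pynvml.nvmlInit()
        handle = pynvml.nvmlDeviceGetHandleByIndex(0)
        gpu_mem_mb = pynvml.nvmlDeviceGetMemoryInfo(handle).used / 1024**2
        pynvml.nvmlShutdown()

    return latency_ms, cpu_mem_mb, gpu_mem_mb
```

Usage would look like `measure(lambda: model(inputs), metrics_gpu_backend="nvml")`; the torch-allocator number excludes memory held by other processes or non-PyTorch allocations, which is why the two backends generally report different values.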