memory_profile_utils.py

import gc
import subprocess as sp

import psutil
import torch
from deepspeed.utils import logger


def print_memory_with_message(rank, device, message: str = None):
    """Log GPU/CPU memory usage on every rank, with an optional header message.

    Must be called on all ranks, since it synchronizes via torch.distributed.barrier().
    """
    if rank == 0:
        if message is not None:
            logger.info("=====" * 2 + message + "=====" * 2 + "\n")
        get_nvidia_gpu_memory()
    torch.distributed.barrier()
    gpu_memory_plot_helper(rank, device)
    torch.distributed.barrier()


def get_nvidia_gpu_memory():
    """Query the memory currently used on each GPU (in MiB) via nvidia-smi and log it."""
    output_to_list = lambda x: x.decode("ascii").split("\n")[:-1]
    ACCEPTABLE_AVAILABLE_MEMORY = 1024  # currently unused
    COMMAND = "nvidia-smi --query-gpu=memory.used --format=csv"
    try:
        # Skip the first line of output, which is the CSV header.
        memory_use_info = output_to_list(
            sp.check_output(COMMAND.split(), stderr=sp.STDOUT)
        )[1:]
    except sp.CalledProcessError as e:
        raise RuntimeError(
            "command '{}' returned with error (code {}): {}".format(
                e.cmd, e.returncode, e.output
            )
        )
    # Each remaining line looks like "<N> MiB"; keep only the integer value.
    memory_use_values = [int(x.split()[0]) for x in memory_use_info]
    logger.info(
        f"""
    nvidia-smi info
    {memory_use_values}"""
    )
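

# For reference (an illustrative sketch, not output captured from this repo):
# with the query above, nvidia-smi prints a CSV header followed by one line
# per GPU, roughly like
#
#     memory.used [MiB]
#     3416 MiB
#     518 MiB
#
# which the parsing in get_nvidia_gpu_memory() turns into [3416, 518].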


def gpu_memory_plot_helper(
    rank,
    device,
    use_deepspeed_memory_usage_helper: bool = False,
):
    """
    DeepSpeed has its own memory profiler, but we add this helper so it can be
    customized in the future.
    https://github.com/microsoft/DeepSpeed/blob/ff7d5275f2aa916cb5f320e0d817154e96f9cdb6/deepspeed/runtime/utils.py#L793
    """
    gc.collect()
    # Wait for pending CUDA work so the memory readings below are accurate.
    # https://pytorch.org/docs/stable/generated/torch.cuda.synchronize.html
    torch.cuda.synchronize(device)
    allocated = torch.cuda.memory_allocated(device) / (1024**2)
    max_allocated = torch.cuda.max_memory_allocated(device) / (1024**2)
    reserved = torch.cuda.memory_reserved(device) / (1024**2)
    max_reserved = torch.cuda.max_memory_reserved(device) / (1024**2)
    vm_stats = psutil.virtual_memory()
    used_GB = round((vm_stats.total - vm_stats.available) / (1024**3), 2)
    logger.info(
        f"""
    rank: {rank} / device: {device}
    CPU Virtual Memory: used = {used_GB} GB, percent = {vm_stats.percent}%
    Allocated / Reserved: {allocated:.2f}MB / {reserved:.2f}MB
    Max Allocated / Max Reserved: {max_allocated:.2f}MB / {max_reserved:.2f}MB"""
    )
    # Reset peak stats so the next call reports peaks for the upcoming interval only.
    # https://pytorch.org/docs/stable/generated/torch.cuda.reset_peak_memory_stats.html
    torch.cuda.reset_peak_memory_stats(device)
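

# Minimal usage sketch (an assumed example, not part of the original file):
# torch.distributed must already be initialized, e.g. via
# deepspeed.init_distributed() or torch.distributed.init_process_group(),
# because print_memory_with_message() calls torch.distributed.barrier().
#
#     rank = torch.distributed.get_rank()
#     device = torch.device("cuda", torch.cuda.current_device())
#     print_memory_with_message(rank, device, message="after forward pass")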