Skip to content

Commit

Permalink
Add ChromiumEventLogger, log FXGraphCache and AOTAutogradCache (#132864)
Browse files Browse the repository at this point in the history
Summary:
This PR implements ChromiumEventLogger in all dynamo_timed events. For each dynamo timed call, we log:
- A start event before starting the function execution
- An end event after finishing the function execution
- An extra pair of start/end events for any phase names included in dynamo.

Separately, this also gives us the ability to log instant events. I use them to log cache hits/misses as a first step. The little arrows on the bottom of the UI are cache hits/misses, and you can look at cache details by clicking each triangle.

The outputted chromium trace events can be viewed in perfetto for a timeline of an execution. Here's what it looks like for a run of nanogpt:
![image](https://github.com/user-attachments/assets/cb9e6c7a-1acf-45e6-8a27-6651d9ae6132)

And another with warm start:
![image](https://github.com/user-attachments/assets/cd9709bc-59ef-4da1-a7dd-10b1a0ab9b8f)

Trace events are based around the JSON Event format: https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview

We may want to switch to the less deprecated Protobuf format later, but so far I don't see any features we care about supported there.

Internal FB employees can see a link to this in the tlparse output:
https://interncache-all.fbcdn.net/manifold/tlparse_reports/tree/logs/.tmpVi1FIl/dedicated_log_torch_trace_bb4zl_bc.log/index.html

I'll also work on logging these

X-link: pytorch/pytorch#132864
Approved by: https://github.com/aorenste

Reviewed By: fbgheith

Differential Revision: D61069477

Pulled By: jamesjwu

fbshipit-source-id: d9249644d755a0aa4c4280ae8bdaf8163486ead8
  • Loading branch information
jamesjwu authored and facebook-github-bot committed Aug 13, 2024
1 parent 9f7977b commit dfa0fe0
Showing 1 changed file with 105 additions and 0 deletions.
105 changes: 105 additions & 0 deletions userbenchmark/dynamo/dynamobench/_dynamo/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,13 @@ def dynamo_timed(
try:
with torch.profiler.record_function(f"{key} (dynamo_timed)"):
t0 = time.time()
ChromiumEventLogger.log_event_start(key, time.time_ns())
if phase_name:
ChromiumEventLogger.log_event_start(phase_name, time.time_ns())
yield
if phase_name:
ChromiumEventLogger.log_event_end(phase_name, time.time_ns())
ChromiumEventLogger.log_event_end(key, time.time_ns())
time_spent = time.time() - t0
compilation_time_metrics[key].append(time_spent)
except Exception as e:
Expand Down Expand Up @@ -799,6 +805,105 @@ def get_compilation_metrics() -> List[Union[CompilationMetrics, BwdCompilationMe
return list(_compilation_metrics)


class ChromiumEventLogger:
"""Logs chromium events to structured logs. tlparse will concatenate these into a perfetto UI link.
See https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview#heading=h.yr4qxyxotyw for
a specification of the Chromium Event JSON format.
"""

@staticmethod
def log_event_start(
event_name: str,
time_ns: int,
metadata: Optional[Dict[str, Any]] = None,
) -> None:
"""
Logs the start of a single event.
:param str event_name Name of event to appear in trace
:param time_ns Timestamp in nanoseconds
:param metadata: Any extra metadata associated with this event
"""
ChromiumEventLogger._log_timed_event(
event_name,
time_ns,
"B",
metadata,
)

@staticmethod
def log_event_end(
event_name: str,
time_ns: int,
metadata: Optional[Dict[str, Any]] = None,
) -> None:
"""
Logs the end of a single event. This function should only be
called after log_event_start with the same event_name.
:param event_name: Name of event to appear in trace
:param time_ns: Timestamp in nanoseconds
:param metadata: Any extra metadata associated with this event
"""
ChromiumEventLogger._log_timed_event(
event_name,
time_ns,
"E",
metadata,
)

@staticmethod
def _log_timed_event(
event_name: str,
time_ns: int,
phase: str,
metadata: Optional[Dict[str, Any]] = None,
) -> None:
"""
Logs a timed event in chromium format. See log_event_start, log_event_end, etc.
"""
event = {
"name": event_name,
"ts": time_ns / 1000, # Chromium events are in ms
"args": metadata,
"ph": phase,
"pid": 0, # pid should be specified on all logs, we don't personally care about the actual process id
}
torch._logging.trace_structured(
"chromium_event",
payload_fn=lambda: event,
suppress_context=False,
expect_trace_id=False, # Not every chromium event will have a trace_id
)

@staticmethod
def log_instant_event(
event_name: str,
time_ns: int,
metadata: Optional[Dict[str, Any]] = None,
) -> None:
"""
Log an instant event with no associated duration.
:param str event_name: Name of event to appear in trace
:param int time_ns Timestamp in nanoseconds
:param Optional[Dict[str, Any]] metadata: Any extra metadata associated with this event
:param str cname optional color for the arrow in the trace
"""
event = {
"name": event_name,
"ts": time_ns / 1000,
"args": metadata,
"ph": "i",
"pid": 0, # pid should be specified on all logs, we don't personally care about the actual process id
"s": "p", # We use "process" level instant events so they all appear on the same row in the trace.
}
torch._logging.trace_structured(
"chromium_event",
payload_fn=lambda: event,
suppress_context=False,
expect_trace_id=True,
)


@dataclasses.dataclass
class CleanupHook:
"""Remove a global variable when hook is called"""
Expand Down

0 comments on commit dfa0fe0

Please sign in to comment.