Skip to content

Commit

Permalink
Support render/compute pass timer queries, overhaul query interface (#56)
Browse files Browse the repository at this point in the history

* initial support for pass timestamp_writes

* reorganize scope to have less code duplication. But I don't like it since it requires importing a trait. More stuff to try...

* use macro to implement scopes and simplify scope implementing

* make tests pass again

* solve low-level scope parenting with a with_ method

* fix clippy lints

* allow changing settings while scopes are open

* Rename various things scope -> query, make timestamp writes api more safe and better documented

* update docs

* update demo to use manual owning scope
  • Loading branch information
Wumpf authored Dec 3, 2023
1 parent 9b02639 commit 2a458b9
Show file tree
Hide file tree
Showing 9 changed files with 522 additions and 638 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ let mut scope = profiler.scope("name of your scope", &mut encoder, &device);
let mut nested_scope = scope.scope("nested!", &device);

// Scopes on encoders can be used to easily create profiled passes!
let mut compute_pass = nested_scope.scoped_compute_pass("profiled compute", &device, &Default::default());
let mut compute_pass = nested_scope.scoped_compute_pass("profiled compute", &device);

// Scopes expose the underlying encoder or pass they wrap:
compute_pass.set_pipeline(&pipeline);
Expand Down Expand Up @@ -90,9 +90,9 @@ dual licensed as above, without any additional terms or conditions.
* unreleased
* ⚠️ Includes many major breaking changes! ⚠️
* `GpuProfiler` can now be used with several command buffers interleaved or in parallel!
* `GpuProfiler::begin_scope` returns a scope and `GpuProfiler::end_scope` consumes it again
* `Scope`/`OwningScope`/`ManualScope`/ are now all top-level in the `gpu_profiler` module
* nesting of profiling scopes is no longer done automatically: `GpuProfiler::begin_scope` now takes an optional reference to a parent scope
* `Scope`/`OwningScope`/`ManualScope` are now all top-level in the `gpu_profiler` module. `GpuProfiler` has utilities to create them directly.
* `GpuProfiler::begin_query` returns a query and `GpuProfiler::end_query` consumes it again
* nesting of profiling scopes is no longer done automatically: To manually associate a `GpuProfilerQuery` with a parent, use `GpuProfilerQuery::with_parent`
* removed profiling macro (doesn't work well with the new nesting model)
* `GpuProfiler` can now directly create scope structs using `GpuProfiler::scope`/`owning_scope`
* 0.15
Expand Down
58 changes: 23 additions & 35 deletions examples/demo.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use std::borrow::Cow;
use wgpu_profiler::*;
use wgpu_profiler::{GpuProfiler, GpuProfilerSettings, GpuTimerQueryResult};
use winit::{
event::{Event, VirtualKeyCode, WindowEvent},
event_loop::{ControlFlow, EventLoop},
window::Window,
};

fn scopes_to_console_recursive(results: &[GpuTimerScopeResult], indentation: u32) {
fn scopes_to_console_recursive(results: &[GpuTimerQueryResult], indentation: u32) {
for scope in results {
if indentation > 0 {
print!("{:<width$}", "|", width = 4);
Expand All @@ -18,13 +18,13 @@ fn scopes_to_console_recursive(results: &[GpuTimerScopeResult], indentation: u32
scope.label
);

if !scope.nested_scopes.is_empty() {
scopes_to_console_recursive(&scope.nested_scopes, indentation + 1);
if !scope.nested_queries.is_empty() {
scopes_to_console_recursive(&scope.nested_queries, indentation + 1);
}
}
}

fn console_output(results: &Option<Vec<GpuTimerScopeResult>>, enabled_features: wgpu::Features) {
fn console_output(results: &Option<Vec<GpuTimerQueryResult>>, enabled_features: wgpu::Features) {
profiling::scope!("console_output");
print!("\x1B[2J\x1B[1;1H"); // Clear terminal and put cursor to first row first column
println!("Welcome to wgpu_profiler demo!");
Expand Down Expand Up @@ -109,7 +109,7 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
height: size.height,
// By using the Fifo mode we ensure that CPU waits for GPU, thus we won't have an arbitrary amount of frames in flight that may be discarded.
// Profiler works just fine in any other mode, but keep in mind that this can mean that it would need to buffer up many more frames until the first results are back.
present_mode: wgpu::PresentMode::Fifo,
present_mode: wgpu::PresentMode::Immediate,
alpha_mode: wgpu::CompositeAlphaMode::Auto,
view_formats: vec![swapchain_format],
};
Expand Down Expand Up @@ -252,7 +252,7 @@ fn draw(
let mut rpass = scope.scoped_render_pass(
"render pass top",
device,
&wgpu::RenderPassDescriptor {
wgpu::RenderPassDescriptor {
label: None,
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
view,
Expand All @@ -262,9 +262,7 @@ fn draw(
store: wgpu::StoreOp::Store,
},
})],
depth_stencil_attachment: None,
occlusion_query_set: None,
timestamp_writes: None,
..Default::default()
},
);

Expand All @@ -282,8 +280,11 @@ fn draw(
}
}
{
// It's also possible to take timings by hand, manually calling `begin_scope` and `end_scope`.
// It's also possible to take timings by hand, manually calling `begin_query` and `end_query`.
// This is generally not recommended as it's very easy to mess up by accident :)
let pass_scope = profiler
.begin_pass_query("render pass bottom", scope.recorder, device)
.with_parent(scope.scope.as_ref());
let mut rpass = scope
.recorder
.begin_render_pass(&wgpu::RenderPassDescriptor {
Expand All @@ -298,47 +299,34 @@ fn draw(
})],
depth_stencil_attachment: None,
occlusion_query_set: None,
timestamp_writes: None,
timestamp_writes: pass_scope.render_pass_timestamp_writes(),
});
let pass_scope = profiler.begin_scope(
"render pass bottom",
&mut rpass,
device,
scope.scope.as_ref(),
);

rpass.set_pipeline(render_pipeline);

// The same works on subscopes within the pass.
// Similarly, you can manually manage nested scopes within a render pass.
// Again, to do any actual timing, you need to enable wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES.
{
let scope = profiler.begin_scope("fractal 2", &mut rpass, device, Some(&pass_scope));
let query = profiler
.begin_query("fractal 2", &mut rpass, device)
.with_parent(Some(&pass_scope));
rpass.draw(0..6, 2..3);

// Don't forget to end the scope.
// If you drop a manually created profiling scope without calling `end_scope` we'll panic if debug assertions are enabled.
profiler.end_scope(&mut rpass, scope);
// Don't forget to end the query!
profiler.end_query(&mut rpass, query);
}
// Another manual variant, is to create a `ManualOwningScope` explicitly.
// Another variant is to use `ManualOwningScope`, forming a middle ground between no scope helpers and fully automatic scope closing.
let mut rpass = {
let mut rpass = wgpu_profiler::ManualOwningScope::start_nested(
"fractal 3",
profiler,
rpass,
device,
Some(&pass_scope),
);
let mut rpass = profiler.manual_owning_scope("fractal 3", rpass, device);
rpass.draw(0..6, 3..4);

// Don't forget to end the scope.
// If you drop a manually created profiling scope without calling `end_scope` we'll panic if debug assertions are enabled.
// Ending a `ManualOwningScope` will return the pass or encoder it owned.
rpass.end_scope()
rpass.end_query()
};

// Don't forget to end the scope.
// If you drop a manually created profiling scope without calling `end_scope` we'll panic if debug assertions are enabled.
profiler.end_scope(&mut rpass, pass_scope);
profiler.end_query(&mut rpass, pass_scope);
}
}

Expand Down
16 changes: 8 additions & 8 deletions src/chrometrace.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use std::{fs::File, io::Write, path::Path};

use crate::GpuTimerScopeResult;
use crate::GpuTimerQueryResult;

/// Writes a .json trace file that can be viewed as a flame graph in Chrome or Edge via <chrome://tracing>
pub fn write_chrometrace(
target: &Path,
profile_data: &[GpuTimerScopeResult],
profile_data: &[GpuTimerQueryResult],
) -> std::io::Result<()> {
let mut file = File::create(target)?;

Expand All @@ -27,7 +27,7 @@ pub fn write_chrometrace(

fn write_results_recursive(
file: &mut File,
result: &GpuTimerScopeResult,
result: &GpuTimerQueryResult,
last: bool,
) -> std::io::Result<()> {
// note: ThreadIds are under the control of Rust’s standard library
Expand All @@ -52,24 +52,24 @@ fn write_results_recursive(
result.time.start * 1000.0 * 1000.0,
(result.time.end - result.time.start) * 1000.0 * 1000.0,
result.label,
if last && result.nested_scopes.is_empty() {
if last && result.nested_queries.is_empty() {
"\n"
} else {
",\n"
}
)?;
if result.nested_scopes.is_empty() {
if result.nested_queries.is_empty() {
return Ok(());
}

for child in result
.nested_scopes
.nested_queries
.iter()
.take(result.nested_scopes.len() - 1)
.take(result.nested_queries.len() - 1)
{
write_results_recursive(file, child, false)?;
}
write_results_recursive(file, result.nested_scopes.last().unwrap(), last)?;
write_results_recursive(file, result.nested_queries.last().unwrap(), last)?;

Ok(())
// { "pid":1, "tid":1, "ts":546867, "dur":121564, "ph":"X", "name":"DoThings"
Expand Down
9 changes: 3 additions & 6 deletions src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,17 @@ impl Eq for CreationError {}
pub enum SettingsError {
#[error("GpuProfilerSettings::max_num_pending_frames must be at least 1.")]
InvalidMaxNumPendingFrames,

#[error("Can't change settings while there's open profiling scopes.")]
HasOpenScopes,
}

/// Errors that can occur during [`crate::GpuProfiler::end_frame`].
#[derive(thiserror::Error, Debug, PartialEq, Eq)]
pub enum EndFrameError {
#[error("All profiling scopes need to be closed before ending a frame. There were still {0} open scopes.")]
UnclosedScopes(u32),
#[error("All profiling queries need to be closed before ending a frame. There were still {0} open queries.")]
UnclosedQueries(u32),

#[error(
"Not all queries were resolved before ending a frame.\n
Call `GpuProfiler::resolve_queries` after all profiling scopes have been closed and before ending the frame.\n
Call `GpuProfiler::resolve_queries` after all profiling queries have been closed and before ending the frame.\n
There were still {0} queries unresolved."
)]
UnresolvedQueries(u32),
Expand Down
Loading

0 comments on commit 2a458b9

Please sign in to comment.