From 2a458b971ce95bffc6ce7e63546611e30b684a4c Mon Sep 17 00:00:00 2001
From: Andreas Reich
Date: Sun, 3 Dec 2023 20:45:00 +0100
Subject: [PATCH] Support render/compute pass timer queries, overhaul query interface (#56)

* initial support for pass timestamp_writes
* reorganize scope to have less code dupl. But I don't like it since it requires to import a trait. More stuff to try...
* use macro to implement scopes and simplify scope implementing
* make tests pass again
* solve low-level scope parenting with a with_ method
* fix clippy lints
* allow changing settings while scopes are open
* Rename various things scope -> query, make timestamp writes api more safe and better documented
* update docs
* update demo to use manual owning scope
---
 README.md                  |   8 +-
 examples/demo.rs           |  58 ++---
 src/chrometrace.rs         |  16 +-
 src/errors.rs              |   9 +-
 src/lib.rs                 | 511 +++++++++++++++++++++++--------------
 src/scope.rs               | 500 ++++++++++--------------------------
 tests/src/errors.rs        |  26 +-
 tests/src/mod.rs           |   8 +-
 tests/src/nested_scopes.rs |  24 +-
 9 files changed, 522 insertions(+), 638 deletions(-)

diff --git a/README.md b/README.md
index 16a1005..de3017c 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@
 let mut scope = profiler.scope("name of your scope", &mut encoder, &device);
 let mut nested_scope = scope.scope("nested!", &device);

 // Scopes on encoders can be used to easily create profiled passes!
-let mut compute_pass = nested_scope.scoped_compute_pass("profiled compute", &device, &Default::default());
+let mut compute_pass = nested_scope.scoped_compute_pass("profiled compute", &device);

 // Scopes expose the underlying encoder or pass they wrap:
 compute_pass.set_pipeline(&pipeline);
@@ -90,9 +90,9 @@ dual licensed as above, without any additional terms or conditions.
 * unreleased
   * ⚠️ Includes many major breaking changes! ⚠️
   * `GpuProfiler` can now be used with several command buffers interleaved or in parallel!
-  * `GpuProfiler::begin_scope` returns a scope and `GpuProfiler::end_scope` consumes it again
-  * `Scope`/`OwningScope`/`ManualScope`/ are now all top-level in the `gpu_profiler` module
-  * nesting of profiling scopes is no longer done automatically: `GpuProfiler::begin_scope` now takes an optional reference to a parent scope
+  * `Scope`/`OwningScope`/`ManualOwningScope` are now all top-level in the `gpu_profiler` module. `GpuProfiler` has utilities to create them directly.
+  * `GpuProfiler::begin_query` returns a query and `GpuProfiler::end_query` consumes it again
+  * nesting of profiling scopes is no longer done automatically: To manually associate a `GpuProfilerQuery` with a parent, use `GpuProfilerQuery::with_parent`
   * removed profiling macro (doesn't work well with the new nesting model)
   * `GpuProfiler` can now directly create scope structs using `GpuProfiler::scope`/`owning_scope`
 * 0.15
diff --git a/examples/demo.rs b/examples/demo.rs
index cf2ed0d..ec36231 100644
--- a/examples/demo.rs
+++ b/examples/demo.rs
@@ -1,12 +1,12 @@
 use std::borrow::Cow;
-use wgpu_profiler::*;
+use wgpu_profiler::{GpuProfiler, GpuProfilerSettings, GpuTimerQueryResult};
 use winit::{
     event::{Event, VirtualKeyCode, WindowEvent},
     event_loop::{ControlFlow, EventLoop},
     window::Window,
 };

-fn scopes_to_console_recursive(results: &[GpuTimerScopeResult], indentation: u32) {
+fn scopes_to_console_recursive(results: &[GpuTimerQueryResult], indentation: u32) {
     for scope in results {
         if indentation > 0 {
             print!("{:>
-fn console_output(results: &Option<Vec<GpuTimerScopeResult>>, enabled_features: wgpu::Features) {
+fn console_output(results: &Option<Vec<GpuTimerQueryResult>>, enabled_features: wgpu::Features) {
     profiling::scope!("console_output");
     print!("\x1B[2J\x1B[1;1H"); // Clear terminal and put cursor to first row first column
     println!("Welcome to wgpu_profiler demo!");
@@ -109,7 +109,7 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
         height: size.height,
         // By using the Fifo mode we ensure that CPU waits for GPU, thus we won't have an arbitrary amount of frames in flight that may be discarded.
         // Profiler works just fine in any other mode, but keep in mind that this can mean that it would need to buffer up many more frames until the first results are back.
         present_mode: wgpu::PresentMode::Fifo,
         alpha_mode: wgpu::CompositeAlphaMode::Auto,
         view_formats: vec![swapchain_format],
     };
@@ -252,7 +252,7 @@ fn draw(
     let mut rpass = scope.scoped_render_pass(
         "render pass top",
         device,
-        &wgpu::RenderPassDescriptor {
+        wgpu::RenderPassDescriptor {
             label: None,
             color_attachments: &[Some(wgpu::RenderPassColorAttachment {
                 view,
@@ -262,9 +262,7 @@
                     store: wgpu::StoreOp::Store,
                 },
             })],
-            depth_stencil_attachment: None,
-            occlusion_query_set: None,
-            timestamp_writes: None,
+            ..Default::default()
         },
     );

@@ -282,8 +280,11 @@
         }
     }
     {
-        // It's also possible to take timings by hand, manually calling `begin_scope` and `end_scope`.
+        // It's also possible to take timings by hand, manually calling `begin_query` and `end_query`.
         // This is generally not recommended as it's very easy to mess up by accident :)
+        let pass_scope = profiler
+            .begin_pass_query("render pass bottom", scope.recorder, device)
+            .with_parent(scope.scope.as_ref());
         let mut rpass = scope
             .recorder
             .begin_render_pass(&wgpu::RenderPassDescriptor {
@@ -298,47 +299,34 @@
                 })],
                 depth_stencil_attachment: None,
                 occlusion_query_set: None,
-                timestamp_writes: None,
+                timestamp_writes: pass_scope.render_pass_timestamp_writes(),
             });
-        let pass_scope = profiler.begin_scope(
-            "render pass bottom",
-            &mut rpass,
-            device,
-            scope.scope.as_ref(),
-        );
         rpass.set_pipeline(render_pipeline);

-        // The same works on subscopes within the pass.
+        // Similarly, you can manually manage nested scopes within a render pass.
         // Again, to do any actual timing, you need to enable wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES.
{ - let scope = profiler.begin_scope("fractal 2", &mut rpass, device, Some(&pass_scope)); + let query = profiler + .begin_query("fractal 2", &mut rpass, device) + .with_parent(Some(&pass_scope)); rpass.draw(0..6, 2..3); - // Don't forget to end the scope. - // If you drop a manually created profiling scope without calling `end_scope` we'll panic if debug assertions are enabled. - profiler.end_scope(&mut rpass, scope); + // Don't forget to end the query! + profiler.end_query(&mut rpass, query); } - // Another manual variant, is to create a `ManualOwningScope` explicitly. + // Another variant is to use `ManualOwningScope`, forming a middle ground between no scope helpers and fully automatic scope closing. let mut rpass = { - let mut rpass = wgpu_profiler::ManualOwningScope::start_nested( - "fractal 3", - profiler, - rpass, - device, - Some(&pass_scope), - ); + let mut rpass = profiler.manual_owning_scope("fractal 3", rpass, device); rpass.draw(0..6, 3..4); // Don't forget to end the scope. - // If you drop a manually created profiling scope without calling `end_scope` we'll panic if debug assertions are enabled. // Ending a `ManualOwningScope` will return the pass or encoder it owned. - rpass.end_scope() + rpass.end_query() }; // Don't forget to end the scope. - // If you drop a manually created profiling scope without calling `end_scope` we'll panic if debug assertions are enabled. - profiler.end_scope(&mut rpass, pass_scope); + profiler.end_query(&mut rpass, pass_scope); } } diff --git a/src/chrometrace.rs b/src/chrometrace.rs index f8db45f..82e38c0 100644 --- a/src/chrometrace.rs +++ b/src/chrometrace.rs @@ -1,11 +1,11 @@ use std::{fs::File, io::Write, path::Path}; -use crate::GpuTimerScopeResult; +use crate::GpuTimerQueryResult; /// Writes a .json trace file that can be viewed as a flame graph in Chrome or Edge via pub fn write_chrometrace( target: &Path, - profile_data: &[GpuTimerScopeResult], + profile_data: &[GpuTimerQueryResult], ) -> std::io::Result<()> { let mut file = File::create(target)?; @@ -27,7 +27,7 @@ pub fn write_chrometrace( fn write_results_recursive( file: &mut File, - result: &GpuTimerScopeResult, + result: &GpuTimerQueryResult, last: bool, ) -> std::io::Result<()> { // note: ThreadIds are under the control of Rust’s standard library @@ -52,24 +52,24 @@ fn write_results_recursive( result.time.start * 1000.0 * 1000.0, (result.time.end - result.time.start) * 1000.0 * 1000.0, result.label, - if last && result.nested_scopes.is_empty() { + if last && result.nested_queries.is_empty() { "\n" } else { ",\n" } )?; - if result.nested_scopes.is_empty() { + if result.nested_queries.is_empty() { return Ok(()); } for child in result - .nested_scopes + .nested_queries .iter() - .take(result.nested_scopes.len() - 1) + .take(result.nested_queries.len() - 1) { write_results_recursive(file, child, false)?; } - write_results_recursive(file, result.nested_scopes.last().unwrap(), last)?; + write_results_recursive(file, result.nested_queries.last().unwrap(), last)?; Ok(()) // { "pid":1, "tid":1, "ts":546867, "dur":121564, "ph":"X", "name":"DoThings" diff --git a/src/errors.rs b/src/errors.rs index 2af761a..b0ab496 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -44,20 +44,17 @@ impl Eq for CreationError {} pub enum SettingsError { #[error("GpuProfilerSettings::max_num_pending_frames must be at least 1.")] InvalidMaxNumPendingFrames, - - #[error("Can't change settings while there's open profiling scopes.")] - HasOpenScopes, } /// Errors that can occur during 
[`crate::GpuProfiler::end_frame`].
 #[derive(thiserror::Error, Debug, PartialEq, Eq)]
 pub enum EndFrameError {
-    #[error("All profiling scopes need to be closed before ending a frame. There were still {0} open scopes.")]
-    UnclosedScopes(u32),
+    #[error("All profiling queries need to be closed before ending a frame. There were still {0} open queries.")]
+    UnclosedQueries(u32),

     #[error(
         "Not all queries were resolved before ending a frame.\n
-Call `GpuProfiler::resolve_queries` after all profiling scopes have been closed and before ending the frame.\n
+Call `GpuProfiler::resolve_queries` after all profiling queries have been closed and before ending the frame.\n
 There were still {0} queries unresolved."
     )]
     UnresolvedQueries(u32),
diff --git a/src/lib.rs b/src/lib.rs
index 4944383..0a0caa8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -51,7 +51,7 @@ let mut profiler = GpuProfiler::new(GpuProfilerSettings::default()).unwrap();
     let mut nested_scope = scope.scope("nested!", &device);

     // Scopes on encoders can be used to easily create profiled passes!
-    let mut compute_pass = nested_scope.scoped_compute_pass("profiled compute", &device, &Default::default());
+    let mut compute_pass = nested_scope.scoped_compute_pass("profiled compute", &device);

     // Scopes expose the underlying encoder or pass they wrap:
@@ -123,7 +123,7 @@ use parking_lot::{Mutex, RwLock};

 /// The result of a gpu timer scope.
 #[derive(Debug, Clone)]
-pub struct GpuTimerScopeResult {
+pub struct GpuTimerQueryResult {
     /// Label that was specified when opening the scope.
     pub label: String,

@@ -139,70 +139,102 @@ pub struct GpuTimerScopeResult {
     pub time: Range<f64>,

     /// Scopes that were opened while this scope was open.
-    pub nested_scopes: Vec<GpuTimerScopeResult>,
+    pub nested_queries: Vec<GpuTimerQueryResult>,
 }

-/// An in-flight GPU timer scope.
+/// An inflight query for the profiler.
 ///
-/// *Must* be closed by calling [`GpuProfiler::end_scope`].
-/// Will cause debug assertion if dropped without being closed.
+/// If timer queries are enabled, this represents a reserved timer query pair on
+/// one of the profiler's query sets.
+/// *Must* be closed by calling [`GpuProfiler::end_query`].
 ///
-/// Emitted by [`GpuProfiler::begin_scope`] and consumed by [`GpuProfiler::end_scope`].
-pub struct GpuTimerScope {
-    /// The label assigned to this scope.
-    /// Will be moved into [`GpuTimerScopeResult::label`] once the scope is fully processed.
+/// Emitted by [`GpuProfiler::begin_query`]/[`GpuProfiler::begin_pass_query`] and consumed by [`GpuProfiler::end_query`].
+pub struct GpuProfilerQuery {
+    /// The label assigned to this query.
+    /// Will be moved into [`GpuTimerQueryResult::label`] once the query is fully processed.
     pub label: String,

-    /// The process id of the process that opened this scope.
+    /// The process id of the process that opened this query.
     pub pid: u32,

-    /// The thread id of the thread that opened this scope.
+    /// The thread id of the thread that opened this query.
     pub tid: ThreadId,

-    /// The actual query on a query pool if any (none if disabled for this type of scope).
-    query: Option<ReservedQueryPair>,
+    /// The actual query on a query pool if any (none if disabled for this type of query).
+    timer_query_pair: Option<ReservedTimerQueryPair>,
+
+    /// Handle which identifies this query, used for building the tree of queries.
+    handle: GpuTimerQueryTreeHandle,

-    /// Handle which identifies this scope, used for building the tree of scopes.
-    handle: GpuTimerScopeTreeHandle,
+    /// Which query this query is a child of.
+    parent_handle: GpuTimerQueryTreeHandle,

-    /// Which scope this scope is a child of.
-    parent_handle: GpuTimerScopeTreeHandle,
+    /// Whether a debug group was opened for this scope.
+    has_debug_group: bool,

     #[cfg(feature = "tracy")]
     tracy_scope: Option<tracy_client::GpuSpan>,
+}

-    /// For debugging only, tracks if the scope got closed already.
+impl GpuProfilerQuery {
+    /// Use the reserved query for render pass timestamp writes if any.
     ///
-    /// Scopes aren't allowed to be dropped without being closed first.
-    #[cfg(debug_assertions)]
-    was_closed: bool,
-}
+    /// Use this only for a single render/compute pass, otherwise results will be overwritten.
+    /// Only ever returns `Some` for queries that were created using [`GpuProfiler::begin_pass_query`].
+    pub fn render_pass_timestamp_writes(&self) -> Option<wgpu::RenderPassTimestampWrites> {
+        self.timer_query_pair.as_ref().and_then(|query| {
+            (query.usage_state == QueryPairUsageState::ReservedForPassTimestampWrites).then(|| {
+                wgpu::RenderPassTimestampWrites {
+                    query_set: &query.pool.query_set,
+                    beginning_of_pass_write_index: Some(query.start_query_idx),
+                    end_of_pass_write_index: Some(query.start_query_idx + 1),
+                }
+            })
+        })
+    }

-#[cfg(debug_assertions)]
-impl Drop for GpuTimerScope {
-    fn drop(&mut self) {
-        debug_assert!(
-            self.was_closed,
-            "Dropped GpuTimerScope without calling `GpuProfiler::end_scope` on it!"
-        );
+    /// Use the reserved query for compute pass timestamp writes if any.
+    ///
+    /// Use this only for a single render/compute pass, otherwise results will be overwritten.
+    /// Only ever returns `Some` for queries that were created using [`GpuProfiler::begin_pass_query`].
+    pub fn compute_pass_timestamp_writes(&self) -> Option<wgpu::ComputePassTimestampWrites> {
+        self.timer_query_pair.as_ref().and_then(|query| {
+            (query.usage_state == QueryPairUsageState::ReservedForPassTimestampWrites).then(|| {
+                wgpu::ComputePassTimestampWrites {
+                    query_set: &query.pool.query_set,
+                    beginning_of_pass_write_index: Some(query.start_query_idx),
+                    end_of_pass_write_index: Some(query.start_query_idx + 1),
+                }
+            })
+        })
+    }
+
+    /// Makes this scope a child of the passed scope.
+    #[inline]
+    pub fn with_parent(self, parent: Option<&GpuProfilerQuery>) -> Self {
+        Self {
+            parent_handle: parent.map_or(ROOT_QUERY_HANDLE, |p| p.handle),
+            ..self
+        }
     }
 }

+
 /// Settings passed on initialization of [`GpuProfiler`].
 #[derive(Debug, Clone)]
 pub struct GpuProfilerSettings {
-    /// Enables/disables the profiler.
+    /// Enables/disables gpu timer queries.
     ///
     /// If false, the profiler will not emit any timer queries, making most operations on [`GpuProfiler`] no-ops.
     ///
     /// Since all resource creation is done lazily, this provides an effective way of disabling the profiler at runtime
     /// without the need of special build configurations or code to handle enabled/disabled profiling.
-    pub enable_timer_scopes: bool,
+    pub enable_timer_queries: bool,

     /// Enables/disables debug markers for all scopes on the respective encoder or pass.
     ///
     /// This is useful for debugging with tools like RenderDoc.
     /// Debug markers will be emitted even if the device does not support timer queries or disables them via
-    /// [`GpuProfilerSettings::enable_timer_scopes`].
+    /// [`GpuProfilerSettings::enable_timer_queries`].
     pub enable_debug_groups: bool,

     /// The profiler queues up to `max_num_pending_frames` "profiler-frames" at a time.
@@ -226,7 +258,7 @@ pub struct GpuProfilerSettings {
 impl Default for GpuProfilerSettings {
     fn default() -> Self {
         Self {
-            enable_timer_scopes: true,
+            enable_timer_queries: true,
             enable_debug_groups: true,
             max_num_pending_frames: 3,
         }
@@ -247,14 +279,19 @@
 ///
 /// You can have an arbitrary number of independent profiler instances per application/adapter.
 /// Manages all the necessary [`wgpu::QuerySet`] and [`wgpu::Buffer`] behind the scenes.
+///
+/// Any query creation method may allocate a new [`wgpu::QuerySet`] and [`wgpu::Buffer`] internally if necessary.
+///
+/// After the first call that passes [`wgpu::Device`], the same device must be used with all subsequent
+/// calls to [`GpuProfiler`] and all passed references to wgpu objects must originate from that device.
 pub struct GpuProfiler {
     unused_pools: Vec<QueryPool>,

     active_frame: ActiveFrame,
     pending_frames: Vec<PendingFrame>,

-    num_open_scopes: AtomicU32,
-    next_scope_handle: AtomicU32,
+    num_open_queries: AtomicU32,
+    next_query_handle: AtomicU32,

     size_for_new_query_pools: u32,

@@ -288,12 +325,12 @@ impl GpuProfiler {
             pending_frames: Vec::with_capacity(settings.max_num_pending_frames),
             active_frame: ActiveFrame {
                 query_pools: RwLock::new(PendingFramePools::default()),
-                closed_scope_sender,
-                closed_scope_receiver: Mutex::new(closed_scope_receiver),
+                closed_query_sender: closed_scope_sender,
+                closed_query_receiver: Mutex::new(closed_scope_receiver),
             },

-            num_open_scopes: AtomicU32::new(0),
-            next_scope_handle: AtomicU32::new(0),
+            num_open_queries: AtomicU32::new(0),
+            next_query_handle: AtomicU32::new(0),

             size_for_new_query_pools: QueryPool::MIN_CAPACITY,

@@ -319,26 +356,22 @@

     /// Changes the settings of an existing profiler.
     ///
-    /// This fails if there are open profiling scopes.
-    ///
-    /// If timer scopes are disabled (by setting [GpuProfilerSettings::enable_timer_scopes] to false),
+    /// If timer scopes are disabled by setting [GpuProfilerSettings::enable_timer_queries] to false,
     /// any timer queries that are in flight will still be processed,
     /// but unused query sets and buffers will be deallocated during [`Self::process_finished_frame`].
+    /// Similarly, any opened debugging scope will still be closed if debug groups are disabled by setting
+    /// [GpuProfilerSettings::enable_debug_groups] to false.
     pub fn change_settings(&mut self, settings: GpuProfilerSettings) -> Result<(), SettingsError> {
-        if self.num_open_scopes.load(Ordering::Acquire) > 0 {
-            Err(SettingsError::HasOpenScopes)
-        } else {
-            settings.validate()?;
-            if !settings.enable_timer_scopes {
-                self.unused_pools.clear();
-            }
-            self.settings = settings;
-
-            Ok(())
+        settings.validate()?;
+        if !settings.enable_timer_queries {
+            self.unused_pools.clear();
         }
+        self.settings = settings;
+
+        Ok(())
     }

-    /// Starts a new profiler scope.
+    /// Starts a new auto-closing profiler scope.
     ///
     /// To nest scopes inside this scope, call [`Scope::scope`] on the returned scope.
     ///
@@ -350,10 +383,6 @@
     ///
     /// If [`GpuProfilerSettings::enable_debug_groups`] is true, a debug group will be pushed on the encoder or pass.
     ///
-    /// May allocate a new [`wgpu::QuerySet`] and [`wgpu::Buffer`] internally if necessary.
-    /// After the first call, the same [`wgpu::Device`] must be used with all subsequent calls to [`GpuProfiler`]
-    /// (and all passed references to wgpu objects must originate from that device).
-    ///
     /// Scope is automatically closed on drop.
 #[must_use]
 #[track_caller]
@@ -364,10 +393,15 @@ impl GpuProfiler {
         encoder_or_pass: &'a mut Recorder,
         device: &wgpu::Device,
     ) -> Scope<'a, Recorder> {
-        Scope::start(label, self, encoder_or_pass, device)
+        let scope = self.begin_query(label, encoder_or_pass, device);
+        Scope {
+            profiler: self,
+            recorder: encoder_or_pass,
+            scope: Some(scope),
+        }
     }

-    /// Starts a new profiler scope that takes ownership of the passed encoder or rendering/compute pass.
+    /// Starts a new auto-closing profiler scope that takes ownership of the passed encoder or rendering/compute pass.
     ///
     /// To nest scopes inside this scope, call [`OwningScope::scope`] on the returned scope.
     ///
@@ -379,10 +413,6 @@
     ///
     /// If [`GpuProfilerSettings::enable_debug_groups`] is true, a debug group will be pushed on the encoder or pass.
     ///
-    /// May allocate a new [`wgpu::QuerySet`] and [`wgpu::Buffer`] internally if necessary.
-    /// After the first call, the same [`wgpu::Device`] must be used with all subsequent calls to [`GpuProfiler`]
-    /// (and all passed references to wgpu objects must originate from that device).
-    ///
     /// Scope is automatically closed on drop.
     #[must_use]
     #[track_caller]
@@ -390,13 +420,25 @@
     pub fn owning_scope<'a, Recorder: ProfilerCommandRecorder>(
         &'a self,
         label: impl Into<String>,
-        encoder_or_pass: Recorder,
+        mut encoder_or_pass: Recorder,
         device: &wgpu::Device,
     ) -> OwningScope<'a, Recorder> {
-        OwningScope::start(label, self, encoder_or_pass, device)
+        let scope = self.begin_query(label, &mut encoder_or_pass, device);
+        OwningScope {
+            profiler: self,
+            recorder: encoder_or_pass,
+            scope: Some(scope),
+        }
     }

-    /// Starts a new debug & timer scope on a given encoder or rendering/compute pass if enabled that must be manually closed.
+    /// Starts a new **manually closed** profiler scope that takes ownership of the passed encoder or rendering/compute pass.
+    ///
+    /// Does NOT call [`GpuProfiler::end_query()`] on drop.
+    /// This construct is just for completeness in cases where working with scopes is preferred but one can't rely on the Drop call in the right place.
+    /// This is useful when the owned value needs to be recovered after the end of the scope.
+    /// In particular, to submit a [`wgpu::CommandEncoder`] to a queue, ownership of the encoder is necessary.
+    ///
+    /// To nest scopes inside this scope, call [`ManualOwningScope::scope`] on the returned scope.
     ///
     /// If an [`wgpu::CommandEncoder`] is passed but the [`wgpu::Device`]
     /// does not support [`wgpu::Features::TIMESTAMP_QUERY`], no gpu timer will be queried and the scope will
@@ -404,118 +446,142 @@
     /// If an [`wgpu::ComputePass`] or [`wgpu::RenderPass`] is passed but the [`wgpu::Device`]
     /// does not support [`wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES`], no scope will be opened.
     ///
-    /// In any case, the returned scope *must* be closed by calling [`GpuProfiler::end_scope`].
-    /// Dropping it without closing it will trigger a debug assertion.
-    /// To do this automatically, use `GpuProfiler::scope`/`GpuProfiler::owning_scope` instead.
-    ///
     /// If [`GpuProfilerSettings::enable_debug_groups`] is true, a debug group will be pushed on the encoder or pass.
+    #[must_use]
+    #[track_caller]
+    #[inline]
+    pub fn manual_owning_scope<'a, Recorder: ProfilerCommandRecorder>(
+        &'a self,
+        label: impl Into<String>,
+        mut encoder_or_pass: Recorder,
+        device: &wgpu::Device,
+    ) -> ManualOwningScope<'a, Recorder> {
+        let scope = self.begin_query(label, &mut encoder_or_pass, device);
+        ManualOwningScope {
+            profiler: self,
+            recorder: encoder_or_pass,
+            scope: Some(scope),
+        }
+    }
+
+    /// Starts a new profiler query on the given encoder or rendering/compute pass (if enabled).
+    ///
+    /// The returned query *must* be closed by calling [`GpuProfiler::end_query`] with the same encoder/pass,
+    /// even if timer queries are disabled.
+    /// To do this automatically, use [`GpuProfiler::scope`]/[`GpuProfiler::owning_scope`] instead.
+    ///
+    /// If an [`wgpu::CommandEncoder`] is passed but the [`wgpu::Device`]
+    /// does not support [`wgpu::Features::TIMESTAMP_QUERY`], no gpu timer will be queried and the scope will
+    /// not show up in the final results.
+    /// If an [`wgpu::ComputePass`] or [`wgpu::RenderPass`] is passed but the [`wgpu::Device`]
+    /// does not support [`wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES`], no timer queries will be allocated.
     ///
-    /// May allocate a new [`wgpu::QuerySet`] and [`wgpu::Buffer`] internally if necessary.
-    /// After the first call, the same [`wgpu::Device`] must be used with all subsequent calls to [`GpuProfiler`]
-    /// (and all passed references to wgpu objects must originate from that device).
+    /// If [`GpuProfilerSettings::enable_debug_groups`] is true, a debug group will be pushed on the encoder or pass.
     #[track_caller]
     #[must_use]
-    pub fn begin_scope<Recorder: ProfilerCommandRecorder>(
+    pub fn begin_query<Recorder: ProfilerCommandRecorder>(
         &self,
         label: impl Into<String>,
         encoder_or_pass: &mut Recorder,
         device: &wgpu::Device,
-        parent_scope: Option<&GpuTimerScope>,
-    ) -> GpuTimerScope {
-        // Give opening/closing scopes acquire/release semantics:
-        // This way, we won't get any nasty surprises when observing zero open scopes.
-        self.num_open_scopes.fetch_add(1, Ordering::Acquire);
-
-        let handle = self.next_scope_tree_handle();
-        let label = label.into();
+    ) -> GpuProfilerQuery {
+        let mut query = self.begin_query_internal(label.into(), encoder_or_pass, device);
+        if let Some(timer_query) = &mut query.timer_query_pair {
+            encoder_or_pass
+                .write_timestamp(&timer_query.pool.query_set, timer_query.start_query_idx);
+            timer_query.usage_state = QueryPairUsageState::OnlyStartWritten;
+        };

         if self.settings.enable_debug_groups {
-            encoder_or_pass.push_debug_group(&label);
+            encoder_or_pass.push_debug_group(&query.label);
+            query.has_debug_group = true;
         }
+        query
+    }

-        let (query, _tracy_scope) = if self.settings.enable_timer_scopes
-            && timestamp_write_supported(encoder_or_pass, device.features())
-        {
-            let query = self.reserve_query_pair(device);
-            encoder_or_pass.write_timestamp(&query.pool.query_set, query.begin_query_idx);
-
-            #[cfg(feature = "tracy")]
-            let tracy_scope = {
-                let location = std::panic::Location::caller();
-                self.tracy_context.as_ref().and_then(|c| {
-                    c.span_alloc(&label, "", location.file(), location.line())
-                        .ok()
-                })
-            };
-            #[cfg(not(feature = "tracy"))]
-            let tracy_scope = Option::<()>::None;
-
-            (Some(query), tracy_scope)
-        } else {
-            (None, None)
-        };
-
-        GpuTimerScope {
-            label,
-            pid: std::process::id(),
-            tid: std::thread::current().id(),
-            query,
-            handle,
-            parent_handle: parent_scope.map_or(ROOT_SCOPE_HANDLE, |s| s.handle),
-            #[cfg(feature = "tracy")]
-            tracy_scope: _tracy_scope,
-            #[cfg(debug_assertions)]
-            was_closed: false,
+    /// Starts a new profiler query to be used for render/compute pass timestamp writes.
+    ///
+    /// The returned query *must* be closed by calling [`GpuProfiler::end_query`], even if timer queries are disabled.
+    /// To do this automatically, use [`Scope::scoped_render_pass`]/[`Scope::scoped_compute_pass`] instead.
+    ///
+    /// Call [`GpuProfilerQuery::render_pass_timestamp_writes`] or [`GpuProfilerQuery::compute_pass_timestamp_writes`]
+    /// to acquire the corresponding `wgpu::RenderPassTimestampWrites`/`wgpu::ComputePassTimestampWrites` object.
+    ///
+    /// If the [`wgpu::Device`] does not support [`wgpu::Features::TIMESTAMP_QUERY`], no gpu timer will be reserved.
+    ///
+    /// Unlike [`GpuProfiler::begin_query`] this will not create a debug scope,
+    /// in order to not force passing of the same encoder/pass to [`GpuProfiler::end_query`].
+    /// (this is needed to relax resource tracking requirements a bit, making it easier to implement the automatic scopes)
+    pub fn begin_pass_query(
+        &self,
+        label: impl Into<String>,
+        encoder: &mut wgpu::CommandEncoder,
+        device: &wgpu::Device,
+    ) -> GpuProfilerQuery {
+        let mut query = self.begin_query_internal(label.into(), encoder, device);
+        if let Some(timer_query) = &mut query.timer_query_pair {
+            timer_query.usage_state = QueryPairUsageState::ReservedForPassTimestampWrites;
         }
+        query
     }

-    /// Ends passed scope.
+    /// Ends passed query.
     ///
-    /// Behavior is not defined if the last open scope was opened on a different encoder or pass than the one passed here.
-    ///
-    /// If the previous call to [`GpuProfiler::begin_scope`] did not open a timer scope because it was not supported or disabled,
-    /// this call will do nothing (except closing the currently open debug scope if enabled).
-    pub fn end_scope<Recorder: ProfilerCommandRecorder>(
+    /// If the passed query was opened with [`GpuProfiler::begin_query`], the passed encoder or pass must be the same
+    /// as when the query was opened.
+    pub fn end_query<Recorder: ProfilerCommandRecorder>(
         &self,
         encoder_or_pass: &mut Recorder,
-        mut scope: GpuTimerScope,
+        mut query: GpuProfilerQuery,
     ) {
-        #[cfg(debug_assertions)]
-        {
-            scope.was_closed = true;
+        if let Some(timer_query) = &mut query.timer_query_pair {
+            match timer_query.usage_state {
+                QueryPairUsageState::Reserved => {
+                    unreachable!("Query pair has been reserved but isn't used for anything!")
+                }
+                QueryPairUsageState::ReservedForPassTimestampWrites => {
+                    // No need to do a timestamp write, this is handled by wgpu.
+                }
+                QueryPairUsageState::OnlyStartWritten => {
+                    encoder_or_pass.write_timestamp(
+                        &timer_query.pool.query_set,
+                        timer_query.start_query_idx + 1,
+                    );
+                    timer_query.usage_state = QueryPairUsageState::BothStartAndEndWritten;
+                }
+                QueryPairUsageState::BothStartAndEndWritten => {
+                    unreachable!("Query pair has already been used!")
+                }
+            }
         }

-        if let Some(query) = &scope.query {
-            encoder_or_pass.write_timestamp(&query.pool.query_set, query.begin_query_idx + 1);
+        #[cfg(feature = "tracy")]
+        if let Some(ref mut tracy_scope) = query.tracy_scope {
+            tracy_scope.end_zone();
+        }

-            #[cfg(feature = "tracy")]
-            if let Some(ref mut tracy_scope) = scope.tracy_scope {
-                tracy_scope.end_zone();
-            }
+        if query.has_debug_group {
+            encoder_or_pass.pop_debug_group();
         }

-        let send_result = self.active_frame.closed_scope_sender.send(scope);
+        let send_result = self.active_frame.closed_query_sender.send(query);

-        // The only way we can fail sending the scope is if the receiver has been dropped.
+        // The only way we can fail sending the query is if the receiver has been dropped.
         // Since it sits on `active_frame` as well, there's no way for this to happen!
         debug_assert!(send_result.is_ok());

-        if self.settings.enable_debug_groups {
-            encoder_or_pass.pop_debug_group();
-        }
-
-        // Count scopes even if we haven't processed this one, makes experiences more consistent
+        // Count queries even if we haven't processed this one, makes experiences more consistent
         // if there's a lack of support for some queries.
-        self.num_open_scopes.fetch_sub(1, Ordering::Release);
+        self.num_open_queries.fetch_sub(1, Ordering::Release);
     }

     /// Puts query resolve commands in the encoder for all unresolved, pending queries of the active profiler frame.
     ///
     /// Note that you do *not* need to do this for every encoder, it is sufficient to do this once per frame as long
-    /// as you submit the corresponding command buffer after all others that may have opened scopes in the same frame.
-    /// (It does not matter if the passed encoder itself has previously opened scopes or not.)
+    /// as you submit the corresponding command buffer after all others that may have opened queries in the same frame.
+    /// (It does not matter if the passed encoder itself has previously opened queries or not.)
     /// If you were to make this part of a command buffer that is enqueued before any other that has
-    /// opened scopes in the same profiling frame, no failure will occur but some timing results may be invalid.
+    /// opened queries in the same profiling frame, no failure will occur but some timing results may be invalid.
     ///
     /// It is advised to call this only once at the end of a profiling frame, but it is safe to do so several times.
     ///
@@ -564,27 +630,27 @@
     ///
     /// Needs to be called **after** submitting any encoder used in the current profiler frame.
     ///
-    /// Fails if there are still open scopes or unresolved queries.
+    /// Fails if there are still open queries or unresolved queries.
     pub fn end_frame(&mut self) -> Result<(), EndFrameError> {
-        let num_open_scopes = self.num_open_scopes.load(Ordering::Acquire);
-        if num_open_scopes != 0 {
-            return Err(EndFrameError::UnclosedScopes(num_open_scopes));
+        let num_open_queries = self.num_open_queries.load(Ordering::Acquire);
+        if num_open_queries != 0 {
+            return Err(EndFrameError::UnclosedQueries(num_open_queries));
         }

         let query_pools = self.active_frame.query_pools.get_mut();

         let mut new_pending_frame = PendingFrame {
             query_pools: std::mem::take(&mut query_pools.used_pools),
-            closed_scope_by_parent_handle: HashMap::new(),
+            closed_query_by_parent_handle: HashMap::new(),
             mapped_buffers: Arc::new(AtomicU32::new(0)),
         };

-        for scope in self.active_frame.closed_scope_receiver.get_mut().try_iter() {
+        for query in self.active_frame.closed_query_receiver.get_mut().try_iter() {
             new_pending_frame
-                .closed_scope_by_parent_handle
-                .entry(scope.parent_handle)
+                .closed_query_by_parent_handle
+                .entry(query.parent_handle)
                 .or_default()
-                .push(scope);
+                .push(query);
         }

         // All loads of pool.num_used_queries are Relaxed since we assume,
@@ -621,8 +687,8 @@
             // Dropping the oldest frame could get us into an endless cycle where we're never able to complete
             // any pending frames as the ones closest to completion would be evicted.
             if let Some(dropped_frame) = self.pending_frames.pop() {
-                // Drop scopes first since they still have references to the query pools that we want to reuse.
-                drop(dropped_frame.closed_scope_by_parent_handle);
+                // Drop queries first since they still have references to the query pools that we want to reuse.
+                drop(dropped_frame.closed_query_by_parent_handle);

                 // Mark the frame as dropped. We'll give back the query pools once the mapping is done.
                 // Any previously issued map_async call that haven't finished yet, will invoke their callback with mapping abort.
@@ -667,7 +733,7 @@
     pub fn process_finished_frame(
         &mut self,
         timestamp_period: f32,
-    ) -> Option<Vec<GpuTimerScopeResult>> {
+    ) -> Option<Vec<GpuTimerQueryResult>> {
         let frame = self.pending_frames.first_mut()?;

         // We only process if all mappings succeed.
@@ -686,8 +752,8 @@

             Self::process_timings_recursive(
                 timestamp_to_sec,
-                &mut frame.closed_scope_by_parent_handle,
-                ROOT_SCOPE_HANDLE,
+                &mut frame.closed_query_by_parent_handle,
+                ROOT_QUERY_HANDLE,
             )
         };

@@ -718,13 +784,13 @@ fn timestamp_write_supported(
 }

 impl GpuProfiler {
-    fn next_scope_tree_handle(&self) -> GpuTimerScopeTreeHandle {
+    fn next_scope_tree_handle(&self) -> GpuTimerQueryTreeHandle {
         // Relaxed is fine, we just want a number that nobody uses this frame already.
-        let mut handle = self.next_scope_handle.fetch_add(1, Ordering::Relaxed);
+        let mut handle = self.next_query_handle.fetch_add(1, Ordering::Relaxed);

         // We don't ever expect to run out of handles during a single frame, but who knows how long the app runs.
-        while handle == ROOT_SCOPE_HANDLE {
-            handle = self.next_scope_handle.fetch_add(1, Ordering::Relaxed);
+        while handle == ROOT_QUERY_HANDLE {
+            handle = self.next_query_handle.fetch_add(1, Ordering::Relaxed);
         }

         handle
@@ -740,8 +806,8 @@

         // If a pool was less than half of the size of the max frame, then we don't keep it.
         // This way we're going to need less pools in upcoming frames and thus have less overhead in the long run.
-        // If timer scopes were disabled, we also don't keep any pools.
+        // If timer queries were disabled, we also don't keep any pools.
+        if self.settings.enable_timer_queries && pool.capacity >= capacity_threshold {
             self.active_frame
                 .query_pools
                 .get_mut()
@@ -751,7 +817,7 @@
         }
     }

-    fn try_reserve_query_pair(pool: &Arc<QueryPool>) -> Option<ReservedQueryPair> {
+    fn try_reserve_query_pair(pool: &Arc<QueryPool>) -> Option<ReservedTimerQueryPair> {
         let mut num_used_queries = pool.num_used_queries.load(Ordering::Relaxed);

         loop {
@@ -773,9 +839,10 @@
             ) {
                 Ok(_) => {
                     // We successfully acquired two queries!
-                    return Some(ReservedQueryPair {
+                    return Some(ReservedTimerQueryPair {
                         pool: pool.clone(),
-                        begin_query_idx: num_used_queries,
+                        start_query_idx: num_used_queries,
+                        usage_state: QueryPairUsageState::Reserved,
                     });
                 }
                 Err(updated) => {
@@ -788,7 +855,7 @@

     // Reserves two query objects.
     // Our query pools always have an even number of queries, so we know the next query is the next in the same pool.
-    fn reserve_query_pair(&self, device: &wgpu::Device) -> ReservedQueryPair {
+    fn reserve_query_pair(&self, device: &wgpu::Device) -> ReservedTimerQueryPair {
         // First, try to allocate from current top pool.
         // Requires taking a read lock on the current query pool.
         {
@@ -842,21 +909,69 @@
         }
     }

+    #[track_caller]
+    #[must_use]
+    fn begin_query_internal<Recorder: ProfilerCommandRecorder>(
+        &self,
+        label: String,
+        encoder_or_pass: &mut Recorder,
+        device: &wgpu::Device,
+    ) -> GpuProfilerQuery {
+        // Give opening/closing queries acquire/release semantics:
+        // This way, we won't get any nasty surprises when observing zero open queries.
+        self.num_open_queries.fetch_add(1, Ordering::Acquire);
+
+        let query = if self.settings.enable_timer_queries
+            && timestamp_write_supported(encoder_or_pass, device.features())
+        {
+            Some(self.reserve_query_pair(device))
+        } else {
+            None
+        };
+
+        let _tracy_scope = if self.settings.enable_timer_queries {
+            #[cfg(feature = "tracy")]
+            {
+                let location = std::panic::Location::caller();
+                self.tracy_context.as_ref().and_then(|c| {
+                    c.span_alloc(&label, "", location.file(), location.line())
+                        .ok()
+                })
+            }
+            #[cfg(not(feature = "tracy"))]
+            Option::<()>::None
+        } else {
+            None
+        };
+
+        GpuProfilerQuery {
+            label,
+            pid: std::process::id(),
+            tid: std::thread::current().id(),
+            timer_query_pair: query,
+            handle: self.next_scope_tree_handle(),
+            parent_handle: ROOT_QUERY_HANDLE,
+            has_debug_group: false,
+            #[cfg(feature = "tracy")]
+            tracy_scope: _tracy_scope,
+        }
+    }
+
     fn process_timings_recursive(
         timestamp_to_sec: f64,
-        closed_scope_by_parent_handle: &mut HashMap<GpuTimerScopeTreeHandle, Vec<GpuTimerScope>>,
-        parent_handle: GpuTimerScopeTreeHandle,
-    ) -> Vec<GpuTimerScopeResult> {
-        let Some(scopes_with_same_parent) = closed_scope_by_parent_handle.remove(&parent_handle)
+        closed_scope_by_parent_handle: &mut HashMap<GpuTimerQueryTreeHandle, Vec<GpuProfilerQuery>>,
+        parent_handle: GpuTimerQueryTreeHandle,
+    ) -> Vec<GpuTimerQueryResult> {
+        let Some(queries_with_same_parent) = closed_scope_by_parent_handle.remove(&parent_handle)
         else {
             return Vec::new();
         };

-        scopes_with_same_parent
+        queries_with_same_parent
             .into_iter()
             .filter_map(|mut scope| {
-                let Some(query) = scope.query.take() else {
-                    // Inactive scopes don't have any results or nested scopes with results.
+                let Some(query) = scope.timer_query_pair.take() else {
+                    // Inactive queries don't have any results or nested queries with results.
                     // Currently, we drop them from the results completely.
                    // In the future we could still make them show up since they convey information like label & pid/tid.
                    return None;
                };

                // Read timestamp from buffer.
                // By design timestamps for start/end are consecutive.
-                let offset = (query.begin_query_idx * QUERY_SIZE) as u64;
+                let offset = (query.start_query_idx * QUERY_SIZE) as u64;
                 let buffer_slice = &query
                     .pool
                     .read_buffer
@@ -883,17 +998,17 @@
                     tracy_scope.upload_timestamp(start_raw as i64, end_raw as i64);
                 }

-                let nested_scopes = Self::process_timings_recursive(
+                let nested_queries = Self::process_timings_recursive(
                     timestamp_to_sec,
                     closed_scope_by_parent_handle,
                     scope.handle,
                 );

-                Some(GpuTimerScopeResult {
+                Some(GpuTimerQueryResult {
                     label: std::mem::take(&mut scope.label),
                     time: (start_raw as f64 * timestamp_to_sec)
                         ..(end_raw as f64 * timestamp_to_sec),
-                    nested_scopes,
+                    nested_queries,
                     pid: scope.pid,
                     tid: scope.tid,
                 })
@@ -902,7 +1017,22 @@
     }
 }

-struct ReservedQueryPair {
+#[derive(PartialEq, Eq)]
+enum QueryPairUsageState {
+    /// Transitional state used upon creation.
+    Reserved,
+
+    /// Don't do manual timestamp writes, wgpu is expected to do them for us.
+    ReservedForPassTimestampWrites,
+
+    /// Start query has been used, end query is still available.
+    OnlyStartWritten,
+
+    /// Both start & end query have been used.
+    BothStartAndEndWritten,
+}
+
+struct ReservedTimerQueryPair {
     /// QueryPool on which both start & end queries of the scope are done.
     ///
     /// By putting an arc here instead of an index into a vec, we don't need
     pool: Arc<QueryPool>,

     /// Query index at which the scope begins.
     /// The query after this is reserved for the end of the scope.
-    begin_query_idx: u32,
+    start_query_idx: u32,
+
+    /// Current use of the query pair.
+    usage_state: QueryPairUsageState,
 }

 /// A pool of queries, consisting of a single queryset & buffer for query results.
@@ -968,36 +1101,36 @@ impl QueryPool {
 #[derive(Default)]
 struct PendingFramePools {
     /// List of all pools used in this frame.
-    /// The last pool is the one new profiling scopes will try to make timer queries into.
+    /// The last pool is the one new profiling queries will try to make timer queries into.
     used_pools: Vec<Arc<QueryPool>>,

     /// List of unused pools recycled from previous frames.
     unused_pools: Vec<QueryPool>,
 }

-/// Internal handle to building a tree of profiling scopes.
-type GpuTimerScopeTreeHandle = u32;
+/// Internal handle to building a tree of profiling queries.
+type GpuTimerQueryTreeHandle = u32;

 /// Handle for the root scope.
-const ROOT_SCOPE_HANDLE: GpuTimerScopeTreeHandle = std::u32::MAX;
+const ROOT_QUERY_HANDLE: GpuTimerQueryTreeHandle = std::u32::MAX;

 struct ActiveFrame {
     query_pools: RwLock<PendingFramePools>,

-    /// Closed scopes get send to this channel.
+    /// Closed queries get sent to this channel.
     ///
     /// Note that channel is still overkill for what we want here:
     /// We're in a multi producer situation, *but* the single consumer is known to be only
     /// active in a mut context, i.e. while we're consuming we know that we're not producing.
     /// We have to wrap it in a Mutex because the channel is not Sync, but we actually never lock it
     /// since we only ever access it in a `mut` context.
-    closed_scope_sender: std::sync::mpsc::Sender<GpuTimerScope>,
-    closed_scope_receiver: Mutex<std::sync::mpsc::Receiver<GpuTimerScope>>,
+    closed_query_sender: std::sync::mpsc::Sender<GpuProfilerQuery>,
+    closed_query_receiver: Mutex<std::sync::mpsc::Receiver<GpuProfilerQuery>>,
 }

 struct PendingFrame {
     query_pools: Vec<Arc<QueryPool>>,
-    closed_scope_by_parent_handle: HashMap<GpuTimerScopeTreeHandle, Vec<GpuTimerScope>>,
+    closed_query_by_parent_handle: HashMap<GpuTimerQueryTreeHandle, Vec<GpuProfilerQuery>>,

     /// Keeps track of the number of buffers in the query pool that have been mapped successfully.
     mapped_buffers: std::sync::Arc<AtomicU32>,
diff --git a/src/scope.rs b/src/scope.rs
index f74ab30..734db26 100644
--- a/src/scope.rs
+++ b/src/scope.rs
@@ -1,404 +1,182 @@
 //!
Scope types that wrap a `wgpu` encoder/pass and start a scope on creation. In most cases, they
 //! then allow automatically ending the scope on drop.

-use crate::{GpuProfiler, GpuTimerScope, ProfilerCommandRecorder};
+use crate::{GpuProfiler, GpuProfilerQuery, ProfilerCommandRecorder};

 /// Scope that takes a (mutable) reference to the encoder/pass.
 ///
-/// Calls [`GpuProfiler::end_scope()`] on drop.
+/// Calls [`GpuProfiler::end_query()`] on drop.
 pub struct Scope<'a, Recorder: ProfilerCommandRecorder> {
     pub profiler: &'a GpuProfiler,
     pub recorder: &'a mut Recorder,
-    pub scope: Option<GpuTimerScope>,
+    pub scope: Option<GpuProfilerQuery>,
+}
+
+impl<'a, R: ProfilerCommandRecorder> Drop for Scope<'a, R> {
+    #[inline]
+    fn drop(&mut self) {
+        if let Some(scope) = self.scope.take() {
+            self.profiler.end_query(self.recorder, scope);
+        }
+    }
 }

 /// Scope that takes ownership of the encoder/pass.
 ///
-/// Calls [`GpuProfiler::end_scope()`] on drop.
+/// Calls [`GpuProfiler::end_query()`] on drop.
 pub struct OwningScope<'a, Recorder: ProfilerCommandRecorder> {
     pub profiler: &'a GpuProfiler,
     pub recorder: Recorder,
-    pub scope: Option<GpuTimerScope>,
+    pub scope: Option<GpuProfilerQuery>,
+}
+
+impl<'a, R: ProfilerCommandRecorder> Drop for OwningScope<'a, R> {
+    #[inline]
+    fn drop(&mut self) {
+        if let Some(scope) = self.scope.take() {
+            self.profiler.end_query(&mut self.recorder, scope);
+        }
+    }
 }

 /// Scope that takes ownership of the encoder/pass.
 ///
-/// Does NOT call [`GpuProfiler::end_scope()`] on drop.
+/// Does NOT call [`GpuProfiler::end_query()`] on drop.
 /// This construct is just for completeness in cases where working with scopes is preferred but one can't rely on the Drop call in the right place.
 /// This is useful when the owned value needs to be recovered after the end of the scope.
 /// In particular, to submit a [`wgpu::CommandEncoder`] to a queue, ownership of the encoder is necessary.
 pub struct ManualOwningScope<'a, Recorder: ProfilerCommandRecorder> {
     pub profiler: &'a GpuProfiler,
     pub recorder: Recorder,
-    pub scope: Option<GpuTimerScope>,
+    pub scope: Option<GpuProfilerQuery>,
 }

-impl<'a, W: ProfilerCommandRecorder> Scope<'a, W> {
-    /// Starts a new profiler scope without nesting.
-    #[must_use]
-    #[track_caller]
-    #[inline]
-    pub fn start(
-        label: impl Into<String>,
-        profiler: &'a GpuProfiler,
-        recorder: &'a mut W,
-        device: &wgpu::Device,
-    ) -> Self {
-        let scope = profiler.begin_scope(label, recorder, device, None);
-        Self {
-            profiler,
-            recorder,
-            scope: Some(scope),
-        }
-    }
-
-    /// Starts a new profiler scope nested in another scope.
-    #[must_use]
-    #[track_caller]
-    #[inline]
-    pub fn start_nested(
-        label: impl Into<String>,
-        profiler: &'a GpuProfiler,
-        recorder: &'a mut W,
-        device: &wgpu::Device,
-        parent: Option<&GpuTimerScope>,
-    ) -> Self {
-        let scope = profiler.begin_scope(label, recorder, device, parent);
-        Self {
-            profiler,
-            recorder,
-            scope: Some(scope),
-        }
-    }
-
-    /// Starts a new profiler scope nested within this one.
-    #[must_use]
+impl<'a, R: ProfilerCommandRecorder> ManualOwningScope<'a, R> {
+    /// Ends the scope allowing the extraction of the owned [`ProfilerCommandRecorder`].
     #[track_caller]
     #[inline]
-    pub fn scope(&mut self, label: impl Into<String>, device: &wgpu::Device) -> Scope<'_, W> {
-        Scope::start_nested(
-            label,
-            self.profiler,
-            self.recorder,
-            device,
-            self.scope.as_ref(),
-        )
+    pub fn end_query(mut self) -> R {
+        // Can't fail since creation implies begin_query.
+ self.profiler + .end_query(&mut self.recorder, self.scope.take().unwrap()); + self.recorder } } -impl<'a, W: ProfilerCommandRecorder> OwningScope<'a, W> { - /// Starts a new profiler scope without nesting. - #[must_use] - #[track_caller] - #[inline] - pub fn start( - label: impl Into, - profiler: &'a GpuProfiler, - mut recorder: W, - device: &wgpu::Device, - ) -> Self { - let scope = profiler.begin_scope(label, &mut recorder, device, None); - Self { - profiler, - recorder, - scope: Some(scope), +/// Most implementation code of the different scope types is exactly the same. +/// +/// This macro allows to avoid code duplication. +/// Another way of achieving this are extension traits, but this would mean that a user has to +/// import the extension trait to use all methods of the scope types which I found a bit annoying. +macro_rules! impl_scope_ext { + ($scope:ident, $recorder_type:ty) => { + impl<'a, R: ProfilerCommandRecorder> $scope<'a, R> { + /// Starts a new profiler scope nested within this one. + #[must_use] + #[track_caller] + #[inline] + pub fn scope( + &mut self, + label: impl Into, + device: &wgpu::Device, + ) -> Scope<'_, R> { + let recorder: &mut R = &mut self.recorder; + let scope = self + .profiler + .begin_query(label, recorder, device) + .with_parent(self.scope.as_ref()); + Scope { + profiler: self.profiler, + recorder, + scope: Some(scope), + } + } } - } - /// Starts a new profiler scope nested in another scope. - #[must_use] - #[track_caller] - #[inline] - pub fn start_nested( - label: impl Into, - profiler: &'a GpuProfiler, - mut recorder: W, - device: &wgpu::Device, - parent: Option<&GpuTimerScope>, - ) -> Self { - let scope = profiler.begin_scope(label, &mut recorder, device, parent); - Self { - profiler, - recorder, - scope: Some(scope), + impl<'a> $scope<'a, wgpu::CommandEncoder> { + /// Start a render pass wrapped in a [`OwningScope`]. + /// + /// Ignores passed `wgpu::RenderPassDescriptor::timestamp_writes` and replaces it with + /// `timestamp_writes` managed by `GpuProfiler`. + /// + /// Note that in order to take measurements, this does not require the + /// [`wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES`] feature, only [`wgpu::Features::TIMESTAMP_QUERY`]. + #[track_caller] + pub fn scoped_render_pass<'b>( + &'b mut self, + label: impl Into, + device: &wgpu::Device, + pass_descriptor: wgpu::RenderPassDescriptor<'b, '_>, + ) -> OwningScope<'b, wgpu::RenderPass<'b>> { + let child_scope = self + .profiler + .begin_pass_query(label, &mut self.recorder, device) + .with_parent(self.scope.as_ref()); + let render_pass = self + .recorder + .begin_render_pass(&wgpu::RenderPassDescriptor { + timestamp_writes: child_scope.render_pass_timestamp_writes(), + ..pass_descriptor + }); + + OwningScope { + profiler: self.profiler, + recorder: render_pass, + scope: Some(child_scope), + } + } + + /// Start a compute pass wrapped in a [`OwningScope`]. + /// + /// Uses passed label both for profiler scope and compute pass label. + /// `timestamp_writes` managed by `GpuProfiler`. + /// + /// Note that in order to take measurements, this does not require the + /// [`wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES`] feature, only [`wgpu::Features::TIMESTAMP_QUERY`]. 
+ #[track_caller] + pub fn scoped_compute_pass<'b>( + &'b mut self, + label: impl Into, + device: &wgpu::Device, + ) -> OwningScope<'b, wgpu::ComputePass<'b>> { + let child_scope = self + .profiler + .begin_pass_query(label, &mut self.recorder, device) + .with_parent(self.scope.as_ref()); + + let render_pass = self + .recorder + .begin_compute_pass(&wgpu::ComputePassDescriptor { + label: Some(&child_scope.label), + timestamp_writes: child_scope.compute_pass_timestamp_writes(), + }); + + OwningScope { + profiler: self.profiler, + recorder: render_pass, + scope: Some(child_scope), + } + } } - } - /// Starts a new profiler scope nested within this one. - #[must_use] - #[track_caller] - #[inline] - pub fn scope(&mut self, label: impl Into, device: &wgpu::Device) -> Scope<'_, W> { - Scope::start_nested( - label, - self.profiler, - &mut self.recorder, - device, - self.scope.as_ref(), - ) - } -} + impl<'a, R: ProfilerCommandRecorder> std::ops::Deref for $scope<'a, R> { + type Target = R; -impl<'a, W: ProfilerCommandRecorder> ManualOwningScope<'a, W> { - /// Starts a new profiler scope. - /// - /// Scope is NOT closed on drop and needs to be closed manually with [`ManualOwningScope::end_scope`] - #[must_use] - #[track_caller] - #[inline] - pub fn start( - label: impl Into, - profiler: &'a GpuProfiler, - mut recorder: W, - device: &wgpu::Device, - ) -> Self { - let scope = profiler.begin_scope(label, &mut recorder, device, None); - Self { - profiler, - recorder, - scope: Some(scope), + #[inline] + fn deref(&self) -> &Self::Target { + &self.recorder + } } - } - /// Starts a new profiler scope nested in another one. - /// - /// Scope is NOT closed on drop and needs to be closed manually with [`ManualOwningScope::end_scope`] - #[must_use] - #[track_caller] - #[inline] - pub fn start_nested( - label: impl Into, - profiler: &'a GpuProfiler, - mut recorder: W, - device: &wgpu::Device, - parent: Option<&GpuTimerScope>, - ) -> Self { - let scope = profiler.begin_scope(label, &mut recorder, device, parent); - Self { - profiler, - recorder, - scope: Some(scope), + impl<'a, R: ProfilerCommandRecorder> std::ops::DerefMut for $scope<'a, R> { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.recorder + } } - } - - /// Starts a new profiler scope nested within this one. - /// - /// Scope is NOT closed on drop and needs to be closed manually with [`ManualOwningScope::end_scope`] - #[must_use] - #[track_caller] - #[inline] - pub fn scope(&mut self, label: impl Into, device: &wgpu::Device) -> Scope<'_, W> { - Scope::start(label, self.profiler, &mut self.recorder, device) - } - - /// Ends the scope allowing the extraction of the owned [`ProfilerCommandRecorder`]. - #[track_caller] - #[inline] - pub fn end_scope(mut self) -> W { - // Can't fail since creation implies begin_scope. - self.profiler - .end_scope(&mut self.recorder, self.scope.take().unwrap()); - self.recorder - } -} - -impl<'a> Scope<'a, wgpu::CommandEncoder> { - /// Start a render pass wrapped in a [`OwningScope`]. 
- /// - /// TODO(#51): Use `RenderPassDescriptor::timestamp_writes` - #[track_caller] - #[inline] - pub fn scoped_render_pass<'b>( - &'b mut self, - label: impl Into, - device: &wgpu::Device, - pass_descriptor: &wgpu::RenderPassDescriptor<'b, '_>, - ) -> OwningScope<'b, wgpu::RenderPass<'b>> { - let render_pass = self.recorder.begin_render_pass(pass_descriptor); - OwningScope::start_nested( - label, - self.profiler, - render_pass, - device, - self.scope.as_ref(), - ) - } - - /// Start a compute pass wrapped in a [`OwningScope`]. - /// - /// TODO(#51): Use `ComputePassDescriptor::timestamp_writes` - #[track_caller] - #[inline] - pub fn scoped_compute_pass( - &mut self, - label: impl Into, - device: &wgpu::Device, - pass_descriptor: &wgpu::ComputePassDescriptor<'_>, - ) -> OwningScope { - let compute_pass = self.recorder.begin_compute_pass(pass_descriptor); - OwningScope::start_nested( - label, - self.profiler, - compute_pass, - device, - self.scope.as_ref(), - ) - } -} - -impl<'a> OwningScope<'a, wgpu::CommandEncoder> { - /// Start a render pass wrapped in an [`OwningScope`]. - /// - /// TODO(#51): Use `RenderPassDescriptor::timestamp_writes` - #[track_caller] - #[inline] - pub fn scoped_render_pass<'b>( - &'b mut self, - label: impl Into, - device: &wgpu::Device, - pass_descriptor: &wgpu::RenderPassDescriptor<'b, '_>, - ) -> OwningScope<'b, wgpu::RenderPass<'b>> { - let render_pass = self.recorder.begin_render_pass(pass_descriptor); - OwningScope::start_nested( - label, - self.profiler, - render_pass, - device, - self.scope.as_ref(), - ) - } - - /// Start a compute pass wrapped in a [`OwningScope`]. - /// - /// TODO(#51): Use `ComputePassDescriptor::timestamp_writes` - #[track_caller] - #[inline] - pub fn scoped_compute_pass( - &mut self, - label: impl Into, - device: &wgpu::Device, - pass_descriptor: &wgpu::ComputePassDescriptor<'_>, - ) -> OwningScope { - let compute_pass = self.recorder.begin_compute_pass(pass_descriptor); - OwningScope::start_nested( - label, - self.profiler, - compute_pass, - device, - self.scope.as_ref(), - ) - } -} - -impl<'a> ManualOwningScope<'a, wgpu::CommandEncoder> { - /// Start a render pass wrapped in an [`OwningScope`]. - /// - /// TODO(#51): Use `RenderPassDescriptor::timestamp_writes` - #[track_caller] - #[inline] - pub fn scoped_render_pass<'b>( - &'b mut self, - label: impl Into, - device: &wgpu::Device, - pass_descriptor: &wgpu::RenderPassDescriptor<'b, '_>, - ) -> OwningScope<'b, wgpu::RenderPass<'b>> { - let render_pass = self.recorder.begin_render_pass(pass_descriptor); - OwningScope::start_nested( - label, - self.profiler, - render_pass, - device, - self.scope.as_ref(), - ) - } - - /// Start a compute pass wrapped in an [`OwningScope`]. 
- /// - /// TODO(#51): Use `ComputePassDescriptor::timestamp_writes` - #[track_caller] - #[inline] - pub fn scoped_compute_pass( - &mut self, - label: impl Into, - device: &wgpu::Device, - pass_descriptor: &wgpu::ComputePassDescriptor<'_>, - ) -> OwningScope { - let compute_pass = self.recorder.begin_compute_pass(pass_descriptor); - OwningScope::start_nested( - label, - self.profiler, - compute_pass, - device, - self.scope.as_ref(), - ) - } -} - -// Scope -impl<'a, W: ProfilerCommandRecorder> std::ops::Deref for Scope<'a, W> { - type Target = W; - - #[inline] - fn deref(&self) -> &Self::Target { - self.recorder - } -} - -impl<'a, W: ProfilerCommandRecorder> std::ops::DerefMut for Scope<'a, W> { - #[inline] - fn deref_mut(&mut self) -> &mut Self::Target { - self.recorder - } -} - -impl<'a, W: ProfilerCommandRecorder> Drop for Scope<'a, W> { - #[inline] - fn drop(&mut self) { - // Creation implies begin_scope, so this can't fail. - self.profiler - .end_scope(self.recorder, self.scope.take().unwrap()); - } -} - -// OwningScope -impl<'a, W: ProfilerCommandRecorder> std::ops::Deref for OwningScope<'a, W> { - type Target = W; - - #[inline] - fn deref(&self) -> &Self::Target { - &self.recorder - } -} - -impl<'a, W: ProfilerCommandRecorder> std::ops::DerefMut for OwningScope<'a, W> { - #[inline] - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.recorder - } -} - -impl<'a, W: ProfilerCommandRecorder> Drop for OwningScope<'a, W> { - #[inline] - fn drop(&mut self) { - // Creation implies begin_scope, so this can't fail. - self.profiler - .end_scope(&mut self.recorder, self.scope.take().unwrap()); - } + }; } -// ManualOwningScope -impl<'a, W: ProfilerCommandRecorder> std::ops::Deref for ManualOwningScope<'a, W> { - type Target = W; - - #[inline] - fn deref(&self) -> &Self::Target { - &self.recorder - } -} - -impl<'a, W: ProfilerCommandRecorder> std::ops::DerefMut for ManualOwningScope<'a, W> { - #[inline] - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.recorder - } -} +impl_scope_ext!(Scope, &'a mut R); +impl_scope_ext!(OwningScope, R); +impl_scope_ext!(ManualOwningScope, R); diff --git a/tests/src/errors.rs b/tests/src/errors.rs index a27b762..372754c 100644 --- a/tests/src/errors.rs +++ b/tests/src/errors.rs @@ -17,40 +17,40 @@ fn invalid_pending_frame_count() { } #[test] -fn end_frame_unclosed_scope() { +fn end_frame_unclosed_query() { let (_, device, _) = create_device(wgpu::Features::TIMESTAMP_QUERY).unwrap(); let mut profiler = wgpu_profiler::GpuProfiler::new(GpuProfilerSettings::default()).unwrap(); - let unclosed_scope = { + let unclosed_query = { let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); - let scope = profiler.begin_scope("open scope", &mut encoder, &device, None); + let query = profiler.begin_query("open query", &mut encoder, &device); profiler.resolve_queries(&mut encoder); - scope + query }; assert_eq!( profiler.end_frame(), - Err(wgpu_profiler::EndFrameError::UnclosedScopes(1)) + Err(wgpu_profiler::EndFrameError::UnclosedQueries(1)) ); // Make sure we can recover from this. 
{ let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); - profiler.end_scope(&mut encoder, unclosed_scope); + profiler.end_query(&mut encoder, unclosed_query); profiler.resolve_queries(&mut encoder); } assert_eq!(profiler.end_frame(), Ok(())); } #[test] -fn end_frame_unresolved_scope() { +fn end_frame_unresolved_query() { let (_, device, _) = create_device(wgpu::Features::TIMESTAMP_QUERY).unwrap(); let mut profiler = wgpu_profiler::GpuProfiler::new(GpuProfilerSettings::default()).unwrap(); { let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); - let scope = profiler.begin_scope("open scope", &mut encoder, &device, None); - profiler.end_scope(&mut encoder, scope); + let query = profiler.begin_query("open query", &mut encoder, &device); + profiler.end_query(&mut encoder, query); } assert_eq!( @@ -69,18 +69,18 @@ fn end_frame_unresolved_scope() { } #[test] -fn change_settings_while_scope_open() { +fn change_settings_while_query_open() { let (_, device, _) = create_device(wgpu::Features::TIMESTAMP_QUERY).unwrap(); let mut profiler = wgpu_profiler::GpuProfiler::new(GpuProfilerSettings::default()).unwrap(); let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); - let scope = profiler.begin_scope("open scope", &mut encoder, &device, None); + let query = profiler.begin_query("open query", &mut encoder, &device); assert_eq!( profiler.change_settings(GpuProfilerSettings::default()), - Err(wgpu_profiler::SettingsError::HasOpenScopes) + Ok(()) ); - profiler.end_scope(&mut encoder, scope); + profiler.end_query(&mut encoder, query); } diff --git a/tests/src/mod.rs b/tests/src/mod.rs index 0430955..f128bde 100644 --- a/tests/src/mod.rs +++ b/tests/src/mod.rs @@ -53,7 +53,7 @@ fn expected_scope( fn validate_results( features: wgpu::Features, - result: &[wgpu_profiler::GpuTimerScopeResult], + result: &[wgpu_profiler::GpuTimerQueryResult], expected: &[ExpectedScope], ) { let expected = expected @@ -73,13 +73,13 @@ fn validate_results( ); for (result, expected) in result.iter().zip(expected.iter()) { assert_eq!(result.label, expected.0); - validate_results(features, &result.nested_scopes, &expected.2); + validate_results(features, &result.nested_queries, &expected.2); } } fn validate_results_unordered( features: wgpu::Features, - result: &[wgpu_profiler::GpuTimerScopeResult], + result: &[wgpu_profiler::GpuTimerQueryResult], expected: &[ExpectedScope], ) { let expected = expected @@ -104,6 +104,6 @@ fn validate_results_unordered( for (result, expected) in result.iter().zip(expected.iter()) { assert!(expected_labels.remove(&result.label)); - validate_results(features, &result.nested_scopes, &expected.2); + validate_results(features, &result.nested_queries, &expected.2); } } diff --git a/tests/src/nested_scopes.rs b/tests/src/nested_scopes.rs index bf2b6b2..491a1d6 100644 --- a/tests/src/nested_scopes.rs +++ b/tests/src/nested_scopes.rs @@ -14,17 +14,9 @@ fn nested_scopes(device: &wgpu::Device, queue: &wgpu::Queue) { { let mut outer_scope = profiler.scope("e0_s0", &mut encoder0, device); { - drop(outer_scope.scoped_compute_pass( - "e0_s0_c0", - device, - &wgpu::ComputePassDescriptor::default(), - )); + drop(outer_scope.scoped_compute_pass("e0_s0_c0", device)); { - let mut inner_scope = outer_scope.scoped_compute_pass( - "e0_s0_c1", - device, - &wgpu::ComputePassDescriptor::default(), - ); + let mut inner_scope = outer_scope.scoped_compute_pass("e0_s0_c1", device); { 
drop(inner_scope.scope("e0_s0_c1_s0", device)); let mut innermost_scope = inner_scope.scope("e0_s0_c1_s1", device); @@ -53,17 +45,13 @@ fn nested_scopes(device: &wgpu::Device, queue: &wgpu::Queue) { // Another scope, but with the profiler disabled which should be possible on the fly. profiler .change_settings(GpuProfilerSettings { - enable_timer_scopes: false, + enable_timer_queries: false, ..Default::default() }) .unwrap(); let mut scope = profiler.scope("e2_s1", &mut encoder0, device); { - let mut scope = scope.scoped_compute_pass( - "e2_s1_c1", - device, - &wgpu::ComputePassDescriptor::default(), - ); + let mut scope = scope.scoped_compute_pass("e2_s1_c1", device); drop(scope.scope("e2_s1_c1_s0", device)); } } @@ -91,10 +79,10 @@ fn nested_scopes(device: &wgpu::Device, queue: &wgpu::Queue) { "e0_s0", Requires::Timestamps, [ - expected_scope("e0_s0_c0", Requires::TimestampsInPasses, []), + expected_scope("e0_s0_c0", Requires::Timestamps, []), expected_scope( "e0_s0_c1", - Requires::TimestampsInPasses, + Requires::Timestamps, [ expected_scope("e0_s0_c1_s0", Requires::TimestampsInPasses, []), expected_scope(
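
For reference, the reworked query API introduced by this patch fits together roughly as follows. This is only a usage sketch abridged from the README and examples/demo.rs changes above; `device`, `queue`, `pipeline`, and `pass_descriptor` are assumed to come from the usual wgpu setup and are not defined by the patch.

```rust
// Sketch only: profiler calls follow the patch; wgpu setup objects are assumed to exist.
let mut profiler = GpuProfiler::new(GpuProfilerSettings::default()).unwrap();

let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
{
    // Auto-closing scope on the encoder; it calls GpuProfiler::end_query on drop.
    let mut scope = profiler.scope("frame", &mut encoder, &device);

    // Pass scopes now use wgpu's timestamp_writes, so per-pass timings only
    // need wgpu::Features::TIMESTAMP_QUERY (not TIMESTAMP_QUERY_INSIDE_PASSES).
    let mut rpass = scope.scoped_render_pass("main pass", &device, pass_descriptor);
    rpass.set_pipeline(&pipeline);
    rpass.draw(0..3, 0..1);
}

profiler.resolve_queries(&mut encoder);
queue.submit(Some(encoder.finish()));
profiler.end_frame().unwrap();

// Once the GPU has caught up, finished frames become available one by one.
if let Some(results) = profiler.process_finished_frame(queue.get_timestamp_period()) {
    wgpu_profiler::chrometrace::write_chrometrace(std::path::Path::new("trace.json"), &results)
        .unwrap();
}
```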