diff --git a/examples/demo.rs b/examples/demo.rs index 0b5bf9f..5836474 100644 --- a/examples/demo.rs +++ b/examples/demo.rs @@ -1,12 +1,12 @@ use std::borrow::Cow; -use wgpu_profiler::{GpuProfiler, GpuProfilerSettings, GpuTimerScopeResult}; +use wgpu_profiler::{GpuProfiler, GpuProfilerSettings, GpuTimerQueryResult}; use winit::{ event::{Event, VirtualKeyCode, WindowEvent}, event_loop::{ControlFlow, EventLoop}, window::Window, }; -fn scopes_to_console_recursive(results: &[GpuTimerScopeResult], indentation: u32) { +fn scopes_to_console_recursive(results: &[GpuTimerQueryResult], indentation: u32) { for scope in results { if indentation > 0 { print!("{:>, enabled_features: wgpu::Features) { +fn console_output(results: &Option>, enabled_features: wgpu::Features) { profiling::scope!("console_output"); print!("\x1B[2J\x1B[1;1H"); // Clear terminal and put cursor to first row first column println!("Welcome to wgpu_profiler demo!"); @@ -283,7 +283,7 @@ fn draw( // It's also possible to take timings by hand, manually calling `begin_scope` and `end_scope`. // This is generally not recommended as it's very easy to mess up by accident :) let pass_scope = profiler - .begin_pass_scope("render pass bottom", scope.recorder, device) + .begin_pass_query("render pass bottom", scope.recorder, device) .with_parent(scope.scope.as_ref()); let mut rpass = scope .recorder @@ -307,13 +307,13 @@ fn draw( // Similarly, you can manually manage nested scopes within a render pass. // Again, to do any actual timing, you need to enable wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES. { - let scope = profiler - .begin_scope("fractal 2", &mut rpass, device) + let query = profiler + .begin_query("fractal 2", &mut rpass, device) .with_parent(Some(&pass_scope)); rpass.draw(0..6, 2..3); - // Don't forget to end the scope! - profiler.end_scope(&mut rpass, scope); + // Don't forget to end the query! 
+ profiler.end_query(&mut rpass, query); } // TODO: // Another variant is to use `ManualOwningScope`, forming a middle ground between no scope helpers and fully automatic scope closing. @@ -333,7 +333,7 @@ fn draw( // }; // Don't forget to end the scope. - profiler.end_scope(&mut rpass, pass_scope); + profiler.end_query(&mut rpass, pass_scope); } } diff --git a/src/chrometrace.rs b/src/chrometrace.rs index f8db45f..82e38c0 100644 --- a/src/chrometrace.rs +++ b/src/chrometrace.rs @@ -1,11 +1,11 @@ use std::{fs::File, io::Write, path::Path}; -use crate::GpuTimerScopeResult; +use crate::GpuTimerQueryResult; /// Writes a .json trace file that can be viewed as a flame graph in Chrome or Edge via pub fn write_chrometrace( target: &Path, - profile_data: &[GpuTimerScopeResult], + profile_data: &[GpuTimerQueryResult], ) -> std::io::Result<()> { let mut file = File::create(target)?; @@ -27,7 +27,7 @@ pub fn write_chrometrace( fn write_results_recursive( file: &mut File, - result: &GpuTimerScopeResult, + result: &GpuTimerQueryResult, last: bool, ) -> std::io::Result<()> { // note: ThreadIds are under the control of Rust’s standard library @@ -52,24 +52,24 @@ fn write_results_recursive( result.time.start * 1000.0 * 1000.0, (result.time.end - result.time.start) * 1000.0 * 1000.0, result.label, - if last && result.nested_scopes.is_empty() { + if last && result.nested_queries.is_empty() { "\n" } else { ",\n" } )?; - if result.nested_scopes.is_empty() { + if result.nested_queries.is_empty() { return Ok(()); } for child in result - .nested_scopes + .nested_queries .iter() - .take(result.nested_scopes.len() - 1) + .take(result.nested_queries.len() - 1) { write_results_recursive(file, child, false)?; } - write_results_recursive(file, result.nested_scopes.last().unwrap(), last)?; + write_results_recursive(file, result.nested_queries.last().unwrap(), last)?; Ok(()) // { "pid":1, "tid":1, "ts":546867, "dur":121564, "ph":"X", "name":"DoThings" diff --git a/src/errors.rs b/src/errors.rs 
index f350658..b0ab496 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -49,12 +49,12 @@ pub enum SettingsError { /// Errors that can occur during [`crate::GpuProfiler::end_frame`]. #[derive(thiserror::Error, Debug, PartialEq, Eq)] pub enum EndFrameError { - #[error("All profiling scopes need to be closed before ending a frame. There were still {0} open scopes.")] - UnclosedScopes(u32), + #[error("All profiling queries need to be closed before ending a frame. There were still {0} open queries.")] + UnclosedQueries(u32), #[error( "Not all queries were resolved before ending a frame.\n -Call `GpuProfiler::resolve_queries` after all profiling scopes have been closed and before ending the frame.\n +Call `GpuProfiler::resolve_queries` after all profiling queries have been closed and before ending the frame.\n There were still {0} queries unresolved." )] UnresolvedQueries(u32), diff --git a/src/lib.rs b/src/lib.rs index e6e5312..44d813b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -123,7 +123,7 @@ use parking_lot::{Mutex, RwLock}; /// The result of a gpu timer scope. #[derive(Debug, Clone)] -pub struct GpuTimerScopeResult { +pub struct GpuTimerQueryResult { /// Label that was specified when opening the scope. pub label: String, @@ -139,34 +139,35 @@ pub struct GpuTimerScopeResult { pub time: Range, /// Scopes that were opened while this scope was open. - pub nested_scopes: Vec, + pub nested_queries: Vec, } -/// An in-flight GPU timer scope. +/// An inflight query for the profiler. /// -/// *Must* be closed by calling [`GpuProfiler::end_scope`]. -/// Will cause debug assertion if dropped without being closed. +/// If timer queries are enabled, this represents a reserved timer query pair on +/// one of the profiler's query sets. +/// *Must* be closed by calling [`GpuProfiler::end_query`]. /// -/// Emitted by [`GpuProfiler::begin_scope`] and consumed by [`GpuProfiler::end_scope`]. -pub struct GpuTimerScope { - /// The label assigned to this scope. 
- /// Will be moved into [`GpuTimerScopeResult::label`] once the scope is fully processed. +/// Emitted by [`GpuProfiler::begin_query`]/[`GpuProfiler::begin_pass_query`] and consumed by [`GpuProfiler::end_query`]. +pub struct GpuProfilerQuery { + /// The label assigned to this query. + /// Will be moved into [`GpuTimerQueryResult::label`] once the query is fully processed. pub label: String, - /// The process id of the process that opened this scope. + /// The process id of the process that opened this query. pub pid: u32, - /// The thread id of the thread that opened this scope. + /// The thread id of the thread that opened this query. pub tid: ThreadId, - /// The actual query on a query pool if any (none if disabled for this type of scope). - query: Option, + /// The actual query on a query pool if any (none if disabled for this type of query). + timer_query_pair: Option, - /// Handle which identifies this scope, used for building the tree of scopes. - handle: GpuTimerScopeTreeHandle, + /// Handle which identifies this query, used for building the tree of queries. + handle: GpuTimerQueryTreeHandle, - /// Which scope this scope is a child of. - parent_handle: GpuTimerScopeTreeHandle, + /// Which query this query is a child of. + parent_handle: GpuTimerQueryTreeHandle, /// Whether a debug group was opened for this scope. has_debug_group: bool, @@ -175,38 +176,44 @@ pub struct GpuTimerScope { tracy_scope: Option, } -impl GpuTimerScope { +impl GpuProfilerQuery { /// Use the reserved query for render pass timestamp writes if any. /// /// Use this only for a single render/compute pass, otherwise results will be overwritten. + /// Only ever returns `Some` for queries that were created using [`GpuProfiler::begin_pass_query`].
pub fn render_pass_timestamp_writes(&self) -> Option { - self.query - .as_ref() - .map(|query| wgpu::RenderPassTimestampWrites { - query_set: &query.pool.query_set, - beginning_of_pass_write_index: Some(query.start_query_idx), - end_of_pass_write_index: Some(query.start_query_idx + 1), + self.timer_query_pair.as_ref().and_then(|query| { + (query.usage_state == QueryPairUsageState::ReservedForPassTimestampWrites).then(|| { + wgpu::RenderPassTimestampWrites { + query_set: &query.pool.query_set, + beginning_of_pass_write_index: Some(query.start_query_idx), + end_of_pass_write_index: Some(query.start_query_idx + 1), + } }) + }) } /// Use the reserved query for compute pass timestamp writes if any. /// /// Use this only for a single render/compute pass, otherwise results will be overwritten. + /// Only ever returns `Some` for queries that were created using [`GpuProfiler::begin_pass_query`]. pub fn compute_pass_timestamp_writes(&self) -> Option { - self.query - .as_ref() - .map(|query| wgpu::ComputePassTimestampWrites { - query_set: &query.pool.query_set, - beginning_of_pass_write_index: Some(query.start_query_idx), - end_of_pass_write_index: Some(query.start_query_idx + 1), + self.timer_query_pair.as_ref().and_then(|query| { + (query.usage_state == QueryPairUsageState::ReservedForPassTimestampWrites).then(|| { + wgpu::ComputePassTimestampWrites { + query_set: &query.pool.query_set, + beginning_of_pass_write_index: Some(query.start_query_idx), + end_of_pass_write_index: Some(query.start_query_idx + 1), + } }) + }) } /// Makes this scope a child of the passed scope. #[inline] - pub fn with_parent(self, parent: Option<&GpuTimerScope>) -> Self { + pub fn with_parent(self, parent: Option<&GpuProfilerQuery>) -> Self { Self { - parent_handle: parent.map_or(ROOT_SCOPE_HANDLE, |p| p.handle), + parent_handle: parent.map_or(ROOT_QUERY_HANDLE, |p| p.handle), ..self } } @@ -215,19 +222,19 @@ impl GpuTimerScope { /// Settings passed on initialization of [`GpuProfiler`]. 
#[derive(Debug, Clone)] pub struct GpuProfilerSettings { - /// Enables/disables the profiler. + /// Enables/disables gpu timer queries. /// /// If false, the profiler will not emit any timer queries, making most operations on [`GpuProfiler`] no-ops. /// /// Since all resource creation is done lazily, this provides an effective way of disabling the profiler at runtime /// without the need of special build configurations or code to handle enabled/disabled profiling. - pub enable_timer_scopes: bool, + pub enable_timer_queries: bool, /// Enables/disables debug markers for all scopes on the respective encoder or pass. /// /// This is useful for debugging with tools like RenderDoc. /// Debug markers will be emitted even if the device does not support timer queries or disables them via - /// [`GpuProfilerSettings::enable_timer_scopes`]. + /// [`GpuProfilerSettings::enable_timer_queries`]. pub enable_debug_groups: bool, /// The profiler queues up to `max_num_pending_frames` "profiler-frames" at a time. @@ -251,7 +258,7 @@ pub struct GpuProfilerSettings { impl Default for GpuProfilerSettings { fn default() -> Self { Self { - enable_timer_scopes: true, + enable_timer_queries: true, enable_debug_groups: true, max_num_pending_frames: 3, } @@ -273,7 +280,7 @@ impl GpuProfilerSettings { /// You can have an arbitrary number of independent profiler instances per application/adapter. /// Manages all the necessary [`wgpu::QuerySet`] and [`wgpu::Buffer`] behind the scenes. /// -/// Any scope opening method may allocate a new [`wgpu::QuerySet`] and [`wgpu::Buffer`] internally if necessary. +/// Any query creation method may allocate a new [`wgpu::QuerySet`] and [`wgpu::Buffer`] internally if necessary. /// /// After the first call that passes [`wgpu::Device`], the same device must be used with all subsequent /// calls to [`GpuProfiler`] and all passed references to wgpu objects must originate from that device. 
@@ -283,8 +290,8 @@ pub struct GpuProfiler { active_frame: ActiveFrame, pending_frames: Vec, - num_open_scopes: AtomicU32, - next_scope_handle: AtomicU32, + num_open_queries: AtomicU32, + next_query_handle: AtomicU32, size_for_new_query_pools: u32, @@ -318,12 +325,12 @@ impl GpuProfiler { pending_frames: Vec::with_capacity(settings.max_num_pending_frames), active_frame: ActiveFrame { query_pools: RwLock::new(PendingFramePools::default()), - closed_scope_sender, - closed_scope_receiver: Mutex::new(closed_scope_receiver), + closed_query_sender: closed_scope_sender, + closed_query_receiver: Mutex::new(closed_scope_receiver), }, - num_open_scopes: AtomicU32::new(0), - next_scope_handle: AtomicU32::new(0), + num_open_queries: AtomicU32::new(0), + next_query_handle: AtomicU32::new(0), size_for_new_query_pools: QueryPool::MIN_CAPACITY, @@ -349,14 +356,14 @@ impl GpuProfiler { /// Changes the settings of an existing profiler. /// - /// If timer scopes are disabled by setting [GpuProfilerSettings::enable_timer_scopes] to false, + /// If timer scopes are disabled by setting [GpuProfilerSettings::enable_timer_queries] to false, /// any timer queries that are in flight will still be processed, /// but unused query sets and buffers will be deallocated during [`Self::process_finished_frame`]. /// Similarly, any opened debugging scope will still be closed if debug groups are disabled by setting /// [GpuProfilerSettings::enable_debug_groups] to false. 
pub fn change_settings(&mut self, settings: GpuProfilerSettings) -> Result<(), SettingsError> { settings.validate()?; - if !settings.enable_timer_scopes { + if !settings.enable_timer_queries { self.unused_pools.clear(); } self.settings = settings; @@ -386,7 +393,7 @@ impl GpuProfiler { encoder_or_pass: &'a mut Recorder, device: &wgpu::Device, ) -> Scope<'a, Recorder> { - let scope = self.begin_scope(label, encoder_or_pass, device); + let scope = self.begin_query(label, encoder_or_pass, device); Scope { profiler: self, recorder: encoder_or_pass, @@ -416,7 +423,7 @@ impl GpuProfiler { mut encoder_or_pass: Recorder, device: &wgpu::Device, ) -> OwningScope<'a, Recorder> { - let scope = self.begin_scope(label, &mut encoder_or_pass, device); + let scope = self.begin_query(label, &mut encoder_or_pass, device); OwningScope { profiler: self, recorder: encoder_or_pass, @@ -426,7 +433,7 @@ impl GpuProfiler { /// Starts a new **manually closed** profiler scope that takes ownership of the passed encoder or rendering/compute pass. /// - /// Does NOT call [`GpuProfiler::end_scope()`] on drop. + /// Does NOT call [`GpuProfiler::end_query()`] on drop. /// This construct is just for completeness in cases where working with scopes is preferred but one can't rely on the Drop call in the right place. /// This is useful when the owned value needs to be recovered after the end of the scope. /// In particular, to submit a [`wgpu::CommandEncoder`] to a queue, ownership of the encoder is necessary. @@ -449,7 +456,7 @@ impl GpuProfiler { mut encoder_or_pass: Recorder, device: &wgpu::Device, ) -> ManualOwningScope<'a, Recorder> { - let scope = self.begin_scope(label, &mut encoder_or_pass, device); + let scope = self.begin_query(label, &mut encoder_or_pass, device); ManualOwningScope { profiler: self, recorder: encoder_or_pass, @@ -457,114 +464,124 @@ impl GpuProfiler { } } - /// Starts a new profiler scope on a given encoder or rendering/compute pass if enabled that must be manually closed. 
+ /// Starts a new profiler query on the given encoder or rendering/compute pass (if enabled). /// - /// The returned scope *must* be closed by calling [`GpuProfiler::end_scope`]. - /// Dropping it without closing it will trigger a debug assertion. - /// To do this automatically, use `GpuProfiler::scope`/`GpuProfiler::owning_scope` instead. + /// The returned query *must* be closed by calling [`GpuProfiler::end_query`] with the same encoder/pass, + /// even if timer queries are disabled. + /// To do this automatically, use [`GpuProfiler::scope`]/[`GpuProfiler::owning_scope`] instead. /// /// If an [`wgpu::CommandEncoder`] is passed but the [`wgpu::Device`] /// does not support [`wgpu::Features::TIMESTAMP_QUERY`], no gpu timer will be queried and the scope will /// not show up in the final results. /// If an [`wgpu::ComputePass`] or [`wgpu::RenderPass`] is passed but the [`wgpu::Device`] - /// does not support [`wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES`], no scope will be opened. + /// does not support [`wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES`], no timer queries will be allocated. /// /// If [`GpuProfilerSettings::enable_debug_groups`] is true, a debug group will be pushed on the encoder or pass. 
#[track_caller] #[must_use] - pub fn begin_scope( + pub fn begin_query( &self, label: impl Into, encoder_or_pass: &mut Recorder, device: &wgpu::Device, - ) -> GpuTimerScope { - let mut scope = self.begin_scope_internal(label.into(), encoder_or_pass, device); - if let Some(query) = &mut scope.query { - encoder_or_pass.write_timestamp(&query.pool.query_set, query.start_query_idx); - query.usage_state = QueryPairUsageState::OnlyStartWritten; + ) -> GpuProfilerQuery { + let mut query = self.begin_query_internal(label.into(), encoder_or_pass, device); + if let Some(timer_query) = &mut query.timer_query_pair { + encoder_or_pass + .write_timestamp(&timer_query.pool.query_set, timer_query.start_query_idx); + timer_query.usage_state = QueryPairUsageState::OnlyStartWritten; }; if self.settings.enable_debug_groups { - encoder_or_pass.push_debug_group(&scope.label); - scope.has_debug_group = true; + encoder_or_pass.push_debug_group(&query.label); + query.has_debug_group = true; } - scope + query } - /// Need to call end scope with the encoder again, not the pass the scope is used with. - /// TODO: proper doc - /// TODO: highlevel methods for this? - /// TODO: Naming needs a facelift - `GpuTimerScope` vs `Scope` is WEIRD! - pub fn begin_pass_scope( + /// Starts a new profiler query to be used for render/compute pass timestamp writes. + /// + /// The returned query *must* be closed by calling [`GpuProfiler::end_query`], even if timer queries are disabled. + /// To do this automatically, use [`GpuProfiler::Scope::scoped_render_pass`]/[`GpuProfiler::Scope::scoped_compute_pass`] instead. + /// + /// Call [`GpuProfilerQuery::render_pass_timestamp_writes`] or [`GpuProfilerQuery::compute_pass_timestamp_writes`] + /// to acquire the corresponding `wgpu::RenderPassTimestampWrites`/`wgpu::ComputePassTimestampWrites` object. + /// + /// If the [`wgpu::Device`] does not support [`wgpu::Features::TIMESTAMP_QUERY`], no gpu timer will be reserved. 
+ /// + /// Unlike [`GpuProfiler::begin_query`] this will not create a debug scope, + /// in order to not force passing of the same encoder/pass to [`GpuProfiler::end_query`]. + /// (this is needed to relax resource tracking requirements a bit, making it easier to implement the automatic scopes) + pub fn begin_pass_query( &self, label: impl Into, encoder: &mut wgpu::CommandEncoder, device: &wgpu::Device, - ) -> GpuTimerScope { - let mut scope = self.begin_scope_internal(label.into(), encoder, device); - if let Some(query) = &mut scope.query { - query.usage_state = QueryPairUsageState::ReservedForPassTimestampWrites; + ) -> GpuProfilerQuery { + let mut query = self.begin_query_internal(label.into(), encoder, device); + if let Some(timer_query) = &mut query.timer_query_pair { + timer_query.usage_state = QueryPairUsageState::ReservedForPassTimestampWrites; } - scope + query } - /// Ends passed scope. - /// - /// Behavior is not defined if the last open scope was opened on a different encoder or pass than the one passed here. + /// Ends passed query. /// - /// If the previous call to [`GpuProfiler::begin_scope`] did not open a timer scope because it was not supported or disabled, - /// this call will do nothing (except closing the currently open debug scope if enabled). - pub fn end_scope( + /// If the passed query was opened with [`GpuProfiler::begin_query`], the passed encoder or pass must be the same + /// as when the query was opened. 
+ pub fn end_query( &self, encoder_or_pass: &mut Recorder, - mut scope: GpuTimerScope, + mut query: GpuProfilerQuery, ) { - if let Some(query) = &mut scope.query { - match query.usage_state { + if let Some(timer_query) = &mut query.timer_query_pair { + match timer_query.usage_state { QueryPairUsageState::Reserved => { - unreachable!("Scope has been reserved but isn't used for anything!") + unreachable!("Query pair has been reserved but isn't used for anything!") } QueryPairUsageState::ReservedForPassTimestampWrites => { // No need to do a timestamp write, this is handled by wgpu. } QueryPairUsageState::OnlyStartWritten => { - encoder_or_pass - .write_timestamp(&query.pool.query_set, query.start_query_idx + 1); - query.usage_state = QueryPairUsageState::BothStartAndEndWritten; + encoder_or_pass.write_timestamp( + &timer_query.pool.query_set, + timer_query.start_query_idx + 1, + ); + timer_query.usage_state = QueryPairUsageState::BothStartAndEndWritten; } QueryPairUsageState::BothStartAndEndWritten => { - unreachable!("Scope has already been closed!") + unreachable!("Query pair has already been used!") } } + } - #[cfg(feature = "tracy")] - if let Some(ref mut tracy_scope) = scope.tracy_scope { - tracy_scope.end_zone(); - } + #[cfg(feature = "tracy")] + if let Some(ref mut tracy_scope) = query.tracy_scope { + tracy_scope.end_zone(); } - if scope.has_debug_group { + if query.has_debug_group { encoder_or_pass.pop_debug_group(); } - let send_result = self.active_frame.closed_scope_sender.send(scope); + let send_result = self.active_frame.closed_query_sender.send(query); - // The only way we can fail sending the scope is if the receiver has been dropped. + // The only way we can fail sending the query is if the receiver has been dropped. // Since it sits on `active_frame` as well, there's no way for this to happen! 
debug_assert!(send_result.is_ok()); - // Count scopes even if we haven't processed this one, makes experiences more consistent + // Count queries even if we haven't processed this one, makes experiences more consistent // if there's a lack of support for some queries. - self.num_open_scopes.fetch_sub(1, Ordering::Release); + self.num_open_queries.fetch_sub(1, Ordering::Release); } /// Puts query resolve commands in the encoder for all unresolved, pending queries of the active profiler frame. /// /// Note that you do *not* need to do this for every encoder, it is sufficient do do this once per frame as long - /// as you submit the corresponding command buffer after all others that may have opened scopes in the same frame. - /// (It does not matter if the passed encoder itself has previously opened scopes or not.) + /// as you submit the corresponding command buffer after all others that may have opened queries in the same frame. + /// (It does not matter if the passed encoder itself has previously opened queries or not.) /// If you were to make this part of a command buffer that is enqueued before any other that has - /// opened scopes in the same profiling frame, no failure will occur but some timing results may be invalid. + /// opened queries in the same profiling frame, no failure will occur but some timing results may be invalid. /// /// It is advised to call this only once at the end of a profiling frame, but it is safe to do so several times. /// @@ -613,27 +630,27 @@ impl GpuProfiler { /// /// Needs to be called **after** submitting any encoder used in the current profiler frame. /// - /// Fails if there are still open scopes or unresolved queries. + /// Fails if there are still open queries or unresolved queries. 
pub fn end_frame(&mut self) -> Result<(), EndFrameError> { - let num_open_scopes = self.num_open_scopes.load(Ordering::Acquire); - if num_open_scopes != 0 { - return Err(EndFrameError::UnclosedScopes(num_open_scopes)); + let num_open_queries = self.num_open_queries.load(Ordering::Acquire); + if num_open_queries != 0 { + return Err(EndFrameError::UnclosedQueries(num_open_queries)); } let query_pools = self.active_frame.query_pools.get_mut(); let mut new_pending_frame = PendingFrame { query_pools: std::mem::take(&mut query_pools.used_pools), - closed_scope_by_parent_handle: HashMap::new(), + closed_query_by_parent_handle: HashMap::new(), mapped_buffers: Arc::new(AtomicU32::new(0)), }; - for scope in self.active_frame.closed_scope_receiver.get_mut().try_iter() { + for query in self.active_frame.closed_query_receiver.get_mut().try_iter() { new_pending_frame - .closed_scope_by_parent_handle - .entry(scope.parent_handle) + .closed_query_by_parent_handle + .entry(query.parent_handle) .or_default() - .push(scope); + .push(query); } // All loads of pool.num_used_queries are Relaxed since we assume, @@ -670,8 +687,8 @@ impl GpuProfiler { // Dropping the oldest frame could get us into an endless cycle where we're never able to complete // any pending frames as the ones closest to completion would be evicted. if let Some(dropped_frame) = self.pending_frames.pop() { - // Drop scopes first since they still have references to the query pools that we want to reuse. - drop(dropped_frame.closed_scope_by_parent_handle); + // Drop queries first since they still have references to the query pools that we want to reuse. + drop(dropped_frame.closed_query_by_parent_handle); // Mark the frame as dropped. We'll give back the query pools once the mapping is done. // Any previously issued map_async call that haven't finished yet, will invoke their callback with mapping abort. 
@@ -716,7 +733,7 @@ impl GpuProfiler { pub fn process_finished_frame( &mut self, timestamp_period: f32, - ) -> Option> { + ) -> Option> { let frame = self.pending_frames.first_mut()?; // We only process if all mappings succeed. @@ -735,8 +752,8 @@ impl GpuProfiler { Self::process_timings_recursive( timestamp_to_sec, - &mut frame.closed_scope_by_parent_handle, - ROOT_SCOPE_HANDLE, + &mut frame.closed_query_by_parent_handle, + ROOT_QUERY_HANDLE, ) }; @@ -767,13 +784,13 @@ fn timestamp_write_supported( } impl GpuProfiler { - fn next_scope_tree_handle(&self) -> GpuTimerScopeTreeHandle { + fn next_scope_tree_handle(&self) -> GpuTimerQueryTreeHandle { // Relaxed is fine, we just want a number that nobody uses this frame already. - let mut handle = self.next_scope_handle.fetch_add(1, Ordering::Relaxed); + let mut handle = self.next_query_handle.fetch_add(1, Ordering::Relaxed); // We don't ever expect to run out of handles during a single frame, but who knows how long the app runs. - while handle == ROOT_SCOPE_HANDLE { - handle = self.next_scope_handle.fetch_add(1, Ordering::Relaxed); + while handle == ROOT_QUERY_HANDLE { + handle = self.next_query_handle.fetch_add(1, Ordering::Relaxed); } handle @@ -789,8 +806,8 @@ impl GpuProfiler { // If a pool was less than half of the size of the max frame, then we don't keep it. // This way we're going to need less pools in upcoming frames and thus have less overhead in the long run. - // If timer scopes were disabled, we also don't keep any pools. - if self.settings.enable_timer_scopes && pool.capacity >= capacity_threshold { + // If timer queries were disabled, we also don't keep any pools. 
+ if self.settings.enable_timer_queries && pool.capacity >= capacity_threshold { self.active_frame .query_pools .get_mut() @@ -800,7 +817,7 @@ impl GpuProfiler { } } - fn try_reserve_query_pair(pool: &Arc) -> Option { + fn try_reserve_query_pair(pool: &Arc) -> Option { let mut num_used_queries = pool.num_used_queries.load(Ordering::Relaxed); loop { @@ -822,7 +839,7 @@ impl GpuProfiler { ) { Ok(_) => { // We successfully acquired two queries! - return Some(ReservedQueryPair { + return Some(ReservedTimerQueryPair { pool: pool.clone(), start_query_idx: num_used_queries, usage_state: QueryPairUsageState::Reserved, @@ -838,7 +855,7 @@ impl GpuProfiler { // Reserves two query objects. // Our query pools always have an even number of queries, so we know the next query is the next in the same pool. - fn reserve_query_pair(&self, device: &wgpu::Device) -> ReservedQueryPair { + fn reserve_query_pair(&self, device: &wgpu::Device) -> ReservedTimerQueryPair { // First, try to allocate from current top pool. // Requires taking a read lock on the current query pool. { @@ -894,44 +911,46 @@ impl GpuProfiler { #[track_caller] #[must_use] - fn begin_scope_internal( + fn begin_query_internal( &self, label: String, encoder_or_pass: &mut Recorder, device: &wgpu::Device, - ) -> GpuTimerScope { - // Give opening/closing scopes acquire/release semantics: - // This way, we won't get any nasty surprises when observing zero open scopes. - self.num_open_scopes.fetch_add(1, Ordering::Acquire); + ) -> GpuProfilerQuery { + // Give opening/closing queries acquire/release semantics: + // This way, we won't get any nasty surprises when observing zero open queries. 
+ self.num_open_queries.fetch_add(1, Ordering::Acquire); - let handle = self.next_scope_tree_handle(); - - let (query, _tracy_scope) = if self.settings.enable_timer_scopes + let query = if self.settings.enable_timer_queries && timestamp_write_supported(encoder_or_pass, device.features()) { + Some(self.reserve_query_pair(device)) + } else { + None + }; + + let _tracy_scope = if self.settings.enable_timer_queries { #[cfg(feature = "tracy")] - let tracy_scope = { + { let location = std::panic::Location::caller(); self.tracy_context.as_ref().and_then(|c| { c.span_alloc(&label, "", location.file(), location.line()) .ok() }) - }; + } #[cfg(not(feature = "tracy"))] - let tracy_scope = Option::<()>::None; - - (Some(self.reserve_query_pair(device)), tracy_scope) + Option::<()>::None } else { - (None, None) + None }; - GpuTimerScope { + GpuProfilerQuery { label, pid: std::process::id(), tid: std::thread::current().id(), - query, - handle, - parent_handle: ROOT_SCOPE_HANDLE, + timer_query_pair: query, + handle: self.next_scope_tree_handle(), + parent_handle: ROOT_QUERY_HANDLE, has_debug_group: false, #[cfg(feature = "tracy")] tracy_scope: _tracy_scope, @@ -940,19 +959,19 @@ impl GpuProfiler { fn process_timings_recursive( timestamp_to_sec: f64, - closed_scope_by_parent_handle: &mut HashMap>, - parent_handle: GpuTimerScopeTreeHandle, - ) -> Vec { - let Some(scopes_with_same_parent) = closed_scope_by_parent_handle.remove(&parent_handle) + closed_scope_by_parent_handle: &mut HashMap>, + parent_handle: GpuTimerQueryTreeHandle, + ) -> Vec { + let Some(queries_with_same_parent) = closed_scope_by_parent_handle.remove(&parent_handle) else { return Vec::new(); }; - scopes_with_same_parent + queries_with_same_parent .into_iter() .filter_map(|mut scope| { - let Some(query) = scope.query.take() else { - // Inactive scopes don't have any results or nested scopes with results. 
+ let Some(query) = scope.timer_query_pair.take() else { + // Inactive queries don't have any results or nested queries with results. // Currently, we drop them from the results completely. // In the future we could still make them show up since they convey information like label & pid/tid. return None; @@ -979,17 +998,17 @@ impl GpuProfiler { tracy_scope.upload_timestamp(start_raw as i64, end_raw as i64); } - let nested_scopes = Self::process_timings_recursive( + let nested_queries = Self::process_timings_recursive( timestamp_to_sec, closed_scope_by_parent_handle, scope.handle, ); - Some(GpuTimerScopeResult { + Some(GpuTimerQueryResult { label: std::mem::take(&mut scope.label), time: (start_raw as f64 * timestamp_to_sec) ..(end_raw as f64 * timestamp_to_sec), - nested_scopes, + nested_queries, pid: scope.pid, tid: scope.tid, }) @@ -1013,7 +1032,7 @@ enum QueryPairUsageState { BothStartAndEndWritten, } -struct ReservedQueryPair { +struct ReservedTimerQueryPair { /// QueryPool on which both start & end queries of the scope are done. /// /// By putting an arc here instead of an index into a vec, we don't need @@ -1082,36 +1101,36 @@ impl QueryPool { #[derive(Default)] struct PendingFramePools { /// List of all pools used in this frame. - /// The last pool is the one new profiling scopes will try to make timer queries into. + /// The last pool is the one new profiling queries will try to make timer queries into. used_pools: Vec>, /// List of unused pools recycled from previous frames. unused_pools: Vec, } -/// Internal handle to building a tree of profiling scopes. -type GpuTimerScopeTreeHandle = u32; +/// Internal handle to building a tree of profiling queries. +type GpuTimerQueryTreeHandle = u32; /// Handle for the root scope. -const ROOT_SCOPE_HANDLE: GpuTimerScopeTreeHandle = std::u32::MAX; +const ROOT_QUERY_HANDLE: GpuTimerQueryTreeHandle = std::u32::MAX; struct ActiveFrame { query_pools: RwLock, - /// Closed scopes get send to this channel. 
+ /// Closed queries get sent to this channel. /// /// Note that channel is still overkill for what we want here: /// We're in a multi producer situation, *but* the single consumer is known to be only /// active in a mut context, i.e. while we're consuming we know that we're not producing. /// We have to wrap it in a Mutex because the channel is not Sync, but we actually never lock it /// since we only ever access it in a `mut` context. - closed_scope_sender: std::sync::mpsc::Sender, - closed_scope_receiver: Mutex>, + closed_query_sender: std::sync::mpsc::Sender, + closed_query_receiver: Mutex>, } struct PendingFrame { query_pools: Vec>, - closed_scope_by_parent_handle: HashMap>, + closed_query_by_parent_handle: HashMap>, /// Keeps track of the number of buffers in the query pool that have been mapped successfully. mapped_buffers: std::sync::Arc, diff --git a/src/scope.rs b/src/scope.rs index 356d6a0..ff41b76 100644 --- a/src/scope.rs +++ b/src/scope.rs @@ -1,7 +1,7 @@ //! Scope types that wrap a `wgpu` encoder/pass and start a scope on creation. In most cases, they //! then allow automatically ending the scope on drop. -use crate::{GpuProfiler, GpuTimerScope, ProfilerCommandRecorder}; +use crate::{GpuProfiler, GpuProfilerQuery, ProfilerCommandRecorder}; /// Scope that takes a (mutable) reference to the encoder/pass.
/// @@ -9,14 +9,14 @@ use crate::{GpuProfiler, GpuTimerScope, ProfilerCommandRecorder}; pub struct Scope<'a, Recorder: ProfilerCommandRecorder> { pub profiler: &'a GpuProfiler, pub recorder: &'a mut Recorder, - pub scope: Option, + pub scope: Option, } impl<'a, R: ProfilerCommandRecorder> Drop for Scope<'a, R> { #[inline] fn drop(&mut self) { if let Some(scope) = self.scope.take() { - self.profiler.end_scope(self.recorder, scope); + self.profiler.end_query(self.recorder, scope); } } } @@ -27,14 +27,14 @@ impl<'a, R: ProfilerCommandRecorder> Drop for Scope<'a, R> { pub struct OwningScope<'a, Recorder: ProfilerCommandRecorder> { pub profiler: &'a GpuProfiler, pub recorder: Recorder, - pub scope: Option, + pub scope: Option, } impl<'a, R: ProfilerCommandRecorder> Drop for OwningScope<'a, R> { #[inline] fn drop(&mut self) { if let Some(scope) = self.scope.take() { - self.profiler.end_scope(&mut self.recorder, scope); + self.profiler.end_query(&mut self.recorder, scope); } } } @@ -48,7 +48,7 @@ impl<'a, R: ProfilerCommandRecorder> Drop for OwningScope<'a, R> { pub struct ManualOwningScope<'a, Recorder: ProfilerCommandRecorder> { pub profiler: &'a GpuProfiler, pub recorder: Recorder, - pub scope: Option, + pub scope: Option, } impl<'a, R: ProfilerCommandRecorder> ManualOwningScope<'a, R> { @@ -58,7 +58,7 @@ impl<'a, R: ProfilerCommandRecorder> ManualOwningScope<'a, R> { pub fn end_scope(mut self) -> R { // Can't fail since creation implies begin_scope. self.profiler - .end_scope(&mut self.recorder, self.scope.take().unwrap()); + .end_query(&mut self.recorder, self.scope.take().unwrap()); self.recorder } } @@ -83,7 +83,7 @@ macro_rules! impl_scope_ext { let recorder: &mut R = &mut self.recorder; let scope = self .profiler - .begin_scope(label, recorder, device) + .begin_query(label, recorder, device) .with_parent(self.scope.as_ref()); Scope { profiler: self.profiler, @@ -110,7 +110,7 @@ macro_rules! 
impl_scope_ext { ) -> OwningScope<'b, wgpu::RenderPass<'b>> { let child_scope = self .profiler - .begin_pass_scope(label, &mut self.recorder, device) + .begin_pass_query(label, &mut self.recorder, device) .with_parent(self.scope.as_ref()); let render_pass = self .recorder @@ -141,7 +141,7 @@ macro_rules! impl_scope_ext { ) -> OwningScope<'b, wgpu::ComputePass<'b>> { let child_scope = self .profiler - .begin_pass_scope(label, &mut self.recorder, device) + .begin_pass_query(label, &mut self.recorder, device) .with_parent(self.scope.as_ref()); let render_pass = self diff --git a/tests/src/errors.rs b/tests/src/errors.rs index 828b3f1..372754c 100644 --- a/tests/src/errors.rs +++ b/tests/src/errors.rs @@ -17,40 +17,40 @@ fn invalid_pending_frame_count() { } #[test] -fn end_frame_unclosed_scope() { +fn end_frame_unclosed_query() { let (_, device, _) = create_device(wgpu::Features::TIMESTAMP_QUERY).unwrap(); let mut profiler = wgpu_profiler::GpuProfiler::new(GpuProfilerSettings::default()).unwrap(); - let unclosed_scope = { + let unclosed_query = { let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); - let scope = profiler.begin_scope("open scope", &mut encoder, &device); + let query = profiler.begin_query("open query", &mut encoder, &device); profiler.resolve_queries(&mut encoder); - scope + query }; assert_eq!( profiler.end_frame(), - Err(wgpu_profiler::EndFrameError::UnclosedScopes(1)) + Err(wgpu_profiler::EndFrameError::UnclosedQueries(1)) ); // Make sure we can recover from this. 
{ let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); - profiler.end_scope(&mut encoder, unclosed_scope); + profiler.end_query(&mut encoder, unclosed_query); profiler.resolve_queries(&mut encoder); } assert_eq!(profiler.end_frame(), Ok(())); } #[test] -fn end_frame_unresolved_scope() { +fn end_frame_unresolved_query() { let (_, device, _) = create_device(wgpu::Features::TIMESTAMP_QUERY).unwrap(); let mut profiler = wgpu_profiler::GpuProfiler::new(GpuProfilerSettings::default()).unwrap(); { let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); - let scope = profiler.begin_scope("open scope", &mut encoder, &device); - profiler.end_scope(&mut encoder, scope); + let query = profiler.begin_query("open query", &mut encoder, &device); + profiler.end_query(&mut encoder, query); } assert_eq!( @@ -69,18 +69,18 @@ fn end_frame_unresolved_scope() { } #[test] -fn change_settings_while_scope_open() { +fn change_settings_while_query_open() { let (_, device, _) = create_device(wgpu::Features::TIMESTAMP_QUERY).unwrap(); let mut profiler = wgpu_profiler::GpuProfiler::new(GpuProfilerSettings::default()).unwrap(); let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); - let scope = profiler.begin_scope("open scope", &mut encoder, &device); + let query = profiler.begin_query("open query", &mut encoder, &device); assert_eq!( profiler.change_settings(GpuProfilerSettings::default()), Ok(()) ); - profiler.end_scope(&mut encoder, scope); + profiler.end_query(&mut encoder, query); } diff --git a/tests/src/mod.rs b/tests/src/mod.rs index 0430955..f128bde 100644 --- a/tests/src/mod.rs +++ b/tests/src/mod.rs @@ -53,7 +53,7 @@ fn expected_scope( fn validate_results( features: wgpu::Features, - result: &[wgpu_profiler::GpuTimerScopeResult], + result: &[wgpu_profiler::GpuTimerQueryResult], expected: &[ExpectedScope], ) { let expected = expected @@ -73,13 +73,13 @@ fn 
validate_results( ); for (result, expected) in result.iter().zip(expected.iter()) { assert_eq!(result.label, expected.0); - validate_results(features, &result.nested_scopes, &expected.2); + validate_results(features, &result.nested_queries, &expected.2); } } fn validate_results_unordered( features: wgpu::Features, - result: &[wgpu_profiler::GpuTimerScopeResult], + result: &[wgpu_profiler::GpuTimerQueryResult], expected: &[ExpectedScope], ) { let expected = expected @@ -104,6 +104,6 @@ fn validate_results_unordered( for (result, expected) in result.iter().zip(expected.iter()) { assert!(expected_labels.remove(&result.label)); - validate_results(features, &result.nested_scopes, &expected.2); + validate_results(features, &result.nested_queries, &expected.2); } } diff --git a/tests/src/nested_scopes.rs b/tests/src/nested_scopes.rs index e21c683..491a1d6 100644 --- a/tests/src/nested_scopes.rs +++ b/tests/src/nested_scopes.rs @@ -45,7 +45,7 @@ fn nested_scopes(device: &wgpu::Device, queue: &wgpu::Queue) { // Another scope, but with the profiler disabled which should be possible on the fly. profiler .change_settings(GpuProfilerSettings { - enable_timer_scopes: false, + enable_timer_queries: false, ..Default::default() }) .unwrap();