rust-lang · JoJoDeveloping · Oct 26, 2024 · RalfJung · Oct 29, 2024 · RalfJung
diff --git a/src/bin/miri.rs b/src/bin/miri.rs
@@ -28,7 +28,10 @@ use std::num::NonZero;
 use std::path::PathBuf;
 use std::str::FromStr;
 
-use miri::{BacktraceStyle, BorrowTrackerMethod, ProvenanceMode, RetagFields, ValidationMode};
+use miri::{
+    BacktraceStyle, BorrowTrackerMethod, ProvenanceGcSettings, ProvenanceMode, RetagFields,
+    ValidationMode,
+};
 use rustc_data_structures::sync::Lrc;
 use rustc_driver::Compilation;
 use rustc_hir::def_id::LOCAL_CRATE;
@@ -607,7 +610,10 @@ fn main() {
             let interval = param.parse::<u32>().unwrap_or_else(|err| {
                 show_error!("-Zmiri-provenance-gc requires a `u32`: {}", err)
             });
-            miri_config.gc_interval = interval;
+            miri_config.gc_settings = match interval {
+                0 => ProvenanceGcSettings::Disabled,
+                interval => ProvenanceGcSettings::Regularly { interval },
+            };
         } else if let Some(param) = arg.strip_prefix("-Zmiri-measureme=") {
             miri_config.measureme_out = Some(param.to_string());
         } else if let Some(param) = arg.strip_prefix("-Zmiri-backtrace=") {

diff --git a/src/borrow_tracker/tree_borrows/mod.rs b/src/borrow_tracker/tree_borrows/mod.rs
@@ -60,6 +60,9 @@ impl<'tcx> Tree {
         };
         let global = machine.borrow_tracker.as_ref().unwrap();
         let span = machine.current_span();
+        if self.tree_grew_significantly_since_last_gc() {
+            machine.request_gc();
+        }
         self.perform_access(
             tag,
             Some((range, access_kind, diagnostics::AccessCause::Explicit(access_kind))),
@@ -85,6 +88,9 @@ impl<'tcx> Tree {
         };
         let global = machine.borrow_tracker.as_ref().unwrap();
         let span = machine.current_span();
+        if self.tree_grew_significantly_since_last_gc() {
+            machine.request_gc();
+        }
         self.dealloc(tag, alloc_range(Size::ZERO, size), global, alloc_id, span)
     }
 
@@ -106,6 +112,9 @@ impl<'tcx> Tree {
         alloc_id: AllocId, // diagnostics
     ) -> InterpResult<'tcx> {
         let span = machine.current_span();
+        if self.tree_grew_significantly_since_last_gc() {
+            machine.request_gc();
+        }
         // `None` makes it the magic on-protector-end operation
         self.perform_access(tag, None, global, alloc_id, span)
     }
@@ -297,6 +306,11 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         )?;
         // Record the parent-child pair in the tree.
         tree_borrows.new_child(orig_tag, new_tag, new_perm.initial_state, range, span)?;
+
+        // Also request a GC if things are getting too large.
+        if tree_borrows.tree_grew_significantly_since_last_gc() {
+            this.machine.request_gc();
+        }
         drop(tree_borrows);
 
         // Also inform the data race model (but only if any bytes are actually affected).

diff --git a/src/borrow_tracker/tree_borrows/tree.rs b/src/borrow_tracker/tree_borrows/tree.rs
@@ -259,6 +259,8 @@ pub struct Tree {
     pub(super) rperms: RangeMap<UniValMap<LocationState>>,
     /// The index of the root node.
     pub(super) root: UniIndex,
+    /// The number of nodes when we last ran the Garbage Collector
+    nodes_at_last_gc: usize,
 }
 
 /// A node in the borrow tree. Each node is uniquely identified by a tag via
@@ -604,7 +606,7 @@ impl Tree {
             perms.insert(root_idx, LocationState::new_init(Permission::new_active()));
             RangeMap::new(size, perms)
         };
-        Self { root: root_idx, nodes, rperms, tag_mapping }
+        Self { root: root_idx, nodes, rperms, tag_mapping, nodes_at_last_gc: 1 }
     }
 }
 
@@ -833,13 +835,30 @@ impl<'tcx> Tree {
 
 /// Integration with the BorTag garbage collector
 impl Tree {
+    /// The number of nodes in this tree
+    pub fn nodes_count(&self) -> usize {
+        self.tag_mapping.len()
+    }
+
+    /// Whether the tree grew significantly since the last provenance GC run
+    pub fn tree_grew_significantly_since_last_gc(&self) -> bool {
+        let current = self.nodes_count();
+        // do not trigger the GC for small nodes
+        let last = self.nodes_at_last_gc.max(50);
+        // trigger the GC if the tree doubled since the last run,
+        // or otherwise got "significantly" larger.
+        // Note that for trees < 100 nodes, nothing happens.
-        // Note that for trees < 100 nodes, nothing happens.
+        // Note that for trees <= 100 nodes, nothing happens.
-        // Note that for trees < 100 nodes, nothing happens.
+        // Note that for trees <= 100 nodes, nothing happens.
+        current > 2 * last || current > last + 1500
+    }
+
     pub fn remove_unreachable_tags(&mut self, live_tags: &FxHashSet<BorTag>) {
         self.remove_useless_children(self.root, live_tags);
         // Right after the GC runs is a good moment to check if we can
         // merge some adjacent ranges that were made equal by the removal of some
         // tags (this does not necessarily mean that they have identical internal representations,
         // see the `PartialEq` impl for `UniValMap`)
         self.rperms.merge_adjacent_thorough();
+        self.nodes_at_last_gc = self.nodes_count();
     }
 
     /// Checks if a node is useless and should be GC'ed.

diff --git a/src/eval.rs b/src/eval.rs
@@ -83,6 +83,18 @@ pub enum ValidationMode {
     Deep,
 }
 
+/// Settings for the provenance GC
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum ProvenanceGcSettings {
+    /// The GC is disabled
+    Disabled,
+    /// The GC is to run regularly, every `inverval` basic blocks
+    Regularly { interval: u32 },
+    /// The GC runs as needed, determined by heuristics.
+    /// See `should_run_provenance_gc`
+    Heuristically,
+}
+
 /// Configuration needed to spawn a Miri instance.
 #[derive(Clone)]
 pub struct MiriConfig {
@@ -145,8 +157,8 @@ pub struct MiriConfig {
     /// The location of a shared object file to load when calling external functions
     /// FIXME! consider allowing users to specify paths to multiple files, or to a directory
     pub native_lib: Option<PathBuf>,
-    /// Run a garbage collector for BorTags every N basic blocks.
-    pub gc_interval: u32,
+    /// Settings for the provenance GC (e.g. how often it runs)
+    pub gc_settings: ProvenanceGcSettings,
     /// The number of CPUs to be reported by miri.
     pub num_cpus: u32,
     /// Requires Miri to emulate pages of a certain size
@@ -188,7 +200,7 @@ impl Default for MiriConfig {
             report_progress: None,
             retag_fields: RetagFields::Yes,
             native_lib: None,
-            gc_interval: 10_000,
+            gc_settings: ProvenanceGcSettings::Heuristically,
             num_cpus: 1,
             page_size: None,
             collect_leak_backtraces: true,

diff --git a/src/lib.rs b/src/lib.rs
@@ -131,8 +131,8 @@ pub use crate::diagnostics::{
     EvalContextExt as _, NonHaltingDiagnostic, TerminationInfo, report_error,
 };
 pub use crate::eval::{
-    AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, RejectOpWith, ValidationMode,
-    create_ecx, eval_entry,
+    AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, ProvenanceGcSettings, RejectOpWith,
+    ValidationMode, create_ecx, eval_entry,
 };
 pub use crate::helpers::{AccessKind, EvalContextExt as _};
 pub use crate::intrinsics::EvalContextExt as _;

diff --git a/src/machine.rs b/src/machine.rs
@@ -3,7 +3,7 @@
 
 use std::any::Any;
 use std::borrow::Cow;
-use std::cell::RefCell;
+use std::cell::{Cell, RefCell};
 use std::collections::hash_map::Entry;
 use std::path::Path;
 use std::{fmt, process};
@@ -556,9 +556,10 @@ pub struct MiriMachine<'tcx> {
     pub native_lib: Option<!>,
 
     /// Run a garbage collector for BorTags every N basic blocks.
-    pub(crate) gc_interval: u32,
+    pub(crate) gc_settings: ProvenanceGcSettings,
     /// The number of blocks that passed since the last BorTag GC pass.
     pub(crate) since_gc: u32,
+    pub(crate) gc_requested: Cell<bool>,
 
     /// The number of CPUs to be reported by miri.
     pub(crate) num_cpus: u32,
@@ -716,8 +717,9 @@ impl<'tcx> MiriMachine<'tcx> {
             native_lib: config.native_lib.as_ref().map(|_| {
                 panic!("calling functions from native libraries via FFI is only supported on Unix")
             }),
-            gc_interval: config.gc_interval,
+            gc_settings: config.gc_settings,
             since_gc: 0,
+            gc_requested: Cell::new(false),
             num_cpus: config.num_cpus,
             page_size,
             stack_addr,
@@ -784,6 +786,10 @@ impl<'tcx> MiriMachine<'tcx> {
             .and_then(|(_allocated, deallocated)| *deallocated)
             .map(Span::data)
     }
+
+    pub(crate) fn request_gc(&self) {
+        self.gc_requested.set(true)
+    }
 }
 
 impl VisitProvenance for MiriMachine<'_> {
@@ -828,8 +834,9 @@ impl VisitProvenance for MiriMachine<'_> {
             report_progress: _,
             basic_block_count: _,
             native_lib: _,
-            gc_interval: _,
+            gc_settings: _,
             since_gc: _,
+            gc_requested: _,
             num_cpus: _,
             page_size: _,
             stack_addr: _,
@@ -1491,8 +1498,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
         // stacks.
         // When debug assertions are enabled, run the GC as often as possible so that any cases
         // where it mistakenly removes an important tag become visible.
-        if ecx.machine.gc_interval > 0 && ecx.machine.since_gc >= ecx.machine.gc_interval {
-            ecx.machine.since_gc = 0;
+        if ecx.should_run_provenance_gc() {
             ecx.run_provenance_gc();
         }
 

diff --git a/src/provenance_gc.rs b/src/provenance_gc.rs
@@ -208,8 +208,23 @@ fn remove_unreachable_allocs<'tcx>(this: &mut MiriInterpCx<'tcx>, allocs: FxHash
 
 impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
 pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
+    fn should_run_provenance_gc(&self) -> bool {
+        let this = self.eval_context_ref();
+        match this.machine.gc_settings {
+            ProvenanceGcSettings::Disabled => false,
+            ProvenanceGcSettings::Regularly { interval } => this.machine.since_gc >= interval,
+            ProvenanceGcSettings::Heuristically =>
+            // no GC run if the last one was recently
+                this.machine.since_gc >= 75
+                    // run GC if requested, or every 10000 blocks otherwise
+                    && (this.machine.since_gc >= 10_000 || this.machine.gc_requested.get()),
+        }
+    }
+
     fn run_provenance_gc(&mut self) {
         let this = self.eval_context_mut();
+        this.machine.since_gc = 0;
+        this.machine.gc_requested.set(false);
 
         // We collect all tags and AllocId from every part of the interpreter.
         let mut tags = FxHashSet::default();