From 8d9678a62c7f910f8f3ef2403e7f07c43265c0e8 Mon Sep 17 00:00:00 2001
From: Daniel Burgener <Daniel.Burgener@microsoft.com>
Date: Thu, 28 Sep 2023 16:43:19 -0400
Subject: [PATCH] Only build the parser once

The initial implementation assumed that PolicyParser::new() was a
relatively cheap function time-wise, and called it once per file.
There's no need to rebuild it, and it turns out in lalrpop performance
discussions that new() can actually be expensive (and possible
performance improvements could move work into new() to speed up the
steady state).

Regardless of the significance, there's no real cost to pulling new()
out to call once per file (although it might complicate the API ever so
slightly if we later expose parse_policy() as a public function.)

The benchmark run is below.  The weird thing is that Stress Functions
only has one file, so I would expect this to be a no-op for performance
there.  But that 14% has been pretty reproducible across multiple runs.
I'm not really sure what's going on there.

Full system compile     time:   [6.1514 ms 6.2534 ms 6.3622 ms]
                        change: [-79.353% -77.290% -74.995%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 5 outliers among 100 measurements (5.00%)
  5 (5.00%) high mild

Benchmarking Stress functions: Warming up for 3.0000 s
Warning: Unable to complete 100 samples in 5.0s. You may wish to increase target time to 10.2s, or reduce sample count to 40.
Stress functions        time:   [99.906 ms 102.88 ms 105.94 ms]
                        change: [-19.498% -13.892% -8.4060%] (p = 0.00 < 0.05)
                        Performance has improved.
---
 src/lib.rs | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/src/lib.rs b/src/lib.rs
index c7113720..256a2dc4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -309,6 +309,7 @@ fn compile_machine_policies_internal(
 fn get_policies(input_files: Vec<&str>) -> Result<Vec<PolicyFile>, CascadeErrors> {
     let mut errors = CascadeErrors::new();
     let mut policies: Vec<PolicyFile> = Vec::new();
+    let parser = parser::PolicyParser::new();
     for f in input_files {
         let policy_str = match std::fs::read_to_string(f) {
             Ok(s) => s,
@@ -317,7 +318,7 @@ fn get_policies(input_files: Vec<&str>) -> Result<Vec<PolicyFile>, CascadeErrors
                 continue;
             }
         };
-        let p = match parse_policy(&policy_str) {
+        let p = match parse_policy(&parser, &policy_str) {
             Ok(p) => p,
             Err(evec) => {
                 for e in evec {
@@ -332,13 +333,13 @@ fn get_policies(input_files: Vec<&str>) -> Result<Vec<PolicyFile>, CascadeErrors
     errors.into_result(policies)
 }
 
-fn parse_policy(
-    policy: &str,
-) -> Result<Box<Policy>, Vec<LalrpopParseError<usize, lalrpop_util::lexer::Token, ParseErrorMsg>>> {
+fn parse_policy<'a>(
+    parser: &parser::PolicyParser,
+    policy: &'a str,
+) -> Result<Box<Policy>, Vec<LalrpopParseError<usize, lalrpop_util::lexer::Token<'a>, ParseErrorMsg>>>
+{
     let mut errors = Vec::new();
-    // TODO: Probably should only construct once
-    // Why though?
-    let parse_res = parser::PolicyParser::new().parse(&mut errors, policy);
+    let parse_res = parser.parse(&mut errors, policy);
     // errors is a vec of ErrorRecovery.  ErrorRecovery is a struct wrapping a ParseError
     // and a sequence of discarded characters.  We don't need those characters, so we just
     // remove the wrapping.