From 8d9678a62c7f910f8f3ef2403e7f07c43265c0e8 Mon Sep 17 00:00:00 2001 From: Daniel Burgener Date: Thu, 28 Sep 2023 16:43:19 -0400 Subject: [PATCH] Only build the parser once The initial implementation assumed that PolicyParser::new() was a relatively cheap function time-wise, and called it once per file. There's no need to rebuild it, and it turns out in lalrpop performance discussions that new() can actually be expensive (and possible performance improvements could move work into new() to speed up the steady state). Regardless of the significance, there's no real cost to pulling new() out to call once per file (although it might complicate the API ever so slightly if we later expose parse_policy() as a public function.) The benchmark run is below. The weird thing is that Stress Functions only has one file, so I would expect this to be a no-op for performance there. But that 14% has been pretty reproducible across multiple runs. I'm not really sure what's going on there. Full system compile time: [6.1514 ms 6.2534 ms 6.3622 ms] change: [-79.353% -77.290% -74.995%] (p = 0.00 < 0.05) Performance has improved. Found 5 outliers among 100 measurements (5.00%) 5 (5.00%) high mild Benchmarking Stress functions: Warming up for 3.0000 s Warning: Unable to complete 100 samples in 5.0s. You may wish to increase target time to 10.2s, or reduce sample count to 40. Stress functions time: [99.906 ms 102.88 ms 105.94 ms] change: [-19.498% -13.892% -8.4060%] (p = 0.00 < 0.05) Performance has improved. --- src/lib.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c7113720..256a2dc4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -309,6 +309,7 @@ fn compile_machine_policies_internal( fn get_policies(input_files: Vec<&str>) -> Result, CascadeErrors> { let mut errors = CascadeErrors::new(); let mut policies: Vec = Vec::new(); + let parser = parser::PolicyParser::new(); for f in input_files { let policy_str = match std::fs::read_to_string(f) { Ok(s) => s, @@ -317,7 +318,7 @@ fn get_policies(input_files: Vec<&str>) -> Result, CascadeErrors continue; } }; - let p = match parse_policy(&policy_str) { + let p = match parse_policy(&parser, &policy_str) { Ok(p) => p, Err(evec) => { for e in evec { @@ -332,13 +333,13 @@ fn get_policies(input_files: Vec<&str>) -> Result, CascadeErrors errors.into_result(policies) } -fn parse_policy( - policy: &str, -) -> Result, Vec>> { +fn parse_policy<'a>( + parser: &parser::PolicyParser, + policy: &'a str, +) -> Result, Vec, ParseErrorMsg>>> +{ let mut errors = Vec::new(); - // TODO: Probably should only construct once - // Why though? - let parse_res = parser::PolicyParser::new().parse(&mut errors, policy); + let parse_res = parser.parse(&mut errors, policy); // errors is a vec of ErrorRecovery. ErrorRecovery is a struct wrapping a ParseError // and a sequence of discarded characters. We don't need those characters, so we just // remove the wrapping.