diff --git a/src/bin/sccache-dist/main.rs b/src/bin/sccache-dist/main.rs
index ea702b71e..9c116568b 100644
--- a/src/bin/sccache-dist/main.rs
+++ b/src/bin/sccache-dist/main.rs
@@ -241,6 +241,7 @@ fn run(command: Command) -> Result {
             scheduler_url,
             scheduler_auth,
             toolchain_cache_size,
+            max_per_core_load,
             num_cpus_to_ignore,
         }) => {
             let bind_addr = bind_addr.unwrap_or(public_addr);
@@ -307,6 +308,7 @@ fn run(command: Command) -> Result {
                 bind_addr,
                 scheduler_url.to_url(),
                 scheduler_auth,
+                max_per_core_load,
                 num_cpus_to_ignore,
                 server,
             )
@@ -605,8 +607,7 @@ impl SchedulerIncoming for Scheduler {
                 })
                 // Sort servers by least load and oldest error
                 .sorted_by(|(_, details_a, load_a), (_, details_b, load_b)| {
-                    let (penalty_a, penalty_b) = match (details_a.last_error, details_b.last_error)
-                    {
+                    let (score_a, score_b) = match (details_a.last_error, details_b.last_error) {
                         // If neither server has a recent error, prefer the one with lowest load
                         (None, None) => (*load_a, *load_b),
                         // Prefer servers with no recent errors over servers with recent errors
@@ -618,7 +619,7 @@ impl SchedulerIncoming for Scheduler {
                             score_server(load_b, now - err_b),
                         ),
                     };
-                    penalty_a.total_cmp(&penalty_b)
+                    score_a.total_cmp(&score_b)
                 })
                 .find_or_first(|_| true)
                 .map(|(server_id, _, _)| *server_id)
diff --git a/src/config.rs b/src/config.rs
index 722316425..fc6d9d4d0 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -1190,6 +1190,20 @@ pub mod server {
         TEN_GIGS
     }
 
+    /// Default multiple of the core count used to size the dist server's thread pool.
+    ///
+    /// Reads `SCCACHE_DIST_MAX_PER_CORE_LOAD`; an unset, unparsable, non-finite or
+    /// non-positive value falls back to 1.
+    pub fn default_max_per_core_load() -> f64 {
+        std::env::var("SCCACHE_DIST_MAX_PER_CORE_LOAD")
+            .ok()
+            .and_then(|s| s.parse::<f64>().ok())
+            // Reject NaN, infinities, zero and negatives: none of them is a usable multiple
+            .filter(|load| load.is_finite() && *load > 0.0)
+            // Default to 1 to match the server's default thread pool multiple
+            .unwrap_or(1f64)
+    }
+
     fn default_num_cpus_to_ignore() -> usize {
         std::env::var("SCCACHE_DIST_NUM_CPUS_TO_IGNORE")
             .ok()
@@ -1268,6 +1282,7 @@ pub mod server {
         pub scheduler_url: HTTPUrl,
         pub scheduler_auth: SchedulerAuth,
         pub toolchain_cache_size: u64,
+        pub max_per_core_load: f64,
         pub num_cpus_to_ignore: usize,
     }
 
@@ -1283,6 +1298,7 @@ pub mod server {
                 ),
                 scheduler_auth: SchedulerAuth::Insecure,
                 toolchain_cache_size: default_toolchain_cache_size(),
+                max_per_core_load: default_max_per_core_load(),
                 num_cpus_to_ignore: default_num_cpus_to_ignore(),
             }
         }
diff --git a/src/dist/http.rs b/src/dist/http.rs
index bd5125416..5095cef14 100644
--- a/src/dist/http.rs
+++ b/src/dist/http.rs
@@ -928,6 +928,7 @@ mod server {
         jwt_key: Vec,
         // Randomly generated nonce to allow the scheduler to detect server restarts
         server_nonce: ServerNonce,
+        max_per_core_load: f64,
         num_cpus_to_ignore: usize,
         handler: S,
     }
@@ -938,6 +939,7 @@ mod server {
             bind_addr: SocketAddr,
             scheduler_url: reqwest::Url,
             scheduler_auth: String,
+            max_per_core_load: f64,
             num_cpus_to_ignore: usize,
             handler: S,
         ) -> Result {
@@ -958,6 +960,7 @@ mod server {
                 privkey_pem,
                 jwt_key,
                 server_nonce,
+                max_per_core_load,
                 num_cpus_to_ignore,
                 handler,
             })
@@ -974,6 +977,7 @@ mod server {
                 privkey_pem,
                 jwt_key,
                 server_nonce,
+                max_per_core_load,
                 num_cpus_to_ignore,
                 handler,
             } = self;
@@ -1099,7 +1103,10 @@ mod server {
             // This limit is rouille's default for `start_server_with_pool`, which
             // we would use, except that interface doesn't permit any sort of
             // error handling to be done.
-            let server = server.pool_size(num_cpus);
+            // `pool_size` must be non-zero, so clamp to one thread: the cast yields 0 when
+            // the scaled core count is below 1, or `max_per_core_load` is negative or NaN.
+            let pool_size = ((num_cpus as f64 * max_per_core_load).floor() as usize).max(1);
+            let server = server.pool_size(pool_size);
             server.run();
 
             panic!("Rouille server terminated")
diff --git a/tests/harness/mod.rs b/tests/harness/mod.rs
index eb689f1a4..fdde68e0a 100644
--- a/tests/harness/mod.rs
+++ b/tests/harness/mod.rs
@@ -419,6 +419,7 @@ impl DistSystem {
                     server_addr,
                     self.scheduler_url().to_url(),
                     token,
+                    1f64,
                     0,
                     handler,
                 )