diff --git a/Cargo.lock b/Cargo.lock index 11db30392..c28f39738 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2577,6 +2577,7 @@ dependencies = [ "serde_json", "serial_test", "sha2", + "shlex", "strip-ansi-escapes", "syslog", "tar", @@ -2777,6 +2778,12 @@ dependencies = [ "digest", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook-registry" version = "1.4.1" diff --git a/Cargo.toml b/Cargo.toml index ce3426c05..9bbacfba0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,6 +54,7 @@ hyper-util = { version = "0.1.3", optional = true, features = [ "server", ] } is-terminal = "0.4.12" +itertools = "0.12" jobserver = "0.1" jwt = { package = "jsonwebtoken", version = "9", optional = true } libc = "0.2.153" @@ -118,6 +119,7 @@ object = "0.32" rouille = { version = "3.6", optional = true, default-features = false, features = [ "ssl", ] } +shlex = "1.3.0" syslog = { version = "6", optional = true } version-compare = { version = "0.1.1", optional = true } diff --git a/src/bin/sccache-dist/build.rs b/src/bin/sccache-dist/build.rs index 815674661..33f9f6d3c 100644 --- a/src/bin/sccache-dist/build.rs +++ b/src/bin/sccache-dist/build.rs @@ -231,7 +231,7 @@ impl OverlayBuilder { for (tc, _) in entries { warn!("Removing old un-compressed toolchain: {:?}", tc); assert!(toolchain_dir_map.remove(tc).is_some()); - fs::remove_dir_all(&self.dir.join("toolchains").join(&tc.archive_id)) + fs::remove_dir_all(self.dir.join("toolchains").join(&tc.archive_id)) .context("Failed to remove old toolchain directory")?; } } diff --git a/src/compiler/c.rs b/src/compiler/c.rs index f3bc696e3..69fe3e923 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -153,8 +153,12 @@ pub enum CCompilerKind { Diab, /// Microsoft Visual C++ Msvc, - /// NVIDIA cuda compiler + /// NVIDIA CUDA compiler Nvcc, + /// NVIDIA CUDA optimizer and PTX generator + Cicc, + /// NVIDIA CUDA PTX assembler + Ptxas, /// NVIDIA hpc c, c++ compiler Nvhpc, /// Tasking VX @@ -1160,6 +1164,7 @@ impl Compilation for CCompilation ref env_vars, .. } = *self; + compiler.generate_compile_commands( path_transformer, executable, @@ -1379,6 +1384,10 @@ impl pkg::ToolchainPackager for CToolchainPackager { add_named_file(&mut package_builder, "liblto_plugin.so")?; } + CCompilerKind::Cicc => {} + + CCompilerKind::Ptxas => {} + CCompilerKind::Nvcc => { // Various programs called by the nvcc front end. // presumes the underlying host compiler is consistent diff --git a/src/compiler/cicc.rs b/src/compiler/cicc.rs new file mode 100644 index 000000000..f38f4fe05 --- /dev/null +++ b/src/compiler/cicc.rs @@ -0,0 +1,314 @@ +// Copyright 2016 Mozilla Foundation +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![allow(unused_imports, dead_code, unused_variables)] + +use crate::compiler::args::*; +use crate::compiler::c::{ArtifactDescriptor, CCompilerImpl, CCompilerKind, ParsedArguments}; +use crate::compiler::{ + CCompileCommand, Cacheable, ColorMode, CompileCommand, CompilerArguments, Language, + SingleCompileCommand, +}; +use crate::{counted_array, dist}; + +use crate::mock_command::{CommandCreator, CommandCreatorSync, RunCommand}; + +use async_trait::async_trait; + +use std::collections::HashMap; +use std::ffi::OsString; +use std::fs; +use std::path::{Path, PathBuf}; +use std::process; + +use crate::errors::*; + +/// A unit struct on which to implement `CCompilerImpl`. +#[derive(Clone, Debug)] +pub struct Cicc { + pub version: Option, +} + +#[async_trait] +impl CCompilerImpl for Cicc { + fn kind(&self) -> CCompilerKind { + CCompilerKind::Cicc + } + fn plusplus(&self) -> bool { + true + } + fn version(&self) -> Option { + self.version.clone() + } + fn parse_arguments( + &self, + arguments: &[OsString], + cwd: &Path, + ) -> CompilerArguments { + parse_arguments(arguments, cwd, Language::Ptx, &ARGS[..]) + } + #[allow(clippy::too_many_arguments)] + async fn preprocess( + &self, + _creator: &T, + _executable: &Path, + parsed_args: &ParsedArguments, + cwd: &Path, + _env_vars: &[(OsString, OsString)], + _may_dist: bool, + _rewrite_includes_only: bool, + _preprocessor_cache_mode: bool, + ) -> Result + where + T: CommandCreatorSync, + { + preprocess(cwd, parsed_args).await + } + fn generate_compile_commands( + &self, + path_transformer: &mut dist::PathTransformer, + executable: &Path, + parsed_args: &ParsedArguments, + cwd: &Path, + env_vars: &[(OsString, OsString)], + _rewrite_includes_only: bool, + ) -> Result<( + Box>, + Option, + Cacheable, + )> + where + T: CommandCreatorSync, + { + generate_compile_commands(path_transformer, executable, parsed_args, cwd, env_vars).map( + |(command, dist_command, cacheable)| { + (CCompileCommand::new(command), dist_command, cacheable) + }, + ) + } +} + +pub fn parse_arguments( + arguments: &[OsString], + cwd: &Path, + language: Language, + arg_info: S, +) -> CompilerArguments +where + S: SearchableArgInfo, +{ + let mut args = arguments.to_vec(); + let input_loc = arguments.len() - 3; + let input = args.splice(input_loc..input_loc + 1, []).next().unwrap(); + + let mut take_next = false; + let mut extra_inputs = vec![]; + let mut outputs = HashMap::new(); + + let mut common_args = vec![]; + let mut unhashed_args = vec![]; + + for arg in ArgsIter::new(args.iter().cloned(), arg_info) { + match arg { + Ok(arg) => { + let args = match arg.get_data() { + Some(PassThrough(_)) => { + take_next = false; + &mut common_args + } + Some(Output(o)) => { + take_next = false; + let path = cwd.join(o); + outputs.insert( + "obj", + ArtifactDescriptor { + path, + optional: false, + }, + ); + continue; + } + Some(UnhashedInput(o)) => { + take_next = false; + let path = cwd.join(o); + if !path.exists() { + continue; + } + extra_inputs.push(path); + &mut unhashed_args + } + Some(UnhashedOutput(o)) => { + take_next = false; + let path = cwd.join(o); + if let Some(flag) = arg.flag_str() { + outputs.insert( + flag, + ArtifactDescriptor { + path, + optional: false, + }, + ); + } + &mut unhashed_args + } + Some(UnhashedFlag) | Some(Unhashed(_)) => { + take_next = false; + &mut unhashed_args + } + None => match arg { + Argument::Raw(ref p) => { + if take_next { + take_next = false; + &mut common_args + } else { + continue; + } + } + Argument::UnknownFlag(ref p) => { + let s = p.to_string_lossy(); + take_next = s.starts_with('-'); + &mut common_args + } + _ => unreachable!(), + }, + }; + args.extend(arg.iter_os_strings()); + } + _ => continue, + }; + } + + CompilerArguments::Ok(ParsedArguments { + input: input.into(), + outputs, + double_dash_input: false, + language, + compilation_flag: OsString::new(), + depfile: None, + dependency_args: vec![], + preprocessor_args: vec![], + common_args, + arch_args: vec![], + unhashed_args, + extra_dist_files: extra_inputs, + extra_hash_files: vec![], + msvc_show_includes: false, + profile_generate: false, + color_mode: ColorMode::Off, + suppress_rewrite_includes_only: false, + too_hard_for_preprocessor_cache_mode: None, + }) +} + +pub async fn preprocess(cwd: &Path, parsed_args: &ParsedArguments) -> Result { + // cicc and ptxas expect input to be an absolute path + let input = if parsed_args.input.is_absolute() { + parsed_args.input.clone() + } else { + cwd.join(&parsed_args.input) + }; + std::fs::read(input) + .map_err(anyhow::Error::new) + .map(|s| process::Output { + status: process::ExitStatus::default(), + stdout: s, + stderr: vec![], + }) +} + +pub fn generate_compile_commands( + path_transformer: &mut dist::PathTransformer, + executable: &Path, + parsed_args: &ParsedArguments, + cwd: &Path, + env_vars: &[(OsString, OsString)], +) -> Result<( + SingleCompileCommand, + Option, + Cacheable, +)> { + // Unused arguments + #[cfg(not(feature = "dist-client"))] + { + let _ = path_transformer; + } + + trace!("compile"); + + let lang_str = &parsed_args.language.as_str(); + let out_file = match parsed_args.outputs.get("obj") { + Some(obj) => &obj.path, + None => return Err(anyhow!("Missing {:?} file output", lang_str)), + }; + + let mut arguments: Vec = vec![]; + arguments.extend_from_slice(&parsed_args.common_args); + arguments.extend_from_slice(&parsed_args.unhashed_args); + arguments.extend(vec![ + (&parsed_args.input).into(), + "-o".into(), + out_file.into(), + ]); + + let command = SingleCompileCommand { + executable: executable.to_owned(), + arguments, + env_vars: env_vars.to_owned(), + cwd: cwd.to_owned(), + }; + + #[cfg(not(feature = "dist-client"))] + let dist_command = None; + #[cfg(feature = "dist-client")] + let dist_command = (|| { + let mut arguments: Vec = vec![]; + arguments.extend(dist::osstrings_to_strings(&parsed_args.common_args)?); + arguments.extend(dist::osstrings_to_strings(&parsed_args.unhashed_args)?); + arguments.extend(vec![ + path_transformer.as_dist(&parsed_args.input)?, + "-o".into(), + path_transformer.as_dist(out_file)?, + ]); + Some(dist::CompileCommand { + executable: path_transformer.as_dist(executable.canonicalize().unwrap().as_path())?, + arguments, + env_vars: dist::osstring_tuples_to_strings(env_vars)?, + cwd: path_transformer.as_dist_abs(cwd)?, + }) + })(); + + Ok((command, dist_command, Cacheable::Yes)) +} + +ArgData! { pub + Output(PathBuf), + UnhashedInput(PathBuf), + UnhashedOutput(PathBuf), + UnhashedFlag, + PassThrough(OsString), + Unhashed(OsString), +} + +use self::ArgData::*; + +counted_array!(pub static ARGS: [ArgInfo; _] = [ + take_arg!("--gen_c_file_name", PathBuf, Separated, UnhashedOutput), + take_arg!("--gen_device_file_name", PathBuf, Separated, UnhashedOutput), + flag!("--gen_module_id_file", UnhashedFlag), + take_arg!("--include_file_name", OsString, Separated, PassThrough), + take_arg!("--module_id_file_name", PathBuf, Separated, UnhashedInput), + take_arg!("--stub_file_name", PathBuf, Separated, UnhashedOutput), + take_arg!("-o", PathBuf, Separated, Output), +]); diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 1d4712c97..e06f89a2b 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -16,6 +16,7 @@ use crate::cache::{Cache, CacheWrite, DecompressionFailure, FileObjectSource, Storage}; use crate::compiler::args::*; use crate::compiler::c::{CCompiler, CCompilerKind}; +use crate::compiler::cicc::Cicc; use crate::compiler::clang::Clang; use crate::compiler::diab::Diab; use crate::compiler::gcc::Gcc; @@ -24,6 +25,7 @@ use crate::compiler::msvc::Msvc; use crate::compiler::nvcc::Nvcc; use crate::compiler::nvcc::NvccHostCompiler; use crate::compiler::nvhpc::Nvhpc; +use crate::compiler::ptxas::Ptxas; use crate::compiler::rust::{Rust, RustupProxy}; use crate::compiler::tasking_vx::TaskingVX; #[cfg(feature = "dist-client")] @@ -219,6 +221,8 @@ pub enum Language { ObjectiveC, ObjectiveCxx, Cuda, + Ptx, + Cubin, Rust, Hip, } @@ -241,6 +245,8 @@ impl Language { Some("M") | Some("mm") => Some(Language::ObjectiveCxx), // TODO mii Some("cu") => Some(Language::Cuda), + Some("ptx") => Some(Language::Ptx), + Some("cubin") => Some(Language::Cubin), // TODO cy Some("rs") => Some(Language::Rust), Some("hip") => Some(Language::Hip), @@ -259,6 +265,8 @@ impl Language { Language::ObjectiveC => "objc", Language::ObjectiveCxx => "objc++", Language::Cuda => "cuda", + Language::Ptx => "cuda", + Language::Cubin => "cuda", Language::Rust => "rust", Language::Hip => "hip", } @@ -276,6 +284,8 @@ impl CompilerKind { | Language::ObjectiveC | Language::ObjectiveCxx => "C/C++", Language::Cuda => "CUDA", + Language::Ptx => "CUDA", + Language::Cubin => "CUDA", Language::Rust => "Rust", Language::Hip => "HIP", } @@ -288,8 +298,10 @@ impl CompilerKind { CompilerKind::C(CCompilerKind::Diab) => textual_lang + " [diab]", CompilerKind::C(CCompilerKind::Gcc) => textual_lang + " [gcc]", CompilerKind::C(CCompilerKind::Msvc) => textual_lang + " [msvc]", - CompilerKind::C(CCompilerKind::Nvhpc) => textual_lang + " [nvhpc]", CompilerKind::C(CCompilerKind::Nvcc) => textual_lang + " [nvcc]", + CompilerKind::C(CCompilerKind::Cicc) => textual_lang + " [nvcc]", + CompilerKind::C(CCompilerKind::Ptxas) => textual_lang + " [nvcc]", + CompilerKind::C(CCompilerKind::Nvhpc) => textual_lang + " [nvhpc]", CompilerKind::C(CCompilerKind::TaskingVX) => textual_lang + " [taskingvx]", CompilerKind::Rust => textual_lang, } @@ -670,7 +682,7 @@ where .generate_compile_commands(&mut path_transformer, rewrite_includes_only) .context("Failed to generate compile commands")?; - let dist_client = match dist_client { + let dist_client = match dist_compile_cmd.clone().and(dist_client) { Some(dc) => dc, None => { debug!("[{}]: Compiling locally", out_pretty); @@ -1105,6 +1117,28 @@ fn is_rustc_like>(p: P) -> bool { ) } +/// Returns true if the given path looks like cicc +fn is_nvidia_cicc>(p: P) -> bool { + matches!( + p.as_ref() + .file_stem() + .map(|s| s.to_string_lossy().to_lowercase()) + .as_deref(), + Some("cicc") + ) +} + +/// Returns true if the given path looks like ptxas +fn is_nvidia_ptxas>(p: P) -> bool { + matches!( + p.as_ref() + .file_stem() + .map(|s| s.to_string_lossy().to_lowercase()) + .as_deref(), + Some("ptxas") + ) +} + /// Returns true if the given path looks like a c compiler program /// /// This does not check c compilers, it only report programs that are definitely not rustc @@ -1166,6 +1200,30 @@ where let rustc_executable = if let Some(ref rustc_executable) = maybe_rustc_executable { rustc_executable + } else if is_nvidia_cicc(executable) { + debug!("Found cicc"); + return CCompiler::new( + Cicc { + // TODO: Use nvcc --version + version: Some(String::new()), + }, + executable.to_owned(), + &pool, + ) + .await + .map(|c| (Box::new(c) as Box>, None)); + } else if is_nvidia_ptxas(executable) { + debug!("Found ptxas"); + return CCompiler::new( + Ptxas { + // TODO: Use nvcc --version + version: Some(String::new()), + }, + executable.to_owned(), + &pool, + ) + .await + .map(|c| (Box::new(c) as Box>, None)); } else if is_known_c_compiler(executable) { let cc = detect_c_compiler(creator, executable, args, env.to_vec(), pool).await; return cc.map(|c| (c, None)); @@ -2262,7 +2320,6 @@ LLVM version: 6.0", 0, COMPILER_STDOUT.to_owned(), COMPILER_STDERR.to_owned(), - )); ); let service = server::SccacheService::mock_with_dist_client( dist_client.clone(), diff --git a/src/compiler/gcc.rs b/src/compiler/gcc.rs index f6732df26..3a59470f3 100644 --- a/src/compiler/gcc.rs +++ b/src/compiler/gcc.rs @@ -686,7 +686,7 @@ where }) } -fn language_to_gcc_arg(lang: Language) -> Option<&'static str> { +pub fn language_to_gcc_arg(lang: Language) -> Option<&'static str> { match lang { Language::C => Some("c"), Language::CHeader => Some("c-header"), @@ -695,6 +695,8 @@ fn language_to_gcc_arg(lang: Language) -> Option<&'static str> { Language::ObjectiveC => Some("objective-c"), Language::ObjectiveCxx => Some("objective-c++"), Language::Cuda => Some("cu"), + Language::Ptx => None, + Language::Cubin => None, Language::Rust => None, // Let the compiler decide Language::Hip => Some("hip"), Language::GenericHeader => None, // Let the compiler decide @@ -747,7 +749,6 @@ fn preprocess_cmd( // Explicitly rewrite the -arch args to be preprocessor defines of the form // __arch__ so that they affect the preprocessor output but don't cause // clang to error. - debug!("arch args before rewrite: {:?}", parsed_args.arch_args); let rewritten_arch_args = parsed_args .arch_args .iter() @@ -765,6 +766,9 @@ fn preprocess_cmd( if unique_rewritten.len() <= 1 { // don't use rewritten arch args if there is only one arch arch_args_to_use = &parsed_args.arch_args; + } else { + debug!("-arch args before rewrite: {:?}", parsed_args.arch_args); + debug!("-arch args after rewrite: {:?}", arch_args_to_use); } cmd.args(&parsed_args.preprocessor_args) @@ -778,7 +782,6 @@ fn preprocess_cmd( .env_clear() .envs(env_vars.to_vec()) .current_dir(cwd); - debug!("cmd after -arch rewrite: {:?}", cmd); } #[allow(clippy::too_many_arguments)] diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs index 9c1fc471d..051ccc2c8 100644 --- a/src/compiler/mod.rs +++ b/src/compiler/mod.rs @@ -15,6 +15,7 @@ #[macro_use] mod args; mod c; +mod cicc; mod clang; #[macro_use] #[allow(clippy::module_inception)] @@ -25,6 +26,7 @@ mod msvc; mod nvcc; mod nvhpc; mod preprocessor_cache; +mod ptxas; mod rust; mod tasking_vx; #[macro_use] diff --git a/src/compiler/nvcc.rs b/src/compiler/nvcc.rs index 91f5c0392..b9100f4ae 100644 --- a/src/compiler/nvcc.rs +++ b/src/compiler/nvcc.rs @@ -19,20 +19,28 @@ use crate::compiler::args::*; use crate::compiler::c::{ArtifactDescriptor, CCompilerImpl, CCompilerKind, ParsedArguments}; use crate::compiler::gcc::ArgData::*; use crate::compiler::{ - gcc, write_temp_file, Cacheable, CompileCommand, CompilerArguments, Language, + self, gcc, get_compiler_info, write_temp_file, CCompileCommand, Cacheable, CompileCommand, + CompileCommandImpl, CompilerArguments, Language, +}; +use crate::mock_command::{ + exit_status, CommandChild, CommandCreator, CommandCreatorSync, ExitStatusValue, RunCommand, }; -use crate::mock_command::{CommandCreator, CommandCreatorSync, RunCommand}; use crate::util::{run_input_output, OsStrExt}; -use crate::{counted_array, dist}; +use crate::{counted_array, dist, protocol, server}; use async_trait::async_trait; use fs::File; use fs_err as fs; +use futures::{FutureExt, StreamExt, TryFutureExt, TryStreamExt}; +use itertools::Itertools; use log::Level::Trace; -use std::ffi::OsString; -use std::future::Future; -use std::io::{self, Write}; +use regex::Regex; +use std::collections::HashMap; +use std::ffi::{OsStr, OsString}; +use std::future::{Future, IntoFuture}; +use std::io::{self, BufRead, Read, Write}; use std::path::{Path, PathBuf}; use std::process; +use which::which_in; use crate::errors::*; @@ -66,8 +74,28 @@ impl CCompilerImpl for Nvcc { arguments: &[OsString], cwd: &Path, ) -> CompilerArguments { + let mut arguments = arguments.to_vec(); + + if let Ok(flags) = std::env::var("NVCC_PREPEND_FLAGS") { + arguments = shlex::split(&flags) + .unwrap_or_default() + .iter() + .map(|s| s.clone().into_arg_os_string()) + .chain(arguments.iter().cloned()) + .collect::>(); + } + + if let Ok(flags) = std::env::var("NVCC_APPEND_FLAGS") { + arguments.extend( + shlex::split(&flags) + .unwrap_or_default() + .iter() + .map(|s| s.clone().into_arg_os_string()), + ); + } + let parsed_args = gcc::parse_arguments( - arguments, + &arguments, cwd, (&gcc::ARGS[..], &ARGS[..]), false, @@ -75,13 +103,27 @@ impl CCompilerImpl for Nvcc { ); match parsed_args { - CompilerArguments::Ok(pargs) => { - if pargs.compilation_flag != "-c" { - let mut new_args = pargs.clone(); - new_args.common_args.push(pargs.compilation_flag); - return CompilerArguments::Ok(new_args); + CompilerArguments::Ok(mut parsed_args) => { + match parsed_args.compilation_flag.to_str() { + Some("") => { /* no compile flag is valid */ } + Some(flag) => { + // Add the compilation flag to `parsed_args.common_args` so + // it's considered when computing the hash. + // + // Consider the following cases: + // $ sccache nvcc x.cu -o x.bin + // $ sccache nvcc x.cu -o x.cu.o -c + // $ sccache nvcc x.cu -o x.ptx -ptx + // $ sccache nvcc x.cu -o x.cubin -cubin + // + // The preprocessor output for all four are identical, so + // without including the compilation flag in the hasher's + // inputs, the same hash would be generated for all four. + parsed_args.common_args.push(flag.into()); + } + _ => unreachable!(), } - CompilerArguments::Ok(pargs) + CompilerArguments::Ok(parsed_args) } CompilerArguments::CannotCache(_, _) | CompilerArguments::NotCompilation => parsed_args, } @@ -102,6 +144,12 @@ impl CCompilerImpl for Nvcc { where T: CommandCreatorSync, { + let env_vars = env_vars + .iter() + .filter(|(k, _)| k != "NVCC_PREPEND_FLAGS" && k != "NVCC_APPEND_FLAGS") + .cloned() + .collect::>(); + let language = match parsed_args.language { Language::C => Ok("c"), Language::Cxx => Ok("c++"), @@ -113,84 +161,69 @@ impl CCompilerImpl for Nvcc { let initialize_cmd_and_args = || { let mut command = creator.clone().new_command_sync(executable); - command.args(&parsed_args.preprocessor_args); - command.args(&parsed_args.common_args); - //We need to add "-rdc=true" if we are compiling with `-dc` - //So that the preprocessor has the correct implicit defines - if parsed_args.compilation_flag == "-dc" { - command.arg("-rdc=true"); - } - command.arg("-x").arg(language).arg(&parsed_args.input); - command - }; - - let dep_before_preprocessor = || { - //NVCC doesn't support generating both the dependency information - //and the preprocessor output at the same time. So if we have - //need for both we need separate compiler invocations - let mut dep_cmd = initialize_cmd_and_args(); - let mut transformed_deps = vec![]; - for item in parsed_args.dependency_args.iter() { - if item == "-MD" { - transformed_deps.push(OsString::from("-M")); - } else if item == "-MMD" { - transformed_deps.push(OsString::from("-MM")); - } else { - transformed_deps.push(item.clone()); - } - } - dep_cmd - .args(&transformed_deps) + .current_dir(cwd) .env_clear() - .envs(env_vars.to_vec()) - .current_dir(cwd); + .envs(env_vars.clone()) + .args(&parsed_args.preprocessor_args) + .args(&parsed_args.common_args) + .arg("-x") + .arg(language) + .arg(&parsed_args.input); + command + }; + let dependencies_command = || { + // NVCC doesn't support generating both the dependency information + // and the preprocessor output at the same time. So if we have + // need for both, we need separate compiler invocations + let mut dependency_cmd = initialize_cmd_and_args(); + dependency_cmd.args( + &parsed_args + .dependency_args + .iter() + .map(|arg| match arg.to_str().unwrap_or_default() { + "-MD" | "--generate-dependencies-with-compile" => "-M", + "-MMD" | "--generate-nonsystem-dependencies-with-compile" => "-MM", + arg => arg, + }) + // protect against duplicate -M and -MM flags after transform + .unique() + .collect::>(), + ); if log_enabled!(Trace) { - trace!("dep-gen command: {:?}", dep_cmd); + trace!("dependencies command: {:?}", dependency_cmd); } - dep_cmd + dependency_cmd }; - trace!("preprocess"); - let mut cmd = initialize_cmd_and_args(); - - //NVCC only supports `-E` when it comes after preprocessor - //and common flags. - // - // nvc/nvc++ don't support no line numbers to console - // msvc requires the `-EP` flag to output no line numbers to console - // other host compilers are presumed to match `gcc` behavior - let no_line_num_flag = match self.host_compiler { - NvccHostCompiler::Nvhpc => "", - NvccHostCompiler::Msvc => "-Xcompiler=-EP", - NvccHostCompiler::Gcc => "-Xcompiler=-P", + let preprocessor_command = || { + let mut preprocess_cmd = initialize_cmd_and_args(); + // NVCC only supports `-E` when it comes after preprocessor and common flags. + preprocess_cmd.arg("-E"); + preprocess_cmd.arg(match self.host_compiler { + // nvc/nvc++ don't support eliding line numbers + NvccHostCompiler::Nvhpc => "", + // msvc requires the `-EP` flag to elide line numbers + NvccHostCompiler::Msvc => "-Xcompiler=-EP", + // other host compilers are presumed to match `gcc` behavior + NvccHostCompiler::Gcc => "-Xcompiler=-P", + }); + if log_enabled!(Trace) { + trace!("preprocessor command: {:?}", preprocess_cmd); + } + preprocess_cmd }; - cmd.arg("-E") - .arg(no_line_num_flag) - .env_clear() - .envs(env_vars.to_vec()) - .current_dir(cwd); - if log_enabled!(Trace) { - trace!("preprocess: {:?}", cmd); - } - //Need to chain the dependency generation and the preprocessor - //to emulate a `proper` front end + // Chain dependency generation and the preprocessor command to emulate a `proper` front end if !parsed_args.dependency_args.is_empty() { - let first = run_input_output(dep_before_preprocessor(), None); - let second = run_input_output(cmd, None); - // TODO: If we need to chain these to emulate a frontend, shouldn't - // we explicitly wait on the first one before starting the second one? - // (rather than via which drives these concurrently) - let (_f, s) = futures::future::try_join(first, second).await?; - Ok(s) - } else { - run_input_output(cmd, None).await + run_input_output(dependencies_command(), None).await?; } + + run_input_output(preprocessor_command(), None).await } - fn generate_compile_commands( + fn generate_compile_commands( &self, path_transformer: &mut dist::PathTransformer, executable: &Path, @@ -198,16 +231,849 @@ impl CCompilerImpl for Nvcc { cwd: &Path, env_vars: &[(OsString, OsString)], rewrite_includes_only: bool, - ) -> Result<(CompileCommand, Option, Cacheable)> { - gcc::generate_compile_commands( - path_transformer, + ) -> Result<( + Box>, + Option, + Cacheable, + )> + where + T: CommandCreatorSync, + { + generate_compile_commands(parsed_args, executable, cwd, env_vars).map( + |(command, dist_command, cacheable)| { + (CCompileCommand::new(command), dist_command, Cacheable::No) + }, + ) + } +} + +pub fn generate_compile_commands( + parsed_args: &ParsedArguments, + executable: &Path, + cwd: &Path, + env_vars: &[(OsString, OsString)], +) -> Result<(NvccCompileCommand, Option, Cacheable)> { + let mut unhashed_args = parsed_args.unhashed_args.clone(); + + let keep_dir = { + let mut keep = false; + let mut keep_dir = None; + // Remove all occurrences of `-keep` and `-keep-dir`, but save the keep dir for copying to later + loop { + if let Some(idx) = unhashed_args + .iter() + .position(|x| x == "-keep-dir" || x == "--keep-dir") + { + let dir = PathBuf::from(unhashed_args[idx + 1].as_os_str()); + let dir = if dir.is_absolute() { + dir + } else { + cwd.join(dir) + }; + unhashed_args.splice(idx..(idx + 2), []); + keep_dir = Some(dir); + continue; + } else if let Some(idx) = unhashed_args.iter().position(|x| { + x == "-keep" || x == "--keep" || x == "-save-temps" || x == "--save-temps" + }) { + keep = true; + unhashed_args.splice(idx..(idx + 1), []); + if keep_dir.is_none() { + keep_dir = Some(cwd.to_path_buf()) + } + continue; + } + break; + } + // Match nvcc behavior where intermediate files are kept if: + // * Only `-keep` is specified (files copied to cwd) + // * Both `-keep -keep-dir=` are specified (files copied to ) + // nvcc does _not_ keep intermediate files if `-keep-dir=` is specified without `-keep` + keep.then_some(()).and(keep_dir) + }; + + let num_parallel = { + let mut num_parallel = 1; + // Remove all occurrences of `-t=` or `--threads` because it's incompatible with --dryrun + // Prefer the last occurrence of `-t=` or `--threads` to match nvcc behavior + loop { + if let Some(idx) = unhashed_args.iter().position(|x| x.starts_with("-t=")) { + let arg = unhashed_args.get(idx); + if let Some(arg) = arg.and_then(|arg| arg.to_str()) { + if let Ok(arg) = arg[3..arg.len()].parse::() { + num_parallel = arg; + } + } + unhashed_args.splice(idx..(idx + 1), []); + continue; + } + if let Some(idx) = unhashed_args.iter().position(|x| x == "--threads") { + let arg = unhashed_args.get(idx + 1); + if let Some(arg) = arg.and_then(|arg| arg.to_str()) { + if let Ok(arg) = arg.parse::() { + num_parallel = arg; + } + } + unhashed_args.splice(idx..(idx + 2), []); + continue; + } + break; + } + num_parallel + }; + + let env_vars = env_vars + .iter() + .filter(|(k, _)| k != "NVCC_PREPEND_FLAGS" && k != "NVCC_APPEND_FLAGS") + .cloned() + .collect::>(); + + let temp_dir = tempfile::Builder::new() + .prefix("sccache_nvcc") + .tempdir() + .unwrap() + .into_path(); + + let mut arguments = vec![]; + + if let Some(lang) = gcc::language_to_gcc_arg(parsed_args.language) { + arguments.extend(vec!["-x".into(), lang.into()]) + } + + let output = &parsed_args + .outputs + .get("obj") + .context("Missing object file output") + .unwrap() + .path; + + arguments.extend(vec![ + "-o".into(), + // Canonicalize the output path if the compile flag indicates we won't + // produce an object file. Since we run cicc and ptxas in a temp dir, + // but we run the host compiler in `cwd` (the dir from which sccache was + // executed), cicc/ptxas `-o` argument should point at the real out path + // that's potentially relative to `cwd`. + match parsed_args.compilation_flag.to_str() { + Some("-c") | Some("--compile") // compile to object + | Some("-dc") | Some("--device-c") // compile to object with -rdc=true + | Some("-dw") | Some("--device-w") // compile to object with -rdc=false + => output.clone().into(), + _ => { + if output.is_absolute() { + output.clone().into() + } else { + cwd.join(output).into() + } + } + }, + ]); + + arguments.extend_from_slice(&parsed_args.preprocessor_args); + arguments.extend_from_slice(&unhashed_args); + arguments.extend_from_slice(&parsed_args.common_args); + arguments.extend_from_slice(&parsed_args.arch_args); + if parsed_args.double_dash_input { + arguments.push("--".into()); + } + + // Canonicalize here so the absolute path to the input is in the + // preprocessor output instead of paths relative to `cwd`. + // + // Since cicc's input is the post-processed source run through cudafe++'s + // transforms, its cache key is sensitive to the preprocessor output. The + // preprocessor embeds the name of the input file in comments, so without + // canonicalizing here, cicc will get cache misses on otherwise identical + // input that should produce a cache hit. + arguments.push( + (if parsed_args.input.is_absolute() { + parsed_args.input.clone() + } else { + cwd.join(&parsed_args.input).canonicalize().unwrap() + }) + .into(), + ); + + let command = NvccCompileCommand { + temp_dir, + keep_dir, + num_parallel, + executable: executable.to_owned(), + arguments, + env_vars, + cwd: cwd.to_owned(), + }; + + Ok((command, None, Cacheable::Yes)) +} + +#[derive(Clone, Debug)] +pub struct NvccCompileCommand { + pub temp_dir: PathBuf, + pub keep_dir: Option, + pub num_parallel: usize, + pub executable: PathBuf, + pub arguments: Vec, + pub env_vars: Vec<(OsString, OsString)>, + pub cwd: PathBuf, +} + +#[async_trait] +impl CompileCommandImpl for NvccCompileCommand { + fn get_executable(&self) -> PathBuf { + self.executable.clone() + } + fn get_arguments(&self) -> Vec { + self.arguments.clone() + } + fn get_env_vars(&self) -> Vec<(OsString, OsString)> { + self.env_vars.clone() + } + fn get_cwd(&self) -> PathBuf { + self.cwd.clone() + } + + async fn execute( + &self, + service: &server::SccacheService, + creator: &T, + ) -> Result + where + T: CommandCreatorSync, + { + let NvccCompileCommand { + temp_dir, + keep_dir, + num_parallel, executable, - parsed_args, + arguments, + env_vars, + cwd, + } = self; + + let nvcc_subcommand_groups = group_nvcc_subcommands_by_compilation_stage( + creator, + executable, + arguments, cwd, + temp_dir.as_path(), + keep_dir.clone(), env_vars, - self.kind(), - rewrite_includes_only, ) + .await?; + + let maybe_keep_temps_then_clean = || { + // If the caller passed `-keep` or `-keep-dir`, copy the + // temp files to the requested location. We do this because we + // override `-keep` and `-keep-dir` in our `nvcc --dryrun` call. + let maybe_keep_temps = keep_dir.as_ref().and_then(|dst| { + fs::create_dir_all(dst) + .and_then(|_| fs::read_dir(temp_dir)) + .and_then(|files| { + files + .filter_map(|path| path.ok()) + .filter_map(|path| { + path.file_name() + .to_str() + .map(|file| (path.path(), file.to_owned())) + }) + .try_fold((), |res, (path, file)| fs::rename(path, dst.join(file))) + }) + .ok() + }); + + maybe_keep_temps + .map_or_else( + || fs::remove_dir_all(temp_dir).ok(), + |_| fs::remove_dir_all(temp_dir).ok(), + ) + .unwrap_or(()); + }; + + let mut output = process::Output { + status: process::ExitStatus::default(), + stdout: vec![], + stderr: vec![], + }; + + let n = nvcc_subcommand_groups.len(); + let cuda_front_end_range = if n < 1 { 0..0 } else { 0..1 }; + let device_compile_range = if n < 2 { 0..0 } else { 1..n - 1 }; + let final_assembly_range = if n < 3 { 0..0 } else { n - 1..n }; + + let num_parallel = device_compile_range.len().min(*num_parallel).max(1); + + for command_group_chunks in [ + nvcc_subcommand_groups[cuda_front_end_range].chunks(1), + // compile multiple device architectures in parallel when `nvcc -t=N` is specified + nvcc_subcommand_groups[device_compile_range].chunks(num_parallel), + nvcc_subcommand_groups[final_assembly_range].chunks(1), + ] { + for command_groups in command_group_chunks { + let results = + futures::future::join_all(command_groups.iter().map(|commands| { + run_nvcc_subcommands_group(service, creator, cwd, commands) + })) + .await; + + for result in results { + output = aggregate_output(output, result.unwrap_or_else(error_to_output)); + } + + if output + .status + .code() + .and_then(|c| (c != 0).then_some(c)) + .is_some() + { + output.stdout.shrink_to_fit(); + output.stderr.shrink_to_fit(); + maybe_keep_temps_then_clean(); + return Err(ProcessError(output).into()); + } + } + } + + output.stdout.shrink_to_fit(); + output.stderr.shrink_to_fit(); + maybe_keep_temps_then_clean(); + Ok(output) + } +} + +#[derive(Clone, Debug)] +pub struct NvccGeneratedSubcommand { + pub exe: PathBuf, + pub args: Vec, + pub cwd: PathBuf, + pub env_vars: Vec<(OsString, OsString)>, + pub cacheable: Cacheable, +} + +async fn group_nvcc_subcommands_by_compilation_stage( + creator: &T, + executable: &Path, + arguments: &[OsString], + cwd: &Path, + tmp: &Path, + keep_dir: Option, + env_vars: &[(OsString, OsString)], +) -> Result>> +where + T: CommandCreatorSync, +{ + // Run `nvcc --dryrun` twice to ensure the commands are correct + // relative to the directory where they're run. + // + // All the "nvcc" commands (cudafe++, cicc, ptxas, nvlink, fatbinary) + // are run in the temp dir, so their arguments should be relative to + // the temp dir, e.g. `cudafe++ [...] "x.cpp4.ii"` + // + // All the host compiler invocations are run in the original `cwd` where + // sccache was invoked. Arguments will be relative to the cwd, except + // any arguments that reference nvcc-generated files should be absolute + // to the temp dir, e.g. `gcc -E [...] x.cu -o /tmp/dir/x.cpp4.ii` + + // Roughly equivalent to: + // ```shell + // cat <(nvcc --dryrun --keep \ + // | nl -n ln -s ' ' -w 1 \ + // | grep -P "^[0-9]+ (cicc|ptxas|cudafe|nvlink|fatbinary)") \ + // \ + // <(nvcc --dryrun --keep --keep-dir /tmp/dir \ + // | nl -n ln -s ' ' -w 1 \ + // | grep -P -v "^[0-9]+ (cicc|ptxas|cudafe|nvlink|fatbinary)") \ + // \ + // | sort -k 1n + // ``` + + let mut env_vars_1 = env_vars.to_vec(); + let mut env_vars_2 = env_vars.to_vec(); + + let is_nvcc_exe = + |exe: &str| matches!(exe, "cicc" | "ptxas" | "cudafe++" | "nvlink" | "fatbinary"); + + let (nvcc_commands, host_commands) = futures::future::try_join( + // Get the nvcc compile command lines with paths relative to `tmp` + select_nvcc_subcommands( + creator, + executable, + cwd, + &mut env_vars_1, + keep_dir.is_none(), + arguments, + is_nvcc_exe, + ), + // Get the host compile command lines with paths relative to `cwd` and absolute paths to `tmp` + select_nvcc_subcommands( + creator, + executable, + cwd, + &mut env_vars_2, + keep_dir.is_none(), + &[arguments, &["--keep-dir".into(), tmp.into()][..]].concat(), + |exe| !is_nvcc_exe(exe), + ), + ) + .await?; + + drop(env_vars_2); + let env_vars = env_vars_1; + + // Now zip the two lists of commands again by sorting on original line index. + // Transform to tuples that include the dir in which each command should run. + let all_commands = nvcc_commands + .iter() + // Run cudafe++, nvlink, cicc, ptxas, and fatbinary in `tmp` + .map(|(idx, exe, args)| (idx, tmp, exe, args)) + .chain( + host_commands + .iter() + // Run host preprocessing and compilation steps in `cwd` + .map(|(idx, exe, args)| (idx, cwd, exe, args)), + ) + .sorted_by(|a, b| Ord::cmp(&a.0, &b.0)); + + // Create groups of commands that should be run sequential relative to each other, + // but can optionally be run in parallel to other groups if the user requested via + // `nvcc --threads`. + + let mut no_more_groups = false; + let mut command_groups: Vec> = vec![]; + + for (_, dir, exe, args) in all_commands { + if log_enabled!(log::Level::Trace) { + trace!( + "transformed nvcc command: {:?}", + [ + &[exe.to_str().unwrap_or_default().to_string()][..], + &args[..] + ] + .concat() + .join(" ") + ); + } + + let cacheable = match exe.file_name().and_then(|s| s.to_str()) { + // cicc and ptxas are cacheable + Some("cicc") | Some("ptxas") => Cacheable::Yes, + // cudafe++, nvlink, and fatbinary are not cacheable + Some("cudafe++") | Some("nvlink") => Cacheable::No, + Some("fatbinary") => { + // The fatbinary command represents the start of the last group + if !no_more_groups { + command_groups.push(vec![]); + } + no_more_groups = true; + Cacheable::No + } + _ => { + // All generated host compiler commands include `-D__CUDA_ARCH_LIST__=`. + // If this definition isn't present, this command is either a new binary + // in the CTK that we don't know about, or a line like `rm x_dlink.reg.c` + // that nvcc generates in certain cases. + if !args + .iter() + .any(|arg| arg.starts_with("-D__CUDA_ARCH_LIST__")) + { + continue; + } + if args.contains(&"-E".to_owned()) { + // Each preprocessor step represents the start of a new command + // group, unless it comes after a call to fatbinary. + if !no_more_groups { + command_groups.push(vec![]); + } + // Do not run preprocessor calls through sccache + Cacheable::No + } else { + // Cache the host compiler calls, since we've marked the outer `nvcc` call + // as non-cacheable. This ensures `sccache nvcc ...` _always_ decomposes the + // nvcc call into its constituent subcommands with `--dryrun`, but only caches + // the final build product once. + // + // `nvcc --dryrun` is the most reliable way to ensure caching nvcc invocations + // is fully sound, since the host compiler could change in a way we can't detect. + // + // Always calling `nvcc --dryrun` ensures that if this happens, the final build + // product is the result of running the host compiler through sccache, which will + // detect things like compiler/version differences when computing the objects' hash. + Cacheable::Yes + } + } + }; + + // Initialize the first group in case the first command isn't a call to the host preprocessor, + // i.e. `nvcc -o test.o -c test.c` + if command_groups.is_empty() { + command_groups.push(vec![]); + } + + match command_groups.last_mut() { + None => {} + Some(group) => { + group.push(NvccGeneratedSubcommand { + exe: exe.clone(), + args: args.clone(), + cwd: dir.into(), + env_vars: env_vars.clone(), + cacheable, + }); + } + }; + } + + Ok(command_groups) +} + +async fn select_nvcc_subcommands( + creator: &T, + executable: &Path, + cwd: &Path, + env_vars: &mut Vec<(OsString, OsString)>, + remap_filenames: bool, + arguments: &[OsString], + select_subcommand: F, +) -> Result)>> +where + F: Fn(&str) -> bool, + T: CommandCreatorSync, +{ + if log_enabled!(log::Level::Trace) { + trace!( + "nvcc dryrun command: {:?}", + [ + &[executable.to_str().unwrap_or_default().to_string()][..], + &dist::osstrings_to_strings(arguments).unwrap_or_default()[..], + &["--dryrun".into(), "--keep".into()][..] + ] + .concat() + .join(" ") + ); + } + + let mut nvcc_dryrun_cmd = creator.clone().new_command_sync(executable); + + nvcc_dryrun_cmd + .args(&[arguments, &["--dryrun".into(), "--keep".into()][..]].concat()) + .env_clear() + .current_dir(cwd) + .envs(env_vars.to_vec()); + + let nvcc_dryrun_output = run_input_output(nvcc_dryrun_cmd, None).await?; + + let mut ext_counts = HashMap::::new(); + let mut old_to_new = HashMap::::new(); + let is_valid_line_re = Regex::new(r"^#\$ (.*)$").unwrap(); + let is_envvar_line_re = Regex::new(r"^([_A-Z]+)=(.*)$").unwrap(); + + let mut lines = Vec::<(usize, PathBuf, Vec)>::new(); + let reader = std::io::BufReader::new(&nvcc_dryrun_output.stderr[..]); + + for pair in reader.lines().enumerate() { + let (idx, line) = pair; + // Select lines that match the `#$ ` prefix from nvcc --dryrun + let line = select_valid_dryrun_lines(&is_valid_line_re, &line?)?; + + let maybe_exe_and_args = + fold_env_vars_or_split_into_exe_and_args(&is_envvar_line_re, env_vars, cwd, &line)?; + + let (exe, mut args) = match maybe_exe_and_args { + Some(exe_and_args) => exe_and_args, + _ => continue, + }; + + // Remap nvcc's generated file names to deterministic names + if remap_filenames { + args = remap_generated_filenames(&args, &mut old_to_new, &mut ext_counts); + } + + match exe.file_name().and_then(|s| s.to_str()) { + None => continue, + Some(exe_name) => { + if select_subcommand(exe_name) { + lines.push((idx, exe, args)); + } + } + } + } + + Ok(lines) +} + +fn select_valid_dryrun_lines(re: &Regex, line: &str) -> Result { + match re.captures(line) { + Some(caps) => { + let (_, [rest]) = caps.extract(); + Ok(rest.to_string()) + } + _ => Err(anyhow!("nvcc error: {:?}", line)), + } +} + +fn fold_env_vars_or_split_into_exe_and_args( + re: &Regex, + env_vars: &mut Vec<(OsString, OsString)>, + cwd: &Path, + line: &str, +) -> Result)>> { + // Intercept the environment variable lines and add them to the env_vars list + if let Some(var) = re.captures(line) { + let (_, [var, val]) = var.extract(); + + let loc = if let Some(idx) = env_vars.iter().position(|(key, _)| key == var) { + idx..idx + 1 + } else { + env_vars.len()..env_vars.len() + }; + + let mut pair = (var.into(), val.into()); + // Handle the special `_SPACE_= ` line + if val != " " { + pair.1 = val + .trim() + .split(' ') + .map(|x| x.trim_start_matches('\"').trim_end_matches('\"')) + .collect::>() + .join(" ") + .into(); + } + env_vars.splice(loc, [pair]); + return Ok(None); + } + + // The rest of the lines are subcommands, so parse into a vec of [cmd, args..] + + let mut line = line.to_owned(); + + // Expand envvars in nvcc subcommands, i.e. "$CICC_PATH/cicc ..." + if let Some(env_vars) = dist::osstring_tuples_to_strings(env_vars) { + for (key, val) in env_vars { + let var = "$".to_owned() + &key; + line = line.replace(&var, &val); + } + } + + let args = match shlex::split(&line) { + Some(args) => args, + None => return Err(anyhow!("Could not parse shell line")), + }; + + let (exe, args) = match args.split_first() { + Some(exe_and_args) => exe_and_args, + None => return Err(anyhow!("Could not split shell line")), + }; + + let env_path = env_vars + .iter() + .find(|(k, _)| k == "PATH") + .map(|(_, p)| p.to_owned()) + .unwrap(); + + let exe = which_in(exe, env_path.into(), cwd)?; + + Ok(Some((exe.clone(), args.to_vec()))) +} + +fn remap_generated_filenames( + args: &[String], + old_to_new: &mut HashMap, + ext_counts: &mut HashMap, +) -> Vec { + args.iter() + .map(|arg| { + // If the argument doesn't start with `-` and is a file that + // ends in one of the below extensions, rename the file to an + // auto-incrementing stable name + let maybe_extension = (!arg.starts_with('-')) + .then(|| { + [ + ".cpp1.ii", + ".cpp4.ii", + ".cudafe1.c", + ".cudafe1.cpp", + ".cudafe1.stub.c", + ] + .iter() + .find(|ext| arg.ends_with(*ext)) + .copied() + }) + .unwrap_or(None); + + // If the argument is a file that ends in one of the above extensions: + // * If it's our first time seeing this file, create a unique name for it + // * If we've seen this file before, lookup its unique name in the hash map + // + // This ensures stable names are in cudafe++ output and #include directives, + // eliminating one source of false-positive cache misses. + match maybe_extension { + Some(extension) => { + old_to_new + .entry(arg.into()) + .or_insert_with(|| { + // Initialize or update the number of files with a given extension: + // compute_70.cudafe1.stub.c -> 0.cudafe1.stub.c + // compute_60.cudafe1.stub.c -> 1.cudafe1.stub.c + // etc. + let count = ext_counts + .entry(extension.into()) + .and_modify(|c| *c += 1) + .or_insert(0) + .to_string(); + // Return `/tmp/dir/{count}.{ext}` as the new name, i.e. `/tmp/dir/0.cudafe1.stub.c` + PathBuf::from(arg) + .parent() + .unwrap_or(Path::new("")) + .join(count + extension) + .as_os_str() + .to_str() + .unwrap_or("") + .to_owned() + }) + .to_owned() + } + None => { + // If the argument isn't a file name with one of our extensions, + // it may _reference_ files we've renamed. Go through and replace + // all old names with their new stable names. + let mut arg = arg.clone(); + for (old, new) in old_to_new.iter() { + arg = arg.replace(old, new); + } + arg + } + } + }) + .collect::>() +} + +async fn run_nvcc_subcommands_group( + service: &server::SccacheService, + creator: &T, + cwd: &Path, + commands: &[NvccGeneratedSubcommand], +) -> Result +where + T: CommandCreatorSync, +{ + let mut output = process::Output { + status: process::ExitStatus::default(), + stdout: vec![], + stderr: vec![], + }; + + for cmd in commands { + let NvccGeneratedSubcommand { + exe, + args, + cwd, + env_vars, + cacheable, + } = cmd; + + if log_enabled!(log::Level::Trace) { + trace!( + "run_commands_sequential cwd={:?}, cmd={:?}", + cwd, + [ + vec![exe.clone().into_os_string().into_string().unwrap()], + args.iter() + .map(|x| shlex::try_quote(x).unwrap().to_string()) + .collect::>() + ] + .concat() + .join(" ") + ); + } + + let out = match cacheable { + Cacheable::No => { + let mut cmd = creator.clone().new_command_sync(exe); + + cmd.args(args) + .current_dir(cwd) + .env_clear() + .envs(env_vars.to_vec()); + + run_input_output(cmd, None) + .await + .unwrap_or_else(error_to_output) + } + Cacheable::Yes => { + let srvc = service.clone(); + let args = dist::strings_to_osstrings(args); + + match srvc + .compiler_info(exe.clone(), cwd.to_owned(), &args, env_vars) + .await + { + Err(err) => error_to_output(err), + Ok(compiler) => match compiler.parse_arguments(&args, cwd, env_vars) { + CompilerArguments::NotCompilation => Err(anyhow!("Not compilation")), + CompilerArguments::CannotCache(why, extra_info) => Err(extra_info + .map_or_else( + || anyhow!("Cannot cache({}): {:?} {:?}", why, exe, args), + |desc| { + anyhow!("Cannot cache({}, {}): {:?} {:?}", why, desc, exe, args) + }, + )), + CompilerArguments::Ok(hasher) => { + srvc.start_compile_task( + compiler, + hasher, + args, + cwd.to_owned(), + env_vars + .iter() + .chain([("SCCACHE_DIRECT".into(), "false".into())].iter()) + .cloned() + .collect::>(), + ) + .await + } + } + .map_or_else(error_to_output, compile_result_to_output), + } + } + }; + + output = aggregate_output(output, out); + + if output.status.code().unwrap_or(0) != 0 { + break; + } + } + + Ok(output) +} + +fn aggregate_output(lhs: process::Output, rhs: process::Output) -> process::Output { + process::Output { + status: exit_status(std::cmp::max( + lhs.status.code().unwrap_or(0), + rhs.status.code().unwrap_or(0), + ) as ExitStatusValue), + stdout: [lhs.stdout, rhs.stdout].concat(), + stderr: [lhs.stderr, rhs.stderr].concat(), + } +} + +fn error_to_output(err: Error) -> process::Output { + match err.downcast::() { + Ok(ProcessError(out)) => out, + Err(err) => process::Output { + status: exit_status(1 as ExitStatusValue), + stdout: vec![], + stderr: err.to_string().into_bytes(), + }, + } +} + +fn compile_result_to_output(res: protocol::CompileFinished) -> process::Output { + process::Output { + status: exit_status(res.retcode.or(res.signal).unwrap_or(0) as ExitStatusValue), + stdout: res.stdout, + stderr: res.stderr, } } @@ -219,14 +1085,20 @@ counted_array!(pub static ARGS: [ArgInfo; _] = [ take_arg!("--compiler-bindir", OsString, CanBeSeparated('='), PassThrough), take_arg!("--compiler-options", OsString, CanBeSeparated('='), PreprocessorArgument), flag!("--cubin", DoCompilation), + flag!("--device-c", DoCompilation), + flag!("--device-w", DoCompilation), flag!("--expt-extended-lambda", PreprocessorArgumentFlag), flag!("--expt-relaxed-constexpr", PreprocessorArgumentFlag), flag!("--extended-lambda", PreprocessorArgumentFlag), flag!("--fatbin", DoCompilation), take_arg!("--generate-code", OsString, CanBeSeparated('='), PassThrough), + flag!("--generate-dependencies-with-compile", NeedDepTarget), + flag!("--generate-nonsystem-dependencies-with-compile", NeedDepTarget), take_arg!("--gpu-architecture", OsString, CanBeSeparated('='), PassThrough), take_arg!("--gpu-code", OsString, CanBeSeparated('='), PassThrough), take_arg!("--include-path", PathBuf, CanBeSeparated('='), PreprocessorArgumentPath), + flag!("--keep", UnhashedFlag), + take_arg!("--keep-dir", OsString, CanBeSeparated('='), Unhashed), take_arg!("--linker-options", OsString, CanBeSeparated('='), PassThrough), take_arg!("--maxrregcount", OsString, CanBeSeparated('='), PassThrough), flag!("--no-host-device-initializer-list", PreprocessorArgumentFlag), @@ -236,6 +1108,7 @@ counted_array!(pub static ARGS: [ArgInfo; _] = [ flag!("--ptx", DoCompilation), take_arg!("--ptxas-options", OsString, CanBeSeparated('='), PassThrough), take_arg!("--relocatable-device-code", OsString, CanBeSeparated('='), PreprocessorArgument), + flag!("--save-temps", UnhashedFlag), take_arg!("--system-include", PathBuf, CanBeSeparated('='), PreprocessorArgumentPath), take_arg!("--threads", OsString, CanBeSeparated('='), Unhashed), @@ -250,17 +1123,21 @@ counted_array!(pub static ARGS: [ArgInfo; _] = [ take_arg!("-code", OsString, CanBeSeparated('='), PassThrough), flag!("-cubin", DoCompilation), flag!("-dc", DoCompilation), + flag!("-dw", DoCompilation), flag!("-expt-extended-lambda", PreprocessorArgumentFlag), flag!("-expt-relaxed-constexpr", PreprocessorArgumentFlag), flag!("-extended-lambda", PreprocessorArgumentFlag), flag!("-fatbin", DoCompilation), take_arg!("-gencode", OsString, CanBeSeparated('='), PassThrough), take_arg!("-isystem", PathBuf, CanBeSeparated('='), PreprocessorArgumentPath), + flag!("-keep", UnhashedFlag), + take_arg!("-keep-dir", OsString, CanBeSeparated('='), Unhashed), take_arg!("-maxrregcount", OsString, CanBeSeparated('='), PassThrough), flag!("-nohdinitlist", PreprocessorArgumentFlag), flag!("-optix-ir", DoCompilation), flag!("-ptx", DoCompilation), take_arg!("-rdc", OsString, CanBeSeparated('='), PreprocessorArgument), + flag!("-save-temps", UnhashedFlag), take_arg!("-t", OsString, CanBeSeparated('='), Unhashed), take_arg!("-x", OsString, CanBeSeparated('='), Language), ]); @@ -341,7 +1218,7 @@ mod test { ) ); assert!(a.preprocessor_args.is_empty()); - assert!(a.common_args.is_empty()); + assert_eq!(ovec!["-c"], a.common_args); } #[test] @@ -360,7 +1237,7 @@ mod test { ) ); assert!(a.preprocessor_args.is_empty()); - assert!(a.common_args.is_empty()); + assert_eq!(ovec!["-c"], a.common_args); } #[test] @@ -379,7 +1256,7 @@ mod test { ) ); assert!(a.preprocessor_args.is_empty()); - assert!(a.common_args.is_empty()); + assert_eq!(ovec!["-c"], a.common_args); } fn test_parse_arguments_simple_cu_msvc() { @@ -397,7 +1274,7 @@ mod test { ) ); assert!(a.preprocessor_args.is_empty()); - assert!(a.common_args.is_empty()); + assert_eq!(ovec!["-c"], a.common_args); } #[test] @@ -416,7 +1293,7 @@ mod test { ) ); assert!(a.preprocessor_args.is_empty()); - assert_eq!(ovec!["-ccbin", "gcc"], a.common_args); + assert_eq!(ovec!["-ccbin", "gcc", "-c"], a.common_args); } #[test] @@ -435,7 +1312,7 @@ mod test { ) ); assert!(a.preprocessor_args.is_empty()); - assert_eq!(ovec!["-ccbin", "/usr/bin/"], a.common_args); + assert_eq!(ovec!["-ccbin", "/usr/bin/", "-c"], a.common_args); } #[test] @@ -486,7 +1363,7 @@ mod test { ) ); assert!(a.preprocessor_args.is_empty()); - assert!(a.common_args.is_empty()); + assert_eq!(ovec!["-c"], a.common_args); } #[test] @@ -593,7 +1470,7 @@ mod test { a.preprocessor_args ); assert!(a.dependency_args.is_empty()); - assert_eq!(ovec!["-fabc"], a.common_args); + assert_eq!(ovec!["-fabc", "-c"], a.common_args); } #[test] @@ -626,7 +1503,7 @@ mod test { ovec!["-MD", "-MF", "foo.o.d", "-MT", "foo.o"], a.dependency_args ); - assert_eq!(ovec!["-fabc"], a.common_args); + assert_eq!(ovec!["-fabc", "-c"], a.common_args); } #[test] @@ -654,7 +1531,7 @@ mod test { ); assert!(a.preprocessor_args.is_empty()); assert_eq!( - ovec!["--generate-code", "arch=compute_61,code=sm_61"], + ovec!["--generate-code", "arch=compute_61,code=sm_61", "-c"], a.common_args ); } @@ -706,7 +1583,8 @@ mod test { "-Xnvlink", "--suppress-stack-size-warning", "-Xcudafe", - "--display_error_number" + "--display_error_number", + "-c" ], a.common_args ); @@ -743,7 +1621,7 @@ mod test { a.preprocessor_args ); assert_eq!( - ovec!["-forward-unknown-to-host-compiler", "-std=c++14"], + ovec!["-forward-unknown-to-host-compiler", "-std=c++14", "-c"], a.common_args ); } diff --git a/src/compiler/ptxas.rs b/src/compiler/ptxas.rs new file mode 100644 index 000000000..3bbcd379e --- /dev/null +++ b/src/compiler/ptxas.rs @@ -0,0 +1,109 @@ +// Copyright 2016 Mozilla Foundation +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![allow(unused_imports, dead_code, unused_variables)] + +use crate::compiler::args::*; +use crate::compiler::c::{ArtifactDescriptor, CCompilerImpl, CCompilerKind, ParsedArguments}; +use crate::compiler::cicc; +use crate::compiler::{ + CCompileCommand, Cacheable, ColorMode, CompileCommand, CompilerArguments, Language, + SingleCompileCommand, +}; +use crate::{counted_array, dist}; + +use crate::mock_command::{CommandCreator, CommandCreatorSync, RunCommand}; + +use async_trait::async_trait; + +use std::collections::HashMap; +use std::ffi::OsString; +use std::fs; +use std::path::{Path, PathBuf}; +use std::process; + +use crate::errors::*; + +/// A unit struct on which to implement `CCompilerImpl`. +#[derive(Clone, Debug)] +pub struct Ptxas { + pub version: Option, +} + +#[async_trait] +impl CCompilerImpl for Ptxas { + fn kind(&self) -> CCompilerKind { + CCompilerKind::Ptxas + } + fn plusplus(&self) -> bool { + true + } + fn version(&self) -> Option { + self.version.clone() + } + fn parse_arguments( + &self, + arguments: &[OsString], + cwd: &Path, + ) -> CompilerArguments { + cicc::parse_arguments(arguments, cwd, Language::Cubin, &ARGS[..]) + } + #[allow(clippy::too_many_arguments)] + async fn preprocess( + &self, + _creator: &T, + _executable: &Path, + parsed_args: &ParsedArguments, + cwd: &Path, + _env_vars: &[(OsString, OsString)], + _may_dist: bool, + _rewrite_includes_only: bool, + _preprocessor_cache_mode: bool, + ) -> Result + where + T: CommandCreatorSync, + { + cicc::preprocess(cwd, parsed_args).await + } + fn generate_compile_commands( + &self, + path_transformer: &mut dist::PathTransformer, + executable: &Path, + parsed_args: &ParsedArguments, + cwd: &Path, + env_vars: &[(OsString, OsString)], + _rewrite_includes_only: bool, + ) -> Result<( + Box>, + Option, + Cacheable, + )> + where + T: CommandCreatorSync, + { + cicc::generate_compile_commands(path_transformer, executable, parsed_args, cwd, env_vars) + .map(|(command, dist_command, cacheable)| { + (CCompileCommand::new(command), dist_command, cacheable) + }) + } +} + +use cicc::ArgData::*; + +counted_array!(pub static ARGS: [ArgInfo; _] = [ + take_arg!("-arch", OsString, CanBeSeparated, PassThrough), + take_arg!("-m", OsString, CanBeSeparated, PassThrough), + take_arg!("-o", PathBuf, Separated, Output), +]); diff --git a/src/dist/http.rs b/src/dist/http.rs index 621fe8277..3d6617a9a 100644 --- a/src/dist/http.rs +++ b/src/dist/http.rs @@ -1078,7 +1078,7 @@ mod client { use super::urls; use crate::errors::*; - const REQUEST_TIMEOUT_SECS: u64 = 600; + const REQUEST_TIMEOUT_SECS: u64 = 1200; const CONNECT_TIMEOUT_SECS: u64 = 5; pub struct Client { diff --git a/src/dist/mod.rs b/src/dist/mod.rs index 96d6936c8..a55b7c1e3 100644 --- a/src/dist/mod.rs +++ b/src/dist/mod.rs @@ -300,6 +300,7 @@ pub fn osstrings_to_strings(osstrings: &[OsString]) -> Option> { .map(|arg| arg.clone().into_string().ok()) .collect::>() } + pub fn osstring_tuples_to_strings( osstring_tuples: &[(OsString, OsString)], ) -> Option> { @@ -309,6 +310,13 @@ pub fn osstring_tuples_to_strings( .collect::>() } +pub fn strings_to_osstrings(strings: &[String]) -> Vec { + strings + .iter() + .map(|arg| std::ffi::OsStr::new(arg).to_os_string()) + .collect::>() +} + // TODO: TryFrom pub fn try_compile_command_to_dist( command: compiler::SingleCompileCommand, diff --git a/tests/system.rs b/tests/system.rs index 386805a24..5c8fa418c 100644 --- a/tests/system.rs +++ b/tests/system.rs @@ -68,7 +68,11 @@ fn adv_key_kind(lang: &str, compiler: &str) -> String { "gcc" | "g++" => language + " [gcc]", "cl.exe" => language + " [msvc]", "nvc" | "nvc++" => language + " [nvhpc]", - "nvcc" => language + " [nvcc]", + "nvcc" => match lang { + "ptx" => language + " [nvcc]", + "cubin" => language + " [nvcc]", + _ => language + " [nvcc]", + }, _ => { trace!("Unknown compiler type: {}", compiler); language + "unknown" @@ -111,20 +115,22 @@ fn compile_cmdline>( fn compile_cuda_cmdline>( compiler: &str, exe: T, + compile_flag: &str, input: &str, output: &str, mut extra_args: Vec, ) -> Vec { let mut arg = match compiler { - "nvcc" => vec_from!(OsString, exe.as_ref(), "-c", input, "-o", output), + "nvcc" => vec_from!(OsString, exe.as_ref(), compile_flag, input, "-o", output), "clang++" => { vec_from!( OsString, exe, - "-c", + compile_flag, input, "--cuda-gpu-arch=sm_50", - format!("-Fo{}", output) + "-o", + output ) } _ => panic!("Unsupported compiler: {}", compiler), @@ -207,7 +213,7 @@ fn test_basic_compile(compiler: Compiler, tempdir: &Path) { let out_file = tempdir.join(OUTPUT); trace!("compile"); sccache_command() - .args(&compile_cmdline(name, &exe, INPUT, OUTPUT, Vec::new())) + .args(compile_cmdline(name, &exe, INPUT, OUTPUT, Vec::new())) .current_dir(tempdir) .envs(env_vars.clone()) .assert() @@ -226,7 +232,7 @@ fn test_basic_compile(compiler: Compiler, tempdir: &Path) { trace!("compile"); fs::remove_file(&out_file).unwrap(); sccache_command() - .args(&compile_cmdline(name, &exe, INPUT, OUTPUT, Vec::new())) + .args(compile_cmdline(name, &exe, INPUT, OUTPUT, Vec::new())) .current_dir(tempdir) .envs(env_vars) .assert() @@ -551,7 +557,7 @@ fn run_sccache_command_tests(compiler: Compiler, tempdir: &Path, preprocessor_ca } } -fn test_cuda_compiles(compiler: &Compiler, tempdir: &Path) { +fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) { let Compiler { name, exe, @@ -564,9 +570,255 @@ fn test_cuda_compiles(compiler: &Compiler, tempdir: &Path) { let out_file = tempdir.join(OUTPUT); trace!("compile A"); sccache_command() - .args(&compile_cuda_cmdline( + .args(compile_cuda_cmdline( name, exe, + "-c", + INPUT_FOR_CUDA_A, + // relative path for output + out_file.file_name().unwrap().to_string_lossy().as_ref(), + Vec::new(), + )) + .current_dir(tempdir) + .envs(env_vars.clone()) + .assert() + .success(); + assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap()); + fs::remove_file(&out_file).unwrap(); + trace!("compile A request stats"); + get_stats(|info| { + assert_eq!(1, info.stats.compile_requests); + assert_eq!(4, info.stats.requests_executed); + assert_eq!(0, info.stats.cache_hits.all()); + assert_eq!(4, info.stats.cache_misses.all()); + assert_eq!(&3, info.stats.cache_misses.get("CUDA").unwrap()); + assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap()); + let adv_cuda_key = adv_key_kind("cuda", compiler.name); + assert_eq!(&3, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap()); + }); + + trace!("compile A"); + sccache_command() + .args(compile_cuda_cmdline( + name, + exe, + "-c", + INPUT_FOR_CUDA_A, + // absolute path for output + out_file.to_string_lossy().as_ref(), + Vec::new(), + )) + .current_dir(tempdir) + .envs(env_vars.clone()) + .assert() + .success(); + assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap()); + fs::remove_file(&out_file).unwrap(); + trace!("compile A request stats"); + get_stats(|info| { + assert_eq!(2, info.stats.compile_requests); + assert_eq!(8, info.stats.requests_executed); + assert_eq!(3, info.stats.cache_hits.all()); + assert_eq!(5, info.stats.cache_misses.all()); + assert_eq!(&2, info.stats.cache_hits.get("CUDA").unwrap()); + assert_eq!(&4, info.stats.cache_misses.get("CUDA").unwrap()); + assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap()); + let adv_cuda_key = adv_key_kind("cuda", compiler.name); + assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap()); + assert_eq!(&4, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap()); + }); + + // By compiling another input source we verify that the pre-processor + // phase is correctly running and outputting text + trace!("compile B"); + sccache_command() + .args(compile_cuda_cmdline( + name, + exe, + "-c", + INPUT_FOR_CUDA_B, + // absolute path for output + out_file.to_string_lossy().as_ref(), + Vec::new(), + )) + .current_dir(tempdir) + .envs(env_vars.clone()) + .assert() + .success(); + assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap()); + fs::remove_file(&out_file).unwrap(); + trace!("compile B request stats"); + get_stats(|info| { + assert_eq!(3, info.stats.compile_requests); + assert_eq!(12, info.stats.requests_executed); + assert_eq!(4, info.stats.cache_hits.all()); + assert_eq!(8, info.stats.cache_misses.all()); + assert_eq!(&3, info.stats.cache_hits.get("CUDA").unwrap()); + assert_eq!(&6, info.stats.cache_misses.get("CUDA").unwrap()); + assert_eq!(&2, info.stats.cache_misses.get("C/C++").unwrap()); + let adv_cuda_key = adv_key_kind("cuda", compiler.name); + assert_eq!(&3, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap()); + assert_eq!(&6, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap()); + }); + + trace!("compile ptx"); + let out_file = tempdir.join("test.ptx"); + sccache_command() + .args(compile_cuda_cmdline( + name, + exe, + "-ptx", + INPUT_FOR_CUDA_A, + // relative path for output + out_file.file_name().unwrap().to_string_lossy().as_ref(), + Vec::new(), + )) + .current_dir(tempdir) + .envs(env_vars.clone()) + .assert() + .success(); + assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap()); + fs::remove_file(&out_file).unwrap(); + trace!("compile ptx request stats"); + get_stats(|info| { + assert_eq!(4, info.stats.compile_requests); + assert_eq!(14, info.stats.requests_executed); + assert_eq!(5, info.stats.cache_hits.all()); + assert_eq!(9, info.stats.cache_misses.all()); + assert_eq!(&4, info.stats.cache_hits.get("CUDA").unwrap()); + assert_eq!(&7, info.stats.cache_misses.get("CUDA").unwrap()); + assert_eq!(&2, info.stats.cache_misses.get("C/C++").unwrap()); + let adv_cuda_key = adv_key_kind("cuda", compiler.name); + assert_eq!(&4, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap()); + assert_eq!(&7, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap()); + }); + + trace!("compile cubin"); + let out_file = tempdir.join("test.cubin"); + sccache_command() + .args(compile_cuda_cmdline( + name, + exe, + "-cubin", + INPUT_FOR_CUDA_A, + // absolute path for output + out_file.to_string_lossy().as_ref(), + Vec::new(), + )) + .current_dir(tempdir) + .envs(env_vars.clone()) + .assert() + .success(); + assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap()); + fs::remove_file(&out_file).unwrap(); + trace!("compile cubin request stats"); + get_stats(|info| { + assert_eq!(5, info.stats.compile_requests); + assert_eq!(17, info.stats.requests_executed); + assert_eq!(7, info.stats.cache_hits.all()); + assert_eq!(10, info.stats.cache_misses.all()); + assert_eq!(&6, info.stats.cache_hits.get("CUDA").unwrap()); + assert_eq!(&8, info.stats.cache_misses.get("CUDA").unwrap()); + assert_eq!(&2, info.stats.cache_misses.get("C/C++").unwrap()); + let adv_cuda_key = adv_key_kind("cuda", compiler.name); + assert_eq!(&6, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap()); + assert_eq!(&8, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap()); + }); +} + +fn test_nvcc_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) { + let Compiler { + name, + exe, + env_vars, + } = compiler; + zero_stats(); + + trace!("run_sccache_command_test: {}", name); + // Compile multiple source files. + copy_to_tempdir(&[INPUT_FOR_CUDA_C, INPUT], tempdir); + + let out_file = tempdir.join(OUTPUT); + trace!("compile CUDA A"); + sccache_command() + .args(compile_cmdline( + name, + &exe, + INPUT_FOR_CUDA_C, + OUTPUT, + Vec::new(), + )) + .current_dir(tempdir) + .envs(env_vars.clone()) + .assert() + .success(); + fs::remove_file(&out_file).unwrap(); + trace!("compile CUDA A"); + sccache_command() + .args(compile_cmdline( + name, + &exe, + INPUT_FOR_CUDA_C, + OUTPUT, + Vec::new(), + )) + .current_dir(tempdir) + .envs(env_vars.clone()) + .assert() + .success(); + fs::remove_file(&out_file).unwrap(); + trace!("compile C++ A"); + sccache_command() + .args(compile_cmdline(name, &exe, INPUT, OUTPUT, Vec::new())) + .current_dir(tempdir) + .envs(env_vars.clone()) + .assert() + .success(); + fs::remove_file(&out_file).unwrap(); + trace!("compile C++ A"); + sccache_command() + .args(compile_cmdline(name, &exe, INPUT, OUTPUT, Vec::new())) + .current_dir(tempdir) + .envs(env_vars) + .assert() + .success(); + fs::remove_file(&out_file).unwrap(); + + trace!("request stats"); + get_stats(|info| { + assert_eq!(4, info.stats.compile_requests); + assert_eq!(12, info.stats.requests_executed); + assert_eq!(5, info.stats.cache_hits.all()); + assert_eq!(7, info.stats.cache_misses.all()); + assert_eq!(&2, info.stats.cache_hits.get("C/C++").unwrap()); + assert_eq!(&3, info.stats.cache_hits.get("CUDA").unwrap()); + assert_eq!(&4, info.stats.cache_misses.get("C/C++").unwrap()); + assert_eq!(&3, info.stats.cache_misses.get("CUDA").unwrap()); + }); +} + +fn run_sccache_nvcc_cuda_command_tests(compiler: Compiler, tempdir: &Path) { + test_nvcc_cuda_compiles(&compiler, tempdir); + test_nvcc_proper_lang_stat_tracking(compiler, tempdir); +} + +fn test_clang_cuda_compiles(compiler: &Compiler, tempdir: &Path) { + let Compiler { + name, + exe, + env_vars, + } = compiler; + trace!("run_sccache_command_test: {}", name); + // Compile multiple source files. + copy_to_tempdir(&[INPUT_FOR_CUDA_A, INPUT_FOR_CUDA_B], tempdir); + + let out_file = tempdir.join(OUTPUT); + trace!("compile A"); + sccache_command() + .args(compile_cuda_cmdline( + name, + exe, + "-c", INPUT_FOR_CUDA_A, OUTPUT, Vec::new(), @@ -589,9 +841,10 @@ fn test_cuda_compiles(compiler: &Compiler, tempdir: &Path) { trace!("compile A"); fs::remove_file(&out_file).unwrap(); sccache_command() - .args(&compile_cuda_cmdline( + .args(compile_cuda_cmdline( name, exe, + "-c", INPUT_FOR_CUDA_A, OUTPUT, Vec::new(), @@ -617,9 +870,10 @@ fn test_cuda_compiles(compiler: &Compiler, tempdir: &Path) { // phase is correctly running and outputting text trace!("compile B"); sccache_command() - .args(&compile_cuda_cmdline( + .args(compile_cuda_cmdline( name, exe, + "-c", INPUT_FOR_CUDA_B, OUTPUT, Vec::new(), @@ -643,7 +897,7 @@ fn test_cuda_compiles(compiler: &Compiler, tempdir: &Path) { }); } -fn test_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) { +fn test_clang_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) { let Compiler { name, exe, @@ -658,7 +912,7 @@ fn test_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) { let out_file = tempdir.join(OUTPUT); trace!("compile CUDA A"); sccache_command() - .args(&compile_cmdline( + .args(compile_cmdline( name, &exe, INPUT_FOR_CUDA_C, @@ -672,7 +926,7 @@ fn test_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) { fs::remove_file(&out_file).unwrap(); trace!("compile CUDA A"); sccache_command() - .args(&compile_cmdline( + .args(compile_cmdline( name, &exe, INPUT_FOR_CUDA_C, @@ -686,7 +940,7 @@ fn test_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) { fs::remove_file(&out_file).unwrap(); trace!("compile C++ A"); sccache_command() - .args(&compile_cmdline(name, &exe, INPUT, OUTPUT, Vec::new())) + .args(compile_cmdline(name, &exe, INPUT, OUTPUT, Vec::new())) .current_dir(tempdir) .envs(env_vars.clone()) .assert() @@ -694,7 +948,7 @@ fn test_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) { fs::remove_file(&out_file).unwrap(); trace!("compile C++ A"); sccache_command() - .args(&compile_cmdline(name, &exe, INPUT, OUTPUT, Vec::new())) + .args(compile_cmdline(name, &exe, INPUT, OUTPUT, Vec::new())) .current_dir(tempdir) .envs(env_vars) .assert() @@ -714,9 +968,9 @@ fn test_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) { }); } -fn run_sccache_cuda_command_tests(compiler: Compiler, tempdir: &Path) { - test_cuda_compiles(&compiler, tempdir); - test_proper_lang_stat_tracking(compiler, tempdir); +fn run_sccache_clang_cuda_command_tests(compiler: Compiler, tempdir: &Path) { + test_clang_cuda_compiles(&compiler, tempdir); + test_clang_proper_lang_stat_tracking(compiler, tempdir); } fn test_hip_compiles(compiler: &Compiler, tempdir: &Path) { @@ -734,7 +988,7 @@ fn test_hip_compiles(compiler: &Compiler, tempdir: &Path) { let out_file = tempdir.join(OUTPUT); trace!("compile A"); sccache_command() - .args(&compile_hip_cmdline( + .args(compile_hip_cmdline( name, exe, INPUT_FOR_HIP_A, @@ -760,7 +1014,7 @@ fn test_hip_compiles(compiler: &Compiler, tempdir: &Path) { trace!("compile A"); fs::remove_file(&out_file).unwrap(); sccache_command() - .args(&compile_hip_cmdline( + .args(compile_hip_cmdline( name, exe, INPUT_FOR_HIP_A, @@ -789,7 +1043,7 @@ fn test_hip_compiles(compiler: &Compiler, tempdir: &Path) { // phase is correctly running and outputting text trace!("compile B"); sccache_command() - .args(&compile_hip_cmdline( + .args(compile_hip_cmdline( name, exe, INPUT_FOR_HIP_B, @@ -831,7 +1085,7 @@ fn test_hip_compiles_multi_targets(compiler: &Compiler, tempdir: &Path) { let out_file = tempdir.join(OUTPUT); trace!("compile A with gfx900 and gfx1030"); sccache_command() - .args(&compile_hip_cmdline( + .args(compile_hip_cmdline( name, exe, INPUT_FOR_HIP_A, @@ -858,7 +1112,7 @@ fn test_hip_compiles_multi_targets(compiler: &Compiler, tempdir: &Path) { trace!("compile A with with gfx900 and gfx1030 again"); fs::remove_file(&out_file).unwrap(); sccache_command() - .args(&compile_hip_cmdline( + .args(compile_hip_cmdline( name, exe, INPUT_FOR_HIP_A, @@ -888,7 +1142,7 @@ fn test_hip_compiles_multi_targets(compiler: &Compiler, tempdir: &Path) { // phase is correctly running and outputting text trace!("compile B with gfx900 and gfx1030"); sccache_command() - .args(&compile_hip_cmdline( + .args(compile_hip_cmdline( name, exe, INPUT_FOR_HIP_B, @@ -967,7 +1221,7 @@ fn test_clang_cache_whitespace_normalization( println!("compile whitespace"); sccache_command() - .args(&compile_cmdline( + .args(compile_cmdline( name, &exe, INPUT_WITH_WHITESPACE, @@ -988,7 +1242,7 @@ fn test_clang_cache_whitespace_normalization( println!("compile whitespace_alt"); sccache_command() - .args(&compile_cmdline( + .args(compile_cmdline( name, &exe, INPUT_WITH_WHITESPACE_ALT, @@ -1189,7 +1443,11 @@ fn test_cuda_sccache_command(preprocessor_cache_mode: bool) { &sccache_cached_cfg_path, ); for compiler in compilers { - run_sccache_cuda_command_tests(compiler, tempdir.path()); + match compiler.name { + "nvcc" => run_sccache_nvcc_cuda_command_tests(compiler, tempdir.path()), + "clang++" => run_sccache_clang_cuda_command_tests(compiler, tempdir.path()), + _ => {} + } zero_stats(); } stop_local_daemon();