diff --git a/docs/usage.md b/docs/usage.md index 7451a1f..6cf1016 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -109,7 +109,9 @@ This data can be noisy an it will be necessary to apply domain-specific ad-hoc f Typically, this can be a achieved with a combination of UMI and cell doublet filtering. ::: -## Config File +## Configuration + +### Config File You may generate and supply `pycashier` with a toml config file using `-c/--config`. The expected structure is each command followed by key value pairs of flags with hypens replaced by underscores: @@ -143,6 +145,18 @@ For convenience, you can update/create your config file with `pycasher COMMAND - "Explicit" will only save parameters already included in the config file or specified at runtime. "Full" will include all parameters, again, maintaining preset values in config or specified at runtime. +See the [cli reference](./cli.rst) for all options for each command. + +### Executables + +`Pycashier` depends on three executables (`cutadapt`, `starcode`, `fastp`) existing on your `$PATH`. You can force the use of a specific executable using environment variables of the form `PYCASHIER_<TOOL>` (`PYCASHIER_CUTADAPT`, `PYCASHIER_STARCODE`, `PYCASHIER_FASTP`). +For example, to override the `cutadapt` executable, you could use something like the command below: + +```sh +PYCASHIER_CUTADAPT="$HOME/important-software/cutadapt-v4" pycashier extract +``` + + ## Caveats Pycashier will **NOT** overwrite intermediary files. 
If there is an issue in the process, diff --git a/src/pycashier/_checks.py b/src/pycashier/_checks.py index 2faf9e6..9e2ce53 100644 --- a/src/pycashier/_checks.py +++ b/src/pycashier/_checks.py @@ -9,11 +9,12 @@ from rich.table import Table from rich.text import Text +from .deps import cutadapt, fastp, starcode from .term import term -PACKAGES = ["cutadapt", "fastp", "starcode", "pysam"] +PACKAGES = {"cutadapt": cutadapt, "fastp": fastp, "starcode": starcode, "pysam": ""} CMD_PACKAGES: Dict[str, List[str]] = { - "": PACKAGES, + "": sorted(PACKAGES), "receipt": [], "merge": ["fastp"], "extract": ["fastp", "cutadapt", "starcode"], @@ -59,7 +60,7 @@ def pre_run_check(command: str = "", show: bool = False) -> None: command: Name of pycashier subcommand. show: If true, show table regardless. """ - pkg_locations = {name: find_tool(name) for name in PACKAGES} + pkg_locations = {name: find_tool(name, path) for name, path in PACKAGES.items()} cmd_pkg_locations = {k: pkg_locations[k] for k in CMD_PACKAGES[command]} if None in cmd_pkg_locations.values() or show: @@ -71,9 +72,8 @@ def pre_run_check(command: str = "", show: bool = False) -> None: title="Dependencies", ), ) - term.print(f"python exe: [bold]{sys.executable}[/bold]") - term.print( + f"python exe: [bold]{sys.executable}[/bold]" "It's recommended to install pycashier within a conda environment.\n" "See the repo for details: [link]https://github.com/brocklab/pycashier[/link]", ) @@ -86,7 +86,7 @@ def pre_run_check(command: str = "", show: bool = False) -> None: check_file_permissions() -def find_tool(name: str) -> Optional[str]: +def find_tool(name: str, path: str = "") -> Optional[str]: """Check whether `name` is on PATH and marked as executable. 
Args: @@ -96,7 +96,7 @@ def find_tool(name: str) -> Optional[str]: """ if not name == "pysam": - return which(name) + return which(name if not path else path) spec = importlib.util.find_spec("pysam") if spec: diff --git a/src/pycashier/deps.py b/src/pycashier/deps.py new file mode 100644 index 0000000..31983c5 --- /dev/null +++ b/src/pycashier/deps.py @@ -0,0 +1,5 @@ +import os + +cutadapt = os.getenv("PYCASHIER_CUTADAPT", "cutadapt") +fastp = os.getenv("PYCASHIER_FASTP", "fastp") +starcode = os.getenv("PYCASHIER_STARCODE", "starcode") diff --git a/src/pycashier/sample.py b/src/pycashier/sample.py index ea7c31a..0a62ee6 100644 --- a/src/pycashier/sample.py +++ b/src/pycashier/sample.py @@ -5,6 +5,7 @@ from pathlib import Path from typing import Any, Callable, Dict, Optional, Tuple +from .deps import cutadapt, fastp, starcode from .filters import read_filter from .options import PycashierOpts from .scrna import labeled_fastq_to_tsv, sam_to_name_labeled_fastq @@ -166,14 +167,17 @@ def _cutadapt( msg, ): command = ( - "cutadapt " - f"-e {self.opts.error} " - f"-j {self.opts.threads} " - f"--minimum-length={self.opts.length - self.opts.distance} " - f"--maximum-length={self.opts.length + self.opts.distance} " - f"{adapter_string} " - f"{self.opts.cutadapt_args or ''} " - f"-o {self.files.barcode_fastq} {self.files.quality}" + cutadapt + + " " + + ( + f"-e {self.opts.error} " + f"-j {self.opts.threads} " + f"--minimum-length={self.opts.length - self.opts.distance} " + f"--maximum-length={self.opts.length + self.opts.distance} " + f"{adapter_string} " + f"{self.opts.cutadapt_args or ''} " + f"-o {self.files.barcode_fastq} {self.files.quality}" + ) ) with term.process(msg): return run_cmd( @@ -196,8 +200,12 @@ def _starcode(self) -> bool | None: if not check_output(self.files.clustered, msg): command = ( - f"starcode -d {self.opts.distance} -r {self.opts.ratio} " - f"-t {self.opts.threads} -i {self.files.barcode_fastq} -o {self.files.clustered}" + starcode + + " " + + ( + 
f"-d {self.opts.distance} -r {self.opts.ratio} " + f"-t {self.opts.threads} -i {self.files.barcode_fastq} -o {self.files.clustered}" + ) ) with term.process(msg): return run_cmd( @@ -236,14 +244,17 @@ def _fastp_merge( msg = "merging paired end reads with fastp" if not check_output(self.merged, msg): command = ( - "fastp " - f"-i {self.fastqR1} " - f"-I {self.fastqR2} " - f"-w {self.opts.threads} " - f"-j {self.opts.pipeline}/merge_qc/{self.name}.json " - f"-h {self.opts.pipeline}/merge_qc/{self.name}.html " - f"--merged_out {self.merged} " - f"{self.opts.fastp_args or ''}" + fastp + + " " + + ( + f"-i {self.fastqR1} " + f"-I {self.fastqR2} " + f"-w {self.opts.threads} " + f"-j {self.opts.pipeline}/merge_qc/{self.name}.json " + f"-h {self.opts.pipeline}/merge_qc/{self.name}.html " + f"--merged_out {self.merged} " + f"{self.opts.fastp_args or ''}" + ) ) with term.process(msg): @@ -288,14 +299,17 @@ def _pysam_cutadapt( if not check_output(self.barcode_fastq, msg): command = ( - "cutadapt " - f"-e {self.opts.error} " - f"-j {self.opts.threads} " - f"--minimum-length={self.opts.minimum_length} " - f"--maximum-length={self.opts.length} " - f"{adapter_string} " - f"{self.opts.cutadapt_args or ''} " - f"-o {self.barcode_fastq} {self.fastq}" + cutadapt + + " " + + ( + f"-e {self.opts.error} " + f"-j {self.opts.threads} " + f"--minimum-length={self.opts.minimum_length} " + f"--maximum-length={self.opts.length} " + f"{adapter_string} " + f"{self.opts.cutadapt_args or ''} " + f"-o {self.barcode_fastq} {self.fastq}" + ) ) with term.process(msg): return run_cmd(