Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(#31): add support for overriding runtime dependencies #38

Merged
merged 1 commit into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,9 @@ This data can be noisy and it will be necessary to apply domain-specific ad-hoc f
Typically, this can be achieved with a combination of UMI and cell doublet filtering.
:::

## Config File
## Configuration

### Config File

You may generate and supply `pycashier` with a toml config file using `-c/--config`.
The expected structure is each command followed by key-value pairs of flags with hyphens replaced by underscores:
Expand Down Expand Up @@ -143,6 +145,18 @@ For convenience, you can update/create your config file with `pycasher COMMAND -
"Explicit" will only save parameters already included in the config file or specified at runtime.
"Full" will include all parameters, again, maintaining preset values in config or specified at runtime.

See the [cli reference](./cli.rst) for all options for each command.

### Executables

`Pycashier` depends on three executables (`cutadapt`, `starcode`, `fastp`) existing on your `$PATH`. You can force the use of a specific executable using environment variables of the form `PYCASHIER_<NAME>`.
For example, to override the `cutadapt` executable used, you could run something like the command below:

```sh
PYCASHIER_CUTADAPT="$HOME/important-software/cutadapt-v4" pycashier extract
```


## Caveats

Pycashier will **NOT** overwrite intermediary files. If there is an issue in the process,
Expand Down
14 changes: 7 additions & 7 deletions src/pycashier/_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@
from rich.table import Table
from rich.text import Text

from .deps import cutadapt, fastp, starcode
from .term import term

PACKAGES = ["cutadapt", "fastp", "starcode", "pysam"]
PACKAGES = {"cutadapt": cutadapt, "fastp": fastp, "starcode": starcode, "pysam": ""}
CMD_PACKAGES: Dict[str, List[str]] = {
"": PACKAGES,
"": sorted(PACKAGES),
"receipt": [],
"merge": ["fastp"],
"extract": ["fastp", "cutadapt", "starcode"],
Expand Down Expand Up @@ -59,7 +60,7 @@ def pre_run_check(command: str = "", show: bool = False) -> None:
command: Name of pycashier subcommand.
show: If true, show table regardless.
"""
pkg_locations = {name: find_tool(name) for name in PACKAGES}
pkg_locations = {name: find_tool(name, path) for name, path in PACKAGES.items()}
cmd_pkg_locations = {k: pkg_locations[k] for k in CMD_PACKAGES[command]}

if None in cmd_pkg_locations.values() or show:
Expand All @@ -71,9 +72,8 @@ def pre_run_check(command: str = "", show: bool = False) -> None:
title="Dependencies",
),
)
term.print(f"python exe: [bold]{sys.executable}[/bold]")

term.print(
f"python exe: [bold]{sys.executable}[/bold]"
"It's recommended to install pycashier within a conda environment.\n"
"See the repo for details: [link]https://github.com/brocklab/pycashier[/link]",
)
Expand All @@ -86,7 +86,7 @@ def pre_run_check(command: str = "", show: bool = False) -> None:
check_file_permissions()


def find_tool(name: str) -> Optional[str]:
def find_tool(name: str, path: str = "") -> Optional[str]:
"""Check whether `name` is on PATH and marked as executable.

Args:
Expand All @@ -96,7 +96,7 @@ def find_tool(name: str) -> Optional[str]:
"""

if not name == "pysam":
return which(name)
return which(name if not path else path)

spec = importlib.util.find_spec("pysam")
if spec:
Expand Down
5 changes: 5 additions & 0 deletions src/pycashier/deps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import os

# Executable to invoke for each external tool. Each defaults to the bare tool
# name and can be overridden with the matching PYCASHIER_<NAME> environment
# variable. `or` is used instead of a getenv default so that a variable that is
# set but empty falls back to the default rather than yielding an empty command
# name.
cutadapt = os.getenv("PYCASHIER_CUTADAPT") or "cutadapt"
fastp = os.getenv("PYCASHIER_FASTP") or "fastp"
starcode = os.getenv("PYCASHIER_STARCODE") or "starcode"
66 changes: 40 additions & 26 deletions src/pycashier/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pathlib import Path
from typing import Any, Callable, Dict, Optional, Tuple

from .deps import cutadapt, fastp, starcode
from .filters import read_filter
from .options import PycashierOpts
from .scrna import labeled_fastq_to_tsv, sam_to_name_labeled_fastq
Expand Down Expand Up @@ -166,14 +167,17 @@ def _cutadapt(
msg,
):
command = (
"cutadapt "
f"-e {self.opts.error} "
f"-j {self.opts.threads} "
f"--minimum-length={self.opts.length - self.opts.distance} "
f"--maximum-length={self.opts.length + self.opts.distance} "
f"{adapter_string} "
f"{self.opts.cutadapt_args or ''} "
f"-o {self.files.barcode_fastq} {self.files.quality}"
cutadapt
+ " "
+ (
f"-e {self.opts.error} "
f"-j {self.opts.threads} "
f"--minimum-length={self.opts.length - self.opts.distance} "
f"--maximum-length={self.opts.length + self.opts.distance} "
f"{adapter_string} "
f"{self.opts.cutadapt_args or ''} "
f"-o {self.files.barcode_fastq} {self.files.quality}"
)
)
with term.process(msg):
return run_cmd(
Expand All @@ -196,8 +200,12 @@ def _starcode(self) -> bool | None:

if not check_output(self.files.clustered, msg):
command = (
f"starcode -d {self.opts.distance} -r {self.opts.ratio} "
f"-t {self.opts.threads} -i {self.files.barcode_fastq} -o {self.files.clustered}"
starcode
+ " "
+ (
f"-d {self.opts.distance} -r {self.opts.ratio} "
f"-t {self.opts.threads} -i {self.files.barcode_fastq} -o {self.files.clustered}"
)
)
with term.process(msg):
return run_cmd(
Expand Down Expand Up @@ -236,14 +244,17 @@ def _fastp_merge(
msg = "merging paired end reads with fastp"
if not check_output(self.merged, msg):
command = (
"fastp "
f"-i {self.fastqR1} "
f"-I {self.fastqR2} "
f"-w {self.opts.threads} "
f"-j {self.opts.pipeline}/merge_qc/{self.name}.json "
f"-h {self.opts.pipeline}/merge_qc/{self.name}.html "
f"--merged_out {self.merged} "
f"{self.opts.fastp_args or ''}"
fastp
+ " "
+ (
f"-i {self.fastqR1} "
f"-I {self.fastqR2} "
f"-w {self.opts.threads} "
f"-j {self.opts.pipeline}/merge_qc/{self.name}.json "
f"-h {self.opts.pipeline}/merge_qc/{self.name}.html "
f"--merged_out {self.merged} "
f"{self.opts.fastp_args or ''}"
)
)

with term.process(msg):
Expand Down Expand Up @@ -288,14 +299,17 @@ def _pysam_cutadapt(

if not check_output(self.barcode_fastq, msg):
command = (
"cutadapt "
f"-e {self.opts.error} "
f"-j {self.opts.threads} "
f"--minimum-length={self.opts.minimum_length} "
f"--maximum-length={self.opts.length} "
f"{adapter_string} "
f"{self.opts.cutadapt_args or ''} "
f"-o {self.barcode_fastq} {self.fastq}"
cutadapt
+ " "
+ (
f"-e {self.opts.error} "
f"-j {self.opts.threads} "
f"--minimum-length={self.opts.minimum_length} "
f"--maximum-length={self.opts.length} "
f"{adapter_string} "
f"{self.opts.cutadapt_args or ''} "
f"-o {self.barcode_fastq} {self.fastq}"
)
)
with term.process(msg):
return run_cmd(
Expand Down