Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: RNA trimming for arriba #337

Merged
merged 3 commits into from
Sep 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ fastp_pe:
# Default enabled trimming parameters for fastp. Specified for clarity.
extra: "--trim_poly_g --qualified_quality_phred 15 --unqualified_percent_limit 40 --n_base_limit 5 --length_required 15"

# Settings for the arriba-specific fastp trimming rule (prealignment_fastp_pe_arriba in workflow/Snakefile),
# which reads this section's "extra" value as additional fastp command-line arguments.
# NOTE(review): "--max_len1 100" presumably caps read 1 at 100 bases -- confirm against the fastp documentation.
fastp_pe_arriba:
container: "docker://hydragenetics/fastp:0.20.1"
extra: "--max_len1 100"

fastqc:
container: "docker://hydragenetics/fastqc:0.11.9"

Expand Down
5 changes: 5 additions & 0 deletions config/resources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ fastp_pe:
mem_mb: 30720
mem_per_cpu: 6144

# Resources for the arriba-specific fastp trimming rule; workflow/Snakefile looks up threads, mem_mb and
# mem_per_cpu under the "fastp_pe_arriba" key and falls back to default_resources for anything missing.
# NOTE(review): presumably this file is merged into the main config at load time -- confirm.
fastp_pe_arriba:
threads: 5
mem_mb: 30720
mem_per_cpu: 6144

fuseq_wes:
threads: 2
mem_mb: 12288
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
hydra-genetics==0.15.0
pandas>=1.3.1
snakemake==7.13.0
snakemake==7.18.0
singularity==3.0.0
jinja2==3.0.1
networkx
Expand Down
58 changes: 58 additions & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,60 @@ module prealignment:
use rule * from prealignment as prealignment_*


# Arriba-specific variant of the prealignment module's fastp_pe rule.
# Writes its trimmed reads and reports to a separate prealignment/fastp_pe_arriba/ directory
# and takes every tunable from the "fastp_pe_arriba" config section.
use rule fastp_pe from prealignment as prealignment_fastp_pe_arriba with:
    output:
        # Trimmed read pair; temp() so the files are removed once downstream rules have consumed them.
        trimmed=temp(
            [
                "prealignment/fastp_pe_arriba/{sample}_{type}_{flowcell}_{lane}_{barcode}_fastq1.fastq.gz",
                "prealignment/fastp_pe_arriba/{sample}_{type}_{flowcell}_{lane}_{barcode}_fastq2.fastq.gz",
            ]
        ),
        html="prealignment/fastp_pe_arriba/{sample}_{type}_{flowcell}_{lane}_{barcode}_fastp.html",
        json="prealignment/fastp_pe_arriba/{sample}_{type}_{flowcell}_{lane}_{barcode}_fastp.json",
    params:
        # get_fastq_adapter() is split on "," and the first two fields are passed as the R1/R2 adapters.
        adapters=lambda wildcards: " --adapter_sequence {} --adapter_sequence_r2 {} ".format(
            *get_fastq_adapter(units, wildcards).split(",")
        ),
        extra=config.get("fastp_pe_arriba", {}).get("extra", ""),
    log:
        "prealignment/fastp_pe_arriba/{sample}_{type}_{flowcell}_{lane}_{barcode}_fastq.fastq.gz.log",
    benchmark:
        repeat(
            "prealignment/fastp_pe_arriba/{sample}_{type}_{flowcell}_{lane}_{barcode}_fastq.fastq.gz.benchmark.tsv",
            config.get("fastp_pe_arriba", {}).get("benchmark_repeats", 1),
        )
    resources:
        mem_mb=config.get("fastp_pe_arriba", {}).get("mem_mb", config["default_resources"]["mem_mb"]),
        mem_per_cpu=config.get("fastp_pe_arriba", {}).get("mem_per_cpu", config["default_resources"]["mem_per_cpu"]),
        partition=config.get("fastp_pe_arriba", {}).get("partition", config["default_resources"]["partition"]),
        threads=config.get("fastp_pe_arriba", {}).get("threads", config["default_resources"]["threads"]),
        time=config.get("fastp_pe_arriba", {}).get("time", config["default_resources"]["time"]),
    threads: config.get("fastp_pe_arriba", {}).get("threads", config["default_resources"]["threads"])
    # Honour the "container" key that config.yaml defines for fastp_pe_arriba; without this override the
    # key is never read for this rule. Falls back to the fastp_pe container and then to the default
    # container, so configurations that do not define fastp_pe_arriba.container still resolve an image.
    container:
        config.get("fastp_pe_arriba", {}).get(
            "container",
            config.get("fastp_pe", {}).get("container", config["default_container"]),
        )


# Arriba-specific variant of the prealignment module's "merged" rule.
# Takes the per-unit fastq files listed by merged_input_arriba and produces one
# prealignment/merged_arriba/ fastq per sample, type and read.
use rule merged from prealignment as prealignment_merged_arriba with:
input:
fastq=merged_input_arriba,
output:
# temp(): the merged fastq is removed once the rules consuming it have finished.
fastq=temp("prealignment/merged_arriba/{sample}_{type}_{read}.fastq.gz"),
log:
"prealignment/merged_arriba/{sample}_{type}_{read}.fastq.gz.log",
benchmark:
repeat(
"prealignment/merged_arriba/{sample}_{type}_{read}.fastq.gz.benchmark.tsv",
config.get("merged_arriba", {}).get("benchmark_repeats", 1),
)
# Every setting below is looked up under the "merged_arriba" config key and falls back to the
# pipeline-wide defaults (default_resources / default_container) when the key is missing.
resources:
mem_mb=config.get("merged_arriba", {}).get("mem_mb", config["default_resources"]["mem_mb"]),
mem_per_cpu=config.get("merged_arriba", {}).get("mem_per_cpu", config["default_resources"]["mem_per_cpu"]),
partition=config.get("merged_arriba", {}).get("partition", config["default_resources"]["partition"]),
threads=config.get("merged_arriba", {}).get("threads", config["default_resources"]["threads"]),
time=config.get("merged_arriba", {}).get("time", config["default_resources"]["time"]),
threads: config.get("merged_arriba", {}).get("threads", config["default_resources"]["threads"])
container:
config.get("merged_arriba", {}).get("container", config["default_container"])


module alignment:
snakefile:
get_module_snakefile(config, "hydra-genetics/alignment", path="workflow/Snakefile", tag="v0.3.1")
Expand All @@ -71,6 +125,10 @@ use rule * from alignment as alignment_*


use rule star from alignment as alignment_star with:
input:
fq1="prealignment/merged_arriba/{sample}_{type}_fastq1.fastq.gz",
fq2="prealignment/merged_arriba/{sample}_{type}_fastq2.fastq.gz",
idx=config.get("star", {}).get("genome_index", ""),
params:
extra=lambda wildcards: "%s %s"
% (
Expand Down
8 changes: 8 additions & 0 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@ wildcard_constraints:
type="N|T|R",


def merged_input_arriba(wildcards):
    """Return the arriba-trimmed fastq paths to merge for one sample, type and read.

    One path is produced per unit returned by ``get_units`` for the requested
    sample/type. The ``sample``, ``type`` and ``read`` placeholders are
    brace-escaped so ``expand`` leaves them in the returned patterns and only
    fills in the flowcell/lane/barcode part.
    """
    unit_ids = []
    for unit in get_units(units, wildcards, wildcards.type):
        unit_ids.append("{}_{}_{}".format(unit.flowcell, unit.lane, unit.barcode))
    return expand(
        "prealignment/fastp_pe_arriba/{{sample}}_{{type}}_{flowcell_lane_barcode}_{{read}}.fastq.gz",
        flowcell_lane_barcode=unit_ids,
    )


def compile_output_list(wildcards):
output_files = []
types = set([unit.type for unit in units.itertuples()])
Expand Down
Loading