From 872c3f3f17674fc4faa591b134456be39cab20f9 Mon Sep 17 00:00:00 2001 From: Christian Schudoma Date: Fri, 19 Nov 2021 20:31:11 +0100 Subject: [PATCH] --> version 0.4 * added preprocessing of raw reads * added `--preprocessed` flag to suppress preprocessing for already preprocessed datasets * R1/R2 read lengths are now assessed and homogenised separately (i.e. they can have different lengths as supported by Figaro) * initial read length distribution assessment is now performed by fastqc * fastq filenames are normalised to R1/R2 naming scheme * readlen homogenisation is now performed by bbduk * added mapseq-profiling of asv sequences * added two parameters (`--dada2_chimera_method`, `--dada2_chimera_min_fold_parent_over_abundance`) for dada2 chimera removal Bugfixes/Workarounds * fixed an R script error that would generate the wrong table format when having a single sample * in `dada2_analysis` commented vortex-code (prevalence check) as it fails for certain samples Co-authored-by: Christian Schudoma --- README.md | 56 +++- R_scripts/dada2.R | 95 ------ R_scripts/dada2_analysis.R | 121 ------- R_scripts/dada2_analysis_paired.R | 38 ++- R_scripts/dada2_analysis_single.R | 41 ++- R_scripts/dada2_preprocess.R | 82 ----- R_scripts/dada2_preprocess_single.R | 2 +- assets/adapters.fa | 317 ++++++++++++++++++ config/run.config | 76 ++++- docs/img/gaga2_flow.png | Bin 0 -> 49291 bytes main.nf | 235 ++++++++++--- gaga2_conda.yml => misc/gaga2_conda.yml | 0 gaga2_install.sh => misc/gaga2_install.sh | 0 .../legacy_scripts}/check_readlengths.py | 1 + .../legacy_scripts}/gather_fastq_files.py | 0 misc/legacy_scripts/hltrim.py | 88 +++++ {scripts => misc/legacy_scripts}/ltrim.py | 0 modules/nevermore/functions.nf | 15 + modules/nevermore/qc/bbduk.nf | 35 ++ modules/nevermore/qc/fastqc.nf | 22 ++ modules/profilers/mapseq.nf | 58 ++++ nextflow.config | 2 +- scripts/assess_readlengths.py | 64 ++++ 23 files changed, 937 insertions(+), 411 deletions(-) delete mode 100644 R_scripts/dada2.R delete mode 100644 R_scripts/dada2_analysis.R delete mode 100644 R_scripts/dada2_preprocess.R create mode 100755 assets/adapters.fa create mode 100644 docs/img/gaga2_flow.png rename gaga2_conda.yml => misc/gaga2_conda.yml (100%) rename gaga2_install.sh => misc/gaga2_install.sh (100%) rename {scripts => misc/legacy_scripts}/check_readlengths.py (94%) rename {scripts => misc/legacy_scripts}/gather_fastq_files.py (100%) create mode 100644 misc/legacy_scripts/hltrim.py rename {scripts => misc/legacy_scripts}/ltrim.py (100%) create mode 100644 modules/nevermore/functions.nf create mode 100644 modules/nevermore/qc/bbduk.nf create mode 100644 modules/nevermore/qc/fastqc.nf create mode 100644 modules/profilers/mapseq.nf create mode 100644 scripts/assess_readlengths.py diff --git a/README.md b/README.md index 498a4f7..938d5e9 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,20 @@ # gaga2 - automated 16S amplicon analysis with Figaro/DADA2 +![](docs/img/gaga2_flow.png) + ## Installation instructions -`TBD` +`gaga2` requires a working `nextflow` installation (v20.4+). + +Other dependencies: +* bbmap +* fastqc +* figaro +* R v4+ with dada2, devtools, tidyverse, and cowplot installed + +For convenience, `gaga2` comes with a Singularity container with all dependencies installed. + +```singularity pull oras://ghcr.io/zellerlab/gaga2:latest``` + ## Usage instructions @@ -9,8 +22,8 @@ `gaga2` takes as input Illumina paired-end 16S amplicon sequences (e.g. sequenced on a MiSeq). #### Prerequisites -* Read files need to be named according the typical pattern `_*[12].{fastq,fq,fastq.gz,fq.gz}` -and need to be arranged in a sample-based directory structure: +* Read files need to be named according the typical pattern `_R?[12].{fastq,fq,fastq.gz,fq.gz}`. +They should, but don't have to, be arranged in a sample-based directory structure: ``` (aka "input_dir") @@ -26,29 +39,42 @@ and need to be arranged in a sample-based directory structure: |____ ``` -* If input reads have been "invasively" preprocessed (as evidenced by length differences between reads), -`gaga2` will generate an empty sentinel file `/SKIP_FIGARO`. -This will prevent `Figaro` from executing and should automatically advance to `dada2` processing. +A flat directory structure (with all read files in the same directory) or a deeply-branched (with read files scattered over multiple levels) should also work. + +If `gaga2` preprocesses the reads, it will automatically use `_R1/2` endings internally. + +* If input reads have already been preprocessed, you can set the `--preprocessed` flag. In this case, `gaga2` will do no preprocessing at all and instruct `dada2` to perform no trimming. Otherwie, `gaga2` will assess the read lengths for uniformity. If read lengths differ within and between samples, preprocessing with `figaro` is not possible and `dada2` will be run without trimming. * Samples with less than `110` reads after `dada2` preprocessing, will be discarded. ### Running gaga2 -The typical command for running `gaga2` is -`gaga2.nf --input_dir --output_dir --amplicon_length --left_primer --right_primer ` +`gaga2` can be directly run from github. + +`nextflow run zellerlab/gaga2 ` + +To obtain a newer version, do a + +`nextflow pull zellerlab/gaga2` + +before. + +In addition, you should obtain a copy of the `run.config` from the `gaga2` github repo and modify it according to your environment. #### Mandatory arguments -* `input_dir` is the project directory mentioned above. **Recommended: absolute path** -* `output_dir` will be created automatically. **Recommended: absolute path** -* `amplicon_length`, `left_primer`, and `right_primer` are derived from the experiment parameters +* `--input_dir` is the project directory mentioned above. +* `--output_dir` will be created automatically. +* `--amplicon_length` this is derived from your experiment parameters (this is not read-length, but the length of the, well, amplicon!) +* `--single_end` this is only required for single-end libraries (auto-detection of library-type is in progress) #### Optional arguments * `--min_overlap` of read pairs is `20bp` by default -* `-w, -work-dir` should be set to `/work`, otherwise it will be automatically placed in the current directory. +* `--primers ` or `--left_primer`, and `--right_primer` If primer sequences are provided via `--primers`, `gaga2` will remove primers and upstream sequences (using `bbduk`), such as adapters based on the primer sequences. If non-zero primer lengths are provided instead (via `--left_primer` and `--right_primer`), `figaro` will take those into account when determining the best trim positions. +* `--preprocessed` will prevent any further preprocessing by `gaga2` - this flag should only be used if the read data is reliably clean. ### internal beta-testing instructions -* `source /g/scb2/zeller/schudoma/software/wrappers/gaga2_wrapper` **before** submitting job to cluster -* please report issues/requests/feedback in the github issue tracker -* If you want to run `gaga2` on the cluster, `nextflow` alone requires `>4GB memory` just for 'managing'. +* The old gaga2 version can be run with `source /g/scb2/zeller/schudoma/software/wrappers/gaga2_wrapper` **before** submitting job to cluster +* Please report issues/requests/feedback in the github issue tracker +* If you want to run `gaga2` on the cluster, `nextflow` alone requires `>=5GB` memory just for 'managing'. diff --git a/R_scripts/dada2.R b/R_scripts/dada2.R deleted file mode 100644 index ab15f74..0000000 --- a/R_scripts/dada2.R +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env Rscript -.libPaths(c('/g/scb2/zeller/SHARED/software/R/3.5', .libPaths())) -library("dada2");packageVersion("dada2") - -args = commandArgs(trailingOnly=TRUE) -input_dir = args[1] -output_dir = args[2] - -length_cut = as.numeric(c(args[3], args[4])) -print(length_cut) -if (length_cut[1] == -1 || length_cut[2] == -1) { - length_cut = 0 -} -exp_err = as.numeric(c(2,2)) # get from cargs? apparently not. - -list.files(input_dir) -sample_ids = basename(list.files(input_dir)) -print(sample_ids) - -r1_raw = sort(list.files(file.path(input_dir, sample_ids), pattern="_R?1.(fastq|fq)(.gz)?", full.names=TRUE)) -r2_raw = sort(list.files(file.path(input_dir, sample_ids), pattern="_R?2.(fastq|fq)(.gz)?", full.names=TRUE)) - -print(r1_raw) -print(r2_raw) - -#TODO: this only generates one plot -pdf("read_quality.pdf", useDingbats=FALSE, width=12, height=6) -for (i in 1:length(r1_raw)) { - print(plotQualityProfile(c(r1_raw[i], r2_raw[i]))) -} -dev.off() - -r1_filtered = file.path(output_dir, "filtered", basename(r1_raw)) -r2_filtered = file.path(output_dir, "filtered", basename(r2_raw)) - -print(r1_filtered) -print(r2_filtered) - - -out = filterAndTrim(r1_raw, r1_filtered, r2_raw, r2_filtered, truncLen=length_cut, - maxN=0, maxEE=exp_err, truncQ=2, rm.phix=TRUE, - compress=TRUE, multithread=TRUE) -write.table(out, file="filter_trim_table.tsv", sep="\t") - -# update read files -keep = file.exists(r1_filtered) & file.exists(r2_filtered) & out[,"reads.out"] > 100 -r1_filtered = r1_filtered[keep] -r2_filtered = r2_filtered[keep] -sample_ids = sample_ids[keep] - -r1_error = learnErrors(r1_filtered, multithread=TRUE) -r2_error = learnErrors(r2_filtered, multithread=TRUE) - -pdf("error_model.pdf", width=12, height=12, paper='special') -print(plotErrors(r1_error, nominalQ=TRUE)) -print(plotErrors(r2_error, nominalQ=TRUE)) -dev.off() - - -r1_uniq = derepFastq(r1_filtered, verbose=TRUE) -r2_uniq = derepFastq(r2_filtered, verbose=TRUE) -names(r1_uniq) = sample_ids -names(r2_uniq) = sample_ids - -r1_dada = dada(r1_uniq, err=r1_error, multithread=TRUE) -r2_dada = dada(r2_uniq, err=r2_error, multithread=TRUE) - -#Inspecting the returned dada-class object: -r1_dada[[1]] - - -merged = mergePairs(r1_dada, r1_uniq, r2_dada, r2_uniq, verbose=TRUE) - -# Inspect the merger data.frame from the first sample -head(merged[[1]]) - -seqtab = makeSequenceTable(merged) -n_OTU = dim(seqtab)[2] -# Inspect distribution of sequence lengths -table(nchar(getSequences(seqtab))) - -seqtab.nochim = removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, verbose=TRUE) -n_OTU_after_removing_chimeras = dim(seqtab.nochim)[2] -# relative abundance of the chimeric sequences -rel_ab_chimeras = 1 - (sum(seqtab.nochim)/sum(seqtab)) -table(nchar(getSequences(seqtab.nochim))) - -getN = function(x) sum(getUniques(x)) -track = cbind(out[keep,], sapply(r1_dada, getN), sapply(r2_dada, getN), sapply(merged, getN), rowSums(seqtab.nochim)) -colnames(track) = c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim") -rownames(track) = sample_ids -head(track) -write.table(track, file = "summary_table.tsv", sep="\t") - -save(seqtab, seqtab.nochim, r1_error, r2_error, track, file = "result.RData") diff --git a/R_scripts/dada2_analysis.R b/R_scripts/dada2_analysis.R deleted file mode 100644 index a351b7f..0000000 --- a/R_scripts/dada2_analysis.R +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env Rscript - -library("dada2");packageVersion("dada2") -library("tidyverse") -library("cowplot") - -print("YYY") - -# handle command line args -args = commandArgs(trailingOnly=TRUE) -input_dir = args[1] -output_dir = args[2] - -filter_table = read.table(args[3]) - -if (length(args) >= 4) { - nthreads = as.numeric(c(args[4])) -} else { - nthreads = TRUE -} - -# get the read files and sample names -list.files(input_dir) -sample_ids = basename(list.files(input_dir)) -print(sample_ids) - -r1_filtered = sort(list.files(file.path(input_dir, sample_ids), pattern = "_R?1.(fastq|fq)(.gz)?", full.names = TRUE)) -r2_filtered = sort(list.files(file.path(input_dir, sample_ids), pattern = "_R?2.(fastq|fq)(.gz)?", full.names = TRUE)) -r1_filtered = sort(list.files(file.path(input_dir), pattern = "_R?1.(fastq|fq)(.gz)?", full.names = TRUE)) -r2_filtered = sort(list.files(file.path(input_dir), pattern = "_R?2.(fastq|fq)(.gz)?", full.names = TRUE)) - -print(r1_filtered) -print(r2_filtered) - -# learn error rate -r1_error = learnErrors(r1_filtered, multithread=nthreads) -r2_error = learnErrors(r2_filtered, multithread=nthreads) - -pdf("error_model.pdf", width=12, height=12, paper='special') -print(plotErrors(r1_error, nominalQ=TRUE)) -print(plotErrors(r2_error, nominalQ=TRUE)) -dev.off() - -# remove replicates -r1_uniq = derepFastq(r1_filtered, verbose=TRUE) -r2_uniq = derepFastq(r2_filtered, verbose=TRUE) -print(r1_uniq[[1]]) -print(c("LENGTHCHECK:", length(r1_uniq), length(r2_uniq), length(sample_ids))) -#names(r1_uniq) = sample_ids -#names(r2_uniq) = sample_ids -r1_dada = dada(r1_uniq, err=r1_error, multithread=nthreads) -r2_dada = dada(r2_uniq, err=r2_error, multithread=nthreads) - -#Inspecting the returned dada-class object: -r1_dada[[1]] - -# merge read pairs -print("merging") -merged = mergePairs(r1_dada, r1_uniq, r2_dada, r2_uniq, verbose=TRUE) -# Inspect the merger data.frame from the first sample -head(merged[[1]]) - -# construct sequence table -print("making sequence table") -seqtab = makeSequenceTable(merged) -n_OTU = dim(seqtab)[2] -# Inspect distribution of sequence lengths -print(dim(seqtab)) -print("ASV length") -print(table(nchar(getSequences(seqtab)))) - -# remove chimeras -seqtab.nochim = removeBimeraDenovo(seqtab, method="consensus", multithread=nthreads, verbose=TRUE) -n_OTU_after_removing_chimeras = dim(seqtab.nochim)[2] -print(dim(seqtab.nochim)) -asv.table = t(seqtab.nochim) - -# export asv table -asvs = tibble(id=paste0('ASV_', seq_len(nrow(asv.table))), - ASV=rownames(asv.table)) -write_tsv(asvs, 'ASVs.tsv') - -saved_rownames = rownames(asv.table) -rownames(asv.table) = asvs$id -write.table(asv.table, file = 'asv_table.tsv', - sep='\t', quote = FALSE, row.names = TRUE, col.names = TRUE) -rownames(asv.table) = saved_rownames - -# relative abundance of the chimeric sequences -rel_ab_chimeras = 1 - (sum(seqtab.nochim)/sum(seqtab)) -table(nchar(getSequences(seqtab.nochim))) - -# track reads -getN = function(x) sum(getUniques(x)) -track = cbind(filter_table, sapply(r1_dada, getN), sapply(r2_dada, getN), sapply(merged, getN), rowSums(seqtab.nochim)) -print(c("TRACK", length(track), length(sample_ids))) -colnames(track) = c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim") -# rownames(track) = sample_ids -print(head(track)) -print("Filtering") -print(summary(track[,6] / track[,1])) -write.table(track, file = "summary_table.tsv", sep="\t") - -save(seqtab, seqtab.nochim, r1_error, r2_error, track, file = "result.RData") - -# prevalance sanity check -pdf('dada2_figures.pdf') -temp = prop.table(asv.table, 2) -print(temp) -hist(log10(temp), 100, main='abundance') -prev = rowMeans(asv.table!=0) -hist(prev, 50, main='prevalence') -mean.ab = rowMeans(log10(temp + 1e-05)) -hist(mean.ab, 50, main='log.abundance') -print("Prevalence") -print(summary(prev)) -print("Mean abundance") -print(summary(mean.ab)) -plot(prev, mean.ab, main='prevalence vs abundance') -plot(nchar(rownames(asv.table)), prev, main='ASV length vs prevalence') -dev.off() diff --git a/R_scripts/dada2_analysis_paired.R b/R_scripts/dada2_analysis_paired.R index a351b7f..f01ed3d 100644 --- a/R_scripts/dada2_analysis_paired.R +++ b/R_scripts/dada2_analysis_paired.R @@ -19,6 +19,9 @@ if (length(args) >= 4) { nthreads = TRUE } +dada2_chimera_method = args[5] # +dada2_chimera_min_fold_parent_over_abundance = as.numeric(c(args[6])) + # get the read files and sample names list.files(input_dir) sample_ids = basename(list.files(input_dir)) @@ -70,7 +73,8 @@ print("ASV length") print(table(nchar(getSequences(seqtab)))) # remove chimeras -seqtab.nochim = removeBimeraDenovo(seqtab, method="consensus", multithread=nthreads, verbose=TRUE) +#seqtab.nochim = removeBimeraDenovo(seqtab, method="consensus", multithread=nthreads, verbose=TRUE) +seqtab.nochim = removeBimeraDenovo(seqtab, method=dada2_chimera_method, minFoldParentOverAbundance=dada2_chimera_min_fold_parent_over_abundance, multithread=nthreads, verbose=TRUE) n_OTU_after_removing_chimeras = dim(seqtab.nochim)[2] print(dim(seqtab.nochim)) asv.table = t(seqtab.nochim) @@ -103,19 +107,19 @@ write.table(track, file = "summary_table.tsv", sep="\t") save(seqtab, seqtab.nochim, r1_error, r2_error, track, file = "result.RData") -# prevalance sanity check -pdf('dada2_figures.pdf') -temp = prop.table(asv.table, 2) -print(temp) -hist(log10(temp), 100, main='abundance') -prev = rowMeans(asv.table!=0) -hist(prev, 50, main='prevalence') -mean.ab = rowMeans(log10(temp + 1e-05)) -hist(mean.ab, 50, main='log.abundance') -print("Prevalence") -print(summary(prev)) -print("Mean abundance") -print(summary(mean.ab)) -plot(prev, mean.ab, main='prevalence vs abundance') -plot(nchar(rownames(asv.table)), prev, main='ASV length vs prevalence') -dev.off() +## prevalance sanity check +#pdf('dada2_figures.pdf') +#temp = prop.table(asv.table, 2) +#print(temp) +#hist(log10(temp), 100, main='abundance') +#prev = rowMeans(asv.table!=0) +#hist(prev, 50, main='prevalence') +#mean.ab = rowMeans(log10(temp + 1e-05)) +#hist(mean.ab, 50, main='log.abundance') +#print("Prevalence") +#print(summary(prev)) +#print("Mean abundance") +#print(summary(mean.ab)) +#plot(prev, mean.ab, main='prevalence vs abundance') +#plot(nchar(rownames(asv.table)), prev, main='ASV length vs prevalence') +#dev.off() diff --git a/R_scripts/dada2_analysis_single.R b/R_scripts/dada2_analysis_single.R index 6787896..c9845a7 100644 --- a/R_scripts/dada2_analysis_single.R +++ b/R_scripts/dada2_analysis_single.R @@ -19,6 +19,9 @@ if (length(args) >= 4) { nthreads = TRUE } +dada2_chimera_method = args[5] # +dada2_chimera_min_fold_parent_over_abundance = as.numeric(c(args[6])) + # get the read files and sample names list.files(input_dir) sample_ids = basename(list.files(input_dir)) @@ -56,7 +59,8 @@ print("ASV length") print(table(nchar(getSequences(seqtab)))) # remove chimeras -seqtab.nochim = removeBimeraDenovo(seqtab, method="consensus", multithread=nthreads, verbose=TRUE) +#seqtab.nochim = removeBimeraDenovo(seqtab, method="consensus", multithread=nthreads, verbose=TRUE) +seqtab.nochim = removeBimeraDenovo(seqtab, method=dada2_chimera_method, minFoldParentOverAbundance=dada2_chimera_min_fold_parent_over_abundance, multithread=nthreads, verbose=TRUE) n_OTU_after_removing_chimeras = dim(seqtab.nochim)[2] print(dim(seqtab.nochim)) asv.table = t(seqtab.nochim) @@ -78,6 +82,9 @@ table(nchar(getSequences(seqtab.nochim))) # track reads getN = function(x) sum(getUniques(x)) +print(r1_dada) +print(filter_table) +print(seqtab.nochim) track = cbind(filter_table, sapply(r1_dada, getN), rowSums(seqtab.nochim)) print(c("TRACK", length(track), length(sample_ids))) colnames(track) = c("input", "filtered", "denoisedF", "nonchim") @@ -89,19 +96,19 @@ write.table(track, file = "summary_table.tsv", sep="\t") save(seqtab, seqtab.nochim, r1_error, track, file = "result.RData") -# prevalance sanity check -pdf('dada2_figures.pdf') -temp = prop.table(asv.table, 2) -print(temp) -hist(log10(temp), 100, main='abundance') -prev = rowMeans(asv.table!=0) -hist(prev, 50, main='prevalence') -mean.ab = rowMeans(log10(temp + 1e-05)) -hist(mean.ab, 50, main='log.abundance') -print("Prevalence") -print(summary(prev)) -print("Mean abundance") -print(summary(mean.ab)) -plot(prev, mean.ab, main='prevalence vs abundance') -plot(nchar(rownames(asv.table)), prev, main='ASV length vs prevalence') -dev.off() +## prevalance sanity check +#pdf('dada2_figures.pdf') +#temp = prop.table(asv.table, 2) +#print(temp) +#hist(log10(temp), 100, main='abundance') +#prev = rowMeans(asv.table!=0) +#hist(prev, 50, main='prevalence') +#mean.ab = rowMeans(log10(temp + 1e-05)) +#hist(mean.ab, 50, main='log.abundance') +#print("Prevalence") +#print(summary(prev)) +#print("Mean abundance") +#print(summary(mean.ab)) +#plot(prev, mean.ab, main='prevalence vs abundance') +#plot(nchar(rownames(asv.table)), prev, main='ASV length vs prevalence') +#dev.off() diff --git a/R_scripts/dada2_preprocess.R b/R_scripts/dada2_preprocess.R deleted file mode 100644 index 5c9b975..0000000 --- a/R_scripts/dada2_preprocess.R +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env Rscript - -library("dada2");packageVersion("dada2") -library("tidyverse") -library("cowplot") - -MIN_READ_THRESHOLD = 110 - -# handle command line args -args = commandArgs(trailingOnly=TRUE) -input_dir = args[1] -output_dir = args[2] - -length_cut = as.numeric(c(args[3], args[4])) -print(length_cut) -if (length_cut[1] == -1 || length_cut[2] == -1) { - length_cut = 0 -} -exp_err = as.numeric(c(2,2)) # get from cargs? apparently not. - -if (length(args) >= 5) { - nthreads = as.numeric(c(args[5])) -} else { - nthreads = TRUE -} - -# get the read files and sample names -list.files(input_dir) -#sample_ids = basename(list.files(input_dir)) -#print(sample_ids) - -r1_raw = sort(list.files(input_dir, pattern = "_R?1.(fastq|fq)(.gz)?", full.names = TRUE)) -r2_raw = sort(list.files(input_dir, pattern = "_R?2.(fastq|fq)(.gz)?", full.names = TRUE)) -print(r1_raw) -print(r2_raw) - -# get the quality profiles -x.f = plotQualityProfile(r1_raw, aggregate = TRUE) -x.r = plotQualityProfile(r2_raw, aggregate = TRUE) -g = plot_grid(x.f, x.r, labels = c('forward', 'reverse')) -ggsave(g, filename = "read_quality.pdf", - width = 12, height = 7, useDingbats=FALSE) - -# perform filtering and trimming -r1_filtered = file.path(output_dir, basename(r1_raw)) -r2_filtered = file.path(output_dir, basename(r2_raw)) - -print(r1_filtered) -print(r2_filtered) - -out = filterAndTrim(r1_raw, r1_filtered, r2_raw, r2_filtered, truncLen=length_cut, - maxN=0, maxEE=exp_err, truncQ=2, rm.phix=TRUE, - compress=TRUE, multithread=nthreads) -# colnames(out) = c("sample", "reads.in", "reads.out") -rownames(out) = str_replace(rownames(out), "_R?[12].(fastq|fq)(.gz)?", "") -write.table(out, file="filter_trim_table.tsv", sep="\t") - -# remove -keep = file.exists(r1_filtered) & file.exists(r2_filtered) & out[,"reads.out"] >= MIN_READ_THRESHOLD -r1_remove = r1_filtered[!keep] #file.exists(r1_filtered) & out["reads.out"] < MIN_READ_THRESHOLD] -r2_remove = r2_filtered[!keep] #file.exists(r2_filtered) & out["reads.out"] < MIN_READ_THRESHOLD] -sapply(r1_remove, file.remove) -sapply(r2_remove, file.remove) - -out = out[keep,] -write.table(out, file="filter_trim_table.final.tsv", sep="\t") - -r1_filtered = r1_filtered[keep] -r2_filtered = r2_filtered[keep] - -# get the quality profiles post-qc -x.f = plotQualityProfile(r1_filtered, aggregate = TRUE) -x.r = plotQualityProfile(r2_filtered, aggregate = TRUE) -g = plot_grid(x.f, x.r, labels = c('forward', 'reverse')) -ggsave(g, filename = "read_quality_postqc.pdf", - width = 12, height = 7, useDingbats=FALSE) - -# update read files -#keep = file.exists(r1_filtered) & file.exists(r2_filtered) & out[,"reads.out"] > 100 -#r1_filtered = r1_filtered[keep] -#r2_filtered = r2_filtered[keep] -#sample_ids = sample_ids[keep] diff --git a/R_scripts/dada2_preprocess_single.R b/R_scripts/dada2_preprocess_single.R index 83e9945..8ecd75e 100644 --- a/R_scripts/dada2_preprocess_single.R +++ b/R_scripts/dada2_preprocess_single.R @@ -55,7 +55,7 @@ keep = file.exists(r1_filtered) & out[,"reads.out"] >= MIN_READ_THRESHOLD r1_remove = r1_filtered[!keep] #file.exists(r1_filtered) & out["reads.out"] < MIN_READ_THRESHOLD] sapply(r1_remove, file.remove) -out = out[keep,] +out = out[keep,,drop=FALSE] write.table(out, file="filter_trim_table.final.tsv", sep="\t") r1_filtered = r1_filtered[keep] diff --git a/assets/adapters.fa b/assets/adapters.fa new file mode 100755 index 0000000..a87d425 --- /dev/null +++ b/assets/adapters.fa @@ -0,0 +1,317 @@ +>Reverse_adapter +AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Universal_Adapter +AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT +>pcr_dimer +AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCTAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTG +>PCR_Primers +AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCTCAAGCAGAAGACGGCATACGAGCTCTTCCGATCT +>TruSeq_Adapter_Index_1_6 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_2 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_3 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTAGGCATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_4 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_5 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACACAGTGATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_6 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGCCAATATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_7 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCAGATCATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_8 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTTGAATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_9 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGATCAGATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_10 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACTAGCTTATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_11 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGGCTACATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_12 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCTTGTAATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_13 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACAGTCAACAATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_14 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACAGTTCCGTATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_15 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACATGTCAGAATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_16 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCCGTCCCGATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_18_7 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTCCGCACATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_19 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGAAACGATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_20 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGGCCTTATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_21 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTTTCGGAATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_22 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGTACGTAATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_23 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGAGTGGATATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_25 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTGATATATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_27 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACATTCCTTTATCTCGTATGCCGTCTTCTGCTTG +>I5_Nextera_Transposase_1 +CTGTCTCTTATACACATCTGACGCTGCCGACGA +>I7_Nextera_Transposase_1 +CTGTCTCTTATACACATCTCCGAGCCCACGAGAC +>I5_Nextera_Transposase_2 +CTGTCTCTTATACACATCTCTGATGGCGCGAGGGAGGC +>I7_Nextera_Transposase_2 +CTGTCTCTTATACACATCTCTGAGCGGGCTGGCAAGGC +>I5_Primer_Nextera_XT_and_Nextera_Enrichment_[N/S/E]501 +GACGCTGCCGACGAGCGATCTAGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_and_Nextera_Enrichment_[N/S/E]502 +GACGCTGCCGACGAATAGAGAGGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_and_Nextera_Enrichment_[N/S/E]503 +GACGCTGCCGACGAAGAGGATAGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_and_Nextera_Enrichment_[N/S/E]504 +GACGCTGCCGACGATCTACTCTGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_and_Nextera_Enrichment_[N/S/E]505 +GACGCTGCCGACGACTCCTTACGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_and_Nextera_Enrichment_[N/S/E]506 +GACGCTGCCGACGATATGCAGTGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_and_Nextera_Enrichment_[N/S/E]507 +GACGCTGCCGACGATACTCCTTGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_and_Nextera_Enrichment_[N/S/E]508 +GACGCTGCCGACGAAGGCTTAGGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_and_Nextera_Enrichment_[N/S/E]517 +GACGCTGCCGACGATCTTACGCGTGTAGATCTCGGTGGTCGCCGTATCATT +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N701 +CCGAGCCCACGAGACTAAGGCGAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N702 +CCGAGCCCACGAGACCGTACTAGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N703 +CCGAGCCCACGAGACAGGCAGAAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N704 +CCGAGCCCACGAGACTCCTGAGCATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N705 +CCGAGCCCACGAGACGGACTCCTATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N706 +CCGAGCCCACGAGACTAGGCATGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N707 +CCGAGCCCACGAGACCTCTCTACATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N708 +CCGAGCCCACGAGACCAGAGAGGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N709 +CCGAGCCCACGAGACGCTACGCTATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N710 +CCGAGCCCACGAGACCGAGGCTGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N711 +CCGAGCCCACGAGACAAGAGGCAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N712 +CCGAGCCCACGAGACGTAGAGGAATCTCGTATGCCGTCTTCTGCTTG +>I5_Primer_Nextera_XT_Index_Kit_v2_S502 +GACGCTGCCGACGAATAGAGAGGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S503 +GACGCTGCCGACGAAGAGGATAGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S505 +GACGCTGCCGACGACTCCTTACGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S506 +GACGCTGCCGACGATATGCAGTGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S507 +GACGCTGCCGACGATACTCCTTGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S508 +GACGCTGCCGACGAAGGCTTAGGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S510 +GACGCTGCCGACGAATTAGACGGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S511 +GACGCTGCCGACGACGGAGAGAGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S513 +GACGCTGCCGACGACTAGTCGAGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S515 +GACGCTGCCGACGAAGCTAGAAGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S516 +GACGCTGCCGACGAACTCTAGGGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S517 +GACGCTGCCGACGATCTTACGCGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S518 +GACGCTGCCGACGACTTAATAGGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S520 +GACGCTGCCGACGAATAGCCTTGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S521 +GACGCTGCCGACGATAAGGCTCGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S522 +GACGCTGCCGACGATCGCATAAGTGTAGATCTCGGTGGTCGCCGTATCATT +>I7_Primer_Nextera_XT_Index_Kit_v2_N701 +CCGAGCCCACGAGACTAAGGCGAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N702 +CCGAGCCCACGAGACCGTACTAGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N703 +CCGAGCCCACGAGACAGGCAGAAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N704 +CCGAGCCCACGAGACTCCTGAGCATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N705 +CCGAGCCCACGAGACGGACTCCTATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N706 +CCGAGCCCACGAGACTAGGCATGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N707 +CCGAGCCCACGAGACCTCTCTACATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N710 +CCGAGCCCACGAGACCGAGGCTGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N711 +CCGAGCCCACGAGACAAGAGGCAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N712 +CCGAGCCCACGAGACGTAGAGGAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N714 +CCGAGCCCACGAGACGCTCATGAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N715 +CCGAGCCCACGAGACATCTCAGGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N716 +CCGAGCCCACGAGACACTCGCTAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N718 +CCGAGCCCACGAGACGGAGCTACATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N719 +CCGAGCCCACGAGACGCGTAGTAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N720 +CCGAGCCCACGAGACCGGAGCCTATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N721 +CCGAGCCCACGAGACTACGCTGCATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N722 +CCGAGCCCACGAGACATGCGCAGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N723 +CCGAGCCCACGAGACTAGCGCTCATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N724 +CCGAGCCCACGAGACACTGAGCGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N726 +CCGAGCCCACGAGACCCTAAGACATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N727 +CCGAGCCCACGAGACCGATCAGTATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N728 +CCGAGCCCACGAGACTGCAGCTAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N729 +CCGAGCCCACGAGACTCGACGTCATCTCGTATGCCGTCTTCTGCTTG +>I5_Adapter_Nextera +CTGATGGCGCGAGGGAGGCGTGTAGATCTCGGTGGTCGCCGTATCATT +>I7_Adapter_Nextera_No_Barcode +CTGAGCGGGCTGGCAAGGCAGACCGATCTCGTATGCCGTCTTCTGCTTG +>Nextera_LMP_Read1_External_Adapter +GATCGGAAGAGCACACGTCTGAACTCCAGTCAC +>Nextera_LMP_Read2_External_Adapter +GATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT +>RNA_Adapter_(RA5)_part_#_15013205 +GATCGTCGGACTGTAGAACTCTGAAC +>RNA_Adapter_(RA3)_part_#_15013207 +CCTTGGCACCCGAGAATTCCA +>Stop_Oligo_(STP)_8 +CCACGGGAACGTGGTGGAATTC +>RNA_RT_Primer_(RTP)_part_#_15013981 +TGGAATTCTCGGGTGCCAAGGC +>RNA_PCR_Primer_(RP1)_part_#_15013198 +TCGGACTGTAGAACTCTGAACGTGTAGATCTCGGTGGTCGCCGTATCATT +>RNA_PCR_Primer_Index_1_(RPI1)_2,9 +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_2_(RPI2) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_3_(RPI3) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTTAGGCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_4_(RPI4) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_5_(RPI5) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACACAGTGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_6_(RPI6) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGCCAATATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_7_(RPI7) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCAGATCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_8_(RPI8) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACACTTGAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_9_(RPI9) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGATCAGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_10_(RPI10) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTAGCTTATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_11_(RPI11) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGGCTACATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_12_(RPI12) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCTTGTAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_13_(RPI13) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACAGTCAAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_14_(RPI14) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACAGTTCCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_15_(RPI15) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACATGTCAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_16_(RPI16) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCCGTCCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_17_(RPI17) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGTAGAGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_18_(RPI18) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGTCCGCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_19_(RPI19) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGTGAAAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_20_(RPI20) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGTGGCCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_21_(RPI21) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGTTTCGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_22_(RPI22) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCGTACGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_23_(RPI23) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGAGTGGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_24_(RPI24) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGGTAGCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_25_(RPI25) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACACTGATATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_26_(RPI26) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACATGAGCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_27_(RPI27) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACATTCCTATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_28_(RPI28) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCAAAAGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_29_(RPI29) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCAACTAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_30_(RPI30) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCACCGGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_31_(RPI31) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCACGATATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_32_(RPI32) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCACTCAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_33_(RPI33) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCAGGCGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_34_(RPI34) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCATGGCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_35_(RPI35) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCATTTTATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_36_(RPI36) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCCAACAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_37_(RPI37) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCGGAATATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_38_(RPI38) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCTAGCTATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_39_(RPI39) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCTATACATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_40_(RPI40) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCTCAGAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_41_(RPI41) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGACGACATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_42_(RPI42) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTAATCGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_43_(RPI43) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTACAGCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_44_(RPI44) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTATAATATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_45_(RPI45) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTCATTCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_46_(RPI46) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTCCCGAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_47_(RPI47) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTCGAAGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_48_(RPI48) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTCGGCAATCTCGTATGCCGTCTTCTGCTTG +>PhiX_read1_adapter +AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTGAAA +>PhiX_read2_adapter +AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATTAAAAAA +>Bisulfite_R1 +AGATCGGAAGAGCACACGTCTGAAC +>Bisulfite_R2 +AGATCGGAAGAGCGTCGTGTAGGGA +>Illumina Small RNA v1.5 3p Adapter +ATCTCGTATGCCGTCTTCTGCTTG +>Illumina RNA 3p Adapter (RA3) +TGGAATTCTCGGGTGCCAAGG +>Illumina RNA 5p Adapter (RA5) +GTTCAGAGTTCTACAGTCCGACGATC +>Illumina 3p RNA Adapter +TCGTATGCCGTCTTCTGCTTGT + diff --git a/config/run.config b/config/run.config index f48623b..05baa3a 100644 --- a/config/run.config +++ b/config/run.config @@ -4,6 +4,28 @@ params { min_overlap = 20 left_primer = 0 right_primer = 0 + + /* + bbduk qc parameters + s. https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/bbduk-guide/ + qtrim=rl trimq=3 : gentle quality trimming (only discard bases < phred 3; phred 2 = junk marker) on either side (rl) of the read + maq=25 : discard reads below average quality of pred 25 + minlen=45 : discard reads < 45bp + ref=?? ktrim=r k=23 mink=11 hdist=1 tpe tbo : right-side k-mer based adapter clipping with 1 mismatch allowed, try overlap-detection (tbo), and trim pairs to same length (tpe) upon adapter detection + ftm=5 : get rid off (n*5)+1st base (last sequencing cycle illumina garbage) + entropy=0.5 entropywindow=50 entropyk=5 : discard low complexity sequences + */ + + qc_params_primers = "qtrim=rl trimq=3 ktrim=l k=14 mink=1 hdist=1 cu=t" + qc_params_adapters = "qtrim=rl trimq=3 ktrim=l k=23 mink=1 hdist=1 tpe tbo cu=t" + qc_minlen = 100 + + mapseq_bin = "mapseq" + mapseq_db_path = projectDir + mapseq_db_name = "" + + dada2_chimera_method = "consensus" // can be "consensus" (default dada2 since 1.4) or "pool" + dada2_chimera_min_fold_parent_over_abundance = 2 } /* section below needs to be adjusted to local cluster */ @@ -19,39 +41,69 @@ executor { process { cache = 'lenient' - withName: ltrim { + withName: figaro { + container = "oras://ghcr.io/zellerlab/gaga2:latest" + // conda = "/g/scb/zeller/schudoma/software/conda/figaro" executor = "slurm" errorStrategy = {task.attempt <= 3 ? "retry" : "ignore"} - memory = '8.GB' + memory = '16.GB' time = '4d' maxRetries = 3 + cpus = 8 } - withName: figaro { + withLabel: dada2 { container = "oras://ghcr.io/zellerlab/gaga2:latest" - // conda = "/g/scb/zeller/schudoma/software/conda/figaro" executor = "slurm" errorStrategy = {task.attempt <= 3 ? "retry" : "ignore"} - memory = '16.GB' + memory = '16.GB' time = '4d' maxRetries = 3 + cpus = 8 + } + withLabel: bbduk { + container = "oras://ghcr.io/zellerlab/gaga2:latest" + executor = "slurm" + errorStrategy = {task.attempt <= 3 ? "retry" : "ignore"} + cpus = 4 + memory = {8.GB * task.attempt} + time = '2h' + maxRetries = 3 + } + withName: fastqc { + container = "oras://ghcr.io/zellerlab/gaga2:latest" + executor = "slurm" + errorStrategy = {task.attempt <= 3 ? "retry" : "ignore"} + cpus = 2 + memory = {4.GB * task.attempt} + time = '7d' + maxRetries = 3 } - withName: dada2_preprocess { + withName: mapseq_otutable { container = "oras://ghcr.io/zellerlab/gaga2:latest" executor = "slurm" errorStrategy = {task.attempt <= 3 ? "retry" : "ignore"} - memory = '16.GB' - time = '4d' + cpus = 1 + memory = {2.GB * task.attempt} + time = '24h' maxRetries = 3 - cpus = 8 } - withName: dada2_analysis { + withName: collate_mapseq_tables { container = "oras://ghcr.io/zellerlab/gaga2:latest" executor = "slurm" errorStrategy = {task.attempt <= 3 ? "retry" : "ignore"} - memory = '16.GB' - time = '4d' + cpus = 1 + memory = {2.GB * task.attempt} + time = '24h' maxRetries = 3 + } + withName: mapseq { + container = "oras://ghcr.io/zellerlab/gaga2:latest" + executor = "slurm" + errorStrategy = {task.attempt <= 3 ? "retry" : "ignore"} cpus = 8 + memory = {8.GB * task.attempt} + time = '7d' + maxRetries = 3 } } diff --git a/docs/img/gaga2_flow.png b/docs/img/gaga2_flow.png new file mode 100644 index 0000000000000000000000000000000000000000..51cc16864bbb1c9fd1e210e90c3824dac690da4d GIT binary patch literal 49291 zcmdSBgO?EVwlikq;OFOOW9MY!&jxOVJe(C!v@{q!gy`n$L;2w47m3b+hHq6s>a{6ET>BR+$>cw9LiQ&AxP z@AC|PjT!VA^uNyr=g<|G8Njyd(IZ9#ch40b_G=eY~?K`JbDC z;UfKicDUX$BRvc}7$uBMXx3;bg%F<`*ISBuq(ob=alc`QXb&n7^)OR3C~HX+3hHJH zAMWLjhCfw48D~-EnJcnLQy2TC%32VM`@H5QlbQJw7JE%K8?ytAPV%ZsU|(BaZ`p zs88H*(#I#ctYNZ0i8r$N4A-}1wOY0G%<J$3u25FS_*M4B>vj{Ii`>VUSp7s!%RpF>3{F@!!Dt`x+f=s5;cP@aAxm%z zP#P#}y5C;LmMnof#JJx0>M?!~xyF^^KEIVn4{9SuZF`A%Bj3L96Ix50>auLP|Ezj!f0zx{afOa9SNk;+ z-eB=6BdcpO?#GU$fhMUzf<-p8y_&o8QE>{2LL`a^8UMG`<<&#tu`{j8fJ;|^C z^CQ5)OG89{wjd=mBaIn2Z}I7gCJXnKJbpXb?p&8pJ1s~Lzc}clFoS*-OAPz^uz0^P zaY_eYkZ&$36Ws3mJSr&N#y{nFB-AI1>OR)(brX5<>71a@DYOXp?B`%eJmJ3OR@vc4 zw4ISx{~gp`>>g9Gl&^s0&2!Z(jT5tQ-=Hy>tRhzDHu?&w5MYzQ*?%Rtyt2N$Oh3JC zKT~H5#eZ>{xTHVK{%*c=Q)r!NBlYb4iI)=sgmIuzpeC05TdPpFikg{WjLxCucu|H$ z3GTVp;@o&ap6`c=f|mcuSsvGxD%K!d~Xx zZK3mYZNB7@xVP5!v*&IN)eAbCZxxw2;F6XirPi6XQ{z>+8=iUvgaR}A`Lktu9Yvb8RyA!;iKknBpOwBa#AjCfh_CGsjX?&PzP>oHmIy#cSqvZKU7RO# z-O7BiURey?E$?pkyjXCvc0JJxB*kmiwC!}+OvUdTNL)^Z5|3InZ0Fg`mo^_yME$k1 zdUsMq#gMHW7wFF1UX&cN2bbffEl-vh#iT7$SZSZKKChQEK)3bpPF%m#Gs<+)k`(Jl zNPDyQw$wQ&!4$jplbi=Rc~y4V_y!sM{LeRNR)F?O42JacG?vTj zMWf;djdxSrFrl1pQ7nvz_&U(cRJ%ULfWzLGuYP#l-z*;L@lCt!*V99hx%y4(O|cVs zt<&~u7d_9x^_FeXU*vWf%Ly1?b<6DRW#!-djR!97hUfnDQl8<#K zaT>m1P}2Fmvn0G7j={&}aeZntZ?Sm)6jJmZ57>acjbtb`k!;O=qiY@pccUxb_xR@( z4yo*))2=?wN?;GB@L8RhTE>;y%5sxP3Nb|DC>(1~yvkzTzq?Cy^F#hY5x__=xML|e zEjV&F8R&g31~GEq_~9Z~uEb)pU7c_<22UuiDt1fIbaWm*Ic}yT;a17mrh0a{DROU0 zpJF5KY7O&i+^b10t{QvOwR4o9qEBO{2!b6vi)lxnH*$;dC;{ ztNI;l6gFd&Jj1`4V$gDbcTjAapk}0Dg0miO|MR*LQmX>YQn$-4th8W#*Kfj-`v%Sgxo`T5b_4T&CPP~1c6?nJ zNaw^4LxtM}osWCz^O@>S$7J`mvLhAPxGE2tZ=PCHW}!qbBwIHV>a?xCi{}#nvpYKZ1y+=>$>e# zi)lrY@N-u7=(F4d%U6ovF(r}T(JShlm8OdYG@ZRPA-8}ZY%RAHz-s38^SlM7h~r$_ zFizQGHJlf7E;!h0Bgr_-$9>7F&5y!13=n9kY&TKg74L)o!qFnS*Nf$1?r!6xC9Fd%~SQmoQ)zspTO(QL%cRse5KEInq zU|p99Zn-{v>~^}4yz5)5TkW!Msb$#uwdU>svr5c7yh^1QKV&7VX5nf`a7o@{@X4V= z=|rL9rX#pBH!aO_Qc*=Mn0WBN}W}mG6 zp`z&@h(boj?#FNF60|j%9f!mmXCA^7}s~s&isF($7>wrZU1^182f^EDXAAkTIb!Q$UvIkSIMn!r=Xz-F>M zr&X6A?>Z=A0IA9n@5#l=N^-@40=7PW1H`xjLg|)9fCE5V;H|Mex&M80P>DOGy7C(gk6yKN>DI99fWT7t9c0_(z>Q~li|SqzH>_@ zQz>k0!KFLU!Fo+?aXRl;9`6Fw0DJ{?zp{WGb&182*|Z`!b>gZbu^kXw!}R)@E9Jlq zkZ)6;f|ehgE05LTzch!n2bse9AQmGCamX>1!@`YuzpCXQ zB9*VS-YG|5u1Bx8r(bKC4`Cs4k-k`7;o{TE21y335K8TEV)PF*@)-(S9Gd3gx{&B? zy}BrrCcD4mJ>OGJZ!m@`G`!@^RupmXrn@~XE*>$)Wil;)D~j*>BA+T!Q4(z5vCdZ) zEYv&$?z9&&KK7d``qs_P^b4kOwx z|4u=0FV@On1AQUCo7@WmH=;s4dP)gV(Xy3XbvzQj*~M{PG~Q3B4|9%y%IVv>&<+$T zLu?xlnj80ECqtQ+i@(on*>j7bb>AxiXX$kgxT9I%s`;YljU^wsj^TLTa!26Yg4=RqM#as~81=-Zal^)0@K%ED2_zR4kNCHRqp>zQ+8M z5_f)|4z2zEL`=uwqC=pSW5)D4=Syn*L{?6GOc-_txzD8>XaNzw9ZxFi#MC=~{#cC5 zN3|}mF!|VyFfHNOY|N3e;;`bw`S|5C#6$^K&2KN{WR9frTks@RcxIZE##_#zD{)P| zv_EP#z899Dl6Gw^r@e%|a(Ldc6TG<<#x(>=u17G!fkQja7rpMy1R0NJW2YJ?d{wuA ztx1-wWDL^19>-Iv920;XfvJ#eQ6ZDq#beoWcfOr$R{>dIm_5DTDWzA<-xRrQmH42= zaPlH?YDkVNd>7l1dxa*3_i{jW)3NyVdc3gr*x1+z$kWT+TBbPAvT9VStx~221x~(# zolSzJ;(iDl8~uXJq3;N|;$IZcj#4IRSS2JeZUaZs7y|e|?5-t*;Ts8Ig{fJp(|*w- zij7=!A@k!HN~m)S3k0<*T#h6~FTR?K6($2~jO6`Sh;Zs=s)N-%?ep zw=0rZbD7pDPV|XvT*hPC~eee03;3%f5NzGllXlOnPwAn{mG*HhCWyb8;Po$}s0h zSLl-nY{54|^Euce&_OUSbO7(iRuL{C*t}u4qMy}FdDfZ{C&(u%jPf0NH{IQNZ3{-d z{}rp07)dQhZU=#mlCb+?uYnxdqIpTZQ9*Z{v6H?;j{yutMnfxIE5TCOeknR-oOciWzJk^0f0%J>~{Oyt2n4{A?T zP1pD!?*&^+H}Vfw5^NI8S=P+S76tsJuH7|(gBxBIZ%>W*-}S|LZu+b*}RyeN5eQ}VXbC8TM8hV7c;shhME+Y8Jo*PcTRiz{KKxr;^j<+R-T zNgy1b%-%d!(GnkS_E!-ezwqeqyvPapInt}gY+M@Sa=kzqBpV{6TJGGHd>7MFd&;-s z-+h?$5x&?p)eh2wsU-!}(XvJnYd8kr^M`0bvfy_{t`pPvrGyKMKU8&vKE$zvAuNNf z*Vdrnw*9>Z)K*9EmA%2|2<0SXmMe)ID+$3@VGF<10vJo32_3^~M$yQl2aoV+z1W*B z3|=$yk+|)a_ez)wcY~s2yloKM$v9b1=PtE3Tw~U~r)Lu5sOqt-sV_aTb(?cA+QK^% zH`VU(buU}=o=-7oRNzJub*SC9K?lUnl?VCT@ zyRh@_+qpt5Pj*=;;J-il7U-ZB694w4P9&Ki;)aqli=8%3ol`iPzz8y?cKW^Zjx(Rws!neUDYjmib=)nL z$M$j8D#iJqMroCUtN7&F{n5IH}u~iz>D8HfkY0< zr+hU;nKDmNbgstRo(lp|V25-3Tk^5JPF|d#kNUyEOdEr;5-{!{N@B>gEhLPW1%Ai- zBor&5ijin<%@Vbxn!uc-*7VQdUWrohLf~A@#o!jdT9->PaJv>MQoa{zFT%VT_kEzd z_fOk9|MOjgi|KiOT+%UN*Gu)ehKp!Dim$byFS!?H^XS4;@(mD98BShBnz{)eY7RPuhR1Y7u z5i|bASaz0||{_sjQ_h1co& z7nokg*09_3H>P8K{r$UF^ar)17ZA9B6WP{&`})8Jw(picPcqBpyv27nJAvD_BJQlZ zm?`-y8_{&vTZtZNj=84Z1u?5%Ec>MAHYmxu@B9P|-fjF;lcS84*sVMeu?|r$$hTuE z2~y4*cunpBry*z>GUt+N`(DbSPpm^JJf9)sB4zSqVa}x77o#5(X@c>C&I;D6v+pO( z?y7q1XkQ^;E;N(p0RA+B2x}uKj}0Od7y48$@dzRKBej=J-cg=U5gkmPlE%k;h!}^g zi*r#RDlO^&DtFM`L5ig@A)6Q9;3tbt%)?yeZ~D@XET)-_sC(mrK(KN&c+VS|41^Wv zpkW|HIxQ7OfIarLvw@-8~tXf}M*QPbTjN8=!%l9g}n6YOERY{yYbS*`cwWjC)(GT&QZH4@A) zyW^S?T)f-53eh<_3^cyWz;P#6ME3u6hSk73K^}jlw9Imz6td@uBR817f7I22N!3ax z5F^V5Eer6Kq*%!G2o{z1MR&m1JyJpTMqKr$r zvppx?z3$W=S2_;qe)oHfiJSVtce;pD&?DKRM!uMRrpb5aCeukZ{+<&*r${?c0oe1H zro&8?AWp-Q--&rsb<(i2JOYRIKb}K{#|r&*tn}>9ydY$J+&d_$al`80za@9Gx)IgJ zhR(#gDfPpYwD>6rS)FixxgrEIpG;WvYMAPFA&Y4>e{{;V>rGqc*k@Ol-*C^{8lI1s zUi)F=@U8nQJGw-_cZwS@Y*}Ha?w_=~0$St%HRoiR8oGXfJ|c zTkS9lL};zP9F0#%;ODO^MLX|H`~H_kUPJay0mGNy*L@44;yW3O9d;e>UQD#oZe%kM z+~c0LYTQ6fKJL$1!nT@f_T8&ESfQ1kire&BufXf|6Subqc6DI8+ zU9Xp*?Zlz;{Z7*=e5 zq$xaD`TBP5Js0`;m3pY-4_`2={LCrgI?qOv%XtqD*RF4*(Oe2YSw#6rm#4^ZZLOVH ziZ?#)F5(1gQ|fP-*{8%7;1!+Tvet~}L9nl!@*}4C0c?zw{}k5;>hNgrh!qD_4SrGNCbF%cb(W#>l6tsmp}nzFMD{VopmY;{{NLEyupBBu;z? z6AdhH&YSN+XY$xgb*3x){!%xc$8f`3enTqny#Vr9rHD~?t85rS0zrwN7TRA~N#Lg= zO%#q#U70eL2LEGM{p8tinWthBp#u|d$zo9wT8E7sZ_;KFF6jh-J=z+k$m&ZMC- zgNse_x}Iv+856Kje1eUa?pTFumSo(a;$}Ww57HOmS>bfQRh0HuDetKxfZ8QJJ(62klw|)U z3K~vIuDB3MF;*1fo$7)K&gIE5Im#z%Bhh1x=Q(C}Gv7DRLCfzHT^RLn7iLNfRTV9@ zW+dcH;iMAO@s9B=tqzeMa$MhhG|OMIkgFZFeuTGQ)l$)g~V}t&cWf7vAuc?K$zm!m;dTl%+xi;DO*v@DKkyEaU!wB%vj{Exx=>Z7bdS2+_!b>`lsGx~a`3jSZ|a9C zX610v%=-D+G^g8oxXbzN{l-irjuuj5l;z%0uXbpN<#*%M!~XWZg{|@VFt>Vb1u*msjD1HPwe=M zMK3S4aD2XToPnnmic8UkHWBti{PeMgX#-d?)Cp|9Aa)YgOe~P{*jL(c_d;k%uaUQ5 zO>o#VWD*-uaFfOB=dbk!>|U;_)it{(ZRg9!Ai~iCWFOz)f}$El3Bw9#&Rc1AtL{pBV>-B4qkf z*>7fBy&p?huv1$tuN!_UV^G*0NnpVsE{6xbm#TIBeYGiphc@e{n#@z&^85OH$rmdR zD`tj{XCAs5{*XB*%p@8uByrwaY-!nRzS-BCHhhXfk|E?{gnq%E&9wnjZ!0 zVd7j?M4_v3y|WOxO&_WTjen{02YZAR%mh*QYojT%_+yv6q!Rsh%C7s^is9yCIqZX> zS}YN!>U6qzWbAqBAAOTd+jXCRhZTGat9uV&rUKeNKzxCFa&Vv!V_05 zEU(AYe}4kdoH9Nn&P5w$1?^eYKYJU+1q#sPnk+n(Y5UkXMbD`wcnN3!&&Cj6Tr7*+ zNH(Mc8-M98F3pX!RnADE-?sXF&DWE=zKZ>O9m7gIN1JxZT#W;$c(F6blaoA*`zD3e zdnQcvp#2Xcv#BG}ONi!%EO)C}(;Cx}!msShWmmsie7skgY?8G#!SfQI_N%4j4$RCB zEiT(3Vht)pRTEeNA{&?WxDNI*j?;_=18jR@%d;PZKxBA;8%+_4c)>TU+Qn_F+ zFTge=NZp5nwIc&AjsX}E%UYoaB7XTeRsMAZi7CwXTQ323^m()fyD22+NXn;-V&Rnq z0Gw_|l$4V!D7`24yxFUz+zrAe4^CGkvb~0Ey+iH@#<>|j2s)=n&XC#1*>w4w3$gam z@wz)3@s5qcy3|XdwvJ$sSVSD+oiXK}?0DV`taMxHBxz#C#@&kFqG@`5Mr~Mh#Fka> zx+Zn;UF;=5IO_y2h=i-e{v=}R6_%z1m2gC2vetPk#}utVHvjjtfE- zg?ee<;3KLZZj60N9GD$inO)UEz97lJ%QOu_E3$E5xe(~=5 z@GxoECj5X&q#7snu(xA@Bq=5kC|fbg1loc&UvAveswA-c1yoGwxUs-B#)Jj3Ii6|p ztOmT{dm=US)$04q`BcM(PQDUd$2f z@l$k=?E%D(cJVoYXp>Ps5Ar~1oB{A9aSK7pS7FW4uA)*VG!egVPlmA;C4se(^oGD9 zfN#ab!6bK_K}tKw;MF34*%8kUx(XPQ0bsBmu{Afi)Q{n4a%C+>F;AL3BJ%RpOKP+1 z=yLyS&ehXZr@SETYvk6AtWZ|eX*SrGZ)%VKSN>8Iq?VrzCmmYtN zt}l&$;9vnS^c#dU5tPz6vRaoN+ ztbub^UM#mQ?Mu`pI**L?iL1<*G^?%A%t^%sJkKx_hLg+=wE3%7%Fn!2;q6hW6 zIUsN62fk2(${}ei?^8M>4ChG-afWQuJCnM2mfIGrTkb5{{ZN#@cbLCQUU@qT1S~OU z+vE9f_=|h65Ydb50D=?}8w;5(tj?f20>h=K1&9M)kU|J$q7U%X<(B10RD0@ zN{7b@X$KCF(A$V{8swgu_Q%&Dc$%~fg)c4IbYM6Ez#>iP4?acElLE{uvQ-yJh8SFb z%C>EI2)2?evrra~s|Ns{_Ja>16Z3q{#=y4wg0{~)Qr&_{58?BgfcI`+5o221<{ry_ZRl&ts) z(#W`szTb!4k@}Be4xw*-!Ri;;LpkAkd(*3$~ouM9}umX78JrTlj+l- z`cMO(O-{^bzTXsx76$6V?}i~K1BXI`&%gD0aBgA`Lj|{kh+aFvJ3^1Gpwh#Lp>xNk2w$% zd8G-ojucL$0-WHtYkrbto^zJ980(i3YN-96LV~eM9p=}++9;HdQ|gf>QQ>1p*ve<& z6Ft41G~h{mE9&*Y&!l*bvL@Gq)U+BBKR1FZSi0?N30o)Abw+U`s`{og+NA@ZBvrFw z^NOmt8b^D9bkdGC-ri~Dyz{BQ?gD?9?OrdggAxy~WHV=4f|iGHbZC-nq;IzBRO%8# z*x{SKqEk}na>a~&QRj=6YJJGO`qQBTYJ|x(ujzki9bFc z*$$V5KIQlf&Xc!pzRK&o?6nAN;~K!=MBTD5tSY&}S;}M)eoonZ`1ZJsIQX)2Jn5y* z-HA{ro`FU7R@hE&kdNVh3jkAhQt5>-(Cwg<9tS%P#vjqtKek+H9Gr_9V5SLz3KJND z!;_XVgT?^X59HH3LS8E^XjCp_nKG$+qqfcZ8>h&7NkN} z0^|xQ=?}XL`>hE>(NQ)tQj=^uA*auzurb6iy##(dD|y1S63B1;$I@RSBi2&~WoplN zX`l-XY&1jta0ikN3iO@Z8l1B!auRJ2b~AB&Z{p_YBsOLYN~?h}@@jhJ5S{I&6?|Vy zgMlrqZ(}}ZJMNUG-EI)^ZexL2c)fCC1`!|0fA2=4KhEh?j?jbYgBu2h8UXOOGpWGD z+`(P28d{^sHU0dUM#Vw9+?U=_PA8(L`=>jL4 z!gGZmu)DtF+6hXtfxobJDzlIXUe(Ukb}Zj2h%gLB>pFy~8VBj4>v;kctN_g5cb_kS zXac$vbJI>g*Fu{lr*0H0CpAtJ@s%AvtX9JB^C6w=Uputi13P9B92!q(CF$W*b8NkD zW6h+a-+jPQ&l;FizS17M*U*cYEKu00<6aAI;y5~frE8;Aavw`@uUhPw8qwL1sNp~=o3A{rB=nKHX3bJfx(9HEZaSe%WRh8GdIZ^=nP z?0G`F3+3I7=V}`qIbKp)MIt7Paha|3^hCmnGm9N8;F=Lr1Z|2SRNL-RXb1eUSf1@d zQrBa)qt4j;$Lv{&YDU+1;Pe6R_&+)0l4m`s1S`_hl7{`fH08bP!*{QDuaX?##a z?dLaDO3AT%Eobg?Hir;%bzTf2^HST{k_`WRgF;aiEiDsbOmlOuOUv$&=#oSP(fJbz z!S^R`EZc^aiYzuP%txK+%}0%inJjXj8*sP(=rWR60!Un#o8pt$D{e=kD|_pX!V9C* zndUO7?3n~fQcwhI9&mXzp|!vl+r!>tKW*@a=o_)8U71RZ%g9MmMj(N?=(w!RFyY4p z`Y*?OD!gA4=wV{b_DGu?oKku}`ww-zhZeD(J}7gpA+VanDIoVve0l8VGlfCh1Pk%!I5i{1LRoba zPO~4I9Zi&UdMg{h;RltWxsPfK;LVcFIafV)-uqN|e&qhVE&#g&8X#2T(gyF%q}D#N zOI9oN_BtGRR*1xQo<*j56D-P#9h5eZL;oeE4N6~riRvYjij(@Y=MDBA0JWlJ5`Fqu zP>4;fShNRlh|(7PKlnIQ>;P{-;EvekJq#Vy3dvWi(45b6i-N7R_RQUTvSx4cA&>Ct5u-`0QyNze^}1WDxtyENu9704LUPmoENq(`XL0InJdTF|%wv#uPOe0pL z@6Q|M`z_)Y7;kQK?U=|8aO+I8KXfp`ehU;^uAenDsMf(}YmcyZZ|k<%gX_uYj>g$) zK3>ZWGn7YH6S)=zLZRIRJ4M6ve`9#Q6dO*GxiAA_U z76KSwSbSf)Okl6J6yR`>J>j6_V5gTPTUhVF6l8|5zg76lKfGz}uK8ww6`2POFcePg z@>8|kPxj(XKFP(cwl;lI4EZ*F@+hz;AmhtV>h;vwO^+Wx8s zMUm`55HyMLe=z*)tI6nhNN(=f0nn81ao|Q6`>Xc8TkgZt8C<<*3o)gi{8@_5G`uSG zBZ*lVOqJ@Nu>_0a(21Tud!NBlvYbkTgGl)lrcY0n=B*7*g$=#_xZL~cxd0><*n+jM zqx?QlGzZDZ8);-@6VzK51V)$#S@<#N$X0xa`VKyLnS_-Ej*R1c3(|X>^SlEjNFn#x zi6*>GO`!6NPO+94{TA~?r;mAJI@oTQLx4_$0V60j1S+C16 zsZvCZk&#yhNjsh}wA1DAHSG8>dVtTjHeny?JY#%#uCPMt zjk5$Es?@iBqXIInA_vu$w$raEcpY%G1ckH>%4g$Rt1iJl7!4Ef}BK2f3EPaGmM$qstvnRhF=M zpk89AK*^^kmab}I9OdnybQQ@v__CX{h#2ypM-N;t&JSt+L_-g=M(4djAB2y{$Y5pp zWKY$ht)a-x23b>v{GfATP_RRL8)dWmib16s{#f&In`QlXdifFU6)Y2Z?c&FVLGxU?K`jJ~;ouVq<6?(MWUOL+CskZNQa>2kmKVGsCdirsVN z&5uV&!Z_~vbzf(N(P(_*12o*gkoI@V$V>-~tJ7(n0e%;Yhq~Ug=!iVPWC)Q6MrgiD zX@=6g#C@2}dCDj+vK6T1A>zfp#!>LjpR>S)Kc5~!MB(Q0{|=!j(s2Ty%=)e4mRkQW zyb<&n5Km1`{YUKde~VJe+b1@e{V%X@LSgBso4j_;sO&&>Y16cNB>lz9qt3U9RXX73h+(| zASq=kfd0yc6o5f&#G+C4ZG8KE#PQz}fjmIlR%fb9UabJMCK%h~XG1jxsX&!NaTdw? zAKEMhqI@zi730mqW~l&y0Eeaw$j<&KPb0W>#a zh}HfQ6E#gJXFT7~c zQk|wE#dP7gbEBlQzk4xH326KTr{1stMs4DQ#M#uA)GuGw`}gglPXWy#I)%pb2aCT3 z|24BJw@Q7nhX2%$4b)$}lRNoPKgU0_>ZihCsQz~!G#~n~<62Pt(1$2d^q8P=y6IQk zGJlQpQ$YI2-8xG5FuC8*y!AQ){4-OM`W_Z3fM7<7iuDzje-&~)HA&{TH!>hq=*ISb zg9J>4F@WD_=Q?V9lM`(j?SoWJK#@*wEFhBtK)*$m5j&Mtx#x-ChjFSpGd9R?2dUmZ z_NI>D%L*u-MMAifIulaU0Lp7=tT}1{HG>#Fi7!6>d+q-5)&GA;ABniJY;|HL9~6=k z!L7bn!Y1|`1HBqGs--$L8(+;e0QPr|V5ynTdQZsmmv0Da z%>oQi=egPJYv5inl;`;PngE5MkR%(3tZLd(nWp14txSPfCjrS8S6qYgk54aSK$bbio!y?vz8X^H$<-mK3DZSgWKmPaF-kO!-+Jy=LgBbI? z-llInY)7-q4(CDb7TC)0-HlNcE~x15z!I?l@VdxRyjcalE}*zs!G{;x=R<-ARS#r! z&)cKdH>>3LCLIA7l@Btg2R+i;4M3q$1F-Nf^zJQde@>dX<=I9&fMa~=j#JuG5rX@C z4@w=*P*e2>6F!arT+M>EU1}L4DM{Nfws*8>`UwjF92qlJ6!JY1xa2~shC>7Y!`W8EK^K)T{JGv06ipm z#l|tF3NXl-@sI?wpoPUX6+j}04%)lz4t-$O0!&zao=+P@&d?=W=;a z^d5ia6Z-Wndn{Q|Pf!coG-4JITKENAfD?o~r7q4>hc7r#7 zzyoN^1E5k{Z@)PUFwRwp`D`+T?oP)i8yt7u;vGQ`+mMN8Sbyomg;wJ%$MfVFJb}`4 z1^H=25tRVJ{Rtay7H`03xPT(xGLpoh$mxhg+|Wsat|!XE?t@4ma|<4fBT)~ST-!bX z%Q=8hbdDc1Sm){Yx7|{aWR@}XiZu^V6*Q9EWewZ>$9LuF`?3y9w1pjD+$Yu8{_uL; zft@R*R8?n_1yMdx)X@jp`WZotg36)MX^&{=Q7K!&EVxjHW1uTLQh4_8RQRyGTEDOvcygw9^6G0{##ZN0{TzB#XIG2tee@5y}e^fx*%}&Xbn44WlA4c&1@!Jn#Pb zTL2V(xFoi4>`b!XOO9PxW$0*V_pjb1YpMQpmywZiOm25YV(O+fc)D5n>w94U7^I&J zsHyCjSXhcHL=fwm@rDBjNG<1TimL>70ES)p2V}v535q|CvGqORj$zL;cA|hy?~MNQ zMMB-;1IOY)E5!eE*&@j>9wlzM{to!6UZ9Ue@53LWCQcy?2;|-(r|x7Kx&~$AI{|vE znn?}wPJKO%@eIm1)JZ*W1*agrp&cJI*0P*wT0u#@t6?0PC=ag05fOWD5=oNWgTb&OU zgrHdj(-eeCiN&lsKgak3r+V0zZ@0G>N_AT-I+O1j7?tv-0X@#MiYG5`Vl4IPrIGnz z9iMC{P9AR$T(X|H(G>YCBBiaE!0}?q(3Ock<}t zG_t)UEacNx`urD&v~%X`7LM!30Go3lex1z=#RuYatU1u(_rU?=Mn=;E z6$24LtDMb+ zA+%F}L`?*K23Mf6=@koMZ}sJ|oP9}+@nH;%lM_tDZT8`?thjPWKgnRi_qJ!Jt%ztAN?rLB+qW@-(!wosCIB==gHS?Nh7}kK%&M0->@_2qVC*LdjE#9Dn`nkoQ!4#qn?o- znMOLkKgC$2o#qhL7^fq4^J%0i1{*eT6}=pgn$XETBm`gRCt|<_E5fIWtAwV)m(5-7 zhUibeZd})|Yu?Drbmh}gJF}%X5HJqvn@DK)vAs#cBo!c7*zID!Mvof)I~+e`0wQ`{ z^$GL+nmH@9^_WslnYG~^tpHiW2_~t^EF9t^S2$%C)Ig^br&4zRUEi|5A<_$}QK~TS z=yLtx^P7VfksLqq&*Mh{(FI%4X-Up6HCG8E@cIV^zpCRYvX?5LU#EBT~NF?sNypPCXy=K3;+lO}p_ zVFG`w<1i{o`Tx=N)lpG(@ArzJL(hPeba&Sf(jZ7F7_`!$bPlP&&>a%eQX)u8C?dj8 zf|PX0&>|oqp$PuY_0{+Fv)1>w7Js&+2{vo8~18I3l-DF)B>G z_)umVfoUEtBqy~v0yEZG`pf#5u`_*hOS9SYo`rcFut>RScPLf*$qqEDvG?Ja$9rd1 zC9ce$Xy^FX_q+CN?q_f|D}aGmwFFz5a*m>Qrz1dDL9l=ulok??$N$Kh#1-z@L)4%J z1{H}5YU!0wOWlc&FhbMRatEN6h!ixS&_n5(ZA*Pjbx5_Xlp!%HK-V@G2|K%w4dqzn zsZJ87F{d#wwZJXSP0{zaFz|8wG;vSbOMg~v7?Tqs=EIygZ%p_^*4sNO( zcrTYK6m3%Rl?b|RucFR!cRislCXoI#f3rh5i(loe?)|S5UKQ87Hi4q|m&v2H1 zl`oIZpx*wTB!2|rW};7d@CKsntAh8@UUkTsd-bw!Ja^{w1ckc`iF&`Pby*6j_I)Ug zVRjJ4$Sfn)tPItCOO%5`A5d3E_j@l%KIa}oHVq8tgwxU!edBW?a&><%Qe9V>7?=A7 z*X@(!qx)F;Z^zGvGolq{K_WgLtN(HYBoZ`9Uz2kWLBOfK%GqHY^N=Hp#r?>@ONS)h zw_j=%?ELA4`lezv+INSPwM+yLAoSLLmz33J)W)iw>CP@7A?Z~7>CUH)`!C&HoJxwSZ(>g3$3gUI zBd{7=xO3_%9BN(C3;poS8BF(9>tMB2DHjjVRZ_-iCU#TWg|z;=L}W@oc-vwf@I8gs z*%tAn!T|KWD6~r#;xjlzDy;b(DLpJaMO8gCw2fN)#8Lk61gYC5s7~1Bv%t+sc;Vdh zkSFMlINDkK(4JEPpd!UH$*vaQ4c6r=hfjfog<&Cl zhodX#4D|tZV&uFE-J)UOT7I|^C4q%&Pjs|;2dr1&VU0WuAI!QnGvkw~jwGMu`9#e? zOTA`hCn<^GOZl;R)`M`pW~_z{E%*2n1T7%%9j)Mqh0McA+gzrLpVqou8myY_D%3V4 z#?J{#WBG~9MC=ChY|^4Cn#|r5`Iuqmlbxyo;6gkcLn?Q~L}W~Vd6*6@M?ja1`2O3b zV4pRQdf^@li!FV>9cM$L`d)!zqqj^?_RUOI(j?c@WIl3K3boZaXMQXJ(C2XG`q#WO zM|2cii6`0L2T}difSX?320`N)Vy^z%MtfH&Dg`93DM`YH6t?Hwdij~ZxVm{a7ogpk zQhIz94f`V-vHmp<**m+Ret1^wvkJM)D=dCUSNn;F`x~sxJH=gMu8^;l-VBmVXHFOG zaB;J($L_~kAe;Ezb5OTx)JAUHS&^Ge-7&!Stk}d-cW$E@+`5?$fXMb1t)wOjFUJAb zttAqx6;~~4w&Ke_9((3vEhGe7n&23atln)dn$H(3zN9s#;?ow(CF*~)4VOzXj0$aR=nD zneg?CA1?_z``%bc zsp+2dz>=%jFE6YnEdbr6RptPATT1<*BqyeY>=q^^oR8sv0X2VC`3GW(oH$=|jf9(k z=4IA|=fWPol+q71KjR@{04ONiUDTIDUt;D#OpMYvTOb8W@r5lSr6j+fz19VcH7GG6n-)(&!QM($ ztfbZs%IYa|)+bP=9MMsu=hW2^3@`*lSDboS5hfQJGFJxwoSY9l3flxyRar zkamsRtqLD;+$wt^$Stpl{93n)`7)OiHqx&xC8B865yqe^TUSV| zGBW)3;`GPGH2~esy71M+z;iWisNW4|q{XmL$}=b9KprpUQLJjfy0IvKN?FWozH86Zx)5#}q14v#oYl10)4N;+SSRQz~R ziamuX%KX6m!TG63;2*p%OIMy>k;-4HB+pWPmK z@!&@b+dQheKqPptxWluWXc|#9YV8>jGK29|F9@G!^=Gkd=PDvFvvGsmg26KAM z>Dc%@m6Y`sAunG~rL{PhBD)#V=flD7Lqe(e@^OR zjcaJ>3L!rCtuITe_UZw3x`nW$ujzo|@;^hL3>q6|hS_PbC1em(TAX#3emE&i$clb$ z;Jfd=8K7GzmpUfQ6?*>FT&KsZ{=I#=I34?`4nGAy= zt9ntYtQnDGMCpW55zT}7E9t5$gI}zk-(_M_y7hhkb6Ry8B{V8Q#SzFIa>q?=0eF;i z28Zm+1{A5(@C;%JvFc7HHTnsg`2teSRQoDDPCy=u_Nc;zH~qqF0L=mziw|uWS28 zZOnYX*l+{Z-sZspYtbe9`F@aN2mwgXnAfwucjitnh#Me}5rDXSy7$>cm8lD60wltU z56wWl0@-e$$H?RT=6qH|g+jt)>2akN68oKn&z*mgp5Kgb%S1n=Dfrr|(5~z~EV;E7 zgpq22nC)FB01+w9*Y+vUVfaMV&xcstrHij~i7?X}OuR}wAV~k!3;bY$h6O0H&ELB- z!Mt-JU#A#f`W#uNRy}@|S$R0#58(DUSJ!X6`q#Y+y${Q;kd2vAUJ0@2%^OR(mD4QN@Yex92J!-tZ=(i&+w0fgQ4c_I^of zw89*`mGxC8aE*MlrwS8I*+~*=t}k>e>sfbiei;^%5a_6pt*q@hE&%~D-8XU8Z={vo z*4g}r$%1Hx|K+p((BLcyWuVX9_urMyLT}*DpC5lC4o%%olz z|M!{7-AMSg)JXPV*CAdXphq5-aR!DgpL~A#^ZYTK6C(*yqrhxVy%2TNepf>tfqz`< zJUZ3E-g>BeOZ2Q>;3r8VQC3GsYMk!@5=6I00rqxH5+_z#=*sNllsiQ!Gnd~~H1yvB zJBAN@@XhS|Md@`u6(+K4M6PW4f0b_mcSth85B2s|B>(-?5#ae)@qaDJe;#Bv2|S3I znOAn*?>HOc7Zlh4@TUO*e}6iGB_RzOkGnOW3!$m==Z^&>PwGngzo?L3{R+%0!FRLL z(o8vj#lOFQ01k+bD`bxWSM{5|076-I@ZIL-4Qczozpb8!?6;e$!es!uchGs$_hi-G zxCRYe0FF`*)lUlC`@2z9XfT7D(LuLv&vqxW0^6p*M(%;Ye;Bsk%?V?@CLcQHvT1i9 z?!7fLd7g;+TIF7$?C~!;`p;urg2;mQ>gRfUVB5W8ZK}=YI;*Mk&uU`30fZi}l zYijj+_rK4T13i~$g&TURjrrcRS&`E7e}@0>NnizT6CL9;|&Cc^|Lb_SOH~ z#uegYNmj33;pXD1x?Z4m(};@dUl#PA*Qu}ws3<48{+n@nvw5_m&nxwUZvFe3!9VyA z;5M4LmM6TQM$6J5LHKYSU#;0MNb2|a068&R1Z}+MBUWqQ5Ye=*CR3WOh4#r@w`l+p2rX<+Oh;r!;r{u$td zdC>hYI&VHA1y$Wd%Y8Tgr*%ir|K|D>vDQ1yPLW8jF;NT5aFc2 z)hGLZo*qp2g-$80j_VMo0o<+}`jpoVSQ0q?IFSzbK1TfcA;5@=J!4WMx&z{I7C{`% z&{o71y6PKoWg-9095F{6f(*N<@hi{&rv-OG3y6sfM*ph_1jEr0e5if={i)~QeMt;L z=*FP4WxoHlA0G5KcmHQnsN@AfLrm!I&A)AE2kW2@H$NAQ{m;l33Q~KoJkz?WXSol+ z&T7$qWWkgOQ0Zqu9z2lw+%4l`X2~CbW$*&L(KO(ooB(xr`$f1k$l@K>M@qGA9!5g+ z8(=DIAtcFe4{$J+CAonwPd@2Br~##Ig;RTj=l^T89S`nqr*ce$KM3_U0X$aO7C6`J zb1o0w?nb^%tTxgD@~p_^}hoIdM=nQ^9bcJbWh_F|^ zEjU?>mk)b_`iP(T(0usr!z-EYCaxV6WkTAiqkq{K^YMf6gTD(3DKrguxVbedRgbb= z+VH(V(XA2BBZAL}M$T-HBzpHg;ZJUalzdfhek=<@l^N7~L(<+`4#0xYK0-I{vzhNF zcA13;RtLbVm{YWUds+||mHGK{0B|nb1dtMOu;iAk!-UW)Y#8k@%U1w#)qyB%Mxbel z>=p3D+XOa+v8s+Bb$J1C4pGs(EhOlKYac;O>bShToIe9dU2|5om@43bHP>=-U<~or z8()9fd9@9 zjEn99l=8`Q=C$Nsc6MVxT;T^Q8aDwGM@0W|3$O>rhi`sn3#erPQh`?%#*~HFhO)F( zDndYldmXf%BSot+5N#s`!5v&{p@O*j2qH=f1Cs6p$`nhN$A^NFc+R3k_Orj90CeIuVfIlT zj2bhTJxCqK9yZYE3av(ZQwrybVT8;U#K%oU(f3S1-LJW5+a*>p-1A`PH{K3DTpMZ! zMrd~6IgN`FA^_u3h$pl<^~(E8U3i9X_k=4%S#6MS=*kG={zIz%^Nf$lf|?04m{KW% z!V+C^zHlzY^?QKciGFc!lqVoy`-uf+GASRVY-_+?lo=I*cmk4HA2)XxFJ#90!5sHG zQlFG1Jt3@c6TErx;!w@h*gaJCoq+wDtXWuqiAR;S*FAmOO+ZN4{tY`67Y5V4lw;jS z^MsRz0jV zuFrto&-r}c$|tXu&VIsc!aThERo6jBEb47?n4KCN{L@{n&h`@h?)trqE-bZytd8mD z>r#KMaCSI8oDrfyWYbrP1hav%9a^7DYLzhko=aSvwOQ|Zrbzq?Os1DI5KjQtWfj7tCh?6~j%p{S(fTyQHV2=X%7czIvkj;ylpO@Kr>oXz z=!Nboz)D(QWr|5JjE=9Zb*@4}k`aliNgxg@sHH1iShrLTCn4^z)N*zE*yl<1T0C zt(2S9HVdl~%`@r&p-Jj)jTp`hI*`dJY_ss}^d&K-=f^_0+h9=qzJ~t>`=AMIX zVAfi+chY$<;(7HJ;eq_=(PHe;GLTi)DU((`jn`sW?l z_s+X^QJ-^#<0$#)RV9A8xxEzeq2*F#2_ zliKrkiCK>iLIY>0me@HKaeX523ltRG`_^N0+yagdUVxt4sAkw|jeB3{KP`0d@enxmm@}s}|wouna2Mkg)0rqLO+m89aP%UUhML`(z{?k2y zAa`cmki?BR*TAEOj4(HT1 zlG5uJP7$|X8Z-0TP2uhyJ}6wX!}D}D8>?%;mwZw6O%lI-Czg6_cN`! ze~ni_yvGy|qWsmNj_Agt*Yl!|Ol zH|RAmLWK|%9er(LY4T(ieR{`M#HS-zZmn<@tmXd4SA~3kpKf z)x>VEVD3y-PA)K@ehutaJ3R*T6vs;SIU#*jTZ+jWjZ887Bv2S^Yksrlay8LvzYpLI zM2WI?K+I|E>RJsIsnv<6mN`WwoB+$s_LaHUezds`lmoer03E0mA2Vo=ii%RYesltM zC3A*Kp;a=&ut$cc*TP6WUCaO|S$ZRC&qi#A;j%CK{<(AK3G*F#Q~7xxjRybsu0iG{ zAb08g!HoMrO1y8qdM?neNiTC|?dFH*gLo-0-8N$&jMgdLE%9dQ9FQ;YF5^__HN{Fy zaV@tAn{NF){qnQz0EAsUFJ|0&i+W!owISo(Qr?P2mSrX6>%gK%x2`rJb-sLgBIenK z&sk{lw5WG52LvV(K;m+zsMob0cuD%bb3ZQbz5WG{$^@SJ6SyIl^u{sD%e{bvRbUVO zU|sqXY<1dc`7HpR78iG!ZyfjO+~{J>psA7Sq}hqb_NeV6{Qt_ zJ_Z|{KIHOTwENt+LFWWSJlbj4X0RR9x&#EMG^Y^$@bHx>*V8`YQRP`|mk(tz6%P+o1Jlm2L$?R24(T8WYeM9bW#{B?@ z45!Xspt}pE!;_|?DAf%BE=W=tt*{7U1_D8l=LG`w%xH1Ti}m0!i+e@fXI#5NeL?KN z{|flDI-LIa_DgIDytc{L7xsSwVW6FWoAPDXRgk@WJl%NYfBgKwpneSMpD?NP2Td9?1 zxdQY)M4L?@LfwEQmXKVcF55h9hrzZMdR;-I5T|#rngzs*vVNc&`xsezl7nv6 zc=TSoX74I+q#!ol_*kga?h8*s|)@St}suB#gXy2=KZ?D>v;6c`(h zK=w$`Rd}MYC_Muln6Rj-!wck@ase+-z+-7qe)yAx6yiGoTol`qYnGqki-63|Y3g)d zd)>pdjhnc~(gV*$VlPY<2=>^pK#QOqF2J+`V&*v}ulyUgZrN=4ya4LPX#gpw2&|fo zGf^UZ!F!3{1QxvZPq*;3mAA$#a~+$OQ&$))+)1A{HAtq&fNI@MFePBjq~%OnIEk;J zvqJ8$92SPT!IbNVDnQ{g%SOBbr^=ap&4_begNheI6G&A&#o8FwvLt{A(8F&cj9N_v zl2Dkxp_W!y!?Q>_ut1}kzC*;4d;H7iFV~-$2kZ_2K_?X@;wb=>@%QMQfylOV&4XGg zCxIZHxwD|@_SDQVtz3#Lsk5}o6R?$zhM6*z>>W zRXJvRj^SI=2$R0;_hc<(sTXw^0Sez=(T@wxsm*md%8)bPOSz4e8w6KnF&<=^ojYWb z(cLO&i-@ZW_r1ZeySh9P2K#yqJzr`iH3cf%7aY<%krO}^e$(g+7H0oNguxI^%dpB7 zW{LfHvgK_~i3ooUguQB}*NMU(o!WxMGqTqFs#FZMm54H%Jjx7Fiech}RH~W+#j4yp zA13*7$Q?1~;z}r;MRZL;>Z3b>r-phb*tC>X6?4=<`(C+iqS`X#Zs%v?+l#VQeS6Eu za^)EEH}u4Bx$hJ2Sre{fedRDsUSacO(-0sDDzr$s>Ux^@B4V0uL!KyEGsh zBZ!2d_9C$U)N=l)#(^QykO$+RJ|l=r&XZ8i8Guw+Vxu>+!72kFSsCFIq5VThX(Y)? zzFVh6mP~m`T%VQ_(!+&p$SI3~jA0<4aK&W=i@SF%Z_;tZ`?izEa+m#`*Y?nJd9fOc@1@7#lxrDl ztp!AJm&MMGhj?8EL&ba?pPU2Nu~oc8egLEs_U4vvu){0`|R9idtP2TIZa8uq-HIo;=k~ zU5@TJt=5`3__>*Vq4`F~WcY=PHxyJ=F#=KQXL!9-tG!1iu3JWuQ4l^>iLFk<1b052 zps4m%?TBNK>J&I&@Y|putUulq4Xl;&o_sy?())03_2lGWYby(E&9b%LhY#lV`ej1N zr;=Ki8Aa}wX4Ry0#yqG}((GD6!mi-@3RiM4#*`P~--Dv~zD3Q``LC_hvDR-MBAr`C zYEn6x*%>Asn>k|8Rd_1f0QGrt;T6yZ1PKvxykfT$p=YM|m-^t{Vg>12wgJ}D!BaG) zdu*GFRk%Ya2z#1*WZi`peiAq~>{BF^vLSItQvrv}ju#h1V%e?PT3c=`aD=ohtf2j9 zOVxpy0}&2H>(S)BU9q%Ojnf&KmgF!QRt)j=5ST`%b007%q3sY?N_dqDiv|G)`%Qeh zTuayR#4kU40GapJlGqSH_`^Y;`4Og!xPnyA?_1m>0(QJHI0GE&la)fhJ-ORd3Jl%)k$+S;&R^ zzHoIdZtIYGeI%& z)xMdm3mUYQ#ow{okg`46&6u)E)?Tp1n{A(%_gBdx1)-1hg4VdyK+9>Q|zvl!^u4-m+9XrOlK>!=pJ7{a^8f%c3r9XX} zd_iW~<~xgqbfm@XoB5jmVY<+}XiHd2oIF;JABX>J%+0wX8Ku#EY zA?N`MgGqq9>NGrrV&hwIu*O)DH`7j^)7}eQHm0Bh^)rGDafC#ILO2mV!m`8O>rgO%HHPduUy)4iSNbpPUG3@v8mV09_dq#s8OBD5T96Fe z>q;k8c}n+Y8mYQIWwtgEMZ9#%bn!zol>p_@iIU4(Lf5Ostg??jHQ;j+Gfddi90M_Z zV)Z4Z)Q_6|^w+yXFEmTp21*JWu<{Uo*|fIFHxT&}IY6E(j$}m%%iTeXv&fKN|%qGLf>>Oq)-k<4keJB`pi zdV()!Q)Mw9UMl1}Neeb!G#kZQw;s8TvN4^-uM@{Xs>3}E+j+zHn0-Vi!=5U#S=et~ z^-n%d?rkA|-T5P~5H2t4&;u)BRXKCk6_}3}E&N!zC+nyIXI+A~_iXc6L}1t=5Uo9p zy*=KN46WtNIdH#?@TUbNN&@#&_F6xvf5d0(K8W8Y4OCuZd_#<`dg`l^I?q z^L%sCvSd7x`-_zG_k=b|qyP(Fr1^TnOR-4#xwIM!OLSWLM;(Fl+eD%*$SnA7UxB%k z?l4~I>;|$8M^L<9`MpC#%W9L+MtwZZr}@|0p%XH#O; zy=%pxoNQ{Ly+7?j^$mY4Te}RtT}7Z?jPul3x~+;?)S^ZMiCy>u|B95RsMT{1h0SBC z@ST*4NX2iP{MfUKotE;CXnNNdrndcfn$@3~f{>=CUikSqAJ*S$>zRbxz8aS%9&cR1*_(=HupL(?obzS&{ELHch3po)1)ZvR%{0J#w0n z^1dQ)tU$dJkj~#5WwRhto2s-pOfOzvWb&=mlOZGE+>x@uw&)KxiE7gIvf*v)VG|GYF`sr$3L@;g*ow5zN<`wr2XYYEs$|gIm zi-V!7*C!|!ugXdj!8rczhfijnM-`*OrEJ?6BWPGJ;+*Y&Box4G5{$H%+l$v^Y%*2S zqBJ`vyOnkBWZ_w&pa@2WnQnx$Gb7OLxO~tq8vg<+s|ja`=O+N?B3*|Mi8$j!aNdPe*eAV~Xwh5n)t!JTo#VZ} zFJw7uxwjGDzU1EM8S39gb!9FYbI=&U&M0K zkV?h4q_YPRCbwOS-Z&~b(Y!1gDBDPzpj~-20N1Is0|WnRLTA-ZJ#k^c`@l)k`h&=d zX?`z!9(HHz*52biG?j0#%py05%!>=-V&hzoUW~YpqQD?uyvppC$)x!95jYv7bAB$3 zkm<@pX0|4rvtkVbjRDCqzI7R=H2PA^Bq>y8h9c>cWh@m=@NkSSbFjYpxh{dPN{h40 z^%uHUDTy2OhcAfw6@{y_4`!brvi+$eUbWTs)?2~!D`W{{c`tv@N!4CC_ukJUOQ@oM zo8*J7EY(aW){F4AQ;%ir+6rTupm58>qXiprg`CLy1+SOht}s##oX5!|Bz78bAzCji z3ykY6j0I*0tY#D<3?{1Nac`T7r#(hkJGo_gf<BXduFQN*EgdT<1PP2qi4HqAwfONGF+zYhZ?0Qno8+lAzCml zBTkPV!}I3)k5f1A#EEMwPprP+EKpz;BPYydbn-X+FLCG$$p>b-_H7#{)X*}`AJMBM z?7R|rzFB&Q*oUKtv*8qB+jA7#Tcf)iI3iWTCEIYS&02{imcUZ<~t^1=$3L3 zD2hRuPAN5V2*-G5MMu3n;2kyNu_dD6q`Niws!7WX{lKCh>87JmS4&DoL?=&!m9k@# zo!=LiOOA0QC{@%dm~6bZxOFb9C79cYhbEbk>YZubwqg4kBi-E*vRmQ|7Alv18g!IT z;83CX@rzflzk|DUOf6A9Ag~+Q(mV)PT6`z_{*-?vbxyQUbr~g>&&%{y~&EtY(uJQMv0Ba;*27if;E2OtUTpT^S?zTooIgVL>G} zc>*Kmw()1)8~n93dEy}Hj-JnNvBbfYM)hpadaM1W8Y=u2p<giIGP7+0k;6Fle`J9Azuh6;CgH zjsJKXP5LJGE2~r;R~Ks^y=hoJ*LC?MB{KWy(a>gjVeYzDUmr(4x|g;q z7KZk1%W{DoefYVAR`P#xliF4cCs&@%*~NDA{8y5lS)qjr=YUvYjG_jTJ5JdHzhB6V zTr^f#BYuE7m-KsADEY~vcdl#O8e?$!Y5uFV+wM~yO{ihk%-XjvrxqAPc;gOiez3xy zy?sR8kS0{|vQb>gpM)_fY%b8wnJ9D7e`ke_KI-U&Y*8B=x(xJ%q=|^;$fSVaDKsXN1gTVjs=SiB5D%f zx%%ACpW&M`SgWK0gi(FAP0DvYhIT-EAkHczv(Ttv7^J>Nd(hy{U~=BCU+S zqNdht@!91bo;Bo8lbPWU(C`*}bmon1&*MEmydCtFI+ywY>0A0ai0fK+SG=zrdsbi2 z8)n{4a?MvzBWXi+M;rISr;f-zzdUE^Am7WAsW6Jxx*^f|(y!~0u+M3FqnZt#lZJ~Y z#!A+S-Ht2)L<-;zDQRckv?H&=Sl@iw&8_V!6Hx`^mL%><`0(Ds z7j$Rk9z6c_T+mN&v0b~RsMRm^Tp}p->a6}TS4;A*A3ace=-*lH(g56*Q%^>Eo?=9X z`6{k8UUHa}`OtbUX*JcufloxQ&tNn5M{~JEuLaq%7{}Md0TkekxmL~F$#rEBdq%Y` z1kqttF6WcwHFVS5q|;s~^ScOHMi`%YrhWCF<{KrSDSJJeW?bU~N&y@mIaNiR>-q_b zWsH}Pw58TSO`Or{y=wU%+sd*H8)OsOQfA5r0ZWNjb_wr`S-mM9TV{ENU!#-tB& zi#Aayj_EZN+ZS7(xua;p$sqarCePx}l(rj`#!-B(3q#y31?ObqHoDyHSqB~&j#H1y z^1^}}3C{Nj{Jwf!^URlR0G2Y=&w5S&5GW_pJOU_n7Mv=d=};i-!XW_GNk5m@>hRfe zTQ|Ql^xNs6YKIQ>YN&mPrxd{+UE8^1@G@f)RYa(|4E&oJ0esrn$R_UDxcn0!r&gIy z_b=(PX@0Be+XFNV#WcIgLw2;`Y1-%yyKref_pF%ZJZF%tewC2B0}k-P0{v=l8qhP} zcabdkgv^A{{`--`VH_a4FUHoK5T?p^|FP&%Wxr zM*x1cD!UkPQu*M77p~JI2Ow_iMgXEmj}^e3_W{pAHxTmu0=>tzBLH&WA6@k~ycxeK zOUT%1{&RXqJx<@BClu5`nb(d zVSDvDBMLwjys$%c2PGEdiBX)!tIb6La8Gb(>2(A5_u>U>thb?xbVdnK7y^6%#bO{L5*Ur@UrZ|3=?bPZ0A3qyTB7eSzV!G+MrmZ1-}=O;h$slM2IvgH zz5qx5bx_#r2cVc$fIj8ZL>Fck64Bo*_H9{+;(fxj+$gj9eFWg~sA@N$`O&(WMsd)s zvkx>FmlO4hPm6dXZ{Sp@|U{BkAo5cNV-bOgBN%&2FFQipFxjS>~Mp4D(%Hmas& zF#5F`TvA{PaNaYqPt9DqkxXv!;^iaq8gVJtn|pv>TIqFXd1@0zWAn5pa@OBaCt_?# zMOV8&leSmq^8U!<8NE(%`IE2uE&Tu|xbOC?goCrjauQ&EM#;!;7WmC1W#UDA*h zJzpkZsP-K7637R38U#7X_KvOY$U=sdKTX!Iv>X9 z9Iptl-E&Lcu(;l@$@qB&41&_D8 zr-z)E+5^kfm zZsS`VLO!8Ugn?7~a%3{rlTG=|{s$}WddMhx32lO0N>H{?KVyEDI_G^>Ypj|29Qpa6 zWwh&8wAaSbo6P>1tMXtmS@1gL17ee*WR31KW(zOIjCkjJa#FF+wNm>d+0&AjajjbQ zACvog3N&P`AAb2L@B3gUY(bdaiiI<<9hm#1J{vivaYogE3Q{zrcisIC(TRYTVQ44Z z{U-yE)<;AC44hlDMbhkNXVsL zl;WD=82k-p2dsuKf{Nl1chPq6Uaihxy;S031rH0UO0O1Qo?a@gMZr|lr)SQ{IcTUT z-oQIyDW8V`^asb&AKQT55dj~K_z6fHze|4Ob2GKJ14ekjDc|2!4A)J# zA|^XqLS0u=Zf9~wlsRHHD*xF%^Kj4NvL$Eh`8(Ge-P*pU!zbM2g@`DM>8MLD3{TK@ z?ZLE>C1<|kXXxM#u6JR%jom>kVoG=K?%vu;_0Q@HSFmO==H}gSFZC_W`fdWZ;_px{ zsWE}6_r*MHwZ)+&CyBfehPN}k|3V27QQ`%B#p9nK>P^_-?Z$U38DlQm8d|oo$VN8x zU)taKo-Epwl4!pKxI)sV?Wl*g^w)b;2SGs+RafOmV8=UmbI$zjCsDp8a0*_*F*WwI zlP{-g?1#fGf5x(qHV#awjin}o>guMLRd+%{^o6*5cqvK#g_H05(Kv}BPL<>ft*k!x zKi~=~Q3$nNf}5i6vng+LwC_m)Qcd-`8clMu!AP+2@l2<)4lALOMDn=>WGKvqPQ;Pv z1|l7GFXLir4O?N$Jn{P+&C*lhe8KzRd)m!*t;fX1olLav6if{gJ)fjAJ0VgfXFc?v zzeiPOWZkx-TZCLUtj=n5eiMll^of=7u2F+i22*zubO~K_~b8~X|R{-b70$UAJtF&%yOqAL9$XTT`+bv0C#IDWP1U7ZOwRhG%5z}slI2P1r ztJ@=%E^=xGT3Fmf+$>yZBai6DMC2DS1gCk}k;?yYp_cJ-xY*mYaj}}j{_XaZNM!6F zUL*FbYC<-s`x7oQC^y&+_qYR-ko~~z^r+_^T58ZHDzQ^0a$fZzS;fQglmJ8>N>@X* zvrq)-lCN-Qf-$H|YLU6i^1W0^`d!%-LbhE++FDfMyDF5s>lDQ#5G#>y%OwZLT4sX@@1ujN&d5_|L=46-G81B8 zwWO9Y^Xz=Mi&^+@^Y|$T=jmuzn!+wrX&SSKud&+B19ELVXR{mK8AQ+qPES_uc?(R^{&j^K{2E*m$4K9c`O001@H^G@6FQf*e`hM7*YcL} zxgv~nc5YlrABz(BFeP_0g!11^2IO*&K{_7}Yac@lJYLl{>?CZ1(G ztvOykP)O8{#Wk0h{C+FHvLWclzZr~6lWEdZ`iA&?yp^g@?pf0UY`$Ze^+wf6Tr zhVsB`fjA!;Mwme@0Rx9_&Wzss=?dJdWNVvjuE^}@!Oxv{djPZ} z?Vzyc^UVE7_EZ?)zwbcVj-_d;jT}%WeV*xx{j2%Um7&Mnf{u&@LO%Yet;;~?)3FO3 zjtry?s?M%MQIx+^MW}P?q`{D34ZjPL7edJ4Hro%f=gD&Ir12bcE%@s(K7+??VEPG( zsj{GGAF@LMsfq8YI^k13-Yi>v*K}?>bRui{l$XEA<`ah^##$5dgyn!MX!24N?dM#M8ALuoa8>a_lnsBKm_eB;GW^^X(l46<0B(zh$x@& z_@Iq_F>Xc}@cnDg_yYhOEU2^q8%s1)jR}>2n1ipxn?Yxoi+^1oT@n9Io(s>S-?r_xs?mg`y2IUM{^$UR#+60Wy@fV<6!vffv1g*|C z4a!E^K|q#E)td(moxwc7%ewzu%vr^_*}|k z_|N(VO@t|YXh>i11vz+}>uci46X4tzIT{_gBBmkm;t3-`9(E1N?cxjVgJ)Y1wR(B$S>MK zaLA6`ZBx&i&kGjt0*|oad)9!s4MYJjq+R-RIr*4@Wvmd84=*=BT9zI0CrtvZOE?fN zQ1VQ|X{M1n-k}k5!;bG)+o1h{o=nK#j;s~HG}-dXbILA_mi1<7`N9|1SOQ`8LjmT43tiBGjsmG`hWyzGBg#%Fw3z?{wH-Pmq?6W}j_ON#;P*R~x)Gclgt#o!f?Hr= zyGUtiS3Z8 z@reLNb-j%*>GLl_t1KNAsi=hgaA5)mR(L4xbWkTB!RilWgq;*S-ST~|sbH;2&upx! z`1>tE!NPoZGBb|u{eYN>j}Eio*j2GG0qD@eC|^D~km5hT z*+~nKC*~@BnbPuYpCMH$Vp*}jwv18`J(SoCTdpZDF6G)ZAo+LT*&Gw?S5kEkJvlt{9B`2(6B79pXmCRP{Xn^c6G~ z5DCb94(&5+8#}{Fx%!B+SVN_HSDTo$%AGpHPIi$dT_ERr~$MUw&-}gkYX} z3HRj`{@ecv3f$l`J*!{T^PhJ@1=f;Hi}cGH|NbUZkRJG~sM}zU3kTAJHv(Y~?Tbp8 z|HjQ26X;Unb;qyiV9~IcurJAOgR&b5->{t2KX-FX1ThqU+&$jx4`5>%zIx@L>fipz z+rgc3U|0Y@E@}d_XlE;z!7ycjPWXnVN>vX>=ExIq@RonAOHiRpA3!$EL1;=0*`7l{ z-rhd=q}By2Az_f1H5Gz696+s2{)3;V-=v$50PtWDhlSRgao|ISngI4UZT@@jZcAOu=;8`9*fRzZRx*IqjyM{97)_k*@1F$AlXAM}Y1pg}W`EjdEwN=LvR zr47tRr3R;2JyE$|&^#1W1Vyn8a$Ls1Y98G(;wFF=;426e*z-H4zj0={CN78Ve zf%I`)Y$aM5WN~ue*MvYey$zP~PTvT7^jBcZO3^{D^bOo0&Jz-y`Wu6?FLORi!xgCf zE2kl5J4=#a|AN9m7I3Zu{`$`<1V~ zkf7*DDNVrC!Wc5zh0xG;u=3_OM;}~?XP}Rl3H)&cLE2r>4?+2`AORLc7?RDvz0!Pl zsRs)TNMK;@#d_+UgJFRJ`*KB&B#0wG84$(^$Xw4-_JBB|8AZ1adDK)7Y|}|N7+mZg z`DH4quX~xo8*71%#zFo|m#F6r!wZax{`&kG@m zTHJ-|C8CA_Y|rGeKEj>KEDN@cLl@{2LTr2Zm7QZKb^#qJuv#h7G4WlZ%0 z5VQFn0c%-m1BpwdQaLt6=Gu24&@=&g{S+)hco?}=a3pn6sw47e-K z;;XMf5nQM5>3{jdBb~hWJ`q3$-Q@y`{X5~o4in1b6{hVV;(t=56%y=CnIB0dPJ@#w zM1T=eqmLQHldYo0;-C~Nn^qnlF?=fvIpVAmWA8{9x8f6iJRivv*yH zHLTi0OG#|_ew!5h2!c_j_~n3o>(?FXQn;W%Sb~m4`XT!NRrlrLP`2UQHKw;A`$VWF zV;9-8MG0A=T_PD_~cYr2q4b($O`%W|j}Vq%|WfKlJao_%Z6z zck{n6(}aht{DBy>bFdY;*IFeyl`y02)S9{q-&KHb&wLC>LH}hr^xB}ST9D3gReIcH z-cw1*5iKWAt=1(Dv3gus%gV?30=7i6U>DYKJDONQBm($BqTc|#bnec^j~JU_qNi}y zDR8feV6vXLzmV|e0-qER9^er|l7--B~Q@>9; z?s5bie(Q7~PJ3^=cQQ|vxxn*3dA*e5%tdM(@&qgANnzi3gCM#6dU5}h$D8X=W-4(> zf~@}CBswzs5oqGd#E}Se4*UV*%qk?WM^y{dB>zyG49mm4Md9bz=pRYB_$!m*H0!OQCb~G}tsX80J))OeM@L7SMYg-khse)#EO;k#pzW*27z1B@}Rv~Rq zYFPTdy=FimUs@`N`K~2tqfk`d>H-N)fqbxrecxID!1K_%o2HYHpox4Ru=j2JKJyKf zEn@ElCh{-;Eq;$-`3_noCe2uL?747SK^CgL%Rl5UhR$0UY=LyTl@ESo_giZ zAP={KO)lcs`RjqQ)D-_#6{(ryvYogRg;3B~Y!kqK%ONsn8s^a$v{Dq@AcK)ugvpE(>|ZrB5AM_M!X2;0fd zQqA;aNEar#BaOoq1|6y!3(14#*mMlxk0q36bkdjaE3k%U_FHH=O|oTqvH5_~hX$;+3~xHmr9<81 z7`J%{3qq{KP*7#X^X9*22o-J{)fTItsibe`jkav>1=%Vwqf8X~5!;d1K=_H&*N7u=^;kMX?PVY59z6cZIFRsSV1yuk}4&3wtBkfxRQ z^5=T0adSv*Cv0^i7NkV3u6s&4-Qqfx!uLApWhR4EyV5Aq^%w__r_s+vRvC~C8k>LH@XCvVuz zlzi+kg@}v3LB{0D<`;PcaODRE-J;H!y(2*g9d@dhHo-b^?J{3g3yqL{y$<_~lx(VS zB+Ao2P)AkFeO<&wr=l8p^9R@4SlhEjg*Rf8Pij0fw4S)5c~VqtCIMqy9xahqzx_oIOBIF{{sRkD3KXHr}h99#{jAGui z@2u)-Hw)$4@!tQ1VCkF?<4At%N4l(kuaz1uf+#xe)+>Lby{p)V$$)6bg4^(a?ia#IdloXU#gj70qziE2lrzk%TXt#~ zCu@|Q;UJB3OK7J+wcijhXxj>EgwXYJ^x^7W6Pg9s;}NS!9D4-7TQAv9P*>cJ8r)9P?jGwu_Y6-C-&^Ba!yd8uzfF)^Vl8~8CgIKc_u&97 zLKN|+`)F6{C<3qrkI6>5}n^3E2dny-%5t|2r;Mv0LL{6iC6f z*Z&^LpiH?2pKXfK3|eu=HL&20O|*5(ueg;9FqgNk71XU5z)Ulkx6sW3J66zn2if7X z3kT=;R@^a<5wMQ=I@Wb7Zl!TIe8%2L04da;UrZsv5|aAg3?BKDREZsW*vvhai}TYc zlg|zzpheEi+~Oz%qRa?hY{LRJKS{kj=y^8w_3S8$5Kt*S6|CL00WO7^{z8`aAg?%m z*5bp9Bpt}#JEAWFCT2rW^J7n;9ThmZ@rMGw!1GK&SP~SJY?KsHk{^Lg%{;(ck_JT2 ztJ)XM9e~b`qk-I>>21ucx+|nTZ2@_f4QZSi0hNljrB&=)=X7P?TZjch|uss-8kSudP$YvDo70OY`Z(dXIR5C5>6D2~2f zgET6Rj*bvT&Q`zmG)jqw8A#;Mg#AF2S#T8`BW3fYP;SMDwoJ{>#OA%bX%zG+<9x-^ z3$VVxfgwaw+7jeLtD*Fg7fQ)fTmXRwqA1AA;w#VZr8St@8~Oz1FzEt=QlL0!P(g{e zg{X?=xbPjxZ9W2}3MANcZX3g;-?_NEvdQvfdq{cWh!BR-w2;Lg^SUH2fU76pM<#kZfO^r3pQ zLwu<4eNTXv8K}CpHW*;R zlg^#xL#j|zWrXO0R29I0=h;#9XO0)BSNu}>YCC+|4aMI-&dIM8utFaC$gE<*;^g4x z#r;liYXIl1*AbfcIRLv6+wB_}Xva_>w}0o7D(jj9Ixln2swxImiSl+LvLnFs9?RPA z?XB2hthndP)(z`C%-6BC3S&G_*o}lnh+Cym<*iw=_I=gV{*K7gBz1QdU`1b;%!X`y z4v|eY#FKXm2!E?bRDC#Kf0GC-Hk^pDMP7k5Sik zSX89<^^Ukpevz@l+t4JGeq4k|SAWO{@!XJaT5tMb?7OtF_9!c1Uj$L?o-R5<+e{FG z;J)a`Y{IwNc{Vzcs;^^uE-SCYKJN3|&0-w#QjO5XhLqoeq&cBBhwt;?(`>>8xAj8h zHxCr>KJ%I#f&M$VqP_~hC7;Ya2}N}@L2yp}WHQN_>jN;puK$TgxO{noG;Q~w&72y% zQ@xABQrYG8j&2(D3RUma2?03__M#DKI%V1k`j~A7DbGF-WrBEJ4F^x_x_0&@LSNp3 z1KLVYcgf$yk;)&>a?z!!RT#klOB**nyv%OZRbfGIAIK`jh3j!|Cx*SC{x_7T0^9+#PSONIL#(#m*qH-mh@dEA9s-8v`a+1G=3WYZsK9Fd9iJJ) za3){KU|7qH6!|DZ_qZ?RiiD?BzI08x^57y6T{{u=I)u!G(!i1!x{HJ_P#SKWu=j{4 zly3~|>udKOe(L$Q=JNd~B}xU!Vc4xmovKh*bILG9VzovPG)|{DR6)a`1!cb-&m;nb z0-=vWy~%y!)M{&^Md!vTtsmPpYHidqn>b7KH@lk{M?$wo^ZZ|>?<3bHyr)K+iCx1I zMxKl^y$u<3%*vsGYYRUG?+XG->v}qzX(k1J_pY6l^;ea*8}0={WH?Q&qxk+DL(IC0 z%wEVoS*tW)DB^-RM2Mf+g*w$|a+J)cqFEw}VU5P@A zVX++Rqx7r<*|vyYH@&UA62CbCY<#Uunp=tO%@Hj4|Lpe2pR#hQ(Nox~lfk1S2wJr% zJtJB~X)!TR^)OW5CIV#k?P^&?`+K$^oEy8>60q08qmzL47P_7@P$|T4mi4-`*ON3? z##H~HS-5JC|BGC;Fs~8BeP(Ufx^foCFbn*Gy#asc4;8V%EPUziaa)0`Z4o2?4@G$; zpnryDVaKJp6D!~`KMHGBr|w%BN65k=`G0zQ9}h+sj!iyiJa3x<)){ofE-l-;lrpEm zoI)6p#%#THjf^L6`vEDT$G`cynIL?^pq6>6fMAz$4aSgglE*{rk|E>n)AywOelyqM z*f5Fyte2CFoI}V=Yde32|CpQHXbW71SJ&fI#weMv!vnn!57Y!ey*0ctil{R-kOpbV z^g;I=2}ZyZi#AyE5v%j?TCoVpf&F?LIBIy!+5^_fIi@Mac!jjyqt!l0?G}bx)5als zI2-&AIwo2G`fT)?9oydGWx71WjUY96)a;sn3^vq2PBTC%89jK7hGluX$%^r9)UrfM zt!LifAAL1o8CIVzJp_-ZRfC5sEpqnkhAR~_X9J)UecA?JYRfoq=Vsr)a+n*7bQcTO zHXNdYWK*#`fRMo`_d2u2uGlQ6!v{81XWbh{7=gByB03lF28l3+y~- zilGeD^or_8Iv)KNu{_vH@LPL0B9q_<-5R*cl@@r4RA^+<__3P_rMPjl)s$&cY6m%z~3E<-yfs&mev`F^y3wDzEP z=3QR5B#iK3W;B<`o8vBY$&2mX^Ap@$;TmirZFj}&*!t_n;16Q)$b^9k+VI#rfdjdE z7>_Q{9`rj-JVEC`uN?VP-&C2WBYcw?Lm7IRrhlv3F&}&UdL<-mjz4m>Kn!V{0mxUB z{#Vix3Cc$17XyFxXtc!tfpO{M({KGxjbM;+1Ew|t`@wm|rs0GlMyK#JyA^LfTo+RG z{%$HOHi#04mHU-zUluY}EaQ2(?M2`k!c}F74A~C&HnKoM%G(Um&D9|AFe_k!liMx( zF@V@x*@fU(Ojk!CMuR%CxXiAWFN_4qVlX7*!~u7&8G_(cvaDhw;~7%aaixb(x`#(U z0~w40_WAs9me55gZy+5#D=>jAV)^_by&Vs5|%YS^Mz|+7s_aJW>Ss?oX^h-flP!<_VXWO^7OlW zBIQq!1O{cAXdsN{MS6aj@+lhW=(se{`#m(&mjr@l8W06Zxj!Kd6aWp_q`e|q0VYfV zVe}q?SKyJz$kL&#h`&KYvNuB1)*bhdoRPRO2s-zD=|)1?_^ zHYf{&(upt6E+WwhJdyed@Uj_#yj`R_|lG{ncCtnw2JnDCePbqtE8k%iB)}GoMcX#h?(Jo5`GtpYJxuYm$^+; zAx0k@FbCv7%)McfONiIXFjEIIx1`M)rR${gv{?03fr9wzbpIb{82FS=`1at}aDyaO zSJM3NpI@qxF9A64aH&AS|IfS729un}9_~BTaz9tH7G56N>w`|i)P^{+JM3IYAu$BS zNs-fy9+g}bl#fu4pdt-K(SjGm!U?1Is?2(JEw9w`upYe93gBEkXo$smBHkgbQgn?# zau-t9r)x{SKAHljFqmh=uD0oFaVwKkM8~no<^lHd)-xMjpLN5|8G4i%GVt+xIq|&*9cdO; z!XD?RvBD`1VQVp$z;!7!`oqUJz|IJW58oI_jkyAp8hNcUA}5YIL1O;)!)6R)%d^*j zjcuf-ECcr7P%KVga6P%f*25&^*?@go1WCP^G(b!HVe5QTAiugsSMgH^bwV#wyTbQZ z=yv%Q1-dML%p>Z~Ja7aNuC?@S!snR71Wm3*vH$STD2)*%HovRV7S;(|#=9;0Db`=N zpD{wi-Dq_695=2O)%ol{m?iZTdzY1u6dQ6e5Q3;~VK3+DG38p6-N*y7twxT0PCSk( z&23DnRy3BtmLO6LrR?TZcx`^mNq&*_TQHI~hs5`=V!TvAP{vQ-z#4cLmg{R03}7?Y zz4{s0;vxd$R8b53c7#wE%Qr*Y4 z($i2a5LMFOW7Nht+0d)nV6ZPO;Zqsq(;TPQj=0%4PLJVOZV?OINlFT!rVWYjR`-X6JjV*Kr( zgVN2lTIxkbF14v)G6LXUy%|O5MHVkVz!=Dew!<=)b3i!8+akBMGPON6uP>W)#hJ3F4UH;ZxOFqw&WtR^; z2dw(L;g^mHodMpEEPJhY^`aF{H%FbBQz}9bQNLNAvGr3o_E{fu7q<)s1gC6R85N?e zmYp240@+`I7-`(7(Tnl&!bSrxi_^`k@^q&QW||1OY{OfUHkaL6R{Gi#;N>12vX^44 zJ-y1uD$Bm@U1JmrfU~B4|)l>pTadL1%1_I6~ zedKti4{5qeZ+}HZ>apynut5snLc7|VwWh^Qj~>9Hl%FfVXBfnCOgr`V9v^WQqoZIr+ zdw7nW;6h@(JosM_Fk8~#J?+Pg8)o9rkUOpNa~P>|npBZxXV0+dm0xf^1cPZ~zzt#j z(VtZ_&F=i4I+xXG_P(G`O}z5}6x*0RxV+{mu*N2UU3N}h4X;H2bU~6t#>7Wni`*N6 z-GDO0Z7zR%s}Lnb)E6d;bO%1Zk@D$3)I_lO5={#*aIXB4p07KGL3Yu$iKXDoe!vJYQ7rw zsj6Jpna2&^=|W<$Zx_BMTruiFDlG8M%3rj7!Jp3e{#&5Fq!pV5wIdfPeqrQghX50? zqAI`V`xpl>XkKB})gAhNTeOV@JH?J1b0h9@(CdU={D#d15(-Oijva#V0TVerQn!{y zq~x~emk7?~dFNakx^S1Zl?mXi^wC`t38euy2_T2YSG^ygXc)Inw%jFU31>-{!^GwV zC0**3^z!2R?BNpMtj|vvAnF9^%6_}Lnu!Jljr^w-$$9xfZ&AxM8mwkc&Av_1zC@3Z zL$#bgku1k%2^4O+hkn!X2Xr~U!(0z^>pX?Vj<6phxw~ypJN|~&du}a_3r~H)B(CZ5 z%@9|#{l08)M-{`Ccfd+t#LFz+22^{$WTc;bOeH%}o@iGWHpzbmgbxJ_ zPv=Aq<_i&s>oI!FB;%3|>vC;cp5FhCIOfm?c$s`EBp7~)k{Fr0N zm5`t#)W{~E(735FC4kPbLzU@y*UlZoUD2w)v40ZM1>ujEZkdrnTp*SVKpHr&J%zJ| zS)EX`%lA2|F+L{v*7`5!u0%{-)2{yc6KtGyU6V_Qd4>E^6bIXzZ^15}t9AS95&l+H z_rmFlp2>Yzo&h@Sa`cbFhldmR2Aydl)J*F3fSp5|7@Q#pe0TQ`?pmE(fjtgtqSpHS zCai*3F_FE4>26SP;Un<*oqPm*jJ#N!hq%tk}X;lAK-dvV=ip9U*(#l3)l1BdawCw z|1`aa>4Ln!*|LD*GG-~UcVN)a<2U0~v83^xGPtlNLz=CJiHoCISBZDB8!FV9tho3* zL!l*mgVP&U{wkoZ@q(#GtMxE*Jv7)1a?5EheAsyplUt zf_as(Ed`19NB7AT`x(XIXLkQk6>H}byD)#Z!L(YJ z&(j^ma2=^6s*K2VJWVXK$)AT}W11!WL3ZJSXH-5p#FIYGzGbJ+Lq*G*gg*0F_FP5D zKKm@9-r(B_x`tTl%V38ZqVQFCg}Wf5DofX>__m>1G3@f~d0Spy2ezMJW4z^AhfRV( zS%Os=#7B?yE^DNtw<}aQCAf4rh|X@9GRbN1iB6p|ZKQJQ{g4;Coj*kf!lfFvZPYyc zR^{&p5k_#r&bi>Aa`+)=>9t|}On zNG)Vz%CrY3zfe(5j;D zOp?)SsiW8VY>OYGzI1cJ;)UAfv2DMtN zk@(ua{k$C72dGo^G@<2J7*%>D>%VaWxYfI6HEpMXT^)-T1lFy06!GllpVf0t-Fowu z0gNWYVKYi%E6o6L%X;z3f;Qu;XS3EFMS1;N<>-od(Hyk{VsgM)0XN0z@85lZj4r=v zwOH}%?dG}Lw8o&FhZ%s?KH38hcJ!_PyPQHL^r+k@|5J)=E2!1GZKleRTS8^^PH)Cj z?G=7Y+Z3xTG^f9UoX!SbMMBD%V`VDA0dVX8`Mve@Gc}V{E9kPV5%B2kdi39nV0}5; z5rZ-q*`=B*GZ^;~r`t>EW^CTeFq>Qe+~6&q$7@&UXfhFzvUelbPjvMDTQzbJB~A*4 z=PZ~1jPVb#C~@K-O!8ZCD~(V-cfz!vuu@0U4$-falkP+$4EVplsD%z^-5NRgMAGLP<9r+_PWzk+!A({{RmsRD=Kk literal 0 HcmV?d00001 diff --git a/main.nf b/main.nf index c02871d..08024a4 100644 --- a/main.nf +++ b/main.nf @@ -2,6 +2,11 @@ nextflow.enable.dsl = 2 +include { qc_bbduk } from "./modules/nevermore/qc/bbduk" +include { fastqc } from "./modules/nevermore/qc/fastqc" +include { classify_sample } from "./modules/nevermore/functions" +include { mapseq; mapseq_otutable } from "./modules/profilers/mapseq" + //def helpMessage() { // log.info """ @@ -37,37 +42,8 @@ nextflow.enable.dsl = 2 // helpMessage() // exit 0 //} -// -if ( !params.min_overlap ) { - params.min_overlap = 20 -} - -if ( !params.left_primer ) { - params.left_primer = 0 -} - -if ( !params.right_primer ) { - params.right_primer = 0 -} -process ltrim { - publishDir "${params.output_dir}", mode: params.publish_mode - - input: - path input_reads - - output: - path("ltrim/*.{fastq,fq,fastq.gz,fq.gz}"), emit: ltrimmed_reads - - script: - """ - mkdir -p ltrim/ - python ${projectDir}/scripts/ltrim.py . ltrim/ - """ - -} - process figaro { publishDir "${params.output_dir}", mode: params.publish_mode @@ -81,28 +57,29 @@ process figaro { script: def paired_params = (is_paired_end == true) ? "-r ${params.right_primer} -m ${params.min_overlap}" : "" + """ figaro -i . -o figaro/ -a ${params.amplicon_length} -f ${params.left_primer} ${paired_params} """ - // -r ${params.right_primer} -m ${params.min_overlap} } + process extract_trim_parameters { input: path(trim_params) output: - //tuple val(left), val(right), emit: trim_params path("trim_params.txt"), emit: trim_params script: """ python $projectDir/scripts/trim_params.py $trim_params > trim_params.txt """ - } + process dada2_preprocess { + label 'dada2' publishDir "${params.output_dir}", mode: params.publish_mode input: @@ -130,6 +107,7 @@ process dada2_preprocess { process dada2_analysis { + label 'dada2' publishDir "${params.output_dir}", mode: params.publish_mode input: @@ -143,8 +121,8 @@ process dada2_analysis { path("error_model.pdf") path("summary_table.tsv") path("result.RData") - path("dada2_figures.pdf") - path("ASVs.tsv") + path("dada2_figures.pdf"), optional: true + path("ASVs.tsv"), emit: asv_sequences path("asv_table.tsv") script: @@ -152,8 +130,142 @@ process dada2_analysis { mkdir -p dada2_in/ for f in \$(find . -maxdepth 1 -type l); do ln -s ../\$f dada2_in/; done rm dada2_in/*.R dada2_in/filter_trim_table.final.tsv - Rscript --vanilla ${dada2_script} dada2_in/ dada2_analysis/ filter_trim_table.final.tsv $task.cpus > dada2_analysis.log + Rscript --vanilla ${dada2_script} dada2_in/ dada2_analysis/ filter_trim_table.final.tsv $task.cpus ${params.dada2_chimera_method} ${params.dada2_chimera_min_fold_parent_over_abundance} > dada2_analysis.log + """ +} + +process asv2fasta { + publishDir "${params.output_dir}", mode: params.publish_mode + + input: + path(asv_seqs) + + output: + tuple val(meta), path("ASVs.fasta"), emit: asv_fasta + + script: + meta = [:] + meta.id = "all" + meta.is_paired = false + """ + tail -n +2 ${asv_seqs} | sed 's/^/>/' | tr '\t' '\n' > ASVs.fasta """ + +} + +process assess_read_length_distribution { + input: + path(fastq_reports) + + output: + path("read_length_thresholds.txt"), emit: read_length + path("READSET_HOMOGENEOUS"), emit: hom_reads_marker, optional: true + + script: + """ + python ${projectDir}/scripts/assess_readlengths.py . > read_length_thresholds.txt + """ +} + + +process homogenise_readlengths { + label 'bbduk' + + input: + tuple val(sample), path(reads) + path(read_lengths) + + output: + path("${sample.id}/*.{fastq,fq,fastq.gz,fq.gz}"), emit: reads + + script: + def maxmem = task.memory.toString().replace(/ GB/, "g") + + if (sample.is_paired) { + """ + mkdir -p ${sample.id} + r1len=\$(head -n 1 ${read_lengths} | cut -f 1) + r2len=\$(head -n 1 ${read_lengths} | cut -f 4) + bbduk.sh -Xmx${maxmem} t=${task.cpus} ordered=t minlength=\$((r1len-1)) ftr=\$((r1len-1)) stats=${sample.id}/${sample.id}.homr_stats_1.txt in=${sample.id}_R1.fastq.gz out=${sample.id}/${sample.id}_R1.fastq.gz + bbduk.sh -Xmx${maxmem} t=${task.cpus} ordered=t minlength=\$((r2len-1)) ftr=\$((r2len-1)) stats=${sample.id}/${sample.id}.homr_stats_2.txt in=${sample.id}_R2.fastq.gz out=${sample.id}/${sample.id}_R2.fastq.gz + """ + } else { + """ + mkdir -p ${sample.id} + r1len=\$(head -n 1 ${read_lengths} | cut -f 1) + bbduk.sh -Xmx${maxmem} t=${task.cpus} ordered=t minlength=\$((r1len-1)) ftr=\$((r1len-1)) stats=${sample.id}/${sample.id}.homr_stats_1.txt in=${sample.id}_R1.fastq.gz out=${sample.id}/${sample.id}_R1.fastq.gz + """ + } +} + + +workflow raw_reads_figaro { + + take: + reads + run_figaro + + main: + qc_bbduk(reads, "${projectDir}/assets/adapters.fa", run_figaro) + + fastqc(qc_bbduk.out.reads) + fastqc_ch = fastqc.out.reports + .map { sample, report -> return report } + .collect() + + assess_read_length_distribution(fastqc_ch) + homogenise_readlengths(qc_bbduk.out.reads, assess_read_length_distribution.out.read_length) + + hom_reads = homogenise_readlengths.out.reads.collect() + figaro(hom_reads, !params.single_end) + extract_trim_parameters(figaro.out.trim_params) + + emit: + reads = hom_reads + trim_params = extract_trim_parameters.out.trim_params + +} + + +workflow check_for_preprocessing { + take: + reads + + main: + reads.view() + fastqc(reads) + assess_read_length_distribution( + fastqc.out.reports + .map { sample, report -> return report } + .collect() + ) + + emit: + readlen_dist = assess_read_length_distribution.out.read_length + hom_reads_marker = assess_read_length_distribution.out.hom_reads_marker +} + + +process prepare_fastqs { + input: + tuple val(sample), path(fq) + + output: + tuple val(sample), path("fastq/${sample.id}/${sample.id}_R*.fastq.gz"), emit: reads + + script: + if (sample.is_paired) { + """ + mkdir -p fastq/${sample.id} + ln -sf ../../${fq[0]} fastq/${sample.id}/${sample.id}_R1.fastq.gz + ln -sf ../../${fq[1]} fastq/${sample.id}/${sample.id}_R2.fastq.gz + """ + } else { + """ + mkdir -p fastq/${sample.id} + ln -sf ../../${fq[0]} fastq/${sample.id}/${sample.id}_R1.fastq.gz + """ + } } @@ -166,34 +278,57 @@ workflow { return tuple(sample, file) } .groupTuple(sort: true) + .map { classify_sample(it[0], it[1]) } - if (!params.single_end) { - library_layout = "PAIRED"; - dada2_preprocess_script = "$projectDir/R_scripts/dada2_preprocess_paired.R" - dada2_analysis_script = "$projectDir/R_scripts/dada2_analysis_paired.R" - is_paired_end = true - } else { + prepare_fastqs(fastq_ch) + + if (params.single_end) { library_layout = "SINGLE"; dada2_preprocess_script = "$projectDir/R_scripts/dada2_preprocess_single.R" dada2_analysis_script = "$projectDir/R_scripts/dada2_analysis_single.R" is_paired_end = false + } else { + library_layout = "PAIRED"; + dada2_preprocess_script = "$projectDir/R_scripts/dada2_preprocess_paired.R" + dada2_analysis_script = "$projectDir/R_scripts/dada2_analysis_paired.R" + is_paired_end = true } print library_layout - fastq_ch.collect().view() + trim_params_ch = Channel.empty() + dada_reads_ch = Channel.empty() + + if (!params.preprocessed) { - files_only_ch = fastq_ch - .map { sample, files -> return files } - .collect() + /* check if dataset was preprocessed */ - files_only_ch.view() + check_for_preprocessing(prepare_fastqs.out.reads) - ltrim(files_only_ch) + raw_reads_figaro(prepare_fastqs.out.reads, check_for_preprocessing.out.hom_reads_marker) + trim_params_ch = trim_params_ch + .concat(raw_reads_figaro.out.trim_params) - figaro(ltrim.out.ltrimmed_reads, is_paired_end) - extract_trim_parameters(figaro.out.trim_params) + dada_reads_ch = dada_reads_ch.concat(raw_reads_figaro.out.reads) - dada2_preprocess(ltrim.out.ltrimmed_reads, extract_trim_parameters.out.trim_params, dada2_preprocess_script, is_paired_end) + } + + trim_params = file("${workDir}/trim_params.txt") + trim_params.text = "-1 -1\n" + + trim_params_ch = trim_params_ch + .concat(Channel.fromPath("${workDir}/trim_params.txt")) + + dada_reads_ch = dada_reads_ch.concat( + prepare_fastqs.out.reads + .map { sample, reads -> reads } + .collect() + ) + + dada2_preprocess(dada_reads_ch.first(), trim_params_ch.first(), dada2_preprocess_script, is_paired_end) dada2_analysis(dada2_preprocess.out.filtered_reads, dada2_preprocess.out.trim_table, dada2_analysis_script, is_paired_end) + asv2fasta(dada2_analysis.out.asv_sequences) + + mapseq(asv2fasta.out.asv_fasta, params.mapseq_db_path, params.mapseq_db_name) + mapseq_otutable(mapseq.out.bac_ssu) } diff --git a/gaga2_conda.yml b/misc/gaga2_conda.yml similarity index 100% rename from gaga2_conda.yml rename to misc/gaga2_conda.yml diff --git a/gaga2_install.sh b/misc/gaga2_install.sh similarity index 100% rename from gaga2_install.sh rename to misc/gaga2_install.sh diff --git a/scripts/check_readlengths.py b/misc/legacy_scripts/check_readlengths.py similarity index 94% rename from scripts/check_readlengths.py rename to misc/legacy_scripts/check_readlengths.py index ad172cb..6a69878 100644 --- a/scripts/check_readlengths.py +++ b/misc/legacy_scripts/check_readlengths.py @@ -36,6 +36,7 @@ def main(): print("WARNING: read lengths are not homogenous in {fq}. Figaro will not be executed.".format(fq=fn)) open(os.path.join(args.output_dir, "SKIP_FIGARO"), "wt").close() return None + open(os.path.join(args.output_dir, "RUN_FIGARO"), "wt").close() diff --git a/scripts/gather_fastq_files.py b/misc/legacy_scripts/gather_fastq_files.py similarity index 100% rename from scripts/gather_fastq_files.py rename to misc/legacy_scripts/gather_fastq_files.py diff --git a/misc/legacy_scripts/hltrim.py b/misc/legacy_scripts/hltrim.py new file mode 100644 index 0000000..f9f572e --- /dev/null +++ b/misc/legacy_scripts/hltrim.py @@ -0,0 +1,88 @@ +import os +import sys +import gzip +import argparse +import pathlib +import contextlib + +from collections import Counter + + +def read_fastq_stream(fq_in): + for line in fq_in: + _id, seq, _, qual = line, next(fq_in), next(fq_in), next(fq_in) + yield _id.strip(), seq.strip(), qual.strip() + + +def process_fastq_record(fq_rec, cutlen): + if fq_rec is None: + return None + + _id, seq, qual = fq_rec + _len = len(seq) + + if _len < cutlen: + return None + + return _id, seq[:cutlen], qual[:cutlen] + + +def main(): + ap = argparse.ArgumentParser() + # ap.add_argument("--r1", type=str) + # ap.add_argument("--r2", type=str) + ap.add_argument("reads", nargs="*", type=str) + ap.add_argument("--outdir", "-o", type=str, default="output") + ap.add_argument("--cutlen", "-c", type=str) + args = ap.parse_args() + + pathlib.Path(args.outdir).mkdir(exist_ok=True, parents=True) + + cutlen = args.cutlen.split(",") + if len(cutlen) == 1: + r1cut, r2cut = int(cutlen[0]), None + else: + r1cut, r2cut, *_ = map(int, cutlen) + + reads = sorted(args.reads) + r1 = reads[0] + r2 = reads[1] if len(reads) > 1 else None + + _open = gzip.open if r1.endswith(".gz") else open + + + r1_in = _open(r1, "rt") + r2_in = _open(r2, "rt") if r2 else contextlib.nullcontext() + + r1_out = _open(os.path.join(args.outdir, r1), "wt") + r2_out = _open(os.path.join(args.outdir, r2), "wt") if r2 else contextlib.nullcontext() + + with r1_in, r2_in, r1_out, r2_out: + r1_stream = read_fastq_stream(r1_in) + r2_stream = read_fastq_stream(r2_in) if r2 else None + + while True: + try: + r1_rec = process_fastq_record(next(r1_stream), r1cut) + except StopIteration: + break + if r2_stream: + try: + r2_rec = process_fastq_record(next(r2_stream), r2cut) + except StopIteration: + raise ValueError("r2 cannot be exhausted before r1") + else: + r2_rec = None + + if r1_rec is not None and (r2_rec is not None or r2_stream is None): + print(*r1_rec[0:2], "+", r1_rec[-1], sep="\n", file=r1_out) + if r2_rec is not None: + print(*r2_rec[0:2], "+", r2_rec[-1], sep="\n", file=r2_out) + + + + + + +if __name__ == "__main__": + main() diff --git a/scripts/ltrim.py b/misc/legacy_scripts/ltrim.py similarity index 100% rename from scripts/ltrim.py rename to misc/legacy_scripts/ltrim.py diff --git a/modules/nevermore/functions.nf b/modules/nevermore/functions.nf new file mode 100644 index 0000000..799253e --- /dev/null +++ b/modules/nevermore/functions.nf @@ -0,0 +1,15 @@ +def classify_sample(sample, files) { + + def meta = [:] + meta.is_paired = (files instanceof Collection && files.size() == 2) + meta.id = sample + + return [meta, files] + + if (meta.is_paired) { + return [meta, files] + } + + return [meta, [files]] + +} diff --git a/modules/nevermore/qc/bbduk.nf b/modules/nevermore/qc/bbduk.nf new file mode 100644 index 0000000..22a91b6 --- /dev/null +++ b/modules/nevermore/qc/bbduk.nf @@ -0,0 +1,35 @@ + +process qc_bbduk { + label 'bbduk' + publishDir path: params.output_dir, mode: params.publish_mode, pattern: "${sample.id}/${sample.id}.bbduk_stats.txt" + + input: + tuple val(sample), path(reads) + path(adapters) + path(run_sentinel) + + output: + tuple val(sample), path("${sample.id}/${sample.id}_R*.fastq.gz"), emit: reads + tuple val(sample), path("${sample.id}/${sample.id}_O.fastq.gz"), emit: orphans, optional: true + path("${sample.id}/${sample.id}.bbduk_stats.txt") + + script: + def maxmem = task.memory.toString().replace(/ GB/, "g") + //def read1 = "in1=${sample.id}_R1.fastq.gz out1=${sample.id}/${sample.id}_R1.fastq.gz" + def read1 = "in1=${sample.id}_R1.fastq.gz out1=${sample.id}/${sample.id}_R1.fastq.gz" + //read2 = sample.is_paired ? "in2=${sample.id}_R2.fastq.gz out2=${sample.id}/${sample.id}_R2.fastq.gz outs=${sample.id}/${sample.id}_O.fastq.gz" : "" + read2 = sample.is_paired ? "in2=${sample.id}_R2.fastq.gz out2=${sample.id}/${sample.id}_R2.fastq.gz outs=${sample.id}/${sample.id}_O.fastq.gz" : "" + + if (params.primers) { + qc_params = params.qc_params_primers + trim_params = "literal=${params.primers} minlen=${params.qc_minlen}" + } else { + qc_params = params.qc_params_adapters + trim_params = "ref=${adapters} minlen=${params.qc_minlen}" + } + + """ + mkdir -p ${sample.id} + bbduk.sh -Xmx${maxmem} t=${task.cpus} ordered=t ${qc_params} ${trim_params} stats=${sample.id}/${sample.id}.bbduk_stats.txt ${read1} ${read2} + """ +} diff --git a/modules/nevermore/qc/fastqc.nf b/modules/nevermore/qc/fastqc.nf new file mode 100644 index 0000000..2afd3fa --- /dev/null +++ b/modules/nevermore/qc/fastqc.nf @@ -0,0 +1,22 @@ +process fastqc { + publishDir params.output_dir, mode: params.publish_mode, pattern: "raw_counts/*.txt" + + input: + tuple val(sample), path(reads) + + output: + tuple val(sample), path("fastqc/*/*fastqc_data.txt"), emit: reports + tuple val(sample), path("raw_counts/${sample.id}.txt"), emit: counts + + script: + def process_r2 = (sample.is_paired) ? "fastqc -t $task.cpus --extract --outdir=fastqc ${sample.id}_R2.fastq.gz && mv fastqc/${sample.id}_R2_fastqc/fastqc_data.txt fastqc/${sample.id}_R2_fastqc/${sample.id}_R2_fastqc_data.txt" : ""; + + """ + mkdir -p fastqc + mkdir -p raw_counts + fastqc -t $task.cpus --extract --outdir=fastqc ${sample.id}_R1.fastq.gz && mv fastqc/${sample.id}_R1_fastqc/fastqc_data.txt fastqc/${sample.id}_R1_fastqc/${sample.id}_R1_fastqc_data.txt + ${process_r2} + grep "Total Sequences" fastqc/*/*data.txt > seqcount.txt + echo \$(wc -l seqcount.txt)\$'\t'\$(head -n1 seqcount.txt | cut -f 2) > raw_counts/${sample.id}.txt + """ +} diff --git a/modules/profilers/mapseq.nf b/modules/profilers/mapseq.nf new file mode 100644 index 0000000..78338ca --- /dev/null +++ b/modules/profilers/mapseq.nf @@ -0,0 +1,58 @@ +process mapseq { + input: + tuple val(sample), path(seqs) + path(db_path) + val(db_name) + + output: + path("${sample.id}/${sample.id}_bac_ssu.mseq"), emit: bac_ssu + + script: + def db = (db_name == "default" || db_name == "") ? "" : "${db_path}/${db_name}.fasta ${db_path}/*.tax*" + + """ + mkdir -p ${sample.id} + ${params.mapseq_bin} ${seqs} > ${sample.id}/${sample.id}_bac_ssu.mseq + """ +} + + +process mapseq_otutable { + publishDir params.output_dir, mode: params.publish_mode + + input: + path(mapped_seqs) + + output: + path("mapseq.otucounts.txt") + + script: + """ + ${params.mapseq_bin} -otucounts ${mapped_seqs} > mapseq.otucounts.txt + """ +} + + +process collate_mapseq_tables { + publishDir params.output_dir, mode: params.publish_mode + + input: + path(mapped_seqs) + + output: + path("mapseq_tables/mapseq_counts_genus_*_bac_ssu.tsv"), emit: ssu_tables + + script: + if (mapped_seqs.size() == 2) { + """ + mkdir -p mapseq_tables + ${params.mapseq_bin} -otutable -tl 5 \$(ls *_R1_bac_ssu.mseq) | sed 's/_R1_bac_ssu.mseq//g' > mapseq_tables/mapseq_counts_genus_fwd_bac_ssu.tsv + ${params.mapseq_bin} -otutable -tl 5 \$(ls *_R2_bac_ssu.mseq) | sed 's/_R2_bac_ssu.mseq//g' > mapseq_tables/mapseq_counts_genus_rev_bac_ssu.tsv + """ + } else { + """ + mkdir -p mapseq_tables + ${params.mapseq_bin} -otutable -tl 5 \$(ls *_R1_bac_ssu.mseq) | sed 's/_R1_bac_ssu.mseq//g' > mapseq_tables/mapseq_counts_genus_fwd_bac_ssu.tsv + """ + } +} diff --git a/nextflow.config b/nextflow.config index 8d27654..904d854 100644 --- a/nextflow.config +++ b/nextflow.config @@ -4,5 +4,5 @@ manifest { description = "DADA2/figaro-based 16S amplicon analysis pipeline" name = "gaga2" nextflowVersion = ">=21.0" - version = "0.3" + version = "0.4" } diff --git a/scripts/assess_readlengths.py b/scripts/assess_readlengths.py new file mode 100644 index 0000000..3135796 --- /dev/null +++ b/scripts/assess_readlengths.py @@ -0,0 +1,64 @@ +import argparse +import glob +import os + +from collections import Counter + +def parse_fastqc_report(f): + readlengths = list() + active = False + for line in open(f): + active = active or (not active and line.startswith(">>Sequence Length Distribution")) + if active: + if line.startswith(">>END_MODULE"): + break + if line[0] not in (">", "#"): + length, count = line.split("\t") + readlengths.append((int(length.split("-")[0]), float(count))) + return dict(item for item in readlengths if item[1] > 0) + + +def main(): + + ap = argparse.ArgumentParser() + ap.add_argument("input_dir", type=str, default=".") + args = ap.parse_args() + + read_yields = {} + for r in (1, 2): + read_lengths = Counter() + for fastq_report in glob.glob(os.path.join(args.input_dir, f"*{r}_fastqc_data.txt")): + read_lengths.update(parse_fastqc_report(fastq_report)) + + yields = list() + for length, count in read_lengths.items(): + yields.append( + (length, sum(v for k, v in read_lengths.items() if k >= length), sum(v * length for k, v in read_lengths.items() if k >= length)) + ) + for length, reads, bases in sorted(yields, key=lambda x:(x[2], x[1]), reverse=True): + read_yields.setdefault(r, list()).append((length, reads, bases)) + + r1_lengths = read_yields.get(1, list()) + r2_lengths = read_yields.get(2, list()) + + if not r2_lengths: + all_lengths = r1_lengths + is_hom = len(r1_lengths) == 1 + else: + if len(r1_lengths) < len(r2_lengths): + r1_lengths.extend(("NA", "NA", "NA") for i in range(len(r2_lengths) - len(r1_lengths))) + elif len(r2_lengths) < len(r1_lengths): + r2_lengths.extend(("NA", "NA", "NA") for i in range(len(r1_lengths) - len(r2_lengths))) + all_lengths = [x + y for x, y in zip(r1_lengths, r2_lengths)] + is_hom = len(r1_lengths) == len(r2_lengths) == 1 + + for item in all_lengths: + print(*item, sep="\t") + + if is_hom: + open("READSET_HOMOGENEOUS", "wt").close() + + + +if __name__ == "__main__": + main()