Skip to content

Commit

Permalink
Merge pull request #11 from U-BDS/feature_intron_counting
Browse files Browse the repository at this point in the history
Feature intron counting
  • Loading branch information
atrull314 authored Jan 9, 2024
2 parents 61f1c66 + 85af2c3 commit 42a8665
Show file tree
Hide file tree
Showing 34 changed files with 294 additions and 1,082 deletions.
2 changes: 2 additions & 0 deletions assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ top_modules:
- general_stats
- custom_content
- samtools
- rseqc
- umitools
- fastqc:
name: "FastQC (raw)"
anchor: "fastqc"
Expand Down
39 changes: 15 additions & 24 deletions bin/check_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ class RowChecker:
def __init__(
self,
sample_col="sample",
first_col="fastq_1",
second_col="fastq_2",
first_col="fastq",
second_col="cell_count",
single_col="single_end",
**kwargs,
):
Expand All @@ -44,9 +44,9 @@ def __init__(
sample_col (str): The name of the column that contains the sample name
(default "sample").
first_col (str): The name of the column that contains the first (or only)
FASTQ file path (default "fastq_1").
second_col (str): The name of the column that contains the second (if any)
FASTQ file path (default "fastq_2").
FASTQ file path (default "fastq").
second_col (str): The name of the column that contains the expected cell
count for the sample (default "cell_count").
single_col (str): The name of the new column that will be inserted and
records whether the sample contains single- or paired-end sequencing
reads (default "single_end").
Expand All @@ -72,7 +72,7 @@ def validate_and_transform(self, row):
self._validate_sample(row)
self._validate_first(row)
self._validate_second(row)
self._validate_pair(row)
self._set_single_end(row)
self._seen.add((row[self._sample_col], row[self._first_col]))
self.modified.append(row)

Expand All @@ -86,24 +86,17 @@ def _validate_sample(self, row):
def _validate_first(self, row):
"""Assert that the first FASTQ entry is non-empty and has the right format."""
if len(row[self._first_col]) <= 0:
raise AssertionError("At least the first FASTQ file is required.")
raise AssertionError("The FASTQ file is required.")
self._validate_fastq_format(row[self._first_col])

def _validate_second(self, row):
"""Assert that the second FASTQ entry has the right format if it exists."""
if len(row[self._second_col]) > 0:
self._validate_fastq_format(row[self._second_col])
"""Assert that the cell count entry exists."""
if len(row[self._second_col]) <= 0:
raise AssertionError("The cell count is required.")

def _validate_pair(self, row):
def _set_single_end(self, row):
"""Record the row as single-end; the samplesheet has a single FASTQ column, so no pair checks apply."""
if row[self._first_col] and row[self._second_col]:
row[self._single_col] = False
first_col_suffix = Path(row[self._first_col]).suffixes[-2:]
second_col_suffix = Path(row[self._second_col]).suffixes[-2:]
if first_col_suffix != second_col_suffix:
raise AssertionError("FASTQ pairs must have the same file extensions.")
else:
row[self._single_col] = True
row[self._single_col] = True

def _validate_fastq_format(self, filename):
"""Assert that a given filename has one of the expected FASTQ extensions."""
Expand Down Expand Up @@ -179,16 +172,14 @@ def check_samplesheet(file_in, file_out):
This function checks that the samplesheet has the following structure;
see also the `viral recon samplesheet`_::
sample,fastq_1,fastq_2
SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz
SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz
SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz,
sample,fastq,cell_count
SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz,1000
.. _viral recon samplesheet:
https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv
"""
required_columns = {"sample", "fastq_1", "fastq_2"}
required_columns = {"sample", "fastq", "cell_count"}
# See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`.
with file_in.open(newline="") as in_handle:
reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle))
Expand Down
195 changes: 4 additions & 191 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -192,16 +192,6 @@ process {
}
}

if (params.intron_retention_method){
process {
withName: '.*:PREPARE_REFERENCE_FILES:PREPARE_GTF' {
publishDir = [
enabled: false
]
}
}
}

// PAFTOOLS
process {
withName: '.*:PAFTOOLS' {
Expand Down Expand Up @@ -288,7 +278,7 @@ if (!params.skip_trimming) {
}

// NANOFILT
if ( params.trimming_software == 'nanofilt' ){
if ( !params.skip_trimming ){
process {
withName:'.*:NANOFILT' {
publishDir = [
Expand All @@ -307,28 +297,7 @@ if (!params.skip_trimming) {
}
}

// PROWLERTRIMMER
} else if (params.trimming_software == 'prowler') {

process {
withName:'.*:PROWLERTRIMMER' {
publishDir = [
enabled: false
]
}
}

process {
withName: '.*:ZIP_TRIM' {
publishDir = [
path: { "${params.outdir}/${meta.id}/fastq/trimmed_prowlertrimmer" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
}
}

}

// PREEXTRACT_FASTQ
Expand Down Expand Up @@ -514,7 +483,6 @@ process {
// UMI DEDUPLICATION //
///////////////////////

// UMITOOLS_DEDUP
process {
withName: '.*:UMITOOLS_DEDUP' {
ext.prefix = { "${meta.id}.dedup" }
Expand All @@ -526,174 +494,19 @@ process {
}
}

//////////////////////////////
// INTRON RETENTION METHODS //
//////////////////////////////

if (params.intron_retention_method == "1"){

////////////////////////
// TRANSCRIPT_TO_EXON //
////////////////////////

process {
withName: '.*:TRANSCRIPT_TO_EXON' {
publishDir = [
path: { "${params.outdir}/references/gtf/" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
}
} else if (params.intron_retention_method == "2"){

///////////////////
// SORT_BEDTOOLS //
///////////////////

process {
withName: '.*:SORT_BEDTOOLS' {
publishDir = [
enabled: false
]
}
}

///////////////////////
// CREATE_INTRON_GTF //
///////////////////////

process {
withName: '.*:CREATE_INTRON_GTF' {
publishDir = [
enabled: false
]
}
}

//////////////////////
// GET_GTF_FEATURES //
//////////////////////

process {
withName: '.*:GET_GTF_FEATURES' {
publishDir = [
enabled: false
]
}
}

/////////////
// GTF2BED //
/////////////

process {
withName: '.*:GTF2BED' {
publishDir = [
enabled: false
]
}
}

////////////////////////
// UCSC_BEDTOGENEPRED //
////////////////////////

process {
withName: '.*:UCSC_BEDTOGENEPRED' {
publishDir = [
enabled: false
]
}
}

////////////////////////
// UCSC_GENEPREDTOGTF //
////////////////////////

process {
withName: '.*:UCSC_GENEPREDTOGTF' {
publishDir = [
enabled: false
]
}
}

//////////////////////////
// CUSTOM_GETCHROMSIZES //
//////////////////////////

process {
withName: '.*:CUSTOM_GETCHROMSIZES' {
publishDir = [
enabled: false
]
}
}

////////////////////
// COMPLEMENT_GTF //
////////////////////

process {
withName: '.*:COMPLEMENT_GTF' {
publishDir = [
enabled: false
]
}
}

//////////////////////////
// COMPLEMENT_NONINTRON //
//////////////////////////

process {
withName: '.*:COMPLEMENT_NONINTRON' {
publishDir = [
enabled: false
]
}
}

/////////////
// CAT_BED //
/////////////

process {
withName: '.*:CAT_BED' {
publishDir = [
enabled: false
]
}
}

/////////////
// CAT_GTF //
/////////////
process {
withName: '.*:CAT_GTF' {
publishDir = [
path: { "${params.outdir}/references/gtf/" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
}
}




//////////////
// ISOQUANT //
//////////////

// NOTE: with_inconsistent will include introns within the counts, whereas unique_only will only include exons
process {
withName: '.*:ISOQUANT' {
ext.args = {
[
"--complete_genedb",
params.stranded == "forward" ? "--stranded forward" : params.stranded == "reverse" ? "--stranded reverse" : "--stranded none",
params.retain_introns ? "--gene_quantification with_inconsistent ": "--gene_quantification unique_only ",
params.retain_introns ? "--transcript_quantification with_inconsistent ": "--transcript_quantification unique_only "
].join(' ').trim()
}
publishDir = [
Expand Down
2 changes: 0 additions & 2 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,9 @@ params {

// Barcode options
cell_barcode_pattern = ""
cell_amount = 400
identifier_pattern = "fixed_seq_1,cell_barcode_1,umi_1,fixed_seq_2"
cell_barcode_lengths = "16"
umi_lengths = "12"
fixed_seqs = "CTACACGACGCTCTTCCGATCT, TTTTTTTTTT"
barcode_caller = "blaze"

}
25 changes: 5 additions & 20 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,6 @@
"https://github.com/nf-core/modules.git": {
"modules": {
"nf-core": {
"bedtools/complement": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"]
},
"cat/cat": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
Expand All @@ -25,11 +20,6 @@
"git_sha": "05c280924b6c768d484c7c443dad5e605c4ff4b4",
"installed_by": ["modules"]
},
"custom/getchromsizes": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"]
},
"fastqc": {
"branch": "master",
"git_sha": "9a4517e720bc812e95b56d23d15a1653b6db4f53",
Expand All @@ -50,6 +40,11 @@
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"]
},
"rseqc/readdistribution": {
"branch": "master",
"git_sha": "7bb1b295a359bcbf0a0ea03d19d40a00916805ee",
"installed_by": ["modules"]
},
"samtools/faidx": {
"branch": "master",
"git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe",
Expand Down Expand Up @@ -85,16 +80,6 @@
"git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f",
"installed_by": ["modules"]
},
"stringtie/merge": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"]
},
"stringtie/stringtie": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"]
},
"umitools/dedup": {
"branch": "master",
"git_sha": "7297204bf49273300a3dbfa4b7a4027c8683f1bd",
Expand Down
Loading

0 comments on commit 42a8665

Please sign in to comment.