Skip to content

Commit

Permalink
style(grohmm): Clean up
Browse files Browse the repository at this point in the history
Editorconfig, styler, consistent naming
  • Loading branch information
edmundmiller committed Oct 21, 2024
1 parent afbb7b6 commit 54759ca
Show file tree
Hide file tree
Showing 7 changed files with 31 additions and 301 deletions.
6 changes: 5 additions & 1 deletion .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ trim_trailing_whitespace = true
indent_size = 4
indent_style = space

[*.{md,yml,yaml,html,css,scss,js}]
[*.{md,yml,yaml,html,css,scss,js,R,Rmd}]
indent_size = 2

# These files are edited and tested upstream in nf-core/modules
Expand All @@ -31,3 +31,7 @@ indent_size = unset
# ignore python and markdown
[*.{py,md}]
indent_style = unset

# Follow tidyverse style for R
[*.{R,Rmd}]
indent_size = 2
285 changes: 0 additions & 285 deletions bin/custom_makeConsensusAnnotations.R

This file was deleted.

4 changes: 2 additions & 2 deletions bin/parameter_tuning.R → bin/grohmm_parametertuning.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@ tune <- data.frame(
LtProbB = args$ltprobb,
UTS = args$uts
)
Fp <- windowAnalysis(alignments, strand = "+", windowSize = 50)
Fm <- windowAnalysis(alignments, strand = "-", windowSize = 50)
fp <- windowAnalysis(alignments, strand = "+", windowSize = 50)
fm <- windowAnalysis(alignments, strand = "-", windowSize = 50)
hmm <- detectTranscripts(
Fp = Fp,
Fm = Fm,
Expand Down
27 changes: 19 additions & 8 deletions bin/transcriptcalling_grohmm.R → bin/grohmm_transcriptcalling.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ write.table(
print("Input transcript annotations")
kg_db <- makeTxDbFromGFF(args$gxf)
kg_tx <- transcripts(kg_db, columns = c("gene_id", "tx_id", "tx_name"))
# TODO I wonder if I could speed things up by filtering by chromosome at the Nextflow level
# TODO I wonder if I could speed things up by filtering
# by chromosome at the Nextflow level...
# https://github.com/google/deepvariant/issues/744
# filter=list(tx_chrom="chr7"))
# exclude any transcripts that are located on chromosomes labeled with "random".
Expand All @@ -170,8 +171,10 @@ get_expressed_annotations <- function(features, reads) {
f_limit <- limitToXkb(features)
count <- countOverlaps(f_limit, reads)
features <- features[count != 0, ]
return(features[(quantile(width(features), .05) < width(features)) &
(width(features) < quantile(width(features), .95)), ])
return(features[
(quantile(width(features), .05) < width(features)) &
(width(features) < quantile(width(features), .95)),
])
}
con_expressed <- get_expressed_annotations(
features = kg_consensus,
Expand All @@ -181,15 +184,20 @@ b_plus <- breakTranscriptsOnGenes(tx_hmm, kg_consensus, strand = "+")
b_minus <- breakTranscriptsOnGenes(tx_hmm, kg_consensus, strand = "-")
tx_broken <- c(b_plus, b_minus)
# Assign unique IDs if they're missing
if (is.null(mcols(tx_broken)$transcript_id) || any(is.na(mcols(tx_broken)$transcript_id))) {
mcols(tx_broken)$transcript_id <- paste0("TX", seq_along(tx_broken))
if (
is.null(mcols(tx_broken)$transcript_id) ||
any(is.na(mcols(tx_broken)$transcript_id))
) {
mcols(tx_broken)$transcript_id <- paste0("TX", seq_along(tx_broken))
}

# Filter out any transcripts with NA values in start or end positions
tx_broken_filtered <- tx_broken[!is.na(start(tx_broken)) & !is.na(end(tx_broken))]
tx_broken_filtered <-
tx_broken[!is.na(start(tx_broken)) & !is.na(end(tx_broken))]

# Ensure that kg_consensus also doesn't contain NA values
kg_consensus_filtered <- kg_consensus[!is.na(start(kg_consensus)) & !is.na(end(kg_consensus))]
kg_consensus_filtered <-
kg_consensus[!is.na(start(kg_consensus)) & !is.na(end(kg_consensus))]

# Now call combineTranscripts with the filtered data
tx_final <- combineTranscripts(tx_broken_filtered, kg_consensus_filtered)
Expand All @@ -212,7 +220,10 @@ capture.output(td_final, file = paste0(args$outprefix, ".tdFinal.txt"))

# Write the data used in the plot to a CSV file
data_to_write <- data.frame(x = td_final$x, profile = td_final$profile)
write.csv(data_to_write, file = paste0(args$outprefix, ".tdFinal_mqc.csv"), row.names = FALSE)
write.csv(data_to_write,
file = paste0(args$outprefix, ".tdFinal_mqc.csv"),
row.names = FALSE
)

########################
## CITE PACKAGES USED ##
Expand Down
2 changes: 1 addition & 1 deletion modules/local/grohmm/parametertuning/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ process GROHMM_PARAMETERTUNING {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}_${UTS}_${LtProbB}"
"""
parameter_tuning.R \\
grohmm_parametertuning.R \\
--bam_file ${bams} \\
--outprefix ${prefix} \\
--gxf $gxf \\
Expand Down
2 changes: 1 addition & 1 deletion modules/local/grohmm/transcriptcalling/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ process GROHMM_TRANSCRIPTCALLING {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
transcriptcalling_grohmm.R \\
grohmm_transcriptcalling.R \\
--bam_file ${bams} \\
--tuning_file ${tuning_file} \\
--outprefix ${prefix} \\
Expand Down
Loading

0 comments on commit 54759ca

Please sign in to comment.